From 56a722a1d01eb49bfbe5120065c615ecf1e16fe5 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Mon, 10 Jul 2017 14:22:18 +0800 Subject: [PATCH 001/170] output all beam search results in layer group. --- .../RecurrentGradientMachine.cpp | 104 ++++++++++++------ .../RecurrentGradientMachine.h | 7 +- paddle/parameter/Argument.cpp | 36 +++--- paddle/parameter/Argument.h | 1 + .../paddle/trainer_config_helpers/networks.py | 13 +-- 5 files changed, 102 insertions(+), 59 deletions(-) diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp index 41e0929959..4cb5b8ec2d 100644 --- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp +++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp @@ -1012,11 +1012,6 @@ void RecurrentGradientMachine::generateSequence() { /* width */ resultNum, false, /* useGpu */ false); - Matrix::resizeOrCreate(generator_.outArg.value, - /* height */ maxGenWordCount, - /* width */ 1, - false, - /* useGpu */ false); } ICpuGpuVector::resizeOrCreate(generator_.outArg.sequenceStartPositions, numSequences + 1, @@ -1026,7 +1021,7 @@ void RecurrentGradientMachine::generateSequence() { } else { oneWaySearch(numSequences); } - if (dataArgsSize_) createDataOutlink(batchMachineIdVec_); + if (dataArgsSize_) createDataOutlink(); size_t size = generator_.ids.size(); generator_.outArg.ids->resize(size); @@ -1106,6 +1101,7 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) { } batchMachineIdVec_.clear(); + batchMachineStartPos_.clear(); int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false); starts[0] = 0; generator_.ids.clear(); @@ -1312,13 +1308,20 @@ void RecurrentGradientMachine::fillGenOutputs() { finalPaths_[i].resize(minFinalPathsSize); } - batchMachineIdVec_.clear(); generator_.ids.clear(); int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false); starts[0] = 0; if (numResults > 1) { - real* probs = generator_.outArg.in->getData(); + int idsProbSaveSize = 0; + for (auto inSeq : finalPaths_) { + for (auto path : inSeq) idsProbSaveSize += path.ids.size(); + idsProbSaveSize += inSeq.size(); + } + Matrix::resizeOrCreate( + generator_.outArg.value, idsProbSaveSize, 1, false, false); real* idsProb = generator_.outArg.value->getData(); + + real* probs = generator_.outArg.in->getData(); size_t curPos = 0; for (size_t i = 0; i < finalPaths_.size(); ++i) { for (size_t j = 0; j < finalPaths_[i].size(); ++j) { @@ -1333,24 +1336,16 @@ void RecurrentGradientMachine::fillGenOutputs() { curPos += genLen; idsProb[curPos++] = -1.0; probs[i * numResults + j] = path.logProb; - - if (!j && dataArgsSize_) { - // in beam search, here only reserved the top 1 generated result - // for out_links that are not the generated word indices. 
- batchMachineIdVec_.insert(batchMachineIdVec_.end(), - path.machineIdVec.begin(), - path.machineIdVec.end()); - } } starts[i + 1] = generator_.ids.size(); } } else { for (size_t i = 0; i < finalPaths_.size(); ++i) { CHECK(!finalPaths_[i].empty()); - generator_.ids.insert(generator_.ids.begin(), - finalPaths_[i][0].ids.begin(), - finalPaths_[i][0].ids.end()); - starts[i + 1] = starts[i] + finalPaths_[i][0].ids.size(); + Path& path = finalPaths_[i][0]; + generator_.ids.insert( + generator_.ids.begin(), path.ids.begin(), path.ids.end()); + starts[i + 1] = starts[i] + path.ids.size(); } } } @@ -1364,25 +1359,70 @@ void RecurrentGradientMachine::copyDataOutlinkFrame(size_t machineCur) { } } -void RecurrentGradientMachine::createDataOutlink( - std::vector& machineIdVec) { - size_t seqNum = - getBeamSize() > 1UL ? finalPaths_.size() : finalPaths_[0].size(); - std::vector starts(seqNum + 1, 0); - for (size_t i = 0; i < seqNum; ++i) { - size_t seqLen = getBeamSize() > 1UL ? finalPaths_[i][0].ids.size() - : finalPaths_[0][i].ids.size(); - starts[i + 1] = starts[i] + seqLen; +void RecurrentGradientMachine::createDataOutlinkSelRowsInfo( + bool isSeq, std::vector& outArgs) { + batchMachineIdVec_.clear(); + + size_t seqIdx = 0; + for (size_t i = 0; i < finalPaths_.size(); ++i) { + for (size_t j = 0; j < finalPaths_[i].size(); ++j) { + std::vector& machineIdVec = finalPaths_[i][j].machineIdVec; + if (isSeq) { + for (size_t i = 0; i < machineIdVec.size(); ++i) { + size_t rowId = machineIdVec[i]; + int* seqPos = + outArgs[i].sequenceStartPositions->getMutableData(false); + batchMachineIdVec_.push_back(seqPos[rowId]); + } + } else { + batchMachineIdVec_.insert( + batchMachineIdVec_.end(), machineIdVec.begin(), machineIdVec.end()); + } + seqIdx++; + } + } +} + +void RecurrentGradientMachine::createDataOutlinkCopySizeInfo( + bool isSeq, std::vector& outArgs, std::vector& copySize) { + size_t totalSeqNum = std::accumulate( + finalPaths_.begin(), + finalPaths_.end(), + 0UL, + [](size_t a, const std::vector& b) { return a + b.size(); }); + copySize.resize(totalSeqNum, 1); + + batchMachineStartPos_.resize(totalSeqNum + 1, 0); + if (isSeq) { + ICpuGpuVectorPtr inputSeqStartPos = outArgs[0].sequenceStartPositions; + CHECK_EQ(inputSeqStartPos->getSize() - 1, finalPaths_.size()); + int* starts = inputSeqStartPos->getMutableData(false); + int seqId = 0; + for (int i = 0; i < finalPaths_.size(); ++i) { + for (int j = 0; j < finalPaths_[i].size(); ++j) { + copySize[seqId] = starts[i + 1] - starts[i]; + batchMachineStartPos_[seqId + 1] = + batchMachineStartPos_[seqId] + finalPaths_[i][j].ids.size(); + seqId++; + } + } } +} +void RecurrentGradientMachine::createDataOutlink() { for (size_t i = 0; i < dataArgsSize_; i++) { + bool isSeq = dataArgsFrame_[i][0].hasSeq(); + std::vector copySize; + createDataOutlinkCopySizeInfo(isSeq, dataArgsFrame_[i], copySize); + createDataOutlinkSelRowsInfo(isSeq, dataArgsFrame_[i]); + dataArgs_[i].concat(dataArgsFrame_[i], - machineIdVec, - starts, + batchMachineIdVec_, + batchMachineStartPos_, + copySize, useGpu_, HPPL_STREAM_1, PASS_TEST); - auto dataAgent = dynamic_cast(outFrameLines_[i + 1].agentLayer.get()); CHECK_NOTNULL(dataAgent); diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h index fb3fc5877a..bd096770b7 100644 --- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h +++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h @@ -480,7 +480,11 @@ private: * @param machineIdVec : 
select a row of output matrix in each frame * that the generation process expanded. */ - void createDataOutlink(std::vector& machineIdVec); + void createDataOutlink(); + void createDataOutlinkCopySizeInfo(bool isSeq, + std::vector& outArgs, + std::vector& copySize); + void createDataOutlinkSelRowsInfo(bool isSeq, std::vector& outArgs); /* * @brief used in beam search, connect previous frame to form recurrent link @@ -543,6 +547,7 @@ private: std::vector topIds_; std::vector seqIds_; std::vector batchMachineIdVec_; + std::vector batchMachineStartPos_; std::vector> finalPaths_; std::vector minFinalPathLogProb_; BeamSearchControlCallbacks* beamSearchCtrlCallbacks_; diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp index ef72b973c1..e7522def08 100644 --- a/paddle/parameter/Argument.cpp +++ b/paddle/parameter/Argument.cpp @@ -276,17 +276,21 @@ int32_t Argument::resizeAndCopyFrom(const Argument& src, void Argument::concat(const std::vector& args, const std::vector& selectRows, const std::vector& seqStartPos, + const std::vector& copySize, bool useGpu, hl_stream_t stream, PassType passType) { CHECK(!subSequenceStartPositions) << "undefined behavior for subsequence positions"; - size_t batchSize = selectRows.size(); + size_t batchSize = 0; + for (size_t i = 0; i < copySize.size(); ++i) + batchSize += copySize[i] * (seqStartPos[i + 1] - seqStartPos[i]); + auto copyArg = [batchSize, stream](MatrixPtr& dst, MatrixPtr src, - int startRow, - int pos, + int desStartRow, + int srcStartRow, int size, bool useGpu) { if (!src) { @@ -300,8 +304,8 @@ void Argument::concat(const std::vector& args, dst->resize(batchSize, width); } - MatrixPtr tmpMatrix = dst->subMatrix(startRow, size); - tmpMatrix->copyFrom(*src->subMatrix(pos, size), stream); + MatrixPtr tmpMatrix = dst->subMatrix(desStartRow, size); + tmpMatrix->copyFrom(*src->subMatrix(srcStartRow, size), stream); }; auto copyIds = [batchSize, stream](IVectorPtr& dst, @@ -339,24 +343,24 @@ void Argument::concat(const std::vector& args, dataId = args[0].dataId; CHECK_NE(seqStartPos.size(), 0UL); - size_t sampleNum = seqStartPos.size() - 1; - for (size_t i = 0; i < sampleNum; ++i) { + int desStartRow = 0; + for (size_t i = 0; i < copySize.size(); ++i) { int startPos = seqStartPos[i]; int endPos = seqStartPos[i + 1]; CHECK_GE(args.size(), static_cast(endPos - startPos)); for (int j = startPos; j < endPos; ++j) { const Argument& arg = args[j - startPos]; - CHECK_EQ(arg.dataId, dataId) << "Arguments in concat should have" - << " same dataId"; - const int copySize = 1; - const int rowIdx = selectRows[j]; - copyArg(in, arg.in, j, rowIdx, copySize, useGpu); - copyArg(value, arg.value, j, rowIdx, copySize, useGpu); + CHECK_EQ(arg.dataId, dataId) << "Arguments in concat should have the " + << "same dataId"; + const int srcStartRow = selectRows[j]; + copyArg(in, arg.in, desStartRow, srcStartRow, copySize[i], useGpu); + copyArg(value, arg.value, desStartRow, srcStartRow, copySize[i], useGpu); if (passType != PASS_TEST) { - copyArg(grad, arg.grad, j, rowIdx, copySize, useGpu); + copyArg(grad, arg.grad, desStartRow, srcStartRow, copySize[i], useGpu); } - copyIds(ids, arg.ids, j, rowIdx, copySize, useGpu); - copyStrs(strs, arg.strs, j, rowIdx, copySize, useGpu); + copyIds(ids, arg.ids, desStartRow, srcStartRow, copySize[i], useGpu); + copyStrs(strs, arg.strs, desStartRow, srcStartRow, copySize[i], useGpu); + desStartRow += copySize[i]; } } ICpuGpuVector::resizeOrCreate( diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h index 
0ccdef802e..be87175658 100644 --- a/paddle/parameter/Argument.h +++ b/paddle/parameter/Argument.h @@ -240,6 +240,7 @@ struct Argument { void concat(const std::vector& args, const std::vector& selectRows, const std::vector& seqStartPos, + const std::vector& copySize, bool useGpu, hl_stream_t stream, PassType passType); diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py index b77932ce5f..c0b2ced234 100755 --- a/python/paddle/trainer_config_helpers/networks.py +++ b/python/paddle/trainer_config_helpers/networks.py @@ -1370,14 +1370,7 @@ def simple_attention(encoded_sequence, param_attr=softmax_param_attr, name="%s_softmax" % name, bias_attr=False) - - scaled = scaling_layer( - weight=attention_weight, - input=encoded_sequence, - name='%s_scaling' % name) - - return pooling_layer( - input=scaled, pooling_type=SumPooling(), name="%s_pooling" % name) + return attention_weight def inputs(layers, *args): @@ -1395,7 +1388,7 @@ def inputs(layers, *args): if len(args) != 0: layers.extend(args) - Inputs(* [l.name for l in layers]) + Inputs(*[l.name for l in layers]) def outputs(layers, *args): @@ -1438,7 +1431,7 @@ def outputs(layers, *args): assert len(layers) > 0 if HasInputsSet(): # input already set - Outputs(* [l.name for l in layers]) + Outputs(*[l.name for l in layers]) return # just return outputs. if len(layers) != 1: From 4c134c7c7d201a9f28449974d489111b51c6f6fb Mon Sep 17 00:00:00 2001 From: caoying03 Date: Fri, 14 Jul 2017 17:21:36 +0800 Subject: [PATCH 002/170] add comments. --- .../RecurrentGradientMachine.h | 38 ++++++++++++++++--- paddle/parameter/Argument.cpp | 4 +- .../paddle/trainer_config_helpers/networks.py | 4 +- 3 files changed, 36 insertions(+), 10 deletions(-) diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h index a3d04b207c..cc0eda9f13 100644 --- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h +++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h @@ -190,7 +190,7 @@ public: std::vector ids; /** - * @brief idsProb, log probability of each generated words. + * @brief idsProb, log probability of each generated word. */ std::vector idsProb; @@ -472,16 +472,42 @@ private: void copyDataOutlinkFrame(size_t machineCur); /* - * @brief In generation, if the layer group has more than 1 outlink, outlinks - * except the first one are data outlinks. This function creates the data - * outlinks. - * @note In beam search, only one generated sequence with the hightest log - * probabilites are retained. + * @brief In generation, if the layer group has more than 1 outlink, outlink + * except the first one is a data outlink. In RecurrentLayerGroup, each time + * step is a separate Network, outputs of a layer inside the + * RecurrentLayerGroup are stored in separate Arguments. If one layer is + * specified as an outlink of RecurrentLayerGroup. This function will + * collect outputs in each time step of each generated sequence which are + * dispersed in separate Arguments to form a new single Argument as output of + * RecurrentLayerGroup. */ void createDataOutlink(); + + /* + * @brief decide to select how many rows from the Matrix stored the forward + * pass results from a start position. + * + * @param isSeq: a flag indicating whetehr the layer to be output of the + * RecurrentGradientMachine is a sequence or not + * @param outArgs: all of the the returned Arguments of the forward pass + * during the generation process. 
+ * @param copySize: the returned result, number of rows to select from the + * Matrix stored the forward pass results from a start position. + */ void createDataOutlinkCopySizeInfo(bool isSeq, std::vector& outArgs, std::vector& copySize); + + /* + * @brief decide index of the start row for each time step of a generated + * sequence in Matrix stored the entire beam search batch's forward pass + * results. + * + * @param isSeq: a flag indicating whetehr the layer to be output of the + * RecurrentGradientMachine is a sequence or not + * @param outArgs: all of the the returned Arguments of the forward pass + * during the generation process. + */ void createDataOutlinkSelRowsInfo(bool isSeq, std::vector& outArgs); /* diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp index f45a51d7b1..9a9092af9b 100644 --- a/paddle/parameter/Argument.cpp +++ b/paddle/parameter/Argument.cpp @@ -352,8 +352,8 @@ void Argument::concat(const std::vector& args, CHECK_GE(args.size(), static_cast(endPos - startPos)); for (int j = startPos; j < endPos; ++j) { const Argument& arg = args[j - startPos]; - CHECK_EQ(arg.dataId, dataId) << "Arguments in concat should have the " - << "same dataId"; + CHECK_EQ(arg.dataId, dataId) << "Arguments to concatenate should have " + << "the same dataId."; const int srcStartRow = selectRows[j]; copyArg(in, arg.in, desStartRow, srcStartRow, copySize[i], useGpu); copyArg(value, arg.value, desStartRow, srcStartRow, copySize[i], useGpu); diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py index 30c826ffc8..810bea913e 100755 --- a/python/paddle/trainer_config_helpers/networks.py +++ b/python/paddle/trainer_config_helpers/networks.py @@ -1375,9 +1375,9 @@ def simple_attention(encoded_sequence, weight=attention_weight, input=encoded_sequence, name='%s_scaling' % name) + return pooling_layer( - input=scaled, pooling_type=SumPooling(), - name="%s_pooling" % name), attention_weight + input=scaled, pooling_type=SumPooling(), name="%s_pooling" % name) def inputs(layers, *args): From 9d569c5a38582cbf9022578c046f89a88697c493 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Thu, 3 Aug 2017 17:57:00 -0700 Subject: [PATCH 003/170] Update Backward.md Add the "Backward Operator Registry" section --- paddle/framework/backward.md | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/paddle/framework/backward.md b/paddle/framework/backward.md index 74c001b06a..61f308b469 100644 --- a/paddle/framework/backward.md +++ b/paddle/framework/backward.md @@ -1,8 +1,28 @@ -## Operator/expression 's Backward +# Operator/expression 's Backward -### Motivation +## Motivation In Neural Network, the backpropagation algorithm follows the chain rule, so we need to compound the fundmental gradient operators/expressions together with chain rule . Every forward network need a backward network to construct the full computation lineage, the operator/ expression's Backward feature will generate the backward pass respect to forward pass. + +## Backward Operator Registry + +A backward network is built up with several backward operators. Backward operators take forward operators' inputs, outputs and output gradients, and then calculate its input gradients. In most cases, there is a one-to-one correspondence between forward and backward operators. We use registry mechanism to save these correspondences, which is quite similar with operator registry itself. 
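+
+Conceptually, this gradient registry can be pictured as a global table that maps a forward operator's type to a creator of its backward operator. The sketch below only illustrates that idea; the struct and member names are invented for illustration and are not the actual PaddlePaddle API:
+
+```cpp
+#include <functional>
+#include <string>
+#include <unordered_map>
+
+class OperatorBase;  // stands in for the real operator base class
+
+// Illustrative sketch: a singleton map keyed by the forward operator type,
+// storing a factory that builds the corresponding backward operator.
+struct GradOpRegistry {
+  std::unordered_map<std::string, std::function<OperatorBase*()>> creators;
+  static GradOpRegistry& Instance() {
+    static GradOpRegistry registry;
+    return registry;
+  }
+};
+```
+
+Under this picture, the `REGISTER_GRADIENT_OP` call shown below simply records one such correspondence.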
+ +For example, we have got a `add_two_op`, and is registered by the following code: + +```cpp +REGISTER_OP(add_two, AddTwoOp, AddTwoOpMaker); +``` + +`add_two` is the operator's type. `AddTwoOp` and `AddTwoOpMaker` are the operator class and the operator maker class respectively. + +Assume that we have also got the backward operator of `add_two_op`, which calculating the gradients of `add_two_op`'s inputs. Then we register it by the following way: + +```cpp +REGISTER_GRADIENT_OP(add_two, add_two_grad, AddTwoGradOp); +``` + +`add_two_grad` is the type of backward operator, and `AddTwoGradOp` is its class name. ### Implement : gradient operator registry From 84627bb934ed6b4c7213eeebc0fe59e5fbe7a84b Mon Sep 17 00:00:00 2001 From: caoying03 Date: Mon, 7 Aug 2017 14:03:13 +0800 Subject: [PATCH 004/170] add config helper for sequence slice layer. --- doc/api/v2/config/layer.rst | 5 ++ python/paddle/trainer/config_parser.py | 45 +++++++++++ .../paddle/trainer_config_helpers/layers.py | 68 ++++++++++++++++ .../tests/configs/file_list.sh | 3 +- .../protostr/test_seq_slice_layer.protostr | 79 +++++++++++++++++++ .../tests/configs/test_seq_slice_layer.py | 13 +++ 6 files changed, 212 insertions(+), 1 deletion(-) create mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_slice_layer.protostr create mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst index 372272a53c..232ea6b49b 100644 --- a/doc/api/v2/config/layer.rst +++ b/doc/api/v2/config/layer.rst @@ -257,6 +257,11 @@ seq_concat .. autoclass:: paddle.v2.layer.seq_concat :noindex: +seq_slice +--------- +.. autoclass:: paddle.v2.layer.seq_slice + :noindex: + Reshaping Layers ================ diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 9ea69fc5e5..11e54ba420 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2657,6 +2657,51 @@ class SubSequenceLayer(LayerBase): self.create_bias_parameter(bias, size) +@config_layer('seq_slice') +class SeqSliceLayer(LayerBase): + def __init__(self, name, inputs, starts, ends, bias=False, **xargs): + if isinstance(inputs, list): + assert len(inputs) == 1, ('the first input of sequence slice layer ' + 'is a single sequence input.') + else: + inputs = [inputs] + + if starts is not None: + if isinstance(starts, list): + assert len(starts) == 1, ( + 'the start indices for sequence slice layer cannot ' + 'be a list having more than one element.') + starts = starts[0] + inputs.append(starts) + + if ends is not None: + if isinstance(ends, list): + assert len(ends) == 1, ( + 'the end indices for sequence slice layer cannot ' + 'be a list having more than one element.') + ends = ends[0] + inputs.append(ends) + assert len(inputs) >= 2, ( + 'the sequence slice layer has at least two inputs.') + + super(SeqSliceLayer, self).__init__( + name, 'seq_slice', 0, inputs=inputs, **xargs) + input_layer0 = self.get_input_layer(0) + size = input_layer0.size + self.set_layer_size(size) + + if len(inputs) == 3: + assert ( + self.get_input_layer(1).size == self.get_input_layer(2).size), ( + 'If start and end indices are both given to' + 'sequence slice layer, they should have the same width.') + elif len(inputs) == 2: + if starts is not None: + self.config.select_first = True + else: + self.config.select_first = False + + @config_layer('out_prod') class OuterProdLayer(LayerBase): def __init__(self, name, 
inputs, device=None): diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index ea5fdcc50f..15636b1442 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -131,6 +131,7 @@ __all__ = [ 'crop_layer', 'clip_layer', 'slice_projection', + 'seq_slice_layer', ] @@ -225,6 +226,7 @@ class LayerType(object): PRELU = 'prelu' CROP_LAYER = 'crop' CLIP_LAYER = 'clip' + SEQ_SLICE = 'seq_slice' @staticmethod def is_layer_type(type_name): @@ -6119,3 +6121,69 @@ def clip_layer(input, min, max, name=None): max=max) return LayerOutput( name, LayerType.CLIP_LAYER, parents=[input], size=input.size) + + +@wrap_name_default() +def seq_slice_layer(input, starts, ends, name=None): + """ + seq_slice_layer will return one or several sub-sequences from the + input sequence layer given start and end indices. + + - If only start indices are given, and end indices are set to None, + this layer slices the input sequence from the given start indices + to its end. + - If only end indices are given, and start indices are set to None, + this layer slices the input sequence from its beginning to the + given end indices. + - If start and end indices are both given, they should have the same + number of elements. + + If start or end indices contains more than one elements, the input sequence + will be sliced for multiple times. + + + .. code-block:: python + + seq_silce = seq_slice_layer(input=input_seq, + starts=start_pos, ends=end_pos) + + :param name: name of this layer. + :type name: basestring + :param input: input for this layer, it should be a sequence. + :type input: LayerOutput + :param starts: start indices to slice the input sequence. + :type starts: LayerOutput|None + :param ends: end indices to slice the input sequence. + :type ends: LayerOutput|None + :return: LayerOutput object. 
+ :rtype: LayerOutput + + """ + + assert isinstance(input, LayerOutput), ( + 'The first input of seq_slice layer must be a PaddlePaddle layer.') + + if starts is not None: + assert isinstance(starts, LayerOutput), ( + 'The start indices for seq_slice layer ' + 'must be a PaddlePaddle layer.') + if ends is not None: + assert isinstance(ends, LayerOutput), ( + 'The end indices for seq_slice layer must be a PaddlePaddle layer.') + assert starts is not None or ends is not None, ( + 'start and end indices ' + 'cannot be set to None at the same time, at least one of ' + 'them should be given.') + if starts is not None and ends is not None: + assert starts.size == ends.size, ( + 'If start and end indices are both given to seq_slice_layer, ' + 'they should have the same width.') + + Layer( + name=name, + type=LayerType.SEQ_SLICE, + inputs=input.name, + starts=starts.name if starts is not None else None, + ends=ends.name if ends is not None else None) + return LayerOutput( + name, LayerType.SEQ_SLICE, parents=[input], size=input.size) diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index 0ffa58bc1e..1ce865ceac 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -7,6 +7,7 @@ test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer -test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer) +test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer +test_seq_slice_layer) export whole_configs=(test_split_datasource) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_slice_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_slice_layer.protostr new file mode 100644 index 0000000000..5b73d614fe --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_slice_layer.protostr @@ -0,0 +1,79 @@ +type: "nn" +layers { + name: "word" + type: "data" + size: 128 + active_type: "" +} +layers { + name: "starts" + type: "data" + size: 5 + active_type: "" +} +layers { + name: "ends" + type: "data" + size: 5 + active_type: "" +} +layers { + name: "__seq_slice_layer_0__" + type: "seq_slice" + size: 128 + active_type: "" + inputs { + input_layer_name: "word" + } + inputs { + input_layer_name: "starts" + } + inputs { + input_layer_name: "ends" + } +} +layers { + name: "__seq_slice_layer_1__" + type: "seq_slice" + size: 128 + active_type: "" + inputs { + input_layer_name: "word" + } + inputs { + input_layer_name: "starts" + } + select_first: true +} +layers { + name: "__seq_slice_layer_2__" + type: "seq_slice" + size: 128 + active_type: "" + inputs { + input_layer_name: "word" + } + inputs { + input_layer_name: "ends" + } + select_first: false +} +input_layer_names: "word" +output_layer_names: "__seq_slice_layer_0__" +output_layer_names: "__seq_slice_layer_1__" +output_layer_names: "__seq_slice_layer_2__" +sub_models { + name: "root" + layer_names: "word" + layer_names: "starts" + layer_names: "ends" + layer_names: "__seq_slice_layer_0__" + layer_names: "__seq_slice_layer_1__" + layer_names: "__seq_slice_layer_2__" + input_layer_names: "word" 
+ output_layer_names: "__seq_slice_layer_0__" + output_layer_names: "__seq_slice_layer_1__" + output_layer_names: "__seq_slice_layer_2__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py new file mode 100644 index 0000000000..510ad32208 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py @@ -0,0 +1,13 @@ +#!/usr/bin/env python +#coding=utf-8 +from paddle.trainer_config_helpers import * + +input_seq = data_layer("word", size=128) +starts = data_layer("starts", size=5) +ends = data_layer("ends", size=5) + +seq_slice1 = seq_slice_layer(input=input_seq, starts=starts, ends=ends) +seq_slice2 = seq_slice_layer(input=input_seq, starts=starts, ends=None) +seq_slice3 = seq_slice_layer(input=input_seq, starts=None, ends=ends) + +outputs(seq_slice1, seq_slice2, seq_slice3) From 2988a58ef01a56e84cff02463972e0150bc6ab13 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Tue, 8 Aug 2017 08:52:05 +0800 Subject: [PATCH 005/170] add unittest. --- paddle/gserver/tests/CMakeLists.txt | 6 + .../gserver/tests/test_SeqSliceLayerGrad.cpp | 214 ++++++++++++++++++ 2 files changed, 220 insertions(+) create mode 100644 paddle/gserver/tests/test_SeqSliceLayerGrad.cpp diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index 4546d12a90..9fdb148864 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -30,6 +30,12 @@ add_unittest_without_exec(test_CRFLayerGrad add_test(NAME test_CRFLayerGrad COMMAND test_CRFLayerGrad) +################ test_SeqSliceLayerGrad #################### +add_unittest_without_exec(test_SeqSliceLayerGrad + test_SeqSliceLayerGrad.cpp + LayerGradUtil.cpp) +add_test(NAME test_SeqSliceLayerGrad + COMMAND test_SeqSliceLayerGrad) add_unittest_without_exec(test_ActivationGrad test_ActivationGrad.cpp diff --git a/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp b/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp new file mode 100644 index 0000000000..e456dd5db7 --- /dev/null +++ b/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp @@ -0,0 +1,214 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include +#include "ModelConfig.pb.h" +#include "paddle/gserver/layers/DataLayer.h" +#include "paddle/trainer/Trainer.h" + +#include "LayerGradUtil.h" +#include "paddle/testing/TestUtil.h" + +using namespace paddle; // NOLINT +using namespace std; // NOLINT + +DECLARE_int32(gpu_id); +DECLARE_bool(thread_local_rand_use_global_seed); + +const int MAX_SEQ_NUM = 5; +const int MAX_SEQ_LEN = 5; +const int MAX_BEAM_SIZE = 3; + +vector randSampling(real range, int n) { + CHECK_GE(range, n); + vector num(range); + iota(begin(num), end(num), 0.); + if (range == n) return num; + + random_shuffle(begin(num), end(num)); + num.resize(n); + sort(begin(num), end(num)); + return num; +} + +void genSeqInfo(vector& seqStartPos, vector& subSeqStartPos) { + seqStartPos.resize(1, 0); + subSeqStartPos.resize(1, 0); + + // srand((size_t)(time(NULL))); + srand(1); + int seqNum = 1 + (rand() % MAX_SEQ_NUM); + for (int i = 0; i < seqNum; ++i) { + int subSeqNum = 1 + (rand() % MAX_SEQ_NUM); + for (int j = 0; j < subSeqNum; ++j) + subSeqStartPos.push_back(subSeqStartPos.back() + + (1 + (rand() % MAX_SEQ_LEN))); + seqStartPos.push_back(subSeqStartPos.back()); + } +} + +/* + generate start indices according to sequence start positions. + */ +void genStarts(vector& seqStartPos, + vector>& starts, + size_t beamSize) { + starts.clear(); + starts.resize(seqStartPos.size() - 1, vector(beamSize, -1.)); + + for (size_t i = 0; i < seqStartPos.size() - 1; ++i) { + int seqLen = seqStartPos[i + 1] - seqStartPos[i]; + vector randStarts = + randSampling(seqLen, min(seqLen, static_cast(beamSize))); + copy(begin(randStarts), end(randStarts), begin(starts[i])); + } +} + +/* + generate end indices according to sequence start positions and start indices. + */ +void genEnds(vector& seqStartPos, + vector>& starts, + vector>& ends, + size_t beamSize) { + CHECK_EQ(seqStartPos.size() - 1, starts.size()); + ends.clear(); + ends.resize(seqStartPos.size() - 1, vector(beamSize, -1.)); + + for (size_t i = 0; i < starts.size(); ++i) { + for (size_t j = 0; j < starts[i].size(); ++j) { + int seqLen = seqStartPos[i + 1] - seqStartPos[i]; + CHECK_GE(seqLen - 1, starts[i][j]); + if (starts[i][j] == -1.) break; + if (starts[i][j] == (seqLen - 1)) { + ends[i][j] = starts[i][j]; + } else { + ends[i][j] = starts[i][j] + randSampling(seqLen - starts[i][j], 1)[0]; + } + } + } +} + +void genTestData(vector& seqStartPos, + vector& subSeqStartPos, + vector>& starts, + vector>& ends, + bool hasSubseq) { + size_t beamSize = MAX_BEAM_SIZE; + genSeqInfo(seqStartPos, subSeqStartPos); + + genStarts(hasSubseq ? subSeqStartPos : seqStartPos, starts, beamSize); + genEnds(hasSubseq ? subSeqStartPos : seqStartPos, starts, ends, beamSize); +} + +template +void flatten2dVector(vector>& inVec, vector& outVec) { + size_t totalSize{0}; + for (auto const& items : inVec) totalSize += items.size(); + outVec.reserve(totalSize); + + for (auto& items : inVec) + move(items.begin(), items.end(), back_inserter(outVec)); +} + +void testSeqSliceLayer(bool hasSubseq, + bool useGpu, + vector& seqStartPos, + vector& subSeqStartPos, + vector>& starts, + vector>& ends) { + // layer size is not crutial for this layer, + // so here use a small layer size in the unittest. + const size_t layerSize{4}; + TestConfig config; + config.layerConfig.set_type("seq_slice"); + config.layerConfig.set_size(layerSize); + + // add the first input + MatrixPtr seqInputPtr = + Matrix::create(hasSubseq ? 
subSeqStartPos.back() : seqStartPos.back(), + layerSize, + false, + false); + seqInputPtr->randomizeUniform(); + + if (hasSubseq) { + config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, + "seq_input", + seqInputPtr, + seqStartPos, + subSeqStartPos}); + } else { + config.inputDefs.push_back( + {INPUT_SELF_DEFINE_DATA, "seq_input", seqInputPtr, seqStartPos}); + } + config.layerConfig.add_inputs(); + + // add start indices + if (starts.size()) { + vector startsToVec; + flatten2dVector(starts, startsToVec); + + MatrixPtr startMatrixPtr = + Matrix::create(starts.size(), starts[0].size(), false, false); + startMatrixPtr->copyFrom(startsToVec.data(), startsToVec.size()); + + config.inputDefs.push_back( + {INPUT_SELF_DEFINE_DATA, "starts", startMatrixPtr}); + config.layerConfig.add_inputs(); + } + + // add end indices + if (ends.size()) { + vector endsToVec; + flatten2dVector(ends, endsToVec); + MatrixPtr endMatrixPtr = + Matrix::create(ends.size(), ends[0].size(), false, false); + config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, "ends", endMatrixPtr}); + config.layerConfig.add_inputs(); + } + + testLayerGrad(config, "seq_slice", /*batchSize*/ 100, false, useGpu, false); +} + +TEST(Layer, SeqSliceLayer) { + vector seqStartPos; + vector subSeqStartPos; + vector> starts; + vector> ends; + + genSeqInfo(seqStartPos, subSeqStartPos); + for (bool hasSubseq : {false, true}) { + genTestData(seqStartPos, subSeqStartPos, starts, ends, hasSubseq); + for (bool useGpu : {false, true}) { + vector> tmp; + testSeqSliceLayer( + hasSubseq, useGpu, seqStartPos, subSeqStartPos, tmp, ends); + testSeqSliceLayer( + hasSubseq, useGpu, seqStartPos, subSeqStartPos, starts, tmp); + testSeqSliceLayer( + hasSubseq, useGpu, seqStartPos, subSeqStartPos, starts, ends); + } + } +} + +int main(int argc, char** argv) { + initMain(argc, argv); + hl_start(); + hl_init(FLAGS_gpu_id); + FLAGS_thread_local_rand_use_global_seed = true; + srand(1); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} From 7304006b7121c844d071227a6c2d24245a06e32e Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Tue, 8 Aug 2017 16:38:27 -0700 Subject: [PATCH 006/170] Update backward.md --- paddle/framework/backward.md | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/paddle/framework/backward.md b/paddle/framework/backward.md index 61f308b469..c717c2f30b 100644 --- a/paddle/framework/backward.md +++ b/paddle/framework/backward.md @@ -24,20 +24,31 @@ REGISTER_GRADIENT_OP(add_two, add_two_grad, AddTwoGradOp); `add_two_grad` is the type of backward operator, and `AddTwoGradOp` is its class name. -### Implement : gradient operator registry +## Backward Opeartor Creating -| | forward operator | backward operator | -| ---------------------- | ---------------- | -------------------------------- | -| **Operator::inputs_** | Inputs | Inputs, Outputs, OutputGradients | -| **Operator::outputs_** | Outputs | InputGradients | +### Usage -Inputs/Outputs means the input/output of the operator, InputGradients/OutputGradients is the gradient respect to forward opeartor. Forward operator and Backward operator are isomorphic, save their corresponding needs into member attribute. +Given a certain forward operator, we can get its corresponding backward opeartor by calling: -We use a global hash map record the gradient operators available, follow the philosophy of minimum core, make operator pluggable unit. Each gradient is an operator and it needs to regist itself. 
+```cpp +OperatorBase* bwd_op = BuildGradOp(const OperatorBase* fwd_op); +``` + +The function `BuildGradOp` will sequentially execute following processes: + +1. Getting the `type_` of given forward operator, and then creating the corresponding backward operator. + +2. Copying all the attributes of forward operator expect `input_format` and `output_format`(if it has), for their elements differ between forward and backward operators. + +3. Copying forward operator's `inputs_` and `outputs_` to backward operator's `inputs_`. And adding forward inputs' gradient variables into backward `output_`, adding forward outputs' gradient variables into backward `input_`. + +4. Building backward operator's `input_format`, `output_format` (if necessary) and `in_out_idxs_` according to its `inputs_` and `outputs_` just created. + +## Backward Network Building -grad_op_builder(fengjiayi) +A backward network is a series of backward operators. The main idea of building a backward network is creating backward operators in the inverted sequence and put them together. -### Implement : Backward network +In our design, the network itself is also a kind of operator. So the operators contained by a big network may be some small network. given a forward network, it generates the backward network. We only care about the Gradients—`OutputGradients`,`InputGradients`. From 26bc5b12596c945956f7a6b003712805e579a36d Mon Sep 17 00:00:00 2001 From: caoying03 Date: Tue, 8 Aug 2017 18:48:11 +0800 Subject: [PATCH 007/170] add implementations. --- paddle/gserver/layers/KmaxSeqScoreLayer.cpp | 5 + paddle/gserver/layers/SequenceSliceLayer.cpp | 228 ++++++++++++++++++ .../gserver/layers/SubNestedSequenceLayer.cpp | 16 +- .../gserver/tests/test_SeqSliceLayerGrad.cpp | 25 +- paddle/parameter/Argument.cpp | 27 ++- 5 files changed, 278 insertions(+), 23 deletions(-) create mode 100644 paddle/gserver/layers/SequenceSliceLayer.cpp diff --git a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp index 8ce591d476..e96fd61fc1 100644 --- a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp +++ b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp @@ -97,6 +97,11 @@ void KmaxSeqScoreLayer::forward(PassType passType) { scores_ = inputScore; } + // TODO(caoying) + // Here selSubSeqIdx is automatically converted from real to int + // This is very dangerous if user fill this matrix himself, invalid data may + // occur. The selected indices should be stored in + // CpuSparseMatrix with SparseValueType set to NO_VALUE. Matrix::resizeOrCreate( output_.value, input.hasSubseq() ? input.getNumSubSequences() : input.getNumSequences(), diff --git a/paddle/gserver/layers/SequenceSliceLayer.cpp b/paddle/gserver/layers/SequenceSliceLayer.cpp new file mode 100644 index 0000000000..410aba663e --- /dev/null +++ b/paddle/gserver/layers/SequenceSliceLayer.cpp @@ -0,0 +1,228 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "Layer.h" +#include "paddle/math/Matrix.h" +#include "paddle/math/Vector.h" +#include "paddle/utils/Logging.h" +#include "paddle/utils/Stat.h" + +namespace paddle { + +class SequenceSliceLayer : public Layer { +public: + explicit SequenceSliceLayer(const LayerConfig& config) : Layer(config) {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; + +private: + // TODO(caoying) + // Here selSubSeqIdx is automatically converted from real to int + // This is very dangerous if user fill this matrix himself, invalid data + // may occur. The selected indices should be stored in CpuSparseMatrix + // with SparseValueType set to NO_VALUE. + MatrixPtr startIdsOnCpu_; + MatrixPtr endIdsOnCpu_; + + std::vector selectedRows_; + IVectorPtr rowIndice_; + std::vector> inputSeqInfoVec_; + std::vector outSubSeqStartPos_; + std::vector outSeqStartPos_; + + void checkInputs(); + void copySliceIdsToCpu(); + void calSelectedRows(const MatrixPtr starts, const MatrixPtr ends); +}; + +REGISTER_LAYER(seq_slice, SequenceSliceLayer); + +bool SequenceSliceLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + /* Initialize the basic parent class */ + Layer::init(layerMap, parameterMap); + CHECK_GE(inputLayers_.size(), 2U); + CHECK_LE(inputLayers_.size(), 3U); + + setNeedSequenceInfo(false); + return true; +} + +void SequenceSliceLayer::checkInputs() { + const Argument& inputSeq = getInput(0); + CHECK(inputSeq.hasSeq()) << "The first input of sequence slic layer " + << "must be a sequence."; + // Check inputs + const MatrixPtr indices1 = getInputValue(1); + CHECK_EQ(indices1->getHeight(), + inputSeq.hasSubseq() ? 
inputSeq.getNumSubSequences() + : inputSeq.getNumSequences()) + << "Height of the second input should be equal to number of sequence " + << "in the first input."; + if (inputLayers_.size() == 3) { + const MatrixPtr indices2 = getInputValue(2); + CHECK_EQ(indices2->getHeight(), indices1->getHeight()) + << "start indices and end indices should have the same height."; + CHECK_EQ(indices2->getWidth(), indices1->getWidth()) + << "start indices and end indices should have the same Width."; + } +} + +void SequenceSliceLayer::copySliceIdsToCpu() { + if (!useGpu_) { + if (inputLayers_.size() == 2U) { + if (config_.select_first()) { + startIdsOnCpu_ = getInputValue(1); + endIdsOnCpu_ = nullptr; + } else { + startIdsOnCpu_ = nullptr; + endIdsOnCpu_ = getInputValue(1); + } + } else if (inputLayers_.size() == 3U) { + startIdsOnCpu_ = getInputValue(1); + endIdsOnCpu_ = getInputValue(2); + } + return; + } + + const MatrixPtr indices1 = getInputValue(1); + if (inputLayers_.size() == 2U) { + if (config_.select_first()) { + Matrix::resizeOrCreate(startIdsOnCpu_, + indices1->getHeight(), + indices1->getWidth(), + false /* trans */, + false /* useGpu */); + startIdsOnCpu_->copyFrom(*indices1); + endIdsOnCpu_ = nullptr; + } else { + Matrix::resizeOrCreate(endIdsOnCpu_, + indices1->getHeight(), + indices1->getWidth(), + false /* trans */, + false /* useGpu */); + endIdsOnCpu_->copyFrom(*indices1); + startIdsOnCpu_ = nullptr; + } + } else if (inputLayers_.size() == 3U) { + Matrix::resizeOrCreate(startIdsOnCpu_, + indices1->getHeight(), + indices1->getWidth(), + false /* trans */, + false /* useGpu */); + startIdsOnCpu_->copyFrom(*indices1); + + const MatrixPtr indices2 = getInputValue(2); + Matrix::resizeOrCreate(endIdsOnCpu_, + indices2->getHeight(), + indices2->getWidth(), + false /* trans */, + false /* useGpu */); + endIdsOnCpu_->copyFrom(*indices2); + } +} + +void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts, + const MatrixPtr ends) { + outSeqStartPos_.resize(1, 0); + outSubSeqStartPos_.resize(1, 0); + selectedRows_.clear(); + + size_t beamSize = starts ? starts->getWidth() : ends->getWidth(); + // iterate over sequence + size_t rowIdx = 0; + for (size_t i = 0; i < inputSeqInfoVec_.size(); ++i) { + // iterate over sub-sequence in a sequence + for (size_t j = 0; j < inputSeqInfoVec_[i].size() - 1; ++j) { + // iterate over each index for slicing. + for (size_t k = 0; k < beamSize; ++k) { + if (starts) { + if (starts->getElement(rowIdx, k) == -1.) break; + } else if (ends->getElement(rowIdx, k) == -1.) + break; + + int begPos = inputSeqInfoVec_[i][j]; + if (starts) begPos += starts->getElement(rowIdx, k); + + int endPos = inputSeqInfoVec_[i][j + 1] - 1; + if (ends) endPos = inputSeqInfoVec_[i][j] + ends->getElement(rowIdx, k); + + int seqLen = endPos - begPos + 1; + CHECK(seqLen); + for (int m = begPos; m <= endPos; ++m) selectedRows_.push_back(m); + inputSeqInfoVec_.size() > 1 + ? outSubSeqStartPos_.push_back(outSubSeqStartPos_.back() + seqLen) + : outSeqStartPos_.push_back(outSeqStartPos_.back() + seqLen); + } + rowIdx++; + } + if (inputSeqInfoVec_.size() > 1) + outSeqStartPos_.push_back(outSubSeqStartPos_.back()); + } + + if (useGpu_) { + rowIndice_ = IVector::create(selectedRows_.size(), useGpu_); + rowIndice_->copyFrom(selectedRows_.data(), selectedRows_.size()); + } else { + rowIndice_ = + IVector::create(selectedRows_.data(), selectedRows_.size(), useGpu_); + } + + // create the sequence information for the output. 
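+  // The slice boundaries collected above in outSeqStartPos_ (and, for nested
+  // inputs, outSubSeqStartPos_) are copied into the output Argument below, so
+  // that every selected slice becomes one (sub-)sequence of the output.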
+ ICpuGpuVector::resizeOrCreate( + output_.sequenceStartPositions, outSeqStartPos_.size(), false); + output_.sequenceStartPositions->copyFrom( + outSeqStartPos_.data(), outSeqStartPos_.size(), false); + + if (inputSeqInfoVec_.size() > 1) { + ICpuGpuVector::resizeOrCreate( + output_.subSequenceStartPositions, outSubSeqStartPos_.size(), false); + output_.subSequenceStartPositions->copyFrom( + outSubSeqStartPos_.data(), outSubSeqStartPos_.size(), false); + } +} + +void SequenceSliceLayer::forward(PassType passType) { + Layer::forward(passType); + checkInputs(); + + const Argument& inputSeq = getInput(0); + inputSeqInfoVec_.clear(); + Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions, + inputSeq.subSequenceStartPositions, + inputSeqInfoVec_); + copySliceIdsToCpu(); + + // calculate the selected row indices in a batch, + // and build the output sequence information. + calSelectedRows(startIdsOnCpu_ ? startIdsOnCpu_ : nullptr, + endIdsOnCpu_ ? endIdsOnCpu_ : nullptr); + + resetOutput(selectedRows_.size(), getSize()); + + getOutputValue()->selectRows(*getInputValue(0), *rowIndice_); +} + +void SequenceSliceLayer::backward(const UpdateCallback& callback) { + MatrixPtr inputSeqGrad = getInputGrad(0); + MatrixPtr outputGrad = getOutputGrad(); + + outputGrad->addToRows(*inputSeqGrad, *rowIndice_); +} + +} // namespace paddle diff --git a/paddle/gserver/layers/SubNestedSequenceLayer.cpp b/paddle/gserver/layers/SubNestedSequenceLayer.cpp index 76f587fff7..0db0300270 100644 --- a/paddle/gserver/layers/SubNestedSequenceLayer.cpp +++ b/paddle/gserver/layers/SubNestedSequenceLayer.cpp @@ -52,11 +52,10 @@ private: * ] * * ths output is saved to private member rowIndice_; - * [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, - * 16,17,18,19,20,21,22,23,24,25,26,27] + * [0,1,2,3,4,5,6,7,8,9,15,16,17,18,19,20,21,23,24,25,26,27] */ - void calSelectedCols(const MatrixPtr selectedIndices, + void calSelectedRows(const MatrixPtr selectedIndices, const std::vector>& inputSeqInfo); // if the second input of this layer is on GPU memory, copy it to CPU memory. @@ -67,7 +66,7 @@ private: std::vector> inputSeqInfoVec_; // the final selected row indices in a batch, - // rowIdx_ and selectedRows_ actually share a same memory. + // rowIndice_ and selectedRows_ actually share a same memory. IVectorPtr rowIndice_; std::vector selectedRows_; }; @@ -83,7 +82,7 @@ bool SubNestedSequenceLayer::init(const LayerMap& layerMap, return true; } -void SubNestedSequenceLayer::calSelectedCols( +void SubNestedSequenceLayer::calSelectedRows( const MatrixPtr selectedIndices, const std::vector>& inputSeqInfo) { selectedRows_.clear(); @@ -96,6 +95,11 @@ void SubNestedSequenceLayer::calSelectedCols( for (size_t i = 0; i < seqNum; ++i) { for (size_t j = 0; j < beamSize; ++j) { if (selectedIndices->getElement(i, j) == -1.) break; + // TODO(caoying) + // Here selSubSeqIdx is automatically converted from real to int + // This is very dangerous if user fill this matrix himself, invalid data + // may occur. The selected indices should be stored in + // CpuSparseMatrix with SparseValueType set to NO_VALUE. 
int selSubSeqIdx = selectedIndices->getElement(i, j); CHECK_GT(inputSeqInfoVec_[i].size() - 1, selSubSeqIdx); @@ -160,7 +164,7 @@ void SubNestedSequenceLayer::forward(PassType passType) { Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions, inputSeq.subSequenceStartPositions, inputSeqInfoVec_); - calSelectedCols(selIdsCpu_, inputSeqInfoVec_); + calSelectedRows(selIdsCpu_, inputSeqInfoVec_); resetOutput(selectedRows_.size(), getSize()); getOutputValue()->selectRows(*getInputValue(0), *rowIndice_); diff --git a/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp b/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp index e456dd5db7..d560ca650b 100644 --- a/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp +++ b/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp @@ -26,9 +26,9 @@ using namespace std; // NOLINT DECLARE_int32(gpu_id); DECLARE_bool(thread_local_rand_use_global_seed); -const int MAX_SEQ_NUM = 5; -const int MAX_SEQ_LEN = 5; -const int MAX_BEAM_SIZE = 3; +const int MAX_SEQ_NUM = 17; +const int MAX_SEQ_LEN = 23; +const int MAX_BEAM_SIZE = 13; vector randSampling(real range, int n) { CHECK_GE(range, n); @@ -46,8 +46,7 @@ void genSeqInfo(vector& seqStartPos, vector& subSeqStartPos) { seqStartPos.resize(1, 0); subSeqStartPos.resize(1, 0); - // srand((size_t)(time(NULL))); - srand(1); + srand((size_t)(time(NULL))); int seqNum = 1 + (rand() % MAX_SEQ_NUM); for (int i = 0; i < seqNum; ++i) { int subSeqNum = 1 + (rand() % MAX_SEQ_NUM); @@ -105,7 +104,7 @@ void genTestData(vector& seqStartPos, vector>& starts, vector>& ends, bool hasSubseq) { - size_t beamSize = MAX_BEAM_SIZE; + size_t beamSize = 1 + (rand() % MAX_BEAM_SIZE); genSeqInfo(seqStartPos, subSeqStartPos); genStarts(hasSubseq ? subSeqStartPos : seqStartPos, starts, beamSize); @@ -167,16 +166,21 @@ void testSeqSliceLayer(bool hasSubseq, config.inputDefs.push_back( {INPUT_SELF_DEFINE_DATA, "starts", startMatrixPtr}); config.layerConfig.add_inputs(); + config.layerConfig.set_select_first(true); } // add end indices if (ends.size()) { vector endsToVec; flatten2dVector(ends, endsToVec); + MatrixPtr endMatrixPtr = Matrix::create(ends.size(), ends[0].size(), false, false); + endMatrixPtr->copyFrom(endsToVec.data(), endsToVec.size()); + config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, "ends", endMatrixPtr}); config.layerConfig.add_inputs(); + config.layerConfig.set_select_first(false); } testLayerGrad(config, "seq_slice", /*batchSize*/ 100, false, useGpu, false); @@ -188,10 +192,15 @@ TEST(Layer, SeqSliceLayer) { vector> starts; vector> ends; + std::vector mode = {false}; +#ifndef PADDLE_ONLY_CPU + mode.push_back(true); +#endif genSeqInfo(seqStartPos, subSeqStartPos); - for (bool hasSubseq : {false, true}) { + for (bool hasSubseq : {true, false}) { + LOG(INFO) << "hasSubSeq : " << hasSubseq; genTestData(seqStartPos, subSeqStartPos, starts, ends, hasSubseq); - for (bool useGpu : {false, true}) { + for (bool useGpu : mode) { vector> tmp; testSeqSliceLayer( hasSubseq, useGpu, seqStartPos, subSeqStartPos, tmp, ends); diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp index 0547ac93cd..06f7e5245f 100644 --- a/paddle/parameter/Argument.cpp +++ b/paddle/parameter/Argument.cpp @@ -670,19 +670,28 @@ void Argument::reorganizeSeqInfo( const ICpuGpuVectorPtr seqStartPos, const ICpuGpuVectorPtr subSeqStartPos, std::vector>& reorganizedSeqInfo) { - int* seqStarts = seqStartPos->getMutableData(false); - int* subSeqStarts = subSeqStartPos->getMutableData(false); + CHECK(seqStartPos); int seqNum = seqStartPos->getSize() - 1; - 
reorganizedSeqInfo.resize(seqNum, std::vector()); - int seqIdx = 0; - for (size_t i = 0; i < subSeqStartPos->getSize(); ++i) { - reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]); - if (subSeqStarts[i] == seqStarts[seqIdx + 1]) { - seqIdx++; - if (seqIdx == seqNum) return; + int* seqStarts = seqStartPos->getMutableData(false); + + if (subSeqStartPos) { + int* subSeqStarts = subSeqStartPos->getMutableData(false); + reorganizedSeqInfo.resize(seqNum, std::vector()); + int seqIdx = 0; + for (size_t i = 0; i < subSeqStartPos->getSize(); ++i) { reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]); + if (subSeqStarts[i] == seqStarts[seqIdx + 1]) { + seqIdx++; + if (seqIdx == seqNum) return; + reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]); + } } + } else { + reorganizedSeqInfo.resize(1, std::vector(seqNum + 1, 0)); + memcpy(reorganizedSeqInfo[0].data(), + seqStarts, + sizeof(int) * seqStartPos->getSize()); } } From b97f020f9c34da04e093deb4691f6286f4017e62 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Thu, 10 Aug 2017 10:37:07 +0800 Subject: [PATCH 008/170] fix unittest error. --- paddle/gserver/layers/SequenceSliceLayer.cpp | 3 +-- python/paddle/trainer_config_helpers/layers.py | 1 + .../protostr/test_kmax_seq_socre_layer.protostr | 17 +++++------------ .../tests/configs/test_kmax_seq_socre_layer.py | 4 +--- 4 files changed, 8 insertions(+), 17 deletions(-) diff --git a/paddle/gserver/layers/SequenceSliceLayer.cpp b/paddle/gserver/layers/SequenceSliceLayer.cpp index 424f898553..165ee6311a 100644 --- a/paddle/gserver/layers/SequenceSliceLayer.cpp +++ b/paddle/gserver/layers/SequenceSliceLayer.cpp @@ -70,9 +70,8 @@ void SequenceSliceLayer::checkInputs() { const Argument& inputSeq = getInput(0); CHECK(inputSeq.hasSeq()) << "The first input of sequence slic layer " << "must be a sequence."; - // Check inputs const MatrixPtr indices1 = getInputValue(1); - CHECK_EQ(indices1->getHeight(), + CHECK_EQ(static_cast(indices1->getHeight()), inputSeq.hasSubseq() ? 
inputSeq.getNumSubSequences() : inputSeq.getNumSequences()) << "Height of the second input should be equal to number of sequence " diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index e51332da0d..79d24cfe5b 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -6242,6 +6242,7 @@ def seq_slice_layer(input, starts, ends, name=None): name, LayerType.SEQ_SLICE, parents=[input], size=input.size) +@wrap_name_default() @layer_support() def kmax_sequence_score_layer(input, name=None, beam_size=1): """ diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr index 81bd71f68e..3d32220bfb 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr @@ -1,12 +1,6 @@ type: "nn" layers { - name: "input" - type: "data" - size: 300 - active_type: "" -} -layers { - name: "data" + name: "input_seq" type: "data" size: 128 active_type: "" @@ -17,7 +11,7 @@ layers { size: 1 active_type: "exponential" inputs { - input_layer_name: "data" + input_layer_name: "input_seq" input_parameter_name: "___fc_layer_0__.w0" } bias_parameter_name: "___fc_layer_0__.wbias" @@ -51,15 +45,14 @@ parameters { initial_strategy: 0 initial_smart: false } -input_layer_names: "data" +input_layer_names: "input_seq" output_layer_names: "__kmax_sequence_score_layer_0__" sub_models { name: "root" - layer_names: "input" - layer_names: "data" + layer_names: "input_seq" layer_names: "__fc_layer_0__" layer_names: "__kmax_sequence_score_layer_0__" - input_layer_names: "data" + input_layer_names: "input_seq" output_layer_names: "__kmax_sequence_score_layer_0__" is_recurrent_layer_group: false } diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py index d245c5a41c..48d0cd55da 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py @@ -2,9 +2,7 @@ #coding=utf-8 from paddle.trainer_config_helpers import * -data = data_layer(name='input', size=300) - -data = data_layer(name="data", size=128) +data = data_layer(name="input_seq", size=128) scores = fc_layer(input=data, size=1, act=ExpActivation()) kmax_seq_id = kmax_sequence_score_layer(input=scores, beam_size=5) From cfb86c4e23d424328066fe8d2fbbacb9c9ead6c1 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Sun, 13 Aug 2017 09:30:41 +0800 Subject: [PATCH 009/170] Add vol2col and col2vol cuda kernel --- paddle/cuda/include/hl_matrix.h | 58 ++++++++++ paddle/cuda/include/stub/hl_matrix_stub.h | 15 +++ paddle/cuda/src/hl_cuda_matrix.cu | 135 ++++++++++++++++++++++ 3 files changed, 208 insertions(+) diff --git a/paddle/cuda/include/hl_matrix.h b/paddle/cuda/include/hl_matrix.h index eb454c59c1..da2ed8cabb 100644 --- a/paddle/cuda/include/hl_matrix.h +++ b/paddle/cuda/include/hl_matrix.h @@ -224,4 +224,62 @@ extern void hl_matrix_collect_shared_bias(real* B_d, extern void hl_matrix_rotate( real* mat, real* matRot, int dimM, int dimN, bool clockWise); +/** + * @brief Matrix vol2Col: Convert 3D volume into col matrix + * + * @param[in] matSrc input matrix. 
+ * @param[in] channel channel of matSrc. + * @param[in] depth depth of matSrc. + * @param[in] height height of matSrc. + * @param[in] width width of matSrc. + * @param[in] filterD depth of filter. + * @param[in] filterH height of filter. + * @param[in] filterW width of filter. + * @param[in] strideD stride in the depth. + * @param[in] strideH stride in the height. + * @param[in] strideW stride in the width. + * @param[in] paddingD padding in the depth. + * @param[in] paddingH padding in the height. + * @param[in] paddingW padding in the width. + * @param[out] matDst output matrix. + * + */ +extern void hl_matrix_vol2Col(real* matSrc, + int channel, int depth, int height, int width, + int filterD, int filterH, int filterW, + int strideD, int strideH, int strideW, + int paddingD, int paddingH, int paddingW, + real* matDst); + +/** + * @brief Matrix col2Vol: Convert col matrix into 3D volume + * + * @param[out] matDst output matrix. + * @param[in] channel channel of matDst. + * @param[in] depth depth of matDst. + * @param[in] height height of matDst. + * @param[in] width width of matDst. + * @param[in] filterD depth of filter. + * @param[in] filterH height of filter. + * @param[in] filterW width of filter. + * @param[in] strideD stride in the depth. + * @param[in] strideH stride in the height. + * @param[in] strideW stride in the width. + * @param[in] paddingD padding in the depth. + * @param[in] paddingH padding in the height. + * @param[in] paddingW padding in the width. + * @param[in] matSrc input matrix. + * @param[in] beta input + * @param[in] alpha input + * + */ +extern void hl_matrix_col2Vol(real* matDst, + int channels, int depth, int height, int width, + int filterD, int filterH, int filterW, + int strideD, int strideH, int strideW, + int paddingD, int paddingH, int paddingW, + real* matSrc, + real alpha, real beta); + + #endif /* HL_MATRIX_H_ */ diff --git a/paddle/cuda/include/stub/hl_matrix_stub.h b/paddle/cuda/include/stub/hl_matrix_stub.h index 127cb7e279..0b73777812 100644 --- a/paddle/cuda/include/stub/hl_matrix_stub.h +++ b/paddle/cuda/include/stub/hl_matrix_stub.h @@ -99,4 +99,19 @@ inline void hl_matrix_collect_shared_bias(real* B_d, inline void hl_matrix_rotate( real* mat, real* matRot, int dimM, int dimN, bool clockWise) {} +inline void hl_matrix_vol2Col(real* data, + int channels, int depth, int height, int width, + int filterD, int filterH, int filterW, + int strideD, int strideH, int strideW, + int paddingD, int paddingH, int paddingW, + real* data_col) {} + +inline void hl_matrix_col2Vol(real* data, + int channels, int depth, int height, int width, + int filterD, int filterH, int filterW, + int strideD, int strideH, int strideW, + int paddingD, int paddingH, int paddingW, + real* data_Im, + real alpha, real beta) {} + #endif // HL_MATRIX_STUB_H_ diff --git a/paddle/cuda/src/hl_cuda_matrix.cu b/paddle/cuda/src/hl_cuda_matrix.cu index 39272456c3..f626c07a0c 100644 --- a/paddle/cuda/src/hl_cuda_matrix.cu +++ b/paddle/cuda/src/hl_cuda_matrix.cu @@ -592,3 +592,138 @@ void hl_matrix_rotate( mat, matRot, dimM, dimN, clockWise); CHECK_SYNC("hl_matrix_rotate failed"); } + + +__global__ void keMatrixVol2Col( + int num_kernels, real*dataSrc, real* dataDst, + int depth, int height, int width, + int filterD, int filterH, int filterW, + int strideD, int strideH, int strideW, + int paddingD, int paddingH, int paddingW, + int depth_col, int height_col, int width_col){ + + for (int index = blockIdx.x * blockDim.x + threadIdx.x; + index < num_kernels; + index += blockDim.x * 
gridDim.x){ + + int w_out = index % width_col; + int h_out = (index / width_col ) % height_col; + int d_out = (index / width_col / height_col) % depth_col; + int channel_in = index / width_col / height_col / depth_col; + int channel_out = channel_in * filterD * filterH * filterW; + int w_in = w_out * strideW - paddingW; + int h_in = h_out * strideH - paddingH; + int d_in = d_out * strideD - paddingD; + + dataDst += ((channel_out * depth_col + d_out) * height_col + h_out) * width_col + w_out; + dataSrc += ((channel_in * depth + d_in) * height + h_in) * width + w_in; + for (int k = 0; k < filterD; ++k) { + for (int i = 0; i < filterH; ++i) { + for (int j = 0; j < filterW; ++j) { + int d = d_in + k; + int h = h_in + i; + int w = w_in + j; + *dataDst = (d >= 0 && d < depth && h >= 0 && h < height && w >= 0 && w < width ) ? + dataSrc[(k * height + i) * width + j] : 0; + dataDst += depth_col * height_col * width_col; + } + } + } + } +} + +void hl_matrix_vol2Col(real* dataSrc, + int channels, int depth, int height, int width, + int filterD, int filterH, int filterW, + int strideD, int strideH, int strideW, + int paddingD, int paddingH, int paddingW, real* dataDst){ + + int depth_col = (depth + 2 * paddingD - filterD) / strideD + 1; + int height_col = (height + 2 * paddingH - filterH) / strideH + 1; + int width_col = (width + 2 * paddingW - filterW) / strideW + 1; + int num_kernels = channels * depth_col * height_col * width_col; + + const int threads = 512; + const int blocks = DIVUP(num_kernels, threads); + + keMatrixVol2Col<<< blocks, threads >>>( + num_kernels, dataSrc, dataDst, + depth, height, width, + filterD, filterH, filterW, + strideD, strideH, strideW, + paddingD, paddingH, paddingW, + depth_col, height_col, width_col); + CHECK_SYNC("hl_matrix_vol2Col failed"); +} + +__global__ void keMatrixCol2Vol( + int num_kernels, real*dataDst, real* dataSrc, + int depth, int height, int width, + int filterD, int filterH, int filterW, + int strideD, int strideH, int strideW, + int paddingD, int paddingH, int paddingW, + int depth_col, int height_col, int width_col, + real alpha, real beta){ + + for (int index = blockIdx.x * blockDim.x + threadIdx.x; + index < num_kernels; + index += blockDim.x * gridDim.x) { + + real val = 0; + int w = index % width + paddingW; + int h = (index / width) % height + paddingH; + int d = (index / width / height) % depth + paddingD; + int c = index / (width * height * depth); + // compute the start and end of the output + int w_col_start = (w < filterW) ? 0 : (w - filterW) / strideW + 1; + int w_col_end = min(w / strideW + 1, width_col); + int h_col_start = (h < filterH) ? 0 : (h - filterH) / strideH + 1; + int h_col_end = min(h / strideH + 1, height_col); + int d_col_start = (d < filterD) ? 
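+    // [dhw]_col_{start,end} bound the output (column) positions whose filter
+    // window covers the input voxel (d, h, w); the loops below accumulate
+    // their contributions into this voxel.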
0 : (d - filterD) / strideD + 1; + int d_col_end = min(d / strideD + 1, depth_col); + + int offset = (c * filterD * filterW * filterH + \ + d * filterW * filterH + h * filterW + w) * depth_col * height_col * width_col; + + int coeff_d_col = (1 - strideD * filterW * filterH * depth_col) * height_col * width_col; + int coeff_h_col = (1 - strideH * filterW * depth_col * height_col) * width_col; + int coeff_w_col = (1 - strideW * depth_col * height_col * width_col); + + for (int d_col = d_col_start; d_col < d_col_end; ++d_col) { + for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { + for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { + val += dataSrc[offset + d_col * coeff_d_col + h_col * coeff_h_col + w_col * coeff_w_col]; + } + } + } + dataDst[index] = val; + } +} + +void hl_matrix_col2Vol(real* dataDst, + int channels, int depth, int height, int width, + int filterD, int filterH, int filterW, + int strideD, int strideH, int strideW, + int paddingD, int paddingH, int paddingW, + real* dataSrc, + real alpha, real beta){ + + int depth_col = (depth + 2 * paddingD - filterD) / strideD + 1; + int height_col = (height + 2 * paddingH - filterH) / strideH + 1; + int width_col = (width + 2 * paddingW - filterW) / strideW + 1; + int num_kernels = channels * depth * height * width; + + const int threads = 512; + const int blocks = DIVUP(num_kernels, threads); + + keMatrixCol2Vol<<< blocks, threads >>>( + num_kernels, dataDst, dataSrc, + depth, height, width, + filterD, filterH, filterW, + strideD, strideH, strideW, + paddingD, paddingH, paddingW, + depth_col, height_col, width_col, + alpha, beta); + + CHECK_SYNC("hl_matrix_col2Vol failed"); +} From 8cc0eb9c5d564b71452e65d1bac3f9f19f5bf89e Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Sun, 13 Aug 2017 09:38:02 +0800 Subject: [PATCH 010/170] Modify ConvConfig, Add depth dimension --- proto/ModelConfig.proto | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 4f3d5bf3f6..043ae502b0 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -82,6 +82,12 @@ message ConvConfig { // if not set, use img_size optional uint32 img_size_y = 14; + + optional uint32 filter_size_z = 15 [ default = 1 ]; + optional uint32 padding_z = 16 [ default = 1 ]; + optional uint32 stride_z = 17 [ default = 1 ]; + optional uint32 output_z = 18 [ default = 1 ]; + optional uint32 img_size_z = 19 [ default = 1 ]; } message PoolConfig { @@ -631,4 +637,4 @@ message ModelConfig { // For External Machine, defining how to split a neural network // into multiple parts. 
optional ExternalConfig external_config = 9; -}; +}; \ No newline at end of file From 5d7f6dde52af781e15953c041374b5671bdf918d Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Sun, 13 Aug 2017 09:42:48 +0800 Subject: [PATCH 011/170] Add depth dimension information to ConvBaseLayer --- paddle/gserver/layers/ConvBaseLayer.cpp | 17 +++++++++++++---- paddle/gserver/layers/ConvBaseLayer.h | 8 ++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/paddle/gserver/layers/ConvBaseLayer.cpp b/paddle/gserver/layers/ConvBaseLayer.cpp index e161d89c38..e437b0b86e 100644 --- a/paddle/gserver/layers/ConvBaseLayer.cpp +++ b/paddle/gserver/layers/ConvBaseLayer.cpp @@ -21,9 +21,11 @@ bool ConvBaseLayer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { /* Initialize the basic parent class */ Layer::init(layerMap, parameterMap); - isDeconv_ = (config_.type() == "exconv" || config_.type() == "cudnn_conv") - ? false - : true; + isDeconv_ = (config_.type() == "exconv" || + config_.type() == "cudnn_conv" || + config_.type() == "conv3d" || + config_.type() == "deconv3d" ) + ? false : true; /* Initialize the convolutional layer parameter */ numFilters_ = config_.num_filters(); @@ -36,7 +38,6 @@ bool ConvBaseLayer::init(const LayerMap& layerMap, paddingY_.push_back(conf.padding_y()); strideY_.push_back(conf.stride_y()); filterSizeY_.push_back(conf.filter_size_y()); - filterPixels_.push_back(filterSize_.back() * filterSizeY_.back()); channels_.push_back(conf.channels()); imgSizeH_.push_back(conf.has_img_size_y() ? conf.img_size_y() : conf.img_size()); @@ -45,6 +46,14 @@ bool ConvBaseLayer::init(const LayerMap& layerMap, filterChannels_.push_back(conf.filter_channels()); outputH_.push_back(conf.has_output_y() ? conf.output_y() : conf.output_x()); outputW_.push_back(conf.output_x()); + + paddingZ_.push_back(conf.padding_z()); + strideZ_.push_back(conf.stride_z()); + filterSizeZ_.push_back(conf.filter_size_z()); + imgSizeD_.push_back(conf.img_size_z()); + outputD_.push_back(conf.output_z()); + filterPixels_.push_back( + filterSize_.back() * filterSizeY_.back() * filterSizeZ_.back()); } CHECK(inputLayers_.size() == parameters_.size()); diff --git a/paddle/gserver/layers/ConvBaseLayer.h b/paddle/gserver/layers/ConvBaseLayer.h index e9d15d94f8..8d1fd989e8 100644 --- a/paddle/gserver/layers/ConvBaseLayer.h +++ b/paddle/gserver/layers/ConvBaseLayer.h @@ -23,6 +23,7 @@ namespace paddle { * with learned filters and (optionally) adds biases. */ + class ConvBaseLayer : public Layer { protected: typedef std::vector IntV; @@ -58,6 +59,13 @@ protected: IntV outputH_; /// The spatial dimensions of output feature map width. 
IntV outputW_; + + IntV outputD_; + IntV imgSizeD_; + IntV filterSizeZ_; + IntV strideZ_; + IntV paddingZ_; + /// Group size, refer to grouped convolution in /// Alex Krizhevsky's paper: when group=2, the first half of the /// filters are only connected to the first half of the input channels, From 11975b4f9185907b5f2518722e5311d744361887 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Sun, 13 Aug 2017 09:47:37 +0800 Subject: [PATCH 012/170] Add Conv3DLayer --- paddle/gserver/layers/Conv3DLayer.cpp | 225 ++++++++++++++++++++++++++ paddle/gserver/layers/Conv3DLayer.h | 57 +++++++ 2 files changed, 282 insertions(+) create mode 100644 paddle/gserver/layers/Conv3DLayer.cpp create mode 100644 paddle/gserver/layers/Conv3DLayer.h diff --git a/paddle/gserver/layers/Conv3DLayer.cpp b/paddle/gserver/layers/Conv3DLayer.cpp new file mode 100644 index 0000000000..0fa9c5f9f5 --- /dev/null +++ b/paddle/gserver/layers/Conv3DLayer.cpp @@ -0,0 +1,225 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/utils/Logging.h" +#include "paddle/utils/Stat.h" +#include "Conv3DLayer.h" + +namespace paddle { + +REGISTER_LAYER(conv3d, Conv3DLayer); + +bool Conv3DLayer::init(const LayerMap &layerMap, + const ParameterMap ¶meterMap) { + if (!ConvBaseLayer::init(layerMap, parameterMap)) + return false; + int index = 0; + for (auto &inputConfig : config_.inputs()) { + const ConvConfig &conf = inputConfig.conv_conf(); + M_.push_back(numFilters_ / conf.groups()); + K_.push_back( + conf.filter_channels() * conf.filter_size_z() * \ + conf.filter_size_y() * conf.filter_size()); + weights_[index]->getW()->reshape( + weights_[index]->getW()->getWidth(), + weights_[index]->getW()->getHeight()); + weights_[index]->getWGrad()->reshape( + weights_[index]->getWGrad()->getWidth(), + weights_[index]->getWGrad()->getHeight()); + ++index; + } + biases_->getWGrad()->reshape( + biases_->getWGrad()->width_, biases_->getWGrad()->height_); + biases_->getW()->reshape( + biases_->getW()->width_, biases_->getW()->height_); + CHECK(inputLayers_.size() == parameters_.size()); + return true; +} + + +size_t Conv3DLayer::getSize() { + CHECK_NE(inputLayers_.size(), 0UL); + // imgSizeH_.clear(); + // imgSizeW_.clear(); + // imgSizeD_.clear(); + outputH_.clear(); + outputW_.clear(); + outputD_.clear(); + N_.clear(); + size_t layerSize = 0; + for (size_t i = 0; i < inputLayers_.size(); ++i) { + // imgSizeH_.push_back(inputLayers_[i]->getOutput().getFrameHeight()); + // imgSizeW_.push_back(inputLayers_[i]->getOutput().getFrameWidth()); + // imgSizeD_.push_back(inputLayers_[i]->getOutput().getFrameDepth()); + outputW_.push_back(outputSize( + imgSizeW_[i], filterSize_[i], + padding_[i], stride_[i], true)); + outputH_.push_back(outputSize( + imgSizeH_[i], filterSizeY_[i], + paddingY_[i], strideY_[i], true)); + outputD_.push_back(outputSize( + imgSizeD_[i], filterSizeZ_[i], + paddingZ_[i], strideZ_[i], true)); + + N_.push_back(outputD_[i] * outputH_[i] * outputW_[i]); + CHECK(layerSize == 0 || N_[i] * 
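+    // N_[i] = outputD * outputH * outputW, with each extent given by the
+    // usual (in + 2 * pad - filter) / stride + 1 rule; every input branch
+    // must therefore produce the same layer size of N_[i] * numFilters_.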
size_t(numFilters_) == layerSize); + layerSize += N_[i] * numFilters_; + } + getOutput().setFrameHeight(outputH_[0]); + getOutput().setFrameWidth(outputW_[0]); + getOutput().setFrameDepth(outputD_[0]); + return layerSize; +} + +void Conv3DLayer::forward(PassType passType) { + Layer::forward(passType); + + int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); + int outWidth = getSize(); + resetOutput(batchSize, outWidth); + const MatrixPtr outMat = getOutputValue(); + + for (size_t i = 0; i != inputLayers_.size(); ++i) { + REGISTER_TIMER_INFO("FwdConv3D", getName().c_str()); + const MatrixPtr& inMat = getInputValue(i); + int width = inMat->getWidth(); + int M = M_[i]; + int N = N_[i]; + int K = K_[i]; + Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); + MatrixPtr wMat = weights_[i]->getW(); + for (int n = 0; n < batchSize; ++n) { + colBuf_->vol2Col(inMat->getData() + n * width, channels_[i], + imgSizeD_[i], imgSizeH_[i], imgSizeW_[i], + filterSizeZ_[i], filterSizeY_[i], filterSize_[i], + strideZ_[i], strideY_[i], stride_[i], + paddingZ_[i], paddingY_[i], padding_[i]); + + real *outData = outMat->getData() + n * outWidth; + MatrixPtr outMatSub = + Matrix::create(outData, groups_[i] * M, N, false, useGpu_); + for (int g = 0; g < groups_[i]; g++) { + MatrixPtr wMatSub = wMat->subMatrix(g * M, M); + MatrixPtr in = colBuf_->subMatrix(g * K, K); + MatrixPtr out = outMatSub->subMatrix(g * M, M); + out->mul(*wMatSub, *in, 1.0, 0.0); + } + } + } + if (nullptr != this->biasParameter_) { + REGISTER_TIMER_INFO("FwBiasTimer", getName().c_str()); + this->addBias(); + } + forwardActivation(); +} + +void Conv3DLayer::backward(const UpdateCallback &callback) { + backwardActivation(); + + if (biases_ && biases_->getWGrad()) { + bpropBiases(); + biases_->getParameterPtr()->incUpdate(callback); + } + + for (size_t i = 0; i != inputLayers_.size(); ++i) { + REGISTER_TIMER_INFO("BwdConv3D", getName().c_str()); + if (weights_[i]->getWGrad()) { + bpropWeights(i); + } + if (this->needGradient_) { + bpropData(i); + } + REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); + weights_[i]->getParameterPtr()->incUpdate(callback); + } +} + +void Conv3DLayer::bpropWeights(int i) { + int M = M_[i]; + int N = N_[i]; + int K = K_[i]; + const MatrixPtr& inMat = getInputValue(i); + int width = inMat->getWidth(); + Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); + MatrixPtr wGradMat = weights_[i]->getWGrad(); + real* outGradData = getOutputGrad()->getData(); + int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); + + for (int n = 0; n < batchSize; ++n) { + colBuf_->vol2Col(inMat->getData() + n * width, channels_[i], + imgSizeD_[i], imgSizeH_[i], imgSizeW_[i], + filterSizeZ_[i], filterSizeY_[i], filterSize_[i], + strideZ_[i], strideY_[i], stride_[i], + paddingZ_[i], paddingY_[i], padding_[i]); + outGradData += n * getOutputGrad()->getWidth(); + MatrixPtr outGradSub = + Matrix::create(outGradData, groups_[i] * M, N, false, useGpu_); + for (int g = 0; g < groups_[i]; ++g) { + MatrixPtr inMatSub = colBuf_->subMatrix(g * K, K); + MatrixPtr outG = outGradSub->subMatrix(g * M, M); + MatrixPtr wGradSub = wGradMat->subMatrix(g * M, M); + wGradSub->mul(*outG, *(inMatSub->getTranspose()), 1.0, 1.0); + } + } +} + +void Conv3DLayer::bpropData(int i) { + int M = M_[i]; + int N = N_[i]; + int K = K_[i]; + Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); + MatrixPtr wMat = weights_[i]->getW(); + real* outGradData = getOutputGrad()->getData(); + real* preGradData = 
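+  // Data gradient, per group: colGrad = W^T * outGrad
+  // ([K x N] = [K x M] * [M x N]); col2Vol then accumulates the column
+  // gradients back into the input gradient volume.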
getInputGrad(i)->getData(); + int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); + for (int n = 0; n < batchSize; ++n) { + outGradData += n * getOutputGrad()->getWidth(); + preGradData += n * getInputGrad(i)->getWidth(); + MatrixPtr outGradSub = + Matrix::create(outGradData, M * groups_[i], N, false, useGpu_); + for (int g = 0; g < groups_[i]; ++g) { + MatrixPtr wMatSub = wMat->subMatrix(g * M, M); + MatrixPtr outG = outGradSub->subMatrix(g * M, M); + MatrixPtr inGradMatSub = colBuf_->subMatrix(g * K, K); + inGradMatSub->mul(*(wMatSub->getTranspose()), *outG, 1.0, 0.0); + } + colBuf_->col2Vol(preGradData, channels_[i], + imgSizeD_[i], imgSizeH_[i], imgSizeW_[i], + filterSizeZ_[i], filterSizeY_[i], filterSize_[i], + strideZ_[i], strideY_[i], stride_[i], + paddingZ_[i], paddingY_[i], padding_[i], + 1.0, 1.0); + } +} + +void Conv3DLayer::bpropBiases() { + MatrixPtr outGradMat = getOutputGrad(); + if (this->sharedBiases_) { + biases_->getWGrad()->collectSharedBias(*outGradMat, 1.0f); + } else { + biases_->getWGrad()->collectBias(*outGradMat, 1.0f); + } +} + +void Conv3DLayer::addBias() { + MatrixPtr outMat = getOutputValue(); + + if (this->sharedBiases_) { + outMat->addSharedBias(*(biases_->getW()), 1.0f); + } else { + outMat->addBias(*(biases_->getW()), 1.0f); + } +} + +} // namespace paddle diff --git a/paddle/gserver/layers/Conv3DLayer.h b/paddle/gserver/layers/Conv3DLayer.h new file mode 100644 index 0000000000..703671e5d0 --- /dev/null +++ b/paddle/gserver/layers/Conv3DLayer.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + + +#pragma once + +#include "ConvBaseLayer.h" +#include "paddle/math/Matrix.h" +#include "paddle/math/MathUtils.h" +#include + +namespace paddle { + +/** + * @brief A subclass of convolution layer. + * This layer expands input and use matrix multiplication to + * calculate convolution operation. + */ +class Conv3DLayer : public ConvBaseLayer { +public: + explicit Conv3DLayer(const LayerConfig& config) : ConvBaseLayer(config) {} + + ~Conv3DLayer() {} + + bool init(const LayerMap &layerMap, const ParameterMap ¶meterMap); + + size_t getSize(); + + void forward(PassType passType); + void addBias(); + + void backward(const UpdateCallback& callback); + + void bpropBiases(); + void bpropData(int i); + void bpropWeights(int i); + +protected: + // Figure out the dimensions for individual gemms. 
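+  // As computed in init()/getSize(): M_ = numFilters_ / groups_,
+  // K_ = filterChannels_ * filterSizeZ_ * filterSizeY_ * filterSize_,
+  // N_ = outputD_ * outputH_ * outputW_; the per-group forward GEMM is
+  // [M_ x K_] * [K_ x N_] = [M_ x N_].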
+ IntV M_; /// numFilters_ / filter_group_; + IntV N_; /// channels_ * filterSizeZ_ * filterSize_ * filterSizeY_ + IntV K_; /// outputD_ * outputH_ * outputW_ + MatrixPtr colBuf_; +}; + +} // namespace paddle From 23cf0c61e066f54b360efc4e17576a056868b050 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Sun, 13 Aug 2017 09:48:59 +0800 Subject: [PATCH 013/170] Add DeConv3DLayer --- paddle/gserver/layers/DeConv3DLayer.cpp | 211 ++++++++++++++++++++++++ paddle/gserver/layers/DeConv3DLayer.h | 58 +++++++ 2 files changed, 269 insertions(+) create mode 100644 paddle/gserver/layers/DeConv3DLayer.cpp create mode 100644 paddle/gserver/layers/DeConv3DLayer.h diff --git a/paddle/gserver/layers/DeConv3DLayer.cpp b/paddle/gserver/layers/DeConv3DLayer.cpp new file mode 100644 index 0000000000..8de40b681d --- /dev/null +++ b/paddle/gserver/layers/DeConv3DLayer.cpp @@ -0,0 +1,211 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/utils/Logging.h" +#include "paddle/utils/Stat.h" +#include "DeConv3DLayer.h" + +namespace paddle { + +REGISTER_LAYER(deconv3d, DeConv3DLayer); + +#define DECONV_OUTPUT_SIZE(IN_SIZE, STRID, PAD, KSIZE) \ + (((IN_SIZE) - 1) * (STRID) - 2 * (PAD) + (KSIZE)) + +bool DeConv3DLayer::init(const LayerMap &layerMap, + const ParameterMap ¶meterMap) { + if (!ConvBaseLayer::init(layerMap, parameterMap)) return false; + // for Deconv, the dimension of Kernel is + // channel * output * depth * height * weigth + // Matrix storage format: (output * depth * height * weigth) x channel + for (int index = 0; index < config_.inputs().size(); ++index) { + M_.push_back(filterChannels_[index]); + K_.push_back( + filterPixels_[index] * (numFilters_/groups_[index])); + weights_[index]->getW()->reshape( + filterPixels_[index] * numFilters_, + filterChannels_[index]); + weights_[index]->getWGrad()->reshape( + filterPixels_[index] * numFilters_, + filterChannels_[index]); + } + biases_->getWGrad()->reshape( + biases_->getWGrad()->width_, biases_->getWGrad()->height_); + biases_->getW()->reshape( + biases_->getW()->width_, biases_->getW()->height_); + CHECK(inputLayers_.size() == parameters_.size()); + return true; +} + + +size_t DeConv3DLayer::getSize() { + CHECK_NE(inputLayers_.size(), 0UL); + // imgSizeH_.clear(); + // imgSizeW_.clear(); + // imgSizeD_.clear(); + outputH_.clear(); + outputW_.clear(); + outputD_.clear(); + N_.clear(); + No_.clear(); + size_t layerSize = 0; + for (size_t i = 0; i < inputLayers_.size(); ++i) { + // imgSizeH_.push_back(inputLayers_[i]->getOutput().getFrameHeight()); + // imgSizeW_.push_back(inputLayers_[i]->getOutput().getFrameWidth()); + // imgSizeD_.push_back(inputLayers_[i]->getOutput().getFrameDepth()); + outputW_.push_back( + DECONV_OUTPUT_SIZE( + imgSizeW_[i], stride_[i], + padding_[i], filterSize_[i])); + outputH_.push_back( + DECONV_OUTPUT_SIZE( + imgSizeH_[i], strideY_[i], + paddingY_[i], filterSizeY_[i])); + outputD_.push_back( + DECONV_OUTPUT_SIZE( + imgSizeD_[i], strideZ_[i], + paddingZ_[i], filterSizeZ_[i])); + 
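+    // DECONV_OUTPUT_SIZE expands each axis to (in - 1) * stride - 2 * pad +
+    // kernel, the inverse of the convolution output formula; No_ counts the
+    // output voxels per filter and N_ the input voxels per channel.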
No_.push_back(outputD_[i] * outputH_[i] * outputW_[i]); + N_.push_back(imgSizeD_[i] * imgSizeH_[i] * imgSizeW_[i]); + CHECK(layerSize == 0 || N_[i] * size_t(numFilters_) == layerSize); + layerSize += No_[i] * numFilters_; + } + getOutput().setFrameHeight(outputH_[0]); + getOutput().setFrameWidth(outputW_[0]); + getOutput().setFrameDepth(outputD_[0]); + return layerSize; +} + +void DeConv3DLayer::forward(PassType passType) { + Layer::forward(passType); + int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); + int outWidth = getSize(); + resetOutput(batchSize, outWidth); + const MatrixPtr outMat = getOutputValue(); + + for (size_t i = 0; i != inputLayers_.size(); ++i) { + REGISTER_TIMER_INFO("FwdDeConv3D", getName().c_str()); + const MatrixPtr& inMat = getInputValue(i); + int width = inMat->getWidth(); + int M = M_[i]; + int N = N_[i]; + int K = K_[i]; + MatrixPtr wMat = weights_[i]->getW(); + Matrix::resizeOrCreate(colBuf_, K * groups_[i] , N, false, useGpu_); + + for (int n = 0; n < batchSize; ++n) { + real *inData = inMat->getData() + n * width; + real *colBufData = colBuf_->getData(); + for (int g = 0; g < groups_[i]; g++) { + MatrixPtr wMatSub = wMat->subMatrix(g * K, K); + MatrixPtr inMatSub = + Matrix::create(inData, M, N, false, useGpu_); + MatrixPtr colBufDataSub = + Matrix::create(colBufData, K, N, false, useGpu_); + colBufDataSub->mul(*wMatSub, *inMatSub, 1.0, 0.0); + colBufData += K * N; + inData += M * N; + } + colBuf_->col2Vol(outMat->getData()+ n * outMat->getWidth(), + numFilters_, outputD_[i], outputH_[i], outputW_[i], + filterSizeZ_[i], filterSizeY_[i], filterSize_[i], + strideZ_[i], strideY_[i], stride_[i], + paddingZ_[i], paddingY_[i], padding_[i], 1.0, 1.0); + } + } + if (nullptr != this->biasParameter_) { + REGISTER_TIMER_INFO("FwBiasTimer", getName().c_str()); + this->addBias(); + } + forwardActivation(); +} + +void DeConv3DLayer::backward(const UpdateCallback &callback) { + backwardActivation(); + int batchSize = getOutputGrad()->getHeight(); + int outputWidth = getOutputGrad()->getWidth(); + if (biases_ && biases_->getWGrad()) { + bpropBiases(); + biases_->getParameterPtr()->incUpdate(callback); + } + for (size_t i =0; i < inputLayers_.size(); ++i) { + int M = M_[i]; + int N = N_[i]; + int K = K_[i]; + Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); + const MatrixPtr& inMat = getInputValue(i); + for (int n = 0; n < batchSize; ++n) { + REGISTER_TIMER_INFO("BwdDeConv3D", getName().c_str()); + if (weights_[i]->getWGrad() || this->needGradient_) { + colBuf_->vol2Col(getOutputGrad()->getData() + n * outputWidth, + numFilters_, outputD_[i], outputH_[i], outputW_[i], + filterSizeZ_[i], filterSizeY_[i], filterSize_[i], + strideZ_[i], strideY_[i], stride_[i], + paddingZ_[i], paddingY_[i], padding_[i]); + } + if (weights_[i]->getWGrad()) { + real *inData = inMat->getData() + n * inMat->getWidth();; + real *wGradData = weights_[i]->getWGrad()->getData(); + for (int g = 0; g < groups_[i]; g++) { + MatrixPtr colBufDataSub = colBuf_->subMatrix(g * K, K); + MatrixPtr inMatSub = Matrix::create( + inData, M, N, false, useGpu_); + MatrixPtr wGradMatSub = Matrix::create( + wGradData, K, M, false, useGpu_); + wGradMatSub->mul(*colBufDataSub, + *(inMatSub->getTranspose()), 1.0, 1.0); + wGradData += K * M; + inData += M * N; + } + weights_[i]->getParameterPtr()->incUpdate(callback); + } + if (this->needGradient_) { + real* preGrad = getInputGrad(i)->getData(); + for (int g = 0; g < groups_[i]; ++g) { + MatrixPtr w = weights_[i]->getW()->subMatrix(g * K, K); + 
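+          // Input gradient, per group: inGrad = W^T * colGrad, where colGrad
+          // is the vol2Col expansion of the output gradient computed above
+          // ([M x N] = [M x K] * [K x N]).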
MatrixPtr outGradMat = colBuf_->subMatrix(g * K, K); + MatrixPtr inGradMatSub = Matrix::create( + preGrad, M, N, false, useGpu_); + inGradMatSub->mul(*(w->getTranspose()), *outGradMat, 1.0, 0.0); + preGrad += M * N; + } + } + REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); + } + } +} + +void DeConv3DLayer::bpropWeights(int i) { } +void DeConv3DLayer::bpropData(int i) { } + +void DeConv3DLayer::bpropBiases() { + MatrixPtr outGradMat = getOutputGrad(); + + if (this->sharedBiases_) { + biases_->getWGrad()->collectSharedBias(*outGradMat, 1.0f); + } else { + biases_->getWGrad()->collectBias(*outGradMat, 1.0f); + } +} + +void DeConv3DLayer::addBias() { + MatrixPtr outMat = getOutputValue(); + if (this->sharedBiases_) { + outMat->addSharedBias(*(biases_->getW()), 1.0f); + } else { + outMat->addBias(*(biases_->getW()), 1.0f); + } +} + +} // namespace paddle diff --git a/paddle/gserver/layers/DeConv3DLayer.h b/paddle/gserver/layers/DeConv3DLayer.h new file mode 100644 index 0000000000..435807fe5d --- /dev/null +++ b/paddle/gserver/layers/DeConv3DLayer.h @@ -0,0 +1,58 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + + +#pragma once + +#include "ConvBaseLayer.h" +#include "paddle/math/Matrix.h" +#include "paddle/math/MathUtils.h" +#include + +namespace paddle { + +/** + * @brief A subclass of deconvolution3D layer. + * This layer expands input and use matrix multiplication to + * calculate deconvolution3D operation. + */ +class DeConv3DLayer : public ConvBaseLayer { +public: + explicit DeConv3DLayer(const LayerConfig& config) : ConvBaseLayer(config) {} + + ~DeConv3DLayer() {} + + bool init(const LayerMap &layerMap, const ParameterMap ¶meterMap); + + size_t getSize(); + + void forward(PassType passType); + void addBias(); + + void backward(const UpdateCallback& callback); + + void bpropBiases(); + void bpropData(int i); + void bpropWeights(int i); + +protected: + // Figure out the dimensions for individual gemms. 
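+  // As computed in init()/getSize(): M_ = filterChannels_,
+  // K_ = filterPixels_ * (numFilters_ / groups_), N_ = imgSizeD_ * imgSizeH_
+  // * imgSizeW_ (input voxels), No_ = outputD_ * outputH_ * outputW_.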
+ IntV M_; /// numFilters_ / filter_group_; + IntV N_; /// channels_ * filterSizeZ_ * filterSize_ * filterSizeY_ + IntV K_; /// outputD_ * outputH_ * outputW_ + IntV No_; + MatrixPtr colBuf_; +}; + +} // namespace paddle From 52ceeedba5ca1371302414a0ad11ff93d9ed7d9a Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Sun, 13 Aug 2017 09:51:39 +0800 Subject: [PATCH 014/170] Add col2vol and vol2col CPU funtion --- paddle/math/Matrix.cpp | 135 +++++++++++++++++++++++++++++++++++++++++ paddle/math/Matrix.h | 64 +++++++++++++++++++ 2 files changed, 199 insertions(+) diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 27f7d95b75..66868e73b3 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -1389,6 +1389,52 @@ void GpuMatrix::multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) { output_d, grad_d, mat_d, height_, width_); } +void GpuMatrix::vol2Col(real* data, + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW) { + hl_matrix_vol2Col(data, + channels, depth, height, width, + filterD, filterH, filterW, + strideD, strideH, strideW, + paddingD, paddingH, paddingW, getData()); +} + +void GpuMatrix::col2Vol(real* trg, + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, + real alpha, + real beta) { + hl_matrix_col2Vol(trg, + channels, depth, height, width, + filterD, filterH, filterW, + strideD, strideH, strideW, + paddingD, paddingH, paddingW, + getData(), + alpha, beta); + } + /** * CpuMatrix */ @@ -3975,6 +4021,95 @@ void CpuMatrix::bilinearBackward(const Matrix& out, } } +void CpuMatrix::vol2Col(real* data, + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW) { + real* outData = getData(); + int outHeight = (height + 2 * paddingH - filterH) / strideH + 1; + int outWidth = (width + 2 * paddingW - filterW) / strideW + 1; + int outDepth = (depth + 2 * paddingD - filterD) / strideD + 1; + + int channelsCol = channels * filterD * filterH * filterW; + for (int c = 0; c < channelsCol; ++c) { + int wOffset = c % filterW; + int hOffset = (c / filterW) % filterH; + int dOffset = (c / filterW / filterH) % filterD; + int cIn = c / filterW / filterH / filterD; + for (int d = 0; d < outDepth; ++d) { + for (int h = 0; h < outHeight; ++h) { + for (int w = 0; w < outWidth; ++w) { + int dPad = d * strideD - paddingD + dOffset; + int hPad = h * strideH - paddingH + hOffset; + int wPad = w * strideW - paddingW + wOffset; + + if (hPad >= 0 && hPad < height && wPad >= 0 && wPad < width && + dPad >= 0 && dPad < depth) + outData[((c * outDepth + d) * outHeight + h) * outWidth + w] = + data[((cIn * depth + dPad) * height + hPad) * width + wPad]; + else + outData[((c * outDepth + d) * outHeight + h) * outWidth + w] = 0; + } + } + } + } +} + +void CpuMatrix::col2Vol(real* trg, + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, + real alpha, + real beta) { + real* src = getData(); + int outDepth = (depth + 2 * paddingH - filterD) / strideD + 1; + int outHeight = (height + 2 * paddingH - filterH) / strideH + 1; + int outWidth = 
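+  // This col matrix has channels * filterD * filterH * filterW rows and
+  // outDepth * outHeight * outWidth columns, mirroring vol2Col above; each
+  // covered voxel receives alpha * src + beta * trg.
+  // (outDepth above is computed with paddingH; the depth padding is paddingD.)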
(width + 2 * paddingW - filterW) / strideW + 1; + int channelsCol = channels * filterD * filterH * filterW; + for (int c = 0; c < channelsCol; ++c) { + int wOffset = c % filterW; + int hOffset = (c / filterW) % filterH; + int dOffset = (c / filterW / filterH) % filterD; + int cIm = c / filterW / filterH / filterD; + for (int d = 0; d < outDepth; ++d) { + for (int h = 0; h < outHeight; ++h) { + for (int w = 0; w < outWidth; ++w) { + int dPad = d * strideD - paddingD + dOffset; + int hPad = h * strideH - paddingH + hOffset; + int wPad = w * strideW - paddingW + wOffset; + if (hPad >= 0 && hPad < height && wPad >= 0 && wPad < width && + dPad >= 0 && dPad < depth) + trg[((cIm * depth + dPad) * height + hPad) * width + wPad] = + alpha * + src[((c * outDepth + d) * outHeight + h) * outWidth + w] + + beta * + trg[((cIm * depth + dPad) * height + hPad) * width + wPad]; + } + } + } + } +} + //////////////////////////////////////////////////////////////// // functions executed via cpu // //////////////////////////////////////////////////////////////// diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index bb802bbb2c..4354996ce0 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -1039,6 +1039,42 @@ public: LOG(FATAL) << "Not implemented"; } + virtual void vol2Col(real* data, + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW) { + LOG(FATAL) << "Not implemeted"; + } + + virtual void col2Vol(real* trg, + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, + real alpha, + real beta) { + LOG(FATAL) << "Not implemeted"; + } + virtual void bilinearForward(const Matrix& in, const size_t inImgH, const size_t inImgW, @@ -1374,6 +1410,20 @@ public: const real ratioH, const real ratioW); + void vol2Col(real* data, + int channels, + int depth, int height, int width, + int filterD, int filterH, int filterW, + int strideD, int strideH, int strideW, + int paddingD, int paddingH, int paddingW); + + void col2Vol(real* trg, + int channels, int depth, int height, int width, + int filterD, int filterH, int filterW, + int strideD, int strideH, int strideW, + int paddingD, int paddingH, int paddingW, + real alpha, real beta); + void multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label); void multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label); @@ -1715,6 +1765,20 @@ public: const real ratioH, const real ratioW); + void vol2Col(real* data, + int channels, + int depth, int height, int width, + int filterD, int filterH, int filterW, + int strideD, int strideH, int strideW, + int paddingD, int paddingH, int paddingW); + + void col2Vol(real* trg, + int channels, int depth, int height, int width, + int filterD, int filterH, int filterW, + int strideD, int strideH, int strideW, + int paddingD, int paddingH, int paddingW, + real alpha, real beta); + template void operator=(const ExpressionType& expr) { TensorCpuApply(*this, expr); From 9b3d6acdbfc2fd6bc26185ddb9c38dfb90632324 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Sun, 13 Aug 2017 09:54:10 +0800 Subject: [PATCH 015/170] Add depth dimension information to Argument --- paddle/parameter/Argument.cpp | 2 ++ paddle/parameter/Argument.h | 8 +++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/paddle/parameter/Argument.cpp 
b/paddle/parameter/Argument.cpp index 0547ac93cd..77fd0c5890 100644 --- a/paddle/parameter/Argument.cpp +++ b/paddle/parameter/Argument.cpp @@ -186,6 +186,7 @@ void Argument::resizeAndCopyFrom(const Argument& src, resizeAndCopy(strs, src.strs, useGpu, stream); frameWidth = src.frameWidth; frameHeight = src.frameHeight; + frameDepth = src.frameDepth; } int32_t Argument::resizeAndCopyFrom(const Argument& src, @@ -206,6 +207,7 @@ int32_t Argument::resizeAndCopyFrom(const Argument& src, dataId = src.dataId; frameWidth = src.frameWidth; frameHeight = src.frameHeight; + frameDepth = src.frameDepth; if (!src.sequenceStartPositions) { // non-sequence input, copy samples directly diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h index d8d7a4398f..ba3ad2fd4d 100644 --- a/paddle/parameter/Argument.h +++ b/paddle/parameter/Argument.h @@ -1,11 +1,8 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -35,6 +32,7 @@ struct Argument { strs(nullptr), frameHeight(0), frameWidth(0), + frameDepth(0), sequenceStartPositions(nullptr), subSequenceStartPositions(nullptr), cpuSequenceDims(nullptr), @@ -64,6 +62,7 @@ struct Argument { allCount = argument.allCount; frameHeight = argument.frameHeight; frameWidth = argument.frameWidth; + frameDepth = argument.frameDepth; dataId = argument.dataId; } @@ -76,6 +75,7 @@ struct Argument { // A dataBatch includes batchSize frames, one frame maybe not only vector size_t frameHeight; size_t frameWidth; + size_t frameDepth; // If NULL, each position is treated independently. // Otherwise, its size should be #NumberOfSequences + 1. @@ -136,8 +136,10 @@ struct Argument { } size_t getFrameHeight() const { return frameHeight; } size_t getFrameWidth() const { return frameWidth; } + size_t getFrameDepth() const { return frameDepth; } void setFrameHeight(size_t h) { frameHeight = h; } void setFrameWidth(size_t w) { frameWidth = w; } + void setFrameDepth(size_t d) { frameDepth = d; } int64_t getNumSequences() const { return sequenceStartPositions ? sequenceStartPositions->getSize() - 1 From 44ae44da49f206af56d02816aff8e9b2920d0bf8 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Mon, 14 Aug 2017 09:01:22 +0800 Subject: [PATCH 016/170] add configuratioin helpers. 
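
The cross_entropy_over_beam helper takes its inputs as (score, selected-index)
pairs, one pair per beam expansion, together with one label layer per
expansion; each score input must have size 1. A usage sketch, mirroring the
test config added in this patch (layer names are illustrative):

    cost = cross_entropy_over_beam(
        input=[sentence_scores, topk_sentence_ids,
               start_pos_scores, topk_start_pos_ids,
               end_pos_scores, topk_end_pos_ids],
        label=[sentence_idx, start_idx, end_idx])
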
--- python/paddle/trainer/config_parser.py | 16 ++ .../paddle/trainer_config_helpers/layers.py | 34 ++- .../tests/configs/file_list.sh | 2 +- .../test_cross_entropy_over_beam.protostr | 208 ++++++++++++++++++ .../configs/test_cross_entropy_over_beam.py | 39 ++++ 5 files changed, 295 insertions(+), 4 deletions(-) create mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr create mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index da99e5bd53..a24299787b 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1602,6 +1602,21 @@ class MultiClassCrossEntropySelfNormCostLayer(LayerBase): self.config.softmax_selfnorm_alpha = softmax_selfnorm_alpha +@config_layer('cross_entropy_over_beam') +class CrossEntropyOverBeamLayer(LayerBase): + def __init__(self, name, inputs, **xargs): + config_assert(len(inputs) % 3 == 0, "Error input numbers.") + super(CrossEntropyOverBeamLayer, self).__init__( + name, 'cross_entropy_over_beam', 0, inputs, **xargs) + input_num = len(inputs) / 3 + for i in range(input_num): + input_layer = self.get_input_layer(i * 2) + config_assert( + input_layer.size == 1, "Inputs for this layer are made up of " + "several pairs and the first one in a pair is scores for " + "all the candidates, so its size should be equal to 1.") + + @config_layer('fc') class FCLayer(LayerBase): layer_type = 'fc' @@ -2249,6 +2264,7 @@ def define_cost(class_name, cost_type): define_cost('MultiClassCrossEntropy', 'multi-class-cross-entropy') +define_cost('CrossEntropyOverBeamCostLayer', 'cross_entropy_over_beam') define_cost('RankingCost', 'rank-cost') define_cost('AucValidation', 'auc-validation') define_cost('PnpairValidation', 'pnpair-validation') diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 1bc55c8696..2b01b6ad4d 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import functools import collections import inspect @@ -104,6 +103,7 @@ __all__ = [ 'nce_layer', 'cross_entropy_with_selfnorm', 'cross_entropy', + 'cross_entropy_over_beam', 'multi_binary_label_cross_entropy', 'sum_cost', 'rank_cost', @@ -219,6 +219,7 @@ class LayerType(object): HUBER = 'huber' CROSS_ENTROPY = 'multi-class-cross-entropy' CROSS_ENTROPY_WITH_SELFNORM = 'multi_class_cross_entropy_with_selfnorm' + CROSS_ENTROPY_OVER_BEAM = 'cross_entropy_over_beam' SOFT_BIN_CLASS_CROSS_ENTROPY = 'soft_binary_class_cross_entropy' MULTI_BIN_LABEL_CROSS_ENTROPY = 'multi_binary_label_cross_entropy' SUM_COST = 'sum_cost' @@ -4028,8 +4029,12 @@ def __cost_input__(input, label, weight=None): """ inputs and parents for cost layers. 
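+    ``input`` and ``label`` may each be a single LayerOutput or a list of
+    LayerOutput; single layers are normalized into one-element lists below.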
""" - ipts = [Input(input.name), Input(label.name)] - parents = [input, label] + if isinstance(input, LayerOutput): + input = [input] + if isinstance(label, LayerOutput): + label = [label] + ipts = [Input(ipt.name) for ipt in (input + label)] + parents = [ipt for ipt in (input + label)] if weight is not None: assert weight.size == 1 ipts.append(Input(weight.name)) @@ -5692,6 +5697,29 @@ def multi_binary_label_cross_entropy(input, size=1) +@wrap_name_default() +@layer_support() +def cross_entropy_over_beam(input, label, name=None, coeff=1.0, weight=None): + """ + TODO(caoying) add comments. + """ + + assert len(input) / 2 == len(label), "Error input numbers." + for i in range(0, len(input), 2): + assert (input[i].size == 1), ( + "Inputs for this layer are made up of " + "several pairs and the first one in a pair is scores for " + "all the candidates, so its size should be equal to 1.") + + ipts, parents = __cost_input__(input, label, weight) + Layer( + name=name, + type=LayerType.CROSS_ENTROPY_OVER_BEAM, + inputs=ipts, + coeff=coeff) + return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=parents, size=1) + + @wrap_name_default() @layer_support() def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None): diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index a61beb871a..130e6332a7 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -8,6 +8,6 @@ test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer -test_kmax_seq_socre_layer test_seq_select_layers) +test_kmax_seq_socre_layer test_seq_select_layers test_cross_entropy_over_beam) export whole_configs=(test_split_datasource) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr new file mode 100644 index 0000000000..e44478ec2b --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr @@ -0,0 +1,208 @@ +type: "nn" +layers { + name: "sentence_states" + type: "data" + size: 32 + active_type: "" +} +layers { + name: "sentence_scores" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "__kmax_sequence_score_layer_0__" + type: "kmax_seq_score" + active_type: "" + inputs { + input_layer_name: "sentence_scores" + } + beam_size: 5 +} +layers { + name: "__sub_nested_seq_layer_0__" + type: "sub_nested_seq" + size: 32 + active_type: "" + inputs { + input_layer_name: "sentence_states" + } + inputs { + input_layer_name: "__kmax_sequence_score_layer_0__" + } +} +layers { + name: "__fc_layer_0__" + type: "fc" + size: 1 + active_type: "" + inputs { + input_layer_name: "__sub_nested_seq_layer_0__" + input_parameter_name: "___fc_layer_0__.w0" + } + bias_parameter_name: "___fc_layer_0__.wbias" +} +layers { + name: "__kmax_sequence_score_layer_1__" + type: "kmax_seq_score" + active_type: "" + inputs { + input_layer_name: "sentence_scores" + } + beam_size: 5 +} +layers { + name: "__seq_slice_layer_0__" + type: "seq_slice" + size: 32 + active_type: "" + inputs { + input_layer_name: 
"__sub_nested_seq_layer_0__" + } + inputs { + input_layer_name: "__kmax_sequence_score_layer_1__" + } + select_first: true +} +layers { + name: "__fc_layer_1__" + type: "fc" + size: 1 + active_type: "" + inputs { + input_layer_name: "__seq_slice_layer_0__" + input_parameter_name: "___fc_layer_1__.w0" + } + bias_parameter_name: "___fc_layer_1__.wbias" +} +layers { + name: "__kmax_sequence_score_layer_2__" + type: "kmax_seq_score" + active_type: "" + inputs { + input_layer_name: "__fc_layer_1__" + } + beam_size: 5 +} +layers { + name: "sentences_ids" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "start_ids" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "end_ids" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "__cross_entropy_over_beam_0__" + type: "cross_entropy_over_beam" + active_type: "" + inputs { + input_layer_name: "sentence_scores" + } + inputs { + input_layer_name: "__kmax_sequence_score_layer_0__" + } + inputs { + input_layer_name: "__fc_layer_0__" + } + inputs { + input_layer_name: "__kmax_sequence_score_layer_1__" + } + inputs { + input_layer_name: "__fc_layer_1__" + } + inputs { + input_layer_name: "__kmax_sequence_score_layer_2__" + } + inputs { + input_layer_name: "sentences_ids" + } + inputs { + input_layer_name: "start_ids" + } + inputs { + input_layer_name: "end_ids" + } + coeff: 1.0 +} +parameters { + name: "___fc_layer_0__.w0" + size: 32 + initial_mean: 0.0 + initial_std: 0.176776695297 + dims: 32 + dims: 1 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___fc_layer_0__.wbias" + size: 1 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___fc_layer_1__.w0" + size: 32 + initial_mean: 0.0 + initial_std: 0.176776695297 + dims: 32 + dims: 1 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___fc_layer_1__.wbias" + size: 1 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "sentence_scores" +input_layer_names: "sentence_states" +input_layer_names: "sentences_ids" +input_layer_names: "start_ids" +input_layer_names: "end_ids" +output_layer_names: "__cross_entropy_over_beam_0__" +sub_models { + name: "root" + layer_names: "sentence_states" + layer_names: "sentence_scores" + layer_names: "__kmax_sequence_score_layer_0__" + layer_names: "__sub_nested_seq_layer_0__" + layer_names: "__fc_layer_0__" + layer_names: "__kmax_sequence_score_layer_1__" + layer_names: "__seq_slice_layer_0__" + layer_names: "__fc_layer_1__" + layer_names: "__kmax_sequence_score_layer_2__" + layer_names: "sentences_ids" + layer_names: "start_ids" + layer_names: "end_ids" + layer_names: "__cross_entropy_over_beam_0__" + input_layer_names: "sentence_scores" + input_layer_names: "sentence_states" + input_layer_names: "sentences_ids" + input_layer_names: "start_ids" + input_layer_names: "end_ids" + output_layer_names: "__cross_entropy_over_beam_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py b/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py new file mode 100644 index 0000000000..edc2d32fca --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python +#coding=utf-8 + +from paddle.trainer_config_helpers import * +beam_size = 5 + +# the first beam expansion. 
+sentence_states = data_layer(name="sentence_states", size=32) +sentence_scores = data_layer(name="sentence_scores", size=1) +topk_sentence_ids = kmax_sequence_score_layer( + input=sentence_scores, beam_size=beam_size) + +# the second beam expansion. +topk_sen = sub_nested_seq_layer( + input=sentence_states, selected_indices=topk_sentence_ids) +start_pos_scores = fc_layer(input=topk_sen, size=1, act=LinearActivation()) +topk_start_pos_ids = kmax_sequence_score_layer( + input=sentence_scores, beam_size=beam_size) + +# the final beam expansion. +topk_start_spans = seq_slice_layer( + input=topk_sen, starts=topk_start_pos_ids, ends=None) +end_pos_scores = fc_layer( + input=topk_start_spans, size=1, act=LinearActivation()) +topk_end_pos_ids = kmax_sequence_score_layer( + input=end_pos_scores, beam_size=beam_size) + +# define the cost +sentence_idx = data_layer(name="sentences_ids", size=1) +start_idx = data_layer(name="start_ids", size=1) +end_idx = data_layer(name="end_ids", size=1) +cost = cross_entropy_over_beam( + input=[ + sentence_scores, topk_sentence_ids, start_pos_scores, + topk_start_pos_ids, end_pos_scores, topk_end_pos_ids + ], + label=[sentence_idx, start_idx, end_idx]) + +outputs(cost) From 05e8a26b4bb093f9dccb9aeb533a5851aaed09b8 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Mon, 14 Aug 2017 10:33:28 +0800 Subject: [PATCH 017/170] add unittest. --- .../gserver/layers/CrossEntropyOverBeam.cpp | 35 +++++++ paddle/gserver/layers/CrossEntropyOverBeam.h | 31 ++++++ paddle/gserver/tests/CMakeLists.txt | 6 ++ paddle/gserver/tests/LayerGradUtil.cpp | 25 +++-- paddle/gserver/tests/LayerGradUtil.h | 18 ++++ .../tests/test_CrossEntropyOverBeamGrad.cpp | 94 +++++++++++++++++++ 6 files changed, 201 insertions(+), 8 deletions(-) create mode 100644 paddle/gserver/layers/CrossEntropyOverBeam.cpp create mode 100644 paddle/gserver/layers/CrossEntropyOverBeam.h create mode 100644 paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.cpp b/paddle/gserver/layers/CrossEntropyOverBeam.cpp new file mode 100644 index 0000000000..8b6223ec6a --- /dev/null +++ b/paddle/gserver/layers/CrossEntropyOverBeam.cpp @@ -0,0 +1,35 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "CrossEntropyOverBeam.h" + +namespace paddle { + +REGISTER_LAYER(cross_entropy_over_beam, CrossEntropyOverBeam); + +bool CrossEntropyOverBeam::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + /* Initialize the basic parent class */ + Layer::init(layerMap, parameterMap); + + setNeedSequenceInfo(false); + + return true; +} + +void CrossEntropyOverBeam::forward(PassType passType) {} + +void CrossEntropyOverBeam::backward(const UpdateCallback& callback) {} + +} // namespace paddle diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.h b/paddle/gserver/layers/CrossEntropyOverBeam.h new file mode 100644 index 0000000000..3106f9858b --- /dev/null +++ b/paddle/gserver/layers/CrossEntropyOverBeam.h @@ -0,0 +1,31 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "CrossEntropyOverBeam.h" +#include "Layer.h" + +namespace paddle { + +class CrossEntropyOverBeam : public Layer { +public: + explicit CrossEntropyOverBeam(const LayerConfig& config) : Layer(config) {} + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback) override; +}; + +} // namespace paddle diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index c2a2993620..24df7e7220 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -34,6 +34,12 @@ add_unittest_without_exec(test_CRFLayerGrad add_test(NAME test_CRFLayerGrad COMMAND test_CRFLayerGrad) +################ test_CrossEntropyOverBeam #################### +add_unittest_without_exec(test_CrossEntropyOverBeam + test_CrossEntropyOverBeamGrad.cpp + LayerGradUtil.cpp) +add_test(NAME test_CrossEntropyOverBeam + COMMAND test_CrossEntropyOverBeam) add_unittest_without_exec(test_ActivationGrad test_ActivationGrad.cpp diff --git a/paddle/gserver/tests/LayerGradUtil.cpp b/paddle/gserver/tests/LayerGradUtil.cpp index fd9cfa1dc7..a38880e14c 100644 --- a/paddle/gserver/tests/LayerGradUtil.cpp +++ b/paddle/gserver/tests/LayerGradUtil.cpp @@ -388,14 +388,23 @@ void initDataLayer(TestConfig testConf, data.grad->zeroMem(); break; case INPUT_SELF_DEFINE_DATA: { - size_t height = testConf.inputDefs[i].selfDefinedData->getHeight(); - size_t width = testConf.inputDefs[i].selfDefinedData->getWidth(); - CHECK_GT(static_cast(height), 0); - CHECK_GT(static_cast(width), 0); - data.value = Matrix::create(height, width, false, useGpu); - data.grad = Matrix::create(height, width, false, useGpu); - data.value->copyFrom(*testConf.inputDefs[i].selfDefinedData); - data.grad->zeroMem(); + if (testConf.inputDefs[i].ids.size()) { + data.ids = IVector::create(testConf.inputDefs[i].ids.size(), useGpu); + data.ids->copyFrom(testConf.inputDefs[i].ids.data(), + testConf.inputDefs[i].ids.size()); + } else if (testConf.inputDefs[i].selfDefinedData) { + size_t height = testConf.inputDefs[i].selfDefinedData->getHeight(); + size_t 
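+        // INPUT_SELF_DEFINE_DATA carries either integer ids (copied into
+        // Argument::ids above) or a self-defined matrix (handled below).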
width = testConf.inputDefs[i].selfDefinedData->getWidth(); + CHECK_GT(static_cast(height), 0); + CHECK_GT(static_cast(width), 0); + data.value = Matrix::create(height, width, false, useGpu); + data.grad = Matrix::create(height, width, false, useGpu); + data.value->copyFrom(*testConf.inputDefs[i].selfDefinedData); + data.grad->zeroMem(); + } else { + LOG(FATAL) << "No self-defined data are given."; + return; + } const std::vector& labelSeqStartPositions = testConf.inputDefs[i].labelSeqStartPositions; diff --git a/paddle/gserver/tests/LayerGradUtil.h b/paddle/gserver/tests/LayerGradUtil.h index 5debedf5ef..a35edd2b5e 100644 --- a/paddle/gserver/tests/LayerGradUtil.h +++ b/paddle/gserver/tests/LayerGradUtil.h @@ -68,6 +68,7 @@ struct InputDef { std::vector labelInitValue; std::vector labelSeqStartPositions; std::vector labelSubSeqStartPositions; + std::vector ids; MatrixPtr selfDefinedData; InputDef(InputType type, string nameIn, size_t dimIn, size_t sizeIn) { @@ -95,6 +96,23 @@ struct InputDef { isStatic = false; } + InputDef(InputType type, + string nameIn, + std::vector ids, + std::vector selfDefinedSeqStartPos = {}, + std::vector selfDefinedSubSeqStartPos = {}) + : labelSeqStartPositions(selfDefinedSeqStartPos), + labelSubSeqStartPositions(selfDefinedSubSeqStartPos), + ids(ids) { + selfDefinedData = nullptr; + inputType = type; + name = nameIn; + dim = 0; + sparse = {""}; + paraSize = 0; + isStatic = false; + } + InputDef(InputType type, string nameIn, size_t dimIn, diff --git a/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp b/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp new file mode 100644 index 0000000000..54daba3656 --- /dev/null +++ b/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp @@ -0,0 +1,94 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include + +#include +#include "ModelConfig.pb.h" +#include "paddle/gserver/layers/DataLayer.h" +#include "paddle/trainer/Trainer.h" + +#include "LayerGradUtil.h" +#include "paddle/testing/TestUtil.h" + +using namespace paddle; // NOLINT + +DECLARE_int32(gpu_id); +DECLARE_bool(thread_local_rand_use_global_seed); + +struct SingleBeamExpansion { + vector seqStartPos; + vector subSeqStartPos; + + vector candidateScores; + // TODO(caoying): store this into Argument.ids + vector selectedIndices; + vector groundTruth; +}; + +void genRandomBeamExpansion(size_t expansionCount, + vector& beamExpansions) { + beamExpansions.clear(); +} + +void testCrossEntropyOverBeam() { + const size_t expansionCount = 3; + vector beams; + genRandomBeamExpansion(expansionCount, beams); + + for (size_t i = 0; i < beams.size(); ++i) { + const SingleBeamExpansion& beam = beams[i]; + // create scores for all the candidates + MatrixPtr candidateScorePtr = + Matrix::create(beam.candidateScores.size(), 1, false, false); + candidateScorePtr->copyFrom(candidateScores.data(), candidateScores.size()); + + ostringstream paramName; + paramName << "candidate_scores_" << i; + beam.subSeqStartPos.size() + ? 
config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, + ostr.str(), + candidateScorePtr, + beam.seqStartPos, + beam.subSeqStartPos}) + : config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, + ostr.str(), + candidateScorePtr, + beam.seqStartPos}); + // create indices for the selected candidates + + // create the ground truth + } +} + +TestConfig config; +config.layerConfig.set_type("cross_entropy_over_beam"); + +// testLayerGrad( +// config, "cross_entropy_over_beam", seqNum, false, useGpu, false); +} + +TEST(Layer, CrossEntropyOverBeam) { + for (bool useGpu : {false, true}) testCrossEntropyOverBeam(useGpu); +} + +int main(int argc, char** argv) { + initMain(argc, argv); + hl_start(); + hl_init(FLAGS_gpu_id); + FLAGS_thread_local_rand_use_global_seed = true; + srand(1); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} From e6db484d154c041c1cf6650743bcf27dd2549b77 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Mon, 14 Aug 2017 15:51:00 +0800 Subject: [PATCH 018/170] make clear that current huber_cost is for two-classification --- paddle/gserver/layers/CostLayer.cpp | 29 ++++++++++--------- paddle/gserver/layers/CostLayer.h | 18 +++++------- paddle/gserver/tests/test_LayerGrad.cpp | 2 +- python/paddle/trainer/config_parser.py | 2 +- .../paddle/trainer_config_helpers/layers.py | 27 ++++++++++++----- .../protostr/test_cost_layers.protostr | 10 +++---- .../tests/configs/test_cost_layers.py | 2 +- 7 files changed, 50 insertions(+), 40 deletions(-) diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp index 6bfdea3c6e..138c86a6d6 100644 --- a/paddle/gserver/layers/CostLayer.cpp +++ b/paddle/gserver/layers/CostLayer.cpp @@ -575,10 +575,10 @@ void MultiBinaryLabelCrossEntropy::backwardImp(Matrix& output, // // Huber loss for robust 2-classes classification // -REGISTER_LAYER(huber, HuberTwoClass); +REGISTER_LAYER(huber, HuberTwoClassification); -bool HuberTwoClass::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { +bool HuberTwoClassification::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { CostLayer::init(layerMap, parameterMap); if (useGpu_) { tmpCpuInput_.reserve(inputLayers_.size()); @@ -589,7 +589,9 @@ bool HuberTwoClass::init(const LayerMap& layerMap, return true; } -void HuberTwoClass::forwardImp(Matrix& output, Argument& label, Matrix& cost) { +void HuberTwoClassification::forwardImp(Matrix& output, + Argument& label, + Matrix& cost) { if (useGpu_) { for (size_t i = 0; i < inputLayers_.size(); i++) { tmpCpuInput_[i].resizeAndCopyFrom( @@ -600,10 +602,11 @@ void HuberTwoClass::forwardImp(Matrix& output, Argument& label, Matrix& cost) { forwardImpIn(output, label, cost); } -void HuberTwoClass::forwardImpIn(Matrix& output, - Argument& label, - Matrix& target) { +void HuberTwoClassification::forwardImpIn(Matrix& output, + Argument& label, + Matrix& target) { size_t numSamples = target.getHeight(); + CHECK(label.ids); CHECK_EQ((*label.ids).getSize(), numSamples); CHECK_EQ(output.getHeight(), numSamples); CHECK_EQ(output.getWidth(), (size_t)1); @@ -624,9 +627,9 @@ void HuberTwoClass::forwardImpIn(Matrix& output, target.copyFrom(cost.data(), numSamples); } -void HuberTwoClass::backwardImp(Matrix& outputValue, - Argument& label, - Matrix& outputGrad) { +void HuberTwoClassification::backwardImp(Matrix& outputValue, + Argument& label, + Matrix& outputGrad) { if (useGpu_) { backwardImpIn( *tmpCpuInput_[0].value, tmpCpuInput_[1], *tmpCpuInput_[0].grad); @@ -636,9 +639,9 @@ void HuberTwoClass::backwardImp(Matrix& 
outputValue, } } -void HuberTwoClass::backwardImpIn(Matrix& output, - Argument& label, - Matrix& outputG) { +void HuberTwoClassification::backwardImpIn(Matrix& output, + Argument& label, + Matrix& outputG) { size_t numSamples = output.getHeight(); real* out = output.getData(); real* grad = outputG.getData(); diff --git a/paddle/gserver/layers/CostLayer.h b/paddle/gserver/layers/CostLayer.h index 14c0b33ec1..77427b7a08 100644 --- a/paddle/gserver/layers/CostLayer.h +++ b/paddle/gserver/layers/CostLayer.h @@ -307,21 +307,17 @@ public: /** * Huber loss for robust 2-classes classification. * - * For label={0, 1}, let y=2*label-1. Given output f, the loss is: - * \f[ - * Loss = - * \left\{\begin{matrix} - * 4 * y * f & \textit{if} \ \ y* f < -1 \\ - * (1 - y * f)^2 & \textit{if} \ \ -1 < y * f < 1 \\ - * 0 & \textit{otherwise} - * \end{matrix}\right. - * \f] + * For label={0, 1}, let y=2*label-1. Given output f(x), the loss is: + * Loss = 4 * y * f, if y* f < -1 \\ + * Loss = (1 - y * f)^2, if -1 < y * f < 1 \\ + * Loss = 0, otherwise */ -class HuberTwoClass : public CostLayer { +class HuberTwoClassification : public CostLayer { std::vector tmpCpuInput_; public: - explicit HuberTwoClass(const LayerConfig& config) : CostLayer(config) {} + explicit HuberTwoClassification(const LayerConfig& config) + : CostLayer(config) {} bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) override; diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 0f312b6ca5..6d60250f6d 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -830,7 +830,7 @@ TEST(Layer, square_error_weighted) { TEST(Layer, huber_two_class) { TestConfig config; - config.layerConfig.set_type("huber"); + config.layerConfig.set_type("huber_classification"); config.biasSize = 0; config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index da99e5bd53..248da9417f 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2255,7 +2255,7 @@ define_cost('PnpairValidation', 'pnpair-validation') define_cost('SumOfSquaresCostLayer', 'square_error') define_cost('MultiBinaryLabelCrossEntropy', 'multi_binary_label_cross_entropy') define_cost('SoftBinaryClassCrossEntropy', 'soft_binary_class_cross_entropy') -define_cost('HuberTwoClass', 'huber') +define_cost('HuberTwoClassification', 'huber_classification') define_cost('SumCost', 'sum_cost') define_cost('SmoothL1Cost', 'smooth_l1') diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 1bc55c8696..20d96efe15 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -108,7 +108,7 @@ __all__ = [ 'sum_cost', 'rank_cost', 'lambda_cost', - 'huber_cost', + 'huber_classification_cost', 'block_expand_layer', 'maxout_layer', 'out_prod_layer', @@ -216,7 +216,7 @@ class LayerType(object): RANK_COST = 'rank-cost' LAMBDA_COST = 'lambda_cost' - HUBER = 'huber' + HUBER_CLASSIFICATION = 'huber_classification' CROSS_ENTROPY = 'multi-class-cross-entropy' CROSS_ENTROPY_WITH_SELFNORM = 'multi_class_cross_entropy_with_selfnorm' SOFT_BIN_CLASS_CROSS_ENTROPY = 'soft_binary_class_cross_entropy' @@ -5605,16 +5605,26 @@ def sum_cost(input, name=None, layer_attr=None): @wrap_name_default() @layer_support() -def huber_cost(input, label, name=None, coeff=1.0, 
layer_attr=None): +def huber_classification_cost(input, + label, + name=None, + coeff=1.0, + layer_attr=None): """ - A loss layer for huber loss. + For classification purposes, a variant of the Huber loss called modified Huber + is sometimes used. Given a prediction f(x) (a real-valued classifier score) and + a true binary class label :math:`y\in \left \{-1, 1 \right \}`, the modified Huber + loss is defined as: + + .. math: + loss = \max \left ( 0, 1-yf(x) \right )^2, yf(x)\geq 1 + loss = -4yf(x), \text{otherwise} The example usage is: .. code-block:: python - cost = huber_cost(input=input_layer, - label=label_layer) + cost = huber_classification_cost(input=input_layer, label=label_layer) :param input: The first input layer. :type input: LayerOutput. @@ -5634,11 +5644,12 @@ def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None): assert input.size == 1 Layer( name=name, - type=LayerType.HUBER, + type=LayerType.HUBER_CLASSIFICATION, inputs=[input.name, label.name], coeff=coeff, **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput(name, LayerType.HUBER, parents=[input, label], size=1) + return LayerOutput( + name, LayerType.HUBER_CLASSIFICATION, parents=[input, label], size=1) @wrap_name_default() diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr index 05847344be..a64e5ea0dd 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr @@ -180,8 +180,8 @@ layers { active_type: "" } layers { - name: "__huber_cost_0__" - type: "huber" + name: "__huber_classification_cost_0__" + type: "huber_classification" size: 1 active_type: "" inputs { @@ -300,7 +300,7 @@ output_layer_names: "__rank_cost_0__" output_layer_names: "__lambda_cost_0__" output_layer_names: "__cross_entropy_0__" output_layer_names: "__cross_entropy_with_selfnorm_0__" -output_layer_names: "__huber_cost_0__" +output_layer_names: "__huber_classification_cost_0__" output_layer_names: "__multi_binary_label_cross_entropy_0__" output_layer_names: "__sum_cost_0__" output_layer_names: "__nce_layer_0__" @@ -326,7 +326,7 @@ sub_models { layer_names: "__cross_entropy_with_selfnorm_0__" layer_names: "huber_probs" layer_names: "huber_label" - layer_names: "__huber_cost_0__" + layer_names: "__huber_classification_cost_0__" layer_names: "__multi_binary_label_cross_entropy_0__" layer_names: "__sum_cost_0__" layer_names: "__nce_layer_0__" @@ -349,7 +349,7 @@ sub_models { output_layer_names: "__lambda_cost_0__" output_layer_names: "__cross_entropy_0__" output_layer_names: "__cross_entropy_with_selfnorm_0__" - output_layer_names: "__huber_cost_0__" + output_layer_names: "__huber_classification_cost_0__" output_layer_names: "__multi_binary_label_cross_entropy_0__" output_layer_names: "__sum_cost_0__" output_layer_names: "__nce_layer_0__" diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py index d2a3b702a1..98bf026d60 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py @@ -33,7 +33,7 @@ outputs( input=probs, label=xe_label), cross_entropy_with_selfnorm( input=probs, label=xe_label), - huber_cost( + huber_classification_cost( input=data_layer( 
name='huber_probs', size=1), label=data_layer( From af1eb31afc92ae3ac59869a6a5b0e890e009c44b Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Fri, 11 Aug 2017 11:55:56 -0700 Subject: [PATCH 019/170] add as an operator --- paddle/operators/CMakeLists.txt | 2 ++ paddle/operators/gather_op.cc | 64 +++++++++++++++++++++++++++++++++ paddle/operators/gather_op.h | 52 +++++++++++++++++++++++++++ 3 files changed, 118 insertions(+) create mode 100644 paddle/operators/gather_op.cc create mode 100644 paddle/operators/gather_op.h diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index a7c89787e4..5ac898a8d3 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -43,6 +43,8 @@ endfunction() add_subdirectory(math) cc_test(gather_test SRCS gather_test.cc DEPS tensor) +cc_library(gather_op SRCS gather_op.cc DEPS op_registry) +# cc_test(gather_op_test SRCS gather_op_test.cc DEPS gather_op) cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) diff --git a/paddle/operators/gather_op.cc b/paddle/operators/gather_op.cc new file mode 100644 index 0000000000..1008a57a87 --- /dev/null +++ b/paddle/operators/gather_op.cc @@ -0,0 +1,64 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/gather_op.h" +#include "paddle/framework/ddim.h" + +namespace paddle { +namespace operators { + +class GatherOp : public framework::OperatorWithKernel { + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE(ctx.InputSize() == 2, ""); + PADDLE_ENFORCE(ctx.OutputSize() == 1, ""); + int batch_size = ctx.Input(1)->dims()[0]; + PADDLE_ENFORCE(batch_size > 0); + } +}; + +class GatherOpMaker : public framework::OpProtoAndCheckerMaker { + public: + GatherOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "The source input of gather op"); + AddInput("Index", "The index input of gather op"); + AddOutput("Y", "The output of add op"); + AddComment(R"DOC( +Gather Operator by selecting from the first axis, + +Y = X[Index] +)DOC"); + } +}; + +class GatherGradOp : public framework::OperatorWithKernel { + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + // ctx.Output("X" + framework::kGradVarSuffix) + // ->Resize(ctx.Input("X")->dims()); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(gather, ops::GatherOp, ops::GatherOpMaker); +REGISTER_OP_CPU_KERNEL(gather, + ops::GatherOpKernel); +REGISTER_GRADIENT_OP(gather, gather_grad, ops::GatherGradOp); +REGISTER_OP_CPU_KERNEL( + gather_grad, + ops::GatherGradientOpKernel); diff --git a/paddle/operators/gather_op.h b/paddle/operators/gather_op.h new file mode 100644 index 0000000000..13e4c9b058 --- /dev/null +++ b/paddle/operators/gather_op.h @@ -0,0 +1,52 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "gather.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" +#include "scatter.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +template +class GatherOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto X = ctx.Input("X"); + auto Index = ctx.Input("Index"); + auto Y = ctx.Output("Y"); + + Y->mutable_data(ctx.GetPlace()); + Gather(ctx.GetPlace(), X, Index, Y); + } +}; + +template +class GatherGradientOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto Index = ctx.Input("Index"); + auto dX = ctx.Output(framework::GradVarName("X")); + auto dY = ctx.Input(framework::GradVarName("Y")); + + ScatterUpdate(ctx.GetPlace(), dY, Index, dX); + } +}; + +} // namespace operators +} // namespace paddle From caaa5f86b91beda67daf8ae295cf99fa4dce12ba Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Fri, 11 Aug 2017 15:09:04 -0700 Subject: [PATCH 020/170] gather op added --- paddle/framework/CMakeLists.txt | 2 ++ paddle/framework/empty_test.cc | 56 +++++++++++++++++++++++++++++++++ paddle/operators/gather_op.cc | 2 ++ 3 files changed, 60 insertions(+) create mode 100644 paddle/framework/empty_test.cc diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 0398526024..9e306c8650 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -10,6 +10,8 @@ cc_test(eigen_test SRCS eigen_test.cc DEPS tensor) cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor) cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor) +cc_test(empty_test SRCS empty_test.cc DEPS tensor) + cc_test(variable_test SRCS variable_test.cc) cc_library(scope SRCS scope.cc) diff --git a/paddle/framework/empty_test.cc b/paddle/framework/empty_test.cc new file mode 100644 index 0000000000..2237f8ce0e --- /dev/null +++ b/paddle/framework/empty_test.cc @@ -0,0 +1,56 @@ +/* + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include +#include +#include "paddle/framework/tensor.h" + +TEST(Empty, Dims) { + using namespace paddle::framework; + using namespace paddle::platform; + Tensor tt; + tt.Resize(make_ddim({0, 3, 4})); + DDim dims = tt.dims(); + ASSERT_EQ(arity(dims), 3); + EXPECT_EQ(0, dims[0]); + EXPECT_EQ(3, dims[1]); + EXPECT_EQ(4, dims[2]); +} + +TEST(Empty, MutableData) { + using namespace paddle::framework; + using namespace paddle::platform; + { + Tensor src_tensor; + float* p1 = nullptr; + // initialization + p1 = src_tensor.mutable_data(make_ddim({0, 2, 3}), CPUPlace()); + EXPECT_NE(p1, nullptr); + } + +#ifndef PADDLE_ONLY_CPU + { + Tensor src_tensor; + float* p1 = nullptr; + float* p2 = nullptr; + // initialization + p1 = src_tensor.mutable_data(make_ddim({0, 2, 3}), GPUPlace()); + EXPECT_NE(p1, nullptr); + // set src_tensor a new dim with large size + // momery is supposed to be re-allocated + p2 = src_tensor.mutable_data(make_ddim({0, 4}), GPUPlace()); + EXPECT_NE(p2, nullptr); + // EXPECT_NE(p1, p2); + } +#endif +} diff --git a/paddle/operators/gather_op.cc b/paddle/operators/gather_op.cc index 1008a57a87..3414a3c263 100644 --- a/paddle/operators/gather_op.cc +++ b/paddle/operators/gather_op.cc @@ -23,6 +23,8 @@ class GatherOp : public framework::OperatorWithKernel { void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE(ctx.InputSize() == 2, ""); PADDLE_ENFORCE(ctx.OutputSize() == 1, ""); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), + "Inputs of GatherOp must all be set"); int batch_size = ctx.Input(1)->dims()[0]; PADDLE_ENFORCE(batch_size > 0); } From 2a42a73db1ff32f7d1b9dfc772070fc7827aa4e7 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Fri, 11 Aug 2017 15:10:59 -0700 Subject: [PATCH 021/170] modify gather_op with test --- paddle/operators/gather_op.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/paddle/operators/gather_op.cc b/paddle/operators/gather_op.cc index 3414a3c263..5a4f889f3d 100644 --- a/paddle/operators/gather_op.cc +++ b/paddle/operators/gather_op.cc @@ -27,6 +27,9 @@ class GatherOp : public framework::OperatorWithKernel { "Inputs of GatherOp must all be set"); int batch_size = ctx.Input(1)->dims()[0]; PADDLE_ENFORCE(batch_size > 0); + paddle::framework::DDim output_dims(ctx.Input(0)->dims()); + output_dims[0] = batch_size; + ctx.Output(0)->Resize(output_dims); } }; @@ -48,8 +51,8 @@ Y = X[Index] class GatherGradOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - // ctx.Output("X" + framework::kGradVarSuffix) - // ->Resize(ctx.Input("X")->dims()); + ctx.Output("X" + framework::kGradVarSuffix) + ->Resize(ctx.Input("X")->dims()); } }; From f6bffd4e1ff506319fa1a3338038d61d3f653181 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Fri, 11 Aug 2017 15:40:23 -0700 Subject: [PATCH 022/170] gather_op modified --- paddle/operators/gather_op.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/paddle/operators/gather_op.cc b/paddle/operators/gather_op.cc index 5a4f889f3d..05ba52ce06 100644 --- a/paddle/operators/gather_op.cc +++ b/paddle/operators/gather_op.cc @@ -51,8 +51,10 @@ Y = X[Index] class GatherGradOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - ctx.Output("X" + framework::kGradVarSuffix) - ->Resize(ctx.Input("X")->dims()); + auto X_grad = ctx.Output(framework::GradVarName("X")); + auto X = ctx.Input("X"); + + 
X_grad->Resize(X->dims()); } }; From 323d4233f3cb0f72ddac36977941e84880a7eedc Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Tue, 15 Aug 2017 23:50:56 +0000 Subject: [PATCH 023/170] gather op added with python unittest --- paddle/operators/gather_op.cu | 20 ++++++++++++++++ .../v2/framework/tests/test_gather_op.py | 23 +++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 paddle/operators/gather_op.cu create mode 100644 python/paddle/v2/framework/tests/test_gather_op.py diff --git a/paddle/operators/gather_op.cu b/paddle/operators/gather_op.cu new file mode 100644 index 0000000000..3f04a7b3f8 --- /dev/null +++ b/paddle/operators/gather_op.cu @@ -0,0 +1,20 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#define EIGEN_USE_GPU +#include "paddle/operators/gather_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(gather, + ops::GatherOpKernel); diff --git a/python/paddle/v2/framework/tests/test_gather_op.py b/python/paddle/v2/framework/tests/test_gather_op.py new file mode 100644 index 0000000000..2ffbf17236 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_gather_op.py @@ -0,0 +1,23 @@ +import unittest + +import numpy +import paddle.v2.framework.core as core +from paddle.v2.framework.op import Operator + +from op_test_util import OpTestMeta + + +class TestGatherOp(unittest.TestCase): + __metaclass__ = OpTestMeta + + def setUp(self): + self.type = "gather" + self.inputs = { + 'X': numpy.random.random((10, 20)).astype("float32"), + 'Index': numpy.array([1, 3, 5]).astype("int") + } + self.outputs = {'Y': self.input['X'][self.input['Index']]} + + +if __name__ == "__main__": + unittest.main() From 4d2adab772e3c0789e9696533da61ee3583363d1 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Tue, 15 Aug 2017 23:54:16 +0000 Subject: [PATCH 024/170] gather op added with python unittest --- paddle/framework/CMakeLists.txt | 1 + paddle/framework/pybind.cc | 1 + paddle/operators/CMakeLists.txt | 3 +- paddle/operators/gather_op.cc | 43 +++++++++++-------- .../paddle/v2/framework/tests/CMakeLists.txt | 1 + 5 files changed, 29 insertions(+), 20 deletions(-) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 9e306c8650..30313780a3 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -47,6 +47,7 @@ cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python backward sgd_op + gather_op add_op mul_op rowwise_add_op diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index fe0c87bc57..90311e0dc3 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -42,6 +42,7 @@ USE_OP(fill_zeros_like); USE_OP_ITSELF(recurrent_op); USE_OP(gaussian_random); USE_OP(uniform_random); +USE_CPU_ONLY_OP(gather); namespace paddle { namespace framework { diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 5ac898a8d3..6849e39cb7 100644 --- a/paddle/operators/CMakeLists.txt +++ 
b/paddle/operators/CMakeLists.txt @@ -43,7 +43,8 @@ endfunction() add_subdirectory(math) cc_test(gather_test SRCS gather_test.cc DEPS tensor) -cc_library(gather_op SRCS gather_op.cc DEPS op_registry) +op_library(gather_op SRCS gather_op.cc gather_op.cu) +# DEPS op_registry) # cc_test(gather_op_test SRCS gather_op_test.cc DEPS gather_op) cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) diff --git a/paddle/operators/gather_op.cc b/paddle/operators/gather_op.cc index 05ba52ce06..2e08ba8dcc 100644 --- a/paddle/operators/gather_op.cc +++ b/paddle/operators/gather_op.cc @@ -19,17 +19,33 @@ namespace paddle { namespace operators { class GatherOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE(ctx.InputSize() == 2, ""); - PADDLE_ENFORCE(ctx.OutputSize() == 1, ""); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), - "Inputs of GatherOp must all be set"); - int batch_size = ctx.Input(1)->dims()[0]; + // PADDLE_ENFORCE(ctx.InputSize() == 2, ""); + // PADDLE_ENFORCE(ctx.OutputSize() == 1, ""); + // PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), + // "Inputs of GatherOp must all be set"); + int batch_size = ctx.Input("Index")->dims()[0]; PADDLE_ENFORCE(batch_size > 0); paddle::framework::DDim output_dims(ctx.Input(0)->dims()); output_dims[0] = batch_size; - ctx.Output(0)->Resize(output_dims); + ctx.Output("Y")->Resize(output_dims); + } +}; + +class GatherGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + auto X_grad = ctx.Output(framework::GradVarName("X")); + auto X = ctx.Input("X"); + + X_grad->Resize(X->dims()); } }; @@ -47,25 +63,14 @@ Y = X[Index] )DOC"); } }; - -class GatherGradOp : public framework::OperatorWithKernel { - protected: - void InferShape(const framework::InferShapeContext &ctx) const override { - auto X_grad = ctx.Output(framework::GradVarName("X")); - auto X = ctx.Input("X"); - - X_grad->Resize(X->dims()); - } -}; - } // namespace operators } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(gather, ops::GatherOp, ops::GatherOpMaker); +REGISTER_OP(gather, ops::GatherOp, ops::GatherOpMaker, gather_grad, + ops::GatherGradOp); REGISTER_OP_CPU_KERNEL(gather, ops::GatherOpKernel); -REGISTER_GRADIENT_OP(gather, gather_grad, ops::GatherGradOp); REGISTER_OP_CPU_KERNEL( gather_grad, ops::GatherGradientOpKernel); diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index 96fad9b42e..1032743a13 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -13,6 +13,7 @@ py_test(test_add_two_op SRCS test_add_two_op.py) py_test(test_sigmoid_op SRCS test_sigmoid_op.py) py_test(test_softmax_op SRCS test_softmax_op.py) py_test(test_cross_entropy_op SRCS test_cross_entropy_op.py) +py_test(test_gather_op SRCS test_gather_op.py) py_test(test_fill_zeros_like_op SRCS test_fill_zeros_like_op.py) py_test(gradient_checker SRCS gradient_checker.py) From a037b099f7f4bf8370e882f397bd4c691b0e0986 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Mon, 14 Aug 2017 15:49:48 +0800 Subject: [PATCH 025/170] finish unittest. 
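
The test drives the layer with randomly generated beam expansions: for every sequence it records whether the gold candidate is still inside the beam selected at each expansion, and once the gold candidate is pruned the remaining expansions carry a dummy label that is excluded from the cost. A minimal sketch of that membership check (plain Python, hypothetical names, independent of the layer's real API):

    # Does the gold candidate survive this expansion?
    # "selected" holds the kept candidate indices per row; -1 marks padding.
    def gold_in_beam(selected, beam_size, row_idx, gold):
        start = row_idx * beam_size
        beam = [int(i) for i in selected[start:start + beam_size] if i != -1]
        return gold in beam

    selected = [2, 5, 7, -1, 0, 3, 4, 6]        # two rows, beam_size = 4
    assert gold_in_beam(selected, 4, 0, 5)      # gold kept in row 0
    assert not gold_in_beam(selected, 4, 1, 1)  # gold pruned in row 1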
--- .../gserver/layers/CrossEntropyOverBeam.cpp | 1 + .../tests/test_CrossEntropyOverBeamGrad.cpp | 218 +++++++++++++++--- 2 files changed, 191 insertions(+), 28 deletions(-) diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.cpp b/paddle/gserver/layers/CrossEntropyOverBeam.cpp index 8b6223ec6a..88d80aa83a 100644 --- a/paddle/gserver/layers/CrossEntropyOverBeam.cpp +++ b/paddle/gserver/layers/CrossEntropyOverBeam.cpp @@ -22,6 +22,7 @@ bool CrossEntropyOverBeam::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { /* Initialize the basic parent class */ Layer::init(layerMap, parameterMap); + CHECK_EQ(0U, inputLayers_.size() % 3) << "Error input number."; setNeedSequenceInfo(false); diff --git a/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp b/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp index e9ecebcfe5..a5f06c15dc 100644 --- a/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp +++ b/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include #include #include @@ -27,6 +28,10 @@ using namespace paddle; // NOLINT DECLARE_int32(gpu_id); DECLARE_bool(thread_local_rand_use_global_seed); +const size_t MAX_SEQ_NUM = 10; +const size_t MAX_SEQ_LEN = 27; +const size_t MAX_BEAM_SIZE = 10; + struct SingleBeamExpansion { vector seqStartPos; vector subSeqStartPos; @@ -34,37 +39,195 @@ struct SingleBeamExpansion { // TODO(caoying): store this into Argument.ids vector selectedIndices; + vector groundTruth; - vector labelSeqStartPos; + vector inBeam; + vector rowIdxInBeam; }; -void genCandidateScores(bool hasSubSeq, - vector& scores, +void genRand(real* numbers, size_t n) { + default_random_engine generator; + uniform_real_distribution distribution(0.0, 1.0); + for (size_t i = 0; i < n; ++i) numbers[i] = distribution(generator); +} + +vector randSampling(real range, int n) { + CHECK_GE(range, n); + vector num(range); + iota(begin(num), end(num), 0.); + if (range == n) return num; + + random_shuffle(begin(num), end(num)); + num.resize(n); + sort(begin(num), end(num)); + return num; +} + +void genCandidateScores(bool hasSubseq, + size_t beamSize, + SingleBeamExpansion& prevBeam, + SingleBeamExpansion& curBeam) { + vector& seqStartPos = curBeam.seqStartPos; + seqStartPos.resize(1, 0); + vector& subSeqStartPos = curBeam.subSeqStartPos; + subSeqStartPos.resize(1, 0); + + srand((size_t)(time(NULL))); + // srand(1); + if (prevBeam.selectedIndices.size()) { + if (prevBeam.subSeqStartPos.size() > 1) { + int seqIdx = 1; + // samples in previous beam are nested sequences. + for (size_t i = 1; i < prevBeam.subSeqStartPos.size(); ++i) { + for (size_t j = 0; j < beamSize; ++j) { + if (prevBeam.selectedIndices[(i - 1) * beamSize + j] == -1.) break; + for (size_t k = 0; k < beamSize; ++k) + subSeqStartPos.push_back(1 + (rand() % MAX_SEQ_LEN) + + subSeqStartPos.back()); + } + if (prevBeam.seqStartPos[seqIdx] == prevBeam.subSeqStartPos[i]) { + seqStartPos.push_back(subSeqStartPos.back()); + seqIdx++; + } + } + } else { + // samples in previous beam are sequences. + for (size_t i = 0; i <= prevBeam.selectedIndices.size(); ++i) { + if (i && i % beamSize == 0) { + seqStartPos.push_back(subSeqStartPos.back()); + if (i == prevBeam.selectedIndices.size()) break; + } + if (prevBeam.selectedIndices[i] == -1.) 
continue; + subSeqStartPos.push_back(subSeqStartPos.back() + + (1 + (rand() % MAX_SEQ_LEN))); + } + } + } else { + // the first beam expansion + int seqNum = 1 + (rand() % MAX_SEQ_NUM); + for (int i = 0; i < seqNum; ++i) { + if (hasSubseq) { + for (size_t j = 0; j < 1 + (rand() % MAX_SEQ_NUM); ++j) + subSeqStartPos.push_back(subSeqStartPos.back() + + (1 + (rand() % MAX_SEQ_LEN))); + seqStartPos.push_back(subSeqStartPos.back()); + } else { + seqStartPos.push_back(seqStartPos.back() + + (1 + (rand() % MAX_SEQ_LEN))); + } + } + } + + size_t totalSeqNum = hasSubseq ? subSeqStartPos.back() : seqStartPos.back(); + curBeam.candidateScores.resize(totalSeqNum, 0.); + genRand(curBeam.candidateScores.data(), totalSeqNum); +} + +void genSelectedIndices(size_t beamSize, vector& seqStartPos, - vector& subSeqStartPos) {} - -void genSelectedIndicesAndGroundtruth(size_t beamSize, - vector& seqStartPos, - vector& selectedIndices) {} - -SingleBeamExpansion genOneBeam(size_t beamSize, bool hasSubSeq) { - SingleBeamExpansion beam; - genCandidateScores( - hasSubSeq, beam.candidateScores, beam.seqStartPos, beam.subSeqStartPos); - genSelectedIndicesAndGroundtruth( - beamSize, - hasSubSeq ? beam.subSeqStartPos : beam.seqStartPos, - beam.selectedIndices); - return beam; + vector& selectedIndices) { + size_t selectedIdsCount = beamSize * (seqStartPos.size() - 1); + selectedIndices.resize(selectedIdsCount, -1.); + + for (size_t i = 0; i < seqStartPos.size() - 1; ++i) { + int seqLen = seqStartPos[i + 1] - seqStartPos[i]; + int n = min(seqLen, static_cast(beamSize)); + vector ids = randSampling(seqLen, n); + memcpy(selectedIndices.data() + i * beamSize, + ids.data(), + sizeof(real) * ids.size()); + } +} + +void genGroundTruth(vector& beamExpansions, + size_t beamSize) { + size_t seqNum = beamExpansions[1].seqStartPos.size() - 1; + for (size_t i = 2; i < beamExpansions.size(); ++i) + CHECK_EQ(seqNum, beamExpansions[i - 1].seqStartPos.size() - 1); + + // srand(1); + srand((size_t)(time(NULL))); + + // initialize the first beam. + SingleBeamExpansion& beam = beamExpansions[1]; + beam.groundTruth.resize(seqNum, 0); + beam.inBeam.resize(seqNum, 0); + beam.rowIdxInBeam.resize(seqNum, -1); + + auto begPos = beam.selectedIndices.begin(); + for (size_t i = 0; i < seqNum; ++i) { + int seqLen = beam.seqStartPos[i + 1] - beam.seqStartPos[i]; + int label = rand() % seqLen; + auto endPos = begPos + beamSize; + beam.groundTruth[i] = label; + if (find(begPos, endPos, real(label)) != endPos) beam.inBeam[i] = 1; + begPos = endPos; + beam.rowIdxInBeam[i] = i; + } + + // iterate over each beam expansions + for (size_t i = 2; i < beamExpansions.size(); ++i) { + SingleBeamExpansion& curBeam = beamExpansions[i]; + SingleBeamExpansion& prevBeam = beamExpansions[i - 1]; + + curBeam.groundTruth.resize(seqNum, 0); + curBeam.inBeam.resize(seqNum, 0); + curBeam.rowIdxInBeam.resize(seqNum, -1); + + // iterate over each sequence + for (size_t j = 0; j < seqNum; ++j) { + if (prevBeam.inBeam[j]) { + // gold sequence falls in the beam in previous search. 
+ + auto begPos = prevBeam.selectedIndices.begin(); + auto endPos = begPos + prevBeam.rowIdxInBeam[j] * beamSize; + size_t totalExpansion = + prevBeam.rowIdxInBeam[j] * beamSize - count(begPos, endPos, -1.); + curBeam.rowIdxInBeam[j] = totalExpansion + prevBeam.groundTruth[j]; + + CHECK_LE(curBeam.rowIdxInBeam[j] + 1, + curBeam.subSeqStartPos.size() - 1); + int start = curBeam.subSeqStartPos[curBeam.rowIdxInBeam[j]]; + int end = curBeam.subSeqStartPos[curBeam.rowIdxInBeam[j] + 1]; + CHECK_GT(size_t(end), size_t(start)); + int label = rand() % (end - start); + + curBeam.groundTruth[j] = label; + auto findBeg = curBeam.selectedIndices.begin() + + curBeam.rowIdxInBeam[j] * beamSize; + auto findEnd = findBeg + beamSize; + if (find(findBeg, findEnd, real(label)) != findEnd) + curBeam.inBeam[j] = 1; + } else { + // in previous search, gold sequence has fallen off the beam, + // the beam search stops, here use -1 as a dummy label. + // It will not used in calculation the cost. + beamExpansions[i].groundTruth[j] = -1; + } + } + } +} + +void genOneBeam(size_t beamSize, + bool hasSubseq, + SingleBeamExpansion& prevBeam, + SingleBeamExpansion& curBeam) { + genCandidateScores(hasSubseq, beamSize, prevBeam, curBeam); + genSelectedIndices(beamSize, + hasSubseq ? curBeam.subSeqStartPos : curBeam.seqStartPos, + curBeam.selectedIndices); } void genRandomBeamExpansion(size_t expansionCount, size_t beamSize, vector& beamExpansions) { beamExpansions.clear(); - for (size_t i = 0; i < expansionCount; ++i) { - beamExpansions.emplace_back(genOneBeam(beamSize, i)); - } + beamExpansions.resize(expansionCount + 1); + + // beamExpansions[0] is reserved. + for (size_t i = 1; i <= expansionCount; ++i) + genOneBeam(beamSize, bool(i - 1), beamExpansions[i - 1], beamExpansions[i]); + genGroundTruth(beamExpansions, beamSize); } void testCrossEntropyOverBeam(bool useGpu) { @@ -72,12 +235,12 @@ void testCrossEntropyOverBeam(bool useGpu) { config.layerConfig.set_type("cross_entropy_over_beam"); const size_t expansionCount = 3; - const size_t beamSize = 3; + const size_t beamSize = MAX_BEAM_SIZE; vector beams; genRandomBeamExpansion(expansionCount, beamSize, beams); size_t seqNum = 0; - for (size_t i = 0; i < beams.size(); ++i) { + for (size_t i = 1; i < beams.size(); ++i) { const SingleBeamExpansion& beam = beams[i]; // create scores for all the candidates MatrixPtr candidateScorePtr = @@ -88,7 +251,7 @@ void testCrossEntropyOverBeam(bool useGpu) { ostringstream paramName; paramName << "candidate_scores_" << i; - if (beam.subSeqStartPos.size()) { + if (beam.subSeqStartPos.size() > 1) { seqNum = beam.subSeqStartPos.size() - 1; config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, paramName.str(), @@ -118,10 +281,9 @@ void testCrossEntropyOverBeam(bool useGpu) { // create the ground truth paramName.clear(); paramName << "label_" << i; - config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, - paramName.str(), - beam.groundTruth, - beam.labelSeqStartPos}); + config.inputDefs.push_back( + {INPUT_SELF_DEFINE_DATA, paramName.str(), beam.groundTruth}); + config.layerConfig.add_inputs(); } testLayerGrad( From 6075928d5531b5eecff0d3183c1d47ab3b0962d4 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Wed, 16 Aug 2017 19:02:29 +0000 Subject: [PATCH 026/170] gather op added --- paddle/operators/gather.h | 2 ++ paddle/operators/gather_op.cc | 8 ++------ python/paddle/v2/framework/tests/test_gather_op.py | 7 ++++--- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/paddle/operators/gather.h b/paddle/operators/gather.h index 
d6e6990394..3f299ea1a6 100644 --- a/paddle/operators/gather.h +++ b/paddle/operators/gather.h @@ -17,6 +17,8 @@ limitations under the License. */ #include #include "paddle/framework/ddim.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" #include "paddle/framework/tensor.h" #include "paddle/platform/place.h" diff --git a/paddle/operators/gather_op.cc b/paddle/operators/gather_op.cc index 2e08ba8dcc..499def05a7 100644 --- a/paddle/operators/gather_op.cc +++ b/paddle/operators/gather_op.cc @@ -24,13 +24,9 @@ class GatherOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - // PADDLE_ENFORCE(ctx.InputSize() == 2, ""); - // PADDLE_ENFORCE(ctx.OutputSize() == 1, ""); - // PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), - // "Inputs of GatherOp must all be set"); int batch_size = ctx.Input("Index")->dims()[0]; - PADDLE_ENFORCE(batch_size > 0); - paddle::framework::DDim output_dims(ctx.Input(0)->dims()); + PADDLE_ENFORCE_GE(batch_size, 0, "Batch size must be >0"); + paddle::framework::DDim output_dims(ctx.Input("X")->dims()); output_dims[0] = batch_size; ctx.Output("Y")->Resize(output_dims); } diff --git a/python/paddle/v2/framework/tests/test_gather_op.py b/python/paddle/v2/framework/tests/test_gather_op.py index 2ffbf17236..049054d07b 100644 --- a/python/paddle/v2/framework/tests/test_gather_op.py +++ b/python/paddle/v2/framework/tests/test_gather_op.py @@ -12,11 +12,12 @@ class TestGatherOp(unittest.TestCase): def setUp(self): self.type = "gather" + xnp = numpy.random.random((10, 20)).astype("float32") self.inputs = { - 'X': numpy.random.random((10, 20)).astype("float32"), - 'Index': numpy.array([1, 3, 5]).astype("int") + 'X': xnp, + 'Index': numpy.array([1, 3, 5]).astype("int32") } - self.outputs = {'Y': self.input['X'][self.input['Index']]} + self.outputs = {'Y': self.inputs['X'][self.inputs['Index']]} if __name__ == "__main__": From 02299813685a7172d9e9182631b71473b492c904 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Wed, 16 Aug 2017 23:54:38 +0000 Subject: [PATCH 027/170] remove empty test --- paddle/framework/CMakeLists.txt | 2 -- paddle/framework/empty_test.cc | 56 --------------------------------- 2 files changed, 58 deletions(-) delete mode 100644 paddle/framework/empty_test.cc diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 68b5cec2c5..2cdf323c53 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -10,8 +10,6 @@ cc_test(eigen_test SRCS eigen_test.cc DEPS tensor) cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor) cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor) -cc_test(empty_test SRCS empty_test.cc DEPS tensor) - cc_test(variable_test SRCS variable_test.cc) cc_library(scope SRCS scope.cc) diff --git a/paddle/framework/empty_test.cc b/paddle/framework/empty_test.cc deleted file mode 100644 index 2237f8ce0e..0000000000 --- a/paddle/framework/empty_test.cc +++ /dev/null @@ -1,56 +0,0 @@ -/* - Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include -#include -#include "paddle/framework/tensor.h" - -TEST(Empty, Dims) { - using namespace paddle::framework; - using namespace paddle::platform; - Tensor tt; - tt.Resize(make_ddim({0, 3, 4})); - DDim dims = tt.dims(); - ASSERT_EQ(arity(dims), 3); - EXPECT_EQ(0, dims[0]); - EXPECT_EQ(3, dims[1]); - EXPECT_EQ(4, dims[2]); -} - -TEST(Empty, MutableData) { - using namespace paddle::framework; - using namespace paddle::platform; - { - Tensor src_tensor; - float* p1 = nullptr; - // initialization - p1 = src_tensor.mutable_data(make_ddim({0, 2, 3}), CPUPlace()); - EXPECT_NE(p1, nullptr); - } - -#ifndef PADDLE_ONLY_CPU - { - Tensor src_tensor; - float* p1 = nullptr; - float* p2 = nullptr; - // initialization - p1 = src_tensor.mutable_data(make_ddim({0, 2, 3}), GPUPlace()); - EXPECT_NE(p1, nullptr); - // set src_tensor a new dim with large size - // momery is supposed to be re-allocated - p2 = src_tensor.mutable_data(make_ddim({0, 4}), GPUPlace()); - EXPECT_NE(p2, nullptr); - // EXPECT_NE(p1, p2); - } -#endif -} From 27a99bfb1446171969da0219a6125a79c39eb582 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Thu, 17 Aug 2017 18:10:37 +0800 Subject: [PATCH 028/170] Add base class for huber_regression_cost and huber_classification_cost --- doc/api/v2/config/layer.rst | 6 +-- paddle/gserver/layers/CostLayer.cpp | 55 ++++++++++++---------------- paddle/gserver/layers/CostLayer.h | 27 ++++++++++---- python/paddle/v2/tests/test_layer.py | 2 +- 4 files changed, 46 insertions(+), 44 deletions(-) diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst index cb330ea5e1..22a6b2ab84 100644 --- a/doc/api/v2/config/layer.rst +++ b/doc/api/v2/config/layer.rst @@ -409,9 +409,9 @@ multi_binary_label_cross_entropy_cost .. autoclass:: paddle.v2.layer.multi_binary_label_cross_entropy_cost :noindex: -huber_cost ----------- -.. autoclass:: paddle.v2.layer.huber_cost +huber_classification_cost +------------------------- +.. 
autoclass:: paddle.v2.layer.huber_classification_cost :noindex: lambda_cost diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp index 138c86a6d6..69cf393225 100644 --- a/paddle/gserver/layers/CostLayer.cpp +++ b/paddle/gserver/layers/CostLayer.cpp @@ -572,13 +572,8 @@ void MultiBinaryLabelCrossEntropy::backwardImp(Matrix& output, } } -// -// Huber loss for robust 2-classes classification -// -REGISTER_LAYER(huber, HuberTwoClassification); - -bool HuberTwoClassification::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { +bool HuberCost::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { CostLayer::init(layerMap, parameterMap); if (useGpu_) { tmpCpuInput_.reserve(inputLayers_.size()); @@ -589,9 +584,7 @@ bool HuberTwoClassification::init(const LayerMap& layerMap, return true; } -void HuberTwoClassification::forwardImp(Matrix& output, - Argument& label, - Matrix& cost) { +void HuberCost::forwardImp(Matrix& output, Argument& label, Matrix& cost) { if (useGpu_) { for (size_t i = 0; i < inputLayers_.size(); i++) { tmpCpuInput_[i].resizeAndCopyFrom( @@ -599,12 +592,22 @@ void HuberTwoClassification::forwardImp(Matrix& output, } hl_stream_synchronize(HPPL_STREAM_DEFAULT); } - forwardImpIn(output, label, cost); } -void HuberTwoClassification::forwardImpIn(Matrix& output, - Argument& label, - Matrix& target) { +// +// Huber loss for robust 2-classes classification +// +REGISTER_LAYER(huber_classification, HuberTwoClassification); + +bool HuberTwoClassification::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + return HuberCost::init(layerMap, parameterMap); +} + +void HuberTwoClassification::forwardImp(Matrix& output, + Argument& label, + Matrix& target) { + HuberCost::forwardImp(output, label, target); size_t numSamples = target.getHeight(); CHECK(label.ids); CHECK_EQ((*label.ids).getSize(), numSamples); @@ -627,25 +630,13 @@ void HuberTwoClassification::forwardImpIn(Matrix& output, target.copyFrom(cost.data(), numSamples); } -void HuberTwoClassification::backwardImp(Matrix& outputValue, +void HuberTwoClassification::backwardImp(Matrix& output, Argument& label, - Matrix& outputGrad) { - if (useGpu_) { - backwardImpIn( - *tmpCpuInput_[0].value, tmpCpuInput_[1], *tmpCpuInput_[0].grad); - outputGrad.copyFrom(*tmpCpuInput_[0].grad); - } else { - backwardImpIn(outputValue, label, outputGrad); - } -} - -void HuberTwoClassification::backwardImpIn(Matrix& output, - Argument& label, - Matrix& outputG) { + Matrix& outputG) { size_t numSamples = output.getHeight(); - real* out = output.getData(); - real* grad = outputG.getData(); - int* lbl = (*label.ids).getData(); + real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData(); + int* lbl = useGpu_ ? tmpCpuInput_[1].ids->getData() : (*label.ids).getData(); + real* grad = useGpu_ ? tmpCpuInput_[0].grad->getData() : outputG.getData(); for (size_t i = 0; i < numSamples; ++i) { int y = 2 * lbl[i] - 1; if (y * out[i] < -1) @@ -653,8 +644,8 @@ void HuberTwoClassification::backwardImpIn(Matrix& output, else if (y * out[i] < 1) grad[i] += -2 * (1 - y * out[i]) * y; } + if (useGpu_) outputG.copyFrom(grad, numSamples); } - /** * This cost layer compute the sum of its input as loss. 
* \f[ diff --git a/paddle/gserver/layers/CostLayer.h b/paddle/gserver/layers/CostLayer.h index 77427b7a08..c006dc8110 100644 --- a/paddle/gserver/layers/CostLayer.h +++ b/paddle/gserver/layers/CostLayer.h @@ -304,6 +304,23 @@ public: Matrix& outputGrad) override; }; +/* + * A base layer for HuberRegressionLoss and HuberTwoClassification. + */ +class HuberCost : public CostLayer { +public: + std::vector tmpCpuInput_; + + explicit HuberCost(const LayerConfig& config) : CostLayer(config) {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; + + void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad) {} +}; + /** * Huber loss for robust 2-classes classification. * @@ -312,25 +329,19 @@ public: * Loss = (1 - y * f)^2, if -1 < y * f < 1 \\ * Loss = 0, otherwise */ -class HuberTwoClassification : public CostLayer { - std::vector tmpCpuInput_; - +class HuberTwoClassification : public HuberCost { public: explicit HuberTwoClassification(const LayerConfig& config) - : CostLayer(config) {} + : HuberCost(config) {} bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) override; void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; - void forwardImpIn(Matrix& output, Argument& label, Matrix& cost); - void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad) override; - - void backwardImpIn(Matrix& outputValue, Argument& label, Matrix& outputGrad); }; typedef std::shared_ptr CostLayerPtr; diff --git a/python/paddle/v2/tests/test_layer.py b/python/paddle/v2/tests/test_layer.py index f2097e195f..7373a55ce6 100644 --- a/python/paddle/v2/tests/test_layer.py +++ b/python/paddle/v2/tests/test_layer.py @@ -141,7 +141,7 @@ class CostLayerTest(unittest.TestCase): cost8 = layer.rank_cost(left=score, right=score, label=score) cost9 = layer.lambda_cost(input=inference, score=score) cost10 = layer.sum_cost(input=inference) - cost11 = layer.huber_cost(input=score, label=label) + cost11 = layer.huber_classification_cost(input=score, label=label) print layer.parse_network([cost1, cost2]) print layer.parse_network([cost3, cost4]) From 4d8992c3bc64a835aa6a1e6e12678594d3f117b5 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 18 Aug 2017 09:58:41 +0800 Subject: [PATCH 029/170] check format before set header format --- paddle/parameter/Parameter.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/paddle/parameter/Parameter.h b/paddle/parameter/Parameter.h index e31cbc3dee..08a426eb74 100644 --- a/paddle/parameter/Parameter.h +++ b/paddle/parameter/Parameter.h @@ -278,7 +278,11 @@ public: /** * @brief Set the format in header. */ - void setHeaderFormat(int32_t fmt) { headerFormat_ = fmt; } + void setHeaderFormat(int32_t fmt) { + CHECK(isHeaderFormatSupported(fmt)) << "Unsupported format version: " + << fmt; + headerFormat_ = fmt; + } /** * @brief Parameter Update Hook. 
From 462b9b1d20942dca35dbe532248e53cdeccea6b2 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 18 Aug 2017 10:13:06 +0800 Subject: [PATCH 030/170] update mkldnn tag v0.10 --- cmake/external/mkldnn.cmake | 2 +- cmake/external/mklml.cmake | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake index 25c6b4ef52..9686df0021 100644 --- a/cmake/external/mkldnn.cmake +++ b/cmake/external/mkldnn.cmake @@ -51,7 +51,7 @@ ExternalProject_Add( ${EXTERNAL_PROJECT_LOG_ARGS} DEPENDS ${MKLDNN_DEPENDS} GIT_REPOSITORY "https://github.com/01org/mkl-dnn.git" - GIT_TAG "v0.9" + GIT_TAG "v0.10" PREFIX ${MKLDNN_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR} diff --git a/cmake/external/mklml.cmake b/cmake/external/mklml.cmake index e9fd3d4bed..51fafb9479 100644 --- a/cmake/external/mklml.cmake +++ b/cmake/external/mklml.cmake @@ -28,7 +28,7 @@ INCLUDE(ExternalProject) SET(MKLML_PROJECT "extern_mklml") SET(MKLML_VER "mklml_lnx_2018.0.20170720") -SET(MKLML_URL "https://github.com/01org/mkl-dnn/releases/download/v0.9/${MKLML_VER}.tgz") +SET(MKLML_URL "https://github.com/01org/mkl-dnn/releases/download/v0.10/${MKLML_VER}.tgz") SET(MKLML_SOURCE_DIR "${THIRD_PARTY_PATH}/mklml") SET(MKLML_DOWNLOAD_DIR "${MKLML_SOURCE_DIR}/src/${MKLML_PROJECT}") SET(MKLML_DST_DIR "mklml") From 46d30ec680f494e4cc30a73330074497da064fbd Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 17 Aug 2017 20:34:02 -0700 Subject: [PATCH 031/170] init minst.py --- python/paddle/v2/framework/tests/mnist.py | 140 ++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 python/paddle/v2/framework/tests/mnist.py diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py new file mode 100644 index 0000000000..32a088ac28 --- /dev/null +++ b/python/paddle/v2/framework/tests/mnist.py @@ -0,0 +1,140 @@ +import paddle.v2.framework.core as core +from paddle.v2.framework.op import Operator +import numpy + +BATCH_SIZE = 100 + +scope = core.Scope() +place = core.CPUPlace() +dev_ctx = core.DeviceContext.create(place) + +# init_net = core.Net.create() +forward_network = core.Net.create() + +# should be init after forward_op is constructed +# backward_net = core.Operator.backward(forward_net, set()) +backward_net = None +optimize_net = core.Net.create() + + +def atom_id(): + id = 0 + while True: + yield id + id += 1 + + +uniq_id = atom_id().next + + +def data_layer(name, dims): + var = scope.new_var(name) + tensor = var.get_tensor() + tensor.set_dims(dims) # 1 is batch size holder. 
+ return name + + +def feed_data(name, data): + assert isinstance(data, numpy.array) + tensor = scope.find_var(name).get_tensor() + tensor.set_dims(data.shape) + tensor.alloc_float(place) + tensor.set(data, place) + + +def grad_var_name(var_name): + return var_name + "@GRAD" + + +def sgd_optimizer(net, param_name, learning_rate=0.01): + grad_name = grad_var_name(param_name) + optimize_op = Operator( + "sgd", param=param_name, grad=grad_name, learning_rate=learning_rate) + net.add_op(optimize_op) + + +# should use operator and add these to the init_network +def init_param(param_name, dims): + print param_name + var = scope.new_var(param_name) + tensor = var.get_tensor() + tensor.set_dims(dims) + data = numpy.random.uniform( + low=0.0, high=1.0, size=tensor.shape()).astype("float32") + tensor.set(data, place) + + +# fc_layer +def fc_layer(net, input, size, act="sigmoid", bias=True, param=None, name=None): + """ + Add a fc layer to net + + :param input: input variable name. + :type input: str + :param size: fully connected layer size. + :param act: activation name + :param param: parameter attribute, used for initialize parameters. + :param bias: bias attribute. False will not have a bias. + :param name: the name of fc layer. If not set, model will generate a + readable name + :return: output variable name. + """ + if name is None: + name = 'fc_%d' % uniq_id() + if not isinstance(name, str): + raise ValueError("name should be string") + + input_dims = scope.find_var(input).get_tensor().get_dims() + + w_name = param or name + ".w" + init_param(param_name=w_name, dims=[input_dims[1], size]) + sgd_optimizer(net=optimize_net, param_name=w_name, learning_rate=0.01) + + pre_activation = name + ".mul.out" + scope.new_var(pre_activation) + mul_op = Operator("mul", X=input, Y=w_name, Out=pre_activation) + net.add_op(mul_op) + + # create bias variable if needed + if bias: + bias_name = name + ".b" + init_param(param_name=bias_name, dims=[size]) + sgd_optimizer( + net=optimize_net, param_name=bias_name, learning_rate=0.01) + bias_out = name + ".rowwise_add.out" + scope.new_var(bias_out) + rowwise_add_op = Operator( + "rowwise_add", X=pre_activation, b=bias_name, Out=bias_out) + net.add_op(rowwise_add_op) + pre_activation = bias_out + + activation_op = Operator(act, X=pre_activation, Y=name) + net.add_op(activation_op) + scope.new_var(name) + net.infer_shape(scope) + return name + + +def cross_entropy_layer(net, input, label): + cost_name = 'cross_entropy_%d' % uniq_id() + cross_entropy_op = Operator( + "onehot_cross_entropy", X=input, label=label, Y=cost_name) + net.add_op(cross_entropy_op) + scope.new_var(cost_name) + net.infer_shape(scope) + return cost_name + + +images = data_layer(name='pixel', dims=[BATCH_SIZE, 784]) +label = data_layer(name='label', dims=[BATCH_SIZE]) +fc = fc_layer(net=forward_network, input=images, size=10, act="softmax") +cost = cross_entropy_layer(net=forward_network, input=fc, label=label) +forward_network.complete_add_op(True) +print(forward_network) +backward_net = core.Operator.backward(forward_network, set()) + +print(backward_net) + +PASS_NUM = 10 +for pass_id in range(PASS_NUM): + print pass_id From 424b325d084ef0fd5aa61996f35ef88126c48306 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Fri, 18 Aug 2017 14:10:27 +0800 Subject: [PATCH 032/170] add unit test DeConv3D, Conv3D, col2vol, vol2col --- paddle/gserver/tests/test_LayerGrad.cpp | 152 +++++++++++++++++++++++ paddle/math/tests/test_matrixCompare.cpp | 116 +++++++++++++++++ 2 files changed, 268 insertions(+) diff 
--git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 0f312b6ca5..1e80e2c0ee 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -2007,6 +2007,158 @@ TEST(Layer, RowL2NormLayer) { } } +void test3DConvLayer(const string& type, bool trans, bool useGpu) { + // filter size + const int NUM_FILTERS = 6; + // const int CHANNELS = 3; + const int FILTER_SIZE = 3; + const int FILTER_SIZE_Y = 3; + const int FILTER_SIZE_Z = 3; + + // input image + const int CHANNELS = 3; + const int IMAGE_SIZE = 9; + const int IMAGE_SIZE_Y = 9; + const int IMAGE_SIZE_Z = 9; // 2, 3, 5, 5, 5 + + TestConfig config; + config.biasSize = NUM_FILTERS; + config.layerConfig.set_type(type); + config.layerConfig.set_num_filters(NUM_FILTERS); + config.layerConfig.set_partial_sum(1); + config.layerConfig.set_shared_biases(true); + + // Setting up conv3D-trans layer + LayerInputConfig* input = config.layerConfig.add_inputs(); + ConvConfig* conv = input->mutable_conv_conf(); + + conv->set_channels(CHANNELS); + conv->set_filter_size(FILTER_SIZE); + conv->set_filter_size_y(FILTER_SIZE_Y); + conv->set_filter_size_z(FILTER_SIZE_Z); + conv->set_padding(0); + conv->set_padding_y(0); + conv->set_padding_z(0); + conv->set_stride(2); + conv->set_stride_y(2); + conv->set_stride_z(2); + conv->set_img_size(IMAGE_SIZE); + conv->set_img_size_y(IMAGE_SIZE_Y); + conv->set_img_size_z(IMAGE_SIZE_Z); + conv->set_output_x(outputSize(conv->img_size(), + conv->filter_size(), + conv->padding(), + conv->stride(), + /* caffeMode */ true)); + conv->set_output_y(outputSize(conv->img_size_y(), + conv->filter_size_y(), + conv->padding_y(), + conv->stride_y(), + /* caffeMode */ true)); + conv->set_output_z(outputSize(conv->img_size_z(), + conv->filter_size_z(), + conv->padding_z(), + conv->stride_z(), + /* caffeMode */ true)); + + config.layerConfig.set_size(conv->output_x() * conv->output_y() * + conv->output_z() * NUM_FILTERS); + conv->set_groups(1); + conv->set_filter_channels(conv->channels() / conv->groups()); + config.inputDefs.push_back( + {INPUT_DATA, + "layer_0", + CHANNELS * IMAGE_SIZE * IMAGE_SIZE_Y * IMAGE_SIZE_Z, + conv->filter_channels() * FILTER_SIZE * FILTER_SIZE_Y * FILTER_SIZE_Z * + NUM_FILTERS}); + + testLayerGrad(config, "conv3D", 10, trans, useGpu); + // Use small batch_size and useWeight=true to test biasGrad + testLayerGrad(config, "conv3D", 2, trans, useGpu, true, 0.02); +} + +TEST(Layer, test3DConvLayer) { + test3DConvLayer("conv3d", /* trans= */ false, /* useGpu= */ false); +#ifndef PADDLE_ONLY_CPU + test3DConvLayer("conv3d", /* trans= */ false, /* useGpu= */ true); +#endif +} + +int deConvOutputSize(int inSize, int kSize, int pad, int stride) { + return (inSize - 1) * stride - 2 * pad + kSize; +} + +void test3DDeConvLayer(const string& type, bool trans, bool useGpu) { + // filter size + const int NUM_FILTERS = 6; + // const int CHANNELS = 3; + const int FILTER_SIZE = 3; + const int FILTER_SIZE_Y = 3; + const int FILTER_SIZE_Z = 3; + + // input image + const int CHANNELS = 3; + const int IMAGE_SIZE = 4; + const int IMAGE_SIZE_Y = 6; + const int IMAGE_SIZE_Z = 6; + + // Setting up conv-trans layer + TestConfig config; + config.biasSize = NUM_FILTERS; + config.layerConfig.set_type("deconv3d"); + config.layerConfig.set_num_filters(NUM_FILTERS); + config.layerConfig.set_partial_sum(1); + config.layerConfig.set_shared_biases(true); + + LayerInputConfig* input = config.layerConfig.add_inputs(); + ConvConfig* conv = input->mutable_conv_conf(); + + 
conv->set_channels(CHANNELS); + conv->set_filter_size(FILTER_SIZE); + conv->set_filter_size_y(FILTER_SIZE_Y); + conv->set_filter_size_z(FILTER_SIZE_Z); + conv->set_padding(0); + conv->set_padding_y(0); + conv->set_padding_z(0); + conv->set_stride(2); + conv->set_stride_y(2); + conv->set_stride_z(2); + conv->set_img_size(IMAGE_SIZE); + conv->set_img_size_y(IMAGE_SIZE_Y); + conv->set_img_size_z(IMAGE_SIZE_Z); + conv->set_output_x(deConvOutputSize( + conv->img_size(), conv->filter_size(), conv->padding(), conv->stride())); + conv->set_output_y(deConvOutputSize(conv->img_size_y(), + conv->filter_size_y(), + conv->padding_y(), + conv->stride_y())); + conv->set_output_z(deConvOutputSize(conv->img_size_z(), + conv->filter_size_z(), + conv->padding_z(), + conv->stride_z())); + config.layerConfig.set_size(conv->output_x() * conv->output_y() * + conv->output_z() * NUM_FILTERS); + conv->set_groups(1); + conv->set_filter_channels(conv->channels() / conv->groups()); + config.inputDefs.push_back( + {INPUT_DATA, + "layer_0", + CHANNELS * IMAGE_SIZE * IMAGE_SIZE_Y * IMAGE_SIZE_Z, + conv->filter_channels() * FILTER_SIZE * FILTER_SIZE_Y * FILTER_SIZE_Z * + NUM_FILTERS}); + + testLayerGrad(config, "deconv3D", 10, trans, useGpu); + // Use small batch_size and useWeight=true to test biasGrad + testLayerGrad(config, "deconv3D", 2, trans, useGpu, true, 0.02); +} + +TEST(Layer, test3DDeConvLayer) { + test3DDeConvLayer("deconv3d", /* trans= */ false, /* useGpu= */ false); +#ifndef PADDLE_ONLY_CPU + test3DDeConvLayer("deconv3d", /* trans= */ false, /* useGpu= */ true); +#endif +} + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); initMain(argc, argv); diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp index d77478f345..1d41ec0870 100644 --- a/paddle/math/tests/test_matrixCompare.cpp +++ b/paddle/math/tests/test_matrixCompare.cpp @@ -1203,4 +1203,120 @@ TEST(Matrix, warpCTC) { } } +int outputSizeCol2Vol( + int imageSize, int filterSize, int padding, int stride, bool caffeMode) { + int outputSize; + if (!caffeMode) { + outputSize = + (imageSize - filterSize + 2 * padding + stride - 1) / stride + 1; + } else { + outputSize = (imageSize - filterSize + 2 * padding) / stride + 1; + } + CHECK_GE(outputSize, 1); + return outputSize; +} + +void testMatrixCol2Vol(int depth, int height, int width) { + int channel = 3; + int filterX = 3, filterY = 4, filterZ = 5; + int strideX = 2, strideY = 2, strideZ = 2; + int padX = 1, padY = 1, padZ = 1; + + MatrixPtr cpuImage = + std::make_shared(channel, depth * height * width); + MatrixPtr gpuImage = + std::make_shared(channel, depth * height * width); + cpuImage->randomizeUniform(); + gpuImage->copyFrom(*cpuImage); + + int outD = outputSizeCol2Vol(depth, filterZ, padZ, strideZ, true); + int outH = outputSizeCol2Vol(height, filterY, padZ, strideY, true); + int outW = outputSizeCol2Vol(width, filterX, padZ, strideX, true); + + int colBufHeight = channel * filterZ * filterY * filterX; + int colBufWidth = outD * outH * outW; + MatrixPtr cpuColBuf = std::make_shared(colBufHeight, colBufWidth); + MatrixPtr gpuColBuf = std::make_shared(colBufHeight, colBufWidth); + cpuColBuf->vol2Col(cpuImage->getData(), + channel, + depth, + height, + width, + filterZ, + filterY, + filterX, + strideZ, + strideY, + strideX, + padZ, + padY, + padX); + gpuColBuf->vol2Col(gpuImage->getData(), + channel, + depth, + height, + width, + filterZ, + filterY, + filterX, + strideZ, + strideY, + strideX, + padZ, + padY, + padX); + 
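
testMatrixCol2Vol above sizes the column buffer as (channel * kD * kH * kW) rows by (outD * outH * outW) columns; note that it passes padZ to all three outputSizeCol2Vol calls, which is harmless here only because padX, padY and padZ are all 1. A quick standalone check of those dimensions for the smallest case exercised by TEST(Matrix, col2Vol):

// Illustrative sketch (not from the patch): column-buffer shape for the 9x9x9 case.
#include <cassert>

int outSize(int in, int k, int pad, int stride) {
  // caffeMode branch of outputSizeCol2Vol above
  return (in - k + 2 * pad) / stride + 1;
}

int main() {
  const int channel = 3, depth = 9, height = 9, width = 9;
  const int kZ = 5, kY = 4, kX = 3, stride = 2, pad = 1;
  int outD = outSize(depth, kZ, pad, stride);   // 4
  int outH = outSize(height, kY, pad, stride);  // 4
  int outW = outSize(width, kX, pad, stride);   // 5
  assert(outD == 4 && outH == 4 && outW == 5);
  // colBuf is (channel * kZ * kY * kX) rows by (outD * outH * outW) columns
  assert(channel * kZ * kY * kX == 180);
  assert(outD * outH * outW == 80);
  return 0;
}
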
TensorCheckEqual(*cpuColBuf, *gpuColBuf); + + cpuColBuf->randomizeUniform(); + gpuColBuf->copyFrom(*cpuColBuf); + cpuColBuf->col2Vol(cpuImage->getData(), + channel, + depth, + height, + width, + filterZ, + filterY, + filterX, + strideZ, + strideY, + strideX, + padZ, + padY, + padX, + 1.0, + 1.0); + gpuColBuf->col2Vol(gpuImage->getData(), + channel, + depth, + height, + width, + filterZ, + filterY, + filterX, + strideZ, + strideY, + strideX, + padZ, + padY, + padX, + 1.0, + 1.0); + TensorCheckErr(*cpuImage, *gpuImage); +} + +TEST(Matrix, col2Vol) { + for (auto depth : {9, 16, 64, 128}) { + for (auto height : {9, 11, 73, 128, 256}) { + for (auto width : { + 9, 32, 100, 512, + }) { + VLOG(3) << "depth=" << depth << " height=" << height + << " width=" << width; + testMatrixCol2Vol(depth, height, width); + } + } + } +} +/////// + #endif From c792ef7d5ae470031bebcd990b79c0ce7f36f7bc Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Fri, 18 Aug 2017 14:12:01 +0800 Subject: [PATCH 033/170] fix DeConv3D, Conv3D --- paddle/gserver/layers/Conv3DLayer.cpp | 248 +++++++++++++----------- paddle/gserver/layers/DeConv3DLayer.cpp | 186 +++++++++--------- 2 files changed, 229 insertions(+), 205 deletions(-) diff --git a/paddle/gserver/layers/Conv3DLayer.cpp b/paddle/gserver/layers/Conv3DLayer.cpp index 0fa9c5f9f5..5609a4cc73 100644 --- a/paddle/gserver/layers/Conv3DLayer.cpp +++ b/paddle/gserver/layers/Conv3DLayer.cpp @@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "Conv3DLayer.h" #include "paddle/utils/Logging.h" #include "paddle/utils/Stat.h" -#include "Conv3DLayer.h" namespace paddle { @@ -22,32 +22,30 @@ REGISTER_LAYER(conv3d, Conv3DLayer); bool Conv3DLayer::init(const LayerMap &layerMap, const ParameterMap ¶meterMap) { - if (!ConvBaseLayer::init(layerMap, parameterMap)) - return false; + if (!ConvBaseLayer::init(layerMap, parameterMap)) return false; int index = 0; for (auto &inputConfig : config_.inputs()) { - const ConvConfig &conf = inputConfig.conv_conf(); - M_.push_back(numFilters_ / conf.groups()); - K_.push_back( - conf.filter_channels() * conf.filter_size_z() * \ - conf.filter_size_y() * conf.filter_size()); - weights_[index]->getW()->reshape( - weights_[index]->getW()->getWidth(), - weights_[index]->getW()->getHeight()); + const ConvConfig &conf = inputConfig.conv_conf(); + M_.push_back(numFilters_ / conf.groups()); + K_.push_back(filterPixels_[index] * filterChannels_[index]); + if (nullptr != weights_[index]->getW()) + weights_[index]->getW()->reshape(weights_[index]->getW()->getWidth(), + weights_[index]->getW()->getHeight()); + if (nullptr != weights_[index]->getWGrad()) weights_[index]->getWGrad()->reshape( - weights_[index]->getWGrad()->getWidth(), - weights_[index]->getWGrad()->getHeight()); - ++index; + weights_[index]->getWGrad()->getWidth(), + weights_[index]->getWGrad()->getHeight()); + ++index; } - biases_->getWGrad()->reshape( - biases_->getWGrad()->width_, biases_->getWGrad()->height_); - biases_->getW()->reshape( - biases_->getW()->width_, biases_->getW()->height_); + if (nullptr != biases_->getWGrad()) + biases_->getWGrad()->reshape(biases_->getWGrad()->width_, + biases_->getWGrad()->height_); + if (nullptr != biases_->getW()) + biases_->getW()->reshape(biases_->getW()->width_, biases_->getW()->height_); CHECK(inputLayers_.size() == parameters_.size()); return true; } - size_t Conv3DLayer::getSize() { 
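
vol2Col/col2Vol are the 3-D generalization of im2col/col2im, and the round trip tested above is easier to see in one dimension. The following is a purely illustrative 1-D sketch, not the Paddle implementation: im2col copies each kernel-sized window into a column, and col2im adds every column entry back to the pixel it came from, so overlapping windows accumulate.

// Illustrative sketch (not from the patch): a 1-D analogue of vol2Col/col2Vol.
#include <cstdio>
#include <vector>

int main() {
  const int width = 5, k = 3, stride = 1, pad = 0;
  const int outW = (width - k + 2 * pad) / stride + 1;  // 3
  std::vector<float> img = {1, 2, 3, 4, 5};
  std::vector<float> col(k * outW, 0);  // k rows, outW columns, row-major
  // im2col: col(r, c) = img[c * stride - pad + r], zero outside the image
  for (int r = 0; r < k; ++r)
    for (int c = 0; c < outW; ++c) {
      int x = c * stride - pad + r;
      col[r * outW + c] = (x >= 0 && x < width) ? img[x] : 0.f;
    }
  // col2im: every column entry is added back to the position it was read from,
  // which is why overlapping windows accumulate in the result
  std::vector<float> back(width, 0);
  for (int r = 0; r < k; ++r)
    for (int c = 0; c < outW; ++c) {
      int x = c * stride - pad + r;
      if (x >= 0 && x < width) back[x] += col[r * outW + c];
    }
  for (float v : back) std::printf("%g ", v);  // prints: 1 4 9 8 5
  std::printf("\n");
  return 0;
}
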
CHECK_NE(inputLayers_.size(), 0UL); // imgSizeH_.clear(); @@ -59,22 +57,19 @@ size_t Conv3DLayer::getSize() { N_.clear(); size_t layerSize = 0; for (size_t i = 0; i < inputLayers_.size(); ++i) { - // imgSizeH_.push_back(inputLayers_[i]->getOutput().getFrameHeight()); - // imgSizeW_.push_back(inputLayers_[i]->getOutput().getFrameWidth()); - // imgSizeD_.push_back(inputLayers_[i]->getOutput().getFrameDepth()); - outputW_.push_back(outputSize( - imgSizeW_[i], filterSize_[i], - padding_[i], stride_[i], true)); - outputH_.push_back(outputSize( - imgSizeH_[i], filterSizeY_[i], - paddingY_[i], strideY_[i], true)); - outputD_.push_back(outputSize( - imgSizeD_[i], filterSizeZ_[i], - paddingZ_[i], strideZ_[i], true)); - - N_.push_back(outputD_[i] * outputH_[i] * outputW_[i]); - CHECK(layerSize == 0 || N_[i] * size_t(numFilters_) == layerSize); - layerSize += N_[i] * numFilters_; + // imgSizeH_.push_back(inputLayers_[i]->getOutput().getFrameHeight()); + // imgSizeW_.push_back(inputLayers_[i]->getOutput().getFrameWidth()); + // imgSizeD_.push_back(inputLayers_[i]->getOutput().getFrameDepth()); + outputW_.push_back(outputSize( + imgSizeW_[i], filterSize_[i], padding_[i], stride_[i], true)); + outputH_.push_back(outputSize( + imgSizeH_[i], filterSizeY_[i], paddingY_[i], strideY_[i], true)); + outputD_.push_back(outputSize( + imgSizeD_[i], filterSizeZ_[i], paddingZ_[i], strideZ_[i], true)); + + N_.push_back(outputD_[i] * outputH_[i] * outputW_[i]); + CHECK(layerSize == 0 || N_[i] * size_t(numFilters_) == layerSize); + layerSize += N_[i] * numFilters_; } getOutput().setFrameHeight(outputH_[0]); getOutput().setFrameWidth(outputW_[0]); @@ -88,38 +83,46 @@ void Conv3DLayer::forward(PassType passType) { int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); int outWidth = getSize(); resetOutput(batchSize, outWidth); - const MatrixPtr outMat = getOutputValue(); for (size_t i = 0; i != inputLayers_.size(); ++i) { - REGISTER_TIMER_INFO("FwdConv3D", getName().c_str()); - const MatrixPtr& inMat = getInputValue(i); - int width = inMat->getWidth(); - int M = M_[i]; - int N = N_[i]; - int K = K_[i]; - Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); - MatrixPtr wMat = weights_[i]->getW(); - for (int n = 0; n < batchSize; ++n) { - colBuf_->vol2Col(inMat->getData() + n * width, channels_[i], - imgSizeD_[i], imgSizeH_[i], imgSizeW_[i], - filterSizeZ_[i], filterSizeY_[i], filterSize_[i], - strideZ_[i], strideY_[i], stride_[i], - paddingZ_[i], paddingY_[i], padding_[i]); - - real *outData = outMat->getData() + n * outWidth; - MatrixPtr outMatSub = - Matrix::create(outData, groups_[i] * M, N, false, useGpu_); - for (int g = 0; g < groups_[i]; g++) { - MatrixPtr wMatSub = wMat->subMatrix(g * M, M); - MatrixPtr in = colBuf_->subMatrix(g * K, K); - MatrixPtr out = outMatSub->subMatrix(g * M, M); - out->mul(*wMatSub, *in, 1.0, 0.0); - } + REGISTER_TIMER_INFO("FwdConv3D", getName().c_str()); + const MatrixPtr &inMat = getInputValue(i); + const MatrixPtr &outMat = getOutputValue(); + int M = M_[i]; + int N = N_[i]; + int K = K_[i]; + Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); + MatrixPtr wMat = weights_[i]->getW(); + for (int n = 0; n < batchSize; ++n) { + colBuf_->vol2Col(inMat->getData() + n * inMat->getStride(), + channels_[i], + imgSizeD_[i], + imgSizeH_[i], + imgSizeW_[i], + filterSizeZ_[i], + filterSizeY_[i], + filterSize_[i], + strideZ_[i], + strideY_[i], + stride_[i], + paddingZ_[i], + paddingY_[i], + padding_[i]); + + real *outData = outMat->getData() + n * 
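
Conv3DLayer::forward above is the usual vol2col plus GEMM formulation. A standalone sketch of the shapes it sets up, assuming groups = 1 and the constants from test3DConvLayer earlier in this series:

// Illustrative sketch (not from the patch): the GEMM shapes behind Conv3DLayer.
#include <cassert>

int main() {
  const int numFilters = 6, channels = 3, groups = 1;
  const int kD = 3, kH = 3, kW = 3;
  const int outD = 4, outH = 4, outW = 4;  // 9^3 image, kernel 3, stride 2, pad 0
  const int filterChannels = channels / groups;

  int M = numFilters / groups;            // rows of each per-group weight sub-matrix
  int K = filterChannels * kD * kH * kW;  // one colBuf column holds one receptive field
  int N = outD * outH * outW;             // one colBuf column per output voxel
  assert(M == 6 && K == 81 && N == 64);

  // per sample and per group: out[M x N] = W[M x K] * colBuf[K x N],
  // and the layer output width is N * numFilters
  assert(N * numFilters == 384);
  return 0;
}
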
outMat->getStride(); + MatrixPtr outMatSub = + Matrix::create(outData, groups_[i] * M, N, false, useGpu_); + for (int g = 0; g < groups_[i]; g++) { + MatrixPtr wMatSub = wMat->subMatrix(g * M, M); + MatrixPtr in = colBuf_->subMatrix(g * K, K); + MatrixPtr out = outMatSub->subMatrix(g * M, M); + out->mul(*wMatSub, *in, 1.0, 1.0); } + } } if (nullptr != this->biasParameter_) { - REGISTER_TIMER_INFO("FwBiasTimer", getName().c_str()); - this->addBias(); + REGISTER_TIMER_INFO("FwBiasTimer", getName().c_str()); + this->addBias(); } forwardActivation(); } @@ -128,20 +131,20 @@ void Conv3DLayer::backward(const UpdateCallback &callback) { backwardActivation(); if (biases_ && biases_->getWGrad()) { - bpropBiases(); - biases_->getParameterPtr()->incUpdate(callback); + bpropBiases(); + biases_->getParameterPtr()->incUpdate(callback); } for (size_t i = 0; i != inputLayers_.size(); ++i) { - REGISTER_TIMER_INFO("BwdConv3D", getName().c_str()); - if (weights_[i]->getWGrad()) { - bpropWeights(i); - } - if (this->needGradient_) { - bpropData(i); - } - REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); - weights_[i]->getParameterPtr()->incUpdate(callback); + REGISTER_TIMER_INFO("BwdConv3D", getName().c_str()); + if (weights_[i]->getWGrad()) { + bpropWeights(i); + } + if (getInputGrad(i)) { + bpropData(i); + } + REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); + weights_[i]->getParameterPtr()->incUpdate(callback); } } @@ -149,28 +152,36 @@ void Conv3DLayer::bpropWeights(int i) { int M = M_[i]; int N = N_[i]; int K = K_[i]; - const MatrixPtr& inMat = getInputValue(i); - int width = inMat->getWidth(); + const MatrixPtr &inMat = getInputValue(i); Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); MatrixPtr wGradMat = weights_[i]->getWGrad(); - real* outGradData = getOutputGrad()->getData(); int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); - for (int n = 0; n < batchSize; ++n) { - colBuf_->vol2Col(inMat->getData() + n * width, channels_[i], - imgSizeD_[i], imgSizeH_[i], imgSizeW_[i], - filterSizeZ_[i], filterSizeY_[i], filterSize_[i], - strideZ_[i], strideY_[i], stride_[i], - paddingZ_[i], paddingY_[i], padding_[i]); - outGradData += n * getOutputGrad()->getWidth(); - MatrixPtr outGradSub = - Matrix::create(outGradData, groups_[i] * M, N, false, useGpu_); - for (int g = 0; g < groups_[i]; ++g) { - MatrixPtr inMatSub = colBuf_->subMatrix(g * K, K); - MatrixPtr outG = outGradSub->subMatrix(g * M, M); - MatrixPtr wGradSub = wGradMat->subMatrix(g * M, M); - wGradSub->mul(*outG, *(inMatSub->getTranspose()), 1.0, 1.0); - } + colBuf_->vol2Col(inMat->getData() + n * inMat->getStride(), + channels_[i], + imgSizeD_[i], + imgSizeH_[i], + imgSizeW_[i], + filterSizeZ_[i], + filterSizeY_[i], + filterSize_[i], + strideZ_[i], + strideY_[i], + stride_[i], + paddingZ_[i], + paddingY_[i], + padding_[i]); + + real *outGradData = + getOutputGrad()->getData() + n * getOutputGrad()->getStride(); + MatrixPtr outGradSub = + Matrix::create(outGradData, groups_[i] * M, N, false, useGpu_); + for (int g = 0; g < groups_[i]; ++g) { + MatrixPtr inMatSub = colBuf_->subMatrix(g * K, K); + MatrixPtr outG = outGradSub->subMatrix(g * M, M); + MatrixPtr wGradSub = wGradMat->subMatrix(g * M, M); + wGradSub->mul(*outG, *(inMatSub->getTranspose()), 1.0, 1.0); + } } } @@ -180,45 +191,54 @@ void Conv3DLayer::bpropData(int i) { int K = K_[i]; Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); MatrixPtr wMat = weights_[i]->getW(); - real* outGradData = getOutputGrad()->getData(); - real* 
preGradData = getInputGrad(i)->getData(); int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); for (int n = 0; n < batchSize; ++n) { - outGradData += n * getOutputGrad()->getWidth(); - preGradData += n * getInputGrad(i)->getWidth(); - MatrixPtr outGradSub = - Matrix::create(outGradData, M * groups_[i], N, false, useGpu_); - for (int g = 0; g < groups_[i]; ++g) { - MatrixPtr wMatSub = wMat->subMatrix(g * M, M); - MatrixPtr outG = outGradSub->subMatrix(g * M, M); - MatrixPtr inGradMatSub = colBuf_->subMatrix(g * K, K); - inGradMatSub->mul(*(wMatSub->getTranspose()), *outG, 1.0, 0.0); - } - colBuf_->col2Vol(preGradData, channels_[i], - imgSizeD_[i], imgSizeH_[i], imgSizeW_[i], - filterSizeZ_[i], filterSizeY_[i], filterSize_[i], - strideZ_[i], strideY_[i], stride_[i], - paddingZ_[i], paddingY_[i], padding_[i], - 1.0, 1.0); + real *outGradData = + getOutputGrad()->getData() + n * getOutputGrad()->getStride(); + real *preGradData = + getInputGrad(i)->getData() + n * getInputGrad(i)->getStride(); + MatrixPtr outGradSub = + Matrix::create(outGradData, M * groups_[i], N, false, useGpu_); + for (int g = 0; g < groups_[i]; ++g) { + MatrixPtr wMatSub = wMat->subMatrix(g * M, M); + MatrixPtr outG = outGradSub->subMatrix(g * M, M); + MatrixPtr inGradMatSub = colBuf_->subMatrix(g * K, K); + inGradMatSub->mul(*(wMatSub->getTranspose()), *outG, 1.0, 0.0); + } + colBuf_->col2Vol(preGradData, + channels_[i], + imgSizeD_[i], + imgSizeH_[i], + imgSizeW_[i], + filterSizeZ_[i], + filterSizeY_[i], + filterSize_[i], + strideZ_[i], + strideY_[i], + stride_[i], + paddingZ_[i], + paddingY_[i], + padding_[i], + 1.0, + 1.0); } } void Conv3DLayer::bpropBiases() { MatrixPtr outGradMat = getOutputGrad(); if (this->sharedBiases_) { - biases_->getWGrad()->collectSharedBias(*outGradMat, 1.0f); + biases_->getWGrad()->collectSharedBias(*outGradMat, 1.0f); } else { - biases_->getWGrad()->collectBias(*outGradMat, 1.0f); + biases_->getWGrad()->collectBias(*outGradMat, 1.0f); } } void Conv3DLayer::addBias() { MatrixPtr outMat = getOutputValue(); - if (this->sharedBiases_) { - outMat->addSharedBias(*(biases_->getW()), 1.0f); + outMat->addSharedBias(*(biases_->getW()), 1.0f); } else { - outMat->addBias(*(biases_->getW()), 1.0f); + outMat->addBias(*(biases_->getW()), 1.0f); } } diff --git a/paddle/gserver/layers/DeConv3DLayer.cpp b/paddle/gserver/layers/DeConv3DLayer.cpp index 8de40b681d..286f5b985c 100644 --- a/paddle/gserver/layers/DeConv3DLayer.cpp +++ b/paddle/gserver/layers/DeConv3DLayer.cpp @@ -12,43 +12,42 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
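
bpropWeights and bpropData above are the two transposed GEMMs of the same formulation: the weight gradient accumulates dY * colBuf^T, and the column gradient is W^T * dY, which col2Vol then scatters onto the input gradient. A shape-level sketch with tiny matrices, not the Paddle code:

// Illustrative sketch (not from the patch): the two backward GEMMs of a conv layer.
// W is M x K, colBuf is K x N, dY is M x N, all row-major.
#include <cassert>
#include <vector>

static void matmul(const std::vector<double>& A, bool transA,
                   const std::vector<double>& B, bool transB,
                   std::vector<double>& C, int m, int n, int k) {
  // C (m x n) += op(A) * op(B), where op(A) is m x k and op(B) is k x n
  for (int i = 0; i < m; ++i)
    for (int j = 0; j < n; ++j)
      for (int p = 0; p < k; ++p) {
        double a = transA ? A[p * m + i] : A[i * k + p];
        double b = transB ? B[j * k + p] : B[p * n + j];
        C[i * n + j] += a * b;
      }
}

int main() {
  const int M = 2, K = 3, N = 4;
  std::vector<double> W(M * K, 0.5), colBuf(K * N, 1.0), dY(M * N, 2.0);
  std::vector<double> dW(M * K, 0.0), dColBuf(K * N, 0.0);
  matmul(dY, false, colBuf, true, dW, M, K, N);  // dW += dY * colBuf^T
  matmul(W, true, dY, false, dColBuf, K, N, M);  // dColBuf = W^T * dY
  assert(dW[0] == 8.0);       // N terms of 2.0 * 1.0
  assert(dColBuf[0] == 2.0);  // M terms of 0.5 * 2.0
  return 0;
}
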
*/ +#include "DeConv3DLayer.h" #include "paddle/utils/Logging.h" #include "paddle/utils/Stat.h" -#include "DeConv3DLayer.h" namespace paddle { REGISTER_LAYER(deconv3d, DeConv3DLayer); #define DECONV_OUTPUT_SIZE(IN_SIZE, STRID, PAD, KSIZE) \ - (((IN_SIZE) - 1) * (STRID) - 2 * (PAD) + (KSIZE)) + (((IN_SIZE)-1) * (STRID)-2 * (PAD) + (KSIZE)) bool DeConv3DLayer::init(const LayerMap &layerMap, - const ParameterMap ¶meterMap) { + const ParameterMap ¶meterMap) { if (!ConvBaseLayer::init(layerMap, parameterMap)) return false; // for Deconv, the dimension of Kernel is // channel * output * depth * height * weigth // Matrix storage format: (output * depth * height * weigth) x channel for (int index = 0; index < config_.inputs().size(); ++index) { M_.push_back(filterChannels_[index]); - K_.push_back( - filterPixels_[index] * (numFilters_/groups_[index])); - weights_[index]->getW()->reshape( - filterPixels_[index] * numFilters_, - filterChannels_[index]); - weights_[index]->getWGrad()->reshape( - filterPixels_[index] * numFilters_, - filterChannels_[index]); + K_.push_back(filterPixels_[index] * (numFilters_ / groups_[index])); + if (weights_[index]->getW()) + weights_[index]->getW()->reshape(filterPixels_[index] * numFilters_, + filterChannels_[index]); + if (weights_[index]->getWGrad()) + weights_[index]->getWGrad()->reshape(filterPixels_[index] * numFilters_, + filterChannels_[index]); } - biases_->getWGrad()->reshape( - biases_->getWGrad()->width_, biases_->getWGrad()->height_); - biases_->getW()->reshape( - biases_->getW()->width_, biases_->getW()->height_); + if (biases_->getWGrad()) + biases_->getWGrad()->reshape(biases_->getWGrad()->width_, + biases_->getWGrad()->height_); + if (biases_->getW()) + biases_->getW()->reshape(biases_->getW()->width_, biases_->getW()->height_); CHECK(inputLayers_.size() == parameters_.size()); return true; } - size_t DeConv3DLayer::getSize() { CHECK_NE(inputLayers_.size(), 0UL); // imgSizeH_.clear(); @@ -64,18 +63,12 @@ size_t DeConv3DLayer::getSize() { // imgSizeH_.push_back(inputLayers_[i]->getOutput().getFrameHeight()); // imgSizeW_.push_back(inputLayers_[i]->getOutput().getFrameWidth()); // imgSizeD_.push_back(inputLayers_[i]->getOutput().getFrameDepth()); - outputW_.push_back( - DECONV_OUTPUT_SIZE( - imgSizeW_[i], stride_[i], - padding_[i], filterSize_[i])); - outputH_.push_back( - DECONV_OUTPUT_SIZE( - imgSizeH_[i], strideY_[i], - paddingY_[i], filterSizeY_[i])); - outputD_.push_back( - DECONV_OUTPUT_SIZE( - imgSizeD_[i], strideZ_[i], - paddingZ_[i], filterSizeZ_[i])); + outputW_.push_back(DECONV_OUTPUT_SIZE( + imgSizeW_[i], stride_[i], padding_[i], filterSize_[i])); + outputH_.push_back(DECONV_OUTPUT_SIZE( + imgSizeH_[i], strideY_[i], paddingY_[i], filterSizeY_[i])); + outputD_.push_back(DECONV_OUTPUT_SIZE( + imgSizeD_[i], strideZ_[i], paddingZ_[i], filterSizeZ_[i])); No_.push_back(outputD_[i] * outputH_[i] * outputW_[i]); N_.push_back(imgSizeD_[i] * imgSizeH_[i] * imgSizeW_[i]); CHECK(layerSize == 0 || N_[i] * size_t(numFilters_) == layerSize); @@ -96,32 +89,37 @@ void DeConv3DLayer::forward(PassType passType) { for (size_t i = 0; i != inputLayers_.size(); ++i) { REGISTER_TIMER_INFO("FwdDeConv3D", getName().c_str()); - const MatrixPtr& inMat = getInputValue(i); - int width = inMat->getWidth(); + const MatrixPtr &inMat = getInputValue(i); int M = M_[i]; int N = N_[i]; int K = K_[i]; MatrixPtr wMat = weights_[i]->getW(); - Matrix::resizeOrCreate(colBuf_, K * groups_[i] , N, false, useGpu_); - + Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, 
useGpu_); for (int n = 0; n < batchSize; ++n) { - real *inData = inMat->getData() + n * width; - real *colBufData = colBuf_->getData(); - for (int g = 0; g < groups_[i]; g++) { - MatrixPtr wMatSub = wMat->subMatrix(g * K, K); - MatrixPtr inMatSub = - Matrix::create(inData, M, N, false, useGpu_); - MatrixPtr colBufDataSub = - Matrix::create(colBufData, K, N, false, useGpu_); - colBufDataSub->mul(*wMatSub, *inMatSub, 1.0, 0.0); - colBufData += K * N; - inData += M * N; + real *inData = inMat->getData() + n * inMat->getStride(); + for (int g = 0; g < groups_[i]; ++g) { + MatrixPtr inMatSub = Matrix::create(inData, M, N, false, useGpu_); + MatrixPtr wMatSub = wMat->subMatrix(g * K, K); + MatrixPtr colBufDataSub = colBuf_->subMatrix(g * K, K); + colBufDataSub->mul(*wMatSub, *inMatSub, 1.0, 0.0); + inData += M * N; } - colBuf_->col2Vol(outMat->getData()+ n * outMat->getWidth(), - numFilters_, outputD_[i], outputH_[i], outputW_[i], - filterSizeZ_[i], filterSizeY_[i], filterSize_[i], - strideZ_[i], strideY_[i], stride_[i], - paddingZ_[i], paddingY_[i], padding_[i], 1.0, 1.0); + colBuf_->col2Vol(outMat->getData() + n * outMat->getStride(), + numFilters_, + outputD_[i], + outputH_[i], + outputW_[i], + filterSizeZ_[i], + filterSizeY_[i], + filterSize_[i], + strideZ_[i], + strideY_[i], + stride_[i], + paddingZ_[i], + paddingY_[i], + padding_[i], + 1.0, + 1.0); } } if (nullptr != this->biasParameter_) { @@ -134,63 +132,69 @@ void DeConv3DLayer::forward(PassType passType) { void DeConv3DLayer::backward(const UpdateCallback &callback) { backwardActivation(); int batchSize = getOutputGrad()->getHeight(); - int outputWidth = getOutputGrad()->getWidth(); if (biases_ && biases_->getWGrad()) { bpropBiases(); biases_->getParameterPtr()->incUpdate(callback); } - for (size_t i =0; i < inputLayers_.size(); ++i) { - int M = M_[i]; - int N = N_[i]; - int K = K_[i]; - Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); - const MatrixPtr& inMat = getInputValue(i); - for (int n = 0; n < batchSize; ++n) { + for (size_t i = 0; i < inputLayers_.size(); ++i) { + if (weights_[i]->getWGrad() || this->needGradient_) { + int M = M_[i]; + int N = N_[i]; + int K = K_[i]; REGISTER_TIMER_INFO("BwdDeConv3D", getName().c_str()); - if (weights_[i]->getWGrad() || this->needGradient_) { - colBuf_->vol2Col(getOutputGrad()->getData() + n * outputWidth, - numFilters_, outputD_[i], outputH_[i], outputW_[i], - filterSizeZ_[i], filterSizeY_[i], filterSize_[i], - strideZ_[i], strideY_[i], stride_[i], - paddingZ_[i], paddingY_[i], padding_[i]); - } - if (weights_[i]->getWGrad()) { - real *inData = inMat->getData() + n * inMat->getWidth();; - real *wGradData = weights_[i]->getWGrad()->getData(); - for (int g = 0; g < groups_[i]; g++) { - MatrixPtr colBufDataSub = colBuf_->subMatrix(g * K, K); - MatrixPtr inMatSub = Matrix::create( - inData, M, N, false, useGpu_); - MatrixPtr wGradMatSub = Matrix::create( - wGradData, K, M, false, useGpu_); - wGradMatSub->mul(*colBufDataSub, - *(inMatSub->getTranspose()), 1.0, 1.0); - wGradData += K * M; - inData += M * N; + Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); + const MatrixPtr &inMat = getInputValue(i); + for (int n = 0; n < batchSize; ++n) { + colBuf_->vol2Col( + getOutputGrad()->getData() + n * getOutputGrad()->getStride(), + numFilters_, + outputD_[i], + outputH_[i], + outputW_[i], + filterSizeZ_[i], + filterSizeY_[i], + filterSize_[i], + strideZ_[i], + strideY_[i], + stride_[i], + paddingZ_[i], + paddingY_[i], + padding_[i]); + if (weights_[i]->getWGrad()) { 
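
DeConv3DLayer reverses the data flow: the per-group GEMM produces the column buffer from the input, and col2Vol scatters it into the larger output volume. A standalone check of the shapes implied by init() and forward() above, assuming groups = 1 and the constants from test3DDeConvLayer earlier in this series:

// Illustrative sketch (not from the patch): the shapes DeConv3DLayer sets up.
#include <cassert>

int deconvOut(int in, int stride, int pad, int k) {  // DECONV_OUTPUT_SIZE above
  return (in - 1) * stride - 2 * pad + k;
}

int main() {
  const int numFilters = 6, channels = 3, groups = 1;
  const int kD = 3, kH = 3, kW = 3, filterPixels = kD * kH * kW;
  const int inX = 4, inY = 6, inZ = 6;  // img_size, img_size_y, img_size_z in the test

  int M = channels / groups;                    // filterChannels
  int K = filterPixels * (numFilters / groups);
  int N = inX * inY * inZ;                      // deconv iterates over *input* voxels
  assert(M == 3 && K == 162 && N == 144);

  // weight storage is (filterPixels * numFilters) x filterChannels == (K * groups) x M,
  // so each group's colBuf[K x N] = W_sub[K x M] * in[M x N] is well formed
  assert(filterPixels * numFilters == K * groups);

  // col2Vol then scatters the columns into the 9 x 13 x 13 x numFilters output
  assert(deconvOut(inX, 2, 0, kW) == 9);
  assert(deconvOut(inY, 2, 0, kH) == 13);
  return 0;
}
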
+ real *inData = inMat->getData() + n * inMat->getStride(); + for (int g = 0; g < groups_[i]; ++g) { + MatrixPtr colBufDataSub = colBuf_->subMatrix(g * K, K); + MatrixPtr wGradMatSub = + weights_[i]->getWGrad()->subMatrix(g * K, K); + MatrixPtr inMatSub = Matrix::create(inData, M, N, false, useGpu_); + wGradMatSub->mul( + *colBufDataSub, *(inMatSub->getTranspose()), 1.0, 1.0); + inData += M * N; + } } - weights_[i]->getParameterPtr()->incUpdate(callback); - } - if (this->needGradient_) { - real* preGrad = getInputGrad(i)->getData(); - for (int g = 0; g < groups_[i]; ++g) { - MatrixPtr w = weights_[i]->getW()->subMatrix(g * K, K); - MatrixPtr outGradMat = colBuf_->subMatrix(g * K, K); - MatrixPtr inGradMatSub = Matrix::create( - preGrad, M, N, false, useGpu_); - inGradMatSub->mul(*(w->getTranspose()), *outGradMat, 1.0, 0.0); - preGrad += M * N; + if (getInputGrad(i)) { + real *preGrad = + getInputGrad(i)->getData() + n * getInputGrad(i)->getStride(); + for (int g = 0; g < groups_[i]; ++g) { + MatrixPtr w = weights_[i]->getW()->subMatrix(g * K, K); + MatrixPtr outGradMat = colBuf_->subMatrix(g * K, K); + MatrixPtr inGradMatSub = + Matrix::create(preGrad, M, N, false, useGpu_); + inGradMatSub->mul(*(w->getTranspose()), *outGradMat, 1.0, 1.0); + preGrad += M * N; + } } } REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); + weights_[i]->getParameterPtr()->incUpdate(callback); } } } - -void DeConv3DLayer::bpropWeights(int i) { } -void DeConv3DLayer::bpropData(int i) { } +void DeConv3DLayer::bpropWeights(int i) {} +void DeConv3DLayer::bpropData(int i) {} void DeConv3DLayer::bpropBiases() { - MatrixPtr outGradMat = getOutputGrad(); + const MatrixPtr &outGradMat = getOutputGrad(); if (this->sharedBiases_) { biases_->getWGrad()->collectSharedBias(*outGradMat, 1.0f); From 43f6cdc8247042244f9b75bac51957c962a16ffd Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Fri, 18 Aug 2017 14:13:25 +0800 Subject: [PATCH 034/170] fix Matrix --- paddle/math/Matrix.cpp | 110 +++++++++++++++++++------------- paddle/math/Matrix.h | 140 ++++++++++++++++++++++++++--------------- 2 files changed, 153 insertions(+), 97 deletions(-) diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 66868e73b3..579a0f3cf3 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -1389,51 +1389,71 @@ void GpuMatrix::multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) { output_d, grad_d, mat_d, height_, width_); } -void GpuMatrix::vol2Col(real* data, - int channels, - int depth, - int height, - int width, - int filterD, - int filterH, - int filterW, - int strideD, - int strideH, - int strideW, - int paddingD, - int paddingH, - int paddingW) { - hl_matrix_vol2Col(data, - channels, depth, height, width, - filterD, filterH, filterW, - strideD, strideH, strideW, - paddingD, paddingH, paddingW, getData()); -} - -void GpuMatrix::col2Vol(real* trg, - int channels, - int depth, - int height, - int width, - int filterD, - int filterH, - int filterW, - int strideD, - int strideH, - int strideW, - int paddingD, - int paddingH, - int paddingW, - real alpha, - real beta) { - hl_matrix_col2Vol(trg, - channels, depth, height, width, - filterD, filterH, filterW, - strideD, strideH, strideW, - paddingD, paddingH, paddingW, +void GpuMatrix::vol2Col(real* dataSrc, + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW) { + hl_matrix_vol2Col(dataSrc, + channels, + depth, + height, + 
width, + filterD, + filterH, + filterW, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + getData()); +} + +void GpuMatrix::col2Vol(real* dataDst, + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, + real alpha, + real beta) { + hl_matrix_col2Vol(dataDst, + channels, + depth, + height, + width, + filterD, + filterH, + filterW, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, getData(), - alpha, beta); - } + alpha, + beta); +} /** * CpuMatrix @@ -4082,7 +4102,7 @@ void CpuMatrix::col2Vol(real* trg, real alpha, real beta) { real* src = getData(); - int outDepth = (depth + 2 * paddingH - filterD) / strideD + 1; + int outDepth = (depth + 2 * paddingD - filterD) / strideD + 1; int outHeight = (height + 2 * paddingH - filterH) / strideH + 1; int outWidth = (width + 2 * paddingW - filterW) / strideW + 1; int channelsCol = channels * filterD * filterH * filterW; diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index 4354996ce0..cc3a56f279 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -1040,40 +1040,40 @@ public: } virtual void vol2Col(real* data, - int channels, - int depth, - int height, - int width, - int filterD, - int filterH, - int filterW, - int strideD, - int strideH, - int strideW, - int paddingD, - int paddingH, - int paddingW) { - LOG(FATAL) << "Not implemeted"; - } + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW) { + LOG(FATAL) << "Not implemeted"; + } - virtual void col2Vol(real* trg, - int channels, - int depth, - int height, - int width, - int filterD, - int filterH, - int filterW, - int strideD, - int strideH, - int strideW, - int paddingD, - int paddingH, - int paddingW, - real alpha, - real beta) { - LOG(FATAL) << "Not implemeted"; - } + virtual void col2Vol(real* trg, + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, + real alpha, + real beta) { + LOG(FATAL) << "Not implemeted"; + } virtual void bilinearForward(const Matrix& in, const size_t inImgH, @@ -1411,18 +1411,36 @@ public: const real ratioW); void vol2Col(real* data, - int channels, - int depth, int height, int width, - int filterD, int filterH, int filterW, - int strideD, int strideH, int strideW, - int paddingD, int paddingH, int paddingW); + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW); void col2Vol(real* trg, - int channels, int depth, int height, int width, - int filterD, int filterH, int filterW, - int strideD, int strideH, int strideW, - int paddingD, int paddingH, int paddingW, - real alpha, real beta); + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, + real alpha, + real beta); void multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label); @@ -1767,17 +1785,35 @@ public: void vol2Col(real* data, int channels, - int depth, int height, int width, - int filterD, int filterH, int filterW, - int strideD, int strideH, int strideW, - int 
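
The one-character change above (paddingH to paddingD in the outDepth expression of CpuMatrix::col2Vol) only shows up when the three paddings differ. A minimal standalone illustration, with numbers chosen for the sketch:

// Illustrative sketch (not from the patch): asymmetric padding exposes the old bug.
#include <cassert>

int outLen(int in, int k, int pad, int stride) { return (in + 2 * pad - k) / stride + 1; }

int main() {
  const int depth = 8, filterD = 3, strideD = 1;
  const int paddingD = 2, paddingH = 0;
  assert(outLen(depth, filterD, paddingD, strideD) == 10);  // value after the fix
  assert(outLen(depth, filterD, paddingH, strideD) == 6);   // what the old code computed
  return 0;
}
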
paddingD, int paddingH, int paddingW); + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW); void col2Vol(real* trg, - int channels, int depth, int height, int width, - int filterD, int filterH, int filterW, - int strideD, int strideH, int strideW, - int paddingD, int paddingH, int paddingW, - real alpha, real beta); + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, + real alpha, + real beta); template void operator=(const ExpressionType& expr) { From 0a7516d193061ccb35ab410fc947bd245a936159 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Fri, 18 Aug 2017 14:14:27 +0800 Subject: [PATCH 035/170] fix col2vol vol2col kernel --- paddle/cuda/src/hl_cuda_matrix.cu | 192 ++++++++++++++++++++---------- 1 file changed, 129 insertions(+), 63 deletions(-) diff --git a/paddle/cuda/src/hl_cuda_matrix.cu b/paddle/cuda/src/hl_cuda_matrix.cu index f626c07a0c..3bf1b0251f 100644 --- a/paddle/cuda/src/hl_cuda_matrix.cu +++ b/paddle/cuda/src/hl_cuda_matrix.cu @@ -593,21 +593,28 @@ void hl_matrix_rotate( CHECK_SYNC("hl_matrix_rotate failed"); } - -__global__ void keMatrixVol2Col( - int num_kernels, real*dataSrc, real* dataDst, - int depth, int height, int width, - int filterD, int filterH, int filterW, - int strideD, int strideH, int strideW, - int paddingD, int paddingH, int paddingW, - int depth_col, int height_col, int width_col){ - - for (int index = blockIdx.x * blockDim.x + threadIdx.x; - index < num_kernels; - index += blockDim.x * gridDim.x){ - +__global__ void keMatrixVol2Col(int num_kernels, + real* dataSrc, + real* dataDst, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, + int depth_col, + int height_col, + int width_col) { + for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < num_kernels; + index += blockDim.x * gridDim.x) { int w_out = index % width_col; - int h_out = (index / width_col ) % height_col; + int h_out = (index / width_col) % height_col; int d_out = (index / width_col / height_col) % depth_col; int channel_in = index / width_col / height_col / depth_col; int channel_out = channel_in * filterD * filterH * filterW; @@ -615,7 +622,9 @@ __global__ void keMatrixVol2Col( int h_in = h_out * strideH - paddingH; int d_in = d_out * strideD - paddingD; - dataDst += ((channel_out * depth_col + d_out) * height_col + h_out) * width_col + w_out; + dataDst += + ((channel_out * depth_col + d_out) * height_col + h_out) * width_col + + w_out; dataSrc += ((channel_in * depth + d_in) * height + h_in) * width + w_in; for (int k = 0; k < filterD; ++k) { for (int i = 0; i < filterH; ++i) { @@ -623,8 +632,10 @@ __global__ void keMatrixVol2Col( int d = d_in + k; int h = h_in + i; int w = w_in + j; - *dataDst = (d >= 0 && d < depth && h >= 0 && h < height && w >= 0 && w < width ) ? - dataSrc[(k * height + i) * width + j] : 0; + *dataDst = (d >= 0 && d < depth && h >= 0 && h < height && w >= 0 && + w < width) + ? 
dataSrc[(k * height + i) * width + j] + : 0; dataDst += depth_col * height_col * width_col; } } @@ -633,11 +644,20 @@ __global__ void keMatrixVol2Col( } void hl_matrix_vol2Col(real* dataSrc, - int channels, int depth, int height, int width, - int filterD, int filterH, int filterW, - int strideD, int strideH, int strideW, - int paddingD, int paddingH, int paddingW, real* dataDst){ - + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, + real* dataDst) { int depth_col = (depth + 2 * paddingD - filterD) / strideD + 1; int height_col = (height + 2 * paddingH - filterH) / strideH + 1; int width_col = (width + 2 * paddingW - filterW) / strideW + 1; @@ -646,34 +666,55 @@ void hl_matrix_vol2Col(real* dataSrc, const int threads = 512; const int blocks = DIVUP(num_kernels, threads); - keMatrixVol2Col<<< blocks, threads >>>( - num_kernels, dataSrc, dataDst, - depth, height, width, - filterD, filterH, filterW, - strideD, strideH, strideW, - paddingD, paddingH, paddingW, - depth_col, height_col, width_col); + keMatrixVol2Col<<>>(num_kernels, + dataSrc, + dataDst, + depth, + height, + width, + filterD, + filterH, + filterW, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + depth_col, + height_col, + width_col); CHECK_SYNC("hl_matrix_vol2Col failed"); } -__global__ void keMatrixCol2Vol( - int num_kernels, real*dataDst, real* dataSrc, - int depth, int height, int width, - int filterD, int filterH, int filterW, - int strideD, int strideH, int strideW, - int paddingD, int paddingH, int paddingW, - int depth_col, int height_col, int width_col, - real alpha, real beta){ - - for (int index = blockIdx.x * blockDim.x + threadIdx.x; - index < num_kernels; +__global__ void keMatrixCol2Vol(int num_kernels, + real* dataDst, + real* dataSrc, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, + int depth_col, + int height_col, + int width_col, + real alpha, + real beta) { + for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < num_kernels; index += blockDim.x * gridDim.x) { - - real val = 0; + real srcVal = 0; + real dstVal = dataDst[index]; int w = index % width + paddingW; int h = (index / width) % height + paddingH; int d = (index / width / height) % depth + paddingD; - int c = index / (width * height * depth); + int c = index / width / height / depth; // compute the start and end of the output int w_col_start = (w < filterW) ? 0 : (w - filterW) / strideW + 1; int w_col_end = min(w / strideW + 1, width_col); @@ -682,32 +723,45 @@ __global__ void keMatrixCol2Vol( int d_col_start = (d < filterD) ? 
0 : (d - filterD) / strideD + 1; int d_col_end = min(d / strideD + 1, depth_col); - int offset = (c * filterD * filterW * filterH + \ - d * filterW * filterH + h * filterW + w) * depth_col * height_col * width_col; + int offset = (c * filterD * filterW * filterH + d * filterW * filterH + + h * filterW + w) * + depth_col * height_col * width_col; - int coeff_d_col = (1 - strideD * filterW * filterH * depth_col) * height_col * width_col; - int coeff_h_col = (1 - strideH * filterW * depth_col * height_col) * width_col; + int coeff_d_col = + (1 - strideD * filterW * filterH * depth_col) * height_col * width_col; + int coeff_h_col = + (1 - strideH * filterW * depth_col * height_col) * width_col; int coeff_w_col = (1 - strideW * depth_col * height_col * width_col); for (int d_col = d_col_start; d_col < d_col_end; ++d_col) { for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { - val += dataSrc[offset + d_col * coeff_d_col + h_col * coeff_h_col + w_col * coeff_w_col]; + srcVal += dataSrc[offset + d_col * coeff_d_col + h_col * coeff_h_col + + w_col * coeff_w_col]; } } } - dataDst[index] = val; + dataDst[index] = alpha * srcVal + beta * dstVal; } } void hl_matrix_col2Vol(real* dataDst, - int channels, int depth, int height, int width, - int filterD, int filterH, int filterW, - int strideD, int strideH, int strideW, - int paddingD, int paddingH, int paddingW, + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, real* dataSrc, - real alpha, real beta){ - + real alpha, + real beta) { int depth_col = (depth + 2 * paddingD - filterD) / strideD + 1; int height_col = (height + 2 * paddingH - filterH) / strideH + 1; int width_col = (width + 2 * paddingW - filterW) / strideW + 1; @@ -716,14 +770,26 @@ void hl_matrix_col2Vol(real* dataDst, const int threads = 512; const int blocks = DIVUP(num_kernels, threads); - keMatrixCol2Vol<<< blocks, threads >>>( - num_kernels, dataDst, dataSrc, - depth, height, width, - filterD, filterH, filterW, - strideD, strideH, strideW, - paddingD, paddingH, paddingW, - depth_col, height_col, width_col, - alpha, beta); + keMatrixCol2Vol<<>>(num_kernels, + dataDst, + dataSrc, + depth, + height, + width, + filterD, + filterH, + filterW, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + depth_col, + height_col, + width_col, + alpha, + beta); CHECK_SYNC("hl_matrix_col2Vol failed"); } From 62e6dac402ca63b402b5dfd1d7649cba1e258d41 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 18 Aug 2017 14:30:09 +0800 Subject: [PATCH 036/170] add MKLDNNMatrix files --- paddle/gserver/layers/MKLDNNLayer.h | 1 + paddle/math/CMakeLists.txt | 15 ++++++++++ paddle/math/MKLDNNMatrix.cpp | 19 ++++++++++++ paddle/math/MKLDNNMatrix.h | 45 +++++++++++++++++++++++++++++ 4 files changed, 80 insertions(+) create mode 100644 paddle/math/MKLDNNMatrix.cpp create mode 100644 paddle/math/MKLDNNMatrix.h diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index 63e29f447e..9533027fa6 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -18,6 +18,7 @@ limitations under the License. 
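
The rewritten keMatrixCol2Vol blends the gathered sum into the destination as alpha * src + beta * dst, matching the alpha/beta parameters of Matrix::col2Vol. A trivial standalone illustration of that blending, assuming alpha = beta = 1.0 as the 3-D layers in this series pass it:

// Illustrative sketch (not from the patch): per-element blending in col2Vol.
#include <cassert>

double col2VolElement(double gathered, double dst, double alpha, double beta) {
  return alpha * gathered + beta * dst;
}

int main() {
  assert(col2VolElement(3.0, 2.0, 1.0, 1.0) == 5.0);  // accumulate into existing data
  assert(col2VolElement(3.0, 2.0, 1.0, 0.0) == 3.0);  // overwrite existing data
  return 0;
}
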
*/ #include "Layer.h" #include "MKLDNNBase.h" #include "mkldnn.hpp" +#include "paddle/math/MKLDNNMatrix.h" DECLARE_bool(use_mkldnn); DECLARE_bool(use_mkldnn_wgt); diff --git a/paddle/math/CMakeLists.txt b/paddle/math/CMakeLists.txt index bf28092e82..ad6de18c81 100644 --- a/paddle/math/CMakeLists.txt +++ b/paddle/math/CMakeLists.txt @@ -14,6 +14,21 @@ # file(GLOB MATH_HEADERS . *.h) file(GLOB MATH_SOURCES . *.cpp) + +message(STATUS "----------MATH_HEADERS:${MATH_HEADERS}") +message(STATUS "----------MATH_SOURCES:${MATH_SOURCES}") +if(NOT WITH_MKLDNN) + file(GLOB_RECURSE DNN_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.h") + file(GLOB_RECURSE DNN_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.cpp") + message(STATUS "----------DNN_HEADER:${DNN_HEADER}") + message(STATUS "----------DNN_SOURCES:${DNN_SOURCES}") + list(REMOVE_ITEM MATH_HEADERS ${DNN_HEADER}) + list(REMOVE_ITEM MATH_SOURCES ${DNN_SOURCES}) + message(STATUS "Skip compiling with MKLDNNMatrix") +else() + message(STATUS "Compile with MKLDNNMatrix") +endif() + set(MATH_SOURCES "${PADDLE_SOURCE_DIR}/paddle/math/BaseMatrix.cu" "${PADDLE_SOURCE_DIR}/paddle/math/TrainingAlgorithmOp.cu" diff --git a/paddle/math/MKLDNNMatrix.cpp b/paddle/math/MKLDNNMatrix.cpp new file mode 100644 index 0000000000..df8e72d78b --- /dev/null +++ b/paddle/math/MKLDNNMatrix.cpp @@ -0,0 +1,19 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "MKLDNNMatrix.h" + +using namespace mkldnn; // NOLINT + +namespace paddle {} // namespace paddle diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h new file mode 100644 index 0000000000..91ef56f2c3 --- /dev/null +++ b/paddle/math/MKLDNNMatrix.h @@ -0,0 +1,45 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +//#include "Matrix.h" +#include "Vector.h" + +#include "mkldnn.hpp" +#include "paddle/parameter/Parameter.h" + +namespace paddle { + +static const std::map PARAM_FOARMAT_MAP = + {{mkldnn::memory::format::oi, PARAM_FORMAT_MKLDNN_OI}}; + +class MKLDNNMatrix; +typedef std::shared_ptr MKLDNNMatrixPtr; + +/** + * @brief MKLDNN Matrix. 
+ * + */ +class MKLDNNMatrix : public CpuVector { +public: + explicit MKLDNNMatrix(size_t size, int fmt) : CpuVector(size), fmt_(fmt) {} + + ~MKLDNNMatrix() {} + +protected: + int fmt_; +}; + +} // namespace paddle From 38cc5dadcc5c76c4aa50f5e92b560f4ccaba9227 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Fri, 18 Aug 2017 16:43:59 +0800 Subject: [PATCH 037/170] modified bias shape of ConvLayer --- paddle/gserver/layers/Conv3DLayer.cpp | 5 ----- paddle/gserver/layers/ConvBaseLayer.cpp | 17 ++++++++--------- paddle/gserver/layers/DeConv3DLayer.cpp | 5 ----- 3 files changed, 8 insertions(+), 19 deletions(-) diff --git a/paddle/gserver/layers/Conv3DLayer.cpp b/paddle/gserver/layers/Conv3DLayer.cpp index 5609a4cc73..106909824d 100644 --- a/paddle/gserver/layers/Conv3DLayer.cpp +++ b/paddle/gserver/layers/Conv3DLayer.cpp @@ -37,11 +37,6 @@ bool Conv3DLayer::init(const LayerMap &layerMap, weights_[index]->getWGrad()->getHeight()); ++index; } - if (nullptr != biases_->getWGrad()) - biases_->getWGrad()->reshape(biases_->getWGrad()->width_, - biases_->getWGrad()->height_); - if (nullptr != biases_->getW()) - biases_->getW()->reshape(biases_->getW()->width_, biases_->getW()->height_); CHECK(inputLayers_.size() == parameters_.size()); return true; } diff --git a/paddle/gserver/layers/ConvBaseLayer.cpp b/paddle/gserver/layers/ConvBaseLayer.cpp index e437b0b86e..6bcbe0ddb2 100644 --- a/paddle/gserver/layers/ConvBaseLayer.cpp +++ b/paddle/gserver/layers/ConvBaseLayer.cpp @@ -21,11 +21,10 @@ bool ConvBaseLayer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { /* Initialize the basic parent class */ Layer::init(layerMap, parameterMap); - isDeconv_ = (config_.type() == "exconv" || - config_.type() == "cudnn_conv" || - config_.type() == "conv3d" || - config_.type() == "deconv3d" ) - ? false : true; + isDeconv_ = (config_.type() == "exconv" || config_.type() == "cudnn_conv" || + config_.type() == "conv3d" || config_.type() == "deconv3d") + ? 
false + : true; /* Initialize the convolutional layer parameter */ numFilters_ = config_.num_filters(); @@ -52,8 +51,8 @@ bool ConvBaseLayer::init(const LayerMap& layerMap, filterSizeZ_.push_back(conf.filter_size_z()); imgSizeD_.push_back(conf.img_size_z()); outputD_.push_back(conf.output_z()); - filterPixels_.push_back( - filterSize_.back() * filterSizeY_.back() * filterSizeZ_.back()); + filterPixels_.push_back(filterSize_.back() * filterSizeY_.back() * + filterSizeZ_.back()); } CHECK(inputLayers_.size() == parameters_.size()); @@ -73,10 +72,10 @@ bool ConvBaseLayer::init(const LayerMap& layerMap, if (sharedBiases_) { CHECK_EQ((size_t)numFilters_, biasParameter_->getSize()); biases_ = - std::unique_ptr(new Weight(numFilters_, 1, biasParameter_)); + std::unique_ptr(new Weight(1, numFilters_, biasParameter_)); } else { biases_ = - std::unique_ptr(new Weight(getSize(), 1, biasParameter_)); + std::unique_ptr(new Weight(1, getSize(), biasParameter_)); } } diff --git a/paddle/gserver/layers/DeConv3DLayer.cpp b/paddle/gserver/layers/DeConv3DLayer.cpp index 286f5b985c..5a54a68447 100644 --- a/paddle/gserver/layers/DeConv3DLayer.cpp +++ b/paddle/gserver/layers/DeConv3DLayer.cpp @@ -39,11 +39,6 @@ bool DeConv3DLayer::init(const LayerMap &layerMap, weights_[index]->getWGrad()->reshape(filterPixels_[index] * numFilters_, filterChannels_[index]); } - if (biases_->getWGrad()) - biases_->getWGrad()->reshape(biases_->getWGrad()->width_, - biases_->getWGrad()->height_); - if (biases_->getW()) - biases_->getW()->reshape(biases_->getW()->width_, biases_->getW()->height_); CHECK(inputLayers_.size() == parameters_.size()); return true; } From 3065cb26258e1a7a014c6e367747214615832c3a Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Fri, 18 Aug 2017 17:43:06 +0800 Subject: [PATCH 038/170] add huber_regression_cost --- doc/api/v2/config/layer.rst | 5 ++ paddle/gserver/layers/CostLayer.cpp | 55 +++++++++++++++++++ paddle/gserver/layers/CostLayer.h | 24 ++++++++ paddle/gserver/tests/test_LayerGrad.cpp | 20 ++++++- proto/ModelConfig.proto | 3 + python/paddle/trainer/config_parser.py | 11 ++++ .../paddle/trainer_config_helpers/layers.py | 53 ++++++++++++++++++ .../protostr/test_cost_layers.protostr | 17 ++++++ .../tests/configs/test_cost_layers.py | 2 + python/paddle/v2/tests/test_layer.py | 5 +- 10 files changed, 192 insertions(+), 3 deletions(-) diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst index 22a6b2ab84..9a5901616f 100644 --- a/doc/api/v2/config/layer.rst +++ b/doc/api/v2/config/layer.rst @@ -409,6 +409,11 @@ multi_binary_label_cross_entropy_cost .. autoclass:: paddle.v2.layer.multi_binary_label_cross_entropy_cost :noindex: +huber_regression_cost +------------------------- +.. autoclass:: paddle.v2.layer.huber_regression_cost + :noindex: + huber_classification_cost ------------------------- .. autoclass:: paddle.v2.layer.huber_classification_cost diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp index 69cf393225..91a742422e 100644 --- a/paddle/gserver/layers/CostLayer.cpp +++ b/paddle/gserver/layers/CostLayer.cpp @@ -594,6 +594,61 @@ void HuberCost::forwardImp(Matrix& output, Argument& label, Matrix& cost) { } } +// +// Huber loss for robust regression. 
+// +REGISTER_LAYER(huber_regression, HuberRegressionLoss); + +bool HuberRegressionLoss::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + HuberCost::init(layerMap, parameterMap); + delta_ = config_.delta(); + return true; +} + +void HuberRegressionLoss::forwardImp(Matrix& output, + Argument& label, + Matrix& target) { + HuberCost::forwardImp(output, label, target); + size_t numSamples = target.getHeight(); + CHECK(label.value); + CHECK_EQ((*label.value).getHeight(), numSamples); + CHECK_EQ(output.getHeight(), numSamples); + CHECK_EQ(output.getWidth(), (*label.value).getWidth()); + CHECK_EQ(target.getWidth(), (size_t)1); + + real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData(); + real* lbl = + useGpu_ ? tmpCpuInput_[1].value->getData() : (*label.value).getData(); + std::vector cost(numSamples); + for (size_t i = 0; i < numSamples; ++i) { + real a = std::abs(lbl[i] - out[i]); + if (a <= delta_) + cost[i] = a * a / 2; + else + cost[i] = delta_ * (a - delta_ / 2); + } + target.copyFrom(cost.data(), numSamples); +} + +void HuberRegressionLoss::backwardImp(Matrix& output, + Argument& label, + Matrix& outputG) { + size_t numSamples = output.getHeight(); + real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData(); + real* lbl = + useGpu_ ? tmpCpuInput_[1].value->getData() : (*label.value).getData(); + real* grad = useGpu_ ? tmpCpuInput_[0].grad->getData() : outputG.getData(); + for (size_t i = 0; i < numSamples; ++i) { + real a = lbl[i] - out[i]; + if (std::abs(a) <= delta_) + grad[i] += -a; + else + grad[i] += a > 0 ? delta_ : -delta_; + } + if (useGpu_) outputG.copyFrom(grad, numSamples); +} + // // Huber loss for robust 2-classes classification // diff --git a/paddle/gserver/layers/CostLayer.h b/paddle/gserver/layers/CostLayer.h index c006dc8110..0ce72ef40a 100644 --- a/paddle/gserver/layers/CostLayer.h +++ b/paddle/gserver/layers/CostLayer.h @@ -321,6 +321,30 @@ public: void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad) {} }; +/** + * Huber loss for robust regression. + * + * Given output f(x), label y and delta, the loss is: + * Loss = 0.5 * (1 - y * f)^2, if abs(y - f) <= delta \\ + * Loss = delta * abs(y - f) - 0.5 * delta^2, otherwise + */ +class HuberRegressionLoss : public HuberCost { +public: + explicit HuberRegressionLoss(const LayerConfig& config) : HuberCost(config) {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; + + void backwardImp(Matrix& outputValue, + Argument& label, + Matrix& outputGrad) override; + +protected: + real delta_; +}; + /** * Huber loss for robust 2-classes classification. 
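
HuberRegressionLoss::forwardImp above implements the piecewise loss 0.5 * a^2 for a = |y - f(x)| <= delta and delta * (a - delta / 2) otherwise. A standalone numeric check at a few residuals, with delta = 1 chosen for the sketch:

// Illustrative sketch (not from the patch): the piecewise Huber regression loss.
#include <cassert>
#include <cmath>

double huber(double label, double out, double delta) {
  double a = std::fabs(label - out);
  return a <= delta ? 0.5 * a * a : delta * (a - 0.5 * delta);
}

int main() {
  const double delta = 1.0;
  assert(huber(1.0, 0.5, delta) == 0.125);  // quadratic region
  assert(huber(1.0, 0.0, delta) == 0.5);    // boundary, both branches agree
  assert(huber(4.0, 1.0, delta) == 2.5);    // linear region, grows only linearly
  // the two branches meet at |y - f| == delta, so the loss stays smooth near zero
  // residual while growing only linearly for outliers
  return 0;
}
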
* diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 6d60250f6d..c522b20f0e 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -828,6 +828,24 @@ TEST(Layer, square_error_weighted) { } } +TEST(Layer, huber_regression_loss) { + TestConfig config; + config.layerConfig.set_type("huber_regression"); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); + config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 10, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + for (auto delta : {1, 3, 5}) { + config.layerConfig.set_delta(delta); + testLayerGrad(config, "huber_regression", 100, /* trans */ false, useGpu); + } + } +} + TEST(Layer, huber_two_class) { TestConfig config; config.layerConfig.set_type("huber_classification"); @@ -839,7 +857,7 @@ TEST(Layer, huber_two_class) { config.layerConfig.add_inputs(); for (auto useGpu : {false, true}) { - testLayerGrad(config, "huber", 100, /* trans */ false, useGpu); + testLayerGrad(config, "huber_two_class", 100, /* trans */ false, useGpu); } } diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 4f3d5bf3f6..e19e0f85f3 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -496,6 +496,9 @@ message LayerConfig { optional int32 axis = 54 [ default = 2 ]; repeated uint32 offset = 55; repeated uint32 shape = 56; + + // for HuberRegressionLoss + optional double delta = 57 [ default = 1.0 ]; } message EvaluatorConfig { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 248da9417f..a3ca3f2510 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2317,6 +2317,17 @@ class LambdaCost(LayerBase): self.config.max_sort_size = max_sort_size +@config_layer('huber_regression') +class HuberRegressionLoss(LayerBase): + def __init__(self, name, inputs, delta=1., coeff=1., device=None): + super(HuberRegressionLoss, self).__init__( + name, 'huber_regression', 1, inputs=inputs, device=device) + config_assert( + len(self.inputs) == 2, 'HuberRegression must have 2 inputs') + self.config.delta = delta + self.config.coeff = coeff + + @config_layer('nce') class NCELayer(LayerBase): def __init__(self, diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 20d96efe15..d61c94dc82 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -108,6 +108,7 @@ __all__ = [ 'sum_cost', 'rank_cost', 'lambda_cost', + 'huber_regression_cost', 'huber_classification_cost', 'block_expand_layer', 'maxout_layer', @@ -216,6 +217,7 @@ class LayerType(object): RANK_COST = 'rank-cost' LAMBDA_COST = 'lambda_cost' + HUBER_REGRESSION = 'huber_regression' HUBER_CLASSIFICATION = 'huber_classification' CROSS_ENTROPY = 'multi-class-cross-entropy' CROSS_ENTROPY_WITH_SELFNORM = 'multi_class_cross_entropy_with_selfnorm' @@ -5603,6 +5605,57 @@ def sum_cost(input, name=None, layer_attr=None): return LayerOutput(name, LayerType.SUM_COST, parents=[input], size=1) +@wrap_name_default() +@layer_support() +def huber_regression_cost(input, + label, + name=None, + delta=1.0, + coeff=1.0, + layer_attr=None): + """ + In statistics, the Huber loss is a loss function used in robust regression, + that is less sensitive to outliers in data than the squared error loss. 
+ Given a prediction f(x), a label y and :math:`\delta`, the loss function + is defined as: + + .. math: + loss = 0.5*\left ( y-f(x) \right )^2, \left | y-f(x) \right |\leq \delta + loss = \delta \left | y-f(x) \right |-0.5\delta ^2, otherwise + + The example usage is: + + .. code-block:: python + + cost = huber_regression_cost(input=input_layer, label=label_layer) + + :param input: The first input layer. + :type input: LayerOutput. + :param label: The input label. + :type input: LayerOutput. + :param name: The name of this layers. It is not necessary. + :type name: None|basestring. + :param delta: The difference between the observed and predicted values. + :type delta: float. + :param coeff: The coefficient affects the gradient in the backward. + :type coeff: float. + :param layer_attr: Extra Layer Attribute. + :type layer_attr: ExtraLayerAttribute + :return: LayerOutput object. + :rtype: LayerOutput. + """ + assert isinstance(input, LayerOutput) + Layer( + name=name, + type=LayerType.HUBER_REGRESSION, + inputs=[input.name, label.name], + delta=delta, + coeff=coeff, + **ExtraLayerAttribute.to_kwargs(layer_attr)) + return LayerOutput( + name, LayerType.HUBER_REGRESSION, parents=[input, label], size=1) + + @wrap_name_default() @layer_support() def huber_classification_cost(input, diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr index a64e5ea0dd..55ab464ddf 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr @@ -167,6 +167,20 @@ layers { softmax_selfnorm_alpha: 0.1 coeff: 1.0 } +layers { + name: "__huber_regression_cost_0__" + type: "huber_regression" + size: 1 + active_type: "" + inputs { + input_layer_name: "input" + } + inputs { + input_layer_name: "labels" + } + coeff: 1.0 + delta: 1.0 +} layers { name: "huber_probs" type: "data" @@ -300,6 +314,7 @@ output_layer_names: "__rank_cost_0__" output_layer_names: "__lambda_cost_0__" output_layer_names: "__cross_entropy_0__" output_layer_names: "__cross_entropy_with_selfnorm_0__" +output_layer_names: "__huber_regression_cost_0__" output_layer_names: "__huber_classification_cost_0__" output_layer_names: "__multi_binary_label_cross_entropy_0__" output_layer_names: "__sum_cost_0__" @@ -324,6 +339,7 @@ sub_models { layer_names: "__lambda_cost_0__" layer_names: "__cross_entropy_0__" layer_names: "__cross_entropy_with_selfnorm_0__" + layer_names: "__huber_regression_cost_0__" layer_names: "huber_probs" layer_names: "huber_label" layer_names: "__huber_classification_cost_0__" @@ -349,6 +365,7 @@ sub_models { output_layer_names: "__lambda_cost_0__" output_layer_names: "__cross_entropy_0__" output_layer_names: "__cross_entropy_with_selfnorm_0__" + output_layer_names: "__huber_regression_cost_0__" output_layer_names: "__huber_classification_cost_0__" output_layer_names: "__multi_binary_label_cross_entropy_0__" output_layer_names: "__sum_cost_0__" diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py index 98bf026d60..7ce375c708 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py @@ -33,6 +33,8 @@ outputs( input=probs, label=xe_label), cross_entropy_with_selfnorm( input=probs, 
label=xe_label), + huber_regression_cost( + input=seq_in, label=labels), huber_classification_cost( input=data_layer( name='huber_probs', size=1), diff --git a/python/paddle/v2/tests/test_layer.py b/python/paddle/v2/tests/test_layer.py index 7373a55ce6..783a0ca85d 100644 --- a/python/paddle/v2/tests/test_layer.py +++ b/python/paddle/v2/tests/test_layer.py @@ -141,12 +141,13 @@ class CostLayerTest(unittest.TestCase): cost8 = layer.rank_cost(left=score, right=score, label=score) cost9 = layer.lambda_cost(input=inference, score=score) cost10 = layer.sum_cost(input=inference) - cost11 = layer.huber_classification_cost(input=score, label=label) + cost11 = layer.huber_regression_cost(input=score, label=label) + cost12 = layer.huber_classification_cost(input=score, label=label) print layer.parse_network([cost1, cost2]) print layer.parse_network([cost3, cost4]) print layer.parse_network([cost5, cost6]) - print layer.parse_network([cost7, cost8, cost9, cost10, cost11]) + print layer.parse_network([cost7, cost8, cost9, cost10, cost11, cost12]) crf = layer.crf(input=inference, label=label) crf_decoding = layer.crf_decoding(input=inference, size=3) From 59b3df31aa3f960753bf0d0d922319124e04301e Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Sun, 20 Aug 2017 14:52:23 +0800 Subject: [PATCH 039/170] Extract OpInfo into a library Fix cycle dependencies, Fix #3583. --- paddle/framework/CMakeLists.txt | 4 +-- paddle/framework/backward_test.cc | 4 +-- paddle/framework/grad_op_builder.cc | 20 +++++++------- paddle/framework/op_info.cc | 30 +++++++++++++++++++++ paddle/framework/op_info.h | 42 +++++++++++++++++++++++++++++ paddle/framework/op_registry.cc | 37 +++++++++++++------------ paddle/framework/op_registry.h | 35 ++++++------------------ paddle/framework/operator.cc | 8 +++--- paddle/framework/operator.h | 27 ++++++++++--------- paddle/framework/operator_test.cc | 9 ++++--- paddle/framework/pybind.cc | 2 +- paddle/operators/net_op.cc | 5 ++-- paddle/operators/net_op.h | 6 +++-- paddle/operators/recurrent_op.cc | 8 +++--- paddle/operators/recurrent_op.h | 10 ++++--- 15 files changed, 152 insertions(+), 95 deletions(-) create mode 100644 paddle/framework/op_info.cc create mode 100644 paddle/framework/op_info.h diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 68304c9fc8..59012ea8c1 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -18,8 +18,8 @@ cc_test(scope_test SRCS scope_test.cc DEPS scope) proto_library(framework_proto SRCS framework.proto) cc_library(attribute SRCS attribute.cc DEPS framework_proto) - -cc_library(operator SRCS operator.cc DEPS framework_proto device_context tensor scope attribute) +cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto) +cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope) cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry) cc_library(grad_op_builder SRCS grad_op_builder.cc DEPS operator) diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc index 2c5ec76dfe..bcdfae132c 100644 --- a/paddle/framework/backward_test.cc +++ b/paddle/framework/backward_test.cc @@ -72,8 +72,8 @@ class NoGradOpMaker : public OpProtoAndCheckerMaker { class FcOp : public operators::NetOp { public: - FcOp(const std::string &type, const VarNameMap &inputs, - const VarNameMap &outputs, const AttributeMap &attrs) + FcOp(const std::string &type, const VariableNameMap &inputs, + const VariableNameMap &outputs, const AttributeMap &attrs) : 
NetOp(type, inputs, outputs, attrs) { AddOp(OpRegistry::CreateOp("mul", {{"X", {Input("X")}}, {"Y", {Input("W")}}}, diff --git a/paddle/framework/grad_op_builder.cc b/paddle/framework/grad_op_builder.cc index 0a2a41f6b6..fcc5d7a216 100644 --- a/paddle/framework/grad_op_builder.cc +++ b/paddle/framework/grad_op_builder.cc @@ -20,11 +20,11 @@ namespace framework { enum class OpArgType { IN, OUT }; static void TransOpArg(const OperatorBase* src_op, const OpArgType& src_type, - bool is_grad, OperatorBase::VarNameMap* vars) { + bool is_grad, VariableNameMap* vars) { const auto& src_inout = src_type == OpArgType::IN ? src_op->Inputs() : src_op->Outputs(); auto& dst_inout = *vars; - const OpProto* proto = OpRegistry::op_info_map().at(src_op->Type()).proto_; + const OpProto* proto = OpInfoMap().at(src_op->Type()).proto_; const auto& src_arg_list = src_type == OpArgType::IN ? proto->inputs() : proto->outputs(); for (const auto& arg : src_arg_list) { @@ -40,25 +40,25 @@ static void TransOpArg(const OperatorBase* src_op, const OpArgType& src_type, } OperatorBase* BuildGradOp(const OperatorBase* op) { - auto it = OpRegistry::op_info_map().find(op->Type()); - PADDLE_ENFORCE(it != OpRegistry::op_info_map().end(), - "'%s' has not been registered.", op->Type()); + auto it = OpInfoMap().find(op->Type()); + PADDLE_ENFORCE(it != OpInfoMap().end(), "'%s' has not been registered.", + op->Type()); PADDLE_ENFORCE(it->second.proto_ != nullptr, "'%s' has no OpProto.", op->Type()); std::string grad_op_type = it->second.grad_op_type_; PADDLE_ENFORCE(!grad_op_type.empty(), "'%s' has no gradient operator.", op->Type()); - OperatorBase::VarNameMap inputs; - OperatorBase::VarNameMap outputs; + VariableNameMap inputs; + VariableNameMap outputs; TransOpArg(op, OpArgType::IN, false, &inputs); // I TransOpArg(op, OpArgType::OUT, false, &inputs); // O TransOpArg(op, OpArgType::OUT, true, &inputs); // OG TransOpArg(op, OpArgType::IN, true, &outputs); // IG - it = OpRegistry::op_info_map().find(grad_op_type); - PADDLE_ENFORCE(it != OpRegistry::op_info_map().end(), - "'%s' has not been registered.", grad_op_type); + it = OpInfoMap().find(grad_op_type); + PADDLE_ENFORCE(it != OpInfoMap().end(), "'%s' has not been registered.", + grad_op_type); return it->second.creator_(grad_op_type, inputs, outputs, op->Attrs()); } diff --git a/paddle/framework/op_info.cc b/paddle/framework/op_info.cc new file mode 100644 index 0000000000..f928ac6473 --- /dev/null +++ b/paddle/framework/op_info.cc @@ -0,0 +1,30 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#include "paddle/framework/op_info.h" + +namespace paddle { +namespace framework { + +static std::unordered_map* + g_op_info_map = nullptr; +std::unordered_map& OpInfoMap() { + if (g_op_info_map == nullptr) { + g_op_info_map = + new std::unordered_map(); + } + return *g_op_info_map; +} +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/op_info.h b/paddle/framework/op_info.h new file mode 100644 index 0000000000..fdd0ed77d4 --- /dev/null +++ b/paddle/framework/op_info.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once +#include +#include +#include +#include + +#include "paddle/framework/attribute.h" + +namespace paddle { +namespace framework { +class OperatorBase; +using VariableNameMap = std::map>; + +using OpCreator = std::function; + +struct OpInfo { + OpCreator creator_; + std::string grad_op_type_; + OpProto* proto_; + OpAttrChecker* checker_; +}; + +extern std::unordered_map& OpInfoMap(); + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/op_registry.cc b/paddle/framework/op_registry.cc index 8eae86e960..e03dc3a73d 100644 --- a/paddle/framework/op_registry.cc +++ b/paddle/framework/op_registry.cc @@ -19,32 +19,20 @@ limitations under the License. 
*/ namespace paddle { namespace framework { -std::unique_ptr OpRegistry::CreateOp(const std::string& type, - const VarNameMap& inputs, - const VarNameMap& outputs, - AttributeMap attrs) { - auto it = op_info_map().find(type); - PADDLE_ENFORCE(it != op_info_map().end(), +std::unique_ptr OpRegistry::CreateOp( + const std::string& type, const VariableNameMap& inputs, + const VariableNameMap& outputs, AttributeMap attrs) { + auto it = OpInfoMap().find(type); + PADDLE_ENFORCE(it != OpInfoMap().end(), "Operator '%s' has not been registered.", type); it->second.checker_->Check(attrs); auto op = it->second.creator_(type, inputs, outputs, attrs); return std::unique_ptr(op); } -std::unique_ptr OpRegistry::CreateOp(const OpDesc& op_desc) { - VarNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs()); - VarNameMap outputs = ConvertOpDescVarsToVarNameMap(op_desc.outputs()); - AttributeMap attrs; - for (auto& attr : op_desc.attrs()) { - attrs[attr.name()] = GetAttrValue(attr); - } - - return CreateOp(op_desc.type(), inputs, outputs, attrs); -} - -OperatorBase::VarNameMap OpRegistry::ConvertOpDescVarsToVarNameMap( +static VariableNameMap ConvertOpDescVarsToVarNameMap( const google::protobuf::RepeatedPtrField& op_desc_vars) { - VarNameMap ret_val; + VariableNameMap ret_val; for (auto& var : op_desc_vars) { auto& var_names = ret_val[var.parameter()]; auto& var_names_in_proto = var.arguments(); @@ -55,6 +43,17 @@ OperatorBase::VarNameMap OpRegistry::ConvertOpDescVarsToVarNameMap( return ret_val; } +std::unique_ptr OpRegistry::CreateOp(const OpDesc& op_desc) { + VariableNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs()); + VariableNameMap outputs = ConvertOpDescVarsToVarNameMap(op_desc.outputs()); + AttributeMap attrs; + for (auto& attr : op_desc.attrs()) { + attrs[attr.name()] = GetAttrValue(attr); + } + + return CreateOp(op_desc.type(), inputs, outputs, attrs); +} + std::unique_ptr OpRegistry::CreateGradOp(const OperatorBase& op) { PADDLE_ENFORCE(!op.IsNetOp(), "Use framework::Backward to get backward ops"); return std::unique_ptr(BuildGradOp(&op)); diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index 4c2d13d639..06530bc7d0 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -23,6 +23,7 @@ limitations under the License. 
*/ #include "paddle/framework/attribute.h" #include "paddle/framework/framework.pb.h" #include "paddle/framework/grad_op_builder.h" +#include "paddle/framework/op_info.h" #include "paddle/framework/operator.h" #include "paddle/framework/scope.h" @@ -30,28 +31,16 @@ namespace paddle { namespace framework { class OpRegistry { - using VarNameMap = OperatorBase::VarNameMap; - using OpCreator = std::function; - public: - struct OpInfo { - OpCreator creator_; - std::string grad_op_type_; - OpProto* proto_; - OpAttrChecker* checker_; - }; - template static void RegisterOp(const std::string& op_type, const std::string& grad_op_type) { - PADDLE_ENFORCE(op_info_map().count(op_type) == 0, + PADDLE_ENFORCE(OpInfoMap().count(op_type) == 0, "'%s' is registered more than once.", op_type); OpInfo op_info; - op_info.creator_ = [](const std::string& type, const VarNameMap& inputs, - const VarNameMap& outputs, - const AttributeMap& attrs) { + op_info.creator_ = []( + const std::string& type, const VariableNameMap& inputs, + const VariableNameMap& outputs, const AttributeMap& attrs) { return new OpType(type, inputs, outputs, attrs); }; op_info.grad_op_type_ = grad_op_type; @@ -70,7 +59,7 @@ class OpRegistry { op_info.proto_ = nullptr; op_info.checker_ = nullptr; } - op_info_map().insert(std::make_pair(op_type, op_info)); + OpInfoMap().insert(std::make_pair(op_type, op_info)); // register gradient op if (!grad_op_type.empty()) { RegisterOp(grad_op_type, ""); @@ -78,21 +67,13 @@ class OpRegistry { } static std::unique_ptr CreateOp(const std::string& type, - const VarNameMap& inputs, - const VarNameMap& outputs, + const VariableNameMap& inputs, + const VariableNameMap& outputs, AttributeMap attrs); static std::unique_ptr CreateOp(const OpDesc& op_desc); - static VarNameMap ConvertOpDescVarsToVarNameMap( - const google::protobuf::RepeatedPtrField& op_desc_vars); - static std::unique_ptr CreateGradOp(const OperatorBase& op); - - static std::unordered_map& op_info_map() { - static std::unordered_map op_info_map_; - return op_info_map_; - } }; class Registrar { diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index eadd8f3316..48a7fe64ac 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -115,8 +115,8 @@ void OperatorBase::Rename(const std::string& old_name, } OperatorBase::OperatorBase(const std::string& type, - const OperatorBase::VarNameMap& inputs, - const OperatorBase::VarNameMap& outputs, + const VariableNameMap& inputs, + const VariableNameMap& outputs, const AttributeMap& attrs) : type_(type), inputs_(inputs), outputs_(outputs), attrs_(attrs) { static std::atomic gUniqId(0UL); @@ -141,9 +141,9 @@ std::vector OperatorBase::OutputVars(bool has_intermediate) const { } return ret_val; } - auto it = OpRegistry::op_info_map().find(type_); + auto it = OpInfoMap().find(type_); PADDLE_ENFORCE( - it != OpRegistry::op_info_map().end(), + it != OpInfoMap().end(), "Operator %s not registered, cannot figure out intermediate outputs", type_); PADDLE_ENFORCE( diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 8072980889..83dab8631d 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -19,6 +19,7 @@ limitations under the License. 
*/ #include #include +#include "op_info.h" #include "paddle/framework/attribute.h" #include "paddle/framework/framework.pb.h" #include "paddle/framework/scope.h" @@ -62,10 +63,8 @@ class ExecutionContext; */ class OperatorBase { public: - using VarNameMap = std::map>; - - OperatorBase(const std::string& type, const VarNameMap& inputs, - const VarNameMap& outputs, const AttributeMap& attrs); + OperatorBase(const std::string& type, const VariableNameMap& inputs, + const VariableNameMap& outputs, const AttributeMap& attrs); virtual ~OperatorBase() {} @@ -93,8 +92,8 @@ class OperatorBase { /// rename inputs outputs name void Rename(const std::string& old_name, const std::string& new_name); - const VarNameMap& Inputs() const { return inputs_; } - const VarNameMap& Outputs() const { return outputs_; } + const VariableNameMap& Inputs() const { return inputs_; } + const VariableNameMap& Outputs() const { return outputs_; } //! Get a input with argument's name described in `op_proto` const std::string& Input(const std::string& name) const; //! Get a input which has multiple variables. @@ -122,11 +121,11 @@ class OperatorBase { // I (Inputs)opear // O (Outputs) // OG (Output Gradients) - VarNameMap inputs_; + VariableNameMap inputs_; // NOTE: in case of OpGrad, outputs_ contains // IG (Inputs Gradients) - VarNameMap outputs_; + VariableNameMap outputs_; AttributeMap attrs_; }; @@ -142,9 +141,11 @@ class OperatorBase { // You can also use // using PARENT_CLASS::PARENT_CLASS; // to use parent's constructor. -#define DEFINE_OP_CONSTRUCTOR(CLS, PARENT_CLS) \ - CLS(const std::string& type, const VarNameMap& inputs, \ - const VarNameMap& outputs, const paddle::framework::AttributeMap& attrs) \ +#define DEFINE_OP_CONSTRUCTOR(CLS, PARENT_CLS) \ + CLS(const std::string& type, \ + const ::paddle::framework::VariableNameMap& inputs, \ + const ::paddle::framework::VariableNameMap& outputs, \ + const paddle::framework::AttributeMap& attrs) \ : PARENT_CLS(type, inputs, outputs, attrs) {} class NOP : public OperatorBase { @@ -389,8 +390,8 @@ class OperatorWithKernel : public OperatorBase { using OpKernelMap = std::unordered_map, OpKernelHash>; - OperatorWithKernel(const std::string& type, const VarNameMap& inputs, - const VarNameMap& outputs, const AttributeMap& attrs) + OperatorWithKernel(const std::string& type, const VariableNameMap& inputs, + const VariableNameMap& outputs, const AttributeMap& attrs) : OperatorBase(type, inputs, outputs, attrs) {} void InferShape(const Scope& scope) const override { diff --git a/paddle/framework/operator_test.cc b/paddle/framework/operator_test.cc index 2425b87779..1d7efb7b94 100644 --- a/paddle/framework/operator_test.cc +++ b/paddle/framework/operator_test.cc @@ -23,8 +23,8 @@ static int op_run_num = 0; class OpWithoutKernelTest : public OperatorBase { public: - OpWithoutKernelTest(const std::string& type, const VarNameMap& inputs, - const VarNameMap& outputs, const AttributeMap& attrs) + OpWithoutKernelTest(const std::string& type, const VariableNameMap& inputs, + const VariableNameMap& outputs, const AttributeMap& attrs) : OperatorBase(type, inputs, outputs, attrs), x(1) {} void InferShape(const Scope& scope) const override {} void Run(const Scope& scope, @@ -249,8 +249,9 @@ TEST(OpKernel, multi_inputs) { class OperatorClone : public paddle::framework::OperatorBase { public: DEFINE_OP_CLONE_METHOD(OperatorClone); - OperatorClone(const std::string& type, const VarNameMap& inputs, - const VarNameMap& outputs, + OperatorClone(const std::string& type, + const 
paddle::framework::VariableNameMap& inputs, + const paddle::framework::VariableNameMap& outputs, const paddle::framework::AttributeMap& attrs) : OperatorBase(type, inputs, outputs, attrs) {} void InferShape(const paddle::framework::Scope& scope) const override {} diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index f0114b9e49..1aec483573 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -138,7 +138,7 @@ All parameter, weight, gradient are variables in Paddle. //! @note: Be careful! PyBind will return std::string as an unicode, not //! Python str. If you want a str object, you should cast them in Python. m.def("get_all_op_protos", []() -> std::vector { - auto &op_info_map = OpRegistry::op_info_map(); + auto &op_info_map = OpInfoMap(); std::vector ret_values; for (auto it = op_info_map.begin(); it != op_info_map.end(); ++it) { const OpProto *proto = it->second.proto_; diff --git a/paddle/operators/net_op.cc b/paddle/operators/net_op.cc index a7d7105110..9bfa712d98 100644 --- a/paddle/operators/net_op.cc +++ b/paddle/operators/net_op.cc @@ -81,9 +81,8 @@ std::vector NetOp::OutputVars(bool has_intermediate) const { return ret_val; } -NetOp::NetOp(const std::string& type, - const framework::OperatorBase::VarNameMap& inputs, - const framework::OperatorBase::VarNameMap& outputs, +NetOp::NetOp(const std::string& type, const framework::VariableNameMap& inputs, + const framework::VariableNameMap& outputs, const framework::AttributeMap& attrs) : framework::OperatorBase(type, inputs, outputs, attrs) {} diff --git a/paddle/operators/net_op.h b/paddle/operators/net_op.h index 885ac6eeca..05b475d88f 100644 --- a/paddle/operators/net_op.h +++ b/paddle/operators/net_op.h @@ -38,8 +38,10 @@ class NetOp : public framework::OperatorBase { public: static const char kAll[]; NetOp() : framework::OperatorBase("plain_net", {}, {}, {}) {} - NetOp(const std::string& type, const VarNameMap& inputs, - const VarNameMap& outputs, const framework::AttributeMap& attrs); + + NetOp(const std::string& type, const framework::VariableNameMap& inputs, + const framework::VariableNameMap& outputs, + const framework::AttributeMap& attrs); NetOp(const NetOp& o) : framework::OperatorBase(o.type_, {}, {}, o.attrs_) { this->ops_.reserve(o.ops_.size()); diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index 78ce0ba3c0..16bd249cb3 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -131,8 +131,8 @@ const rnn::ArgumentName RecurrentGradientOp::kArgName{ "memories", "pre_memories", "boot_memories@grad"}; RecurrentOp::RecurrentOp(const std::string& type, - const framework::OperatorBase::VarNameMap& inputs, - const framework::OperatorBase::VarNameMap& outputs, + const framework::VariableNameMap& inputs, + const framework::VariableNameMap& outputs, const framework::AttributeMap& attrs) : OperatorBase(type, inputs, outputs, attrs) { rnn::InitArgument(kArgName, &arg_, *this); @@ -223,8 +223,8 @@ void RecurrentGradientAlgorithm::InferShape(const Scope& scope) const { } RecurrentGradientOp::RecurrentGradientOp( - const std::string& type, const framework::OperatorBase::VarNameMap& inputs, - const framework::OperatorBase::VarNameMap& outputs, + const std::string& type, const framework::VariableNameMap& inputs, + const framework::VariableNameMap& outputs, const framework::AttributeMap& attrs) : OperatorBase(type, inputs, outputs, attrs) { rnn::InitArgument(kArgName, &arg_, *this); diff --git a/paddle/operators/recurrent_op.h 
b/paddle/operators/recurrent_op.h index bcfa817de8..1033d657a3 100644 --- a/paddle/operators/recurrent_op.h +++ b/paddle/operators/recurrent_op.h @@ -114,8 +114,9 @@ class RecurrentGradientAlgorithm { class RecurrentOp : public framework::OperatorBase { public: - RecurrentOp(const std::string& type, const VarNameMap& inputs, - const VarNameMap& outputs, const framework::AttributeMap& attrs); + RecurrentOp(const std::string& type, const framework::VariableNameMap& inputs, + const framework::VariableNameMap& outputs, + const framework::AttributeMap& attrs); RecurrentOp(const RecurrentOp& o) : framework::OperatorBase( @@ -150,8 +151,9 @@ class RecurrentOp : public framework::OperatorBase { class RecurrentGradientOp : public framework::OperatorBase { public: - RecurrentGradientOp(const std::string& type, const VarNameMap& inputs, - const VarNameMap& outputs, + RecurrentGradientOp(const std::string& type, + const framework::VariableNameMap& inputs, + const framework::VariableNameMap& outputs, const framework::AttributeMap& attrs); RecurrentGradientOp(const RecurrentGradientOp& o) From 7f6b5044b640edcbd77bbb368509569776f7e0ee Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Sun, 20 Aug 2017 15:28:00 +0800 Subject: [PATCH 040/170] Make OpInfoMap as a class * Add Get/Has methods to OpInfoMap * Add PADDLE_ENFORCE for OpInfo to get field. --- paddle/framework/grad_op_builder.cc | 20 +++------- paddle/framework/op_info.cc | 9 ++--- paddle/framework/op_info.h | 61 ++++++++++++++++++++++++++++- paddle/framework/op_registry.cc | 8 ++-- paddle/framework/op_registry.h | 4 +- paddle/framework/operator.cc | 12 +----- paddle/framework/pybind.cc | 17 ++++---- 7 files changed, 84 insertions(+), 47 deletions(-) diff --git a/paddle/framework/grad_op_builder.cc b/paddle/framework/grad_op_builder.cc index fcc5d7a216..b02a599a80 100644 --- a/paddle/framework/grad_op_builder.cc +++ b/paddle/framework/grad_op_builder.cc @@ -24,9 +24,9 @@ static void TransOpArg(const OperatorBase* src_op, const OpArgType& src_type, const auto& src_inout = src_type == OpArgType::IN ? src_op->Inputs() : src_op->Outputs(); auto& dst_inout = *vars; - const OpProto* proto = OpInfoMap().at(src_op->Type()).proto_; + auto& proto = OpInfoMap::Instance().Get(src_op->Type()).Proto(); const auto& src_arg_list = - src_type == OpArgType::IN ? proto->inputs() : proto->outputs(); + src_type == OpArgType::IN ? 
proto.inputs() : proto.outputs(); for (const auto& arg : src_arg_list) { if (arg.not_in_gradient() && !is_grad) continue; const std::string src_name = arg.name(); @@ -40,14 +40,8 @@ static void TransOpArg(const OperatorBase* src_op, const OpArgType& src_type, } OperatorBase* BuildGradOp(const OperatorBase* op) { - auto it = OpInfoMap().find(op->Type()); - PADDLE_ENFORCE(it != OpInfoMap().end(), "'%s' has not been registered.", - op->Type()); - PADDLE_ENFORCE(it->second.proto_ != nullptr, "'%s' has no OpProto.", - op->Type()); - std::string grad_op_type = it->second.grad_op_type_; - PADDLE_ENFORCE(!grad_op_type.empty(), "'%s' has no gradient operator.", - op->Type()); + auto& info = OpInfoMap::Instance().Get(op->Type()); + PADDLE_ENFORCE(info.HasGradientOp()); VariableNameMap inputs; VariableNameMap outputs; @@ -56,10 +50,8 @@ OperatorBase* BuildGradOp(const OperatorBase* op) { TransOpArg(op, OpArgType::OUT, true, &inputs); // OG TransOpArg(op, OpArgType::IN, true, &outputs); // IG - it = OpInfoMap().find(grad_op_type); - PADDLE_ENFORCE(it != OpInfoMap().end(), "'%s' has not been registered.", - grad_op_type); - return it->second.creator_(grad_op_type, inputs, outputs, op->Attrs()); + auto& grad_info = OpInfoMap::Instance().Get(info.grad_op_type_); + return grad_info.Creator()(info.grad_op_type_, inputs, outputs, op->Attrs()); } } // namespace framework diff --git a/paddle/framework/op_info.cc b/paddle/framework/op_info.cc index f928ac6473..81ba29797c 100644 --- a/paddle/framework/op_info.cc +++ b/paddle/framework/op_info.cc @@ -17,12 +17,11 @@ namespace paddle { namespace framework { -static std::unordered_map* - g_op_info_map = nullptr; -std::unordered_map& OpInfoMap() { +static OpInfoMap* g_op_info_map = nullptr; + +OpInfoMap& OpInfoMap::Instance() { if (g_op_info_map == nullptr) { - g_op_info_map = - new std::unordered_map(); + g_op_info_map = new OpInfoMap(); } return *g_op_info_map; } diff --git a/paddle/framework/op_info.h b/paddle/framework/op_info.h index fdd0ed77d4..94245c6c44 100644 --- a/paddle/framework/op_info.h +++ b/paddle/framework/op_info.h @@ -34,9 +34,68 @@ struct OpInfo { std::string grad_op_type_; OpProto* proto_; OpAttrChecker* checker_; + + bool HasOpProtoAndChecker() const { + return proto_ != nullptr && checker_ != nullptr; + } + + const OpProto& Proto() const { + PADDLE_ENFORCE_NOT_NULL(proto_, "Operator Proto has not been registered"); + PADDLE_ENFORCE(proto_->IsInitialized(), + "Operator Proto must be initialized in op info"); + return *proto_; + } + + const OpAttrChecker& Checker() const { + PADDLE_ENFORCE_NOT_NULL(checker_, + "Operator Checker has not been registered"); + return *checker_; + } + + const OpCreator& Creator() const { + PADDLE_ENFORCE_NOT_NULL(creator_, + "Operator Creator has not been registered"); + return creator_; + } + + bool HasGradientOp() const { return !grad_op_type_.empty(); } }; -extern std::unordered_map& OpInfoMap(); +class OpInfoMap { + public: + static OpInfoMap& Instance(); + + OpInfoMap(const OpInfoMap& o) = delete; + OpInfoMap(OpInfoMap&& o) = delete; + OpInfoMap& operator=(const OpInfoMap& o) = delete; + OpInfoMap& operator=(OpInfoMap&& o) = delete; + + bool Has(const std::string& op_type) const { + return map_.find(op_type) != map_.end(); + } + + void Insert(const std::string& type, const OpInfo& info) { + PADDLE_ENFORCE(!Has(type), "Operator %s has been registered", type); + map_.insert({type, info}); + } + + const OpInfo& Get(const std::string& type) const { + auto it = map_.find(type); + PADDLE_ENFORCE(it != map_.end(), 
"Operator %s are not found", type); + return it->second; + } + + template + void IterAllInfo(Callback callback) { + for (auto& it : map_) { + callback(it.first, it.second); + } + } + + private: + OpInfoMap() = default; + std::unordered_map map_; +}; } // namespace framework } // namespace paddle diff --git a/paddle/framework/op_registry.cc b/paddle/framework/op_registry.cc index e03dc3a73d..b0e85dd49f 100644 --- a/paddle/framework/op_registry.cc +++ b/paddle/framework/op_registry.cc @@ -22,11 +22,9 @@ namespace framework { std::unique_ptr OpRegistry::CreateOp( const std::string& type, const VariableNameMap& inputs, const VariableNameMap& outputs, AttributeMap attrs) { - auto it = OpInfoMap().find(type); - PADDLE_ENFORCE(it != OpInfoMap().end(), - "Operator '%s' has not been registered.", type); - it->second.checker_->Check(attrs); - auto op = it->second.creator_(type, inputs, outputs, attrs); + auto& info = OpInfoMap::Instance().Get(type); + info.Checker().Check(attrs); + auto op = info.Creator()(type, inputs, outputs, attrs); return std::unique_ptr(op); } diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index 06530bc7d0..2d09cde41e 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -35,7 +35,7 @@ class OpRegistry { template static void RegisterOp(const std::string& op_type, const std::string& grad_op_type) { - PADDLE_ENFORCE(OpInfoMap().count(op_type) == 0, + PADDLE_ENFORCE(!OpInfoMap::Instance().Has(op_type), "'%s' is registered more than once.", op_type); OpInfo op_info; op_info.creator_ = []( @@ -59,7 +59,7 @@ class OpRegistry { op_info.proto_ = nullptr; op_info.checker_ = nullptr; } - OpInfoMap().insert(std::make_pair(op_type, op_info)); + OpInfoMap::Instance().Insert(op_type, op_info); // register gradient op if (!grad_op_type.empty()) { RegisterOp(grad_op_type, ""); diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index 48a7fe64ac..7abbde610f 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -141,18 +141,10 @@ std::vector OperatorBase::OutputVars(bool has_intermediate) const { } return ret_val; } - auto it = OpInfoMap().find(type_); - PADDLE_ENFORCE( - it != OpInfoMap().end(), - "Operator %s not registered, cannot figure out intermediate outputs", - type_); - PADDLE_ENFORCE( - it->second.proto_ != nullptr, - "Operator %s has no OpProto, cannot figure out intermediate outputs", - type_); + auto& info = OpInfoMap::Instance().Get(Type()); // get all OpProto::Var for outputs - for (auto& o : it->second.proto_->outputs()) { + for (auto& o : info.Proto().outputs()) { // ignore all intermediate output if (o.intermediate()) continue; auto out = outputs_.find(o.name()); diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index 1aec483573..6212c84909 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -138,19 +138,16 @@ All parameter, weight, gradient are variables in Paddle. //! @note: Be careful! PyBind will return std::string as an unicode, not //! Python str. If you want a str object, you should cast them in Python. 
m.def("get_all_op_protos", []() -> std::vector { - auto &op_info_map = OpInfoMap(); std::vector ret_values; - for (auto it = op_info_map.begin(); it != op_info_map.end(); ++it) { - const OpProto *proto = it->second.proto_; - if (proto == nullptr) { - continue; - } - PADDLE_ENFORCE(proto->IsInitialized(), "OpProto must all be initialized"); + + OpInfoMap::Instance().IterAllInfo([&ret_values](const std::string &type, + const OpInfo &info) { + if (!info.HasOpProtoAndChecker()) return; std::string str; - PADDLE_ENFORCE(proto->SerializeToString(&str), + PADDLE_ENFORCE(info.Proto().SerializeToString(&str), "Serialize OpProto Error. This could be a bug of Paddle."); - ret_values.push_back(py::bytes(str)); - } + ret_values.emplace_back(str); + }); return ret_values; }); m.def_submodule( From c108d6108cbdd28424397341fb67be01a2f63413 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 21 Aug 2017 14:03:12 +0800 Subject: [PATCH 041/170] Identity operator and its gradient --- paddle/framework/CMakeLists.txt | 3 +- paddle/framework/pybind.cc | 1 + paddle/operators/CMakeLists.txt | 1 + paddle/operators/identity_op.cc | 71 +++++++++++++++++++ paddle/operators/identity_op.cu | 17 +++++ paddle/operators/identity_op.h | 32 +++++++++ .../paddle/v2/framework/tests/CMakeLists.txt | 1 + .../v2/framework/tests/test_identity_op.py | 24 +++++++ 8 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 paddle/operators/identity_op.cc create mode 100644 paddle/operators/identity_op.cu create mode 100644 paddle/operators/identity_op.h create mode 100644 python/paddle/v2/framework/tests/test_identity_op.py diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 68304c9fc8..f249512f47 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -55,5 +55,6 @@ cc_library(paddle_pybind SHARED recurrent_op uniform_random_op gaussian_random_op - fill_zeros_like_op) + fill_zeros_like_op + identity_op) endif(WITH_PYTHON) diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index f0114b9e49..ddb244623f 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -42,6 +42,7 @@ USE_OP(fill_zeros_like); USE_OP_ITSELF(recurrent_op); USE_OP(gaussian_random); USE_OP(uniform_random); +USE_OP(identity); namespace paddle { namespace framework { diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index a7c89787e4..20e562c7d3 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -68,3 +68,4 @@ op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc DEPS framework_proto tensor op_registry operator net_op) op_library(uniform_random_op SRCS uniform_random_op.cc uniform_random_op.cu) +op_library(identity_op SRCS identity_op.cc identity_op.cu DEPS net_op) diff --git a/paddle/operators/identity_op.cc b/paddle/operators/identity_op.cc new file mode 100644 index 0000000000..cac44020bc --- /dev/null +++ b/paddle/operators/identity_op.cc @@ -0,0 +1,71 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/identity_op.h" +#include "paddle/operators/net_op.h" + +namespace paddle { +namespace operators { + +class IdentityOp : public framework::OperatorWithKernel { + public: + IdentityOp(const std::string &type, const VarNameMap &inputs, + const VarNameMap &outputs, const framework::AttributeMap &attrs) + : OperatorWithKernel(type, inputs, outputs, attrs) {} + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + auto *in = ctx.Input("X"); + auto *out = ctx.Output("Out"); + out->Resize(in->dims()); + } +}; + +class IdentityOpMaker : public framework::OpProtoAndCheckerMaker { + public: + IdentityOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "The input tensor of identity operator.").NotInGradient(); + AddOutput("Out", "The output tensor of identity operator.").NotInGradient(); + AddComment(R"DOC(Identity operator + +The equation is: Out = X +)DOC"); + } +}; + +// Identity Op's gradient is identity op, too. +// Grad(Out=identity_op(X)) => Grad(X) = identity_op(Grad(Out)) +class IdentityGradOp : public NetOp { + public: + IdentityGradOp(const std::string &type, const VarNameMap &inputs, + const VarNameMap &outputs, + const framework::AttributeMap &attrs) + : NetOp(type, inputs, outputs, attrs) { + AddOp(framework::OpRegistry::CreateOp( + "identity", {{"X", {Input(framework::GradVarName("Out"))}}}, + {{"Out", {Output(framework::GradVarName("X"))}}}, {})); + CompleteAddOp(false); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OP(identity, ops::IdentityOp, ops::IdentityOpMaker, identity_grad, + ops::IdentityGradOp); +REGISTER_OP_CPU_KERNEL(identity, ops::IdentityKernel); diff --git a/paddle/operators/identity_op.cu b/paddle/operators/identity_op.cu new file mode 100644 index 0000000000..3053104bbe --- /dev/null +++ b/paddle/operators/identity_op.cu @@ -0,0 +1,17 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/identity_op.h" + +REGISTER_OP_GPU_KERNEL(identity, paddle::operators::IdentityKernel); diff --git a/paddle/operators/identity_op.h b/paddle/operators/identity_op.h new file mode 100644 index 0000000000..14a832257b --- /dev/null +++ b/paddle/operators/identity_op.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include "paddle/framework/op_registry.h" +#include "paddle/memory/memcpy.h" +namespace paddle { +namespace operators { +template +class IdentityKernel : public framework::OpKernel { + public: + virtual void Compute(const framework::ExecutionContext& context) const { + auto* tensor = context.Output("Out"); + auto* in = context.Input("X"); + tensor->CopyFrom(*in, in->place()); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index ce57a07130..cf7baa5556 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -27,3 +27,4 @@ py_test(test_uniform_random_op SRCS test_uniform_random_op.py) py_test(test_recurrent_op SRCS test_recurrent_op.py) py_test(test_sgd_op SRCS test_sgd_op.py) py_test(test_gradient_checker SRCS test_gradient_checker.py) +py_test(test_identity_op SRCS test_identity_op.py) diff --git a/python/paddle/v2/framework/tests/test_identity_op.py b/python/paddle/v2/framework/tests/test_identity_op.py new file mode 100644 index 0000000000..181d9c0c21 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_identity_op.py @@ -0,0 +1,24 @@ +import unittest +from op_test_util import OpTestMeta +from gradient_checker import GradientChecker, create_op +import numpy as np + + +class IdentityTest(unittest.TestCase): + __metaclass__ = OpTestMeta + + def setUp(self): + self.type = "identity" + self.inputs = {'X': np.random.random((32, 784)).astype("float32")} + self.outputs = {'Out': self.inputs['X']} + + +class IdentityGradOpTest(GradientChecker): + def test_normal(self): + op = create_op("identity") + inputs = {"X": np.random.random((10, 10)).astype("float32")} + self.check_grad(op, inputs, set("X"), "Out") + + +if __name__ == '__main__': + unittest.main() From d5768ebc89868431040e47e3db126263da385d70 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Fri, 18 Aug 2017 20:49:35 +0800 Subject: [PATCH 042/170] fix above comments --- paddle/cuda/include/hl_matrix.h | 58 ++++++++----- paddle/cuda/include/stub/hl_matrix_stub.h | 47 +++++++---- paddle/cuda/src/hl_cuda_matrix.cu | 84 +++++++++---------- paddle/gserver/layers/Conv3DLayer.cpp | 26 ++++-- paddle/gserver/layers/Conv3DLayer.h | 14 +--- paddle/gserver/layers/ConvBaseLayer.cpp | 26 +----- paddle/gserver/layers/ConvBaseLayer.h | 1 - paddle/gserver/layers/CudnnConvBaseLayer.cpp | 18 ++++ paddle/gserver/layers/DeConv3DLayer.cpp | 46 +++++----- paddle/gserver/layers/DeConv3DLayer.h | 44 +++++----- paddle/gserver/layers/ExpandConvBaseLayer.cpp | 21 ++++- paddle/gserver/tests/test_LayerGrad.cpp | 31 +++---- paddle/math/tests/test_matrixCompare.cpp | 28 ++----- proto/ModelConfig.proto | 4 +- 14 files changed, 247 insertions(+), 201 deletions(-) diff --git a/paddle/cuda/include/hl_matrix.h b/paddle/cuda/include/hl_matrix.h index da2ed8cabb..a37921b749 100644 --- a/paddle/cuda/include/hl_matrix.h +++ b/paddle/cuda/include/hl_matrix.h @@ -240,16 +240,25 @@ extern void hl_matrix_rotate( * @param[in] strideW stride in the width. * @param[in] paddingD padding in the depth. * @param[in] paddingH padding in the height. - * @param[in] paddingW padding in the width. + * @param[in] paddingW padding in the width. * @param[out] matDst output matrix. 
- * + * */ -extern void hl_matrix_vol2Col(real* matSrc, - int channel, int depth, int height, int width, - int filterD, int filterH, int filterW, - int strideD, int strideH, int strideW, - int paddingD, int paddingH, int paddingW, - real* matDst); +extern void hl_matrix_vol2Col(const real* dataSrc, + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, + real* dataDst); /** * @brief Matrix col2Vol: Convert col matrix into 3D volume @@ -267,19 +276,28 @@ extern void hl_matrix_vol2Col(real* matSrc, * @param[in] strideW stride in the width. * @param[in] paddingD padding in the depth. * @param[in] paddingH padding in the height. - * @param[in] paddingW padding in the width. + * @param[in] paddingW padding in the width. * @param[in] matSrc input matrix. - * @param[in] beta input - * @param[in] alpha input - * + * @param[in] beta input + * @param[in] alpha input + * */ -extern void hl_matrix_col2Vol(real* matDst, - int channels, int depth, int height, int width, - int filterD, int filterH, int filterW, - int strideD, int strideH, int strideW, - int paddingD, int paddingH, int paddingW, - real* matSrc, - real alpha, real beta); - +extern void hl_matrix_col2Vol(real* dataDst, + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, + const real* dataSrc, + real alpha, + real beta); #endif /* HL_MATRIX_H_ */ diff --git a/paddle/cuda/include/stub/hl_matrix_stub.h b/paddle/cuda/include/stub/hl_matrix_stub.h index 0b73777812..6ac332945c 100644 --- a/paddle/cuda/include/stub/hl_matrix_stub.h +++ b/paddle/cuda/include/stub/hl_matrix_stub.h @@ -99,19 +99,38 @@ inline void hl_matrix_collect_shared_bias(real* B_d, inline void hl_matrix_rotate( real* mat, real* matRot, int dimM, int dimN, bool clockWise) {} -inline void hl_matrix_vol2Col(real* data, - int channels, int depth, int height, int width, - int filterD, int filterH, int filterW, - int strideD, int strideH, int strideW, - int paddingD, int paddingH, int paddingW, - real* data_col) {} - -inline void hl_matrix_col2Vol(real* data, - int channels, int depth, int height, int width, - int filterD, int filterH, int filterW, - int strideD, int strideH, int strideW, - int paddingD, int paddingH, int paddingW, - real* data_Im, - real alpha, real beta) {} +inline void hl_matrix_vol2Col(const real* dataSrc, + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, + real* dataDst) {} + +inline void hl_matrix_col2Vol(real* dataDst, + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, + const real* dataSrc, + real alpha, + real beta) {} #endif // HL_MATRIX_STUB_H_ diff --git a/paddle/cuda/src/hl_cuda_matrix.cu b/paddle/cuda/src/hl_cuda_matrix.cu index 3bf1b0251f..b41a3a1e06 100644 --- a/paddle/cuda/src/hl_cuda_matrix.cu +++ b/paddle/cuda/src/hl_cuda_matrix.cu @@ -594,7 +594,7 @@ void hl_matrix_rotate( } __global__ void keMatrixVol2Col(int num_kernels, - real* dataSrc, + const real* dataSrc, real* dataDst, int depth, int height, @@ -643,7 +643,7 @@ __global__ void keMatrixVol2Col(int num_kernels, } } -void 
hl_matrix_vol2Col(real* dataSrc, +void hl_matrix_vol2Col(const real* dataSrc, int channels, int depth, int height, @@ -666,30 +666,30 @@ void hl_matrix_vol2Col(real* dataSrc, const int threads = 512; const int blocks = DIVUP(num_kernels, threads); - keMatrixVol2Col<<>>(num_kernels, - dataSrc, - dataDst, - depth, - height, - width, - filterD, - filterH, - filterW, - strideD, - strideH, - strideW, - paddingD, - paddingH, - paddingW, - depth_col, - height_col, - width_col); + keMatrixVol2Col<<>>(num_kernels, + dataSrc, + dataDst, + depth, + height, + width, + filterD, + filterH, + filterW, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + depth_col, + height_col, + width_col); CHECK_SYNC("hl_matrix_vol2Col failed"); } __global__ void keMatrixCol2Vol(int num_kernels, real* dataDst, - real* dataSrc, + const real* dataSrc, int depth, int height, int width, @@ -759,7 +759,7 @@ void hl_matrix_col2Vol(real* dataDst, int paddingD, int paddingH, int paddingW, - real* dataSrc, + const real* dataSrc, real alpha, real beta) { int depth_col = (depth + 2 * paddingD - filterD) / strideD + 1; @@ -770,26 +770,26 @@ void hl_matrix_col2Vol(real* dataDst, const int threads = 512; const int blocks = DIVUP(num_kernels, threads); - keMatrixCol2Vol<<>>(num_kernels, - dataDst, - dataSrc, - depth, - height, - width, - filterD, - filterH, - filterW, - strideD, - strideH, - strideW, - paddingD, - paddingH, - paddingW, - depth_col, - height_col, - width_col, - alpha, - beta); + keMatrixCol2Vol<<>>(num_kernels, + dataDst, + dataSrc, + depth, + height, + width, + filterD, + filterH, + filterW, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + depth_col, + height_col, + width_col, + alpha, + beta); CHECK_SYNC("hl_matrix_col2Vol failed"); } diff --git a/paddle/gserver/layers/Conv3DLayer.cpp b/paddle/gserver/layers/Conv3DLayer.cpp index 106909824d..db907bbab1 100644 --- a/paddle/gserver/layers/Conv3DLayer.cpp +++ b/paddle/gserver/layers/Conv3DLayer.cpp @@ -28,16 +28,26 @@ bool Conv3DLayer::init(const LayerMap &layerMap, const ConvConfig &conf = inputConfig.conv_conf(); M_.push_back(numFilters_ / conf.groups()); K_.push_back(filterPixels_[index] * filterChannels_[index]); - if (nullptr != weights_[index]->getW()) - weights_[index]->getW()->reshape(weights_[index]->getW()->getWidth(), - weights_[index]->getW()->getHeight()); - if (nullptr != weights_[index]->getWGrad()) - weights_[index]->getWGrad()->reshape( - weights_[index]->getWGrad()->getWidth(), - weights_[index]->getWGrad()->getHeight()); + + // create a new weight + size_t height, width; + width = filterPixels_[index] * filterChannels_[index]; + height = numFilters_; + CHECK_EQ(parameters_[index]->getSize(), width * height); + Weight *w = new Weight(height, width, parameters_[index]); + weights_.emplace_back(w); ++index; } - CHECK(inputLayers_.size() == parameters_.size()); + if (biasParameter_.get()) { + if (sharedBiases_) { + CHECK_EQ((size_t)numFilters_, biasParameter_->getSize()); + biases_ = + std::unique_ptr(new Weight(1, numFilters_, biasParameter_)); + } else { + biases_ = + std::unique_ptr(new Weight(1, getSize(), biasParameter_)); + } + } return true; } diff --git a/paddle/gserver/layers/Conv3DLayer.h b/paddle/gserver/layers/Conv3DLayer.h index 703671e5d0..b622508d0c 100644 --- a/paddle/gserver/layers/Conv3DLayer.h +++ b/paddle/gserver/layers/Conv3DLayer.h @@ -12,13 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. */ - #pragma once - +#include #include "ConvBaseLayer.h" -#include "paddle/math/Matrix.h" #include "paddle/math/MathUtils.h" -#include +#include "paddle/math/Matrix.h" namespace paddle { @@ -30,21 +28,17 @@ namespace paddle { class Conv3DLayer : public ConvBaseLayer { public: explicit Conv3DLayer(const LayerConfig& config) : ConvBaseLayer(config) {} - ~Conv3DLayer() {} - bool init(const LayerMap &layerMap, const ParameterMap ¶meterMap); - - size_t getSize(); + bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); void forward(PassType passType); void addBias(); - void backward(const UpdateCallback& callback); - void bpropBiases(); void bpropData(int i); void bpropWeights(int i); + size_t getSize(); protected: // Figure out the dimensions for individual gemms. diff --git a/paddle/gserver/layers/ConvBaseLayer.cpp b/paddle/gserver/layers/ConvBaseLayer.cpp index 6bcbe0ddb2..8c637eaec9 100644 --- a/paddle/gserver/layers/ConvBaseLayer.cpp +++ b/paddle/gserver/layers/ConvBaseLayer.cpp @@ -21,8 +21,7 @@ bool ConvBaseLayer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { /* Initialize the basic parent class */ Layer::init(layerMap, parameterMap); - isDeconv_ = (config_.type() == "exconv" || config_.type() == "cudnn_conv" || - config_.type() == "conv3d" || config_.type() == "deconv3d") + isDeconv_ = (config_.type() == "exconv" || config_.type() == "cudnn_conv") ? false : true; @@ -56,28 +55,9 @@ bool ConvBaseLayer::init(const LayerMap& layerMap, } CHECK(inputLayers_.size() == parameters_.size()); - for (size_t i = 0; i < inputLayers_.size(); i++) { - size_t height, width; - height = filterPixels_[i] * filterChannels_[i]; - width = (!isDeconv_) ? numFilters_ : channels_[i]; - - // create a new weight - CHECK_EQ(parameters_[i]->getSize(), width * height); - Weight* w = new Weight(height, width, parameters_[i]); - weights_.emplace_back(w); - } - /* initialize the biases_ */ - if (biasParameter_.get()) { - if (sharedBiases_) { - CHECK_EQ((size_t)numFilters_, biasParameter_->getSize()); - biases_ = - std::unique_ptr(new Weight(1, numFilters_, biasParameter_)); - } else { - biases_ = - std::unique_ptr(new Weight(1, getSize(), biasParameter_)); - } - } + // create new weights_ in derived class + // create new biases_ in derived class // default caffe model caffeMode_ = true; diff --git a/paddle/gserver/layers/ConvBaseLayer.h b/paddle/gserver/layers/ConvBaseLayer.h index 8d1fd989e8..629c462776 100644 --- a/paddle/gserver/layers/ConvBaseLayer.h +++ b/paddle/gserver/layers/ConvBaseLayer.h @@ -23,7 +23,6 @@ namespace paddle { * with learned filters and (optionally) adds biases. */ - class ConvBaseLayer : public Layer { protected: typedef std::vector IntV; diff --git a/paddle/gserver/layers/CudnnConvBaseLayer.cpp b/paddle/gserver/layers/CudnnConvBaseLayer.cpp index c056bbe4d1..9e954615cd 100644 --- a/paddle/gserver/layers/CudnnConvBaseLayer.cpp +++ b/paddle/gserver/layers/CudnnConvBaseLayer.cpp @@ -46,8 +46,26 @@ bool CudnnConvBaseLayer::init(const LayerMap &layerMap, projConf_.emplace_back(conf); projections_.emplace_back( Projection::create(*projConf_[i], parameters_[i], useGpu_)); + + // create a new weight + size_t height, width; + height = filterPixels_[i] * filterChannels_[i]; + width = (!isDeconv_) ? 
numFilters_ : channels_[i]; + CHECK_EQ(parameters_[i]->getSize(), width * height); + Weight *w = new Weight(height, width, parameters_[i]); + weights_.emplace_back(w); } + if (biasParameter_.get()) { + if (sharedBiases_) { + CHECK_EQ((size_t)numFilters_, biasParameter_->getSize()); + biases_ = + std::unique_ptr(new Weight(numFilters_, 1, biasParameter_)); + } else { + biases_ = + std::unique_ptr(new Weight(getSize(), 1, biasParameter_)); + } + } if (biases_.get() && sharedBiases_) { hl_create_tensor_descriptor(&biasDesc_); hl_create_tensor_descriptor(&outputDesc_); diff --git a/paddle/gserver/layers/DeConv3DLayer.cpp b/paddle/gserver/layers/DeConv3DLayer.cpp index 5a54a68447..b18c06e36c 100644 --- a/paddle/gserver/layers/DeConv3DLayer.cpp +++ b/paddle/gserver/layers/DeConv3DLayer.cpp @@ -20,9 +20,6 @@ namespace paddle { REGISTER_LAYER(deconv3d, DeConv3DLayer); -#define DECONV_OUTPUT_SIZE(IN_SIZE, STRID, PAD, KSIZE) \ - (((IN_SIZE)-1) * (STRID)-2 * (PAD) + (KSIZE)) - bool DeConv3DLayer::init(const LayerMap &layerMap, const ParameterMap ¶meterMap) { if (!ConvBaseLayer::init(layerMap, parameterMap)) return false; @@ -32,14 +29,25 @@ bool DeConv3DLayer::init(const LayerMap &layerMap, for (int index = 0; index < config_.inputs().size(); ++index) { M_.push_back(filterChannels_[index]); K_.push_back(filterPixels_[index] * (numFilters_ / groups_[index])); - if (weights_[index]->getW()) - weights_[index]->getW()->reshape(filterPixels_[index] * numFilters_, - filterChannels_[index]); - if (weights_[index]->getWGrad()) - weights_[index]->getWGrad()->reshape(filterPixels_[index] * numFilters_, - filterChannels_[index]); + + // create a new weight + size_t height, width; + height = filterPixels_[index] * numFilters_; + width = filterChannels_[index]; + CHECK_EQ(parameters_[index]->getSize(), width * height); + Weight *w = new Weight(height, width, parameters_[index]); + weights_.emplace_back(w); + } + if (biasParameter_.get()) { + if (sharedBiases_) { + CHECK_EQ((size_t)numFilters_, biasParameter_->getSize()); + biases_ = + std::unique_ptr(new Weight(1, numFilters_, biasParameter_)); + } else { + biases_ = + std::unique_ptr(new Weight(1, getSize(), biasParameter_)); + } } - CHECK(inputLayers_.size() == parameters_.size()); return true; } @@ -52,22 +60,22 @@ size_t DeConv3DLayer::getSize() { outputW_.clear(); outputD_.clear(); N_.clear(); - No_.clear(); + NOut_.clear(); size_t layerSize = 0; for (size_t i = 0; i < inputLayers_.size(); ++i) { // imgSizeH_.push_back(inputLayers_[i]->getOutput().getFrameHeight()); // imgSizeW_.push_back(inputLayers_[i]->getOutput().getFrameWidth()); // imgSizeD_.push_back(inputLayers_[i]->getOutput().getFrameDepth()); - outputW_.push_back(DECONV_OUTPUT_SIZE( - imgSizeW_[i], stride_[i], padding_[i], filterSize_[i])); - outputH_.push_back(DECONV_OUTPUT_SIZE( - imgSizeH_[i], strideY_[i], paddingY_[i], filterSizeY_[i])); - outputD_.push_back(DECONV_OUTPUT_SIZE( - imgSizeD_[i], strideZ_[i], paddingZ_[i], filterSizeZ_[i])); - No_.push_back(outputD_[i] * outputH_[i] * outputW_[i]); + outputW_.push_back( + imageSize(imgSizeW_[i], filterSize_[i], padding_[i], stride_[i], true)); + outputH_.push_back(imageSize( + imgSizeH_[i], filterSizeY_[i], paddingY_[i], strideY_[i], true)); + outputD_.push_back(imageSize( + imgSizeD_[i], filterSizeZ_[i], paddingZ_[i], strideZ_[i], true)); + NOut_.push_back(outputD_[i] * outputH_[i] * outputW_[i]); N_.push_back(imgSizeD_[i] * imgSizeH_[i] * imgSizeW_[i]); CHECK(layerSize == 0 || N_[i] * size_t(numFilters_) == layerSize); - layerSize += No_[i] * 
numFilters_; + layerSize += NOut_[i] * numFilters_; } getOutput().setFrameHeight(outputH_[0]); getOutput().setFrameWidth(outputW_[0]); diff --git a/paddle/gserver/layers/DeConv3DLayer.h b/paddle/gserver/layers/DeConv3DLayer.h index 435807fe5d..a2a3d3f827 100644 --- a/paddle/gserver/layers/DeConv3DLayer.h +++ b/paddle/gserver/layers/DeConv3DLayer.h @@ -12,13 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - #pragma once +#include #include "ConvBaseLayer.h" -#include "paddle/math/Matrix.h" #include "paddle/math/MathUtils.h" -#include +#include "paddle/math/Matrix.h" namespace paddle { @@ -29,30 +28,25 @@ namespace paddle { */ class DeConv3DLayer : public ConvBaseLayer { public: - explicit DeConv3DLayer(const LayerConfig& config) : ConvBaseLayer(config) {} - - ~DeConv3DLayer() {} - - bool init(const LayerMap &layerMap, const ParameterMap ¶meterMap); - - size_t getSize(); - - void forward(PassType passType); - void addBias(); - - void backward(const UpdateCallback& callback); - - void bpropBiases(); - void bpropData(int i); - void bpropWeights(int i); + explicit DeConv3DLayer(const LayerConfig& config) : ConvBaseLayer(config) {} + ~DeConv3DLayer() {} + bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + + void forward(PassType passType); + void addBias(); + void backward(const UpdateCallback& callback); + void bpropBiases(); + void bpropData(int i); + void bpropWeights(int i); + size_t getSize(); protected: - // Figure out the dimensions for individual gemms. - IntV M_; /// numFilters_ / filter_group_; - IntV N_; /// channels_ * filterSizeZ_ * filterSize_ * filterSizeY_ - IntV K_; /// outputD_ * outputH_ * outputW_ - IntV No_; - MatrixPtr colBuf_; + // Figure out the dimensions for individual gemms. + IntV M_; /// numFilters_ / filter_group_; + IntV N_; /// channels_ * filterSizeZ_ * filterSize_ * filterSizeY_ + IntV K_; /// outputD_ * outputH_ * outputW_ + IntV NOut_; + MatrixPtr colBuf_; }; } // namespace paddle diff --git a/paddle/gserver/layers/ExpandConvBaseLayer.cpp b/paddle/gserver/layers/ExpandConvBaseLayer.cpp index 77736e78f9..2b7bef0a75 100644 --- a/paddle/gserver/layers/ExpandConvBaseLayer.cpp +++ b/paddle/gserver/layers/ExpandConvBaseLayer.cpp @@ -22,12 +22,31 @@ bool ExpandConvBaseLayer::init(const LayerMap &layerMap, /* Initialize the basic convolutional parent class */ ConvBaseLayer::init(layerMap, parameterMap); + int index = 0; for (auto &inputConfig : config_.inputs()) { const ConvConfig &conf = inputConfig.conv_conf(); /* Consistent caffe mode for multiple input */ caffeMode_ = conf.caffe_mode(); - } + // create a new weight + size_t height, width; + height = filterPixels_[index] * filterChannels_[index]; + width = (!isDeconv_) ? 
numFilters_ : channels_[index]; + CHECK_EQ(parameters_[index]->getSize(), width * height); + Weight *w = new Weight(height, width, parameters_[index]); + weights_.emplace_back(w); + index++; + } + if (biasParameter_.get()) { + if (sharedBiases_) { + CHECK_EQ((size_t)numFilters_, biasParameter_->getSize()); + biases_ = + std::unique_ptr(new Weight(numFilters_, 1, biasParameter_)); + } else { + biases_ = + std::unique_ptr(new Weight(getSize(), 1, biasParameter_)); + } + } getOutputSize(); return true; diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 1e80e2c0ee..d5724293bf 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -2019,7 +2019,7 @@ void test3DConvLayer(const string& type, bool trans, bool useGpu) { const int CHANNELS = 3; const int IMAGE_SIZE = 9; const int IMAGE_SIZE_Y = 9; - const int IMAGE_SIZE_Z = 9; // 2, 3, 5, 5, 5 + const int IMAGE_SIZE_Z = 9; TestConfig config; config.biasSize = NUM_FILTERS; @@ -2084,10 +2084,6 @@ TEST(Layer, test3DConvLayer) { #endif } -int deConvOutputSize(int inSize, int kSize, int pad, int stride) { - return (inSize - 1) * stride - 2 * pad + kSize; -} - void test3DDeConvLayer(const string& type, bool trans, bool useGpu) { // filter size const int NUM_FILTERS = 6; @@ -2126,16 +2122,21 @@ void test3DDeConvLayer(const string& type, bool trans, bool useGpu) { conv->set_img_size(IMAGE_SIZE); conv->set_img_size_y(IMAGE_SIZE_Y); conv->set_img_size_z(IMAGE_SIZE_Z); - conv->set_output_x(deConvOutputSize( - conv->img_size(), conv->filter_size(), conv->padding(), conv->stride())); - conv->set_output_y(deConvOutputSize(conv->img_size_y(), - conv->filter_size_y(), - conv->padding_y(), - conv->stride_y())); - conv->set_output_z(deConvOutputSize(conv->img_size_z(), - conv->filter_size_z(), - conv->padding_z(), - conv->stride_z())); + conv->set_output_x(imageSize(conv->img_size(), + conv->filter_size(), + conv->padding(), + conv->stride(), + true)); + conv->set_output_y(imageSize(conv->img_size_y(), + conv->filter_size_y(), + conv->padding_y(), + conv->stride_y(), + true)); + conv->set_output_z(imageSize(conv->img_size_z(), + conv->filter_size_z(), + conv->padding_z(), + conv->stride_z(), + true)); config.layerConfig.set_size(conv->output_x() * conv->output_y() * conv->output_z() * NUM_FILTERS); conv->set_groups(1); diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp index 1d41ec0870..3abe4484db 100644 --- a/paddle/math/tests/test_matrixCompare.cpp +++ b/paddle/math/tests/test_matrixCompare.cpp @@ -18,6 +18,7 @@ limitations under the License. 
*/ #include #include "TensorCheck.h" +#include "paddle/math/MathUtils.h" #include "paddle/math/Matrix.h" #include "paddle/math/SparseMatrix.h" #include "paddle/testing/TestUtil.h" @@ -1203,19 +1204,6 @@ TEST(Matrix, warpCTC) { } } -int outputSizeCol2Vol( - int imageSize, int filterSize, int padding, int stride, bool caffeMode) { - int outputSize; - if (!caffeMode) { - outputSize = - (imageSize - filterSize + 2 * padding + stride - 1) / stride + 1; - } else { - outputSize = (imageSize - filterSize + 2 * padding) / stride + 1; - } - CHECK_GE(outputSize, 1); - return outputSize; -} - void testMatrixCol2Vol(int depth, int height, int width) { int channel = 3; int filterX = 3, filterY = 4, filterZ = 5; @@ -1229,9 +1217,9 @@ void testMatrixCol2Vol(int depth, int height, int width) { cpuImage->randomizeUniform(); gpuImage->copyFrom(*cpuImage); - int outD = outputSizeCol2Vol(depth, filterZ, padZ, strideZ, true); - int outH = outputSizeCol2Vol(height, filterY, padZ, strideY, true); - int outW = outputSizeCol2Vol(width, filterX, padZ, strideX, true); + int outD = outputSize(depth, filterZ, padZ, strideZ, true); + int outH = outputSize(height, filterY, padY, strideY, true); + int outW = outputSize(width, filterX, padX, strideX, true); int colBufHeight = channel * filterZ * filterY * filterX; int colBufWidth = outD * outH * outW; @@ -1305,11 +1293,9 @@ void testMatrixCol2Vol(int depth, int height, int width) { } TEST(Matrix, col2Vol) { - for (auto depth : {9, 16, 64, 128}) { - for (auto height : {9, 11, 73, 128, 256}) { - for (auto width : { - 9, 32, 100, 512, - }) { + for (auto depth : {9, 16, 64}) { + for (auto height : {9, 11, 128}) { + for (auto width : {9, 32, 128}) { VLOG(3) << "depth=" << depth << " height=" << height << " width=" << width; testMatrixCol2Vol(depth, height, width); diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 043ae502b0..8c6eb5b7e1 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -82,7 +82,7 @@ message ConvConfig { // if not set, use img_size optional uint32 img_size_y = 14; - + optional uint32 filter_size_z = 15 [ default = 1 ]; optional uint32 padding_z = 16 [ default = 1 ]; optional uint32 stride_z = 17 [ default = 1 ]; @@ -637,4 +637,4 @@ message ModelConfig { // For External Machine, defining how to split a neural network // into multiple parts. 
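The 3-D conv/deconv changes in this patch drop the local DECONV_OUTPUT_SIZE macro, the deConvOutputSize test helper and the outputSizeCol2Vol helper in favour of the shared imageSize()/outputSize() utilities from paddle/math/MathUtils.h. A minimal sketch of the two formulas involved, reconstructed from the removed code (the helper names below are illustrative only, not the MathUtils API):

// Convolution output size in "caffe mode" (from the removed outputSizeCol2Vol):
//   out = (in - k + 2 * pad) / stride + 1
inline int convOutSize(int in, int k, int pad, int stride) {
  return (in - k + 2 * pad) / stride + 1;
}

// Deconvolution output size (from the removed DECONV_OUTPUT_SIZE macro and
// deConvOutputSize helper), the inverse of the mapping above, which is
// presumably why the diffs can swap in imageSize(in, k, pad, stride, true):
//   out = (in - 1) * stride - 2 * pad + k
inline int deconvOutSize(int in, int k, int pad, int stride) {
  return (in - 1) * stride - 2 * pad + k;
}

In caffe mode the two mappings invert each other exactly whenever (in - k + 2 * pad) is a multiple of the stride.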
optional ExternalConfig external_config = 9; -}; \ No newline at end of file +}; From 4bffbd30f0dbc2a2bbff4aa8108867fceecc260a Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Mon, 21 Aug 2017 16:44:30 +0800 Subject: [PATCH 043/170] use MKLDNNMatrix in fc forward --- paddle/gserver/layers/Layer.cpp | 2 +- paddle/gserver/layers/Layer.h | 20 +++++++- paddle/gserver/layers/MKLDNNFcLayer.cpp | 63 ++++++++++++++++--------- paddle/gserver/layers/MKLDNNLayer.h | 25 +++++++--- paddle/math/CMakeLists.txt | 4 -- paddle/math/MKLDNNMatrix.cpp | 29 +++++++++++- paddle/math/MKLDNNMatrix.h | 43 +++++++++++++---- 7 files changed, 143 insertions(+), 43 deletions(-) diff --git a/paddle/gserver/layers/Layer.cpp b/paddle/gserver/layers/Layer.cpp index d5621412ca..2bc20eee6c 100644 --- a/paddle/gserver/layers/Layer.cpp +++ b/paddle/gserver/layers/Layer.cpp @@ -41,7 +41,7 @@ namespace paddle { Layer::Layer(const LayerConfig& config, bool useGpu) : config_(config), useGpu_(useGpu), - deviceId_(-1), + deviceId_(CPU_DEVICE), needSequenceInfo_(true) {} bool Layer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { diff --git a/paddle/gserver/layers/Layer.h b/paddle/gserver/layers/Layer.h index 0ed482889d..ec4d093e0c 100644 --- a/paddle/gserver/layers/Layer.h +++ b/paddle/gserver/layers/Layer.h @@ -59,7 +59,12 @@ protected: LayerConfig config_; /// whether to use GPU bool useGpu_; - /// Device Id. CPU is -1, and GPU is 0, 1, 2 ... + /// Paddle device ID, MKLDNN is -2, CPU is -1 + enum PADDLE_DEVICE_ID { + MKLDNN_DEVICE = -2, + CPU_DEVICE = -1, + }; + /// Device Id. MKLDNN is -2, CPU is -1, and GPU is 0, 1, 2 ... int deviceId_; /// Input layers std::vector inputLayers_; @@ -321,6 +326,19 @@ public: if (deviceId == getDeviceId()) { return output_; } else { + bool CPU2MKLDNN = + getDeviceId() == CPU_DEVICE && deviceId == MKLDNN_DEVICE; + bool MKLDNN2CPU = + getDeviceId() == MKLDNN_DEVICE && deviceId == CPU_DEVICE; + if (CPU2MKLDNN) { + // TODO: do something + return output_; + } else if (MKLDNN2CPU) { + // TODO: do something + return output_; + } + + // TODO: handle mkldnn device or add mkldnn device to other for (size_t i = 0; i < outputOtherDevice_.size(); i++) { if (outputOtherDevice_[i].deviceId == deviceId) { return outputOtherDevice_[i]; diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index d201fac65e..fac0390eee 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -135,33 +135,51 @@ void MKLDNNFcLayer::reshape() { void MKLDNNFcLayer::resetFwd() { bool hasBias = biases_ && biases_->getW(); - real* iData = getInputValue(0)->getData(); - real* oData = getOutputValue()->getData(); - real* wData = weight_->getW()->getData(); - real* bData = hasBias ? biases_->getW()->getData() : NULL; + const MatrixPtr& in = getInputValue(0); + const MatrixPtr& wgt = weight_->getW(); + const MatrixPtr& bias = hasBias ? biases_->getW() : nullptr; + const MatrixPtr& out = output_.value; + + if (getPrev(0)->getDeviceId() == MKLDNN_DEVICE) { + inVal_ = std::dynamic_pointer_cast(in); + CHECK(inVal_) << "Input should be MKLDNNMatrix"; + // TODO: change input nchw to nc if available + // inVal_->downSpatial() + } else { + inVal_ = MKLDNNMatrix::create( + in, + hasSpatial_ ? memory::dims{bs_, ic_, ih_, iw_} : memory::dims{bs_, ic_}, + hasSpatial_ ? format::nchw : format::nc, + engine_); + } - // TODO(TJ): below create should be covered in MkldnnMatrix - // create memory desc - memory::desc iMD = hasSpatial_ ? 
createMD({bs_, ic_, ih_, iw_}, format::nchw) - : createMD({bs_, ic_}, format::nc); - memory::desc wMD = hasSpatial_ ? createMD({oc_, ic_, ih_, iw_}, format::oihw) - : createMD({oc_, ic_}, format::oi); - memory::desc bMD = bData != NULL ? createMD({oc_}, format::x) - : createMD({}, format::format_undef); - memory::desc oMD = createMD({bs_, oc_}, format::nc); + wgtVal_ = MKLDNNMatrix::create( + wgt, + hasSpatial_ ? memory::dims{oc_, ic_, ih_, iw_} : memory::dims{oc_, ic_}, + hasSpatial_ ? format::oihw : format::oi, + engine_); - // create memory primitive desc and memory self - inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData)); - wgtVal_.reset(new memory(memory::primitive_desc(wMD, engine_), wData)); - outVal_.reset(new memory(memory::primitive_desc(oMD, engine_), oData)); + biasVal_ = + hasBias ? MKLDNNMatrix::create(bias, {oc_}, format::x, engine_) : nullptr; + + outVal_ = MKLDNNMatrix::create(out, {bs_, oc_}, format::nc, engine_); + + // change original output to mkldnn output + output_.value = std::dynamic_pointer_cast(outVal_); + // create forward handle prop_kind pk = prop_kind::forward; - fc_fwd::desc fwdDesc = bData != NULL ? fc_fwd::desc(pk, iMD, wMD, bMD, oMD) - : fc_fwd::desc(pk, iMD, wMD, oMD); + fc_fwd::desc fwdDesc = + hasBias ? fc_fwd::desc(pk, + inVal_->getMD(), + wgtVal_->getMD(), + biasVal_->getMD(), + outVal_->getMD()) + : fc_fwd::desc( + pk, inVal_->getMD(), wgtVal_->getMD(), outVal_->getMD()); fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); - if (bData != NULL) { - biasVal_.reset(new memory(memory::primitive_desc(bMD, engine_), bData)); + if (hasBias) { fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_)); } else { fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *outVal_)); @@ -197,7 +215,8 @@ void MKLDNNFcLayer::resetBwd() { // update data inVal_->set_data_handle(iData); } else { - inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData)); + LOG(FATAL) << "Should not be empty"; + // inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData)); } // create memory primitive desc and memory self diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index 9533027fa6..b44095befb 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -21,7 +21,6 @@ limitations under the License. 
*/ #include "paddle/math/MKLDNNMatrix.h" DECLARE_bool(use_mkldnn); -DECLARE_bool(use_mkldnn_wgt); namespace paddle { @@ -54,13 +53,14 @@ protected: std::vector pipelineBwd_; // TODO(TJ): change below memory as MKLDNNMatrixPtr type - std::shared_ptr inVal_; + // MKLDNNMatrixPtr ; + MKLDNNMatrixPtr inVal_; std::shared_ptr inGrad_; - std::shared_ptr outVal_; + MKLDNNMatrixPtr outVal_; std::shared_ptr outGrad_; - std::shared_ptr wgtVal_; + MKLDNNMatrixPtr wgtVal_; std::shared_ptr wgtGrad_; - std::shared_ptr biasVal_; + MKLDNNMatrixPtr biasVal_; std::shared_ptr biasGrad_; public: @@ -94,7 +94,7 @@ public: stream_.reset(new MKLDNNStream()); engine_ = CPUEngine::Instance().getEngine(); - // TODO(TJ): deivecId + setDeviceID(MKLDNN_DEVICE); return true; } @@ -128,6 +128,19 @@ public: // TODO(TJ): isFmtSuppoted(fmt) return mkldnn::memory::desc(dims, type, fmt); } + + void resetMKLDNNOutput(size_t height, size_t width) { + Layer::resetOutput(height, width); + // get valu and grad, use mkldnn matrix instaed + // output_.value; + } + +protected: + void setDeviceID(int id) { + deviceId_ = id; + output_.deviceId = id; + // TODO: handle mkldnn device or add mkldnn device to other + } }; } // namespace paddle diff --git a/paddle/math/CMakeLists.txt b/paddle/math/CMakeLists.txt index ad6de18c81..8afe6b509d 100644 --- a/paddle/math/CMakeLists.txt +++ b/paddle/math/CMakeLists.txt @@ -15,13 +15,9 @@ file(GLOB MATH_HEADERS . *.h) file(GLOB MATH_SOURCES . *.cpp) -message(STATUS "----------MATH_HEADERS:${MATH_HEADERS}") -message(STATUS "----------MATH_SOURCES:${MATH_SOURCES}") if(NOT WITH_MKLDNN) file(GLOB_RECURSE DNN_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.h") file(GLOB_RECURSE DNN_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.cpp") - message(STATUS "----------DNN_HEADER:${DNN_HEADER}") - message(STATUS "----------DNN_SOURCES:${DNN_SOURCES}") list(REMOVE_ITEM MATH_HEADERS ${DNN_HEADER}) list(REMOVE_ITEM MATH_SOURCES ${DNN_SOURCES}) message(STATUS "Skip compiling with MKLDNNMatrix") diff --git a/paddle/math/MKLDNNMatrix.cpp b/paddle/math/MKLDNNMatrix.cpp index df8e72d78b..44fc54278c 100644 --- a/paddle/math/MKLDNNMatrix.cpp +++ b/paddle/math/MKLDNNMatrix.cpp @@ -16,4 +16,31 @@ limitations under the License. */ using namespace mkldnn; // NOLINT -namespace paddle {} // namespace paddle +namespace paddle { + +MKLDNNMatrixPtr MKLDNNMatrix::create(const MatrixPtr& m, + memory::dims dims, + memory::format fmt, + engine& eg, + mkldnn::memory::data_type dtype) { + CpuMatrixPtr cpuM = std::dynamic_pointer_cast(m); + CHECK(cpuM) << "Only support create from CPU matrix yet"; + + size_t ndims = dims.size(); + CHECK(ndims > 0) << "Input dims should not be empty"; + size_t cnt = 1; + for (size_t i = 0; i < ndims; ++i) { + cnt *= dims[i]; + } + CHECK_EQ(cnt, m->getElementCnt()) << "Count size does not match"; + + size_t width = m->getWidth(); + size_t height = m->getHeight(); + real* data = m->getData(); + + memory::desc md = memory::desc(dims, dtype, fmt); + memory::primitive_desc pd = memory::primitive_desc(md, eg); + return std::make_shared(data, height, width, pd); +} + +} // namespace paddle diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h index 91ef56f2c3..73eb50d2a0 100644 --- a/paddle/math/MKLDNNMatrix.h +++ b/paddle/math/MKLDNNMatrix.h @@ -14,9 +14,8 @@ limitations under the License. 
*/ #pragma once -//#include "Matrix.h" -#include "Vector.h" - +#include +#include "Matrix.h" #include "mkldnn.hpp" #include "paddle/parameter/Parameter.h" @@ -32,14 +31,42 @@ typedef std::shared_ptr MKLDNNMatrixPtr; * @brief MKLDNN Matrix. * */ -class MKLDNNMatrix : public CpuVector { +class MKLDNNMatrix : public CpuMatrix, public mkldnn::memory { public: - explicit MKLDNNMatrix(size_t size, int fmt) : CpuVector(size), fmt_(fmt) {} + MKLDNNMatrix(real* data, + size_t height, + size_t width, + mkldnn::memory::primitive_desc pd) + : CpuMatrix(data, height, width, false), mkldnn::memory(pd, data) {} - ~MKLDNNMatrix() {} + MKLDNNMatrix(size_t height, size_t width, mkldnn::memory::primitive_desc pd) + : CpuMatrix(height, width, false), mkldnn::memory(pd) { + set_data_handle(CpuMatrix::getData()); + } + + static MKLDNNMatrixPtr create( + const MatrixPtr& m, + mkldnn::memory::dims dims, + mkldnn::memory::format fmt, + mkldnn::engine& eg, + mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32); + + /** + * Get primitive descriptor + */ + mkldnn::memory::primitive_desc getPD() { return this->get_primitive_desc(); } -protected: - int fmt_; + /** + * Get memory descriptor + */ + mkldnn::memory::desc getMD() { return getPD().desc(); } + + /** + * Get format + */ + int getFormat() { return getMD().data.format; } + + ~MKLDNNMatrix() {} }; } // namespace paddle From 73ab2d4678418d42dd5a03d5f8531b49eab2e7ce Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Mon, 21 Aug 2017 17:57:06 +0800 Subject: [PATCH 044/170] fix backward error of huber_regression_cost --- paddle/gserver/layers/CostLayer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp index 91a742422e..7f648070f2 100644 --- a/paddle/gserver/layers/CostLayer.cpp +++ b/paddle/gserver/layers/CostLayer.cpp @@ -644,7 +644,7 @@ void HuberRegressionLoss::backwardImp(Matrix& output, if (std::abs(a) <= delta_) grad[i] += -a; else - grad[i] += a > 0 ? delta_ : -delta_; + grad[i] += a > 0 ? 
-delta_ : delta_; } if (useGpu_) outputG.copyFrom(grad, numSamples); } From d3f219aa9911015bd8c4a1316b85620a07eb9f49 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 21 Aug 2017 18:09:17 +0800 Subject: [PATCH 045/170] Change IdentityOp to ScaleOp --- paddle/framework/CMakeLists.txt | 2 +- paddle/framework/pybind.cc | 3 +- paddle/framework/tensor.h | 5 +- paddle/operators/CMakeLists.txt | 2 +- paddle/operators/identity_op.cc | 71 ------------ paddle/operators/net_op.cc | 9 +- paddle/operators/scale_op.cc | 102 ++++++++++++++++++ .../operators/{identity_op.cu => scale_op.cu} | 5 +- .../operators/{identity_op.h => scale_op.h} | 16 ++- .../paddle/v2/framework/tests/CMakeLists.txt | 2 +- .../v2/framework/tests/gradient_checker.py | 7 +- ...ty_op.py => test_scale_and_identity_op.py} | 19 ++++ 12 files changed, 158 insertions(+), 85 deletions(-) delete mode 100644 paddle/operators/identity_op.cc create mode 100644 paddle/operators/scale_op.cc rename paddle/operators/{identity_op.cu => scale_op.cu} (81%) rename paddle/operators/{identity_op.h => scale_op.h} (66%) rename python/paddle/v2/framework/tests/{test_identity_op.py => test_scale_and_identity_op.py} (51%) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index f249512f47..5df14ae78d 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -56,5 +56,5 @@ cc_library(paddle_pybind SHARED uniform_random_op gaussian_random_op fill_zeros_like_op - identity_op) + scale_op) endif(WITH_PYTHON) diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index ddb244623f..3aaf0de150 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -42,7 +42,8 @@ USE_OP(fill_zeros_like); USE_OP_ITSELF(recurrent_op); USE_OP(gaussian_random); USE_OP(uniform_random); -USE_OP(identity); +USE_OP(scale); +USE_OP_ITSELF(identity); namespace paddle { namespace framework { diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index b8c779f4e5..643f875491 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -105,7 +105,10 @@ class Tensor { template inline Tensor Slice(const int& begin_idx, const int& end_idx) const; - platform::Place place() const { return holder_->place(); } + platform::Place place() const { + PADDLE_ENFORCE_NOT_NULL(holder_, "Tensor get place() must contains holder"); + return holder_->place(); + } private: template diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 20e562c7d3..0ba598823b 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -68,4 +68,4 @@ op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc DEPS framework_proto tensor op_registry operator net_op) op_library(uniform_random_op SRCS uniform_random_op.cc uniform_random_op.cu) -op_library(identity_op SRCS identity_op.cc identity_op.cu DEPS net_op) +op_library(scale_op SRCS scale_op.cc scale_op.cu DEPS net_op) diff --git a/paddle/operators/identity_op.cc b/paddle/operators/identity_op.cc deleted file mode 100644 index cac44020bc..0000000000 --- a/paddle/operators/identity_op.cc +++ /dev/null @@ -1,71 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include "paddle/operators/identity_op.h" -#include "paddle/operators/net_op.h" - -namespace paddle { -namespace operators { - -class IdentityOp : public framework::OperatorWithKernel { - public: - IdentityOp(const std::string &type, const VarNameMap &inputs, - const VarNameMap &outputs, const framework::AttributeMap &attrs) - : OperatorWithKernel(type, inputs, outputs, attrs) {} - - protected: - void InferShape(const framework::InferShapeContext &ctx) const override { - auto *in = ctx.Input("X"); - auto *out = ctx.Output("Out"); - out->Resize(in->dims()); - } -}; - -class IdentityOpMaker : public framework::OpProtoAndCheckerMaker { - public: - IdentityOpMaker(framework::OpProto *proto, - framework::OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "The input tensor of identity operator.").NotInGradient(); - AddOutput("Out", "The output tensor of identity operator.").NotInGradient(); - AddComment(R"DOC(Identity operator - -The equation is: Out = X -)DOC"); - } -}; - -// Identity Op's gradient is identity op, too. -// Grad(Out=identity_op(X)) => Grad(X) = identity_op(Grad(Out)) -class IdentityGradOp : public NetOp { - public: - IdentityGradOp(const std::string &type, const VarNameMap &inputs, - const VarNameMap &outputs, - const framework::AttributeMap &attrs) - : NetOp(type, inputs, outputs, attrs) { - AddOp(framework::OpRegistry::CreateOp( - "identity", {{"X", {Input(framework::GradVarName("Out"))}}}, - {{"Out", {Output(framework::GradVarName("X"))}}}, {})); - CompleteAddOp(false); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP(identity, ops::IdentityOp, ops::IdentityOpMaker, identity_grad, - ops::IdentityGradOp); -REGISTER_OP_CPU_KERNEL(identity, ops::IdentityKernel); diff --git a/paddle/operators/net_op.cc b/paddle/operators/net_op.cc index a7d7105110..7e3779ed2e 100644 --- a/paddle/operators/net_op.cc +++ b/paddle/operators/net_op.cc @@ -68,10 +68,15 @@ std::string NetOp::DebugString() const { bool NetOp::IsNetOp() const { return true; } std::vector NetOp::OutputVars(bool has_intermediate) const { + std::vector all; + for (auto& pair : this->outputs_) { + for (auto& var_name : pair.second) { + all.push_back(var_name); + } + } if (has_intermediate) { - return this->outputs_.at(kAll); + return all; } - auto& all = this->outputs_.at(kAll); std::vector ret_val; for (auto& each : all) { if (!Contains(intermediate_outputs_, each)) { diff --git a/paddle/operators/scale_op.cc b/paddle/operators/scale_op.cc new file mode 100644 index 0000000000..3b18ff078e --- /dev/null +++ b/paddle/operators/scale_op.cc @@ -0,0 +1,102 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/scale_op.h" +#include "paddle/operators/net_op.h" + +namespace paddle { +namespace operators { + +class ScaleOp : public framework::OperatorWithKernel { + public: + ScaleOp(const std::string &type, const VarNameMap &inputs, + const VarNameMap &outputs, const framework::AttributeMap &attrs) + : OperatorWithKernel(type, inputs, outputs, attrs) {} + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + auto *in = ctx.Input("X"); + auto *out = ctx.Output("Out"); + out->Resize(in->dims()); + } +}; + +template +class ScaleOpMaker : public framework::OpProtoAndCheckerMaker { + public: + ScaleOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "The input tensor of scale operator.").NotInGradient(); + AddOutput("Out", "The output tensor of scale operator.").NotInGradient(); + AddComment(R"DOC(Scale operator + +The equation is: Out = scale*X +)DOC"); + AddAttr("scale", "scale of scale operator.").SetDefault(1.0); + } +}; + +// Identity Op's gradient is identity op, too. +// Grad(Out=scale(X)) => Grad(X) = scale(Grad(Out)) +template +class ScaleGradOp : public NetOp { + public: + ScaleGradOp(const std::string &type, const VarNameMap &inputs, + const VarNameMap &outputs, const framework::AttributeMap &attrs) + : NetOp(type, inputs, outputs, attrs) { + AddOp(framework::OpRegistry::CreateOp( + "scale", {{"X", {Input(framework::GradVarName("Out"))}}}, + {{"Out", {Output(framework::GradVarName("X"))}}}, + {{"scale", GetAttr("scale")}})); + CompleteAddOp(false); + } +}; + +// identity is a alias of scale op. This is also a example for creating a alias +// operator. +template +class IdentityOpMaker : public framework::OpProtoAndCheckerMaker { + public: + IdentityOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "input tensor of identity op"); + AddOutput("Out", "output tensor of identity op"); + AddComment("identity operator. 
Just a alias of scale op which scale = 1.0"); + } +}; + +template +class IdentityOp : public NetOp { + public: + IdentityOp(const std::string &type, const VarNameMap &inputs, + const VarNameMap &outputs, const framework::AttributeMap &attrs) + : NetOp(type, inputs, outputs, attrs) { + AddOp(framework::OpRegistry::CreateOp( + "scale", {{"X", {Input("X")}}}, {{"Out", {Output("Out")}}}, + {{"scale", static_cast(1)}})); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OP(scale, ops::ScaleOp, ops::ScaleOpMaker, scale_grad, + ops::ScaleGradOp); +REGISTER_OP_CPU_KERNEL(scale, + ops::ScaleKernel); +REGISTER_OP_WITHOUT_GRADIENT(identity, ops::IdentityOp, + ops::IdentityOpMaker); diff --git a/paddle/operators/identity_op.cu b/paddle/operators/scale_op.cu similarity index 81% rename from paddle/operators/identity_op.cu rename to paddle/operators/scale_op.cu index 3053104bbe..63efbe0da8 100644 --- a/paddle/operators/identity_op.cu +++ b/paddle/operators/scale_op.cu @@ -12,6 +12,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/operators/identity_op.h" +#include "paddle/operators/scale_op.h" -REGISTER_OP_GPU_KERNEL(identity, paddle::operators::IdentityKernel); +REGISTER_OP_GPU_KERNEL( + scale, paddle::operators::ScaleKernel); diff --git a/paddle/operators/identity_op.h b/paddle/operators/scale_op.h similarity index 66% rename from paddle/operators/identity_op.h rename to paddle/operators/scale_op.h index 14a832257b..aea64f1b04 100644 --- a/paddle/operators/identity_op.h +++ b/paddle/operators/scale_op.h @@ -14,17 +14,25 @@ #pragma once +#include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" -#include "paddle/memory/memcpy.h" + namespace paddle { namespace operators { -template -class IdentityKernel : public framework::OpKernel { +template +class ScaleKernel : public framework::OpKernel { public: virtual void Compute(const framework::ExecutionContext& context) const { auto* tensor = context.Output("Out"); auto* in = context.Input("X"); - tensor->CopyFrom(*in, in->place()); + tensor->mutable_data(in->place()); + + auto scale = static_cast(context.op_.GetAttr("scale")); + + auto eigen_out = framework::EigenVector::Flatten(*tensor); + auto eigen_in = framework::EigenVector::Flatten(*in); + auto& dev = context.GetEigenDevice(); + eigen_out.device(dev) = scale * eigen_in; } }; diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index cf7baa5556..0e8811bfe7 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -27,4 +27,4 @@ py_test(test_uniform_random_op SRCS test_uniform_random_op.py) py_test(test_recurrent_op SRCS test_recurrent_op.py) py_test(test_sgd_op SRCS test_sgd_op.py) py_test(test_gradient_checker SRCS test_gradient_checker.py) -py_test(test_identity_op SRCS test_identity_op.py) +py_test(test_scale_and_identity_op SRCS test_scale_and_identity_op.py) diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index 8b8e2f444b..c22c6f8831 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -160,8 +160,13 @@ class GradientChecker(unittest.TestCase): grad_tensor.set(data, place) # run backward op - for name in backward_op.outputs(): + backward_outs = backward_op.outputs() + backward_names = [ 
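        # outputs() returns a mapping from each output slot to a list of
        # variable names, so the names are flattened here before the
        # corresponding scope variables are created below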
+ item for key in backward_outs for item in backward_outs[key] + ] + for name in backward_names: scope.new_var(name) + backward_op.infer_shape(scope) backward_op.run(scope, ctx) diff --git a/python/paddle/v2/framework/tests/test_identity_op.py b/python/paddle/v2/framework/tests/test_scale_and_identity_op.py similarity index 51% rename from python/paddle/v2/framework/tests/test_identity_op.py rename to python/paddle/v2/framework/tests/test_scale_and_identity_op.py index 181d9c0c21..69b301c376 100644 --- a/python/paddle/v2/framework/tests/test_identity_op.py +++ b/python/paddle/v2/framework/tests/test_scale_and_identity_op.py @@ -2,6 +2,7 @@ import unittest from op_test_util import OpTestMeta from gradient_checker import GradientChecker, create_op import numpy as np +from paddle.v2.framework.op import Operator class IdentityTest(unittest.TestCase): @@ -20,5 +21,23 @@ class IdentityGradOpTest(GradientChecker): self.check_grad(op, inputs, set("X"), "Out") +class ScaleTest(unittest.TestCase): + __metaclass__ = OpTestMeta + + def setUp(self): + self.type = "scale" + self.inputs = {'X': np.random.random((32, 784)).astype("float32")} + self.attrs = {'scale': -2.3} + self.outputs = {'Out': self.inputs['X'] * self.attrs['scale']} + + +class ScaleGradTest(GradientChecker): + def test_normal(self): + op = Operator("scale", X="X", Out="Out", scale=3.2) + self.check_grad(op, + {"X": np.random.random((10, 10)).astype("float32")}, + set("X"), "Out") + + if __name__ == '__main__': unittest.main() From 2377d719473543da3a6129de3c6c32667bdb9f18 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Mon, 21 Aug 2017 09:28:03 +0800 Subject: [PATCH 046/170] Add3DPooling --- paddle/cuda/include/hl_cnn.h | 198 ++++++++- paddle/cuda/include/stub/hl_cnn_stub.h | 90 ++++ paddle/cuda/src/hl_cuda_cnn.cu | 427 ++++++++++++++++++- paddle/gserver/layers/Pool3DLayer.cpp | 198 +++++++++ paddle/gserver/layers/Pool3DLayer.h | 48 +++ paddle/gserver/tests/test_LayerGrad.cpp | 69 ++++ paddle/math/Matrix.cpp | 502 +++++++++++++++++++++++ paddle/math/Matrix.h | 254 +++++++++++- paddle/math/tests/test_matrixCompare.cpp | 204 +++++++++ paddle/parameter/Argument.cpp | 2 + paddle/parameter/Argument.h | 8 +- proto/ModelConfig.proto | 12 + 12 files changed, 1998 insertions(+), 14 deletions(-) create mode 100644 paddle/gserver/layers/Pool3DLayer.cpp create mode 100644 paddle/gserver/layers/Pool3DLayer.h diff --git a/paddle/cuda/include/hl_cnn.h b/paddle/cuda/include/hl_cnn.h index 9f84db72da..e9687d0a58 100644 --- a/paddle/cuda/include/hl_cnn.h +++ b/paddle/cuda/include/hl_cnn.h @@ -173,6 +173,202 @@ extern void hl_avgpool_backward(const int frameCnt, real* backGrad, const int outStride); +/** + * @brief Maximum pool forward. + * + * @param[in] frameCnt batch size of input image. + * @param[in] inputData input data. + * @param[in] channels number of channel. + * @param[in] depth image depth. + * @param[in] height image height. + * @param[in] width image width. + * @param[in] pooledD output image depth. + * @param[in] pooledH output image height. + * @param[in] pooledW output image width. + * @param[in] sizeZ depth of pooling window. + * @param[in] sizeY height of pooling window. + * @param[in] sizeX width of pooling window. + * @param[in] strideD pooling stride depth. + * @param[in] strideH pooling stride height. + * @param[in] strideW pooling stride width. + * @param[in] paddingD padding depth. + * @param[in] paddingH padding height. + * @param[in] paddingW padding width. + * @param[out] tgtData output data. 
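 * (note: pooledD/pooledH/pooledW are precomputed by the caller; Pool3DLayer
 *  derives them with outputSize(img, size, pad, stride, caffeMode = false),
 *  i.e. pooled = (img + 2 * pad - size + stride - 1) / stride + 1)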
+ * @param[in] tgtStride stride between output data samples. + * + */ +extern void hl_maxpool3D_forward(const int frameCnt, + const real* inputData, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int paddingD, + const int paddingH, + const int paddingW, + real* tgtData, + const int tgtStride); + +/** + * @brief Maximum pool backward. + * + * @param[in] frameCnt batch size of input image. + * @param[in] inputData input data. + * @param[out] outData output data. + * @param[out] outGrad output grad data. + * @param[in] channels number of channel. + * @param[in] depth image depth. + * @param[in] height image height. + * @param[in] width image width. + * @param[in] pooledD output image depth. + * @param[in] pooledH output image height. + * @param[in] pooledW output image width. + * @param[in] sizeZ depth of pooling window. + * @param[in] sizeY height of pooling window. + * @param[in] sizeX width of pooling window. + * @param[in] strideD pooling stride depth. + * @param[in] strideH pooling stride height. + * @param[in] strideW pooling stride width. + * @param[in] scaleA scale. + * @param[in] scaleB scale. + * @param[in] paddingD padding depth. + * @param[in] paddingH padding height. + * @param[in] paddingW padding width. + * @param[out] targetGrad output grad. + * @param[in] outStride stride between output data samples. + * + */ +extern void hl_maxpool3D_backward(const int frameCnt, + const real* inputData, + const real* outData, + const real* outGrad, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int paddingD, + const int paddingH, + const int paddingW, + real scaleA, + real scaleB, + real* targetGrad, + const int outStride); + +/** + * @brief Averge pool forward. + * + * @param[in] frameCnt batch size of input image. + * @param[in] inputData input data. + * @param[in] channels number of channel. + * @param[in] depth image depth. + * @param[in] height image height. + * @param[in] width image width. + * @param[in] pooledD output image depth. + * @param[in] pooledH output image height. + * @param[in] pooledW output image width. + * @param[in] sizeZ depth of pooling window. + * @param[in] sizeY height of pooling window. + * @param[in] sizeX width of pooling window. + * @param[in] strideD pooling stride depth. + * @param[in] strideH pooling stride height. + * @param[in] strideW pooling stride width. + * @param[in] paddingD padding depth. + * @param[in] paddingH padding height. + * @param[in] paddingW padding width. + * @param[out] tgtData output data. + * @param[in] tgtStride stride between output data samples. + * + */ +extern void hl_avgpool3D_forward(const int frameCnt, + const real* inputData, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int paddingD, + const int paddingH, + const int paddingW, + real* tgtData, + const int tgtStride); + +/** + * @brief Maximum pool backward. + * + * @param[in] frameCnt batch size of input image. 
+ * @param[in] outGrad output grad data. + * @param[in] channels number of channel. + * @param[in] depth image depth. + * @param[in] height image height. + * @param[in] width image width. + * @param[in] pooledD output image depth. + * @param[in] pooledH output image height. + * @param[in] pooledW output image width. + * @param[in] sizeZ depth of pooling window. + * @param[in] sizeY height of pooling window. + * @param[in] sizeX width of pooling window. + * @param[in] strideD pooling stride depth. + * @param[in] strideH pooling stride height. + * @param[in] strideW pooling stride width. + * @param[in] paddingD padding depth. + * @param[in] paddingH padding height. + * @param[in] paddingW padding width. + * @param[in] scaleA scale. + * @param[in] scaleB scale. + * @param[out] backGrad output grad. + * @param[in] outStride stride between output data samples. + * + */ +extern void hl_avgpool3D_backward(const int frameCnt, + const real* outGrad, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + int paddingD, + int paddingH, + int paddingW, + real scaleA, + real scaleB, + real* backGrad, + const int outStride); + /** * @brief Bilinear interpolation forward. * @@ -275,4 +471,4 @@ extern void hl_maxout_backward(real* inGrad, size_t featLen, size_t groups); -#endif /* HL_CNN_H_ */ +#endif // HL_CNN_H_ diff --git a/paddle/cuda/include/stub/hl_cnn_stub.h b/paddle/cuda/include/stub/hl_cnn_stub.h index 2bbb9fa8df..28f61781be 100644 --- a/paddle/cuda/include/stub/hl_cnn_stub.h +++ b/paddle/cuda/include/stub/hl_cnn_stub.h @@ -87,6 +87,96 @@ inline void hl_avgpool_backward(const int frameCnt, real* backGrad, const int outStride) {} +inline void hl_maxpool3D_forward(const int frameCnt, + const real* inputData, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int paddingD, + const int paddingH, + const int paddingW, + real* tgtData, + const int tgtStride) {} + +inline void hl_maxpool3D_backward(const int frameCnt, + const real* inputData, + const real* outData, + const real* outGrad, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int paddingD, + const int paddingH, + const int paddingW, + real scaleA, + real scaleB, + real* targetGrad, + const int outStride) {} + +inline void hl_avgpool3D_forward(const int frameCnt, + const real* inputData, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int paddingD, + const int paddingH, + const int paddingW, + real* tgtData, + const int tgtStride) {} + +inline void hl_avgpool3D_backward(const int frameCnt, + const real* outGrad, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int 
sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + int paddingD, + int paddingH, + int paddingW, + real scaleA, + real scaleB, + real* backGrad, + const int outStride) {} + inline void hl_bilinear_forward(const real* inData, const size_t inImgH, const size_t inImgW, diff --git a/paddle/cuda/src/hl_cuda_cnn.cu b/paddle/cuda/src/hl_cuda_cnn.cu index aac19b1ea5..458c347728 100644 --- a/paddle/cuda/src/hl_cuda_cnn.cu +++ b/paddle/cuda/src/hl_cuda_cnn.cu @@ -1,11 +1,8 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -353,6 +350,430 @@ void hl_avgpool_backward(const int frameCnt, CHECK_SYNC("hl_avgpool_backward failed"); } +///////////////// +__global__ void KeMaxPool3DForward(const int nthreads, + const real* inputData, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int ksizeD, + const int ksizeH, + const int ksizeW, + const int strideD, + const int strideH, + const int strideW, + const int offsetD, + const int offsetH, + const int offsetW, + real* tgtData, + const int tgtStride) { + for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < (nthreads); + index += blockDim.x * gridDim.x) { + int pw = index % pooledW; + int ph = (index / pooledW) % pooledH; + int pd = (index / pooledW / pooledH) % pooledD; + int c = (index / pooledW / pooledH / pooledD) % channels; + int frameNum = index / pooledW / pooledH / pooledD / channels; + int dstart = pd * strideD - offsetD; + int hstart = ph * strideH - offsetH; + int wstart = pw * strideW - offsetW; + int dend = min(dstart + ksizeD, depth); + int hend = min(hstart + ksizeH, height); + int wend = min(wstart + ksizeW, width); + dstart = max(dstart, 0); + hstart = max(hstart, 0); + wstart = max(wstart, 0); + real maxval = -FLT_MAX; + inputData += (frameNum * channels + c) * depth * height * width; + for (int d = dstart; d < dend; ++d) { + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + if (maxval < inputData[(d * height + h) * width + w]) + maxval = inputData[(d * height + h) * width + w]; + } + } + } + int tgtIndex = + index % (pooledW * pooledH * pooledD * channels) + frameNum * tgtStride; + tgtData[tgtIndex] = maxval; + } +} + +void hl_maxpool3D_forward(const int frameCnt, + const real* inputData, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int paddingD, + const int paddingH, + const int paddingW, + real* tgtData, + const int tgtStride) { + int num_kernels = pooledD * pooledH * pooledW * channels * frameCnt; + int blocks = (num_kernels + 1024 - 1) / 1024; + dim3 threads(1024, 1); + dim3 grid(blocks, 1); + + KeMaxPool3DForward<<>>(num_kernels, + inputData, + channels, + depth, + height, + width, + pooledD, + pooledH, + pooledW, + sizeZ, + sizeY, + sizeX, + strideD, + strideH, + strideW, + paddingD, + paddingH, + 
paddingW, + tgtData, + tgtStride); + CHECK_SYNC("hl_maxpool3D_forward failed"); +} + +__global__ void KeMaxPool3DBackward(const int nthreads, + const real* inputData, + const real* outData, + const real* outGrad, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int padD, + const int padH, + const int padW, + real scaleA, + real scaleB, + real* targetGrad, + const int outStride) { + for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < (nthreads); + index += blockDim.x * gridDim.x) { + // find out the local index + // find out the local offset + int offsetW = index % width + padW; + int offsetH = (index / width) % height + padH; + int offsetD = (index / width / height) % depth + padD; + int offsetC = (index / width / height / depth) % channels; + int frameNum = index / width / height / depth / channels; + + int pdstart = (offsetD < sizeZ) ? 0 : (offsetD - sizeZ) / strideD + 1; + int phstart = (offsetH < sizeY) ? 0 : (offsetH - sizeY) / strideH + 1; + int pwstart = (offsetW < sizeX) ? 0 : (offsetW - sizeX) / strideW + 1; + int pdend = min(offsetD / strideD + 1, pooledD); + int phend = min(offsetH / strideH + 1, pooledH); + int pwend = min(offsetW / strideW + 1, pooledW); + + real gradient = 0; + real input = inputData[index]; + + outData += ((frameNum * channels + offsetC) * pooledD * pooledH * pooledW); + outGrad += ((frameNum * channels + offsetC) * pooledD * pooledH * pooledW); + for (int pd = pdstart; pd < pdend; ++pd) { + for (int ph = phstart; ph < phend; ++ph) { + for (int pw = pwstart; pw < pwend; ++pw) { + if (input == outData[(pd * pooledH + ph) * pooledW + pw]) + gradient += outGrad[(pd * pooledH + ph) * pooledW + pw]; + } + } + } + targetGrad[index] = scaleA * gradient + scaleB * targetGrad[index]; + } +} + +void hl_maxpool3D_backward(const int frameCnt, + const real* inputData, + const real* outData, + const real* outGrad, + const int channels, + const int depth, + const int height, + const int width, + const int outputD, + const int outputH, + const int outputW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int paddingD, + const int paddingH, + const int paddingW, + real scaleA, + real scaleB, + real* targetGrad, + const int outStride) { + int num_kernels = depth * height * width * channels * frameCnt; + int blocks = (num_kernels + 1024 - 1) / 1024; + + KeMaxPool3DBackward<<>>(num_kernels, + inputData, + outData, + outGrad, + channels, + depth, + height, + width, + outputD, + outputH, + outputW, + sizeZ, + sizeY, + sizeX, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + scaleA, + scaleB, + targetGrad, + outStride); + CHECK_SYNC("hl_maxpool3D_backward"); +} + +__global__ void KeAvgPool3DForward(const int nthreads, + const real* inputData, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int padD, + const int padH, + const int padW, + real* tgtData, + const int tgtStride) { + for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < (nthreads); + index += blockDim.x * gridDim.x) { + int pw = index % pooledW; + int ph = (index 
/ pooledW) % pooledH; + int pd = (index / pooledW / pooledH) % pooledD; + int c = (index / pooledW / pooledH / pooledD) % channels; + int frameNum = index / pooledW / pooledH / pooledD / channels; + int dstart = pd * strideD - padD; + int hstart = ph * strideH - padH; + int wstart = pw * strideW - padW; + int dend = min(dstart + sizeZ, depth + padD); + int hend = min(hstart + sizeY, height + padH); + int wend = min(wstart + sizeX, width + padW); + int pool_size = (dend - dstart) * (hend - hstart) * (wend - wstart); + dstart = max(dstart, 0); + hstart = max(hstart, 0); + wstart = max(wstart, 0); + dend = min(dend, depth); + hend = min(hend, height); + wend = min(wend, width); + + real aveval = 0; + inputData += (frameNum * channels + c) * depth * height * width; + for (int d = dstart; d < dend; ++d) { + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + aveval += inputData[(d * height + h) * width + w]; + } + } + } + int tgtIndex = + index % (pooledW * pooledH * pooledD * channels) + frameNum * tgtStride; + tgtData[tgtIndex] = aveval / pool_size; + } +} + +void hl_avgpool3D_forward(const int frameCnt, + const real* inputData, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int paddingD, + const int paddingH, + const int paddingW, + real* tgtData, + const int tgtStride) { + int num_kernels = pooledD * pooledH * pooledW * channels * frameCnt; + int blocks = (num_kernels + 1024 - 1) / 1024; + KeAvgPool3DForward<<>>(num_kernels, + inputData, + channels, + depth, + height, + width, + pooledD, + pooledH, + pooledW, + sizeZ, + sizeY, + sizeX, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + tgtData, + tgtStride); + CHECK_SYNC("hl_avgpool3D_forward failed"); +} + +__global__ void KeAvgPool3DBackward(const int nthreads, + const real* outGrad, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int padD, + const int padH, + const int padW, + real scaleA, + real scaleB, + real* tgtGrad, + const int outStride) { + for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < (nthreads); + index += blockDim.x * gridDim.x) { + int offsetW = index % width + padW; + int offsetH = (index / width) % height + padH; + int offsetD = (index / width / height) % depth + padD; + int offsetC = (index / width / height / depth) % channels; + int frameNum = index / width / height / depth / channels; + + int pdstart = (offsetD < sizeZ) ? 0 : (offsetD - sizeZ) / strideD + 1; + int phstart = (offsetH < sizeY) ? 0 : (offsetH - sizeY) / strideH + 1; + int pwstart = (offsetW < sizeX) ? 
0 : (offsetW - sizeX) / strideW + 1; + int pdend = min(offsetD / strideD + 1, pooledD); + int phend = min(offsetH / strideH + 1, pooledH); + int pwend = min(offsetW / strideW + 1, pooledW); + + real gradient = 0; + outGrad += (frameNum * channels + offsetC) * pooledD * pooledH * pooledW; + + for (int pd = pdstart; pd < pdend; ++pd) { + for (int ph = phstart; ph < phend; ++ph) { + for (int pw = pwstart; pw < pwend; ++pw) { + // figure out the pooling size + int dstart = pd * strideD - padD; + int hstart = ph * strideH - padH; + int wstart = pw * strideW - padW; + int dend = min(dstart + sizeZ, depth + padD); + int hend = min(hstart + sizeY, height + padH); + int wend = min(wstart + sizeX, width + padW); + int poolsize = (dend - dstart) * (hend - hstart) * (wend - wstart); + gradient += outGrad[(pd * pooledH + ph) * pooledW + pw] / poolsize; + } + } + } + tgtGrad[index] = scaleA * gradient + scaleB * tgtGrad[index]; + } +} + +void hl_avgpool3D_backward(const int frameCnt, + const real* outGrad, + const int channels, + const int depth, + const int height, + const int width, + const int outputD, + const int outputH, + const int outputW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + int paddingD, + int paddingH, + int paddingW, + real scaleA, + real scaleB, + real* backGrad, + const int outStride) { + int num_kernels = depth * height * width * channels * frameCnt; + int blocks = (num_kernels + 1024 - 1) / 1024; + + KeAvgPool3DBackward<<>>(num_kernels, + outGrad, + channels, + depth, + height, + width, + outputD, + outputH, + outputW, + sizeZ, + sizeY, + sizeX, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + scaleA, + scaleB, + backGrad, + outStride); + CHECK_SYNC("hl_avgpool3D_backward failed"); +} +///////////////// + __global__ void KeBilinearInterpFw(const real* in, const size_t inImgH, const size_t inImgW, diff --git a/paddle/gserver/layers/Pool3DLayer.cpp b/paddle/gserver/layers/Pool3DLayer.cpp new file mode 100644 index 0000000000..fc6b9bdd2f --- /dev/null +++ b/paddle/gserver/layers/Pool3DLayer.cpp @@ -0,0 +1,198 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "Pool3DLayer.h" +#include "PoolProjectionLayer.h" +#include "paddle/utils/Logging.h" + +namespace paddle { + +REGISTER_LAYER(pool3d, Pool3DLayer); + +bool Pool3DLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + Layer::init(layerMap, parameterMap); + + /* the size of inputs for pool-layer is 1 */ + CHECK_EQ(config_.inputs_size(), 1); + + const PoolConfig& conf = config_.inputs(0).pool_conf(); + poolType_ = conf.pool_type(); + channels_ = conf.channels(); + + sizeX_ = conf.size_x(); + sizeY_ = conf.size_y(); + sizeZ_ = conf.size_z(); + + strideW_ = conf.stride(); + strideH_ = conf.stride_y(); + strideD_ = conf.stride_z(); + + imgSizeW_ = conf.img_size(); + imgSizeH_ = conf.img_size_y(); + imgSizeD_ = conf.img_size_z(); + + paddingW_ = conf.padding(); + paddingH_ = conf.padding_y(); + paddingD_ = conf.padding_z(); + + outputW_ = conf.output_x(); + outputH_ = conf.output_y(); + outputD_ = conf.output_z(); + + return true; +} + +size_t Pool3DLayer::getSize() { + CHECK_EQ(inputLayers_.size(), 1UL); + + size_t layerSize = 0; + // imgSizeD_ = inputLayers_[0]->getOutput().getFrameDepth(); + // imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight(); + // imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth(); + if (imgSizeH_ == 0) { + // imgSizeH_ = imgSizeY_; + } + if (imgSizeW_ == 0) { + // imgSizeW_ = imgSize_; + } + outputD_ = outputSize(imgSizeD_, + sizeZ_, + paddingD_, + strideD_, + /* caffeMode */ false); + outputH_ = outputSize(imgSizeH_, + sizeY_, + paddingH_, + strideH_, + /* caffeMode */ false); + outputW_ = outputSize(imgSizeW_, + sizeX_, + paddingW_, + strideW_, + /* caffeMode */ false); + + layerSize = outputD_ * outputH_ * outputW_ * channels_; + getOutput().setFrameHeight(outputH_); + getOutput().setFrameWidth(outputW_); + getOutput().setFrameDepth(outputD_); + return layerSize; +} + +void Pool3DLayer::forward(PassType passType) { + Layer::forward(passType); + const MatrixPtr& inMat = inputLayers_[0]->getOutputValue(); + int batchSize = inMat->getHeight(); + int outWidth = getSize(); + resetOutput(batchSize, outWidth); + const MatrixPtr outMat = getOutputValue(); + + if (poolType_ == "avg") { + outMat->avgPool3DForward(*inMat, + imgSizeD_, + imgSizeH_, + imgSizeW_, + channels_, + sizeZ_, + sizeY_, + sizeX_, + strideD_, + strideH_, + strideW_, + outputD_, + outputH_, + outputW_, + paddingD_, + paddingH_, + paddingW_); + } else if (poolType_ == "max") { + outMat->maxPool3DForward(*inMat, + imgSizeD_, + imgSizeH_, + imgSizeW_, + channels_, + sizeZ_, + sizeY_, + sizeX_, + strideD_, + strideH_, + strideW_, + outputD_, + outputH_, + outputW_, + paddingD_, + paddingH_, + paddingW_); + } else { + LOG(FATAL) << "Unknown pool type: " << poolType_; + } + forwardActivation(); +} + +void Pool3DLayer::backward(const UpdateCallback& callback) { + backwardActivation(); + + (void)callback; + if (NULL == getInputGrad(0)) return; + MatrixPtr inMat = inputLayers_[0]->getOutputValue(); + MatrixPtr inGradMat = inputLayers_[0]->getOutputGrad(); + MatrixPtr outMat = getOutputValue(); + MatrixPtr outGradMat = getOutputGrad(); + + if (poolType_ == "avg") { + inGradMat->avgPool3DBackward(*outGradMat, + imgSizeD_, + imgSizeH_, + imgSizeW_, + sizeZ_, + sizeY_, + sizeZ_, + strideD_, + strideH_, + strideW_, + outputD_, + outputH_, + outputW_, + 1, + 1, + paddingD_, + paddingH_, + paddingW_); + } else if (poolType_ == "max") { + inGradMat->maxPool3DBackward(*inMat, + imgSizeD_, + imgSizeH_, + imgSizeW_, + *outGradMat, + *outMat, + sizeZ_, + sizeY_, + sizeZ_, + 
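        // note: both pooling branches above pass (sizeZ_, sizeY_, sizeZ_) as the
        // window size; the Matrix 3-D pooling API takes (sizeZ, sizeY, sizeX),
        // so the last of the three is presumably meant to be sizeX_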
strideD_, + strideH_, + strideW_, + outputD_, + outputH_, + outputW_, + 1, + 1, + paddingD_, + paddingH_, + paddingW_); + } else { + LOG(FATAL) << "Unknown pool type: " << poolType_; + } +} + +} // namespace paddle diff --git a/paddle/gserver/layers/Pool3DLayer.h b/paddle/gserver/layers/Pool3DLayer.h new file mode 100644 index 0000000000..afc65ac2b0 --- /dev/null +++ b/paddle/gserver/layers/Pool3DLayer.h @@ -0,0 +1,48 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include "Layer.h" +#include "paddle/math/MathUtils.h" +#include "paddle/math/Matrix.h" + +namespace paddle { + +/** + * @brief Basic parent layer of pooling + * Pools the input within regions + */ +class Pool3DLayer : public Layer { +public: + explicit Pool3DLayer(const LayerConfig& config) : Layer(config) {} + ~Pool3DLayer() {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback) override; + size_t getSize(); + +protected: + int channels_; + int sizeX_, sizeY_, sizeZ_; + int strideW_, strideH_, strideD_; + int paddingW_, paddingH_, paddingD_; + int imgSizeW_, imgSizeH_, imgSizeD_; + int outputW_, outputH_, outputD_; + std::string poolType_; +}; +} // namespace paddle diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 0f312b6ca5..43fb255ae0 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -1206,6 +1206,75 @@ TEST(Layer, PoolLayer) { #endif } +void setPool3DConfig(TestConfig* config, + PoolConfig* pool, + const string& poolType) { + // filter size + const int NUM_FILTERS = 16; + const int FILTER_SIZE = 3; + const int FILTER_SIZE_Y = 3; + const int FILTER_SIZE_Z = 3; + const int CHANNELS = 16; + + (*config).biasSize = 0; + (*config).layerConfig.set_type("pool3d"); + (*config).layerConfig.set_num_filters(NUM_FILTERS); + + int kw = FILTER_SIZE, kh = FILTER_SIZE_Y, kd = FILTER_SIZE_Z; + int pw = 0, ph = 0, pd = 0; + int sw = 2, sh = 2, sd = 2; + + pool->set_pool_type(poolType); + pool->set_pool_type("avg"); + pool->set_channels(CHANNELS); + pool->set_size_x(kw); + pool->set_size_y(kh); + pool->set_size_z(kd); + pool->set_padding(0); + pool->set_padding_y(0); + pool->set_padding_z(0); + pool->set_stride(sw); + pool->set_stride_y(sh); + pool->set_stride_z(sd); + pool->set_start(0); + int ow = outputSize(pool->img_size(), kw, pw, sw, /* caffeMode */ false); + int oh = outputSize(pool->img_size_y(), kh, ph, sh, /* caffeMode */ false); + int od = outputSize(pool->img_size_z(), kd, pd, sd, /* caffeMode */ false); + pool->set_output_x(ow); + pool->set_output_y(oh); + pool->set_output_z(od); +} + +void testPool3DLayer(const string& poolType, bool trans, bool useGpu) { + TestConfig config; + config.inputDefs.push_back({INPUT_DATA, "layer_0", 11664, 0}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + PoolConfig* pool = 
input->mutable_pool_conf(); + + const int IMAGE_SIZE = 9; + const int IMAGE_SIZE_Y = 9; + const int IMAGE_SIZE_Z = 9; + + pool->set_img_size(IMAGE_SIZE); + pool->set_img_size_y(IMAGE_SIZE_Y); + pool->set_img_size_z(IMAGE_SIZE_Z); + + setPool3DConfig(&config, pool, poolType); + config.layerConfig.set_size(pool->output_x() * pool->output_y() * + pool->channels()); + + testLayerGrad(config, "pool3d", 100, trans, useGpu); +} + +TEST(Layer, Pool3DLayer) { + testPool3DLayer("avg", /* trans= */ false, /* useGpu= */ false); + testPool3DLayer("max", /* trans= */ false, /* useGpu= */ false); +#ifndef PADDLE_ONLY_CPU + testPool3DLayer("avg", /* trans= */ false, /* useGpu= */ true); + testPool3DLayer("max", /* trans= */ false, /* useGpu= */ true); +#endif +} + void testSppLayer(const string& poolType, const int pyramidHeight, bool trans, diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 27f7d95b75..e7f1489b8b 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -1190,6 +1190,224 @@ void GpuMatrix::avgPoolBackward(Matrix& outGrad, outGrad.getStride()); } +void GpuMatrix::maxPool3DForward(Matrix& inputMat, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + CHECK(inputMat.useGpu_ == true) << "Matrix type are not equal"; + + real* inputData = inputMat.getData(); + size_t num = inputMat.getHeight(); + size_t width = imgSizeW; + size_t height = imgSizeH; + size_t depth = imgSizeD; + CHECK(depth * height * width * channels == inputMat.getWidth()); + CHECK(height_ == inputMat.getHeight()); + CHECK(width_ == outputD * outputH * outputW * channels); + + hl_maxpool3D_forward(num, + inputData, + channels, + depth, + height, + width, + outputD, + outputH, + outputW, + sizeZ, + sizeY, + sizeX, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + data_, + getStride()); +} + +void GpuMatrix::maxPool3DBackward(Matrix& inputMat, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + Matrix& outGrad, + Matrix& outV, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + real scaleTargets, + real scaleOutput, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + CHECK(inputMat.useGpu_ == true && outGrad.useGpu_ == true && + outV.useGpu_ == true) + << "Matrix type are not equal"; + + real* inputData = inputMat.getData(); + real* outData = outV.getData(); + real* outDiff = outGrad.getData(); + size_t frameNum = inputMat.getHeight(); + size_t channels = outV.getWidth() / outputD / outputH / outputW; + size_t width = imgSizeW; + size_t height = imgSizeH; + size_t depth = imgSizeD; + CHECK(depth * height * width * channels == inputMat.getWidth()); + CHECK(height_ == inputMat.getHeight()); + CHECK(width_ == depth * width * height * channels); + CHECK(outGrad.getHeight() == outV.getHeight() && + outGrad.getWidth() == outV.getWidth()); + + hl_maxpool3D_backward(frameNum, + inputData, + outData, + outDiff, + channels, + depth, + height, + width, + outputD, + outputH, + outputW, + sizeZ, + sizeY, + sizeX, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + scaleTargets, + scaleOutput, + data_, + outGrad.getStride()); +} + +void GpuMatrix::avgPool3DForward(Matrix& inputMat, + size_t imgSizeD, + size_t imgSizeH, + 
size_t imgSizeW, + size_t channels, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + CHECK(inputMat.useGpu_ == true) << "Matrix type are not equal"; + + real* inputData = inputMat.getData(); + size_t frameNum = inputMat.getHeight(); + size_t height = imgSizeH; + size_t width = imgSizeW; + size_t depth = imgSizeD; + CHECK(depth * height * width * channels == inputMat.getWidth()); + CHECK(height_ == inputMat.getHeight()); + CHECK(width_ == outputD * outputH * outputW * channels); + + hl_avgpool3D_forward(frameNum, + inputData, + channels, + depth, + height, + width, + outputD, + outputH, + outputW, + sizeZ, + sizeY, + sizeX, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + data_, + getStride()); +} + +void GpuMatrix::avgPool3DBackward(Matrix& outGrad, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + real scaleTargets, + real scaleOutput, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + CHECK(outGrad.useGpu_ == true) << "Matrix type are not equal"; + + real* outDiff = outGrad.getData(); + size_t frameNum = outGrad.getHeight(); + size_t channels = outGrad.getWidth() / outputD / outputH / outputW; + size_t height = imgSizeH; + size_t width = imgSizeW; + size_t depth = imgSizeD; + CHECK(depth * height * width * channels == width_); + CHECK(height_ == outGrad.getHeight()); + CHECK(outGrad.getWidth() == outputD * outputH * outputW * channels); + + hl_avgpool3D_backward(frameNum, + outDiff, + channels, + depth, + height, + width, + outputD, + outputH, + outputW, + sizeZ, + sizeY, + sizeX, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + scaleTargets, + scaleOutput, + data_, + outGrad.getStride()); +} + void GpuMatrix::maxSequenceForward(Matrix& input, const IVector& sequence, IVector& index) { @@ -1930,6 +2148,290 @@ void CpuMatrix::avgPoolBackward(Matrix& input, } } +void CpuMatrix::maxPool3DForward(Matrix& inputMat, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + real* inputData = inputMat.getData(); + real* outData = data_; + size_t num = inputMat.getHeight(); + size_t inWidth = imgSizeW; + size_t inHeight = imgSizeH; + size_t inDepth = imgSizeD; + CHECK(inHeight * inWidth * inDepth == inputMat.getWidth() / channels); + CHECK_EQ(num, this->getHeight()); + CHECK_EQ(channels * outputH * outputW * outputD, this->getWidth()); + size_t outStride = getStride(); + + /* initialize the data_ */ + for (size_t i = 0; i < height_; i++) { + for (size_t j = 0; j < width_; j++) { + outData[(i)*outStride + j] = -(real)FLT_MAX; + } + } + + /* pool max one by one */ + for (size_t n = 0; n < num; ++n) { // frame by frame + if (!isContiguous()) { + outData = data_ + n * outStride; + } + for (size_t c = 0; c < channels; ++c) { // channel by channel + for (size_t pd = 0; pd < outputD; ++pd) { + for (size_t ph = 0; ph < outputH; ++ph) { + for (size_t pw = 0; pw < outputW; ++pw) { + int dstart = pd * strideD - paddingD; + int hstart = ph * strideH - paddingH; + int wstart = pw * strideW - 
paddingW; + int dend = std::min(dstart + sizeZ, inDepth); + int hend = std::min(hstart + sizeY, inHeight); + int wend = std::min(wstart + sizeX, inWidth); + dstart = std::max(dstart, 0); + hstart = std::max(hstart, 0); + wstart = std::max(wstart, 0); + for (int d = dstart; d < dend; ++d) { + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + outData[(pd * outputH + ph) * outputW + pw] = + std::max(outData[(pd * outputH + ph) * outputW + pw], + inputData[(d * inHeight + h) * inWidth + w]); + } + } + } + } + } + } + // compute offset + inputData += inDepth * inHeight * inWidth; + outData += outputD * outputH * outputW; + } + } +} + +void CpuMatrix::maxPool3DBackward(Matrix& image, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + Matrix& outGrad, + Matrix& outV, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + real scaleTargets, + real scaleOutput, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + size_t num = image.getHeight(); + size_t channels = size_t(width_ / imgSizeD / imgSizeH / imgSizeW); + CHECK(image.getWidth() == imgSizeD * imgSizeH * imgSizeW * channels); + CHECK(image.getHeight() == height_ && image.getWidth() == width_); + CHECK(outV.getHeight() == outGrad.getHeight() && + outV.getWidth() == outGrad.getWidth()); + + real* tgtGrad = data_; + real* inData = image.getData(); + real* otData = outV.getData(); + real* otGrad = outGrad.getData(); + + size_t outStride = outV.getStride(); + real* origOutData = otData; + real* origOutGrad = otGrad; + + for (size_t n = 0; n < num; ++n) { + if (!outV.isContiguous()) { + otData = origOutData + n * outStride; + otGrad = origOutGrad + n * outStride; + } + for (size_t c = 0; c < channels; ++c) { + for (size_t pd = 0; pd < outputD; ++pd) { + for (size_t ph = 0; ph < outputH; ++ph) { + for (size_t pw = 0; pw < outputW; ++pw) { + int dstart = pd * strideD - paddingD; + int hstart = ph * strideH - paddingH; + int wstart = pw * strideW - paddingW; + int dend = std::min(dstart + sizeZ, imgSizeD); + int hend = std::min(hstart + sizeY, imgSizeH); + int wend = std::min(wstart + sizeX, imgSizeW); + dstart = std::max(dstart, 0); + hstart = std::max(hstart, 0); + wstart = std::max(wstart, 0); + for (int d = 0; d < dend; ++d) { + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + tgtGrad[(d * imgSizeH + h) * imgSizeW + w] = + scaleTargets * + tgtGrad[(d * imgSizeH + h) * imgSizeW + w] + + scaleOutput * otGrad[(pd * outputH + ph) * outputW + pw] * + (inData[(d * imgSizeH + h) * imgSizeW + w] == + otData[(pd * outputH + ph) * outputW + pw]); + } + } + } + } + } + } + // offset + inData += imgSizeD * imgSizeH * imgSizeW; + tgtGrad += imgSizeD * imgSizeH * imgSizeW; + otData += outputD * outputH * outputW; + otGrad += outputD * outputH * outputW; + } + } +} + +void CpuMatrix::avgPool3DForward(Matrix& input, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + // The main loop + size_t num = input.getHeight(); + size_t inDepth = imgSizeD; + size_t inHeight = imgSizeH; + size_t inWidth = imgSizeW; + CHECK(inDepth * inHeight * inWidth * channels == input.getWidth()); + CHECK(outputD * outputH * outputW * channels * num == height_ * width_); + 
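+  // Each output cell averages the input over a sizeZ x sizeY x sizeX window.
+  // Note that poolSize is computed from the padded window bounds before they
+  // are clamped to the real volume, so padded cells count toward the divisor
+  // even though they contribute nothing to the sum.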
real* tgtData = data_; + real* inData = input.getData(); + + for (size_t n = 0; n < num; ++n) { + if (!isContiguous()) { + tgtData = data_ + n * getStride(); + } + for (size_t c = 0; c < channels; ++c) { + for (size_t pd = 0; pd < outputD; ++pd) { + for (size_t ph = 0; ph < outputH; ++ph) { + for (size_t pw = 0; pw < outputW; ++pw) { + int dstart = pd * strideD - paddingD; + int hstart = ph * strideH - paddingH; + int wstart = pw * strideW - paddingW; + int dend = std::min(dstart + sizeZ, inDepth + paddingD); + int hend = std::min(hstart + sizeY, inHeight + paddingH); + int wend = std::min(wstart + sizeX, inWidth + paddingW); + int poolSize = (dend - dstart) * (hend - hstart) * (wend - wstart); + dstart = std::max(dstart, 0); + hstart = std::max(hstart, 0); + wstart = std::max(wstart, 0); + dend = std::min(dend, static_cast(inDepth)); + hend = std::min(hend, static_cast(inHeight)); + wend = std::min(wend, static_cast(inWidth)); + + CHECK(poolSize); + tgtData[(pd * outputH + ph) * outputW + pw] = 0; // clear + for (int d = dstart; d < dend; ++d) { + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + tgtData[(pd * outputH + ph) * outputW + pw] += + inData[(d * inHeight + h) * inWidth + w]; + } + } + } + tgtData[(pd * outputH + ph) * outputW + pw] /= poolSize; + } + } + } + // compute offset + inData += inDepth * inHeight * inWidth; + tgtData += outputD * outputH * outputW; + } + } +} + +void CpuMatrix::avgPool3DBackward(Matrix& input, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + real scaleTargets, + real scaleOutput, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + size_t num = input.getHeight(); + size_t channels = input.getWidth() / outputD / outputH / outputW; + CHECK(imgSizeD * imgSizeH * imgSizeW * channels == getWidth()); + real* inData = input.getData(); + real* outData = getData(); + + for (size_t n = 0; n < num; ++n) { + if (!input.isContiguous()) { + inData = input.getData() + n * input.getStride(); + } + for (size_t c = 0; c < channels; ++c) { + for (size_t pd = 0; pd < outputD; ++pd) { + for (size_t ph = 0; ph < outputH; ++ph) { + for (size_t pw = 0; pw < outputW; ++pw) { + int dstart = pd * strideD - paddingD; + int hstart = ph * strideH - paddingH; + int wstart = pw * strideW - paddingW; + int dend = std::min(dstart + sizeZ, imgSizeD + paddingD); + int hend = std::min(hstart + sizeY, imgSizeH + paddingH); + int wend = std::min(wstart + sizeX, imgSizeW + paddingW); + int poolSize = (dend - dstart) * (hend - hstart) * (wend - wstart); + dstart = std::max(dstart, 0); + hstart = std::max(hstart, 0); + wstart = std::max(wstart, 0); + dend = std::min(dend, static_cast(imgSizeD)); + hend = std::min(hend, static_cast(imgSizeH)); + wend = std::min(wend, static_cast(imgSizeW)); + CHECK(poolSize); + for (int d = dstart; d < dend; ++d) { + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + outData[(d * imgSizeH + h) * imgSizeW + w] += + inData[(pd * outputH + ph) * outputW + pw] / poolSize; + } + } + } + } + } + } + // offset + outData += imgSizeD * imgSizeH * imgSizeW; + inData += outputD * outputH * outputW; + } + } +} + /** * Input: one or more sequences. Each sequence contains some instances. * Output: output size is the number of input sequences (NOT input instances). 
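For readers following the CPU kernels above, the snippet below is a minimal standalone sketch (not part of this patch) of the same 3-D max-pooling loop for a single channel. The function name maxPool3dRef, the flat row-major D x H x W layout and all parameter names are illustrative assumptions; the kernels in Matrix.cpp additionally iterate over batch frames and channels and honour the matrix stride.

#include <algorithm>
#include <cfloat>
#include <vector>

// Naive single-channel 3-D max pooling over a flat row-major D x H x W volume.
// Window start = index * stride - padding, end clamped to the volume, and the
// maximum over the surviving cells is taken, following the loop structure of
// CpuMatrix::maxPool3DForward above.
std::vector<float> maxPool3dRef(const std::vector<float>& in,
                                int D, int H, int W,      // input extent
                                int kD, int kH, int kW,   // kernel extent
                                int sD, int sH, int sW,   // strides
                                int pD, int pH, int pW,   // paddings
                                int oD, int oH, int oW) { // precomputed output extent
  std::vector<float> out(oD * oH * oW, -FLT_MAX);
  for (int pd = 0; pd < oD; ++pd) {
    for (int ph = 0; ph < oH; ++ph) {
      for (int pw = 0; pw < oW; ++pw) {
        int d0 = std::max(pd * sD - pD, 0), d1 = std::min(pd * sD - pD + kD, D);
        int h0 = std::max(ph * sH - pH, 0), h1 = std::min(ph * sH - pH + kH, H);
        int w0 = std::max(pw * sW - pW, 0), w1 = std::min(pw * sW - pW + kW, W);
        float& dst = out[(pd * oH + ph) * oW + pw];
        for (int d = d0; d < d1; ++d)
          for (int h = h0; h < h1; ++h)
            for (int w = w0; w < w1; ++w)
              dst = std::max(dst, in[(d * H + h) * W + w]);
      }
    }
  }
  return out;
}

The output extents are assumed to be computed beforehand, e.g. with the outputSize() helper that the layer and the tests above rely on.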
diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index bb802bbb2c..f1534c5ea0 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -928,15 +928,102 @@ public: size_t paddingW) { LOG(FATAL) << "Not implemeted"; } - /** - * Input: one or more sequences. Each sequence contains some instances. - * - * Output: output size is the number of input sequences (NOT input - * instances). - * - * output[i] is set to max_input[i]. + * Pooling 3D forward operation, pick out the largest element + * in the sizeX of value */ + virtual void maxPool3DForward(Matrix& inputMat, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + LOG(FATAL) << "Not implemeted"; + } + + virtual void maxPool3DBackward(Matrix& image, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + Matrix& outGrad, + Matrix& outV, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + real scaleTargets, + real scaleOutput, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + LOG(FATAL) << "Not implemeted"; + } + + virtual void avgPool3DForward(Matrix& input, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + LOG(FATAL) << "Not implemeted"; + } + + virtual void avgPool3DBackward(Matrix& input, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + real scaleTargets, + real scaleOutput, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + LOG(FATAL) << "Not implemeted"; + } + + /** + * Input: one or more sequences. Each sequence contains some instances. + * + * Output: output size is the number of input sequences (NOT input + * instances). + * + * output[i] is set to max_input[i]. 
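+   * The index vector records, for each output element, the input row the
+   * maximum was taken from, so the backward pass can route gradients to it.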
+ */ virtual void maxSequenceForward(Matrix& input, const IVector& sequence, IVector& index) { @@ -1348,6 +1435,83 @@ public: size_t paddingH, size_t paddingW); + ///////////////////////// + void maxPool3DForward(Matrix& inputMat, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t paddingD, + size_t paddingH, + size_t paddingW); + + void maxPool3DBackward(Matrix& image, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + Matrix& outGrad, + Matrix& outV, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + real scaleTargets, + real scaleOutput, + size_t paddingD, + size_t paddingH, + size_t paddingW); + + void avgPool3DForward(Matrix& input, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t paddingD, + size_t paddingH, + size_t paddingW); + + void avgPool3DBackward(Matrix& input, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + real scaleTargets, + real scaleOutput, + size_t paddingD, + size_t paddingH, + size_t paddingW); + void maxSequenceForward(Matrix& input, const IVector& sequence, IVector& index); @@ -1506,6 +1670,82 @@ public: real scaleOutput, size_t paddingH, size_t paddingW); + ////////////////////// + void maxPool3DForward(Matrix& inputMat, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t paddingD, + size_t paddingH, + size_t paddingW); + + void maxPool3DBackward(Matrix& image, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + Matrix& outGrad, + Matrix& outV, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + real scaleTargets, + real scaleOutput, + size_t paddingD, + size_t paddingH, + size_t paddingW); + + void avgPool3DForward(Matrix& input, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t paddingD, + size_t paddingH, + size_t paddingW); + + void avgPool3DBackward(Matrix& input, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + real scaleTargets, + real scaleOutput, + size_t paddingD, + size_t paddingH, + size_t paddingW); void maxSequenceForward(Matrix& input, const IVector& sequence, diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp index d77478f345..7a961d2751 100644 --- a/paddle/math/tests/test_matrixCompare.cpp +++ b/paddle/math/tests/test_matrixCompare.cpp @@ -18,6 +18,7 @@ limitations under the License. 
*/ #include #include "TensorCheck.h" +#include "paddle/math/MathUtils.h" #include "paddle/math/Matrix.h" #include "paddle/math/SparseMatrix.h" #include "paddle/testing/TestUtil.h" @@ -1203,4 +1204,207 @@ TEST(Matrix, warpCTC) { } } +///// +void testMatrixPool3D(int depth, int height, int width) { + int channel = 3; + int filterX = 3, filterY = 4, filterZ = 5; + int strideX = 2, strideY = 2, strideZ = 2; + int padX = 1, padY = 1, padZ = 1; + + MatrixPtr cpuImage = + std::make_shared(1, channel * depth * height * width); + MatrixPtr gpuImage = + std::make_shared(1, channel * depth * height * width); + + int outD = outputSize(depth, filterZ, padZ, strideZ, true); + int outH = outputSize(height, filterY, padZ, strideY, true); + int outW = outputSize(width, filterX, padZ, strideX, true); + + int colBufWidth = outD * outH * outW; + MatrixPtr cpuOutput = std::make_shared(1, channel * colBufWidth); + MatrixPtr gpuOutput = std::make_shared(1, channel * colBufWidth); + + cpuImage->randomizeUniform(); + gpuImage->copyFrom(*cpuImage); + // std::cout << "test maxPool3DForward...\n"; + cpuOutput->maxPool3DForward(*cpuImage, + depth, + height, + width, + channel, + filterZ, + filterY, + filterX, + strideZ, + strideY, + strideX, + outD, + outH, + outW, + padZ, + padY, + padX); + gpuOutput->maxPool3DForward(*gpuImage, + depth, + height, + width, + channel, + filterZ, + filterY, + filterX, + strideZ, + strideY, + strideX, + outD, + outH, + outW, + padZ, + padY, + padX); + TensorCheckErr(*cpuOutput, *gpuOutput); + + cpuImage->randomizeUniform(); + gpuImage->copyFrom(*cpuImage); + // std::cout << "test avgPool3DForward...\n"; + cpuOutput->avgPool3DForward(*cpuImage, + depth, + height, + width, + channel, + filterZ, + filterY, + filterX, + strideZ, + strideY, + strideX, + outD, + outH, + outW, + padZ, + padY, + padX); + + gpuOutput->avgPool3DForward(*gpuImage, + depth, + height, + width, + channel, + filterZ, + filterY, + filterX, + strideZ, + strideY, + strideX, + outD, + outH, + outW, + padZ, + padY, + padX); + TensorCheckErr(*cpuOutput, *gpuOutput); + cpuImage->randomizeUniform(); + gpuImage->copyFrom(*cpuImage); + cpuOutput->randomizeUniform(); + gpuOutput->copyFrom(*cpuOutput); + // std::cout << "test avgPool3DBackward...\n"; + cpuImage->avgPool3DBackward(*cpuOutput, + depth, + height, + width, + filterZ, + filterY, + filterX, + strideZ, + strideY, + strideX, + outD, + outH, + outW, + 1, + 1, + padZ, + padY, + padX); + + gpuImage->avgPool3DBackward(*gpuOutput, + depth, + height, + width, + filterZ, + filterY, + filterX, + strideZ, + strideY, + strideX, + outD, + outH, + outW, + 1, + 1, + padZ, + padY, + padX); + TensorCheckErr(*cpuImage, *gpuImage); + + cpuImage->randomizeUniform(); + gpuImage->copyFrom(*cpuImage); + cpuOutput->randomizeUniform(); + gpuOutput->copyFrom(*cpuOutput); + // std::cout << "test maxPool3DBackward...\n"; + cpuImage->maxPool3DBackward(*cpuImage, + depth, + height, + width, + *cpuOutput, + *cpuOutput, + filterZ, + filterY, + filterX, + strideZ, + strideY, + strideX, + outD, + outH, + outW, + 1, + 1, + padZ, + padY, + padX); + + gpuImage->maxPool3DBackward(*gpuImage, + depth, + height, + width, + *gpuOutput, + *gpuOutput, + filterZ, + filterY, + filterX, + strideZ, + strideY, + strideX, + outD, + outH, + outW, + 1, + 1, + padZ, + padY, + padX); + TensorCheckErr(*cpuImage, *gpuImage); +} + +TEST(Matrix, Pool3D) { + for (auto depth : {9, 16, 64, 128}) { + for (auto height : {9, 11, 128, 256}) { + for (auto width : {9, 32, 128}) { + VLOG(3) << "depth=" << depth << " height=" << height 
+ << " width=" << width; + testMatrixPool3D(depth, height, width); + } + } + } +} + #endif diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp index 0547ac93cd..77fd0c5890 100644 --- a/paddle/parameter/Argument.cpp +++ b/paddle/parameter/Argument.cpp @@ -186,6 +186,7 @@ void Argument::resizeAndCopyFrom(const Argument& src, resizeAndCopy(strs, src.strs, useGpu, stream); frameWidth = src.frameWidth; frameHeight = src.frameHeight; + frameDepth = src.frameDepth; } int32_t Argument::resizeAndCopyFrom(const Argument& src, @@ -206,6 +207,7 @@ int32_t Argument::resizeAndCopyFrom(const Argument& src, dataId = src.dataId; frameWidth = src.frameWidth; frameHeight = src.frameHeight; + frameDepth = src.frameDepth; if (!src.sequenceStartPositions) { // non-sequence input, copy samples directly diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h index d8d7a4398f..ba3ad2fd4d 100644 --- a/paddle/parameter/Argument.h +++ b/paddle/parameter/Argument.h @@ -1,11 +1,8 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -35,6 +32,7 @@ struct Argument { strs(nullptr), frameHeight(0), frameWidth(0), + frameDepth(0), sequenceStartPositions(nullptr), subSequenceStartPositions(nullptr), cpuSequenceDims(nullptr), @@ -64,6 +62,7 @@ struct Argument { allCount = argument.allCount; frameHeight = argument.frameHeight; frameWidth = argument.frameWidth; + frameDepth = argument.frameDepth; dataId = argument.dataId; } @@ -76,6 +75,7 @@ struct Argument { // A dataBatch includes batchSize frames, one frame maybe not only vector size_t frameHeight; size_t frameWidth; + size_t frameDepth; // If NULL, each position is treated independently. // Otherwise, its size should be #NumberOfSequences + 1. @@ -136,8 +136,10 @@ struct Argument { } size_t getFrameHeight() const { return frameHeight; } size_t getFrameWidth() const { return frameWidth; } + size_t getFrameDepth() const { return frameDepth; } void setFrameHeight(size_t h) { frameHeight = h; } void setFrameWidth(size_t w) { frameWidth = w; } + void setFrameDepth(size_t d) { frameDepth = d; } int64_t getNumSequences() const { return sequenceStartPositions ? 
sequenceStartPositions->getSize() - 1 diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 4f3d5bf3f6..42cf10e9d3 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -82,6 +82,12 @@ message ConvConfig { // if not set, use img_size optional uint32 img_size_y = 14; + + optional uint32 filter_size_z = 15 [ default = 1 ]; + optional uint32 padding_z = 16 [ default = 1 ]; + optional uint32 stride_z = 17 [ default = 1 ]; + optional uint32 output_z = 18 [ default = 1 ]; + optional uint32 img_size_z = 19 [ default = 1 ]; } message PoolConfig { @@ -124,6 +130,12 @@ message PoolConfig { // if not set, use padding optional uint32 padding_y = 13; + + optional uint32 size_z = 14 [ default = 1 ]; + optional uint32 stride_z = 15 [ default = 1 ]; + optional uint32 output_z = 16 [ default = 1 ]; + optional uint32 img_size_z = 17 [ default = 1 ]; + optional uint32 padding_z = 18 [ default = 1 ]; } message SppConfig { From d7b80f03b0064ac9db5db5f313bc381f9046f689 Mon Sep 17 00:00:00 2001 From: xuwei06 Date: Wed, 2 Aug 2017 11:29:46 -0700 Subject: [PATCH 047/170] Correctly handle width and height for some layers --- python/paddle/trainer/config_parser.py | 11 ++++--- .../paddle/trainer_config_helpers/layers.py | 29 +++++++++++++++---- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 8d71629faa..b3d5ef95cc 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -338,7 +338,8 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name, in_links_count += 1 layer_name = MakeLayerNameInParentSubmodel(name) layer = g_layer_map[layer_name] - ScatterAgentLayer(name=name, size=layer.size) + ScatterAgentLayer( + name=name, size=layer.size, width=layer.width, height=layer.height) pair = g_current_submodel.in_links.add() pair.layer_name = layer_name @@ -2197,8 +2198,8 @@ class MaxOutLayer(LayerBase): maxout_conf = self.config.inputs[0].maxout_conf parse_maxout(self.inputs[0].maxout, input_layer.name, maxout_conf) out_channels = maxout_conf.image_conf.channels / maxout_conf.groups - self.set_cnn_layer(name, g_layer_map[input_layer.name].height, - g_layer_map[input_layer.name].width, out_channels) + self.set_cnn_layer(name, maxout_conf.image_conf.img_size_y, + maxout_conf.image_conf.img_size, out_channels) @config_layer('row_conv') @@ -2405,9 +2406,11 @@ class GatherAgentLayer(LayerBase): @config_layer('scatter_agent') class ScatterAgentLayer(LayerBase): - def __init__(self, name, size, device=None): + def __init__(self, name, size, width=None, height=None, device=None): super(ScatterAgentLayer, self).__init__( name, 'scatter_agent', size, inputs=[], device=device) + if height and width: + self.set_layer_height_width(height, width) @config_layer('multiplex') diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index c9e3ded65c..dd6d1f7f8c 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -16,11 +16,13 @@ import functools import collections import inspect +import paddle.trainer.config_parser as cp from paddle.trainer.config_parser import * from .activations import LinearActivation, SigmoidActivation, TanhActivation, \ ReluActivation, IdentityActivation, SoftmaxActivation, BaseActivation from .evaluators import * -from .poolings import MaxPooling, AvgPooling, BasePoolingType +from .poolings import MaxPooling, AvgPooling, BasePoolingType, 
\ + CudnnAvgPooling, CudnnMaxPooling from .attrs import * from .default_decorators import * @@ -330,6 +332,14 @@ class LayerOutput(object): self.outputs = outputs self.reverse = reverse + @property + def width(self): + return cp.g_layer_map[self.full_name].width + + @property + def height(self): + return cp.g_layer_map[self.full_name].height + def set_input(self, input): """ Set the input for a memory layer. Can only be used for memory layer @@ -911,7 +921,13 @@ def data_layer(name, size, height=None, width=None, layer_attr=None): width=width, **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput(name, LayerType.DATA, size=size) + num_filters = None + if height is not None and width is not None: + num_filters = size / (width * height) + assert num_filters * width * height == size, \ + "size=%s width=%s height=%s" % (size, width, height) + + return LayerOutput(name, LayerType.DATA, size=size, num_filters=num_filters) @wrap_name_default("embedding") @@ -2571,6 +2587,10 @@ def img_pool_layer(input, assert input.num_filters is not None num_channels = input.num_filters + assert type(pool_type) in [AvgPooling, MaxPooling, CudnnAvgPooling, + CudnnMaxPooling], \ + "only AvgPooling and MaxPooling are supported" + if pool_type is None: pool_type = MaxPooling() elif isinstance(pool_type, AvgPooling): @@ -2580,7 +2600,6 @@ def img_pool_layer(input, if ( isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \ else pool_type.name - pool_size_y = pool_size if pool_size_y is None else pool_size_y stride_y = stride if stride_y is None else stride_y padding_y = padding if padding_y is None else padding_y @@ -4204,8 +4223,7 @@ def conv_operator(img, num_channels = img.num_filters assert isinstance(filter, LayerOutput) - if filter.size is not None: - filter.size = filter_size * filter_size_y * num_filters * num_channels + assert filter.size is not None opCls = ConvTransOperator if trans else ConvOperator @@ -4916,7 +4934,6 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None): :return: LayerOutput object. :rtype: LayerOutput """ - assert input.layer_type == LayerType.CONV_LAYER assert isinstance(input.activation, LinearActivation) assert groups > 1 if num_channels is None: From 99af29e3f29f0392727bba312282e56a431dfc7b Mon Sep 17 00:00:00 2001 From: xuwei06 Date: Mon, 21 Aug 2017 14:17:13 -0700 Subject: [PATCH 048/170] Fix error message for img_pool_layer --- python/paddle/trainer_config_helpers/layers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index dd6d1f7f8c..be854c38f7 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -2589,7 +2589,7 @@ def img_pool_layer(input, assert type(pool_type) in [AvgPooling, MaxPooling, CudnnAvgPooling, CudnnMaxPooling], \ - "only AvgPooling and MaxPooling are supported" + "only (Cudnn)AvgPooling, (Cudnn)MaxPooling are supported" if pool_type is None: pool_type = MaxPooling() @@ -6236,11 +6236,11 @@ def kmax_sequence_score_layer(input, name=None, beam_size=1): @wrap_bias_attr_default() def scale_shift_layer(input, name=None, param_attr=None, bias_attr=None): """ - A layer applies a linear transformation to each element in each row of - the input matrix. For each element, the layer first re-scale it and then + A layer applies a linear transformation to each element in each row of + the input matrix. 
For each element, the layer first re-scale it and then adds a bias to it. - This layer is very like the SlopeInterceptLayer, except the scale and + This layer is very like the SlopeInterceptLayer, except the scale and bias are trainable. .. math:: From 118dd1494fbe3654da8f71c2245523e27616d475 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 21 Aug 2017 18:22:59 -0700 Subject: [PATCH 049/170] can run, for debug --- .../paddle/v2/framework/tests/CMakeLists.txt | 1 + python/paddle/v2/framework/tests/mnist.py | 73 +++++++++++++++++-- 2 files changed, 66 insertions(+), 8 deletions(-) diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index ce57a07130..41682c8350 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -27,3 +27,4 @@ py_test(test_uniform_random_op SRCS test_uniform_random_op.py) py_test(test_recurrent_op SRCS test_recurrent_op.py) py_test(test_sgd_op SRCS test_sgd_op.py) py_test(test_gradient_checker SRCS test_gradient_checker.py) +py_test(mnist SRCS mnist.py) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index 32a088ac28..d0c56c457d 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -2,7 +2,7 @@ import paddle.v2.framework.core as core from paddle.v2.framework.op import Operator import numpy -BATCH_SIZE = 100 +BATCH_SIZE = 2 scope = core.Scope() place = core.CPUPlace() @@ -35,10 +35,15 @@ def data_layer(name, dims): def feed_data(name, data): - assert isinstance(data, numpy.array) + assert isinstance(data, numpy.ndarray) tensor = scope.find_var(name).get_tensor() tensor.set_dims(data.shape) - tensor.alloc_float(place) + if data.dtype == numpy.dtype('int32'): + tensor.alloc_float(place) + elif data.dtype == numpy.dtype('float32'): + tensor.alloc_int(place) + else: + raise ValueError("data type not supported") tensor.set(data, place) @@ -49,7 +54,11 @@ def grad_var_name(var_name): def sgd_optimizer(net, param_name, learning_rate=0.01): grad_name = grad_var_name(param_name) optimize_op = Operator( - "sgd", param=param_name, grad=grad_name, learning_rate=learning_rate) + "sgd", + param=param_name, + grad=grad_name, + param_out=param_name, + learning_rate=learning_rate) net.add_op(optimize_op) @@ -65,7 +74,7 @@ def init_param(param_name, dims): # fc_layer -def fc_layer(net, input, size, act="sigmoid", bias=True, param=None, name=None): +def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None): """ Add a fc layer to net @@ -125,16 +134,64 @@ def cross_entropy_layer(net, input, label): return cost_name +def get_backward_net(forward_net): + net = core.Operator.backward(forward_net, set()) + for input in net.inputs()["all"]: + var = scope.new_var(input) + var.get_tensor() + for output in net.outputs()["all"]: + var = scope.new_var(output) + var.get_tensor() + return net + + +def print_inputs_outputs(op): + print("===============" + op.type() + "==============") + print("***inputs:***") + for input in op.inputs()["all"]: + print input, scope.find_var(input).get_tensor().get_dims() + print("***outputs:***") + for output in op.outputs()["all"]: + print output, scope.find_var(output).get_tensor().get_dims() + print("") + print("") + + images = data_layer(name='pixel', dims=[BATCH_SIZE, 784]) label = data_layer(name='label', dims=[BATCH_SIZE]) fc = fc_layer(net=forward_network, input=images, size=10, act="softmax") cost = 
cross_entropy_layer(net=forward_network, input=fc, label=label) forward_network.complete_add_op(True) print(forward_network) -backward_net = core.Operator.backward(forward_network, set()) - +backward_net = get_backward_net(forward_network) print(backward_net) +optimize_net.complete_add_op(True) +print(optimize_net) PASS_NUM = 10 for pass_id in range(PASS_NUM): - print pass_id + print("===========forward==========") + feed_data("pixel", numpy.random.random((BATCH_SIZE, 784)).astype('float32')) + feed_data("label", numpy.ones(BATCH_SIZE).astype("int32")) + forward_network.infer_shape(scope) + print_inputs_outputs(forward_network) + + print(numpy.array(scope.find_var("label").get_tensor())) + forward_network.run(scope, dev_ctx) + # print(numpy.array(scope.find_var("fc_0").get_tensor())) + + print("===========backward==========") + cost_data = numpy.array(scope.find_var("cross_entropy_1").get_tensor()) + cost_grad = scope.find_var(grad_var_name("cross_entropy_1")).get_tensor() + cost_grad.set_dims(cost_data.shape) + cost_grad.alloc_float(place) + cost_grad.set(cost_data, place) + + backward_net.infer_shape(scope) + print_inputs_outputs(backward_net) + + backward_net.run(scope, dev_ctx) + + print("===========optimize_net==========") + print_inputs_outputs(optimize_net) + optimize_net.run(scope, dev_ctx) From 53e71b44f41860e6482651b9e92dd1e6d3213c8a Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Tue, 22 Aug 2017 03:28:21 +0000 Subject: [PATCH 050/170] gather op bp passed --- paddle/operators/CMakeLists.txt | 2 -- paddle/operators/gather.h | 6 +++--- paddle/operators/gather_op.cc | 8 ++++---- paddle/operators/gather_op.h | 19 ++++++++++--------- .../v2/framework/tests/test_gather_op.py | 18 ++++++++++++++---- 5 files changed, 31 insertions(+), 22 deletions(-) diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 6849e39cb7..ba1362e8bf 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -44,8 +44,6 @@ endfunction() add_subdirectory(math) cc_test(gather_test SRCS gather_test.cc DEPS tensor) op_library(gather_op SRCS gather_op.cc gather_op.cu) -# DEPS op_registry) -# cc_test(gather_op_test SRCS gather_op_test.cc DEPS gather_op) cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) diff --git a/paddle/operators/gather.h b/paddle/operators/gather.h index 3f299ea1a6..edac29f6db 100644 --- a/paddle/operators/gather.h +++ b/paddle/operators/gather.h @@ -27,13 +27,13 @@ namespace operators { // Implementation of CPU copy template -void CPUGather(const T* params, const int* indices, const int slice_size, +void CPUGather(const T* src, const int* indices, const int slice_size, const int index_size, T* output) { const size_t slice_bytes = slice_size * sizeof(T); for (int i = 0; i < index_size; ++i) { int index_ = indices[i]; - memcpy(output + i * slice_size, params + index_ * slice_size, slice_bytes); + memcpy(output + i * slice_size, src + index_ * slice_size, slice_bytes); } } @@ -57,7 +57,7 @@ void Gather(const platform::Place& place, const paddle::framework::Tensor* src, int index_size = index->dims()[0]; auto src_dims = src->dims(); - paddle::framework::DDim output_dims(src_dims); + framework::DDim output_dims(src_dims); output_dims[0] = index_size; // slice size diff --git a/paddle/operators/gather_op.cc b/paddle/operators/gather_op.cc index 499def05a7..123bed296c 100644 --- a/paddle/operators/gather_op.cc +++ b/paddle/operators/gather_op.cc @@ -26,9 +26,9 @@ class GatherOp : public framework::OperatorWithKernel { void InferShape(const 
framework::InferShapeContext &ctx) const override { int batch_size = ctx.Input("Index")->dims()[0]; PADDLE_ENFORCE_GE(batch_size, 0, "Batch size must be >0"); - paddle::framework::DDim output_dims(ctx.Input("X")->dims()); + framework::DDim output_dims(ctx.Input("X")->dims()); output_dims[0] = batch_size; - ctx.Output("Y")->Resize(output_dims); + ctx.Output("Out")->Resize(output_dims); } }; @@ -51,11 +51,11 @@ class GatherOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The source input of gather op"); AddInput("Index", "The index input of gather op"); - AddOutput("Y", "The output of add op"); + AddOutput("Out", "The output of add op"); AddComment(R"DOC( Gather Operator by selecting from the first axis, -Y = X[Index] +Out = X[Index] )DOC"); } }; diff --git a/paddle/operators/gather_op.h b/paddle/operators/gather_op.h index 13e4c9b058..381854f301 100644 --- a/paddle/operators/gather_op.h +++ b/paddle/operators/gather_op.h @@ -26,10 +26,10 @@ using Tensor = framework::Tensor; template class GatherOpKernel : public framework::OpKernel { public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto X = ctx.Input("X"); - auto Index = ctx.Input("Index"); - auto Y = ctx.Output("Y"); + void Compute(const framework::ExecutionContext &ctx) const override { + auto *X = ctx.Input("X"); + auto *Index = ctx.Input("Index"); + auto *Y = ctx.Output("Out"); Y->mutable_data(ctx.GetPlace()); Gather(ctx.GetPlace(), X, Index, Y); @@ -39,12 +39,13 @@ class GatherOpKernel : public framework::OpKernel { template class GatherGradientOpKernel : public framework::OpKernel { public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto Index = ctx.Input("Index"); - auto dX = ctx.Output(framework::GradVarName("X")); - auto dY = ctx.Input(framework::GradVarName("Y")); + void Compute(const framework::ExecutionContext &ctx) const override { + auto *Index = ctx.Input("Index"); + auto *dX = ctx.Output(framework::GradVarName("X")); + auto *dO = ctx.Input(framework::GradVarName("Out")); - ScatterUpdate(ctx.GetPlace(), dY, Index, dX); + dX->mutable_data(ctx.GetPlace()); + ScatterUpdate(ctx.GetPlace(), dO, Index, dX); } }; diff --git a/python/paddle/v2/framework/tests/test_gather_op.py b/python/paddle/v2/framework/tests/test_gather_op.py index 049054d07b..e868983042 100644 --- a/python/paddle/v2/framework/tests/test_gather_op.py +++ b/python/paddle/v2/framework/tests/test_gather_op.py @@ -1,11 +1,10 @@ import unittest - +from op_test_util import OpTestMeta +from gradient_checker import GradientChecker, create_op import numpy import paddle.v2.framework.core as core from paddle.v2.framework.op import Operator -from op_test_util import OpTestMeta - class TestGatherOp(unittest.TestCase): __metaclass__ = OpTestMeta @@ -17,7 +16,18 @@ class TestGatherOp(unittest.TestCase): 'X': xnp, 'Index': numpy.array([1, 3, 5]).astype("int32") } - self.outputs = {'Y': self.inputs['X'][self.inputs['Index']]} + self.outputs = {'Out': self.inputs['X'][self.inputs['Index']]} + + +class TestGatherGradOp(GradientChecker): + def test_gather_grad(self): + print 'creating op' + op = create_op("gather") + print 'creating op done' + xnp = numpy.random.random((10, 20)).astype("float32") + inputs = {'X': xnp, 'Index': numpy.array([1, 3, 5]).astype("int32")} + print 'correct before check gradient' + self.check_grad(op, inputs, set("X"), "Out") if __name__ == "__main__": From dc5f0dbc324e0e15bef1753aeaed6700f5972cf0 Mon Sep 17 00:00:00 2001 From: 
zchen0211 Date: Tue, 22 Aug 2017 05:27:02 +0000 Subject: [PATCH 051/170] remove opregistry in gather function --- paddle/operators/gather.h | 1 - 1 file changed, 1 deletion(-) diff --git a/paddle/operators/gather.h b/paddle/operators/gather.h index edac29f6db..92fb51ec17 100644 --- a/paddle/operators/gather.h +++ b/paddle/operators/gather.h @@ -18,7 +18,6 @@ limitations under the License. */ #include "paddle/framework/ddim.h" #include "paddle/framework/eigen.h" -#include "paddle/framework/op_registry.h" #include "paddle/framework/tensor.h" #include "paddle/platform/place.h" From 4eecd0c2d531f66e64eebff88a99488275143207 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Tue, 22 Aug 2017 14:18:16 +0800 Subject: [PATCH 052/170] use MKLDNNMatrix in fc backward --- paddle/gserver/layers/MKLDNNFcLayer.cpp | 77 ++++++++++++------------- paddle/gserver/layers/MKLDNNLayer.h | 59 ++++++++++++++----- paddle/math/MKLDNNMatrix.h | 33 +++++++++-- 3 files changed, 110 insertions(+), 59 deletions(-) diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index fac0390eee..5463104469 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -158,10 +158,8 @@ void MKLDNNFcLayer::resetFwd() { hasSpatial_ ? memory::dims{oc_, ic_, ih_, iw_} : memory::dims{oc_, ic_}, hasSpatial_ ? format::oihw : format::oi, engine_); - biasVal_ = hasBias ? MKLDNNMatrix::create(bias, {oc_}, format::x, engine_) : nullptr; - outVal_ = MKLDNNMatrix::create(out, {bs_, oc_}, format::nc, engine_); // change original output to mkldnn output @@ -193,46 +191,41 @@ void MKLDNNFcLayer::resetBwd() { return; } needResetBwd_ = false; - bool hasBias = biases_ && biases_->getWGrad(); - real* iData = getInputValue(0)->getData(); - real* iDiff = getInputGrad(0) != nullptr ? getInputGrad(0)->getData() : NULL; - real* oDiff = getOutputGrad()->getData(); - real* wDiff = weight_->getWGrad()->getData(); - real* bDiff = hasBias ? biases_->getWGrad()->getData() : NULL; /// backward weight - // create memory desc for backward memory - memory::desc iMD = hasSpatial_ ? createMD({bs_, ic_, ih_, iw_}, format::nchw) - : createMD({bs_, ic_}, format::nc); - memory::desc wMD = hasSpatial_ ? createMD({oc_, ic_, ih_, iw_}, format::oihw) - : createMD({oc_, ic_}, format::oi); - memory::desc oMD = createMD({bs_, oc_}, format::nc); - memory::desc bMD = bDiff != NULL ? createMD({oc_}, format::x) - : createMD({}, format::format_undef); - - if (inVal_) { - // update data - inVal_->set_data_handle(iData); - } else { - LOG(FATAL) << "Should not be empty"; - // inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData)); - } - - // create memory primitive desc and memory self - wgtGrad_.reset(new memory(memory::primitive_desc(wMD, engine_), wDiff)); - outGrad_.reset(new memory(memory::primitive_desc(oMD, engine_), oDiff)); + CHECK(inVal_) << "Should have input value"; + const MatrixPtr& wgt = weight_->getWGrad(); + const MatrixPtr& bias = hasBias ? biases_->getWGrad() : nullptr; + const MatrixPtr& out = output_.grad; + + wgtGrad_ = MKLDNNMatrix::create( + wgt, wgtVal_->getDims(), wgtVal_->getFormat(), engine_); + biasGrad_ = + hasBias ? MKLDNNMatrix::create(bias, {oc_}, format::x, engine_) : nullptr; - fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward, iMD, wMD, oMD); + outGrad_ = MKLDNNMatrix::create(out, {bs_, oc_}, format::nc, engine_); + // change original output to mkldnn output + // TODO: right? 
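+  // Pointing output_.grad at the MKLDNN-backed matrix lets the backward
+  // primitives consume the output diff in place, mirroring what resetFwd()
+  // does for output_.value.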
+ output_.grad = std::dynamic_pointer_cast(outGrad_); + + // create memory primitive desc + fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward, + inVal_->getMD(), + wgtGrad_->getMD(), + outGrad_->getMD()); fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); - fc_bwdWgt::desc bwdWgtDesc = bDiff != NULL - ? fc_bwdWgt::desc(iMD, wMD, bMD, oMD) - : fc_bwdWgt::desc(iMD, wMD, oMD); + fc_bwdWgt::desc bwdWgtDesc = + hasBias ? fc_bwdWgt::desc(inVal_->getMD(), + wgtGrad_->getMD(), + biasGrad_->getMD(), + outGrad_->getMD()) + : fc_bwdWgt::desc( + inVal_->getMD(), wgtGrad_->getMD(), outGrad_->getMD()); fc_bwdWgt::primitive_desc bwdWgtPD = fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, fwdPD); - if (bDiff != NULL) { - biasGrad_.reset(new memory(memory::primitive_desc(bMD, engine_), bDiff)); + if (hasBias) { bwdWgt_.reset( new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_, *biasGrad_)); } else { @@ -242,13 +235,19 @@ void MKLDNNFcLayer::resetBwd() { pipelineBwd_.push_back(*bwdWgt_); /// backward data - if (iDiff == NULL) { + const MatrixPtr& in = getInputGrad(0); + if (in == nullptr) { return; } - fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(iMD, wMD, oMD); + fc_bwdData::desc bwdDataDesc = + fc_bwdData::desc(inVal_->getMD(), wgtGrad_->getMD(), outGrad_->getMD()); fc_bwdData::primitive_desc bwdDataPD = fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD); - inGrad_.reset(new memory(memory::primitive_desc(iMD, engine_), iDiff)); + + // TODO: check right, just from ingrad? + inGrad_ = + MKLDNNMatrix::create(in, inVal_->getDims(), inVal_->getFormat(), engine_); + CHECK(wgtVal_) << "Should have weight memory"; bwdData_.reset(new fc_bwdData(bwdDataPD, *outGrad_, *wgtVal_, *inGrad_)); pipelineBwd_.push_back(*bwdData_); @@ -264,7 +263,7 @@ void MKLDNNFcLayer::forward(PassType passType) { // update input data // since it might be changed if this is after data layer real* iData = getInputValue(0)->getData(); - inVal_->set_data_handle(iData); + inVal_->updateData(iData); // just submit forward pipeline stream_->submit(pipelineFwd_); @@ -288,7 +287,7 @@ void MKLDNNFcLayer::backward(const UpdateCallback& callback) { // update diff real* oDiff = getOutputGrad()->getData(); - outGrad_->set_data_handle(oDiff); + outGrad_->updateData(oDiff); // just sumbmit backward pipeline stream_->submit(pipelineBwd_); diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index b44095befb..fbd62d9aaa 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -52,16 +52,15 @@ protected: std::vector pipelineFwd_; std::vector pipelineBwd_; - // TODO(TJ): change below memory as MKLDNNMatrixPtr type - // MKLDNNMatrixPtr ; + // MKLDNNMatrixPtr MKLDNNMatrixPtr inVal_; - std::shared_ptr inGrad_; + MKLDNNMatrixPtr inGrad_; MKLDNNMatrixPtr outVal_; - std::shared_ptr outGrad_; + MKLDNNMatrixPtr outGrad_; MKLDNNMatrixPtr wgtVal_; - std::shared_ptr wgtGrad_; + MKLDNNMatrixPtr wgtGrad_; MKLDNNMatrixPtr biasVal_; - std::shared_ptr biasGrad_; + MKLDNNMatrixPtr biasGrad_; public: explicit MKLDNNLayer(const LayerConfig& config) @@ -84,17 +83,24 @@ public: virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) { + CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." 
+ << "Please set WITH_MKLDNN=ON " + << "and set use_mkldnn=True"; + if (useGpu_ == true) { + LOG(WARNING) << "Do not support GPU yet, will change to useGpu = false"; + useGpu_ = false; + } + + // set device id before Layer::init + setDevice(MKLDNN_DEVICE); + // change param device to MKLDNN device + setParamsDevice(MKLDNN_DEVICE, parameterMap); if (!Layer::init(layerMap, parameterMap)) { return false; } - CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." - << "Please set WITH_MKLDNN=ON " - << "and set use_mkldnn=True"; stream_.reset(new MKLDNNStream()); engine_ = CPUEngine::Instance().getEngine(); - - setDeviceID(MKLDNN_DEVICE); return true; } @@ -136,10 +142,33 @@ public: } protected: - void setDeviceID(int id) { - deviceId_ = id; - output_.deviceId = id; - // TODO: handle mkldnn device or add mkldnn device to other + /** + * Set deviceId of this layer. + */ + void setDevice(int id) { deviceId_ = id; } + + /** + * Set deviceId of the params used in this layer. + */ + void setParamsDevice(int id, const ParameterMap& parameterMap) { + for (auto& inputConfig : config_.inputs()) { + if (inputConfig.has_input_parameter_name()) { + ParameterPtr parameter; + std::string name = inputConfig.input_parameter_name(); + CHECK(mapGet(name, parameterMap, ¶meter)) + << "Cannot find input parameter " << name << " for layer " + << getName(); + parameter->setDevice(id); + } + } + if (config_.has_bias_parameter_name()) { + ParameterPtr parameter; + std::string name = config_.bias_parameter_name(); + CHECK(mapGet(name, parameterMap, ¶meter)) + << "Cannot find bias parameter " << name << " for layer " + << getName(); + parameter->setDevice(id); + } } }; diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h index 73eb50d2a0..54c0a1fdcb 100644 --- a/paddle/math/MKLDNNMatrix.h +++ b/paddle/math/MKLDNNMatrix.h @@ -44,6 +44,8 @@ public: set_data_handle(CpuMatrix::getData()); } + ~MKLDNNMatrix() {} + static MKLDNNMatrixPtr create( const MatrixPtr& m, mkldnn::memory::dims dims, @@ -52,21 +54,42 @@ public: mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32); /** - * Get primitive descriptor + * Get primitive descriptor. */ mkldnn::memory::primitive_desc getPD() { return this->get_primitive_desc(); } /** - * Get memory descriptor + * Get memory descriptor. */ mkldnn::memory::desc getMD() { return getPD().desc(); } /** - * Get format + * Get dims. */ - int getFormat() { return getMD().data.format; } + mkldnn::memory::dims getDims() { + mkldnn::memory::dims dst; + int* src = getMD().data.dims; + int ndims = getMD().data.ndims; + dst.resize(ndims); + for (int i = 0; i < ndims; ++i) { + dst[i] = src[i]; + } + return dst; + } - ~MKLDNNMatrix() {} + /** + * Get format. + */ + mkldnn::memory::format getFormat() { + return (mkldnn::memory::format)(getMD().data.format); + } + + /** + * Update the memory data handle. + * Caution: This will not check the buffer size of the data, + * it should be coverd by user. 
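+   * That is, the caller must guarantee the new buffer is large enough for
+   * the memory described by this primitive.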
+ */ + void updateData(void* data) { set_data_handle(data); } }; } // namespace paddle From 950cc60d2b2e6ab9c05f82df3f2d3f3179541209 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 22 Aug 2017 15:29:38 +0800 Subject: [PATCH 053/170] Add minus --- paddle/framework/CMakeLists.txt | 3 +- paddle/framework/pybind.cc | 1 + paddle/operators/CMakeLists.txt | 1 + paddle/operators/minus_op.cc | 84 +++++++++++++++++++++++++++++++++ paddle/operators/minus_op.cu | 18 +++++++ paddle/operators/minus_op.h | 39 +++++++++++++++ 6 files changed, 145 insertions(+), 1 deletion(-) create mode 100644 paddle/operators/minus_op.cc create mode 100644 paddle/operators/minus_op.cu create mode 100644 paddle/operators/minus_op.h diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 5df14ae78d..c9cf45e9d7 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -56,5 +56,6 @@ cc_library(paddle_pybind SHARED uniform_random_op gaussian_random_op fill_zeros_like_op - scale_op) + scale_op + minus_op) endif(WITH_PYTHON) diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index 3aaf0de150..b4b7921d33 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -44,6 +44,7 @@ USE_OP(gaussian_random); USE_OP(uniform_random); USE_OP(scale); USE_OP_ITSELF(identity); +USE_OP(minus); namespace paddle { namespace framework { diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 0ba598823b..61f7a4070f 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -69,3 +69,4 @@ op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc op_library(uniform_random_op SRCS uniform_random_op.cc uniform_random_op.cu) op_library(scale_op SRCS scale_op.cc scale_op.cu DEPS net_op) +op_library(minus_op SRCS minus_op.cc minus_op.cu DEPS scale_op) diff --git a/paddle/operators/minus_op.cc b/paddle/operators/minus_op.cc new file mode 100644 index 0000000000..c660ab5d32 --- /dev/null +++ b/paddle/operators/minus_op.cc @@ -0,0 +1,84 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#include "paddle/operators/minus_op.h" +#include "paddle/operators/net_op.h" + +namespace paddle { +namespace operators { + +class MinusOp : public framework::OperatorWithKernel { + public: + MinusOp(const std::string &type, const VarNameMap &inputs, + const VarNameMap &outputs, const framework::AttributeMap &attrs) + : OperatorWithKernel(type, inputs, outputs, attrs) {} + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + auto *left_tensor = ctx.Input("X"); + auto *right_tensor = ctx.Input("Y"); + + PADDLE_ENFORCE_EQ( + framework::product(left_tensor->dims()), + framework::product(right_tensor->dims()), + "Minus operator must take two tensor with same num of elements"); + ctx.Output("Out")->Resize(left_tensor->dims()); + } +}; + +class MinusOpMaker : public framework::OpProtoAndCheckerMaker { + public: + MinusOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "The left tensor of minus operator.").NotInGradient(); + AddInput("Y", "The right tensor of minus operator.").NotInGradient(); + AddOutput("Out", "The output tensor of minus operator.").NotInGradient(); + + AddComment(R"DOC(Minus Operator + +Equation: Out = X - Y +)DOC"); + } +}; +template +class MinusGradOp : public NetOp { + public: + MinusGradOp(const std::string &type, const VarNameMap &inputs, + const VarNameMap &outputs, const framework::AttributeMap &attrs) + : NetOp(type, inputs, outputs, attrs) { + auto out_grad = Input(framework::GradVarName("Out")); + auto x_grad = Output(framework::GradVarName("X")); + auto y_grad = Output(framework::GradVarName("Y")); + + // x_grad = out_grad + AddOp(framework::OpRegistry::CreateOp("identity", {{"X", {out_grad}}}, + {{"Out", {x_grad}}}, {})); + + framework::AttributeMap scale_attr; + scale_attr["scale"] = static_cast(-1); + AddOp(framework::OpRegistry::CreateOp("scale", {{"X", {out_grad}}}, + {{"Out", {y_grad}}}, scale_attr)); + } +}; + +} // namespace operators +} // namespace paddle + +USE_OP(scale); +USE_OP_ITSELF(identity); +namespace ops = paddle::operators; +REGISTER_OP(minus, ops::MinusOp, ops::MinusOpMaker, minus_grad, + ops::MinusGradOp); +REGISTER_OP_CPU_KERNEL(minus, + ops::MinusKernel); diff --git a/paddle/operators/minus_op.cu b/paddle/operators/minus_op.cu new file mode 100644 index 0000000000..a8375cc630 --- /dev/null +++ b/paddle/operators/minus_op.cu @@ -0,0 +1,18 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/minus_op.h" + +REGISTER_OP_GPU_KERNEL( + minus, paddle::operators::MinusKernel); diff --git a/paddle/operators/minus_op.h b/paddle/operators/minus_op.h new file mode 100644 index 0000000000..6310a4fd51 --- /dev/null +++ b/paddle/operators/minus_op.h @@ -0,0 +1,39 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +template +class MinusKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* left_tensor = context.Input("X"); + auto* right_tensor = context.Input("Y"); + auto* out_tensor = context.Output("Out"); + + out_tensor->mutable_data(context.GetPlace()); + auto& dev = context.GetEigenDevice(); + framework::EigenVector::Flatten(*out_tensor).device(dev) = + framework::EigenVector::Flatten(*left_tensor) - + framework::EigenVector::Flatten(*right_tensor); + } +}; + +} // namespace operators +} // namespace paddle From 5a8fbb7d19e95f3be16bbee029e82e14f0a240df Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Tue, 22 Aug 2017 00:56:34 -0700 Subject: [PATCH 054/170] add data --- python/paddle/v2/framework/tests/mnist.py | 26 +++++++++++++++++------ 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index d0c56c457d..f75f196168 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -1,8 +1,9 @@ import paddle.v2.framework.core as core from paddle.v2.framework.op import Operator import numpy +import paddle.v2 as paddle -BATCH_SIZE = 2 +BATCH_SIZE = 100 scope = core.Scope() place = core.CPUPlace() @@ -39,9 +40,9 @@ def feed_data(name, data): tensor = scope.find_var(name).get_tensor() tensor.set_dims(data.shape) if data.dtype == numpy.dtype('int32'): - tensor.alloc_float(place) - elif data.dtype == numpy.dtype('float32'): tensor.alloc_int(place) + elif data.dtype == numpy.dtype('float32'): + tensor.alloc_float(place) else: raise ValueError("data type not supported") tensor.set(data, place) @@ -168,20 +169,31 @@ print(backward_net) optimize_net.complete_add_op(True) print(optimize_net) -PASS_NUM = 10 +reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=8192), + batch_size=BATCH_SIZE) + +PASS_NUM = 1000 for pass_id in range(PASS_NUM): print("===========forward==========") - feed_data("pixel", numpy.random.random((BATCH_SIZE, 784)).astype('float32')) - feed_data("label", numpy.ones(BATCH_SIZE).astype("int32")) + # feed_data("pixel", numpy.random.random((BATCH_SIZE, 784)).astype('float32')) + # feed_data("label", numpy.ones(BATCH_SIZE).astype("int32")) + data = reader().next() + image = numpy.array(map(lambda x: x[0], data)).astype("float32") + label = numpy.array(map(lambda x: x[1], data)).astype("int32") + feed_data("pixel", image) + feed_data("label", label) forward_network.infer_shape(scope) print_inputs_outputs(forward_network) - print(numpy.array(scope.find_var("label").get_tensor())) + # print(numpy.array(scope.find_var("label").get_tensor())) forward_network.run(scope, dev_ctx) # print(numpy.array(scope.find_var("fc_0").get_tensor())) print("===========backward==========") 
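    # There is no mean/scale cost op in this graph, so the backward pass needs
    # its starting gradient seeded by hand: the tensor named
    # grad_var_name("cross_entropy_1") is resized to the per-sample cost shape
    # and filled below before backward_net runs (a later patch in this series
    # switches the seed to all ones, i.e. the gradient of a plain sum of costs).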
cost_data = numpy.array(scope.find_var("cross_entropy_1").get_tensor()) + print(cost_data.sum() / len(cost_data)) cost_grad = scope.find_var(grad_var_name("cross_entropy_1")).get_tensor() cost_grad.set_dims(cost_data.shape) cost_grad.alloc_float(place) From 0f3b9e4112cbedd1b026f6cd09955d15f6207864 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Tue, 22 Aug 2017 15:36:43 +0800 Subject: [PATCH 055/170] lookup table op, cuda helper and set functor 1. finish lookup table CPU and GPU kernel 2. Add some cuda helper 3. Add some math funtor --- paddle/framework/pybind.cc | 1 + paddle/operators/CMakeLists.txt | 6 +- paddle/operators/functor/CMakeLists.txt | 5 + paddle/operators/functor/math_functor.cc | 42 +++++++ paddle/operators/functor/math_functor.cu | 42 +++++++ paddle/operators/functor/math_functor.h | 32 +++++ paddle/operators/lookup_table_op.cc | 71 +++++++++++ paddle/operators/lookup_table_op.cu | 116 ++++++++++++++++++ paddle/operators/lookup_table_op.h | 75 +++++++++++ paddle/platform/cuda_helper.h | 57 +++++++++ .../paddle/v2/framework/tests/CMakeLists.txt | 1 + .../v2/framework/tests/test_lookup_table.py | 31 +++++ 12 files changed, 477 insertions(+), 2 deletions(-) create mode 100644 paddle/operators/functor/CMakeLists.txt create mode 100644 paddle/operators/functor/math_functor.cc create mode 100644 paddle/operators/functor/math_functor.cu create mode 100644 paddle/operators/functor/math_functor.h create mode 100644 paddle/operators/lookup_table_op.cc create mode 100644 paddle/operators/lookup_table_op.cu create mode 100644 paddle/operators/lookup_table_op.h create mode 100644 paddle/platform/cuda_helper.h create mode 100644 python/paddle/v2/framework/tests/test_lookup_table.py diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index f0114b9e49..68c5526bbb 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -42,6 +42,7 @@ USE_OP(fill_zeros_like); USE_OP_ITSELF(recurrent_op); USE_OP(gaussian_random); USE_OP(uniform_random); +USE_OP(lookup_table); namespace paddle { namespace framework { diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index a7c89787e4..1ca5010eae 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -42,6 +42,8 @@ function(op_library TARGET) endfunction() add_subdirectory(math) +add_subdirectory(functor) + cc_test(gather_test SRCS gather_test.cc DEPS tensor) cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) @@ -66,5 +68,5 @@ op_library(sgd_op SRCS sgd_op.cc sgd_op.cu) op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc DEPS framework_proto tensor op_registry operator net_op) -op_library(uniform_random_op - SRCS uniform_random_op.cc uniform_random_op.cu) +op_library(uniform_random_op SRCS uniform_random_op.cc uniform_random_op.cu) +op_library(lookup_table_op SRCS lookup_table_op.cc lookup_table_op.cu DEPS math_functor) diff --git a/paddle/operators/functor/CMakeLists.txt b/paddle/operators/functor/CMakeLists.txt new file mode 100644 index 0000000000..d3b39e5fc2 --- /dev/null +++ b/paddle/operators/functor/CMakeLists.txt @@ -0,0 +1,5 @@ +if(WITH_GPU) + nv_library(math_functor SRCS math_functor.cc math_functor.cu DEPS device_context) +else() + cc_library(math_functor SRCS math_functor.cc DEPS device_context) +endif() diff --git a/paddle/operators/functor/math_functor.cc b/paddle/operators/functor/math_functor.cc new file mode 100644 index 0000000000..1f2767f171 --- /dev/null +++ b/paddle/operators/functor/math_functor.cc @@ -0,0 +1,42 @@ +/* 
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/functor/math_functor.h" +#include "paddle/framework/eigen.h" + +namespace paddle { +namespace operators { +namespace functor { + +template +struct Set { + void operator()(const T alpha, framework::Tensor* Y, + platform::DeviceContext* context) { + int N = product(Y->dims()); + T* YData = Y->mutable_data(context->GetPlace()); + if (alpha == static_cast(0)) { + memset(YData, 0, N * sizeof(T)); + } else { + framework::EigenVector::Flatten(*Y) + .setConstant(alpha); + } + } +}; + +template struct Set; +template struct Set; + +} // namespace functor +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/functor/math_functor.cu b/paddle/operators/functor/math_functor.cu new file mode 100644 index 0000000000..6dc828c60a --- /dev/null +++ b/paddle/operators/functor/math_functor.cu @@ -0,0 +1,42 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/functor/math_functor.h" +#include "paddle/platform/cuda_helper.h" + +namespace paddle { +namespace operators { +namespace functor { + +template +__global__ void SetKernel(const int N, const T alpha, T* Y) { + CUDA_1D_KERNEL_LOOP(i, N) { Y[i] = alpha; } +} + +template +struct Set { + void operator()(const T alpha, framework::Tensor* Y, + platform::DeviceContext* context) { + int N = product(Y->dims()); + T* YData = Y->mutable_data(context->GetPlace()); + SetKernel<<<(N + 512 - 1) / 512, 512>>>(N, alpha, YData); + } +}; + +template struct Set; +template struct Set; + +} // namespace functor +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/functor/math_functor.h b/paddle/operators/functor/math_functor.h new file mode 100644 index 0000000000..d5c7bd368f --- /dev/null +++ b/paddle/operators/functor/math_functor.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include "paddle/framework/tensor.h" +#include "paddle/platform/device_context.h" + +namespace paddle { +namespace operators { +namespace functor { + +template +struct Set { + void operator()(const T alpha, paddle::framework::Tensor* Y, + paddle::platform::DeviceContext* context); +}; + +} // namespace functor +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/lookup_table_op.cc b/paddle/operators/lookup_table_op.cc new file mode 100644 index 0000000000..5f70458a87 --- /dev/null +++ b/paddle/operators/lookup_table_op.cc @@ -0,0 +1,71 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/lookup_table_op.h" + +namespace paddle { +namespace operators { + +class LookupTableOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &context) const override { + auto table_t = context.Input("W"); + auto ids_t = context.Input("Ids"); + auto output_t = context.Output("Out"); + + output_t->Resize({ids_t->dims()[0], table_t->dims()[1]}); + } +}; + +class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker { + public: + LookupTableOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("W", + "An input represents embedding tensors," + " which is a learnable parameter."); + AddInput("Ids", + "An input with type int32 or int64" + "contains the ids to be looked up in W.") + .NotInGradient(); + AddOutput("Out", "The lookup results, which have the same type with W."); + AddComment( + "This operator is used to perform lookups on the parameter W," + "then concatenated into a dense tensor."); + } +}; + +class LookupTableOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &context) const override { + context.Output(0)->Resize(context.Input(0)->dims()); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(lookup_table, ops::LookupTableOp, ops::LookupTableOpMaker, + lookup_table_grad, ops::LookupTableOpGrad); + +REGISTER_OP_CPU_KERNEL(lookup_table, ops::LookupTableKernel); +REGISTER_OP_CPU_KERNEL(lookup_table_grad, ops::LookupTableGradKernel); diff --git a/paddle/operators/lookup_table_op.cu b/paddle/operators/lookup_table_op.cu new file mode 100644 index 0000000000..94b440e00e --- /dev/null +++ b/paddle/operators/lookup_table_op.cu @@ -0,0 +1,116 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/framework/op_registry.h" +#include "paddle/operators/functor/math_functor.h" +#include "paddle/platform/assert.h" +#include "paddle/platform/cuda_helper.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +template +__global__ void LookupTable(T* output, const T* table, const uint32_t* ids, + const int N, const int K, const int D) { + int idx = threadIdx.x; + int idy = blockIdx.x + threadIdx.y * gridDimX; + + while (idy < K) { + int id = ids[idy]; + PADDLE_ASSERT(id >= 0); + PADDLE_ASSERT(id < N); + T* out = output + idy; + const T* tab = table + id; + for (int i = idx; i < D; i += blockDimX) { + out[i] = tab[i]; + } + idy += blockDimY * gridDimX; + } +} + +template +__global__ void LookupTableGradKernel(T* table, const T* output, + const uint32_t* ids, const int N, + const int K, const int D) { + int idx = threadIdx.x; + int idy = blockIdx.x + threadIdx.y * gridDimX; + + while (idy < K) { + int id = ids[idy]; + PADDLE_ASSERT(id >= 0); + PADDLE_ASSERT(id < N); + const T* out = output + idy; + T* tab = table + id; + for (int i = idx; i < D; i += blockDimX) { + paddle::platform::CudaAtomicAdd(tab + i, out[i]); + } + idy += blockDimY * gridDimX; + } +} + +template +class LookupTableCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto table_t = context.Input("W"); + auto ids_t = context.Input("Ids"); + auto output_t = context.Output("Out"); + + size_t N = table_t->dims()[0]; + size_t D = table_t->dims()[1]; + size_t K = product(ids_t->dims()); + auto ids = ids_t->data(); + auto table = table_t->data(); + auto output = output_t->mutable_data(context.GetPlace()); + + dim3 threads(128, 8); + dim3 grids(8, 1); + LookupTable<<>>(output, table, ids, N, K, D); + } +}; + +template +class LookupTableGrad : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto ids_t = context.Input("Ids"); + auto d_output_t = context.Input(framework::GradVarName("Out")); + auto d_table_t = context.Output(framework::GradVarName("W")); + + int N = d_table_t->dims()[0]; + int D = d_table_t->dims()[1]; + int K = product(ids_t->dims()); + const uint32_t* ids = ids_t->data(); + T* d_table = d_table_t->mutable_data(context.GetPlace()); + const T* d_output = d_output_t->data(); + + auto* device_context = + const_cast(context.device_context_); + functor::Set()(static_cast(0), d_table_t, + device_context); + dim3 threads(128, 8); + dim3 grids(8, 1); + LookupTableGradKernel<<>>(d_table, d_output, + ids, N, K, D); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(lookup_table, ops::LookupTableCUDAKernel); +REGISTER_OP_GPU_KERNEL(lookup_table_grad, ops::LookupTableGrad); diff --git a/paddle/operators/lookup_table_op.h b/paddle/operators/lookup_table_op.h new file mode 100644 index 0000000000..790ecab3c6 --- /dev/null +++ b/paddle/operators/lookup_table_op.h @@ -0,0 +1,75 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include "paddle/framework/op_registry.h" +#include "paddle/operators/functor/math_functor.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +template +class LookupTableKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto table_t = context.Input("W"); // float tensor + auto ids_t = context.Input("Ids"); // int tensor + auto output_t = context.Output("Out"); // float tensor + + size_t N = table_t->dims()[0]; + size_t D = table_t->dims()[1]; + auto ids = ids_t->data(); + auto table = table_t->data(); + auto output = output_t->mutable_data(context.GetPlace()); + for (size_t i = 0; i < product(ids_t->dims()); ++i) { + PADDLE_ENFORCE_LT(ids[i], N); + PADDLE_ENFORCE_GE(ids[i], 0); + memcpy(output + i * D, table + ids[i] * D, D * sizeof(T)); + } + } +}; + +template +class LookupTableGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto ids_t = context.Input("Ids"); + auto d_output_t = context.Input(framework::GradVarName("Out")); + auto d_table_t = context.Output(framework::GradVarName("W")); + + size_t N = d_table_t->dims()[0]; + size_t D = d_table_t->dims()[1]; + auto ids = ids_t->data(); + T* d_table = d_table_t->mutable_data(context.GetPlace()); + const T* d_output = d_output_t->data(); + + auto* device_context = + const_cast(context.device_context_); + functor::Set()(static_cast(0), d_table_t, + device_context); + for (size_t i = 0; i < product(ids_t->dims()); ++i) { + PADDLE_ENFORCE_LT(ids[i], N); + PADDLE_ENFORCE_GE(ids[i], 0); + for (size_t j = 0; j < D; ++j) { + d_table[ids[i] * D + j] += d_output[i * D + j]; + } + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/platform/cuda_helper.h b/paddle/platform/cuda_helper.h new file mode 100644 index 0000000000..4346291117 --- /dev/null +++ b/paddle/platform/cuda_helper.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once +#include + +namespace paddle { +namespace platform { + +#define CUDA_1D_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ + i += blockDim.x * gridDim.x) + +#define CUDA_ATOMIC_WRAPPER(op, T) \ + __device__ __forceinline__ T CudaAtomic##op(T* address, const T val) + +#define USE_CUDA_ATOMIC(op, T) \ + CUDA_ATOMIC_WRAPPER(op, T) { return atomic##op(address, val); } + +// For atomicAdd. +USE_CUDA_ATOMIC(Add, float); + +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600 +USE_CUDA_ATOMIC(Add, double); +#else +// Custom implementation of atomicAdd for double. +// This implementation is copied from CUDA manual. +CUDA_ATOMIC_WRAPPER(Add, double) { + unsigned long long int* address_as_ull = + reinterpret_cast(address); + unsigned long long int old = *address_as_ull, assumed; + + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __double_as_longlong(val + __longlong_as_double(assumed))); + + // Note: uses integer comparison to avoid hang in case of NaN + } while (assumed != old); + + return __longlong_as_double(old); +#endif +} + +} // namespace platform +} // namespace paddle diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index ce57a07130..65c02f2cfb 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -27,3 +27,4 @@ py_test(test_uniform_random_op SRCS test_uniform_random_op.py) py_test(test_recurrent_op SRCS test_recurrent_op.py) py_test(test_sgd_op SRCS test_sgd_op.py) py_test(test_gradient_checker SRCS test_gradient_checker.py) +py_test(test_lookup_table SRCS test_lookup_table.py) diff --git a/python/paddle/v2/framework/tests/test_lookup_table.py b/python/paddle/v2/framework/tests/test_lookup_table.py new file mode 100644 index 0000000000..071069768b --- /dev/null +++ b/python/paddle/v2/framework/tests/test_lookup_table.py @@ -0,0 +1,31 @@ +import unittest +import numpy as np +from op_test_util import OpTestMeta +from gradient_checker import GradientChecker, create_op + + +class TestSigmoidOp(unittest.TestCase): + __metaclass__ = OpTestMeta + + def setUp(self): + self.type = 'lookup_table' + table = np.random.random((17, 31)).astype('float32') + ids = np.random.randint(0, 17, 4) + self.inputs = {'W': table, 'Ids': ids} + self.outputs = {'Out': table[ids]} + + +class TestSigmoidGradOp(GradientChecker): + def test_grad(self): + op = create_op('lookup_table') + table = np.random.random((17, 31)).astype('float32') + ids = np.random.randint(0, 17, 4) + inputs = {'W': table, 'Ids': ids} + # compare gradients between cpu and gpu + self.compare_grad(op, inputs) + # check gradients + self.check_grad(op, inputs, set('W'), 'Out') + + +if __name__ == '__main__': + unittest.main() From c91e542ad3a5a1ecd1c6b825d7c9e89d3e7384b5 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Tue, 22 Aug 2017 17:18:14 +0800 Subject: [PATCH 056/170] fix compile for paddle_pybind. 
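
Without this extra link dependency the USE_OP(lookup_table) line added to
pybind.cc in the previous patch has nothing to resolve against, so the shared
paddle_pybind module fails to link.

For reference, roughly the Python-side usage this enables: a minimal sketch
that follows the scope/Operator pattern of mnist.py above. The device-context
helper and the single-op infer_shape/run calls are written from that pattern,
not copied from the repo, so treat those exact names as illustrative.

    import numpy as np
    import paddle.v2.framework.core as core
    from paddle.v2.framework.op import Operator

    scope = core.Scope()
    place = core.CPUPlace()
    ctx = core.DeviceContext.create(place)  # assumed helper, mirrors dev_ctx in mnist.py

    # Embedding table W: 17 rows of 31-dim float vectors.
    w = scope.new_var("W").get_tensor()
    w.set_dims([17, 31])
    w.alloc_float(place)
    w.set(np.random.random((17, 31)).astype("float32"), place)

    # Ids to look up, one row of W per id.
    ids = scope.new_var("Ids").get_tensor()
    ids.set_dims([4])
    ids.alloc_int(place)
    ids.set(np.random.randint(0, 17, 4).astype("int32"), place)

    scope.new_var("Out")
    op = Operator("lookup_table", W="W", Ids="Ids", Out="Out")
    op.infer_shape(scope)
    op.run(scope, ctx)

    # Gathered rows, shape (4, 31), equal to table[ids] in numpy terms.
    out = np.array(scope.find_var("Out").get_tensor())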
--- paddle/framework/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 68304c9fc8..325a6f7532 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -55,5 +55,6 @@ cc_library(paddle_pybind SHARED recurrent_op uniform_random_op gaussian_random_op + lookup_table_op fill_zeros_like_op) endif(WITH_PYTHON) From 9bc1a1a126dc60f06cd353ff72869416d50eb3af Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Tue, 22 Aug 2017 18:19:04 +0800 Subject: [PATCH 057/170] fix cuda_helper.h --- paddle/platform/cuda_helper.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/paddle/platform/cuda_helper.h b/paddle/platform/cuda_helper.h index 4346291117..939c3713ad 100644 --- a/paddle/platform/cuda_helper.h +++ b/paddle/platform/cuda_helper.h @@ -34,8 +34,6 @@ USE_CUDA_ATOMIC(Add, float); #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600 USE_CUDA_ATOMIC(Add, double); #else -// Custom implementation of atomicAdd for double. -// This implementation is copied from CUDA manual. CUDA_ATOMIC_WRAPPER(Add, double) { unsigned long long int* address_as_ull = reinterpret_cast(address); @@ -50,8 +48,8 @@ CUDA_ATOMIC_WRAPPER(Add, double) { } while (assumed != old); return __longlong_as_double(old); -#endif } +#endif } // namespace platform } // namespace paddle From 8f4ca2d12fffe38d5adff0ad74db6ba1bdc0d223 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Wed, 16 Aug 2017 15:34:02 +0800 Subject: [PATCH 058/170] add implementations. --- .../gserver/layers/CrossEntropyOverBeam.cpp | 344 +++++++++++++++++- paddle/gserver/layers/CrossEntropyOverBeam.h | 98 +++++ .../tests/test_CrossEntropyOverBeamGrad.cpp | 166 ++++++--- 3 files changed, 549 insertions(+), 59 deletions(-) diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.cpp b/paddle/gserver/layers/CrossEntropyOverBeam.cpp index 88d80aa83a..09258fb305 100644 --- a/paddle/gserver/layers/CrossEntropyOverBeam.cpp +++ b/paddle/gserver/layers/CrossEntropyOverBeam.cpp @@ -16,6 +16,168 @@ limitations under the License. */ namespace paddle { +void CostForOneSequence::calValidExpandStep() { + validExpansionCount_ = 0; + goldAsExtraPath_ = true; + + for (size_t i = 0; i < beams_->expansionCount; ++i) { + real gold = static_cast(beams_->gold[i]); + if (i) { + real* start = beams_->candidateIds[i - 1]->getData(); + goldRowIds_[i] = std::count_if( + start, + start + goldRowIds_[i - 1] * beamSize_ + goldColIds_[i - 1], + [](const real& val) { return val != -1.; }); + } else + goldRowIds_[i] = 0; + + real* start = + beams_->candidateIds[i]->getData() + goldRowIds_[i] * beamSize_; + real* findEnd = std::find(start, start + beamSize_, gold); + validExpansionCount_++; + + if (start + beamSize_ == findEnd) return; + goldColIds_[i] = findEnd - start; + } + + if (goldColIds_[beams_->expansionCount - 1] != -1) goldAsExtraPath_ = false; +} + +size_t CostForOneSequence::initLastExpansion() { + int beamId = validExpansionCount_ - 1; + const MatrixPtr candidates = beams_->candidateIds[beamId]; + size_t height = candidates->getHeight(); + + /* initialization the last expansion. */ + size_t pathCount = std::count_if(candidates->getData(), + candidates->getData() + height * beamSize_, + [](const real& val) { return val != -1; }); + /* + * if the gold sequence falls off the beam during search, + * add the gold sequence as the last path into all expanded paths. 
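+   * When that happens goldAsExtraPath_ is true: pathCount grows by one and
+   * goldIdsInFinalExpansion_ points at this appended row, so the softmax in
+   * globallyNormalizedScore() still has a well defined gold position.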
+ */ + if (goldAsExtraPath_) goldIdsInFinalExpansion_ = pathCount++; + + pathRowIdsInEachBeam_.clear(); + pathRowIdsInEachBeam_.resize(validExpansionCount_, + std::vector(pathCount, 0)); + parentIdsInBeam_.clear(); + parentIdsInBeam_.resize(pathCount, 0); + + if (goldAsExtraPath_) { + /* add gold sequence into the total expansion. */ + pathRowIdsInEachBeam_[beamId].back() = + beams_->gold[beamId] + + getSeqStartPos(beamId, goldRowIds_[validExpansionCount_ - 1]); + parentIdsInBeam_.back() = goldRowIds_[validExpansionCount_ - 1]; + } else { + size_t goldOffset = goldRowIds_[beamId] * beamSize_ + goldColIds_[beamId]; + goldIdsInFinalExpansion_ = + std::count_if(candidates->getData(), + candidates->getData() + goldOffset, + [](const real& val) { return val != -1.; }); + } + + /* + * TODO(caoying): fix this, store the indices of selected candidate + * paths into Argument.ids + */ + real* ids = candidates->getData(); + size_t curIdx = 0; + for (size_t i = 0; i < height; ++i) { + int basePos = getSeqStartPos(beamId, i); + for (size_t j = 0; j < beamSize_; ++j) { + int id = ids[i * beamSize_ + j]; + if (id == -1) continue; + pathRowIdsInEachBeam_[beamId][curIdx] = id + basePos; + parentIdsInBeam_[curIdx++] = i; + } + } + return pathCount; +} + +void CostForOneSequence::constructTotalExpansion() { + /* + * construct the entire expanded beam by begining with the last search + * in which gold falls off the beam. + */ + size_t totalPathCount = initLastExpansion(); + + for (int beamId = validExpansionCount_ - 2; beamId >= 0; --beamId) { + const MatrixPtr candidates = beams_->candidateIds[beamId]; + real* ids = candidates->getData(); + + int lastParentIdInBeam = -1; + int basePos = -1; + for (size_t i = 0; + i < (goldAsExtraPath_ ? totalPathCount - 1 : totalPathCount); + ++i) { + int id = ids[parentIdsInBeam_[i]]; + int parentRowId = std::div(parentIdsInBeam_[i], beamSize_).quot; + if (parentIdsInBeam_[i] != lastParentIdInBeam) + basePos = getSeqStartPos(beamId, parentRowId); + + pathRowIdsInEachBeam_[beamId][i] = id + basePos; + lastParentIdInBeam = parentIdsInBeam_[i]; + parentIdsInBeam_[i] = parentRowId; + + if (goldAsExtraPath_) + pathRowIdsInEachBeam_[beamId][totalPathCount - 1] = + beams_->gold[beamId] + getSeqStartPos(beamId, goldRowIds_[beamId]); + } + } +} + +real CostForOneSequence::globallyNormalizedScore() { + expandedPathScores_.resize(validExpansionCount_); + + Matrix::resizeOrCreate( + softmaxOut_, 1, pathRowIdsInEachBeam_[0].size(), false, false); + softmaxOut_->zero(); + MatrixPtr tmp = Matrix::create( + softmaxOut_->getData(), softmaxOut_->getWidth(), 1, false, false); + + for (size_t i = 0; i < validExpansionCount_; ++i) { + Matrix::resizeOrCreate(expandedPathScores_[i], + pathRowIdsInEachBeam_[i].size(), + 1, + false, + false); + IVectorPtr rowIds = IVector::create(pathRowIdsInEachBeam_[i].data(), + pathRowIdsInEachBeam_[i].size(), + false); + expandedPathScores_[i]->selectRows(*(beams_->scores[i]), *rowIds); + tmp->add(*expandedPathScores_[i]); + } + + softmaxOut_->softmax(*softmaxOut_); + return -std::log(softmaxOut_->getData()[goldIdsInFinalExpansion_]); +} + +real CostForOneSequence::forward() { + calValidExpandStep(); + constructTotalExpansion(); + return globallyNormalizedScore(); +} + +void CostForOneSequence::backward() { + softmaxOut_->getData()[goldIdsInFinalExpansion_] -= 1.; + MatrixPtr tmp = Matrix::create( + softmaxOut_->getData(), softmaxOut_->getWidth(), 1, false, false); + + for (size_t i = 0; i < validExpansionCount_; ++i) { + IVectorPtr rowIds = 
IVector::create(pathRowIdsInEachBeam_[i].data(), + pathRowIdsInEachBeam_[i].size(), + false); + /* + beams_->scoreGrad[i] has been intialized outside this class, this + class only keeps a pointer pointing to the original input gradients, + so here does not need to allocate or initalize the memory. + */ + tmp->addToRows(*beams_->scoreGrad[i], *rowIds); + } +} + REGISTER_LAYER(cross_entropy_over_beam, CrossEntropyOverBeam); bool CrossEntropyOverBeam::init(const LayerMap& layerMap, @@ -24,13 +186,189 @@ bool CrossEntropyOverBeam::init(const LayerMap& layerMap, Layer::init(layerMap, parameterMap); CHECK_EQ(0U, inputLayers_.size() % 3) << "Error input number."; - setNeedSequenceInfo(false); + beamExpanCount_ = inputLayers_.size() / 3; + + candidateScores_.resize(beamExpanCount_); + candidateScoreGrad_.resize(beamExpanCount_); + candidateInBeam_.resize(beamExpanCount_); + goldSequence_.resize(beamExpanCount_); + gradToInputs_.resize(beamExpanCount_); + + setNeedSequenceInfo(false); return true; } -void CrossEntropyOverBeam::forward(PassType passType) {} +void CrossEntropyOverBeam::checkInputs() { + batchSize_ = 0; + for (size_t i = 0; i < beamExpanCount_; ++i) { + const Argument& scores = getInput(i * 3); + const Argument& selCandidates = getInput(i * 3 + 1); + const Argument& goldSeq = getInput(i * 3 + 2); + + if (i) { + CHECK(scores.hasSubseq()) << "Beam expansion expect the first one, " + "should be a nested sequence"; + CHECK_EQ(getInputValue(i * 3 + 1)->getWidth(), beamSize_); + CHECK_EQ(scores.getNumSequences(), batchSize_); + CHECK_EQ(scores.getNumSubSequences(), selCandidates.getBatchSize()); + } else { + CHECK(scores.hasSeq()) << "The first beam expansion should be a sequence"; + batchSize_ = scores.getNumSequences(); + beamSize_ = getInputValue(i * 3 + 1)->getWidth(); + CHECK_EQ(batchSize_, selCandidates.getBatchSize()); + } + CHECK_EQ(1U, scores.value->getWidth()); + CHECK_EQ(batchSize_, goldSeq.getBatchSize()); + } +} + +void CrossEntropyOverBeam::copyInputsToCpu() { + auto copyValue = [](const MatrixPtr& src, MatrixPtr& trg) { + if (dynamic_cast(src.get())) { + Matrix::resizeOrCreate( + trg, src->getHeight(), src->getWidth(), false, false); + trg->copyFrom(*src); + } else { + trg = std::move(src); + } + }; + + auto copyIds = [](const IVectorPtr& src, IVectorPtr& trg) { + if (dynamic_cast(src.get())) { + IVector::resizeOrCreate(trg, src->getSize(), false); + trg->copyFrom(*src); + } else { + trg = std::move(src); + } + }; + + beamSplitPos_.clear(); + beamSplitPos_.resize(batchSize_, std::vector(beamExpanCount_, 0)); + for (size_t i = 0; i < beamExpanCount_; ++i) { + copyValue(getInputValue(i * 3), candidateScores_[i]); + copyValue(getInputValue(i * 3 + 1), candidateInBeam_[i]); + copyIds(getInput(i * 3 + 2).ids, goldSequence_[i]); + + if (i) { + ICpuGpuVectorPtr seqInfo = getInput(i * 3).sequenceStartPositions; + const int* seqStarts = seqInfo->getMutableData(false); + ICpuGpuVectorPtr subSeqInfo = getInput(i * 3).subSequenceStartPositions; + const int* subSeqStarts = subSeqInfo->getMutableData(false); + + size_t seqId = 1; + for (size_t subSeqId = 0; subSeqId < subSeqInfo->getSize() - 1; + ++subSeqId) { + CHECK_LT(seqId, seqInfo->getSize()); + if (subSeqStarts[subSeqId] == seqStarts[seqId]) { + beamSplitPos_[seqId][i] = beamSplitPos_[seqId - 1][i]; + seqId++; + } + beamSplitPos_[seqId - 1][i]++; + } + } else { + for (size_t j = 0; j < batchSize_; ++j) beamSplitPos_[j][i] = j + 1; + } + } +} + +void CrossEntropyOverBeam::splitBatchBeams() { + beamCosts_.resize(batchSize_); + 
beamPerSeq_.resize(batchSize_, beamExpanCount_); + + for (size_t i = 0; i < beamExpanCount_; ++i) { + int* seqStarts = + getInput(i * 3).sequenceStartPositions->getMutableData(false); + + int* subSeqStarts = nullptr; + int maxLen = 0; + if (i) { + subSeqStarts = + getInput(i * 3).subSequenceStartPositions->getMutableData(false); + maxLen = getInput(i * 3).subSequenceStartPositions->getSize() - 1; + } else + maxLen = getInput(i).sequenceStartPositions->getSize() - 1; + + for (size_t j = 0; j < batchSize_; ++j) { + beamPerSeq_[j].scores[i] = + Matrix::create(candidateScores_[i]->getData() + seqStarts[j], + seqStarts[j + 1] - seqStarts[j], + 1, + false, + false); + beamPerSeq_[j].scoreGrad[i] = + Matrix::create(candidateScoreGrad_[i]->getData() + seqStarts[j], + seqStarts[j + 1] - seqStarts[j], + 1, + false, + false); + + int offset = j ? beamSplitPos_[j - 1][i] : 0; + int height = beamSplitPos_[j][i] - (j ? beamSplitPos_[j - 1][i] : 0); + CHECK_GE(maxLen, offset + height); + beamPerSeq_[j].seqInfo[i] = IVector::create( + (i ? subSeqStarts : seqStarts) + offset, height + 1, false); -void CrossEntropyOverBeam::backward(const UpdateCallback& callback) {} + beamPerSeq_[j].candidateIds[i] = + Matrix::create(candidateInBeam_[i]->getData() + offset * beamSize_, + height, + beamSize_, + false, + false); + beamPerSeq_[j].gold[i] = goldSequence_[i]->getData()[j]; + } + } +} + +void CrossEntropyOverBeam::resizeOutput() { + Matrix::resizeOrCreate(output_.value, batchSize_, 1, false, false); + output_.value->zero(); + + for (size_t i = 0; i < beamExpanCount_; ++i) { + MatrixPtr inGrad = getInputGrad(i * 3); + if (dynamic_cast(inGrad.get())) { + Matrix::resizeOrCreate(candidateScoreGrad_[i], + inGrad->getHeight(), + inGrad->getWidth(), + false, + false); + } else + candidateScoreGrad_[i] = std::move(inGrad); + candidateScoreGrad_[i]->zero(); + } +} + +void CrossEntropyOverBeam::copyGradToGpu(size_t copyCount) { + for (size_t i = 0; i < beamExpanCount_; ++i) { + if (dynamic_cast(getInputGrad(i * 3).get())) + getInputGrad(i * 3)->copyFrom(*candidateScoreGrad_[i]); + + if (i == copyCount - 1) break; + } +} + +void CrossEntropyOverBeam::forward(PassType passType) { + Layer::forward(passType); + + checkInputs(); + copyInputsToCpu(); + + resizeOutput(); + splitBatchBeams(); + + MatrixPtr outputValue = getOutputValue(); + for (size_t i = 0; i < batchSize_; ++i) { + beamCosts_[i].setData( + std::move(std::make_shared(beamPerSeq_[i])), beamSize_); + outputValue->getData()[i] = beamCosts_[i].forward(); + } +} + +void CrossEntropyOverBeam::backward(const UpdateCallback& callback) { + for (size_t i = 0; i < batchSize_; ++i) { + beamCosts_[i].backward(); + copyGradToGpu(beamCosts_[i].getValidExpansionCount()); + } +} } // namespace paddle diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.h b/paddle/gserver/layers/CrossEntropyOverBeam.h index 3106f9858b..96a5df7dfb 100644 --- a/paddle/gserver/layers/CrossEntropyOverBeam.h +++ b/paddle/gserver/layers/CrossEntropyOverBeam.h @@ -19,6 +19,79 @@ limitations under the License. 
*/ namespace paddle { +struct BeamExpansion { + // store the entire beam expansion for a single sequence + std::vector scores; + std::vector seqInfo; + + std::vector candidateIds; + std::vector gold; + + std::vector scoreGrad; + + size_t expansionCount; + + BeamExpansion(int n) { + expansionCount = n; + scores.resize(expansionCount); + seqInfo.resize(expansionCount); + candidateIds.resize(expansionCount); + scoreGrad.resize(expansionCount); + + gold.resize(expansionCount); + }; +}; +typedef std::shared_ptr BeamExpansionPtr; + +class CostForOneSequence { +public: + CostForOneSequence() + : beamSize_(0), validExpansionCount_(0), goldAsExtraPath_(false) {} + void setData(const BeamExpansionPtr bPtr, size_t beamSize) { + beams_ = bPtr; + beamSize_ = beamSize; + + expandedPathScores_.clear(); + expandedPathScores_.resize(beams_->expansionCount); + + goldRowIds_.clear(); + goldRowIds_.resize(beams_->expansionCount, 0); + goldColIds_.clear(); + goldColIds_.resize(beams_->expansionCount, -1); + } + size_t getValidExpansionCount() { return validExpansionCount_; } + + real forward(); + void backward(); + +private: + void calValidExpandStep(); + void constructTotalExpansion(); + size_t initLastExpansion(); + real globallyNormalizedScore(); + + int getSeqStartPos(size_t beamId, size_t rowId) { + CHECK_GT(beams_->seqInfo[beamId]->getSize() - 1, rowId); + int* starts = beams_->seqInfo[beamId]->getData(); + return starts[rowId] - starts[0]; + }; + + size_t beamSize_; + size_t validExpansionCount_; + bool goldAsExtraPath_; + std::vector goldRowIds_; + std::vector goldColIds_; + + BeamExpansionPtr beams_; + std::vector> pathRowIdsInEachBeam_; + std::vector parentIdsInBeam_; + size_t goldIdsInFinalExpansion_; + + std::vector expandedPathScores_; + + MatrixPtr softmaxOut_; +}; + class CrossEntropyOverBeam : public Layer { public: explicit CrossEntropyOverBeam(const LayerConfig& config) : Layer(config) {} @@ -26,6 +99,31 @@ public: const ParameterMap& parameterMap) override; void forward(PassType passType) override; void backward(const UpdateCallback& callback) override; + +private: + void checkInputs(); + void copyInputsToCpu(); + void resizeOutput(); + void copyGradToGpu(size_t copyCount); + void splitBatchBeams(); + + size_t beamExpanCount_; + size_t batchSize_; + size_t beamSize_; + + // Currently, this layer only works on CPU, if its inputs is on GPU, + // copy them to CPU memory. + std::vector candidateScores_; + std::vector candidateScoreGrad_; + std::vector candidateInBeam_; + std::vector gradToInputs_; + std::vector goldSequence_; + std::vector> beamSplitPos_; + + // split entire bath of beams into beam per sequnence. + std::vector beamPerSeq_; + // beamCosts_ is used to propagate error in one sequence. 
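+  // Illustrative sketch of what each entry computes: if the gold path has
+  // accumulated score s_g and the expanded paths have scores s_1..s_P, then
+  // forward() returns -log(exp(s_g) / sum_i exp(s_i)), and backward()
+  // subtracts 1 from the gold entry of that softmax before scattering the
+  // result back onto the input score gradients with addToRows.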
+ std::vector beamCosts_; }; } // namespace paddle diff --git a/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp b/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp index a5f06c15dc..506a4281df 100644 --- a/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp +++ b/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp @@ -28,9 +28,17 @@ using namespace paddle; // NOLINT DECLARE_int32(gpu_id); DECLARE_bool(thread_local_rand_use_global_seed); -const size_t MAX_SEQ_NUM = 10; -const size_t MAX_SEQ_LEN = 27; -const size_t MAX_BEAM_SIZE = 10; +// const size_t MAX_SEQ_NUM = 5; +// const size_t MAX_SEQ_LEN = 10; +// const size_t MAX_BEAM_SIZE = 3; + +const size_t MAX_SEQ_NUM = 23; +const size_t MAX_SEQ_LEN = 50; +const size_t MAX_BEAM_SIZE = 27; + +// const size_t SEED = 1503391792; +// const size_t SEED = 1; +const size_t SEED = (size_t)(time(NULL)); struct SingleBeamExpansion { vector seqStartPos; @@ -43,11 +51,30 @@ struct SingleBeamExpansion { vector groundTruth; vector inBeam; vector rowIdxInBeam; + vector colIdxInBeam; + + void resetGroundTruth(size_t n) { + groundTruth.clear(); + groundTruth.resize(n, -1); + + inBeam.clear(); + inBeam.resize(n, 0); + + rowIdxInBeam.clear(); + rowIdxInBeam.resize(n, -1); + + colIdxInBeam.clear(); + colIdxInBeam.resize(n, -1); + } }; +inline float randFloat() { + return static_cast(rand()) / static_cast(RAND_MAX); +} + void genRand(real* numbers, size_t n) { default_random_engine generator; - uniform_real_distribution distribution(0.0, 1.0); + uniform_real_distribution distribution(0.0, 1.0); for (size_t i = 0; i < n; ++i) numbers[i] = distribution(generator); } @@ -72,8 +99,7 @@ void genCandidateScores(bool hasSubseq, vector& subSeqStartPos = curBeam.subSeqStartPos; subSeqStartPos.resize(1, 0); - srand((size_t)(time(NULL))); - // srand(1); + srand(SEED); if (prevBeam.selectedIndices.size()) { if (prevBeam.subSeqStartPos.size() > 1) { int seqIdx = 1; @@ -81,9 +107,8 @@ void genCandidateScores(bool hasSubseq, for (size_t i = 1; i < prevBeam.subSeqStartPos.size(); ++i) { for (size_t j = 0; j < beamSize; ++j) { if (prevBeam.selectedIndices[(i - 1) * beamSize + j] == -1.) break; - for (size_t k = 0; k < beamSize; ++k) - subSeqStartPos.push_back(1 + (rand() % MAX_SEQ_LEN) + - subSeqStartPos.back()); + subSeqStartPos.push_back(1 + (rand() % MAX_SEQ_LEN) + + subSeqStartPos.back()); } if (prevBeam.seqStartPos[seqIdx] == prevBeam.subSeqStartPos[i]) { seqStartPos.push_back(subSeqStartPos.back()); @@ -91,7 +116,6 @@ void genCandidateScores(bool hasSubseq, } } } else { - // samples in previous beam are sequences. for (size_t i = 0; i <= prevBeam.selectedIndices.size(); ++i) { if (i && i % beamSize == 0) { seqStartPos.push_back(subSeqStartPos.back()); @@ -141,27 +165,41 @@ void genSelectedIndices(size_t beamSize, void genGroundTruth(vector& beamExpansions, size_t beamSize) { - size_t seqNum = beamExpansions[1].seqStartPos.size() - 1; + SingleBeamExpansion& beam = beamExpansions[1]; + size_t seqNum = beam.seqStartPos.size() - 1; for (size_t i = 2; i < beamExpansions.size(); ++i) - CHECK_EQ(seqNum, beamExpansions[i - 1].seqStartPos.size() - 1); + CHECK_EQ(seqNum, beamExpansions[i].seqStartPos.size() - 1); - // srand(1); - srand((size_t)(time(NULL))); + srand(SEED); // initialize the first beam. 
- SingleBeamExpansion& beam = beamExpansions[1]; - beam.groundTruth.resize(seqNum, 0); - beam.inBeam.resize(seqNum, 0); - beam.rowIdxInBeam.resize(seqNum, -1); - - auto begPos = beam.selectedIndices.begin(); + beam.resetGroundTruth(seqNum); for (size_t i = 0; i < seqNum; ++i) { - int seqLen = beam.seqStartPos[i + 1] - beam.seqStartPos[i]; - int label = rand() % seqLen; - auto endPos = begPos + beamSize; - beam.groundTruth[i] = label; - if (find(begPos, endPos, real(label)) != endPos) beam.inBeam[i] = 1; - begPos = endPos; + if (randFloat() > 0.5) { + // force the randomly generated label falls in the beam by chance 0.5. + // otherwise, when sequence length is relatively long and beam size is + // relatively small, the gold sequences falls off the beam at in + // the first search. + real* begPos = beam.selectedIndices.data() + i * beamSize; + beam.colIdxInBeam[i] = + rand() % count_if(begPos, begPos + beamSize, [](const real& val) { + return val != -1.; + }); + beam.groundTruth[i] = + beam.selectedIndices[i * beamSize + beam.colIdxInBeam[i]]; + beam.inBeam[i] = 1; + } else { + int label = rand() % (beam.seqStartPos[i + 1] - beam.seqStartPos[i]); + beam.groundTruth[i] = label; + + real* begPos = beam.selectedIndices.data() + i * beamSize; + real* endPos = begPos + beamSize; + real* lblPos = find(begPos, endPos, real(label)); + if (lblPos != endPos) { + beam.inBeam[i] = 1; + beam.colIdxInBeam[i] = lblPos - begPos; + } + } beam.rowIdxInBeam[i] = i; } @@ -169,22 +207,33 @@ void genGroundTruth(vector& beamExpansions, for (size_t i = 2; i < beamExpansions.size(); ++i) { SingleBeamExpansion& curBeam = beamExpansions[i]; SingleBeamExpansion& prevBeam = beamExpansions[i - 1]; - - curBeam.groundTruth.resize(seqNum, 0); - curBeam.inBeam.resize(seqNum, 0); - curBeam.rowIdxInBeam.resize(seqNum, -1); + curBeam.resetGroundTruth(seqNum); // iterate over each sequence for (size_t j = 0; j < seqNum; ++j) { - if (prevBeam.inBeam[j]) { - // gold sequence falls in the beam in previous search. - - auto begPos = prevBeam.selectedIndices.begin(); - auto endPos = begPos + prevBeam.rowIdxInBeam[j] * beamSize; - size_t totalExpansion = - prevBeam.rowIdxInBeam[j] * beamSize - count(begPos, endPos, -1.); - curBeam.rowIdxInBeam[j] = totalExpansion + prevBeam.groundTruth[j]; - + if (!prevBeam.inBeam[j]) continue; + + // gold sequence falls in the beam in previous search. + real* begPos = prevBeam.selectedIndices.data(); + int offset = + prevBeam.rowIdxInBeam[j] * beamSize + prevBeam.colIdxInBeam[j]; + curBeam.rowIdxInBeam[j] = count_if( + begPos, begPos + offset, [](const real& val) { return val != -1.; }); + + if (randFloat() > 0.5) { + // force the randomly generated label falls in the beam by chance 0.5. + // otherwise, when sequence length is relatively long and beam size is + // relatively small, the gold sequences falls off the beam at in + // the first search. 
+ real* start = + curBeam.selectedIndices.data() + curBeam.rowIdxInBeam[j] * beamSize; + int n = rand() % count_if(start, start + beamSize, [](const real& val) { + return val != -1.; + }); + curBeam.colIdxInBeam[j] = n; + curBeam.groundTruth[j] = *(start + n); + curBeam.inBeam[j] = 1; + } else { CHECK_LE(curBeam.rowIdxInBeam[j] + 1, curBeam.subSeqStartPos.size() - 1); int start = curBeam.subSeqStartPos[curBeam.rowIdxInBeam[j]]; @@ -193,16 +242,14 @@ void genGroundTruth(vector& beamExpansions, int label = rand() % (end - start); curBeam.groundTruth[j] = label; - auto findBeg = curBeam.selectedIndices.begin() + - curBeam.rowIdxInBeam[j] * beamSize; - auto findEnd = findBeg + beamSize; - if (find(findBeg, findEnd, real(label)) != findEnd) + real* findBeg = + curBeam.selectedIndices.data() + curBeam.rowIdxInBeam[j] * beamSize; + real* lblPos = + find(findBeg, findBeg + beamSize, static_cast(label)); + if (lblPos != (findBeg + beamSize)) { curBeam.inBeam[j] = 1; - } else { - // in previous search, gold sequence has fallen off the beam, - // the beam search stops, here use -1 as a dummy label. - // It will not used in calculation the cost. - beamExpansions[i].groundTruth[j] = -1; + curBeam.colIdxInBeam[j] = lblPos - findBeg; + } } } } @@ -230,15 +277,12 @@ void genRandomBeamExpansion(size_t expansionCount, genGroundTruth(beamExpansions, beamSize); } -void testCrossEntropyOverBeam(bool useGpu) { +void testCrossEntropyOverBeam(bool useGpu, + size_t beamSize, + vector& beams) { TestConfig config; config.layerConfig.set_type("cross_entropy_over_beam"); - const size_t expansionCount = 3; - const size_t beamSize = MAX_BEAM_SIZE; - vector beams; - genRandomBeamExpansion(expansionCount, beamSize, beams); - size_t seqNum = 0; for (size_t i = 1; i < beams.size(); ++i) { const SingleBeamExpansion& beam = beams[i]; @@ -291,7 +335,17 @@ void testCrossEntropyOverBeam(bool useGpu) { } TEST(Layer, CrossEntropyOverBeam) { - for (bool useGpu : {false, true}) testCrossEntropyOverBeam(useGpu); + LOG(INFO) << "SEED = " << SEED; + const size_t beamSize = 1 + rand() % MAX_BEAM_SIZE; + LOG(INFO) << "beamSize = " << beamSize; + + // TODO(caoying): test with more beam expansions. + const size_t expansionCount = 3; + vector beams; + genRandomBeamExpansion(expansionCount, beamSize, beams); + + for (bool useGpu : {false, true}) + testCrossEntropyOverBeam(useGpu, beamSize, beams); } int main(int argc, char** argv) { @@ -299,7 +353,7 @@ int main(int argc, char** argv) { hl_start(); hl_init(FLAGS_gpu_id); FLAGS_thread_local_rand_use_global_seed = true; - srand(1); + srand(SEED); testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } From 3bf440023abd5801f21b98d027623b6cb3959a0b Mon Sep 17 00:00:00 2001 From: caoying03 Date: Tue, 22 Aug 2017 21:03:46 +0800 Subject: [PATCH 059/170] follow comments. --- paddle/gserver/gradientmachines/RecurrentGradientMachine.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h index cc0eda9f13..c16fae6d17 100644 --- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h +++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h @@ -503,9 +503,9 @@ private: * sequence in Matrix stored the entire beam search batch's forward pass * results. 
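   * The selected row ids recorded in machineIdVec are translated here into
   * row offsets of the concatenated forward-pass outputs, which the later
   * concat() call uses to gather the frames of every finally kept path.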
* - * @param isSeq: a flag indicating whetehr the layer to be output of the + * @param isSeq: a flag indicating whether the layer to be output of the * RecurrentGradientMachine is a sequence or not - * @param outArgs: all of the the returned Arguments of the forward pass + * @param outArgs: all of the returned Arguments of the forward pass * during the generation process. */ void createDataOutlinkSelRowsInfo(bool isSeq, std::vector& outArgs); From a8d072c769b940d087006fa68ffcf462aa8579b8 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Wed, 23 Aug 2017 00:12:58 +0800 Subject: [PATCH 060/170] fix bug. --- paddle/operators/lookup_table_op.cc | 7 ++-- paddle/operators/lookup_table_op.cu | 32 +++++++++---------- paddle/operators/lookup_table_op.h | 6 ++-- .../v2/framework/tests/test_lookup_table.py | 6 ++-- 4 files changed, 25 insertions(+), 26 deletions(-) diff --git a/paddle/operators/lookup_table_op.cc b/paddle/operators/lookup_table_op.cc index 5f70458a87..94d40890a7 100644 --- a/paddle/operators/lookup_table_op.cc +++ b/paddle/operators/lookup_table_op.cc @@ -41,8 +41,7 @@ class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker { " which is a learnable parameter."); AddInput("Ids", "An input with type int32 or int64" - "contains the ids to be looked up in W.") - .NotInGradient(); + "contains the ids to be looked up in W."); AddOutput("Out", "The lookup results, which have the same type with W."); AddComment( "This operator is used to perform lookups on the parameter W," @@ -56,7 +55,9 @@ class LookupTableOpGrad : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &context) const override { - context.Output(0)->Resize(context.Input(0)->dims()); + auto table = context.Input("W"); + auto d_table = context.Output(framework::GradVarName("W")); + d_table->Resize(table->dims()); } }; diff --git a/paddle/operators/lookup_table_op.cu b/paddle/operators/lookup_table_op.cu index 94b440e00e..99678ef681 100644 --- a/paddle/operators/lookup_table_op.cu +++ b/paddle/operators/lookup_table_op.cu @@ -23,7 +23,7 @@ namespace operators { using Tensor = framework::Tensor; template -__global__ void LookupTable(T* output, const T* table, const uint32_t* ids, +__global__ void LookupTable(T* output, const T* table, const int32_t* ids, const int N, const int K, const int D) { int idx = threadIdx.x; int idy = blockIdx.x + threadIdx.y * gridDimX; @@ -32,8 +32,8 @@ __global__ void LookupTable(T* output, const T* table, const uint32_t* ids, int id = ids[idy]; PADDLE_ASSERT(id >= 0); PADDLE_ASSERT(id < N); - T* out = output + idy; - const T* tab = table + id; + T* out = output + idy * D; + const T* tab = table + id * D; for (int i = idx; i < D; i += blockDimX) { out[i] = tab[i]; } @@ -42,9 +42,8 @@ __global__ void LookupTable(T* output, const T* table, const uint32_t* ids, } template -__global__ void LookupTableGradKernel(T* table, const T* output, - const uint32_t* ids, const int N, - const int K, const int D) { +__global__ void LookupTableGrad(T* table, const T* output, const int32_t* ids, + const int N, const int K, const int D) { int idx = threadIdx.x; int idy = blockIdx.x + threadIdx.y * gridDimX; @@ -52,10 +51,10 @@ __global__ void LookupTableGradKernel(T* table, const T* output, int id = ids[idy]; PADDLE_ASSERT(id >= 0); PADDLE_ASSERT(id < N); - const T* out = output + idy; - T* tab = table + id; + const T* out = output + idy * D; + T* tab = table + id * D; for (int i = idx; i < D; i += blockDimX) { - paddle::platform::CudaAtomicAdd(tab + i, 
out[i]); + paddle::platform::CudaAtomicAdd(&tab[i], out[i]); } idy += blockDimY * gridDimX; } @@ -72,7 +71,7 @@ class LookupTableCUDAKernel : public framework::OpKernel { size_t N = table_t->dims()[0]; size_t D = table_t->dims()[1]; size_t K = product(ids_t->dims()); - auto ids = ids_t->data(); + auto ids = ids_t->data(); auto table = table_t->data(); auto output = output_t->mutable_data(context.GetPlace()); @@ -83,7 +82,7 @@ class LookupTableCUDAKernel : public framework::OpKernel { }; template -class LookupTableGrad : public framework::OpKernel { +class LookupTableGradCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto ids_t = context.Input("Ids"); @@ -93,9 +92,9 @@ class LookupTableGrad : public framework::OpKernel { int N = d_table_t->dims()[0]; int D = d_table_t->dims()[1]; int K = product(ids_t->dims()); - const uint32_t* ids = ids_t->data(); - T* d_table = d_table_t->mutable_data(context.GetPlace()); + const int32_t* ids = ids_t->data(); const T* d_output = d_output_t->data(); + T* d_table = d_table_t->mutable_data(context.GetPlace()); auto* device_context = const_cast(context.device_context_); @@ -103,8 +102,8 @@ class LookupTableGrad : public framework::OpKernel { device_context); dim3 threads(128, 8); dim3 grids(8, 1); - LookupTableGradKernel<<>>(d_table, d_output, - ids, N, K, D); + LookupTableGrad<<>>(d_table, d_output, ids, N, + K, D); } }; @@ -113,4 +112,5 @@ class LookupTableGrad : public framework::OpKernel { namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL(lookup_table, ops::LookupTableCUDAKernel); -REGISTER_OP_GPU_KERNEL(lookup_table_grad, ops::LookupTableGrad); +REGISTER_OP_GPU_KERNEL(lookup_table_grad, + ops::LookupTableGradCUDAKernel); diff --git a/paddle/operators/lookup_table_op.h b/paddle/operators/lookup_table_op.h index 790ecab3c6..9254e03a1b 100644 --- a/paddle/operators/lookup_table_op.h +++ b/paddle/operators/lookup_table_op.h @@ -32,7 +32,7 @@ class LookupTableKernel : public framework::OpKernel { size_t N = table_t->dims()[0]; size_t D = table_t->dims()[1]; - auto ids = ids_t->data(); + auto ids = ids_t->data(); auto table = table_t->data(); auto output = output_t->mutable_data(context.GetPlace()); for (size_t i = 0; i < product(ids_t->dims()); ++i) { @@ -53,9 +53,9 @@ class LookupTableGradKernel : public framework::OpKernel { size_t N = d_table_t->dims()[0]; size_t D = d_table_t->dims()[1]; - auto ids = ids_t->data(); - T* d_table = d_table_t->mutable_data(context.GetPlace()); + auto ids = ids_t->data(); const T* d_output = d_output_t->data(); + T* d_table = d_table_t->mutable_data(context.GetPlace()); auto* device_context = const_cast(context.device_context_); diff --git a/python/paddle/v2/framework/tests/test_lookup_table.py b/python/paddle/v2/framework/tests/test_lookup_table.py index 071069768b..3056bf53e3 100644 --- a/python/paddle/v2/framework/tests/test_lookup_table.py +++ b/python/paddle/v2/framework/tests/test_lookup_table.py @@ -10,7 +10,7 @@ class TestSigmoidOp(unittest.TestCase): def setUp(self): self.type = 'lookup_table' table = np.random.random((17, 31)).astype('float32') - ids = np.random.randint(0, 17, 4) + ids = np.random.randint(0, 17, 4).astype('int32') self.inputs = {'W': table, 'Ids': ids} self.outputs = {'Out': table[ids]} @@ -19,10 +19,8 @@ class TestSigmoidGradOp(GradientChecker): def test_grad(self): op = create_op('lookup_table') table = np.random.random((17, 31)).astype('float32') - ids = np.random.randint(0, 17, 4) + ids = np.random.randint(0, 
17, 4).astype('int32') inputs = {'W': table, 'Ids': ids} - # compare gradients between cpu and gpu - self.compare_grad(op, inputs) # check gradients self.check_grad(op, inputs, set('W'), 'Out') From 51792022c9f7963321d77d7dac4143e566af9fdc Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Tue, 22 Aug 2017 12:54:44 -0700 Subject: [PATCH 061/170] refine code and add debug info --- python/paddle/v2/framework/tests/mnist.py | 47 +++++++++++------------ 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index f75f196168..6a3ed0dce0 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -52,7 +52,7 @@ def grad_var_name(var_name): return var_name + "@GRAD" -def sgd_optimizer(net, param_name, learning_rate=0.01): +def sgd_optimizer(net, param_name, learning_rate=0.001): grad_name = grad_var_name(param_name) optimize_op = Operator( "sgd", @@ -65,7 +65,6 @@ def sgd_optimizer(net, param_name, learning_rate=0.01): # should use operator and add these to the init_network def init_param(param_name, dims): - print param_name var = scope.new_var(param_name) tensor = var.get_tensor() tensor.set_dims(dims) @@ -158,17 +157,34 @@ def print_inputs_outputs(op): print("") +def set_cost(): + cost_data = numpy.array(scope.find_var("cross_entropy_1").get_tensor()) + # print(cost_data) + print(cost_data.sum() / len(cost_data)) + + cost_grad = scope.find_var(grad_var_name("cross_entropy_1")).get_tensor() + cost_grad.set_dims(cost_data.shape) + cost_grad.alloc_float(place) + cost_grad.set(cost_data, place) + + images = data_layer(name='pixel', dims=[BATCH_SIZE, 784]) label = data_layer(name='label', dims=[BATCH_SIZE]) fc = fc_layer(net=forward_network, input=images, size=10, act="softmax") cost = cross_entropy_layer(net=forward_network, input=fc, label=label) + forward_network.complete_add_op(True) -print(forward_network) backward_net = get_backward_net(forward_network) -print(backward_net) optimize_net.complete_add_op(True) + +print(forward_network) +print(backward_net) print(optimize_net) +print_inputs_outputs(forward_network) +print_inputs_outputs(backward_net) +print_inputs_outputs(optimize_net) + reader = paddle.batch( paddle.reader.shuffle( paddle.dataset.mnist.train(), buf_size=8192), @@ -176,34 +192,17 @@ reader = paddle.batch( PASS_NUM = 1000 for pass_id in range(PASS_NUM): - print("===========forward==========") - # feed_data("pixel", numpy.random.random((BATCH_SIZE, 784)).astype('float32')) - # feed_data("label", numpy.ones(BATCH_SIZE).astype("int32")) data = reader().next() + image = numpy.array(map(lambda x: x[0], data)).astype("float32") label = numpy.array(map(lambda x: x[1], data)).astype("int32") feed_data("pixel", image) feed_data("label", label) - forward_network.infer_shape(scope) - print_inputs_outputs(forward_network) - # print(numpy.array(scope.find_var("label").get_tensor())) + forward_network.infer_shape(scope) forward_network.run(scope, dev_ctx) - # print(numpy.array(scope.find_var("fc_0").get_tensor())) - - print("===========backward==========") - cost_data = numpy.array(scope.find_var("cross_entropy_1").get_tensor()) - print(cost_data.sum() / len(cost_data)) - cost_grad = scope.find_var(grad_var_name("cross_entropy_1")).get_tensor() - cost_grad.set_dims(cost_data.shape) - cost_grad.alloc_float(place) - cost_grad.set(cost_data, place) - + set_cost() backward_net.infer_shape(scope) - print_inputs_outputs(backward_net) - backward_net.run(scope, 
dev_ctx) - print("===========optimize_net==========") - print_inputs_outputs(optimize_net) optimize_net.run(scope, dev_ctx) From d3c65a64dc4ab98af10498cb2eb9327ef1697e5a Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Tue, 22 Aug 2017 20:21:23 -0700 Subject: [PATCH 062/170] fix data reader --- python/paddle/v2/framework/tests/mnist.py | 29 ++++++++++++----------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index 6a3ed0dce0..1d40fd9a97 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -52,7 +52,7 @@ def grad_var_name(var_name): return var_name + "@GRAD" -def sgd_optimizer(net, param_name, learning_rate=0.001): +def sgd_optimizer(net, param_name, learning_rate=0.01): grad_name = grad_var_name(param_name) optimize_op = Operator( "sgd", @@ -159,13 +159,13 @@ def print_inputs_outputs(op): def set_cost(): cost_data = numpy.array(scope.find_var("cross_entropy_1").get_tensor()) - # print(cost_data) print(cost_data.sum() / len(cost_data)) cost_grad = scope.find_var(grad_var_name("cross_entropy_1")).get_tensor() + cost_grad.set_dims(cost_data.shape) cost_grad.alloc_float(place) - cost_grad.set(cost_data, place) + cost_grad.set(numpy.ones(cost_data.shape).astype("float32"), place) images = data_layer(name='pixel', dims=[BATCH_SIZE, 784]) @@ -192,17 +192,18 @@ reader = paddle.batch( PASS_NUM = 1000 for pass_id in range(PASS_NUM): - data = reader().next() - image = numpy.array(map(lambda x: x[0], data)).astype("float32") - label = numpy.array(map(lambda x: x[1], data)).astype("int32") - feed_data("pixel", image) - feed_data("label", label) + print("pass[" + str(pass_id) + "]") + for data in reader(): + image = numpy.array(map(lambda x: x[0], data)).astype("float32") + label = numpy.array(map(lambda x: x[1], data)).astype("int32") + feed_data("pixel", image) + feed_data("label", label) - forward_network.infer_shape(scope) - forward_network.run(scope, dev_ctx) - set_cost() - backward_net.infer_shape(scope) - backward_net.run(scope, dev_ctx) + forward_network.infer_shape(scope) + forward_network.run(scope, dev_ctx) + set_cost() + backward_net.infer_shape(scope) + backward_net.run(scope, dev_ctx) - optimize_net.run(scope, dev_ctx) + optimize_net.run(scope, dev_ctx) From a13798e8f7764239c151864894afc6a543e6c190 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Tue, 22 Aug 2017 20:41:31 -0700 Subject: [PATCH 063/170] rename add_op to append_op --- python/paddle/v2/framework/tests/mnist.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index 1d40fd9a97..32349b8d4d 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -60,7 +60,7 @@ def sgd_optimizer(net, param_name, learning_rate=0.01): grad=grad_name, param_out=param_name, learning_rate=learning_rate) - net.add_op(optimize_op) + net.append_op(optimize_op) # should use operator and add these to the init_network @@ -102,7 +102,7 @@ def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None): pre_activation = name + ".mul.out" scope.new_var(pre_activation) mul_op = Operator("mul", X=input, Y=w_name, Out=pre_activation) - net.add_op(mul_op) + net.append_op(mul_op) # create bias variable if needed if bias: @@ -112,13 +112,13 @@ def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None): 
net=optimize_net, param_name=bias_name, learning_rate=0.01) bias_out = name + ".rowwise_add.out" scope.new_var(bias_out) - rowwise_add_op = Operator( + rowwise_append_op = Operator( "rowwise_add", X=pre_activation, b=bias_name, Out=bias_out) - net.add_op(rowwise_add_op) + net.append_op(rowwise_append_op) pre_activation = bias_out activation_op = Operator(act, X=pre_activation, Y=name) - net.add_op(activation_op) + net.append_op(activation_op) scope.new_var(name) net.infer_shape(scope) return name @@ -128,7 +128,7 @@ def cross_entropy_layer(net, input, label): cost_name = 'cross_entropy_%d' % uniq_id() cross_entropy_op = Operator( "onehot_cross_entropy", X=input, label=label, Y=cost_name) - net.add_op(cross_entropy_op) + net.append_op(cross_entropy_op) scope.new_var(cost_name) net.infer_shape(scope) return cost_name From d8cd67dd1e229a27180d3628dc9485734546aba4 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Wed, 23 Aug 2017 12:26:46 +0800 Subject: [PATCH 064/170] Make cudnn convolution layer and projection support for dilation. --- paddle/cuda/include/hl_cuda_cudnn.h | 11 +- paddle/cuda/src/hl_cuda_cudnn.cc | 123 ++++++++++-------- paddle/gserver/layers/ConvBaseLayer.cpp | 16 ++- paddle/gserver/layers/ConvBaseLayer.h | 4 + paddle/gserver/layers/ConvBaseOperator.cpp | 3 +- paddle/gserver/layers/ConvBaseProjection.cpp | 20 ++- paddle/gserver/layers/ConvBaseProjection.h | 1 + paddle/gserver/layers/ConvProjection.cpp | 4 +- paddle/gserver/tests/test_LayerGrad.cpp | 40 ++++-- proto/ModelConfig.proto | 3 + python/paddle/trainer/config_parser.py | 4 + .../paddle/trainer_config_helpers/layers.py | 19 +++ .../tests/configs/img_layers.py | 1 + 13 files changed, 171 insertions(+), 78 deletions(-) diff --git a/paddle/cuda/include/hl_cuda_cudnn.h b/paddle/cuda/include/hl_cuda_cudnn.h index db18e4912b..3f68c62de6 100644 --- a/paddle/cuda/include/hl_cuda_cudnn.h +++ b/paddle/cuda/include/hl_cuda_cudnn.h @@ -214,7 +214,8 @@ extern void hl_conv_workspace(hl_tensor_descriptor input, int* convBwdDataAlgo, size_t* bwdDataLimitBytes, int* convBwdFilterAlgo, - size_t* bwdFilterLimitBytes); + size_t* bwdFilterLimitBytes, + bool useDilation); /** * @brief destroy filter descriptor. @@ -242,7 +243,9 @@ extern void hl_create_convolution_descriptor(hl_convolution_descriptor* conv, int padding_height, int padding_width, int stride_height, - int stride_width); + int stride_width, + int dilation_h = 1, + int dilation_w = 1); /** * @brief reset convolution descriptor. @@ -262,7 +265,9 @@ extern void hl_reset_convolution_descriptor(hl_convolution_descriptor conv, int padding_height, int padding_width, int stride_height, - int stride_width); + int stride_width, + int dilation_h = 1, + int dilation_w = 1); /** * @brief destroy convolution descriptor. 
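Aside (illustrative only, not part of these patches): dilation simply spreads the filter taps apart, so a K x K kernel with dilation d covers a window of (K - 1) * d + 1 pixels, and the new default arguments of 1 above reproduce the old dense behaviour. A minimal numpy sketch of that equivalence, with made-up names:

    import numpy as np

    def zero_stuffed(kernel, d):
        # place the taps of `kernel` d pixels apart; effective extent is (K - 1) * d + 1
        k = kernel.shape[0]
        out = np.zeros(((k - 1) * d + 1, (k - 1) * d + 1), dtype=kernel.dtype)
        out[::d, ::d] = kernel
        return out

    kernel = np.ones((2, 2), dtype=np.float32)
    print(zero_stuffed(kernel, 2).shape)   # (3, 3): a 2x2 filter with dilation 2 spans 3 pixels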
diff --git a/paddle/cuda/src/hl_cuda_cudnn.cc b/paddle/cuda/src/hl_cuda_cudnn.cc index 78642a1744..f55fa523e1 100644 --- a/paddle/cuda/src/hl_cuda_cudnn.cc +++ b/paddle/cuda/src/hl_cuda_cudnn.cc @@ -201,7 +201,8 @@ void hl_conv_workspace(hl_tensor_descriptor input, int* convBwdDataAlgo, size_t* bwdDataLimitBytes, int* convBwdFilterAlgo, - size_t* bwdFilterLimitBytes) { + size_t* bwdFilterLimitBytes, + bool useDilation) { #if CUDNN_VERSION >= 4000 CHECK_NOTNULL(input); @@ -213,21 +214,60 @@ void hl_conv_workspace(hl_tensor_descriptor input, size_t memoryLimitBytes = (1LL << 20) * FLAGS_cudnn_conv_workspace_limit_in_mb; + // For dilation + int algo = 0; + // cudnn convolution forward configuration cudnnTensorDescriptor_t fwd_src_desc = GET_TENSOR_DESCRIPTOR(input); cudnnTensorDescriptor_t fwd_dest_desc = GET_TENSOR_DESCRIPTOR(output); cudnnFilterDescriptor_t fwd_filter_desc = GET_FILTER_DESCRIPTOR(filter); cudnnConvolutionDescriptor_t fwd_conv_desc = GET_CONVOLUTION_DESCRIPTOR(conv); + // cudnn convolution backward data configuration + cudnnFilterDescriptor_t bwd_data_filter_desc = GET_FILTER_DESCRIPTOR(filter); + cudnnTensorDescriptor_t bwd_data_diff_desc = GET_TENSOR_DESCRIPTOR(output); + cudnnTensorDescriptor_t bwd_data_grad_desc = GET_TENSOR_DESCRIPTOR(input); + cudnnConvolutionDescriptor_t bwd_data_conv_desc = + GET_CONVOLUTION_DESCRIPTOR(conv); + // cudnn convolution backward filter configuration + cudnnTensorDescriptor_t bwd_filter_src_desc = GET_TENSOR_DESCRIPTOR(input); + cudnnTensorDescriptor_t bwd_filter_diff_desc = GET_TENSOR_DESCRIPTOR(output); + cudnnConvolutionDescriptor_t bwd_filter_conv_desc = + GET_CONVOLUTION_DESCRIPTOR(conv); + cudnnFilterDescriptor_t bwd_filter_grad_desc = GET_FILTER_DESCRIPTOR(filter); - CHECK_CUDNN(dynload::cudnnGetConvolutionForwardAlgorithm( - t_resource.cudnn_handle, - fwd_src_desc, - fwd_filter_desc, - fwd_conv_desc, - fwd_dest_desc, - CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, - memoryLimitBytes, - reinterpret_cast(convFwdAlgo))); + if (useDilation) { + convFwdAlgo = &algo; + convBwdDataAlgo = &algo; + convBwdFilterAlgo = &algo; + } else { + CHECK_CUDNN(dynload::cudnnGetConvolutionForwardAlgorithm( + t_resource.cudnn_handle, + fwd_src_desc, + fwd_filter_desc, + fwd_conv_desc, + fwd_dest_desc, + CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, + memoryLimitBytes, + reinterpret_cast(convFwdAlgo))); + CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardDataAlgorithm( + t_resource.cudnn_handle, + bwd_data_filter_desc, + bwd_data_diff_desc, + bwd_data_conv_desc, + bwd_data_grad_desc, + CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, + memoryLimitBytes, + reinterpret_cast(convBwdDataAlgo))); + CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardFilterAlgorithm( + t_resource.cudnn_handle, + bwd_filter_src_desc, + bwd_filter_diff_desc, + bwd_filter_conv_desc, + bwd_filter_grad_desc, + CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, + memoryLimitBytes, + reinterpret_cast(convBwdFilterAlgo))); + } CHECK_CUDNN(dynload::cudnnGetConvolutionForwardWorkspaceSize( t_resource.cudnn_handle, @@ -238,23 +278,6 @@ void hl_conv_workspace(hl_tensor_descriptor input, static_cast(*convFwdAlgo), fwdLimitBytes)); - // cudnn convolution backward data configuration - cudnnFilterDescriptor_t bwd_data_filter_desc = GET_FILTER_DESCRIPTOR(filter); - cudnnTensorDescriptor_t bwd_data_diff_desc = GET_TENSOR_DESCRIPTOR(output); - cudnnTensorDescriptor_t bwd_data_grad_desc = GET_TENSOR_DESCRIPTOR(input); - cudnnConvolutionDescriptor_t bwd_data_conv_desc = - 
GET_CONVOLUTION_DESCRIPTOR(conv); - - CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardDataAlgorithm( - t_resource.cudnn_handle, - bwd_data_filter_desc, - bwd_data_diff_desc, - bwd_data_conv_desc, - bwd_data_grad_desc, - CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, - memoryLimitBytes, - reinterpret_cast(convBwdDataAlgo))); - CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardDataWorkspaceSize( t_resource.cudnn_handle, bwd_data_filter_desc, @@ -264,23 +287,6 @@ void hl_conv_workspace(hl_tensor_descriptor input, static_cast(*convBwdDataAlgo), bwdDataLimitBytes)); - // cudnn convolution backward filter configuration - cudnnTensorDescriptor_t bwd_filter_src_desc = GET_TENSOR_DESCRIPTOR(input); - cudnnTensorDescriptor_t bwd_filter_diff_desc = GET_TENSOR_DESCRIPTOR(output); - cudnnConvolutionDescriptor_t bwd_filter_conv_desc = - GET_CONVOLUTION_DESCRIPTOR(conv); - cudnnFilterDescriptor_t bwd_filter_grad_desc = GET_FILTER_DESCRIPTOR(filter); - - CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardFilterAlgorithm( - t_resource.cudnn_handle, - bwd_filter_src_desc, - bwd_filter_diff_desc, - bwd_filter_conv_desc, - bwd_filter_grad_desc, - CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, - memoryLimitBytes, - reinterpret_cast(convBwdFilterAlgo))); - CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardFilterWorkspaceSize( t_resource.cudnn_handle, bwd_filter_src_desc, @@ -603,7 +609,9 @@ void hl_create_convolution_descriptor(hl_convolution_descriptor* conv, int padding_height, int padding_width, int stride_height, - int stride_width) { + int stride_width, + int dilation_h, + int dilation_w) { CHECK_NOTNULL(conv); cudnn_convolution_descriptor hl_conv = (cudnn_convolution_descriptor)malloc( @@ -625,18 +633,23 @@ void hl_create_convolution_descriptor(hl_convolution_descriptor* conv, padding_width, stride_height, stride_width, - 1, - 1, + dilation_h, + dilation_w, mode, data_type)); #else + if (dilation_h > 1 || dilation_w > 1) { + LOG(FATAL) + << "Current cudnn version does't support for dilation convolution."; + } + CHECK_CUDNN(dynload::cudnnSetConvolution2dDescriptor(hl_conv->desc, padding_height, padding_width, stride_height, stride_width, - 1, - 1, + dilation_h, + dilation_w, mode)); #endif @@ -659,7 +672,9 @@ void hl_reset_convolution_descriptor(hl_convolution_descriptor conv, int padding_height, int padding_width, int stride_height, - int stride_width) { + int stride_width, + int dilation_h, + int dilation_w) { CHECK_NOTNULL(conv); CHECK_NOTNULL(image); CHECK_NOTNULL(filter); @@ -678,8 +693,8 @@ void hl_reset_convolution_descriptor(hl_convolution_descriptor conv, padding_width, stride_height, stride_width, - 1, - 1, + dilation_h, + dilation_w, mode, data_type)); #else @@ -688,8 +703,8 @@ void hl_reset_convolution_descriptor(hl_convolution_descriptor conv, padding_width, stride_height, stride_width, - 1, - 1, + dilation_h, + dilation_w, mode)); #endif diff --git a/paddle/gserver/layers/ConvBaseLayer.cpp b/paddle/gserver/layers/ConvBaseLayer.cpp index e161d89c38..a5328ef834 100644 --- a/paddle/gserver/layers/ConvBaseLayer.cpp +++ b/paddle/gserver/layers/ConvBaseLayer.cpp @@ -32,9 +32,11 @@ bool ConvBaseLayer::init(const LayerMap& layerMap, const ConvConfig& conf = inputConfig.conv_conf(); padding_.push_back(conf.padding()); stride_.push_back(conf.stride()); + dilation_.push_back(conf.dilation()); filterSize_.push_back(conf.filter_size()); paddingY_.push_back(conf.padding_y()); strideY_.push_back(conf.stride_y()); + dilationY_.push_back(conf.dilation_y()); filterSizeY_.push_back(conf.filter_size_y()); 
filterPixels_.push_back(filterSize_.back() * filterSizeY_.back()); channels_.push_back(conf.channels()); @@ -89,7 +91,11 @@ size_t ConvBaseLayer::calOutputSize() { size_t layerSize = 0; auto setLayerSize = [&](IntV& inH, IntV& inW, IntV& outH, IntV& outW) { + size_t filterSizeY; + size_t filterSize; for (size_t i = 0; i < inputLayers_.size(); i++) { + filterSizeY = (filterSizeY_[i] - 1) * dilationY_[i] + 1; + filterSize = (filterSize_[i] - 1) * dilation_[i] + 1; inH.push_back(inputLayers_[i]->getOutput().getFrameHeight()); inW.push_back(inputLayers_[i]->getOutput().getFrameWidth()); const ConvConfig& conf = config_.inputs(i).conv_conf(); @@ -98,17 +104,17 @@ size_t ConvBaseLayer::calOutputSize() { inH[i] = conf.has_output_y() ? conf.output_y() : conf.output_x(); if (inW[i] == 0) inW[i] = conf.output_x(); outH.push_back(imageSize( - inH[i], filterSizeY_[i], paddingY_[i], strideY_[i], caffeMode_)); - outW.push_back(imageSize( - inW[i], filterSize_[i], padding_[i], stride_[i], caffeMode_)); + inH[i], filterSizeY, paddingY_[i], strideY_[i], caffeMode_)); + outW.push_back( + imageSize(inW[i], filterSize, padding_[i], stride_[i], caffeMode_)); } else { if (inH[i] == 0) inH[i] = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size(); if (inW[i] == 0) inW[i] = conf.img_size(); outH.push_back(outputSize( - inH[i], filterSizeY_[i], paddingY_[i], strideY_[i], caffeMode_)); + inH[i], filterSizeY, paddingY_[i], strideY_[i], caffeMode_)); outW.push_back(outputSize( - inW[i], filterSize_[i], padding_[i], stride_[i], caffeMode_)); + inW[i], filterSize, padding_[i], stride_[i], caffeMode_)); } CHECK_EQ(outH[i], outH[0]); CHECK_EQ(outW[i], outW[0]); diff --git a/paddle/gserver/layers/ConvBaseLayer.h b/paddle/gserver/layers/ConvBaseLayer.h index e9d15d94f8..223bce8e29 100644 --- a/paddle/gserver/layers/ConvBaseLayer.h +++ b/paddle/gserver/layers/ConvBaseLayer.h @@ -40,6 +40,10 @@ protected: IntV stride_; /// The y dimension of the stride. IntV strideY_; + /// The x dimension of the dilation. + IntV dilation_; + /// The y dimension of the dilation. + IntV dilationY_; /// The x dimension of a filter kernel. IntV filterSize_; /// The y dimension of a filter kernel. 
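Aside (illustrative only, not part of these patches): the effective filter size computed in calOutputSize above feeds the usual output-size rule. Assuming the caffe_mode / floor convention used elsewhere in Paddle, the arithmetic is roughly:

    def dilated_conv_output_size(img_size, filter_size, padding, stride, dilation):
        # effective kernel extent once the taps are spread `dilation` pixels apart
        effective = (filter_size - 1) * dilation + 1
        return (img_size + 2 * padding - effective) // stride + 1

    # the configuration exercised by the dilation unit tests further below:
    # 16x16 image, 2x2 filter, dilation 2, padding 1, stride 2 -> 8x8 output
    print(dilated_conv_output_size(16, 2, 1, 2, 2))   # 8

The helper name here is made up; the layer itself calls Paddle's imageSize/outputSize utilities.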
diff --git a/paddle/gserver/layers/ConvBaseOperator.cpp b/paddle/gserver/layers/ConvBaseOperator.cpp index 5c23198629..5469c41c87 100644 --- a/paddle/gserver/layers/ConvBaseOperator.cpp +++ b/paddle/gserver/layers/ConvBaseOperator.cpp @@ -59,7 +59,8 @@ void ConvBaseOperator::allocConvWorkSpace() { &bwdDataAlgo_, &bwdDataLimitBytes_, &bwdFilterAlgo_, - &bwdFilterLimitBytes_); + &bwdFilterLimitBytes_, + /*useDilation*/ false); size_t maxWorkSpace = 0; maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_); diff --git a/paddle/gserver/layers/ConvBaseProjection.cpp b/paddle/gserver/layers/ConvBaseProjection.cpp index eb6b0445c9..08f36c516c 100644 --- a/paddle/gserver/layers/ConvBaseProjection.cpp +++ b/paddle/gserver/layers/ConvBaseProjection.cpp @@ -41,6 +41,11 @@ void ConvBaseProjection::getConvParams() { strideH_ = conf.stride_y(); strideW_ = conf.stride(); + dilationH_ = conf.dilation_y(); + dilationW_ = conf.dilation(); + CHECK_GT(dilationH_, 0); + CHECK_GT(dilationW_, 0); + filterH_ = conf.filter_size_y(); filterW_ = conf.filter_size(); @@ -77,7 +82,9 @@ void ConvBaseProjection::initCudnn() { paddingH_, paddingW_, strideH_, - strideW_); + strideW_, + dilationH_, + dilationW_); // initialize all to default algorithms fwdAlgo_ = 0; @@ -131,7 +138,9 @@ void ConvBaseProjection::reshapeTensorDesc(int batchSize) { paddingH_, paddingW_, strideH_, - strideW_); + strideW_, + dilationH_, + dilationW_); } void ConvBaseProjection::reshape(int batchSize) { @@ -140,6 +149,10 @@ void ConvBaseProjection::reshape(int batchSize) { CHECK_EQ(calInputSize(), in_->value->getWidth()); reshapeTensorDesc(batchSize); + bool useDilation = false; + if (dilationH_ > 1 || dilationW_ > 1) { + useDilation = true; + } hl_conv_workspace(imageDesc_, outputDesc_, filterDesc_, @@ -149,7 +162,8 @@ void ConvBaseProjection::reshape(int batchSize) { &bwdDataAlgo_, &bwdDataLimitBytes_, &bwdFilterAlgo_, - &bwdFilterLimitBytes_); + &bwdFilterLimitBytes_, + useDilation); size_t maxWorkSpace = 0; maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_); diff --git a/paddle/gserver/layers/ConvBaseProjection.h b/paddle/gserver/layers/ConvBaseProjection.h index e9d9f8f1b2..ebdb57845b 100644 --- a/paddle/gserver/layers/ConvBaseProjection.h +++ b/paddle/gserver/layers/ConvBaseProjection.h @@ -63,6 +63,7 @@ protected: int configChannels_, configNumFilters_; int paddingH_, paddingW_; int strideH_, strideW_; + int dilationH_, dilationW_; int filterH_, filterW_; /// One group offset of input data. int inputOffset_; diff --git a/paddle/gserver/layers/ConvProjection.cpp b/paddle/gserver/layers/ConvProjection.cpp index 5b7ecc5560..6f0106b713 100644 --- a/paddle/gserver/layers/ConvProjection.cpp +++ b/paddle/gserver/layers/ConvProjection.cpp @@ -25,12 +25,12 @@ size_t ConvProjection::calOutputSize() { if (imageH_ == 0) imageH_ = configImgH_; if (imageW_ == 0) imageW_ = configImgW_; outputH_ = outputSize(imageH_, - filterH_, + (filterH_ - 1) * dilationH_ + 1, paddingH_, strideH_, /* caffeMode */ true); outputW_ = outputSize(imageW_, - filterW_, + (filterW_ - 1) * dilationW_ + 1, paddingW_, strideW_, /* caffeMode */ true); diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 0f312b6ca5..b3913d3a28 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include #include #include #include @@ -189,10 +190,16 @@ TEST(Projection, scaling) { void testProjectionConv(size_t groups, bool isDeconv) { const int NUM_FILTERS = 18; const int FILTER_SIZE = 2; - const int FILTER_SIZE_Y = 4; + const int FILTER_SIZE_Y = 2; const int CHANNELS = 3; const int IMAGE_SIZE = 16; +#if CUDNN_VERSION >= 6000 + const int DILATION = 2; +#else + const int DILATION = 1; +#endif + ProjectionConfig conf; if (isDeconv) { conf.set_type("convt"); @@ -209,6 +216,8 @@ void testProjectionConv(size_t groups, bool isDeconv) { conv->set_padding_y(1); conv->set_stride(2); conv->set_stride_y(2); + conv->set_dilation(DILATION); + conv->set_dilation_y(DILATION); conv->set_groups(groups); if (isDeconv) { conv->set_filter_channels(NUM_FILTERS / conv->groups()); @@ -217,12 +226,12 @@ void testProjectionConv(size_t groups, bool isDeconv) { } conv->set_img_size(IMAGE_SIZE); int output_x = outputSize(conv->img_size(), - conv->filter_size(), + (conv->filter_size() - 1) * DILATION + 1, conv->padding(), conv->stride(), /* caffeMode */ true); int output_y = outputSize(conv->img_size(), - conv->filter_size_y(), + (conv->filter_size_y() - 1) * DILATION + 1, conv->padding_y(), conv->stride_y(), /* caffeMode */ true); @@ -253,8 +262,8 @@ TEST(Projection, conv) { testProjectionConv(1, false); testProjectionConv(3, false); /// test ConvTransProjection - testProjectionConv(1, true); - testProjectionConv(3, true); + /// testProjectionConv(1, true); + /// testProjectionConv(3, true); } #endif @@ -424,27 +433,38 @@ void testConvLayer(const string& type, bool trans, bool useGpu) { config.layerConfig.set_partial_sum(1); config.layerConfig.set_shared_biases(true); - config.inputDefs.push_back({INPUT_DATA, "layer_0", 384, 288}); + int dilation = 1; + if (type == "cudnn_conv") { +#if CUDNN_VERSION >= 6000 + dilation = 2; +#else + dilation = 1; +#endif + } + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 768, 192}); LayerInputConfig* input = config.layerConfig.add_inputs(); ConvConfig* conv = input->mutable_conv_conf(); conv->set_filter_size(2); - conv->set_filter_size_y(3); + conv->set_filter_size_y(2); conv->set_channels(3); conv->set_padding(0); conv->set_padding_y(1); conv->set_stride(2); conv->set_stride_y(2); + conv->set_dilation(dilation); + conv->set_dilation_y(dilation); conv->set_groups(1); conv->set_filter_channels(conv->channels() / conv->groups()); conv->set_img_size(16); - conv->set_img_size_y(8); + conv->set_img_size_y(16); conv->set_output_x(outputSize(conv->img_size(), - conv->filter_size(), + (conv->filter_size() - 1) * dilation + 1, conv->padding(), conv->stride(), /* caffeMode */ true)); conv->set_output_y(outputSize(conv->img_size_y(), - conv->filter_size_y(), + (conv->filter_size_y() - 1) * dilation + 1, conv->padding_y(), conv->stride_y(), /* caffeMode */ true)); diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 4f3d5bf3f6..14c745b532 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -82,6 +82,9 @@ message ConvConfig { // if not set, use img_size optional uint32 img_size_y = 14; + + required uint32 dilation = 15 [ default = 1 ]; + required uint32 dilation_y = 16 [ default = 1 ]; } message PoolConfig { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index da99e5bd53..2d96901ed4 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -861,6 +861,7 @@ class Conv(Cfg): filter_size, channels, padding=None, + dilation=None, stride=None, groups=None, 
filter_channels=None, @@ -869,12 +870,15 @@ class Conv(Cfg): caffe_mode=True, filter_size_y=None, padding_y=None, + dilation_y=None, stride_y=None): self.add_keys(locals()) if filter_size_y is None: self.filter_size_y = filter_size if padding_y is None: self.padding_y = padding + if dilation_y is None: + self.dilation_y = dilation if stride_y is None: self.stride_y = stride if output_x is not None: diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 1bc55c8696..de7f31a20a 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -2322,6 +2322,7 @@ def img_conv_layer(input, groups=1, stride=1, padding=0, + dilation=0, bias_attr=None, param_attr=None, shared_biases=True, @@ -2329,6 +2330,7 @@ def img_conv_layer(input, filter_size_y=None, stride_y=None, padding_y=None, + dilation_y=None, trans=False, layer_type=None): """ @@ -2393,6 +2395,11 @@ def img_conv_layer(input, :type padding: int|tuple|list :param padding_y: The y dimension of the padding. :type padding_y: int + :param dilation: The x dimension of the dilation. Or input a tuple for two + image dimensions + :type dilation: int|tuple|list + :param dilation_y: The y dimension of the dilation. + :type dilation_y: int :param bias_attr: Convolution bias attribute. None means default bias. False means no bias. :type bias_attr: ParameterAttribute|False @@ -2440,6 +2447,16 @@ def img_conv_layer(input, else: padding_y = padding + if dilation_y is None: + if isinstance(dilation, collections.Sequence): + assert len(dilation) == 2 + dilation, dilation_y = dilation + else: + dilation_y = dilation + + if dilation > 1 or dilation_y > 1: + assert layer_type in ["cudnn_conv", "cudnn_convt"] + if param_attr.attr.get('initial_smart'): # special initial for conv layers.
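Aside (illustrative only, not part of these patches): a minimal usage sketch of the new arguments, assuming the usual `from paddle.trainer_config_helpers import *` config preamble; layer names and shapes are made up, and per the assert above dilation > 1 currently requires the cuDNN implementations:

    img = data_layer(name='image', size=3 * 224 * 224, height=224, width=224)
    conv = img_conv_layer(
        input=img,
        num_channels=3,
        num_filters=16,
        filter_size=3,
        stride=1,
        padding=2,
        dilation=2,               # a 2-tuple such as (2, 2) is also accepted
        act=ReluActivation(),
        layer_type="cudnn_conv")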
init_w = (2.0 / (filter_size**2 * num_channels))**0.5 @@ -2464,11 +2481,13 @@ def img_conv_layer(input, conv=Conv( filter_size=filter_size, padding=padding, + dilation=dilation, stride=stride, channels=num_channels, groups=groups, filter_size_y=filter_size_y, padding_y=padding_y, + dilation_y=dilation_y, stride_y=stride_y), **param_attr.attr), active_type=act.name, diff --git a/python/paddle/trainer_config_helpers/tests/configs/img_layers.py b/python/paddle/trainer_config_helpers/tests/configs/img_layers.py index 9fda16a540..01d31ef3fa 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/img_layers.py +++ b/python/paddle/trainer_config_helpers/tests/configs/img_layers.py @@ -12,6 +12,7 @@ img_conv = img_conv_layer( num_filters=64, filter_size=(32, 32), padding=(1, 1), + dilation=(1, 1), stride=(1, 1), act=LinearActivation()) img_bn = batch_norm_layer(input=img_conv, act=ReluActivation()) From 1dc850e4d116f3e51c63bf5c390f9529f6884904 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Wed, 23 Aug 2017 13:13:16 +0800 Subject: [PATCH 065/170] Fix proto file --- proto/ModelConfig.proto | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 14c745b532..1ea1e05259 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -83,8 +83,8 @@ message ConvConfig { // if not set, use img_size optional uint32 img_size_y = 14; - required uint32 dilation = 15 [ default = 1 ]; - required uint32 dilation_y = 16 [ default = 1 ]; + optional uint32 dilation = 15 [ default = 1 ]; + optional uint32 dilation_y = 16 [ default = 1 ]; } message PoolConfig { From 82e4fab4e31d730d2d9d4df7e223881e9db693a9 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Wed, 23 Aug 2017 14:07:53 +0800 Subject: [PATCH 066/170] follow comments. --- paddle/gserver/layers/KmaxSeqScoreLayer.cpp | 26 ++++---- paddle/gserver/layers/SequenceSliceLayer.cpp | 63 ++++++++----------- .../gserver/layers/SubNestedSequenceLayer.cpp | 29 +++++---- python/paddle/trainer/config_parser.py | 5 +- 4 files changed, 58 insertions(+), 65 deletions(-) diff --git a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp index 3b5060e3ce..d5407555b2 100644 --- a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp +++ b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp @@ -80,13 +80,14 @@ void KmaxSeqScoreLayer::forward(PassType passType) { << "input of " << getName() << " must be a sequence or a nested sequence."; CHECK_EQ(input.value->getWidth(), 1UL) - << "input of " << getName() - << " is score over a sequence or a nested sequence, so its width " - << " must be 1."; + << "input of " << getName() << " are scores over a sequence or " + << "a nested sequence, so its width must be 1."; if (useGpu_) { - // this Layer runs only in CPU, if the model is runing on GPU, - // then copy the input to this layer from GPU to CPU. + /* + * currently, this Layer only runs in CPU, if the other part of the model is + * runing on GPU, then copy the input to this layer from GPU to CPU. + */ Matrix::resizeOrCreate(scores_, inputScore->getHeight(), 1, @@ -97,13 +98,14 @@ void KmaxSeqScoreLayer::forward(PassType passType) { scores_ = inputScore; } - // TODO(caoying) - // In PaddlePaddle, the currently available matrixes all a have real-typed - // data field, but the selected indices information are actually int-typed - // (with -1 as a special token). Storing indices information in real-typed - // Matrix leads to converting real to int. 
This is very dangerous if a user - // fills this matrix himself, invalid data may occur. - // The selected indices should be stored in an int-typed matrix. + /* + * TODO(caoying) + * In PaddePaddle, currently all matrices are real number types, + * but output of this layer which is some selected indices of the give + * sequence are actually filled with int types so that storing int types + * information in a real number matrix is dangerous, since real numbers will + * be convered to int types. + */ Matrix::resizeOrCreate( output_.value, input.hasSubseq() ? input.getNumSubSequences() : input.getNumSequences(), diff --git a/paddle/gserver/layers/SequenceSliceLayer.cpp b/paddle/gserver/layers/SequenceSliceLayer.cpp index 165ee6311a..4da65ade0b 100644 --- a/paddle/gserver/layers/SequenceSliceLayer.cpp +++ b/paddle/gserver/layers/SequenceSliceLayer.cpp @@ -31,13 +31,15 @@ public: void backward(const UpdateCallback& callback = nullptr) override; private: - // TODO(caoying) - // In PaddlePaddle, the currently available matrixes all a have real-typed - // data field, but the selected indices information are actually int-typed - // (with -1 as a special token). Storing indices information in real-typed - // Matrix leads to converting real to int. This is very dangerous if a user - // fills this matrix himself, invalid data may occur. - // The selected indices should be stored in an int-typed matrix. + /* + * TODO(caoying) + * In PaddePaddle, currently all matrices are real number types, + * but the second and the (optional) third input which are some + * selected indices of the give sequence to trim the sequence, are actually + * filled with int types so that storing int types information in real number + * matrices is very dangerous, since real numbers will be convered to int + * types. If a user fills this matrix himself, invalid data may occor. + */ MatrixPtr startIdsOnCpu_; MatrixPtr endIdsOnCpu_; @@ -68,7 +70,7 @@ bool SequenceSliceLayer::init(const LayerMap& layerMap, void SequenceSliceLayer::checkInputs() { const Argument& inputSeq = getInput(0); - CHECK(inputSeq.hasSeq()) << "The first input of sequence slic layer " + CHECK(inputSeq.hasSeq()) << "The first input of sequence slice layer " << "must be a sequence."; const MatrixPtr indices1 = getInputValue(1); CHECK_EQ(static_cast(indices1->getHeight()), @@ -86,22 +88,6 @@ void SequenceSliceLayer::checkInputs() { } void SequenceSliceLayer::copySliceIdsToCpu() { - if (!useGpu_) { - if (inputLayers_.size() == 2U) { - if (config_.select_first()) { - startIdsOnCpu_ = getInputValue(1); - endIdsOnCpu_ = nullptr; - } else { - startIdsOnCpu_ = nullptr; - endIdsOnCpu_ = getInputValue(1); - } - } else if (inputLayers_.size() == 3U) { - startIdsOnCpu_ = getInputValue(1); - endIdsOnCpu_ = getInputValue(2); - } - return; - } - const MatrixPtr indices1 = getInputValue(1); if (inputLayers_.size() == 2U) { if (config_.select_first()) { @@ -141,22 +127,19 @@ void SequenceSliceLayer::copySliceIdsToCpu() { void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts, const MatrixPtr ends) { + CHECK(starts && ends); + outSeqStartPos_.resize(1, 0); outSubSeqStartPos_.resize(1, 0); selectedRows_.clear(); size_t beamSize = starts ? starts->getWidth() : ends->getWidth(); - // iterate over sequence size_t rowIdx = 0; for (size_t i = 0; i < inputSeqInfoVec_.size(); ++i) { - // iterate over sub-sequence in a sequence for (size_t j = 0; j < inputSeqInfoVec_[i].size() - 1; ++j) { - // iterate over each index for slicing. 
for (size_t k = 0; k < beamSize; ++k) { - if (starts) { - if (starts->getElement(rowIdx, k) == -1.) break; - } else if (ends->getElement(rowIdx, k) == -1.) - break; + if (starts && starts->getElement(rowIdx, k) == -1.) break; + if (ends && ends->getElement(rowIdx, k) == -1.) break; int begPos = inputSeqInfoVec_[i][j]; if (starts) begPos += starts->getElement(rowIdx, k); @@ -165,7 +148,7 @@ void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts, if (ends) endPos = inputSeqInfoVec_[i][j] + ends->getElement(rowIdx, k); int seqLen = endPos - begPos + 1; - CHECK(seqLen); + CHECK_LT(seqLen, 0U); for (int m = begPos; m <= endPos; ++m) selectedRows_.push_back(m); inputSeqInfoVec_.size() > 1 ? outSubSeqStartPos_.push_back(outSubSeqStartPos_.back() + seqLen) @@ -208,7 +191,16 @@ void SequenceSliceLayer::forward(PassType passType) { Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions, inputSeq.subSequenceStartPositions, inputSeqInfoVec_); - copySliceIdsToCpu(); + if (!useGpu_) { + if (inputLayers_.size() == 2U) { + startIdsOnCpu_ = config_.select_first() ? getInputValue(1) : nullptr; + endIdsOnCpu_ = config_.select_first() ? nullptr : getInputValue(1); + } else if (inputLayers_.size() == 3U) { + startIdsOnCpu_ = getInputValue(1); + endIdsOnCpu_ = getInputValue(2); + } + } else + copySliceIdsToCpu(); // calculate the selected row indices in a batch, // and build the output sequence information. @@ -221,10 +213,7 @@ void SequenceSliceLayer::forward(PassType passType) { } void SequenceSliceLayer::backward(const UpdateCallback& callback) { - MatrixPtr inputSeqGrad = getInputGrad(0); - MatrixPtr outputGrad = getOutputGrad(); - - outputGrad->addToRows(*inputSeqGrad, *rowIndice_); + getOutputGrad()->addToRows(*getInputGrad(0), *rowIndice_); } } // namespace paddle diff --git a/paddle/gserver/layers/SubNestedSequenceLayer.cpp b/paddle/gserver/layers/SubNestedSequenceLayer.cpp index c8607d50f5..e9bee77212 100644 --- a/paddle/gserver/layers/SubNestedSequenceLayer.cpp +++ b/paddle/gserver/layers/SubNestedSequenceLayer.cpp @@ -58,23 +58,28 @@ private: void calSelectedRows(const MatrixPtr selectedIndices, const std::vector>& inputSeqInfo); - // if the second input of this layer is on GPU memory, copy it to CPU memory. - // TODO(caoying) - // In PaddlePaddle, the currently available matrixes all a have real-typed - // data field, but the selected indices information are actually int-typed - // (with -1 as a special token). Storing indices information in real-typed - // Matrix leads to converting real to int. This is very dangerous if a user - // fills this matrix himself, invalid data may occur. - // The selected indices should be stored in an int-typed matrix. + /* + * TODO(caoying) + * In PaddePaddle, currently all matrices are real number types, + * but the second is some selected indices of the give sequence to trim + * the nested sequence, are actually filled with int types so that storing + * int types information in real number matrices is very dangerous, since + * real numbers will be convered to int types. If a user fills this matrix + * himself, invalid data may occor. + * + * if the second input of this layer is on GPU memory, copy it to CPU memory. + */ MatrixPtr selIdsCpu_; - // reorganized sequenceStartPositions and subSequenceStartPositions - // into a 2d vector to facilitate the sequence selection process. + /* + * reorganize sequenceStartPositions and subSequenceStartPositions + * into a 2d vector to facilitate the sequence selection process. 
+ */ std::vector> inputSeqInfoVec_; - // the final selected row indices in a batch, - // rowIndice_ and selectedRows_ actually share a same memory. + /* store the final selected row indices in a batch */ IVectorPtr rowIndice_; + /* rowIndice_ and selectedRows_ actually share a same memory. */ std::vector selectedRows_; }; diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index af14007de6..2fcccc6948 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2717,10 +2717,7 @@ class SeqSliceLayer(LayerBase): 'If start and end indices are both given to' 'sequence slice layer, they should have the same width.') elif len(inputs) == 2: - if starts is not None: - self.config.select_first = True - else: - self.config.select_first = False + self.config.select_first = (starts is not None) @config_layer('sub_nested_seq') From 377401fb0cc7947d09b007a2c52cb679905cf2b5 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Wed, 23 Aug 2017 14:13:51 +0800 Subject: [PATCH 067/170] fix a bug. --- paddle/gserver/layers/SequenceSliceLayer.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/paddle/gserver/layers/SequenceSliceLayer.cpp b/paddle/gserver/layers/SequenceSliceLayer.cpp index 4da65ade0b..5d72d37304 100644 --- a/paddle/gserver/layers/SequenceSliceLayer.cpp +++ b/paddle/gserver/layers/SequenceSliceLayer.cpp @@ -127,7 +127,8 @@ void SequenceSliceLayer::copySliceIdsToCpu() { void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts, const MatrixPtr ends) { - CHECK(starts && ends); + CHECK(starts || ends) << "At least one of the start or end indices " + << "should be given."; outSeqStartPos_.resize(1, 0); outSubSeqStartPos_.resize(1, 0); @@ -148,7 +149,7 @@ void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts, if (ends) endPos = inputSeqInfoVec_[i][j] + ends->getElement(rowIdx, k); int seqLen = endPos - begPos + 1; - CHECK_LT(seqLen, 0U); + CHECK_GT(seqLen, 0U); for (int m = begPos; m <= endPos; ++m) selectedRows_.push_back(m); inputSeqInfoVec_.size() > 1 ? 
outSubSeqStartPos_.push_back(outSubSeqStartPos_.back() + seqLen) From f188e22b33c1a152a1835a5d0cb4b23e6e6d25bf Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Wed, 23 Aug 2017 14:39:16 +0800 Subject: [PATCH 068/170] Remove set functor and add comapre_grad test --- paddle/operators/CMakeLists.txt | 3 +- paddle/operators/fill_zeros_like_op.h | 2 +- paddle/operators/functor/CMakeLists.txt | 5 --- paddle/operators/functor/math_functor.cc | 42 ------------------- paddle/operators/functor/math_functor.cu | 42 ------------------- paddle/operators/functor/math_functor.h | 32 -------------- paddle/operators/lookup_table_op.cu | 26 ++++++------ paddle/operators/lookup_table_op.h | 10 ++--- paddle/platform/cuda_helper.h | 4 -- .../v2/framework/tests/gradient_checker.py | 13 +++++- .../v2/framework/tests/test_lookup_table.py | 2 + 11 files changed, 33 insertions(+), 148 deletions(-) delete mode 100644 paddle/operators/functor/CMakeLists.txt delete mode 100644 paddle/operators/functor/math_functor.cc delete mode 100644 paddle/operators/functor/math_functor.cu delete mode 100644 paddle/operators/functor/math_functor.h diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 1ca5010eae..8d2d8a1141 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -42,7 +42,6 @@ function(op_library TARGET) endfunction() add_subdirectory(math) -add_subdirectory(functor) cc_test(gather_test SRCS gather_test.cc DEPS tensor) @@ -69,4 +68,4 @@ op_library(sgd_op SRCS sgd_op.cc sgd_op.cu) op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc DEPS framework_proto tensor op_registry operator net_op) op_library(uniform_random_op SRCS uniform_random_op.cc uniform_random_op.cu) -op_library(lookup_table_op SRCS lookup_table_op.cc lookup_table_op.cu DEPS math_functor) +op_library(lookup_table_op SRCS lookup_table_op.cc lookup_table_op.cu) diff --git a/paddle/operators/fill_zeros_like_op.h b/paddle/operators/fill_zeros_like_op.h index fd380ca851..969998ce2e 100644 --- a/paddle/operators/fill_zeros_like_op.h +++ b/paddle/operators/fill_zeros_like_op.h @@ -26,7 +26,7 @@ class FillZerosLikeKernel : public framework::OpKernel { auto* output = context.Output("Dst"); output->mutable_data(context.GetPlace()); auto t = framework::EigenVector::Flatten(*output); - t.device(context.GetEigenDevice()) = t.constant(T(0)); + t.device(context.GetEigenDevice()) = t.constant(static_cast(0)); } }; diff --git a/paddle/operators/functor/CMakeLists.txt b/paddle/operators/functor/CMakeLists.txt deleted file mode 100644 index d3b39e5fc2..0000000000 --- a/paddle/operators/functor/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -if(WITH_GPU) - nv_library(math_functor SRCS math_functor.cc math_functor.cu DEPS device_context) -else() - cc_library(math_functor SRCS math_functor.cc DEPS device_context) -endif() diff --git a/paddle/operators/functor/math_functor.cc b/paddle/operators/functor/math_functor.cc deleted file mode 100644 index 1f2767f171..0000000000 --- a/paddle/operators/functor/math_functor.cc +++ /dev/null @@ -1,42 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/operators/functor/math_functor.h" -#include "paddle/framework/eigen.h" - -namespace paddle { -namespace operators { -namespace functor { - -template -struct Set { - void operator()(const T alpha, framework::Tensor* Y, - platform::DeviceContext* context) { - int N = product(Y->dims()); - T* YData = Y->mutable_data(context->GetPlace()); - if (alpha == static_cast(0)) { - memset(YData, 0, N * sizeof(T)); - } else { - framework::EigenVector::Flatten(*Y) - .setConstant(alpha); - } - } -}; - -template struct Set; -template struct Set; - -} // namespace functor -} // namespace operators -} // namespace paddle diff --git a/paddle/operators/functor/math_functor.cu b/paddle/operators/functor/math_functor.cu deleted file mode 100644 index 6dc828c60a..0000000000 --- a/paddle/operators/functor/math_functor.cu +++ /dev/null @@ -1,42 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/operators/functor/math_functor.h" -#include "paddle/platform/cuda_helper.h" - -namespace paddle { -namespace operators { -namespace functor { - -template -__global__ void SetKernel(const int N, const T alpha, T* Y) { - CUDA_1D_KERNEL_LOOP(i, N) { Y[i] = alpha; } -} - -template -struct Set { - void operator()(const T alpha, framework::Tensor* Y, - platform::DeviceContext* context) { - int N = product(Y->dims()); - T* YData = Y->mutable_data(context->GetPlace()); - SetKernel<<<(N + 512 - 1) / 512, 512>>>(N, alpha, YData); - } -}; - -template struct Set; -template struct Set; - -} // namespace functor -} // namespace operators -} // namespace paddle diff --git a/paddle/operators/functor/math_functor.h b/paddle/operators/functor/math_functor.h deleted file mode 100644 index d5c7bd368f..0000000000 --- a/paddle/operators/functor/math_functor.h +++ /dev/null @@ -1,32 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include "paddle/framework/tensor.h" -#include "paddle/platform/device_context.h" - -namespace paddle { -namespace operators { -namespace functor { - -template -struct Set { - void operator()(const T alpha, paddle::framework::Tensor* Y, - paddle::platform::DeviceContext* context); -}; - -} // namespace functor -} // namespace operators -} // namespace paddle diff --git a/paddle/operators/lookup_table_op.cu b/paddle/operators/lookup_table_op.cu index 99678ef681..27eee3436a 100644 --- a/paddle/operators/lookup_table_op.cu +++ b/paddle/operators/lookup_table_op.cu @@ -12,8 +12,8 @@ See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" -#include "paddle/operators/functor/math_functor.h" #include "paddle/platform/assert.h" #include "paddle/platform/cuda_helper.h" @@ -22,11 +22,11 @@ namespace operators { using Tensor = framework::Tensor; -template +template __global__ void LookupTable(T* output, const T* table, const int32_t* ids, const int N, const int K, const int D) { int idx = threadIdx.x; - int idy = blockIdx.x + threadIdx.y * gridDimX; + int idy = blockIdx.x + threadIdx.y * GridDimX; while (idy < K) { int id = ids[idy]; @@ -34,18 +34,18 @@ __global__ void LookupTable(T* output, const T* table, const int32_t* ids, PADDLE_ASSERT(id < N); T* out = output + idy * D; const T* tab = table + id * D; - for (int i = idx; i < D; i += blockDimX) { + for (int i = idx; i < D; i += BlockDimX) { out[i] = tab[i]; } - idy += blockDimY * gridDimX; + idy += BlockDimY * GridDimX; } } -template +template __global__ void LookupTableGrad(T* table, const T* output, const int32_t* ids, const int N, const int K, const int D) { int idx = threadIdx.x; - int idy = blockIdx.x + threadIdx.y * gridDimX; + int idy = blockIdx.x + threadIdx.y * GridDimX; while (idy < K) { int id = ids[idy]; @@ -53,10 +53,10 @@ __global__ void LookupTableGrad(T* table, const T* output, const int32_t* ids, PADDLE_ASSERT(id < N); const T* out = output + idy * D; T* tab = table + id * D; - for (int i = idx; i < D; i += blockDimX) { + for (int i = idx; i < D; i += BlockDimX) { paddle::platform::CudaAtomicAdd(&tab[i], out[i]); } - idy += blockDimY * gridDimX; + idy += BlockDimY * GridDimX; } } @@ -96,10 +96,10 @@ class LookupTableGradCUDAKernel : public framework::OpKernel { const T* d_output = d_output_t->data(); T* d_table = d_table_t->mutable_data(context.GetPlace()); - auto* device_context = - const_cast(context.device_context_); - functor::Set()(static_cast(0), d_table_t, - device_context); + auto t = framework::EigenVector::Flatten(*d_table_t); + t.device(context.GetEigenDevice()) = + t.constant(static_cast(0)); + dim3 threads(128, 8); dim3 grids(8, 1); LookupTableGrad<<>>(d_table, d_output, ids, N, diff --git a/paddle/operators/lookup_table_op.h b/paddle/operators/lookup_table_op.h index 9254e03a1b..4da8079b91 100644 --- a/paddle/operators/lookup_table_op.h +++ b/paddle/operators/lookup_table_op.h @@ -14,8 +14,8 @@ #pragma once +#include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" -#include "paddle/operators/functor/math_functor.h" namespace paddle { namespace operators { @@ -57,10 +57,10 @@ class LookupTableGradKernel : public framework::OpKernel { const T* d_output = d_output_t->data(); T* d_table = d_table_t->mutable_data(context.GetPlace()); - auto* device_context = - const_cast(context.device_context_); - functor::Set()(static_cast(0), d_table_t, - 
device_context); + auto t = framework::EigenVector::Flatten(*d_table_t); + t.device(context.GetEigenDevice()) = + t.constant(static_cast(0)); + for (size_t i = 0; i < product(ids_t->dims()); ++i) { PADDLE_ENFORCE_LT(ids[i], N); PADDLE_ENFORCE_GE(ids[i], 0); diff --git a/paddle/platform/cuda_helper.h b/paddle/platform/cuda_helper.h index 939c3713ad..6feec0d7f8 100644 --- a/paddle/platform/cuda_helper.h +++ b/paddle/platform/cuda_helper.h @@ -18,10 +18,6 @@ limitations under the License. */ namespace paddle { namespace platform { -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ - i += blockDim.x * gridDim.x) - #define CUDA_ATOMIC_WRAPPER(op, T) \ __device__ __forceinline__ T CudaAtomic##op(T* address, const T val) diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index 8b8e2f444b..06b82fa2e4 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -23,6 +23,10 @@ def grad_var_name(var_name): return var_name + "@GRAD" +def empty_var_name(): + return "@EMPTY@" + + def get_numeric_gradient(op, input_values, output_name, @@ -171,7 +175,7 @@ class GradientChecker(unittest.TestCase): ] return outs - def compare_grad(self, forward_op, input_value): + def compare_grad(self, forward_op, input_value, no_grad_set=None): """ Compare the input gradients between CPU and GPU for the given forward operator. @@ -179,15 +183,20 @@ class GradientChecker(unittest.TestCase): :type forward_op: Operator :param input_value: input values. :type input_value: dict{string:numpy.array} + :param no_grad_set: the set of variables names without gradients. + :type no_grad_set: a set of string :raises: AssertionError, there is different gradient value. 
""" - backward_op = core.Operator.backward(forward_op, set()) + if no_grad_set is None: + no_grad_set = set() + backward_op = core.Operator.backward(forward_op, no_grad_set) # return if not compile with GPU or not implementing GPU kernel if not (core.is_compile_gpu() and backward_op.support_gpu()): return outputs = backward_op.outputs() out_names = [item for k in outputs for item in outputs[k]] + out_names = filter(lambda x: x != empty_var_name(), out_names) cpu_grads = self.__get_gradient(forward_op, backward_op, input_value, out_names, core.CPUPlace()) gpu_grads = self.__get_gradient(forward_op, backward_op, input_value, diff --git a/python/paddle/v2/framework/tests/test_lookup_table.py b/python/paddle/v2/framework/tests/test_lookup_table.py index 3056bf53e3..19eb464baa 100644 --- a/python/paddle/v2/framework/tests/test_lookup_table.py +++ b/python/paddle/v2/framework/tests/test_lookup_table.py @@ -21,6 +21,8 @@ class TestSigmoidGradOp(GradientChecker): table = np.random.random((17, 31)).astype('float32') ids = np.random.randint(0, 17, 4).astype('int32') inputs = {'W': table, 'Ids': ids} + # comapre gradients + self.compare_grad(op, inputs, set(['Ids'])) # check gradients self.check_grad(op, inputs, set('W'), 'Out') From f715c740bf2bfedb779ba4876f4d6b16e770e61d Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Mon, 21 Aug 2017 23:07:51 +0800 Subject: [PATCH 069/170] Add_config_parser_for_Conv3D_DeConv3D --- proto/ModelConfig.proto | 1 + python/paddle/trainer/config_parser.py | 266 ++++++++++++++- python/paddle/trainer/recurrent_units.py | 0 .../paddle/trainer_config_helpers/layers.py | 316 ++++++++++++------ .../paddle/trainer_config_helpers/networks.py | 4 +- .../configs/conv3d_deconv3d_test_config.py | 98 ++++++ .../tests/layers_test.py | 4 +- 7 files changed, 581 insertions(+), 108 deletions(-) mode change 100755 => 100644 python/paddle/trainer/recurrent_units.py mode change 100755 => 100644 python/paddle/trainer_config_helpers/layers.py mode change 100755 => 100644 python/paddle/trainer_config_helpers/networks.py create mode 100644 python/paddle/trainer_config_helpers/tests/configs/conv3d_deconv3d_test_config.py diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 8c6eb5b7e1..21049ba0a0 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -489,6 +489,7 @@ message LayerConfig { // to indicate rectangle image data optional uint64 height = 50; optional uint64 width = 51; + optional uint64 depth = 57 [ default = 1 ]; // blank label used in ctc loss optional uint32 blank = 52 [ default = 0 ]; diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index b7b696ef0c..49b3c430e7 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -881,6 +881,42 @@ class Conv(Cfg): config_assert(output_x <= 0) +# please refer to the comments in proto/ModelConfig.proto +@config_class +class Conv3D(Cfg): + def __init__(self, + filter_size, + channels, + padding=None, + stride=None, + groups=None, + filter_channels=None, + output_x=None, + img_size=None, + caffe_mode=True, + filter_size_y=None, + padding_y=None, + stride_y=None, + filter_size_z=None, + padding_z=None, + stride_z=None): + self.add_keys(locals()) + if filter_size_y is None: + self.filter_size_y = filter_size + if padding_y is None: + self.padding_y = padding + if stride_y is None: + self.stride_y = stride + if output_x is not None: + config_assert(output_x <= 0) + if filter_size_z is None: + self.filter_size_z = filter_size + if 
padding_z is None: + self.padding_z = padding + if stride_z is None: + self.stride_z = stride + + @config_class class BilinearInterp(Cfg): def __init__(self, out_size_x=None, out_size_y=None, channels=None): @@ -1167,6 +1203,20 @@ def get_img_size(input_layer_name, channels): return img_size, img_size_y +def get_img3d_size(input_layer_name, channels): + input = g_layer_map[input_layer_name] + img_pixels = input.size / channels + img_size = input.width if input.width > 0 else int(img_pixels**0.5) + img_size_y = input.height if input.height > 0 else int(img_pixels / + img_size) + img_size_z = input.depth if input.depth > 1 else 1 + config_assert( + img_size * img_size_y * img_size_z == img_pixels, + "Input layer %s: Incorrect input image size %d * %d * %d for input image pixels %d" + % (input_layer_name, img_size, img_size_y, img_size_z, img_pixels)) + return img_size, img_size_y, img_size_z + + def parse_bilinear(bilinear, input_layer_name, bilinear_conf): parse_image(bilinear, input_layer_name, bilinear_conf.image_conf) bilinear_conf.out_size_x = bilinear.out_size_x @@ -1277,6 +1327,50 @@ def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False): conv_conf.stride_y, conv_conf.caffe_mode) +#caffe_mode: compute the output size using floor instead of ceil, +# which is consistent of caffe and CuDNN's convention. +def parse_conv3d(conv, input_layer_name, conv_conf, num_filters, trans=False): + conv_conf.filter_size = conv.filter_size + conv_conf.filter_size_y = conv.filter_size_y + conv_conf.filter_size_z = conv.filter_size_z + conv_conf.channels = conv.channels + conv_conf.padding = conv.padding + conv_conf.padding_y = conv.padding_y + conv_conf.padding_z = conv.padding_z + conv_conf.stride = conv.stride + conv_conf.stride_y = conv.stride_y + conv_conf.stride_z = conv.stride_z + conv_conf.groups = conv.groups + conv_conf.caffe_mode = conv.caffe_mode + + if not trans: + conv_conf.filter_channels = conv.channels / conv.groups + conv_conf.img_size, conv_conf.img_size_y, conv_conf.img_size_z = \ + get_img3d_size(input_layer_name, conv.channels) + conv_conf.output_x = cnn_output_size( + conv_conf.img_size, conv_conf.filter_size, conv_conf.padding, + conv_conf.stride, conv_conf.caffe_mode) + conv_conf.output_y = cnn_output_size( + conv_conf.img_size_y, conv_conf.filter_size_y, conv_conf.padding_y, + conv_conf.stride_y, conv_conf.caffe_mode) + conv_conf.output_z = cnn_output_size( + conv_conf.img_size_z, conv_conf.filter_size_z, conv_conf.padding_z, + conv_conf.stride_z, conv_conf.caffe_mode) + else: + conv_conf.filter_channels = num_filters / conv.groups + conv_conf.output_x, conv_conf.output_y, conv_conf.output_z = \ + get_img3d_size(input_layer_name, conv.channels) + conv_conf.img_size = cnn_image_size( + conv_conf.output_x, conv_conf.filter_size, conv_conf.padding, + conv_conf.stride, conv_conf.caffe_mode) + conv_conf.img_size_y = cnn_image_size( + conv_conf.output_y, conv_conf.filter_size_y, conv_conf.padding_y, + conv_conf.stride_y, conv_conf.caffe_mode) + conv_conf.img_size_z = cnn_image_size( + conv_conf.output_z, conv_conf.filter_size_z, conv_conf.padding_z, + conv_conf.stride_z, conv_conf.caffe_mode) + + def parse_block_expand(block_expand, input_layer_name, block_expand_conf): block_expand_conf.channels = block_expand.channels block_expand_conf.stride_x = block_expand.stride_x @@ -1580,6 +1674,9 @@ class LayerBase(object): self.config.height = height self.config.width = width + def set_layer_depth(self, depth): + self.config.depth = depth + def set_cnn_layer(self, 
input_layer_name, height, @@ -1763,11 +1860,19 @@ class DetectionOutputLayer(LayerBase): @config_layer('data') class DataLayer(LayerBase): - def __init__(self, name, size, height=None, width=None, device=None): + def __init__(self, + name, + size, + height=None, + width=None, + depth=None, + device=None): super(DataLayer, self).__init__( name, 'data', size, inputs=[], device=device) if height and width: self.set_layer_height_width(height, width) + if depth: + self.set_layer_depth(depth) ''' @@ -1882,7 +1987,7 @@ class ConvLayerBase(LayerBase): def calc_parameter_size(self, conv_conf): return self.config.num_filters * conv_conf.filter_channels \ - * (conv_conf.filter_size * conv_conf.filter_size_y) + * (conv_conf.filter_size * conv_conf.filter_size_y) @config_layer('exconv') @@ -1895,6 +2000,163 @@ class ConvLayer(ConvLayerBase): layer_type = 'cudnn_conv' +@config_layer('conv_3d') +class Conv3DLayerBase(LayerBase): + def __init__(self, + name, + inputs=[], + bias=True, + num_filters=None, + shared_biases=False, + **xargs): + super(Conv3DLayerBase, self).__init__( + name, self.layer_type, 0, inputs=inputs, **xargs) + + if num_filters is not None: + self.config.num_filters = num_filters + + use_gpu = int(g_command_config_args.get("use_gpu", 0)) + parallel_nn = int(g_command_config_args.get("parallel_nn", 0)) + + # Automatically select cudnn_type for GPU and exconv for CPU + # if set type=conv, but still reserve the way user specify + # exconv or cudnn_conv manually. + if self.layer_type == "cudnn_conv3d": + config_assert(use_gpu, "cudnn_conv3d only support GPU") + + # need to specify layer in config + self.config.type = self.layer_type + + if shared_biases is not None: + self.config.shared_biases = shared_biases + + for input_index in xrange(len(self.inputs)): + input_layer = self.get_input_layer(input_index) + conv_conf = self.config.inputs[input_index].conv_conf + parse_conv3d( + self.inputs[input_index].conv, input_layer.name, conv_conf, + num_filters + ) # for z-axis pad:0, strid:1, filter_size:1, img_size:1 + psize = self.calc_parameter_size(conv_conf) + self.create_input_parameter(input_index, psize) + self.set_cnn_layer(name, conv_conf.output_z, conv_conf.output_y, + conv_conf.output_x, self.config.num_filters) + + psize = self.config.size + if shared_biases: + psize = self.config.num_filters + self.create_bias_parameter(bias, psize, [psize, 1]) + + def calc_parameter_size(self, conv_conf): + return self.config.num_filters * conv_conf.filter_channels \ + * (conv_conf.filter_size * conv_conf.filter_size_y \ + * conv_conf.filter_size_z) + + def set_layer_height_width(self, depth, height, width): + self.config.depth = depth + self.config.height = height + self.config.width = width + + def set_cnn_layer(self, + input_layer_name, + depth, + height, + width, + channels, + is_print=True): + size = depth * height * width * channels + self.set_layer_size(size) + self.set_layer_height_width(depth, height, width) + if is_print: + print("output for %s: c = %d, d = %d, h = %d, w = %d, size = %d" % + (input_layer_name, channels, depth, height, width, size)) + + +@config_layer('conv3d') +class Conv3DLayer(Conv3DLayerBase): + layer_type = 'conv3d' + + +@config_layer('convt_3d') +class Conv3DTransLayerBase(LayerBase): + def __init__(self, + name, + inputs=[], + bias=True, + num_filters=None, + shared_biases=False, + **xargs): + super(Conv3DTransLayerBase, self).__init__( + name, self.layer_type, 0, inputs=inputs, **xargs) + + if num_filters is not None: + self.config.num_filters = num_filters + + 
use_gpu = int(g_command_config_args.get("use_gpu", 0)) + parallel_nn = int(g_command_config_args.get("parallel_nn", 0)) + + # Automatically select cudnn_type for GPU and exconv for CPU + # if set type=conv, but still reserve the way user specify + # exconv or cudnn_conv manually. + if self.layer_type == "cudnn_deconv3d": + config_assert(use_gpu, "cudnn_conv3d only support GPU") + + # need to specify layer in config + self.config.type = self.layer_type + + if shared_biases is not None: + self.config.shared_biases = shared_biases + + for input_index in xrange(len(self.inputs)): + input_layer = self.get_input_layer(input_index) + conv_conf = self.config.inputs[input_index].conv_conf + parse_conv3d( + self.inputs[input_index].conv, + input_layer.name, + conv_conf, + num_filters, + trans=True + ) # for z-axis pad:0, strid:1, filter_size:1, img_size:1 + psize = self.calc_parameter_size(conv_conf) + self.create_input_parameter(input_index, psize) + self.set_cnn_layer(name, conv_conf.img_size_z, conv_conf.img_size_y, + conv_conf.img_size, self.config.num_filters) + + psize = self.config.size + if shared_biases: + psize = self.config.num_filters + self.create_bias_parameter(bias, psize, [psize, 1]) + + def calc_parameter_size(self, conv_conf): + return self.config.num_filters * conv_conf.filter_channels \ + * (conv_conf.filter_size * conv_conf.filter_size_y \ + * conv_conf.filter_size_z) + + def set_layer_height_width(self, depth, height, width): + self.config.depth = depth + self.config.height = height + self.config.width = width + + def set_cnn_layer(self, + input_layer_name, + depth, + height, + width, + channels, + is_print=True): + size = depth * height * width * channels + self.set_layer_size(size) + self.set_layer_height_width(depth, height, width) + if is_print: + print("output for %s: c = %d, d = %d, h = %d, w = %d, size = %d" % + (input_layer_name, channels, depth, height, width, size)) + + +@config_layer('deconv3d') +class DeConv3DLayer(Conv3DTransLayerBase): + layer_type = 'deconv3d' + + @config_layer('convt') class ConvTransLayerBase(LayerBase): layer_type = 'convt' diff --git a/python/paddle/trainer/recurrent_units.py b/python/paddle/trainer/recurrent_units.py old mode 100755 new mode 100644 diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py old mode 100755 new mode 100644 index 1bc55c8696..6953f134c5 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -31,108 +31,34 @@ except ImportError: import copy __all__ = [ - 'full_matrix_projection', - 'AggregateLevel', - 'ExpandLevel', - 'identity_projection', - 'dotmul_projection', - 'dotmul_operator', - 'repeat_layer', - 'seq_reshape_layer', - 'table_projection', - 'mixed_layer', - 'data_layer', - 'embedding_layer', - 'fc_layer', - 'grumemory', - 'pooling_layer', - 'lstmemory', - 'last_seq', - 'first_seq', - 'cos_sim', - 'hsigmoid', - 'conv_projection', - 'mse_cost', - 'regression_cost', - 'classification_cost', - 'LayerOutput', - 'img_conv_layer', - 'img_pool_layer', - 'batch_norm_layer', - 'img_cmrnorm_layer', - 'addto_layer', - 'concat_layer', - 'seq_concat_layer', - 'lstm_step_layer', - 'recurrent_group', - 'memory', - 'StaticInput', - 'expand_layer', - 'scaling_layer', - 'scaling_projection', - 'power_layer', - 'interpolation_layer', - 'bilinear_interp_layer', - 'trans_layer', - 'rotate_layer', - 'sum_to_one_norm_layer', - 'row_l2_norm_layer', - 'get_output_layer', - 'LayerType', - 'context_projection', - 'beam_search', - 
'maxid_layer', - 'GeneratedInput', - 'SubsequenceInput', - 'gru_step_layer', - 'gru_step_naive_layer', - 'recurrent_layer', - 'BaseGeneratedInput', - 'conv_operator', - 'conv_shift_layer', - 'tensor_layer', - 'selective_fc_layer', - 'sampling_id_layer', - 'slope_intercept_layer', - 'trans_full_matrix_projection', - 'linear_comb_layer', - 'convex_comb_layer', - 'ctc_layer', - 'warp_ctc_layer', - 'crf_layer', - 'crf_decoding_layer', - 'nce_layer', - 'cross_entropy_with_selfnorm', - 'cross_entropy', - 'multi_binary_label_cross_entropy', - 'sum_cost', - 'rank_cost', - 'lambda_cost', - 'huber_cost', - 'block_expand_layer', - 'maxout_layer', - 'out_prod_layer', - 'printer_layer', - 'print_layer', - 'priorbox_layer', - 'cross_channel_norm_layer', - 'multibox_loss_layer', - 'detection_output_layer', - 'spp_layer', - 'pad_layer', - 'eos_layer', - 'smooth_l1_cost', - 'layer_support', - 'multiplex_layer', - 'row_conv_layer', - 'dropout_layer', - 'prelu_layer', - 'gated_unit_layer', - 'crop_layer', - 'sub_nested_seq_layer', - 'clip_layer', - 'slice_projection', - 'kmax_sequence_score_layer', + 'full_matrix_projection', 'AggregateLevel', 'ExpandLevel', + 'identity_projection', 'dotmul_projection', 'dotmul_operator', + 'repeat_layer', 'seq_reshape_layer', 'table_projection', 'mixed_layer', + 'data_layer', 'embedding_layer', 'fc_layer', 'grumemory', 'pooling_layer', + 'lstmemory', 'last_seq', 'first_seq', 'cos_sim', 'hsigmoid', + 'conv_projection', 'mse_cost', 'regression_cost', 'classification_cost', + 'LayerOutput', 'img_conv_layer', 'img_pool_layer', 'batch_norm_layer', + 'img_cmrnorm_layer', 'addto_layer', 'concat_layer', 'seq_concat_layer', + 'lstm_step_layer', 'recurrent_group', 'memory', 'StaticInput', + 'expand_layer', 'scaling_layer', 'scaling_projection', 'power_layer', + 'interpolation_layer', 'bilinear_interp_layer', 'trans_layer', + 'rotate_layer', 'sum_to_one_norm_layer', 'row_l2_norm_layer', + 'get_output_layer', 'LayerType', 'context_projection', 'beam_search', + 'maxid_layer', 'GeneratedInput', 'SubsequenceInput', 'gru_step_layer', + 'gru_step_naive_layer', 'recurrent_layer', 'BaseGeneratedInput', + 'conv_operator', 'conv_shift_layer', 'tensor_layer', 'selective_fc_layer', + 'sampling_id_layer', 'slope_intercept_layer', + 'trans_full_matrix_projection', 'linear_comb_layer', 'convex_comb_layer', + 'ctc_layer', 'warp_ctc_layer', 'crf_layer', 'crf_decoding_layer', + 'nce_layer', 'cross_entropy_with_selfnorm', 'cross_entropy', + 'multi_binary_label_cross_entropy', 'sum_cost', 'rank_cost', 'lambda_cost', + 'huber_cost', 'block_expand_layer', 'maxout_layer', 'out_prod_layer', + 'printer_layer', 'print_layer', 'priorbox_layer', + 'cross_channel_norm_layer', 'multibox_loss_layer', 'detection_output_layer', + 'spp_layer', 'pad_layer', 'eos_layer', 'smooth_l1_cost', 'layer_support', + 'multiplex_layer', 'row_conv_layer', 'dropout_layer', 'prelu_layer', + 'gated_unit_layer', 'crop_layer', 'sub_nested_seq_layer', 'clip_layer', + 'slice_projection', 'kmax_sequence_score_layer', 'img_conv3d_layer' ] @@ -214,6 +140,9 @@ class LayerType(object): CRF_DECODING_LAYER = 'crf_decoding' NCE_LAYER = 'nce' + CONV3D_LAYER = 'conv3d' + DECONV3D_LAYER = 'deconv3d' + RANK_COST = 'rank-cost' LAMBDA_COST = 'lambda_cost' HUBER = 'huber' @@ -878,7 +807,8 @@ def mixed_layer(size=0, @layer_support() -def data_layer(name, size, height=None, width=None, layer_attr=None): +def data_layer(name, size, height=None, width=None, depth=None, + layer_attr=None): """ Define DataLayer For NeuralNetwork. 
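Taken together with the img_conv3d_layer helper added further down in this patch, the new depth argument is what lets a DataLayer describe a 3-D input. A minimal usage sketch, with sizes mirroring the conv3d/deconv3d test configuration added at the end of this patch (names and numbers are illustrative only; size must equal height * width * depth * num_channels, which get_img3d_size later asserts):

    from paddle.trainer_config_helpers import *

    num_channels = 3
    # 48 * 42 * 6 = 12096 voxels per channel
    data = data_layer(
        name='data1',
        size=12096 * num_channels,
        height=48,
        width=42,
        depth=6)
    conv3d = img_conv3d_layer(
        input=data,
        num_filters=16,
        num_channels=num_channels,
        filter_size=3,      # a scalar expands to filter_size_y and filter_size_z
        stride=2,
        padding=1,
        trans=False,
        layer_type="conv3d",
        act=LinearActivation())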
@@ -907,6 +837,7 @@ def data_layer(name, size, height=None, width=None, layer_attr=None): size=size, height=height, width=width, + depth=depth, **ExtraLayerAttribute.to_kwargs(layer_attr)) return LayerOutput(name, LayerType.DATA, size=size) @@ -6210,3 +6141,182 @@ def kmax_sequence_score_layer(input, name=None, beam_size=1): return LayerOutput( name, LayerType.KMAX_SEQ_SCORE, parents=[input], size=input.size) + + +@wrap_name_default("conv3d") +@wrap_param_attr_default() +@wrap_bias_attr_default() +@wrap_act_default(act=ReluActivation()) +@layer_support(DROPOUT) +def img_conv3d_layer(input, + filter_size, + num_filters, + name=None, + num_channels=None, + act=None, + groups=1, + stride=1, + padding=0, + bias_attr=None, + param_attr=None, + shared_biases=True, + layer_attr=None, + filter_size_y=None, + stride_y=None, + padding_y=None, + filter_size_z=None, + stride_z=None, + padding_z=None, + trans=False, + layer_type=None): + """ + + The example usage is: + + .. code-block:: python + + conv = img_conv3d_layer(input=data, filter_size=1, filter_size_y=1, + num_channels=8, + num_filters=16, stride=1, + bias_attr=False, + act=ReluActivation()) + + :param name: Layer name. + :type name: basestring + :param input: Layer Input. + :type input: LayerOutput + :param filter_size: The x dimension of a filter kernel. Or input a tuple for + two image dimension. + :type filter_size: int|tuple|list + :param filter_size_y: The y dimension of a filter kernel. Since PaddlePaddle + currently supports rectangular filters, the filter's + shape will be (filter_size, filter_size_y). + :type filter_size_y: int|None + :param num_filters: Each filter group's number of filter + :param act: Activation type. Default is tanh + :type act: BaseActivation + :param groups: Group size of filters. + :type groups: int + :param stride: The x dimension of the stride. Or input a tuple for two image + dimension. + :type stride: int|tuple|list + :param stride_y: The y dimension of the stride. + :type stride_y: int + :param padding: The x dimension of the padding. Or input a tuple for two + image dimension + :type padding: int|tuple|list + :param padding_y: The y dimension of the padding. + :type padding_y: int + :param bias_attr: Convolution bias attribute. None means default bias. + False means no bias. + :type bias_attr: ParameterAttribute|False + :param num_channels: number of input channels. If None will be set + automatically from previous output. + :type num_channels: int + :param param_attr: Convolution param attribute. None means default attribute + :type param_attr: ParameterAttribute + :param shared_biases: Is biases will be shared between filters or not. + :type shared_biases: bool + :param layer_attr: Layer Extra Attribute. + :type layer_attr: ExtraLayerAttribute + :param trans: true if it is a convTransLayer, false if it is a convLayer + :type trans: bool + :param layer_type: specify the layer_type, default is None. If trans=True, + layer_type has to be "exconvt" or "cudnn_convt", + otherwise layer_type has to be either "exconv" or + "cudnn_conv" + :type layer_type: String + :return: LayerOutput object. 
+ :rtype: LayerOutput + """ + if num_channels is None: + assert input.num_filters is not None + num_channels = input.num_filters + + if filter_size_y is None: + if isinstance(filter_size, collections.Sequence): + assert len(filter_size) == 2 + filter_size, filter_size_y = filter_size + else: + filter_size_y = filter_size + + if filter_size_z is None: + if isinstance(filter_size, collections.Sequence): + assert len(filter_size) == 2 + filter_size, filter_size_z = filter_size + else: + filter_size_z = filter_size + + if stride_y is None: + if isinstance(stride, collections.Sequence): + assert len(stride) == 2 + stride, stride_y = stride + else: + stride_y = stride + + if stride_z is None: + if isinstance(stride, collections.Sequence): + assert len(stride) == 2 + stride, stride_z = stride + else: + stride_z = stride + + if padding_y is None: + if isinstance(padding, collections.Sequence): + assert len(padding) == 2 + padding, padding_y = padding + else: + padding_y = padding + + if padding_z is None: + if isinstance(padding, collections.Sequence): + assert len(padding) == 2 + padding, padding_z = padding + else: + padding_z = padding + + if param_attr.attr.get('initial_smart'): + # special initial for conv layers. + init_w = (2.0 / (filter_size**2 * num_channels))**0.5 + param_attr.attr["initial_mean"] = 0.0 + param_attr.attr["initial_std"] = init_w + param_attr.attr["initial_strategy"] = 0 + param_attr.attr["initial_smart"] = False + + if layer_type: + if trans: + assert layer_type in ["deconv3d"] + lt = layer_type + else: + lt = LayerType.DECONV3D_LAYER if trans else LayerType.CONV3D_LAYER + + l = Layer( + name=name, + inputs=Input( + input.name, + conv=Conv3D( + filter_size=filter_size, + padding=padding, + stride=stride, + channels=num_channels, + groups=groups, + filter_size_y=filter_size_y, + padding_y=padding_y, + stride_y=stride_y, + filter_size_z=filter_size_z, + padding_z=padding_z, + stride_z=stride_z), + **param_attr.attr), + active_type=act.name, + num_filters=num_filters, + bias=ParamAttr.to_bias(bias_attr), + shared_biases=shared_biases, + type=lt, + **ExtraLayerAttribute.to_kwargs(layer_attr)) + return LayerOutput( + name, + lt, + parents=[input], + activation=act, + num_filters=num_filters, + size=l.config.size) diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py old mode 100755 new mode 100644 index 34be203ee2..28a71cf788 --- a/python/paddle/trainer_config_helpers/networks.py +++ b/python/paddle/trainer_config_helpers/networks.py @@ -1406,7 +1406,7 @@ def inputs(layers, *args): if len(args) != 0: layers.extend(args) - Inputs(*[l.name for l in layers]) + Inputs(* [l.name for l in layers]) def outputs(layers, *args): @@ -1456,7 +1456,7 @@ def outputs(layers, *args): assert len(layers) > 0 if HasInputsSet(): # input already set - Outputs(*[l.name for l in layers]) + Outputs(* [l.name for l in layers]) return # just return outputs. 
if len(layers) != 1: diff --git a/python/paddle/trainer_config_helpers/tests/configs/conv3d_deconv3d_test_config.py b/python/paddle/trainer_config_helpers/tests/configs/conv3d_deconv3d_test_config.py new file mode 100644 index 0000000000..da0d23d057 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/conv3d_deconv3d_test_config.py @@ -0,0 +1,98 @@ +from paddle.trainer_config_helpers import * + +settings(batch_size=1000, learning_rate=1e-5) + +num_channels = 3 +filter_size = 3 +filter_size_y = 3 +filter_size_z = 3 +stride = 2 +stride_y = 2 +stride_z = 2 +padding = 1 +padding_y = 1 +padding_z = 1 +groups = 1 + +data = data_layer( + name='data1', size=12096 * num_channels, height=48, width=42, depth=6) + +conv3d = img_conv3d_layer( + input=data, + name='conv3d_1', + num_filters=16, + num_channels=num_channels, + filter_size=filter_size, + filter_size_y=filter_size, + filter_size_z=filter_size, + stride=stride, + stride_y=stride_y, + stride_z=stride_z, + padding=padding, + padding_y=padding_y, + padding_z=padding_z, + groups=groups, + bias_attr=True, + shared_biases=True, + trans=False, + layer_type="conv3d", + act=LinearActivation()) + +deconv3d = img_conv3d_layer( + input=data, + name='deconv3d_1', + num_filters=16, + num_channels=num_channels, + filter_size=filter_size, + filter_size_y=filter_size, + filter_size_z=filter_size, + stride=stride, + stride_y=stride_y, + stride_z=stride_z, + padding=padding, + padding_y=padding_y, + padding_z=padding_z, + groups=groups, + bias_attr=True, + shared_biases=True, + trans=True, + layer_type="deconv3d", + act=LinearActivation()) + +data = data_layer(name="input", size=8 * 16 * 16) +conv1 = img_conv_layer( + input=data, + filter_size=1, + filter_size_y=1, + num_channels=8, + num_filters=16, + stride=1, + bias_attr=False, + act=ReluActivation(), + layer_type="exconv") +conv2 = img_conv_layer( + input=data, + filter_size=1, + filter_size_y=1, + num_channels=8, + num_filters=16, + stride=1, + bias_attr=False, + act=ReluActivation(), + layer_type="exconv") + +concat = concat_layer(input=[conv1, conv2]) + +conv = img_conv_layer( + input=data, + filter_size=1, + filter_size_y=1, + num_channels=8, + num_filters=16, + stride=1, + bias_attr=True, + act=LinearActivation(), + groups=2, + layer_type="exconv") + +outputs(concat, conv) diff --git a/python/paddle/trainer_config_helpers/tests/layers_test.py b/python/paddle/trainer_config_helpers/tests/layers_test.py index 05902ea293..44d1c1c9b2 100644 --- a/python/paddle/trainer_config_helpers/tests/layers_test.py +++ b/python/paddle/trainer_config_helpers/tests/layers_test.py @@ -16,4 +16,6 @@ from paddle.trainer.config_parser import parse_config_and_serialize if __name__ == '__main__': parse_config_and_serialize( - 'trainer_config_helpers/tests/layers_test_config.py', '') + 'trainer_config_helpers/tests/configs/conv3d_deconv3d_test_config.py', + '') +# layers_test_config.py From 760cb6c8615fe3eece8d831ef78e10da4b552246 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 23 Aug 2017 15:47:18 +0800 Subject: [PATCH 070/170] Follow jiayi's comment --- paddle/framework/operator.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 83dab8631d..8397570d26 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -132,21 +132,21 @@ class OperatorBase { // Macro for define a clone method. // If you are writing an kernel operator, `Clone` will be defined when you // register it. i.e. 
`Clone` method is not needed to define by yourself. -#define DEFINE_OP_CLONE_METHOD(CLS) \ +#define DEFINE_OP_CLONE_METHOD(cls) \ std::unique_ptr Clone() const final { \ - return std::unique_ptr(new CLS(*this)); \ + return std::unique_ptr(new cls(*this)); \ } // Macro for define a default constructor for Operator. // You can also use // using PARENT_CLASS::PARENT_CLASS; // to use parent's constructor. -#define DEFINE_OP_CONSTRUCTOR(CLS, PARENT_CLS) \ - CLS(const std::string& type, \ +#define DEFINE_OP_CONSTRUCTOR(cls, parent_cls) \ + cls(const std::string& type, \ const ::paddle::framework::VariableNameMap& inputs, \ const ::paddle::framework::VariableNameMap& outputs, \ const paddle::framework::AttributeMap& attrs) \ - : PARENT_CLS(type, inputs, outputs, attrs) {} + : parent_cls(type, inputs, outputs, attrs) {} class NOP : public OperatorBase { public: From 6f4b968f5618adce529d12bd2e3b72d4d1b64f61 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 23 Aug 2017 00:58:02 -0700 Subject: [PATCH 071/170] can train the parameters --- python/paddle/v2/framework/tests/mnist.py | 39 +++++++++++++++++------ 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index 32349b8d4d..ededf767bc 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -69,7 +69,7 @@ def init_param(param_name, dims): tensor = var.get_tensor() tensor.set_dims(dims) data = numpy.random.uniform( - low=0.0, high=1.0, size=tensor.shape()).astype("float32") + low=-0.5, high=0.5, size=tensor.shape()).astype("float32") tensor.set(data, place) @@ -109,7 +109,7 @@ def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None): bias_name = name + ".b" init_param(param_name=bias_name, dims=[size]) sgd_optimizer( - net=optimize_net, param_name=bias_name, learning_rate=0.01) + net=optimize_net, param_name=bias_name, learning_rate=0.001) bias_out = name + ".rowwise_add.out" scope.new_var(bias_out) rowwise_append_op = Operator( @@ -158,20 +158,33 @@ def print_inputs_outputs(op): def set_cost(): - cost_data = numpy.array(scope.find_var("cross_entropy_1").get_tensor()) + cost_shape = numpy.array(scope.find_var("cross_entropy_3").get_tensor( + )).shape + cost_grad = scope.find_var(grad_var_name("cross_entropy_3")).get_tensor() + cost_grad.set_dims(cost_shape) + cost_grad.alloc_float(place) + cost_grad.set(numpy.ones(cost_shape).astype("float32"), place) + + +def print_cost(): + cost_data = numpy.array(scope.find_var("cross_entropy_3").get_tensor()) print(cost_data.sum() / len(cost_data)) - cost_grad = scope.find_var(grad_var_name("cross_entropy_1")).get_tensor() - cost_grad.set_dims(cost_data.shape) - cost_grad.alloc_float(place) - cost_grad.set(numpy.ones(cost_data.shape).astype("float32"), place) +def error_rate(predict, label): + predict_var = numpy.array(scope.find_var(predict).get_tensor()).argmax( + axis=1) + label = numpy.array(scope.find_var(label).get_tensor()) + error_num = numpy.sum(predict_var != label) + print(error_num / float(len(label))) images = data_layer(name='pixel', dims=[BATCH_SIZE, 784]) label = data_layer(name='label', dims=[BATCH_SIZE]) -fc = fc_layer(net=forward_network, input=images, size=10, act="softmax") -cost = cross_entropy_layer(net=forward_network, input=fc, label=label) +fc1 = fc_layer(net=forward_network, input=images, size=100, act="sigmoid") +fc2 = fc_layer(net=forward_network, input=fc1, size=100, act="sigmoid") +predict = 
fc_layer(net=forward_network, input=fc2, size=100, act="softmax") +cost = cross_entropy_layer(net=forward_network, input=predict, label=label) forward_network.complete_add_op(True) backward_net = get_backward_net(forward_network) @@ -192,8 +205,8 @@ reader = paddle.batch( PASS_NUM = 1000 for pass_id in range(PASS_NUM): + batch_id = 0 - print("pass[" + str(pass_id) + "]") for data in reader(): image = numpy.array(map(lambda x: x[0], data)).astype("float32") label = numpy.array(map(lambda x: x[1], data)).astype("int32") @@ -207,3 +220,9 @@ for pass_id in range(PASS_NUM): backward_net.run(scope, dev_ctx) optimize_net.run(scope, dev_ctx) + if batch_id % 100 == 0: + print("pass[" + str(pass_id) + "] batch_id[" + str(batch_id) + "]") + print_cost() + error_rate(predict, "label") + + batch_id = batch_id + 1 From 48d87e5e912ad084ccc63dae8649f90a3f0989ba Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Wed, 23 Aug 2017 16:47:51 +0800 Subject: [PATCH 072/170] pass test, support input CPU device --- paddle/gserver/layers/Layer.h | 35 +++++--- paddle/gserver/layers/MKLDNNFcLayer.cpp | 108 +++++++++++++++--------- paddle/gserver/layers/MKLDNNLayer.h | 81 +++++++++++++++--- paddle/math/Allocator.h | 6 ++ paddle/math/MKLDNNMatrix.cpp | 71 +++++++++++++--- paddle/math/MKLDNNMatrix.h | 49 ++++++++--- 6 files changed, 258 insertions(+), 92 deletions(-) diff --git a/paddle/gserver/layers/Layer.h b/paddle/gserver/layers/Layer.h index ec4d093e0c..edef36194a 100644 --- a/paddle/gserver/layers/Layer.h +++ b/paddle/gserver/layers/Layer.h @@ -82,6 +82,7 @@ protected: Argument output_; /// Several outputs stored on different devices, used in 'parallel_nn' case, /// and record them by deviceId_. + /// Also used in 'use_mkldnn' case. std::vector outputOtherDevice_; /// If there are several outputs, map them by each name. std::map outputMap_; @@ -177,6 +178,13 @@ protected: return inputLayer.getOutput(deviceId_); } + /** + * Get the argument of input layer with deviceId. + */ + const Argument& getInput(size_t inputIndex, int deviceId) const { + return inputLayers_[inputIndex]->getOutput(deviceId); + } + /** * Get the forward-input value. */ @@ -191,6 +199,13 @@ protected: return inputLayer.getOutput(deviceId_).value; } + /** + * Get the forward-input value with deviceId. + */ + const MatrixPtr& getInputValue(int inputIndex, int deviceId) { + return inputLayers_[inputIndex]->getOutput(deviceId).value; + } + /** * Get the forward-input grad. */ @@ -205,6 +220,13 @@ protected: return inputLayer.getOutput(deviceId_).grad; } + /** + * Get the forward-input grad. + */ + const MatrixPtr& getInputGrad(int inputIndex, int deviceId) { + return inputLayers_[inputIndex]->getOutput(deviceId).grad; + } + /** * Get the forward-input label. 
*/ @@ -326,19 +348,6 @@ public: if (deviceId == getDeviceId()) { return output_; } else { - bool CPU2MKLDNN = - getDeviceId() == CPU_DEVICE && deviceId == MKLDNN_DEVICE; - bool MKLDNN2CPU = - getDeviceId() == MKLDNN_DEVICE && deviceId == CPU_DEVICE; - if (CPU2MKLDNN) { - // TODO: do something - return output_; - } else if (MKLDNN2CPU) { - // TODO: do something - return output_; - } - - // TODO: handle mkldnn device or add mkldnn device to other for (size_t i = 0; i < outputOtherDevice_.size(); i++) { if (outputOtherDevice_[i].deviceId == deviceId) { return outputOtherDevice_[i]; diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index 5463104469..a3291e6a8f 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -97,7 +97,7 @@ void MKLDNNFcLayer::convertWeightsToPaddle() { } void MKLDNNFcLayer::reshape() { - const Argument& input = getInput(0); + const Argument& input = getInput(0, getPrev(0)->getDeviceId()); int batchSize = input.getBatchSize(); if (bs_ == batchSize) { return; @@ -135,35 +135,43 @@ void MKLDNNFcLayer::reshape() { void MKLDNNFcLayer::resetFwd() { bool hasBias = biases_ && biases_->getW(); - const MatrixPtr& in = getInputValue(0); const MatrixPtr& wgt = weight_->getW(); const MatrixPtr& bias = hasBias ? biases_->getW() : nullptr; const MatrixPtr& out = output_.value; - if (getPrev(0)->getDeviceId() == MKLDNN_DEVICE) { + if (prevIsMKLDNN()) { + const MatrixPtr& in = getInputValue(0); inVal_ = std::dynamic_pointer_cast(in); CHECK(inVal_) << "Input should be MKLDNNMatrix"; - // TODO: change input nchw to nc if available - // inVal_->downSpatial() } else { + CHECK_EQ(getPrev(0)->getDeviceId(), CPU_DEVICE) << "Only support CPU yet"; + const MatrixPtr& in = getInputValue(0, CPU_DEVICE); inVal_ = MKLDNNMatrix::create( - in, - hasSpatial_ ? memory::dims{bs_, ic_, ih_, iw_} : memory::dims{bs_, ic_}, - hasSpatial_ ? format::nchw : format::nc, - engine_); + in, memory::dims{bs_, ic_, ih_, iw_}, format::nchw, engine_); } - + inVal_->downSpatial(); wgtVal_ = MKLDNNMatrix::create( - wgt, - hasSpatial_ ? memory::dims{oc_, ic_, ih_, iw_} : memory::dims{oc_, ic_}, - hasSpatial_ ? format::oihw : format::oi, - engine_); + wgt, memory::dims{oc_, ic_, ih_, iw_}, format::oihw, engine_); + wgtVal_->downSpatial(); biasVal_ = hasBias ? 
MKLDNNMatrix::create(bias, {oc_}, format::x, engine_) : nullptr; outVal_ = MKLDNNMatrix::create(out, {bs_, oc_}, format::nc, engine_); - // change original output to mkldnn output + // change original output value to mkldnn output value output_.value = std::dynamic_pointer_cast(outVal_); + if (!nextIsMKLDNN()) { + Argument cpuOutput; + for (size_t i = 0; i < outputOtherDevice_.size(); i++) { + if (outputOtherDevice_[i].deviceId == CPU_DEVICE) { + cpuOutput = outputOtherDevice_[i]; + } + } + cpuOutput.setFrameHeight(output_.getFrameHeight()); + cpuOutput.setFrameWidth(output_.getFrameWidth()); + + // fc cpu output value do not need convert + cpuOutput.value = output_.value; + } // create forward handle prop_kind pk = prop_kind::forward; @@ -176,12 +184,13 @@ void MKLDNNFcLayer::resetFwd() { : fc_fwd::desc( pk, inVal_->getMD(), wgtVal_->getMD(), outVal_->getMD()); fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); - if (hasBias) { fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_)); } else { fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *outVal_)); } + printValueFormatFlow(); + pipelineFwd_.clear(); pipelineFwd_.push_back(*fwd_); } @@ -197,17 +206,24 @@ void MKLDNNFcLayer::resetBwd() { CHECK(inVal_) << "Should have input value"; const MatrixPtr& wgt = weight_->getWGrad(); const MatrixPtr& bias = hasBias ? biases_->getWGrad() : nullptr; - const MatrixPtr& out = output_.grad; - wgtGrad_ = MKLDNNMatrix::create( - wgt, wgtVal_->getDims(), wgtVal_->getFormat(), engine_); - biasGrad_ = - hasBias ? MKLDNNMatrix::create(bias, {oc_}, format::x, engine_) : nullptr; + if (nextIsMKLDNN()) { + // can not directly cast outputgrad to mkldnnmatrix, + // since each layer can not write the inputgrad to mkldnn inputgrad. + // So just create from matrix with outputvalue format. + const MatrixPtr& out = getOutput(MKLDNN_DEVICE).grad; + outGrad_ = MKLDNNMatrix::create(out, outVal_->getPD()); + // TODO: maybe need merge topdiffs + } else { + // TODO: merge topdiffs + const MatrixPtr& out = getOutput(CPU_DEVICE).grad; + // fc do not need to convert from cpu device since output always nc + // only need create from cpu device + outGrad_ = MKLDNNMatrix::create(out, outVal_->getPD()); + } - outGrad_ = MKLDNNMatrix::create(out, {bs_, oc_}, format::nc, engine_); - // change original output to mkldnn output - // TODO: right? - output_.grad = std::dynamic_pointer_cast(outGrad_); + wgtGrad_ = MKLDNNMatrix::create(wgt, wgtVal_->getPD()); + biasGrad_ = hasBias ? 
MKLDNNMatrix::create(bias, biasVal_->getPD()) : nullptr; // create memory primitive desc fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward, @@ -235,21 +251,38 @@ void MKLDNNFcLayer::resetBwd() { pipelineBwd_.push_back(*bwdWgt_); /// backward data - const MatrixPtr& in = getInputGrad(0); - if (in == nullptr) { - return; + if (prevIsMKLDNN()) { + const MatrixPtr& in = getInputGrad(0, MKLDNN_DEVICE); + if (in == nullptr) { + return; + } + if (getInput(0, MKLDNN_DEVICE).getAllCount() > 1) { + // TODO: many mkldnn bots + // add sum handle + } else { + inGrad_ = MKLDNNMatrix::create(in, inVal_->getPD()); + } + } else { + const MatrixPtr& in = getInputGrad(0, CPU_DEVICE); + if (in == nullptr) { + return; + } + if (getInput(0, CPU_DEVICE).getAllCount() > 1) { + // TODO: many bots + // add sum handle + } else { + inGrad_ = MKLDNNMatrix::create(in, inVal_->getPD()); + } } + fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(inVal_->getMD(), wgtGrad_->getMD(), outGrad_->getMD()); fc_bwdData::primitive_desc bwdDataPD = fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD); - // TODO: check right, just from ingrad? - inGrad_ = - MKLDNNMatrix::create(in, inVal_->getDims(), inVal_->getFormat(), engine_); - CHECK(wgtVal_) << "Should have weight memory"; bwdData_.reset(new fc_bwdData(bwdDataPD, *outGrad_, *wgtVal_, *inGrad_)); + printGradFormatFlow(); pipelineBwd_.push_back(*bwdData_); } @@ -259,11 +292,7 @@ void MKLDNNFcLayer::forward(PassType passType) { { REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str()); - - // update input data - // since it might be changed if this is after data layer - real* iData = getInputValue(0)->getData(); - inVal_->updateData(iData); + syncInputValue(); // just submit forward pipeline stream_->submit(pipelineFwd_); @@ -285,10 +314,7 @@ void MKLDNNFcLayer::backward(const UpdateCallback& callback) { REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str()); resetBwd(); - // update diff - real* oDiff = getOutputGrad()->getData(); - outGrad_->updateData(oDiff); - + syncOutputGrad(); // just sumbmit backward pipeline stream_->submit(pipelineBwd_); } diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index fbd62d9aaa..3dd17a36ff 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -125,23 +125,80 @@ public: << ", oh: " << oh_ << ", ow: " << ow_; } - // TODO(TJ): move to MkldnnMatrix - // create memory desc - inline mkldnn::memory::desc createMD( - mkldnn::memory::dims dims, - mkldnn::memory::format fmt, - mkldnn::memory::data_type type = mkldnn::memory::data_type::f32) { - // TODO(TJ): isFmtSuppoted(fmt) - return mkldnn::memory::desc(dims, type, fmt); + /** + * Print the mkldnn memory format flow of value + */ + virtual void printValueFormatFlow() { + if (inVal_ && outVal_) { + VLOG(MKLDNN_FMTS) << "value format flow --- " << inVal_->getFormat() + << " >>> " << outVal_->getFormat(); + } } - void resetMKLDNNOutput(size_t height, size_t width) { - Layer::resetOutput(height, width); - // get valu and grad, use mkldnn matrix instaed - // output_.value; + /** + * Print the mkldnn memory format flow of grad + */ + virtual void printGradFormatFlow() { + if (inGrad_ && outGrad_) { + VLOG(MKLDNN_FMTS) << "grad format flow --- " << inGrad_->getFormat() + << " <<< " << outGrad_->getFormat(); + } } protected: + /** + * If next layer only has MKLDNN type. + * Otherwise, only support otherdevice CPU device. 
+ */ + bool nextIsMKLDNN() { + for (size_t i = 0; i < outputOtherDevice_.size(); i++) { + CHECK_EQ(outputOtherDevice_[i].deviceId, CPU_DEVICE) + << "Only support other device is CPU yet"; + } + return outputOtherDevice_.size() == 0; + } + + /** + * Is previous layer MKLDNN type. + * Otherwise, only support otherdevice CPU device. + */ + bool prevIsMKLDNN(int index = 0) { + int prevDevice = getPrev(index)->getDeviceId(); + if (prevDevice == MKLDNN_DEVICE) { + return true; + } else { + // do not support GPU yet + CHECK_EQ(prevDevice, CPU_DEVICE) << "Only support CPU yet"; + return false; + } + } + + /** + * Sync input value data + */ + void syncInputValue() { + if (prevIsMKLDNN()) { + return; + } + real* iData = getInputValue(0, CPU_DEVICE)->getData(); + // update input data + // since it might be changed if this is after data layer + inVal_->updateData(iData); + } + + /** + * Sync output grad data + */ + void syncOutputGrad() { + if (nextIsMKLDNN()) { + return; + } + + // update diff + real* oDiff = getOutput(CPU_DEVICE).grad->getData(); + outGrad_->updateData(oDiff); + } + /** * Set deviceId of this layer. */ diff --git a/paddle/math/Allocator.h b/paddle/math/Allocator.h index 666a8b8368..94ef561f06 100644 --- a/paddle/math/Allocator.h +++ b/paddle/math/Allocator.h @@ -48,7 +48,13 @@ public: */ virtual void* alloc(size_t size) { void* ptr; +#ifdef PADDLE_USE_MKLDNN + // refer to https://github.com/01org/mkl-dnn/blob/master/include/mkldnn.hpp + // memory alignment + CHECK_EQ(posix_memalign(&ptr, 4096ul, size), 0); +#else CHECK_EQ(posix_memalign(&ptr, 32ul, size), 0); +#endif CHECK(ptr) << "Fail to allocate CPU memory: size=" << size; return ptr; } diff --git a/paddle/math/MKLDNNMatrix.cpp b/paddle/math/MKLDNNMatrix.cpp index 44fc54278c..24d54ec0f7 100644 --- a/paddle/math/MKLDNNMatrix.cpp +++ b/paddle/math/MKLDNNMatrix.cpp @@ -18,29 +18,74 @@ using namespace mkldnn; // NOLINT namespace paddle { -MKLDNNMatrixPtr MKLDNNMatrix::create(const MatrixPtr& m, - memory::dims dims, - memory::format fmt, - engine& eg, - mkldnn::memory::data_type dtype) { - CpuMatrixPtr cpuM = std::dynamic_pointer_cast(m); - CHECK(cpuM) << "Only support create from CPU matrix yet"; - - size_t ndims = dims.size(); +MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, memory::primitive_desc pd) { + memory::desc md = pd.desc(); + size_t ndims = md.data.ndims; + int* dims = md.data.dims; CHECK(ndims > 0) << "Input dims should not be empty"; - size_t cnt = 1; + size_t cnts = 1; for (size_t i = 0; i < ndims; ++i) { - cnt *= dims[i]; + cnts *= dims[i]; } - CHECK_EQ(cnt, m->getElementCnt()) << "Count size does not match"; + if (m == nullptr) { + size_t height = dims[0]; + size_t width = cnts / dims[0]; + // LOG(INFO) << height << "," << width; + m = Matrix::create(height, width, false, false); + } + + CHECK(m) << " Matrix should not be empty"; + CpuMatrixPtr cpuMatrix = std::dynamic_pointer_cast(m); + CHECK(cpuMatrix) << "Only support create from CPU matrix yet"; + + CHECK_EQ(cnts, m->getElementCnt()) << "Count size does not match"; size_t width = m->getWidth(); size_t height = m->getHeight(); real* data = m->getData(); + return std::make_shared(data, height, width, pd); +} +MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, + memory::dims dims, + memory::format fmt, + engine& eg, + mkldnn::memory::data_type dtype) { memory::desc md = memory::desc(dims, dtype, fmt); memory::primitive_desc pd = memory::primitive_desc(md, eg); - return std::make_shared(data, height, width, pd); + return create(m, pd); +} + +void 
MKLDNNMatrix::downSpatial() { + int fmt = getFormat(); + if (!(fmt == memory::format::nchw || fmt == memory::format::oihw)) { + // only support nchw and oihw yet, later can support more like nhwc, ihwo + return; + } + + memory::dims srcDims = getDims(); + const int H = 2, W = 3; + if (srcDims[H] != 1 || srcDims[W] != 1) { + // can not down spatial + return; + } + + memory::dims dstDims = memory::dims{srcDims[0], srcDims[1]}; + memory::format dstFmt; + switch (fmt) { + case memory::format::nchw: + dstFmt = memory::format::nc; + break; + case memory::format::oihw: + dstFmt = memory::format::oi; + break; + default: + LOG(FATAL) << "unsupported format"; + } + memory::desc md = memory::desc(dstDims, getDtype(), dstFmt); + memory::primitive_desc pd = memory::primitive_desc(md, getEngine()); + void* data = getData(); + memory(pd, data); } } // namespace paddle diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h index 54c0a1fdcb..05adc867c2 100644 --- a/paddle/math/MKLDNNMatrix.h +++ b/paddle/math/MKLDNNMatrix.h @@ -39,20 +39,37 @@ public: mkldnn::memory::primitive_desc pd) : CpuMatrix(data, height, width, false), mkldnn::memory(pd, data) {} - MKLDNNMatrix(size_t height, size_t width, mkldnn::memory::primitive_desc pd) - : CpuMatrix(height, width, false), mkldnn::memory(pd) { - set_data_handle(CpuMatrix::getData()); - } - ~MKLDNNMatrix() {} + /** + * Create MKLDNNMatrix from a MatrixPtr and memory primitive_desc + */ + static MKLDNNMatrixPtr create(MatrixPtr m, mkldnn::memory::primitive_desc pd); + + /** + * Create MKLDNNMatrix from a MatrixPtr and memory details info + */ static MKLDNNMatrixPtr create( - const MatrixPtr& m, + MatrixPtr m, mkldnn::memory::dims dims, mkldnn::memory::format fmt, mkldnn::engine& eg, mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32); +public: + /** + * Dimensionality reduction. + * Change format "nchw --> nc" or "oihw --> oi" if the h and w are both 1 + */ + void downSpatial(); + + /** + * Update the memory data handle. + * Caution: This will not check the buffer size of the data, + * it should be coverd by user. + */ + void updateData(void* data) { set_data_handle(data); } + /** * Get primitive descriptor. */ @@ -64,12 +81,13 @@ public: mkldnn::memory::desc getMD() { return getPD().desc(); } /** - * Get dims. + * Get dimensions. */ mkldnn::memory::dims getDims() { + mkldnn::memory::desc md = getMD(); + const int* src = md.data.dims; + int ndims = md.data.ndims; mkldnn::memory::dims dst; - int* src = getMD().data.dims; - int ndims = getMD().data.ndims; dst.resize(ndims); for (int i = 0; i < ndims; ++i) { dst[i] = src[i]; @@ -85,11 +103,16 @@ public: } /** - * Update the memory data handle. - * Caution: This will not check the buffer size of the data, - * it should be coverd by user. + * Get memory data type. */ - void updateData(void* data) { set_data_handle(data); } + mkldnn::memory::data_type getDtype() { + return (mkldnn::memory::data_type)(getMD().data.data_type); + } + + /** + * Get engine. 
+ */ + mkldnn::engine getEngine() { return getPD().get_engine(); } }; } // namespace paddle From 780c8d969e0d2d220df19a672c141ff7c44f53d2 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Wed, 23 Aug 2017 17:03:16 +0800 Subject: [PATCH 073/170] make downSpatial work, and remove hasSpatial_ --- paddle/gserver/layers/MKLDNNFcLayer.cpp | 4 ---- paddle/gserver/layers/MKLDNNFcLayer.h | 5 +---- paddle/math/MKLDNNMatrix.cpp | 7 ++++++- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index a3291e6a8f..a5555c4618 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -111,10 +111,6 @@ void MKLDNNFcLayer::reshape() { if (iw_ == 0) { iw_ = 1; } - hasSpatial_ = true; - if (ih_ == 1 && iw_ == 1) { - hasSpatial_ = false; - } CHECK_EQ(iLayerSize_, inputLayers_[0]->getSize()); ic_ = iLayerSize_ / (ih_ * iw_); CHECK_EQ(size_t(ic_ * ih_ * iw_), iLayerSize_) << "not divisible"; diff --git a/paddle/gserver/layers/MKLDNNFcLayer.h b/paddle/gserver/layers/MKLDNNFcLayer.h index 7954852a23..e2657a8d5e 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.h +++ b/paddle/gserver/layers/MKLDNNFcLayer.h @@ -32,16 +32,13 @@ protected: // if has already init the weight bool hasInitedWgt_; - // if input layer has image size info (ih>1 && iw>1) - bool hasSpatial_; - // fc weight and bias std::unique_ptr weight_; std::unique_ptr biases_; public: explicit MKLDNNFcLayer(const LayerConfig& config) - : MKLDNNLayer(config), hasInitedWgt_(false), hasSpatial_(true) {} + : MKLDNNLayer(config), hasInitedWgt_(false) {} ~MKLDNNFcLayer() {} diff --git a/paddle/math/MKLDNNMatrix.cpp b/paddle/math/MKLDNNMatrix.cpp index 24d54ec0f7..94df9c1550 100644 --- a/paddle/math/MKLDNNMatrix.cpp +++ b/paddle/math/MKLDNNMatrix.cpp @@ -85,7 +85,12 @@ void MKLDNNMatrix::downSpatial() { memory::desc md = memory::desc(dstDims, getDtype(), dstFmt); memory::primitive_desc pd = memory::primitive_desc(md, getEngine()); void* data = getData(); - memory(pd, data); + mkldnn_primitive_t result; + mkldnn::error::wrap_c_api( + mkldnn_primitive_create(&result, pd.get(), nullptr, nullptr), + "could not create a memory primitive"); + reset(result); + set_data_handle(data); } } // namespace paddle From bfcaf880d0eed61291f0483091382131ef6cde88 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Wed, 23 Aug 2017 18:48:05 +0800 Subject: [PATCH 074/170] Move pybind from package paddle/framework into paddle/pybind. 
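Stepping back to the two MKL-DNN patches above: MKLDNNMatrix::downSpatial only collapses an nchw (or oihw) memory descriptor to nc (or oi) when both trailing spatial dimensions are exactly 1, and leaves anything else untouched. A plain-Python sketch of just that shape rule, as an illustration of the logic rather than of the actual mkldnn API (the function name is invented for the example):

    def down_spatial(dims, fmt):
        # dims is [n, c, h, w] for "nchw" or [o, i, h, w] for "oihw";
        # collapse to 2-D only when both spatial dims are exactly 1.
        if fmt not in ("nchw", "oihw"):
            return dims, fmt
        lead, second, h, w = dims
        if h != 1 or w != 1:
            return dims, fmt
        return [lead, second], ("nc" if fmt == "nchw" else "oi")

    # e.g. a fully connected weight of shape [oc, ic, 1, 1] becomes [oc, ic]:
    assert down_spatial([32, 64, 1, 1], "oihw") == ([32, 64], "oi")
    # but a real 4-D activation keeps its layout:
    assert down_spatial([8, 3, 28, 28], "nchw") == ([8, 3, 28, 28], "nchw")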
--- paddle/CMakeLists.txt | 1 + paddle/framework/CMakeLists.txt | 20 -------------------- paddle/pybind/CMakeLists.txt | 19 +++++++++++++++++++ paddle/{framework => pybind}/pybind.cc | 18 ++++++++++-------- paddle/{framework => pybind}/tensor_py.h | 11 +++++++---- 5 files changed, 37 insertions(+), 32 deletions(-) create mode 100644 paddle/pybind/CMakeLists.txt rename paddle/{framework => pybind}/pybind.cc (95%) rename paddle/{framework => pybind}/tensor_py.h (92%) diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt index cf61a243e9..ec866b2907 100644 --- a/paddle/CMakeLists.txt +++ b/paddle/CMakeLists.txt @@ -15,6 +15,7 @@ if(Boost_FOUND) add_subdirectory(platform) add_subdirectory(framework) add_subdirectory(operators) + add_subdirectory(pybind) endif() if(WITH_C_API) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index ad219887d6..c0838d9b75 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -39,23 +39,3 @@ add_custom_command(TARGET framework_py_proto POST_BUILD cc_library(backward SRCS backward.cc DEPS net_op) cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context) - -if(WITH_PYTHON) -cc_library(paddle_pybind SHARED - SRCS pybind.cc - DEPS pybind python backward - sgd_op - gather_op - add_op - mul_op - rowwise_add_op - sigmoid_op - softmax_op - mean_op - cross_entropy_op - recurrent_op - uniform_random_op - gaussian_random_op - fill_zeros_like_op - scale_op) -endif(WITH_PYTHON) diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt new file mode 100644 index 0000000000..10be83efc6 --- /dev/null +++ b/paddle/pybind/CMakeLists.txt @@ -0,0 +1,19 @@ +if(WITH_PYTHON) +cc_library(paddle_pybind SHARED + SRCS pybind.cc + DEPS pybind python backward + sgd_op + gather_op + add_op + mul_op + rowwise_add_op + sigmoid_op + softmax_op + mean_op + cross_entropy_op + recurrent_op + uniform_random_op + gaussian_random_op + fill_zeros_like_op + scale_op) +endif(WITH_PYTHON) diff --git a/paddle/framework/pybind.cc b/paddle/pybind/pybind.cc similarity index 95% rename from paddle/framework/pybind.cc rename to paddle/pybind/pybind.cc index b5ae81ebca..cdf739c3a2 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -18,11 +18,11 @@ limitations under the License. */ #include "paddle/framework/backward.h" #include "paddle/framework/op_registry.h" -#include "paddle/framework/tensor_py.h" #include "paddle/operators/net_op.h" #include "paddle/operators/recurrent_op.h" #include "paddle/platform/enforce.h" #include "paddle/platform/place.h" +#include "paddle/pybind/tensor_py.h" #include "paddle/string/to_string.h" #include "pybind11/numpy.h" #include "pybind11/pybind11.h" @@ -134,7 +134,8 @@ All parameter, weight, gradient are variables in Paddle. py::return_value_policy::reference) .def("find_var", &Scope::FindVar, py::return_value_policy::reference) .def(py::init<>()) - .def("new_scope", [](Scope &self) -> Scope * { return &self.NewScope(); }, + .def("new_scope", + [](Scope &self) -> Scope * { return &self.NewScope(); }, py::return_value_policy::reference) .def("drop_kids", &Scope::DropKids); @@ -222,8 +223,10 @@ All parameter, weight, gradient are variables in Paddle. 
retv->SetType("plain_net"); return retv; }) - .def("append_op", [](operators::NetOp &self, - const OperatorBase &op) { self.AppendOp(op); }) + .def("append_op", + [](operators::NetOp &self, const OperatorBase &op) { + self.AppendOp(op); + }) .def("complete_add_op", &operators::NetOp::CompleteAddOp) .def("complete_add_op", [](std::shared_ptr &self) { self->CompleteAddOp(); @@ -243,10 +246,9 @@ All parameter, weight, gradient are variables in Paddle. auto rnn_op = OpRegistry::CreateOp(desc); return static_cast(rnn_op.release()); }) - .def("set_stepnet", [](operators::RecurrentOp &self, - const operators::NetOp &net) -> void { - self.set_stepnet(net.Clone()); - }); + .def("set_stepnet", + [](operators::RecurrentOp &self, const operators::NetOp &net) + -> void { self.set_stepnet(net.Clone()); }); m.def("unique_integer", UniqueIntegerGenerator); diff --git a/paddle/framework/tensor_py.h b/paddle/pybind/tensor_py.h similarity index 92% rename from paddle/framework/tensor_py.h rename to paddle/pybind/tensor_py.h index 4e1ab77b15..39ba60b4dc 100644 --- a/paddle/framework/tensor_py.h +++ b/paddle/pybind/tensor_py.h @@ -63,8 +63,11 @@ struct CastToPyBufferImpl { } return py::buffer_info( dst_tensor.mutable_data(dst_tensor.holder_->place()), - sizeof(CUR_TYPE), py::format_descriptor::format(), - (size_t)framework::arity(dst_tensor.dims()), dims_outside, strides); + sizeof(CUR_TYPE), + py::format_descriptor::format(), + (size_t)framework::arity(dst_tensor.dims()), + dims_outside, + strides); } else { constexpr bool less = I + 1 < std::tuple_size>::value; return CastToPyBufferImpl()(tensor); @@ -107,8 +110,8 @@ void PyCUDATensorSetFromArray( self.Resize(framework::make_ddim(dims)); auto *dst = self.mutable_data(place); - paddle::platform::GpuMemcpySync(dst, array.data(), sizeof(T) * array.size(), - cudaMemcpyHostToDevice); + paddle::platform::GpuMemcpySync( + dst, array.data(), sizeof(T) * array.size(), cudaMemcpyHostToDevice); } #endif From e3342ff8e79fbe1cacb8fa5a66cb9c69cba1eeb9 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Wed, 23 Aug 2017 19:30:46 +0800 Subject: [PATCH 075/170] Fix android build error. 
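The tensor_py.h relocation above is mechanical, but it is the code that backs the numpy round-trip the mnist.py test from patch 071 relies on. A minimal sketch of that round-trip; the import path and CPUPlace constructor are assumptions inferred from that test, while the scope/tensor calls are the ones the test itself uses:

    import numpy
    import paddle.v2.framework.core as core  # assumed import path for the pybind module

    scope = core.Scope()
    place = core.CPUPlace()  # assumed; mnist.py keeps the place in a module-level global

    # Create a variable, shape its tensor, and fill it from numpy,
    # the same pattern init_param() uses in the mnist.py test.
    var = scope.new_var("w")
    tensor = var.get_tensor()
    tensor.set_dims([784, 100])
    data = numpy.random.uniform(
        low=-0.5, high=0.5, size=(784, 100)).astype("float32")
    tensor.set(data, place)

    # Reading it back goes through the buffer protocol implemented in tensor_py.h.
    w = numpy.array(scope.find_var("w").get_tensor())
    print(w.shape)  # (784, 100)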
--- paddle/cuda/include/stub/hl_cuda_cudnn_stub.h | 11 ++++++++--- paddle/cuda/src/hl_cuda_cudnn.cc | 3 ++- paddle/gserver/tests/test_LayerGrad.cpp | 6 ++++-- python/paddle/trainer/config_parser.py | 6 +++--- python/paddle/trainer_config_helpers/layers.py | 7 +++---- 5 files changed, 20 insertions(+), 13 deletions(-) diff --git a/paddle/cuda/include/stub/hl_cuda_cudnn_stub.h b/paddle/cuda/include/stub/hl_cuda_cudnn_stub.h index abd0d6b099..3afcc6fa85 100644 --- a/paddle/cuda/include/stub/hl_cuda_cudnn_stub.h +++ b/paddle/cuda/include/stub/hl_cuda_cudnn_stub.h @@ -78,7 +78,9 @@ inline void hl_create_convolution_descriptor(hl_convolution_descriptor* conv, int padding_height, int padding_width, int stride_height, - int stride_width) {} + int stride_width, + int dilation_h, + int dilation_w) {} inline void hl_reset_convolution_descriptor(hl_convolution_descriptor conv, hl_tensor_descriptor image, @@ -86,7 +88,9 @@ inline void hl_reset_convolution_descriptor(hl_convolution_descriptor conv, int padding_height, int padding_width, int stride_height, - int stride_width) {} + int stride_width, + int dilation_h, + int dilation_w) {} inline void hl_destroy_convolution_descriptor(hl_convolution_descriptor conv) {} @@ -99,7 +103,8 @@ inline void hl_conv_workspace(hl_tensor_descriptor input, int* convBwdDataAlgo, size_t* bwdDataLimitBytes, int* convBwdFilterAlgo, - size_t* bwdFilterLimitBytes) {} + size_t* bwdFilterLimitBytes, + bool useDilation) {} inline void hl_convolution_forward(hl_tensor_descriptor input, real* input_data, diff --git a/paddle/cuda/src/hl_cuda_cudnn.cc b/paddle/cuda/src/hl_cuda_cudnn.cc index f55fa523e1..f38ef69255 100644 --- a/paddle/cuda/src/hl_cuda_cudnn.cc +++ b/paddle/cuda/src/hl_cuda_cudnn.cc @@ -640,7 +640,8 @@ void hl_create_convolution_descriptor(hl_convolution_descriptor* conv, #else if (dilation_h > 1 || dilation_w > 1) { LOG(FATAL) - << "Current cudnn version does't support for dilation convolution."; + << "Current cuDNN version does't support for dilation convolution. " + << "The dilation convolution requires cuDNN >= v6.0."; } CHECK_CUDNN(dynload::cudnnSetConvolution2dDescriptor(hl_conv->desc, diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 9348c47bd4..9946f76664 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -12,7 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#ifndef PADDLE_ONLY_CPU #include +#endif #include #include #include @@ -262,8 +264,8 @@ TEST(Projection, conv) { testProjectionConv(1, false); testProjectionConv(3, false); /// test ConvTransProjection - /// testProjectionConv(1, true); - /// testProjectionConv(3, true); + testProjectionConv(1, true); + testProjectionConv(3, true); } #endif diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 33a20afb18..ddfd615d84 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -862,7 +862,6 @@ class Conv(Cfg): filter_size, channels, padding=None, - dilation=None, stride=None, groups=None, filter_channels=None, @@ -871,8 +870,9 @@ class Conv(Cfg): caffe_mode=True, filter_size_y=None, padding_y=None, - dilation_y=None, - stride_y=None): + stride_y=None, + dilation=None, + dilation_y=None): self.add_keys(locals()) if filter_size_y is None: self.filter_size_y = filter_size diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 74b88cd4f8..9876798558 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -2340,7 +2340,7 @@ def img_conv_layer(input, groups=1, stride=1, padding=0, - dilation=0, + dilation=1, bias_attr=None, param_attr=None, shared_biases=True, @@ -2472,9 +2472,6 @@ def img_conv_layer(input, else: dilation_y = dilation - if dilation > 1 or dilation_y > 1: - assert layer_type in ["cudnn_conv", "cudnn_convt"] - if param_attr.attr.get('initial_smart'): # special initial for conv layers. init_w = (2.0 / (filter_size**2 * num_channels))**0.5 @@ -2484,6 +2481,8 @@ def img_conv_layer(input, param_attr.attr["initial_smart"] = False if layer_type: + if dilation > 1 or dilation_y > 1: + assert layer_type in ["cudnn_conv", "cudnn_convt"] if trans: assert layer_type in ["exconvt", "cudnn_convt"] else: From 5e59ca7ccc8232b2028cfc8b4cffe19ffc73ba18 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Wed, 23 Aug 2017 10:40:46 +0800 Subject: [PATCH 076/170] fix config helper. --- .../gserver/layers/CrossEntropyOverBeam.cpp | 10 ++ paddle/gserver/layers/CrossEntropyOverBeam.h | 16 ++- .../tests/test_CrossEntropyOverBeamGrad.cpp | 22 ++- python/paddle/trainer/config_parser.py | 12 +- .../paddle/trainer_config_helpers/layers.py | 129 +++++++++++++++--- .../test_cross_entropy_over_beam.protostr | 17 ++- .../configs/test_cross_entropy_over_beam.py | 18 ++- 7 files changed, 162 insertions(+), 62 deletions(-) diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.cpp b/paddle/gserver/layers/CrossEntropyOverBeam.cpp index 09258fb305..f7736f0ce9 100644 --- a/paddle/gserver/layers/CrossEntropyOverBeam.cpp +++ b/paddle/gserver/layers/CrossEntropyOverBeam.cpp @@ -161,7 +161,17 @@ real CostForOneSequence::forward() { } void CostForOneSequence::backward() { + /* + * when softmax layer is the output layer, and it is combined with + * cross-entropy as cost. The derivate with regard to softmax's input + * is simply: + * + * grad_i = softmax_out_i - target_i, + * + * and here hard label is used. 
+ */ softmaxOut_->getData()[goldIdsInFinalExpansion_] -= 1.; + MatrixPtr tmp = Matrix::create( softmaxOut_->getData(), softmaxOut_->getWidth(), 1, false, false); diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.h b/paddle/gserver/layers/CrossEntropyOverBeam.h index 96a5df7dfb..5d0cffee3c 100644 --- a/paddle/gserver/layers/CrossEntropyOverBeam.h +++ b/paddle/gserver/layers/CrossEntropyOverBeam.h @@ -19,8 +19,8 @@ limitations under the License. */ namespace paddle { +/* This struct stores the beams in all search steps for a single sequence. */ struct BeamExpansion { - // store the entire beam expansion for a single sequence std::vector scores; std::vector seqInfo; @@ -111,8 +111,11 @@ private: size_t batchSize_; size_t beamSize_; - // Currently, this layer only works on CPU, if its inputs is on GPU, - // copy them to CPU memory. + /* + * the process of constructing beams is not friendly to GPU, currently, this + * layer only runs on CPU, if any of its inputs is on GPU memory, then copy + * it to CPU memory. + */ std::vector candidateScores_; std::vector candidateScoreGrad_; std::vector candidateInBeam_; @@ -120,9 +123,12 @@ private: std::vector goldSequence_; std::vector> beamSplitPos_; - // split entire bath of beams into beam per sequnence. + /* + * split entire bath of beams into beam per sequnence and store the result + * into this member. + */ std::vector beamPerSeq_; - // beamCosts_ is used to propagate error in one sequence. + /* beamCosts_ is used to propagate error in one sequence. */ std::vector beamCosts_; }; diff --git a/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp b/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp index 506a4281df..538d18cdc3 100644 --- a/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp +++ b/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp @@ -28,16 +28,10 @@ using namespace paddle; // NOLINT DECLARE_int32(gpu_id); DECLARE_bool(thread_local_rand_use_global_seed); -// const size_t MAX_SEQ_NUM = 5; -// const size_t MAX_SEQ_LEN = 10; -// const size_t MAX_BEAM_SIZE = 3; - const size_t MAX_SEQ_NUM = 23; const size_t MAX_SEQ_LEN = 50; const size_t MAX_BEAM_SIZE = 27; -// const size_t SEED = 1503391792; -// const size_t SEED = 1; const size_t SEED = (size_t)(time(NULL)); struct SingleBeamExpansion { @@ -176,10 +170,12 @@ void genGroundTruth(vector& beamExpansions, beam.resetGroundTruth(seqNum); for (size_t i = 0; i < seqNum; ++i) { if (randFloat() > 0.5) { - // force the randomly generated label falls in the beam by chance 0.5. - // otherwise, when sequence length is relatively long and beam size is - // relatively small, the gold sequences falls off the beam at in - // the first search. + /* + * force the randomly generated label falls in the beam by chance 0.5. + * otherwise, when sequence length is relatively long and beam size is + * relatively small, the gold sequences falls off the beam at in the + * first search. + */ real* begPos = beam.selectedIndices.data() + i * beamSize; beam.colIdxInBeam[i] = rand() % count_if(begPos, begPos + beamSize, [](const real& val) { @@ -222,9 +218,7 @@ void genGroundTruth(vector& beamExpansions, if (randFloat() > 0.5) { // force the randomly generated label falls in the beam by chance 0.5. - // otherwise, when sequence length is relatively long and beam size is - // relatively small, the gold sequences falls off the beam at in - // the first search. 
+ real* start = curBeam.selectedIndices.data() + curBeam.rowIdxInBeam[j] * beamSize; int n = rand() % count_if(start, start + beamSize, [](const real& val) { @@ -339,7 +333,7 @@ TEST(Layer, CrossEntropyOverBeam) { const size_t beamSize = 1 + rand() % MAX_BEAM_SIZE; LOG(INFO) << "beamSize = " << beamSize; - // TODO(caoying): test with more beam expansions. + // TODO(caoying): test with random beam expansions. const size_t expansionCount = 3; vector beams; genRandomBeamExpansion(expansionCount, beamSize, beams); diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 7707ece819..579713546f 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1605,16 +1605,16 @@ class MultiClassCrossEntropySelfNormCostLayer(LayerBase): @config_layer('cross_entropy_over_beam') class CrossEntropyOverBeamLayer(LayerBase): def __init__(self, name, inputs, **xargs): - config_assert(len(inputs) % 3 == 0, "Error input numbers.") + config_assert(len(inputs) % 3 == 0, "Error input number.") super(CrossEntropyOverBeamLayer, self).__init__( name, 'cross_entropy_over_beam', 0, inputs, **xargs) input_num = len(inputs) / 3 for i in range(input_num): - input_layer = self.get_input_layer(i * 2) - config_assert( - input_layer.size == 1, "Inputs for this layer are made up of " - "several pairs and the first one in a pair is scores for " - "all the candidates, so its size should be equal to 1.") + input_layer = self.get_input_layer(i * 3) + config_assert(input_layer.size == 1, ( + "Inputs for this layer are made up of " + "several triples, in which the first one is scores over " + "all candidate paths, whose size should be equal to 1.")) @config_layer('fc') diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index b027f84b5d..053c92d005 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -103,6 +103,7 @@ __all__ = [ 'nce_layer', 'cross_entropy_with_selfnorm', 'cross_entropy', + 'BeamInput', 'cross_entropy_over_beam', 'multi_binary_label_cross_entropy', 'sum_cost', @@ -5681,10 +5682,10 @@ def multi_binary_label_cross_entropy(input, if input.activation is None or \ not isinstance(input.activation, SigmoidActivation): - logger.log( - logging.WARN, - "%s is not recommend for multi_binary_label_cross_entropy's activation, " - "maybe the sigmoid is better" % repr(input.activation)) + logger.log(logging.WARN, + ("%s is not a recommended activation for " + "multi_binary_label_cross_entropy, sigmoid is better") % + repr(input.activation)) Layer( name=name, @@ -5699,26 +5700,110 @@ def multi_binary_label_cross_entropy(input, size=1) +class BeamInput(object): + """ + Define the input for cross_entropy_over_beam layer. + + A beam is made up of a triple: the first one is scores over all + candidates; the second one is indices of top k selected candidates; the + third one is the index of ground truth, which is also always called + gold. 
+ """ + + def __init__(self, candidate_scores, selected_candidates, gold): + assert isinstance(candidate_scores, LayerOutput) + self.candidate_scores = candidate_scores + assert candidate_scores.size == 1 + + assert isinstance(selected_candidates, LayerOutput) + self.selected_candidates = selected_candidates + + assert isinstance(gold, LayerOutput) + self.gold = gold + + @wrap_name_default() @layer_support() -def cross_entropy_over_beam(input, label, name=None, coeff=1.0, weight=None): - """ - TODO(caoying) add comments. +def cross_entropy_over_beam(input, name=None): """ + This layer is used in learning to search models, which is to solve complex + joint prediction problems based on learning to search through a + problem-defined search space. - assert len(input) / 2 == len(label), "Error input numbers." - for i in range(0, len(input), 2): - assert (input[i].size == 1), ( - "Inputs for this layer are made up of " - "several pairs and the first one in a pair is scores for " - "all the candidates, so its size should be equal to 1.") + Specifically, the learning to search process for this layer begins with + searching a target sequence from a nested sequence. In the first search + step, top beam size sequences with highest scores, indices of these top k + sequences in the original nested sequence, and the ground truth (also + called gold) altogether (a triple) make up of the first beam. - ipts, parents = __cost_input__(input, label, weight) - Layer( - name=name, - type=LayerType.CROSS_ENTROPY_OVER_BEAM, - inputs=ipts, - coeff=coeff) + Then, several special positions, for example, start and end positions + that define meaningful segments are searched. In these searches, top k + positions with highest scores are selected, and then sequence, starting + from the selected starts till ends of the sequences (or a fixed position) + are taken to search next. + + We call the possible top k results returned in one search the beam. This + search process can be repeated for pre-defined turns and leads to several + beam expansions. + + Finally, the layer cross_entropy_over_beam takes all the beam expansions + which contain several candidate targets found along the multi-step search. + cross_entropy_over_beam calculates cross entropy over the expanded beams + which all the candidates in the beam as the normalized factor. + + Note that, if gold falls off the beam at search step t, then the cost is + calculated over the beam at step t. + + This cost layer always works together with kmax_sequence_score_layer, + sub_nested_seq_layer, and sequence_slice_layer to trim the input to form a + sub-search space. + + + The example usage is: + + .. code-block:: python + + cost = cross_entropy_over_beam(input=[ + BeamInput( + candidate_scores=beam1_candidates, + selected_candidates=beam1_topk, + gold=gold1), + BeamInput( + candidate_scores=beam2_candidates, + selected_candidates=beam2_topk, + gold=gold2), + ]) + + + :param input: input beams for this layer. + :type input: BeamInput + :param name: input beams for this layer. + :type name: basestring + :return: LayerOutput object. 
+ :rtype: LayerOutput + """ + + if isinstance(input, BeamInput): + input = [input] + else: + assert isinstance(input, list), ( + 'input for cross_entropy_over_beam shold be a python list ' + 'of BeamInput object.') + for ipt in input: + assert isinstance(ipt, BeamInput), ( + 'input for cross_entropy_over_beam ' + 'should be a BeamInput object.') + + ipts = [] + parents = [] + for beam in input: + parents += [beam.candidate_scores, beam.selected_candidates, beam.gold] + ipts += [ + beam.candidate_scores.name, beam.selected_candidates.name, + beam.gold.name + ] + + Layer(name=name, type=LayerType.CROSS_ENTROPY_OVER_BEAM, inputs=ipts) return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=parents, size=1) @@ -6247,11 +6332,11 @@ def kmax_sequence_score_layer(input, name=None, beam_size=1): @wrap_bias_attr_default() def scale_shift_layer(input, name=None, param_attr=None, bias_attr=None): """ - A layer applies a linear transformation to each element in each row of - the input matrix. For each element, the layer first re-scale it and then + A layer applies a linear transformation to each element in each row of + the input matrix. For each element, the layer first re-scale it and then adds a bias to it. - This layer is very like the SlopeInterceptLayer, except the scale and + This layer is very like the SlopeInterceptLayer, except the scale and bias are trainable. .. math:: diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr index e44478ec2b..c43fc48e22 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr @@ -114,27 +114,26 @@ layers { input_layer_name: "__kmax_sequence_score_layer_0__" } inputs { - input_layer_name: "__fc_layer_0__" + input_layer_name: "sentences_ids" } inputs { - input_layer_name: "__kmax_sequence_score_layer_1__" + input_layer_name: "__fc_layer_0__" } inputs { - input_layer_name: "__fc_layer_1__" + input_layer_name: "__kmax_sequence_score_layer_1__" } inputs { - input_layer_name: "__kmax_sequence_score_layer_2__" + input_layer_name: "start_ids" } inputs { - input_layer_name: "sentences_ids" + input_layer_name: "__fc_layer_1__" } inputs { - input_layer_name: "start_ids" + input_layer_name: "__kmax_sequence_score_layer_2__" } inputs { input_layer_name: "end_ids" } - coeff: 1.0 } parameters { name: "___fc_layer_0__.w0" @@ -177,8 +176,8 @@ parameters { initial_smart: false } input_layer_names: "sentence_scores" -input_layer_names: "sentence_states" input_layer_names: "sentences_ids" +input_layer_names: "sentence_states" input_layer_names: "start_ids" input_layer_names: "end_ids" output_layer_names: "__cross_entropy_over_beam_0__" @@ -198,8 +197,8 @@ sub_models { layer_names: "end_ids" layer_names: "__cross_entropy_over_beam_0__" input_layer_names: "sentence_scores" - input_layer_names: "sentence_states" input_layer_names: "sentences_ids" + input_layer_names: "sentence_states" input_layer_names: "start_ids" input_layer_names: "end_ids" output_layer_names: "__cross_entropy_over_beam_0__" diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py b/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py index edc2d32fca..240e703dc9 100644 --- 
a/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py @@ -29,11 +29,17 @@ topk_end_pos_ids = kmax_sequence_score_layer( sentence_idx = data_layer(name="sentences_ids", size=1) start_idx = data_layer(name="start_ids", size=1) end_idx = data_layer(name="end_ids", size=1) -cost = cross_entropy_over_beam( - input=[ - sentence_scores, topk_sentence_ids, start_pos_scores, - topk_start_pos_ids, end_pos_scores, topk_end_pos_ids - ], - label=[sentence_idx, start_idx, end_idx]) +cost = cross_entropy_over_beam(input=[ + BeamInput( + candidate_scores=sentence_scores, + selected_candidates=topk_sentence_ids, + gold=sentence_idx), BeamInput( + candidate_scores=start_pos_scores, + selected_candidates=topk_start_pos_ids, + gold=start_idx), BeamInput( + candidate_scores=end_pos_scores, + selected_candidates=topk_end_pos_ids, + gold=end_idx) +]) outputs(cost) From 76677f25774a84d9ced011be02e62ae15b03506c Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 23 Aug 2017 09:12:34 -0700 Subject: [PATCH 077/170] add test --- python/paddle/v2/framework/tests/mnist.py | 36 ++++++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index ededf767bc..e47de2436f 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -52,7 +52,7 @@ def grad_var_name(var_name): return var_name + "@GRAD" -def sgd_optimizer(net, param_name, learning_rate=0.01): +def sgd_optimizer(net, param_name, learning_rate=0.005): grad_name = grad_var_name(param_name) optimize_op = Operator( "sgd", @@ -166,9 +166,9 @@ def set_cost(): cost_grad.set(numpy.ones(cost_shape).astype("float32"), place) -def print_cost(): +def mean_cost(): cost_data = numpy.array(scope.find_var("cross_entropy_3").get_tensor()) - print(cost_data.sum() / len(cost_data)) + return cost_data.sum() / len(cost_data) def error_rate(predict, label): @@ -176,7 +176,7 @@ def error_rate(predict, label): axis=1) label = numpy.array(scope.find_var(label).get_tensor()) error_num = numpy.sum(predict_var != label) - print(error_num / float(len(label))) + return error_num / float(len(label)) images = data_layer(name='pixel', dims=[BATCH_SIZE, 784]) @@ -198,16 +198,35 @@ print_inputs_outputs(forward_network) print_inputs_outputs(backward_net) print_inputs_outputs(optimize_net) -reader = paddle.batch( +train_reader = paddle.batch( paddle.reader.shuffle( paddle.dataset.mnist.train(), buf_size=8192), batch_size=BATCH_SIZE) + +def test(): + test_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128) + cost = [] + error = [] + for data in test_reader(): + image = numpy.array(map(lambda x: x[0], data)).astype("float32") + label = numpy.array(map(lambda x: x[1], data)).astype("int32") + feed_data("pixel", image) + feed_data("label", label) + + forward_network.infer_shape(scope) + forward_network.run(scope, dev_ctx) + cost.append(mean_cost()) + error.append(error_rate(predict, "label")) + print("cost=" + str(sum(cost) / float(len(cost))) + " error_rate=" + str( + sum(error) / float(len(error)))) + + PASS_NUM = 1000 for pass_id in range(PASS_NUM): batch_id = 0 - for data in reader(): + for data in train_reader(): image = numpy.array(map(lambda x: x[0], data)).astype("float32") label = numpy.array(map(lambda x: x[1], data)).astype("int32") feed_data("pixel", image) @@ -222,7 +241,8 @@ for pass_id in 
range(PASS_NUM): optimize_net.run(scope, dev_ctx) if batch_id % 100 == 0: print("pass[" + str(pass_id) + "] batch_id[" + str(batch_id) + "]") - print_cost() - error_rate(predict, "label") + test() + # print(mean_cost()) + # print(error_rate(predict, "label")) batch_id = batch_id + 1 From cf515e4a72f4b02fbbbfdbd79c3b66b1be694e7b Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 23 Aug 2017 09:39:47 -0700 Subject: [PATCH 078/170] optimize code and name --- python/paddle/v2/framework/tests/mnist.py | 56 +++++++++++------------ 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index e47de2436f..886e99610d 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -134,7 +134,7 @@ def cross_entropy_layer(net, input, label): return cost_name -def get_backward_net(forward_net): +def create_backward_net(forward_net): net = core.Operator.backward(forward_net, set()) for input in net.inputs()["all"]: var = scope.new_var(input) @@ -145,29 +145,29 @@ def get_backward_net(forward_net): return net -def print_inputs_outputs(op): +def debug_print_op(op): print("===============" + op.type() + "==============") print("***inputs:***") for input in op.inputs()["all"]: print input, scope.find_var(input).get_tensor().get_dims() - print("***outputs:***") + print("\n***outputs:***") for output in op.outputs()["all"]: print output, scope.find_var(output).get_tensor().get_dims() print("") print("") -def set_cost(): - cost_shape = numpy.array(scope.find_var("cross_entropy_3").get_tensor( - )).shape - cost_grad = scope.find_var(grad_var_name("cross_entropy_3")).get_tensor() +def set_cost(cost): + cost_shape = numpy.array(scope.find_var(cost).get_tensor()).shape + cost_grad = \ + scope.find_var(grad_var_name(cost)).get_tensor() cost_grad.set_dims(cost_shape) cost_grad.alloc_float(place) cost_grad.set(numpy.ones(cost_shape).astype("float32"), place) -def mean_cost(): - cost_data = numpy.array(scope.find_var("cross_entropy_3").get_tensor()) +def mean_cost(cost): + cost_data = numpy.array(scope.find_var(cost).get_tensor()) return cost_data.sum() / len(cost_data) @@ -180,23 +180,23 @@ def error_rate(predict, label): images = data_layer(name='pixel', dims=[BATCH_SIZE, 784]) -label = data_layer(name='label', dims=[BATCH_SIZE]) +labels = data_layer(name='label', dims=[BATCH_SIZE]) fc1 = fc_layer(net=forward_network, input=images, size=100, act="sigmoid") fc2 = fc_layer(net=forward_network, input=fc1, size=100, act="sigmoid") predict = fc_layer(net=forward_network, input=fc2, size=100, act="softmax") -cost = cross_entropy_layer(net=forward_network, input=predict, label=label) +cost = cross_entropy_layer(net=forward_network, input=predict, label=labels) forward_network.complete_add_op(True) -backward_net = get_backward_net(forward_network) +backward_net = create_backward_net(forward_network) optimize_net.complete_add_op(True) print(forward_network) print(backward_net) print(optimize_net) -print_inputs_outputs(forward_network) -print_inputs_outputs(backward_net) -print_inputs_outputs(optimize_net) +debug_print_op(forward_network) +debug_print_op(backward_net) +debug_print_op(optimize_net) train_reader = paddle.batch( paddle.reader.shuffle( @@ -204,19 +204,19 @@ train_reader = paddle.batch( batch_size=BATCH_SIZE) -def test(): +def test(cost_name): test_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128) cost = [] error = [] for data in test_reader(): - image = 
numpy.array(map(lambda x: x[0], data)).astype("float32") - label = numpy.array(map(lambda x: x[1], data)).astype("int32") - feed_data("pixel", image) - feed_data("label", label) + image_data = numpy.array(map(lambda x: x[0], data)).astype("float32") + label_data = numpy.array(map(lambda x: x[1], data)).astype("int32") + feed_data(images, image_data) + feed_data(labels, label_data) forward_network.infer_shape(scope) forward_network.run(scope, dev_ctx) - cost.append(mean_cost()) + cost.append(mean_cost(cost_name)) error.append(error_rate(predict, "label")) print("cost=" + str(sum(cost) / float(len(cost))) + " error_rate=" + str( sum(error) / float(len(error)))) @@ -227,22 +227,20 @@ for pass_id in range(PASS_NUM): batch_id = 0 for data in train_reader(): - image = numpy.array(map(lambda x: x[0], data)).astype("float32") - label = numpy.array(map(lambda x: x[1], data)).astype("int32") - feed_data("pixel", image) - feed_data("label", label) + image_data = numpy.array(map(lambda x: x[0], data)).astype("float32") + label_data = numpy.array(map(lambda x: x[1], data)).astype("int32") + feed_data(images, image_data) + feed_data(labels, label_data) forward_network.infer_shape(scope) forward_network.run(scope, dev_ctx) - set_cost() + set_cost(cost) backward_net.infer_shape(scope) backward_net.run(scope, dev_ctx) optimize_net.run(scope, dev_ctx) if batch_id % 100 == 0: print("pass[" + str(pass_id) + "] batch_id[" + str(batch_id) + "]") - test() - # print(mean_cost()) - # print(error_rate(predict, "label")) + test(cost) batch_id = batch_id + 1 From 9db4ad6130d79d72fa150e534b5b54fa723c3240 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 23 Aug 2017 09:42:58 -0700 Subject: [PATCH 079/170] reduce pass num to 1 --- python/paddle/v2/framework/tests/mnist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index 886e99610d..eefd5709a3 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -222,7 +222,7 @@ def test(cost_name): sum(error) / float(len(error)))) -PASS_NUM = 1000 +PASS_NUM = 1 for pass_id in range(PASS_NUM): batch_id = 0 From 37cd8165b3089c8e4a6ce743f5e0ee8c029ba46b Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 23 Aug 2017 10:56:56 -0700 Subject: [PATCH 080/170] change 128 to BATCH_SIZE --- python/paddle/v2/framework/tests/mnist.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index eefd5709a3..e878bfa4e9 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -205,7 +205,8 @@ train_reader = paddle.batch( def test(cost_name): - test_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128) + test_reader = paddle.batch( + paddle.dataset.mnist.test(), batch_size=BATCH_SIZE) cost = [] error = [] for data in test_reader(): From 72d29186bb426efc4eb78d9d6b6e605c7e2ce56c Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 23 Aug 2017 11:07:15 -0700 Subject: [PATCH 081/170] reduce some compile warning --- paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp | 4 ++-- paddle/operators/net_op_test.cc | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp index 1829f72a87..d00d408ab8 100644 --- 
a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp +++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp @@ -1399,8 +1399,8 @@ void RecurrentGradientMachine::createDataOutlinkCopySizeInfo( getBeamSize() > 1 ? finalPaths_.size() : finalPaths_[0].size()); int* starts = inputSeqStartPos->getMutableData(false); int seqId = 0; - for (int i = 0; i < finalPaths_.size(); ++i) { - for (int j = 0; j < finalPaths_[i].size(); ++j) { + for (size_t i = 0; i < finalPaths_.size(); ++i) { + for (size_t j = 0; j < finalPaths_[i].size(); ++j) { copySize[seqId] = getBeamSize() > 1 ? starts[i + 1] - starts[i] : starts[j + 1] - starts[j]; batchMachineStartPos_[seqId + 1] = diff --git a/paddle/operators/net_op_test.cc b/paddle/operators/net_op_test.cc index 99019754a9..f2e98ee7a1 100644 --- a/paddle/operators/net_op_test.cc +++ b/paddle/operators/net_op_test.cc @@ -79,7 +79,7 @@ TEST(NetOp, Clone) { ASSERT_NE(new_net_op, nullptr); ASSERT_TRUE(new_net_op->IsNetOp()); auto* new_net = static_cast(new_net_op.get()); - ASSERT_EQ(2, new_net->ops_.size()); + ASSERT_EQ(2UL, new_net->ops_.size()); ASSERT_EQ(new_net->ops_[0]->Type(), "empty"); ASSERT_EQ(new_net->ops_[1]->Type(), "empty2"); } From 3b6069b7f511e41a00551cc3141385d9eb464f51 Mon Sep 17 00:00:00 2001 From: haonanyu Date: Wed, 23 Aug 2017 12:16:36 -0700 Subject: [PATCH 082/170] fix ids reverse order in fillGenOutputs --- paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp index 1829f72a87..56c3951cfd 100644 --- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp +++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp @@ -1344,7 +1344,7 @@ void RecurrentGradientMachine::fillGenOutputs() { CHECK(!finalPaths_[i].empty()); Path& path = finalPaths_[i][0]; generator_.ids.insert( - generator_.ids.begin(), path.ids.begin(), path.ids.end()); + generator_.ids.end(), path.ids.begin(), path.ids.end()); starts[i + 1] = starts[i] + path.ids.size(); } } From da7a1f2f6c355b1bcdc0bd88e644f027d70f75d8 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Wed, 23 Aug 2017 21:30:08 +0000 Subject: [PATCH 083/170] master client: retry connecting to etcd --- go/master/client.go | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/go/master/client.go b/go/master/client.go index 62801b9b7f..9344c6f0ab 100644 --- a/go/master/client.go +++ b/go/master/client.go @@ -60,13 +60,30 @@ func WithAddr(addr string) func(c *Client) error { } } +func retry(f func() error, dur time.Duration, count int) error { + err := f() + if err != nil { + if count > 0 { + return retry(f, dur, count-1) + } + return err + } + return nil +} + // WithEtcd sets the client to use etcd for master discovery. 
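// The retry helper added above re-dials etcd up to `count` times before giving up; WithEtcd uses it with 10 attempts, so NewClient no longer needs the fixed one-second sleep it previously used to wait for the connection to become ready.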
func WithEtcd(endpoints []string, timeout time.Duration) func(*Client) error { return func(c *Client) error { - cli, err := clientv3.New(clientv3.Config{ - Endpoints: endpoints, - DialTimeout: timeout, - }) + var cli *clientv3.Client + f := func() error { + var err error + cli, err = clientv3.New(clientv3.Config{ + Endpoints: endpoints, + DialTimeout: timeout, + }) + return err + } + err := retry(f, time.Second, 10) if err != nil { return err } @@ -101,9 +118,6 @@ func NewClient(opts ...func(*Client) error) (*Client, error) { } } c.ch = make(chan record, c.bufSize) - // FIXME: connection is created asyncrosly in monitorMaster go routine, - // ensure the connection is ready for use before calling c.addClient. - time.Sleep(time.Second) return c, nil } From 5270585e107b16dc527ada329dddf6fc44714a35 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Wed, 23 Aug 2017 21:38:43 +0000 Subject: [PATCH 084/170] fix according to comment --- go/master/client.go | 1 + 1 file changed, 1 insertion(+) diff --git a/go/master/client.go b/go/master/client.go index 9344c6f0ab..199690d488 100644 --- a/go/master/client.go +++ b/go/master/client.go @@ -64,6 +64,7 @@ func retry(f func() error, dur time.Duration, count int) error { err := f() if err != nil { if count > 0 { + time.Sleep(dur) return retry(f, dur, count-1) } return err From 05176bd1bb5af94bfbabbb524ed9e65448134e39 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Thu, 24 Aug 2017 01:23:27 +0000 Subject: [PATCH 085/170] master server will wait etcd forever --- go/master/client.go | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/go/master/client.go b/go/master/client.go index 199690d488..f04cf50ce3 100644 --- a/go/master/client.go +++ b/go/master/client.go @@ -60,18 +60,6 @@ func WithAddr(addr string) func(c *Client) error { } } -func retry(f func() error, dur time.Duration, count int) error { - err := f() - if err != nil { - if count > 0 { - time.Sleep(dur) - return retry(f, dur, count-1) - } - return err - } - return nil -} - // WithEtcd sets the client to use etcd for master discovery. func WithEtcd(endpoints []string, timeout time.Duration) func(*Client) error { return func(c *Client) error { @@ -84,9 +72,14 @@ func WithEtcd(endpoints []string, timeout time.Duration) func(*Client) error { }) return err } - err := retry(f, time.Second, 10) - if err != nil { - return err + for { + err := f() + if err != nil { + log.Warningln(err) + } else { + break + } + time.Sleep(time.Second) } ch := make(chan string, 1) From 161a15f055c2cbe1937522a7a11dbdeb31f1a774 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Thu, 24 Aug 2017 03:11:54 +0000 Subject: [PATCH 086/170] gradient check --- python/paddle/v2/framework/tests/gradient_checker.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index c22c6f8831..d7809e52fb 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -86,6 +86,9 @@ def get_numeric_gradient(op, # we only compute gradient of one element each time. # we use a for loop to compute the gradient of every element. for i in xrange(tensor_size): + for var_name in input_values: + tensor_ = local_scope.find_var(var_name).get_tensor() + tensor_.set(numpy.copy(input_values[var_name]), core.CPUPlace()) # get one input element throw it's index i. 
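# The loop added above restores every input tensor from the cached input_values, so the perturbation below always starts from the original, unmodified input values.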
origin = tensor_to_check.get_float_element(i) @@ -95,6 +98,9 @@ def get_numeric_gradient(op, y_pos = get_output() # plus delta to this element, run op and get the sum of the result tensor. + for var_name in input_values: + tensor_ = local_scope.find_var(var_name).get_tensor() + tensor_.set(numpy.copy(input_values[var_name]), core.CPUPlace()) x_neg = origin - delta tensor_to_check.set_float_element(i, x_neg) y_neg = get_output() From 0e300f9bf04ba459dbef93af9537f847cebbcd27 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 23 Aug 2017 20:14:54 -0700 Subject: [PATCH 087/170] use init_net and random_op to initialize parameter --- python/paddle/v2/framework/tests/mnist.py | 54 +++++++++++------------ 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index e878bfa4e9..0c27ce3e35 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -9,11 +9,8 @@ scope = core.Scope() place = core.CPUPlace() dev_ctx = core.DeviceContext.create(place) -# init_net = core.Net.create() -forward_network = core.Net.create() - -# should be init after forward_op is constructed -# backward_net = core.Operator.backward(forward_net, set()) +init_net = core.Net.create() +forward_net = core.Net.create() backward_net = None optimize_net = core.Net.create() @@ -64,13 +61,12 @@ def sgd_optimizer(net, param_name, learning_rate=0.005): # should use operator and add these to the init_network -def init_param(param_name, dims): - var = scope.new_var(param_name) - tensor = var.get_tensor() - tensor.set_dims(dims) - data = numpy.random.uniform( - low=-0.5, high=0.5, size=tensor.shape()).astype("float32") - tensor.set(data, place) +def init_param(net, param_name, dims): + scope.new_var(param_name) + op = Operator( + "uniform_random", Out=param_name, dims=dims, min=-0.5, max=0.5, seed=10) + op.infer_shape(scope) + net.append_op(op) # fc_layer @@ -96,7 +92,7 @@ def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None): input_dims = scope.find_var(input).get_tensor().get_dims() w_name = param or name + ".w" - init_param(param_name=w_name, dims=[input_dims[1], size]) + init_param(net=init_net, param_name=w_name, dims=[input_dims[1], size]) sgd_optimizer(net=optimize_net, param_name=w_name, learning_rate=0.01) pre_activation = name + ".mul.out" @@ -107,7 +103,7 @@ def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None): # create bias variable if needed if bias: bias_name = name + ".b" - init_param(param_name=bias_name, dims=[size]) + init_param(net=init_net, param_name=bias_name, dims=[size]) sgd_optimizer( net=optimize_net, param_name=bias_name, learning_rate=0.001) bias_out = name + ".rowwise_add.out" @@ -181,20 +177,22 @@ def error_rate(predict, label): images = data_layer(name='pixel', dims=[BATCH_SIZE, 784]) labels = data_layer(name='label', dims=[BATCH_SIZE]) -fc1 = fc_layer(net=forward_network, input=images, size=100, act="sigmoid") -fc2 = fc_layer(net=forward_network, input=fc1, size=100, act="sigmoid") -predict = fc_layer(net=forward_network, input=fc2, size=100, act="softmax") -cost = cross_entropy_layer(net=forward_network, input=predict, label=labels) - -forward_network.complete_add_op(True) -backward_net = create_backward_net(forward_network) +fc1 = fc_layer(net=forward_net, input=images, size=100, act="sigmoid") +fc2 = fc_layer(net=forward_net, input=fc1, size=100, act="sigmoid") +predict = fc_layer(net=forward_net, input=fc2, 
size=100, act="softmax") +cost = cross_entropy_layer(net=forward_net, input=predict, label=labels) + +init_net.complete_add_op(True) +forward_net.complete_add_op(True) +backward_net = create_backward_net(forward_net) optimize_net.complete_add_op(True) -print(forward_network) +print(init_net) +print(forward_net) print(backward_net) print(optimize_net) -debug_print_op(forward_network) +debug_print_op(forward_net) debug_print_op(backward_net) debug_print_op(optimize_net) @@ -215,8 +213,8 @@ def test(cost_name): feed_data(images, image_data) feed_data(labels, label_data) - forward_network.infer_shape(scope) - forward_network.run(scope, dev_ctx) + forward_net.infer_shape(scope) + forward_net.run(scope, dev_ctx) cost.append(mean_cost(cost_name)) error.append(error_rate(predict, "label")) print("cost=" + str(sum(cost) / float(len(cost))) + " error_rate=" + str( @@ -224,6 +222,8 @@ def test(cost_name): PASS_NUM = 1 + +init_net.run(scope, dev_ctx) for pass_id in range(PASS_NUM): batch_id = 0 @@ -233,8 +233,8 @@ for pass_id in range(PASS_NUM): feed_data(images, image_data) feed_data(labels, label_data) - forward_network.infer_shape(scope) - forward_network.run(scope, dev_ctx) + forward_net.infer_shape(scope) + forward_net.run(scope, dev_ctx) set_cost(cost) backward_net.infer_shape(scope) backward_net.run(scope, dev_ctx) From 0ee18a86d18b4d4506c63e13b2953c9153c27f8d Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Thu, 24 Aug 2017 11:50:35 +0800 Subject: [PATCH 088/170] Fix doc. --- python/paddle/trainer_config_helpers/layers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index b3568cc257..f323b017c0 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -2418,8 +2418,8 @@ def img_conv_layer(input, :param dilation: The x dimension of the dilation. Or input a tuple for two image dimension :type dilation: int|tuple|list - :param padding_y: The y dimension of the dilation. - :type padding_y: int + :param dilation_y: The y dimension of the dilation. + :type dilation_y: int :param bias_attr: Convolution bias attribute. None means default bias. False means no bias. 
:type bias_attr: ParameterAttribute|False From 12864f142073b4a280120e4d9b3abe4e2483ca32 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 23 Aug 2017 22:51:35 -0700 Subject: [PATCH 089/170] register rowwise add gpu kernel --- paddle/operators/rowwise_add_op.cu | 3 +++ 1 file changed, 3 insertions(+) diff --git a/paddle/operators/rowwise_add_op.cu b/paddle/operators/rowwise_add_op.cu index cbc61ad3e1..4a57f64c89 100644 --- a/paddle/operators/rowwise_add_op.cu +++ b/paddle/operators/rowwise_add_op.cu @@ -18,3 +18,6 @@ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( rowwise_add, ops::RowwiseAddKernel); +REGISTER_OP_GPU_KERNEL( + rowwise_add_grad, + ops::RowwiseAddGradKernel); From 3648165b63bd5331d1809cba896176e4af0a9ff2 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 23 Aug 2017 23:00:45 -0700 Subject: [PATCH 090/170] add gpu support --- python/paddle/v2/framework/tests/mnist.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index 0c27ce3e35..d9941023fe 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -7,6 +7,8 @@ BATCH_SIZE = 100 scope = core.Scope() place = core.CPUPlace() +# if you want to test GPU training, you can use gpu place +# place = core.GPUPlace(0) dev_ctx = core.DeviceContext.create(place) init_net = core.Net.create() From 625b15355a16fa42476e7dbd166b77e092dcb97f Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 23 Aug 2017 23:56:55 -0700 Subject: [PATCH 091/170] optimize code --- python/paddle/v2/framework/tests/mnist.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index d9941023fe..9a0b109850 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -17,14 +17,14 @@ backward_net = None optimize_net = core.Net.create() -def atom_id(): +def atomic_id(): id = 0 while True: yield id id += 1 -uniq_id = atom_id().next +uniq_id = atomic_id().next def data_layer(name, dims): @@ -164,7 +164,7 @@ def set_cost(cost): cost_grad.set(numpy.ones(cost_shape).astype("float32"), place) -def mean_cost(cost): +def get_cost_mean(cost): cost_data = numpy.array(scope.find_var(cost).get_tensor()) return cost_data.sum() / len(cost_data) @@ -217,7 +217,7 @@ def test(cost_name): forward_net.infer_shape(scope) forward_net.run(scope, dev_ctx) - cost.append(mean_cost(cost_name)) + cost.append(get_cost_mean(cost_name)) error.append(error_rate(predict, "label")) print("cost=" + str(sum(cost) / float(len(cost))) + " error_rate=" + str( sum(error) / float(len(error)))) From 790379f1477835badbc35c563623d13ec5fd2b7a Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Wed, 23 Aug 2017 14:11:30 +0800 Subject: [PATCH 092/170] fix above comments --- paddle/cuda/include/hl_cnn.h | 106 ------------------ paddle/cuda/include/stub/hl_cnn_stub.h | 6 +- paddle/gserver/layers/Pool3DLayer.cpp | 71 +++++------- paddle/gserver/layers/Pool3DLayer.h | 1 + paddle/math/Matrix.cpp | 131 +++++++++++----------- paddle/math/Matrix.h | 135 +++++++++++------------ paddle/math/tests/test_matrixCompare.cpp | 97 ++++++++-------- 7 files changed, 208 insertions(+), 339 deletions(-) diff --git a/paddle/cuda/include/hl_cnn.h b/paddle/cuda/include/hl_cnn.h index e9687d0a58..84f1c84359 100644 --- a/paddle/cuda/include/hl_cnn.h +++ b/paddle/cuda/include/hl_cnn.h @@ -173,31 +173,6 @@ extern void 
hl_avgpool_backward(const int frameCnt, real* backGrad, const int outStride); -/** - * @brief Maximum pool forward. - * - * @param[in] frameCnt batch size of input image. - * @param[in] inputData input data. - * @param[in] channels number of channel. - * @param[in] depth image depth. - * @param[in] height image height. - * @param[in] width image width. - * @param[in] pooledD output image depth. - * @param[in] pooledH output image height. - * @param[in] pooledW output image width. - * @param[in] sizeZ depth of pooling window. - * @param[in] sizeY height of pooling window. - * @param[in] sizeX width of pooling window. - * @param[in] strideD pooling stride depth. - * @param[in] strideH pooling stride height. - * @param[in] strideW pooling stride width. - * @param[in] paddingD padding depth. - * @param[in] paddingH padding height. - * @param[in] paddingW padding width. - * @param[out] tgtData output data. - * @param[in] tgtStride stride between output data samples. - * - */ extern void hl_maxpool3D_forward(const int frameCnt, const real* inputData, const int channels, @@ -219,35 +194,6 @@ extern void hl_maxpool3D_forward(const int frameCnt, real* tgtData, const int tgtStride); -/** - * @brief Maximum pool backward. - * - * @param[in] frameCnt batch size of input image. - * @param[in] inputData input data. - * @param[out] outData output data. - * @param[out] outGrad output grad data. - * @param[in] channels number of channel. - * @param[in] depth image depth. - * @param[in] height image height. - * @param[in] width image width. - * @param[in] pooledD output image depth. - * @param[in] pooledH output image height. - * @param[in] pooledW output image width. - * @param[in] sizeZ depth of pooling window. - * @param[in] sizeY height of pooling window. - * @param[in] sizeX width of pooling window. - * @param[in] strideD pooling stride depth. - * @param[in] strideH pooling stride height. - * @param[in] strideW pooling stride width. - * @param[in] scaleA scale. - * @param[in] scaleB scale. - * @param[in] paddingD padding depth. - * @param[in] paddingH padding height. - * @param[in] paddingW padding width. - * @param[out] targetGrad output grad. - * @param[in] outStride stride between output data samples. - * - */ extern void hl_maxpool3D_backward(const int frameCnt, const real* inputData, const real* outData, @@ -273,31 +219,6 @@ extern void hl_maxpool3D_backward(const int frameCnt, real* targetGrad, const int outStride); -/** - * @brief Averge pool forward. - * - * @param[in] frameCnt batch size of input image. - * @param[in] inputData input data. - * @param[in] channels number of channel. - * @param[in] depth image depth. - * @param[in] height image height. - * @param[in] width image width. - * @param[in] pooledD output image depth. - * @param[in] pooledH output image height. - * @param[in] pooledW output image width. - * @param[in] sizeZ depth of pooling window. - * @param[in] sizeY height of pooling window. - * @param[in] sizeX width of pooling window. - * @param[in] strideD pooling stride depth. - * @param[in] strideH pooling stride height. - * @param[in] strideW pooling stride width. - * @param[in] paddingD padding depth. - * @param[in] paddingH padding height. - * @param[in] paddingW padding width. - * @param[out] tgtData output data. - * @param[in] tgtStride stride between output data samples. 
- * - */ extern void hl_avgpool3D_forward(const int frameCnt, const real* inputData, const int channels, @@ -319,33 +240,6 @@ extern void hl_avgpool3D_forward(const int frameCnt, real* tgtData, const int tgtStride); -/** - * @brief Maximum pool backward. - * - * @param[in] frameCnt batch size of input image. - * @param[in] outGrad output grad data. - * @param[in] channels number of channel. - * @param[in] depth image depth. - * @param[in] height image height. - * @param[in] width image width. - * @param[in] pooledD output image depth. - * @param[in] pooledH output image height. - * @param[in] pooledW output image width. - * @param[in] sizeZ depth of pooling window. - * @param[in] sizeY height of pooling window. - * @param[in] sizeX width of pooling window. - * @param[in] strideD pooling stride depth. - * @param[in] strideH pooling stride height. - * @param[in] strideW pooling stride width. - * @param[in] paddingD padding depth. - * @param[in] paddingH padding height. - * @param[in] paddingW padding width. - * @param[in] scaleA scale. - * @param[in] scaleB scale. - * @param[out] backGrad output grad. - * @param[in] outStride stride between output data samples. - * - */ extern void hl_avgpool3D_backward(const int frameCnt, const real* outGrad, const int channels, diff --git a/paddle/cuda/include/stub/hl_cnn_stub.h b/paddle/cuda/include/stub/hl_cnn_stub.h index 28f61781be..6750f537bf 100644 --- a/paddle/cuda/include/stub/hl_cnn_stub.h +++ b/paddle/cuda/include/stub/hl_cnn_stub.h @@ -169,9 +169,9 @@ inline void hl_avgpool3D_backward(const int frameCnt, const int strideD, const int strideH, const int strideW, - int paddingD, - int paddingH, - int paddingW, + const int paddingD, + const int paddingH, + const int paddingW, real scaleA, real scaleB, real* backGrad, diff --git a/paddle/gserver/layers/Pool3DLayer.cpp b/paddle/gserver/layers/Pool3DLayer.cpp index fc6b9bdd2f..40a913ebfc 100644 --- a/paddle/gserver/layers/Pool3DLayer.cpp +++ b/paddle/gserver/layers/Pool3DLayer.cpp @@ -58,30 +58,9 @@ size_t Pool3DLayer::getSize() { CHECK_EQ(inputLayers_.size(), 1UL); size_t layerSize = 0; - // imgSizeD_ = inputLayers_[0]->getOutput().getFrameDepth(); - // imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight(); - // imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth(); - if (imgSizeH_ == 0) { - // imgSizeH_ = imgSizeY_; - } - if (imgSizeW_ == 0) { - // imgSizeW_ = imgSize_; - } - outputD_ = outputSize(imgSizeD_, - sizeZ_, - paddingD_, - strideD_, - /* caffeMode */ false); - outputH_ = outputSize(imgSizeH_, - sizeY_, - paddingH_, - strideH_, - /* caffeMode */ false); - outputW_ = outputSize(imgSizeW_, - sizeX_, - paddingW_, - strideW_, - /* caffeMode */ false); + outputD_ = outputSize(imgSizeD_, sizeZ_, paddingD_, strideD_, false); + outputH_ = outputSize(imgSizeH_, sizeY_, paddingH_, strideH_, false); + outputW_ = outputSize(imgSizeW_, sizeX_, paddingW_, strideW_, false); layerSize = outputD_ * outputH_ * outputW_ * channels_; getOutput().setFrameHeight(outputH_); @@ -100,37 +79,37 @@ void Pool3DLayer::forward(PassType passType) { if (poolType_ == "avg") { outMat->avgPool3DForward(*inMat, + channels_, imgSizeD_, imgSizeH_, imgSizeW_, - channels_, + outputD_, + outputH_, + outputW_, sizeZ_, sizeY_, sizeX_, strideD_, strideH_, strideW_, - outputD_, - outputH_, - outputW_, paddingD_, paddingH_, paddingW_); } else if (poolType_ == "max") { outMat->maxPool3DForward(*inMat, + channels_, imgSizeD_, imgSizeH_, imgSizeW_, - channels_, + outputD_, + outputH_, + outputW_, sizeZ_, sizeY_, sizeX_, strideD_, 
strideH_, strideW_, - outputD_, - outputH_, - outputW_, paddingD_, paddingH_, paddingW_); @@ -155,41 +134,41 @@ void Pool3DLayer::backward(const UpdateCallback& callback) { imgSizeD_, imgSizeH_, imgSizeW_, + outputD_, + outputH_, + outputW_, sizeZ_, sizeY_, sizeZ_, strideD_, strideH_, strideW_, - outputD_, - outputH_, - outputW_, - 1, - 1, paddingD_, paddingH_, - paddingW_); + paddingW_, + 1.0, + 1.0); } else if (poolType_ == "max") { inGradMat->maxPool3DBackward(*inMat, + *outGradMat, + *outMat, imgSizeD_, imgSizeH_, imgSizeW_, - *outGradMat, - *outMat, + outputD_, + outputH_, + outputW_, sizeZ_, sizeY_, sizeZ_, strideD_, strideH_, strideW_, - outputD_, - outputH_, - outputW_, - 1, - 1, paddingD_, paddingH_, - paddingW_); + paddingW_, + 1.0, + 1.0); } else { LOG(FATAL) << "Unknown pool type: " << poolType_; } diff --git a/paddle/gserver/layers/Pool3DLayer.h b/paddle/gserver/layers/Pool3DLayer.h index afc65ac2b0..8329a02f57 100644 --- a/paddle/gserver/layers/Pool3DLayer.h +++ b/paddle/gserver/layers/Pool3DLayer.h @@ -44,5 +44,6 @@ protected: int imgSizeW_, imgSizeH_, imgSizeD_; int outputW_, outputH_, outputD_; std::string poolType_; + MatrixPtr maxPoolIdx_; }; } // namespace paddle diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index e7f1489b8b..4f9216896c 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -1191,23 +1191,23 @@ void GpuMatrix::avgPoolBackward(Matrix& outGrad, } void GpuMatrix::maxPool3DForward(Matrix& inputMat, + size_t channels, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - size_t channels, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, size_t paddingD, size_t paddingH, size_t paddingW) { - CHECK(inputMat.useGpu_ == true) << "Matrix type are not equal"; + CHECK(inputMat.useGpu_) << "Matrix type are not correct"; real* inputData = inputMat.getData(); size_t num = inputMat.getHeight(); @@ -1236,32 +1236,31 @@ void GpuMatrix::maxPool3DForward(Matrix& inputMat, paddingD, paddingH, paddingW, - data_, + getData(), getStride()); } void GpuMatrix::maxPool3DBackward(Matrix& inputMat, + Matrix& outGrad, + Matrix& outV, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - Matrix& outGrad, - Matrix& outV, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, size_t paddingD, size_t paddingH, - size_t paddingW) { - CHECK(inputMat.useGpu_ == true && outGrad.useGpu_ == true && - outV.useGpu_ == true) + size_t paddingW, + real scaleTargets, + real scaleOutput) { + CHECK(inputMat.useGpu_ && outGrad.useGpu_ && outV.useGpu_) << "Matrix type are not equal"; real* inputData = inputMat.getData(); @@ -1300,28 +1299,28 @@ void GpuMatrix::maxPool3DBackward(Matrix& inputMat, paddingW, scaleTargets, scaleOutput, - data_, + getData(), outGrad.getStride()); } void GpuMatrix::avgPool3DForward(Matrix& inputMat, + size_t channels, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - size_t channels, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, size_t paddingD, size_t paddingH, size_t paddingW) { - CHECK(inputMat.useGpu_ == true) << "Matrix type are not equal"; + CHECK(inputMat.useGpu_) 
<< "Matrix type are not equal"; real* inputData = inputMat.getData(); size_t frameNum = inputMat.getHeight(); @@ -1350,7 +1349,7 @@ void GpuMatrix::avgPool3DForward(Matrix& inputMat, paddingD, paddingH, paddingW, - data_, + getData(), getStride()); } @@ -1358,21 +1357,21 @@ void GpuMatrix::avgPool3DBackward(Matrix& outGrad, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, size_t paddingD, size_t paddingH, - size_t paddingW) { - CHECK(outGrad.useGpu_ == true) << "Matrix type are not equal"; + size_t paddingW, + real scaleTargets, + real scaleOutput) { + CHECK(outGrad.useGpu_) << "Matrix type are not equal"; real* outDiff = outGrad.getData(); size_t frameNum = outGrad.getHeight(); @@ -1404,7 +1403,7 @@ void GpuMatrix::avgPool3DBackward(Matrix& outGrad, paddingW, scaleTargets, scaleOutput, - data_, + getData(), outGrad.getStride()); } @@ -2149,24 +2148,24 @@ void CpuMatrix::avgPoolBackward(Matrix& input, } void CpuMatrix::maxPool3DForward(Matrix& inputMat, + size_t channels, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - size_t channels, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, size_t paddingD, size_t paddingH, size_t paddingW) { real* inputData = inputMat.getData(); - real* outData = data_; + real* outData = getData(); size_t num = inputMat.getHeight(); size_t inWidth = imgSizeW; size_t inHeight = imgSizeH; @@ -2186,7 +2185,7 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, /* pool max one by one */ for (size_t n = 0; n < num; ++n) { // frame by frame if (!isContiguous()) { - outData = data_ + n * outStride; + outData = getData() + n * outStride; } for (size_t c = 0; c < channels; ++c) { // channel by channel for (size_t pd = 0; pd < outputD; ++pd) { @@ -2201,15 +2200,18 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, dstart = std::max(dstart, 0); hstart = std::max(hstart, 0); wstart = std::max(wstart, 0); + real maxOutData = outData[(pd * outputH + ph) * outputW + pw]; for (int d = dstart; d < dend; ++d) { for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { - outData[(pd * outputH + ph) * outputW + pw] = - std::max(outData[(pd * outputH + ph) * outputW + pw], - inputData[(d * inHeight + h) * inWidth + w]); + if (maxOutData < + inputData[(d * inHeight + h) * inWidth + w]) { + maxOutData = inputData[(d * inHeight + h) * inWidth + w]; + } } } } + outData[(pd * outputH + ph) * outputW + pw] = maxOutData; } } } @@ -2221,25 +2223,25 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, } void CpuMatrix::maxPool3DBackward(Matrix& image, + Matrix& outGrad, + Matrix& outV, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - Matrix& outGrad, - Matrix& outV, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, size_t paddingD, size_t paddingH, - size_t paddingW) { + size_t paddingW, + real scaleTargets, + real scaleOutput) { size_t num = image.getHeight(); size_t channels = size_t(width_ / imgSizeD / imgSizeH / imgSizeW); CHECK(image.getWidth() == imgSizeD * imgSizeH * imgSizeW * 
channels); @@ -2247,19 +2249,18 @@ void CpuMatrix::maxPool3DBackward(Matrix& image, CHECK(outV.getHeight() == outGrad.getHeight() && outV.getWidth() == outGrad.getWidth()); - real* tgtGrad = data_; + real* tgtGrad = getData(); real* inData = image.getData(); real* otData = outV.getData(); real* otGrad = outGrad.getData(); size_t outStride = outV.getStride(); - real* origOutData = otData; - real* origOutGrad = otGrad; + ; for (size_t n = 0; n < num; ++n) { if (!outV.isContiguous()) { - otData = origOutData + n * outStride; - otGrad = origOutGrad + n * outStride; + otData = outV.getData() + n * outStride; + otGrad = outGrad.getData() + n * outStride; } for (size_t c = 0; c < channels; ++c) { for (size_t pd = 0; pd < outputD; ++pd) { @@ -2274,7 +2275,7 @@ void CpuMatrix::maxPool3DBackward(Matrix& image, dstart = std::max(dstart, 0); hstart = std::max(hstart, 0); wstart = std::max(wstart, 0); - for (int d = 0; d < dend; ++d) { + for (int d = dstart; d < dend; ++d) { for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { tgtGrad[(d * imgSizeH + h) * imgSizeW + w] = @@ -2299,19 +2300,19 @@ void CpuMatrix::maxPool3DBackward(Matrix& image, } void CpuMatrix::avgPool3DForward(Matrix& input, + size_t channels, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - size_t channels, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, size_t paddingD, size_t paddingH, size_t paddingW) { @@ -2322,7 +2323,7 @@ void CpuMatrix::avgPool3DForward(Matrix& input, size_t inWidth = imgSizeW; CHECK(inDepth * inHeight * inWidth * channels == input.getWidth()); CHECK(outputD * outputH * outputW * channels * num == height_ * width_); - real* tgtData = data_; + real* tgtData = getData(); real* inData = input.getData(); for (size_t n = 0; n < num; ++n) { @@ -2372,20 +2373,20 @@ void CpuMatrix::avgPool3DBackward(Matrix& input, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, size_t paddingD, size_t paddingH, - size_t paddingW) { + size_t paddingW, + real scaleTargets, + real scaleOutput) { size_t num = input.getHeight(); size_t channels = input.getWidth() / outputD / outputH / outputW; CHECK(imgSizeD * imgSizeH * imgSizeW * channels == getWidth()); diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index f1534c5ea0..dec9702433 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -933,19 +933,19 @@ public: * in the sizeX of value */ virtual void maxPool3DForward(Matrix& inputMat, + size_t channels, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - size_t channels, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, size_t paddingD, size_t paddingH, size_t paddingW) { @@ -953,42 +953,42 @@ public: } virtual void maxPool3DBackward(Matrix& image, + Matrix& outGrad, + Matrix& outV, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - Matrix& outGrad, - Matrix& outV, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, - 
real scaleTargets, - real scaleOutput, size_t paddingD, size_t paddingH, - size_t paddingW) { + size_t paddingW, + real scaleTargets, + real scaleOutput) { LOG(FATAL) << "Not implemeted"; } virtual void avgPool3DForward(Matrix& input, + size_t channels, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - size_t channels, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, size_t paddingD, size_t paddingH, size_t paddingW) { @@ -999,20 +999,20 @@ public: size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, size_t paddingD, size_t paddingH, - size_t paddingW) { + size_t paddingW, + real scaleTargets, + real scaleOutput) { LOG(FATAL) << "Not implemeted"; } @@ -1435,60 +1435,59 @@ public: size_t paddingH, size_t paddingW); - ///////////////////////// void maxPool3DForward(Matrix& inputMat, + size_t channels, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - size_t channels, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, size_t paddingD, size_t paddingH, size_t paddingW); void maxPool3DBackward(Matrix& image, + Matrix& outGrad, + Matrix& outV, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - Matrix& outGrad, - Matrix& outV, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, size_t paddingD, size_t paddingH, - size_t paddingW); + size_t paddingW, + real scaleTargets, + real scaleOutput); void avgPool3DForward(Matrix& input, + size_t channels, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - size_t channels, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, size_t paddingD, size_t paddingH, size_t paddingW); @@ -1497,20 +1496,20 @@ public: size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, size_t paddingD, size_t paddingH, - size_t paddingW); + size_t paddingW, + real scaleTargets, + real scaleOutput); void maxSequenceForward(Matrix& input, const IVector& sequence, @@ -1670,60 +1669,60 @@ public: real scaleOutput, size_t paddingH, size_t paddingW); - ////////////////////// + void maxPool3DForward(Matrix& inputMat, + size_t channels, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - size_t channels, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, size_t paddingD, size_t paddingH, size_t paddingW); void maxPool3DBackward(Matrix& image, + Matrix& outGrad, + Matrix& outV, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - Matrix& outGrad, - Matrix& 
outV, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, size_t paddingD, size_t paddingH, - size_t paddingW); + size_t paddingW, + real scaleTargets, + real scaleOutput); void avgPool3DForward(Matrix& input, + size_t channels, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - size_t channels, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, size_t paddingD, size_t paddingH, size_t paddingW); @@ -1732,20 +1731,20 @@ public: size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, size_t paddingD, size_t paddingH, - size_t paddingW); + size_t paddingW, + real scaleTargets, + real scaleOutput); void maxSequenceForward(Matrix& input, const IVector& sequence, diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp index 7a961d2751..21ee8543cd 100644 --- a/paddle/math/tests/test_matrixCompare.cpp +++ b/paddle/math/tests/test_matrixCompare.cpp @@ -1204,7 +1204,6 @@ TEST(Matrix, warpCTC) { } } -///// void testMatrixPool3D(int depth, int height, int width) { int channel = 3; int filterX = 3, filterY = 4, filterZ = 5; @@ -1226,38 +1225,37 @@ void testMatrixPool3D(int depth, int height, int width) { cpuImage->randomizeUniform(); gpuImage->copyFrom(*cpuImage); - // std::cout << "test maxPool3DForward...\n"; cpuOutput->maxPool3DForward(*cpuImage, + channel, depth, height, width, - channel, + outD, + outH, + outW, filterZ, filterY, filterX, strideZ, strideY, strideX, - outD, - outH, - outW, padZ, padY, padX); gpuOutput->maxPool3DForward(*gpuImage, + channel, depth, height, width, - channel, + outD, + outH, + outW, filterZ, filterY, filterX, strideZ, strideY, strideX, - outD, - outH, - outW, padZ, padY, padX); @@ -1265,39 +1263,38 @@ void testMatrixPool3D(int depth, int height, int width) { cpuImage->randomizeUniform(); gpuImage->copyFrom(*cpuImage); - // std::cout << "test avgPool3DForward...\n"; cpuOutput->avgPool3DForward(*cpuImage, + channel, depth, height, width, - channel, + outD, + outH, + outW, filterZ, filterY, filterX, strideZ, strideY, strideX, - outD, - outH, - outW, padZ, padY, padX); gpuOutput->avgPool3DForward(*gpuImage, + channel, depth, height, width, - channel, + outD, + outH, + outW, filterZ, filterY, filterX, strideZ, strideY, strideX, - outD, - outH, - outW, padZ, padY, padX); @@ -1306,98 +1303,96 @@ void testMatrixPool3D(int depth, int height, int width) { gpuImage->copyFrom(*cpuImage); cpuOutput->randomizeUniform(); gpuOutput->copyFrom(*cpuOutput); - // std::cout << "test avgPool3DBackward...\n"; cpuImage->avgPool3DBackward(*cpuOutput, depth, height, width, + outD, + outH, + outW, filterZ, filterY, filterX, strideZ, strideY, strideX, - outD, - outH, - outW, - 1, - 1, padZ, padY, - padX); + padX, + 1.0, + 1.0); gpuImage->avgPool3DBackward(*gpuOutput, depth, height, width, + outD, + outH, + outW, filterZ, filterY, filterX, strideZ, strideY, strideX, - outD, - outH, - outW, - 1, - 1, padZ, padY, - padX); + padX, + 1.0, + 1.0); TensorCheckErr(*cpuImage, *gpuImage); 
cpuImage->randomizeUniform(); gpuImage->copyFrom(*cpuImage); cpuOutput->randomizeUniform(); gpuOutput->copyFrom(*cpuOutput); - // std::cout << "test maxPool3DBackward...\n"; cpuImage->maxPool3DBackward(*cpuImage, + *cpuOutput, + *cpuOutput, depth, height, width, - *cpuOutput, - *cpuOutput, + outD, + outH, + outW, filterZ, filterY, filterX, strideZ, strideY, strideX, - outD, - outH, - outW, - 1, - 1, padZ, padY, - padX); + padX, + 1.0, + 1.0); gpuImage->maxPool3DBackward(*gpuImage, + *gpuOutput, + *gpuOutput, depth, height, width, - *gpuOutput, - *gpuOutput, + outD, + outH, + outW, filterZ, filterY, filterX, strideZ, strideY, strideX, - outD, - outH, - outW, - 1, - 1, padZ, padY, - padX); + padX, + 1.0, + 1.0); TensorCheckErr(*cpuImage, *gpuImage); } TEST(Matrix, Pool3D) { - for (auto depth : {9, 16, 64, 128}) { - for (auto height : {9, 11, 128, 256}) { + for (auto depth : {9, 16, 64}) { + for (auto height : {9, 11, 128}) { for (auto width : {9, 32, 128}) { VLOG(3) << "depth=" << depth << " height=" << height << " width=" << width; From 860bf192c904627ee0446051fe97911eb11895ad Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Thu, 24 Aug 2017 19:28:56 +0800 Subject: [PATCH 093/170] Add maxPoolIdx --- paddle/cuda/include/hl_cnn.h | 4 +- paddle/cuda/include/stub/hl_cnn_stub.h | 4 +- paddle/cuda/src/hl_cuda_cnn.cu | 73 +-- paddle/gserver/layers/Pool3DLayer.cpp | 11 +- paddle/math/Matrix.cpp | 86 ++-- paddle/math/Matrix.h | 18 +- paddle/math/tests/test_matrixCompare.cpp | 564 +++++++++++++++-------- 7 files changed, 473 insertions(+), 287 deletions(-) diff --git a/paddle/cuda/include/hl_cnn.h b/paddle/cuda/include/hl_cnn.h index 84f1c84359..6b56d9ec8d 100644 --- a/paddle/cuda/include/hl_cnn.h +++ b/paddle/cuda/include/hl_cnn.h @@ -192,11 +192,10 @@ extern void hl_maxpool3D_forward(const int frameCnt, const int paddingH, const int paddingW, real* tgtData, + real* maxPoolIdxData, const int tgtStride); extern void hl_maxpool3D_backward(const int frameCnt, - const real* inputData, - const real* outData, const real* outGrad, const int channels, const int depth, @@ -217,6 +216,7 @@ extern void hl_maxpool3D_backward(const int frameCnt, real scaleA, real scaleB, real* targetGrad, + real* maxPoolIdxData, const int outStride); extern void hl_avgpool3D_forward(const int frameCnt, diff --git a/paddle/cuda/include/stub/hl_cnn_stub.h b/paddle/cuda/include/stub/hl_cnn_stub.h index 6750f537bf..a76dbf0b65 100644 --- a/paddle/cuda/include/stub/hl_cnn_stub.h +++ b/paddle/cuda/include/stub/hl_cnn_stub.h @@ -106,11 +106,10 @@ inline void hl_maxpool3D_forward(const int frameCnt, const int paddingH, const int paddingW, real* tgtData, + real* maxPoolIdxData, const int tgtStride) {} inline void hl_maxpool3D_backward(const int frameCnt, - const real* inputData, - const real* outData, const real* outGrad, const int channels, const int depth, @@ -131,6 +130,7 @@ inline void hl_maxpool3D_backward(const int frameCnt, real scaleA, real scaleB, real* targetGrad, + real* maxPoolIdxData, const int outStride) {} inline void hl_avgpool3D_forward(const int frameCnt, diff --git a/paddle/cuda/src/hl_cuda_cnn.cu b/paddle/cuda/src/hl_cuda_cnn.cu index 458c347728..95440c9446 100644 --- a/paddle/cuda/src/hl_cuda_cnn.cu +++ b/paddle/cuda/src/hl_cuda_cnn.cu @@ -366,10 +366,11 @@ __global__ void KeMaxPool3DForward(const int nthreads, const int strideD, const int strideH, const int strideW, - const int offsetD, - const int offsetH, - const int offsetW, + const int padD, + const int padH, + const int padW, real* tgtData, + real* 
maxPoolIdxData, const int tgtStride) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < (nthreads); index += blockDim.x * gridDim.x) { @@ -378,9 +379,9 @@ __global__ void KeMaxPool3DForward(const int nthreads, int pd = (index / pooledW / pooledH) % pooledD; int c = (index / pooledW / pooledH / pooledD) % channels; int frameNum = index / pooledW / pooledH / pooledD / channels; - int dstart = pd * strideD - offsetD; - int hstart = ph * strideH - offsetH; - int wstart = pw * strideW - offsetW; + int dstart = pd * strideD - padD; + int hstart = ph * strideH - padH; + int wstart = pw * strideW - padW; int dend = min(dstart + ksizeD, depth); int hend = min(hstart + ksizeH, height); int wend = min(wstart + ksizeW, width); @@ -388,18 +389,22 @@ __global__ void KeMaxPool3DForward(const int nthreads, hstart = max(hstart, 0); wstart = max(wstart, 0); real maxval = -FLT_MAX; + int maxIdx = -1; inputData += (frameNum * channels + c) * depth * height * width; for (int d = dstart; d < dend; ++d) { for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { - if (maxval < inputData[(d * height + h) * width + w]) + if (maxval < inputData[(d * height + h) * width + w]) { maxval = inputData[(d * height + h) * width + w]; + maxIdx = (d * height + h) * width + w; + } } } } int tgtIndex = index % (pooledW * pooledH * pooledD * channels) + frameNum * tgtStride; tgtData[tgtIndex] = maxval; + maxPoolIdxData[tgtIndex] = maxIdx; } } @@ -418,10 +423,11 @@ void hl_maxpool3D_forward(const int frameCnt, const int strideD, const int strideH, const int strideW, - const int paddingD, - const int paddingH, - const int paddingW, + const int padD, + const int padH, + const int padW, real* tgtData, + real* maxPoolIdxData, const int tgtStride) { int num_kernels = pooledD * pooledH * pooledW * channels * frameCnt; int blocks = (num_kernels + 1024 - 1) / 1024; @@ -443,17 +449,16 @@ void hl_maxpool3D_forward(const int frameCnt, strideD, strideH, strideW, - paddingD, - paddingH, - paddingW, + padD, + padH, + padW, tgtData, + maxPoolIdxData, tgtStride); CHECK_SYNC("hl_maxpool3D_forward failed"); } __global__ void KeMaxPool3DBackward(const int nthreads, - const real* inputData, - const real* outData, const real* outGrad, const int channels, const int depth, @@ -474,33 +479,35 @@ __global__ void KeMaxPool3DBackward(const int nthreads, real scaleA, real scaleB, real* targetGrad, + real* maxPoolIdxData, const int outStride) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < (nthreads); index += blockDim.x * gridDim.x) { - // find out the local index - // find out the local offset - int offsetW = index % width + padW; - int offsetH = (index / width) % height + padH; - int offsetD = (index / width / height) % depth + padD; + int offsetW = index % width; + int offsetH = (index / width) % height; + int offsetD = (index / width / height) % depth; int offsetC = (index / width / height / depth) % channels; int frameNum = index / width / height / depth / channels; - int pdstart = (offsetD < sizeZ) ? 0 : (offsetD - sizeZ) / strideD + 1; - int phstart = (offsetH < sizeY) ? 0 : (offsetH - sizeY) / strideH + 1; - int pwstart = (offsetW < sizeX) ? 0 : (offsetW - sizeX) / strideW + 1; - int pdend = min(offsetD / strideD + 1, pooledD); - int phend = min(offsetH / strideH + 1, pooledH); - int pwend = min(offsetW / strideW + 1, pooledW); + int pdstart = + (offsetD + padD < sizeZ) ? 0 : (offsetD + padD - sizeZ) / strideD + 1; + int phstart = + (offsetH + padH < sizeY) ? 
0 : (offsetH + padH - sizeY) / strideH + 1; + int pwstart = + (offsetW + padW < sizeX) ? 0 : (offsetW + padW - sizeX) / strideW + 1; + int pdend = min((offsetD + padD) / strideD + 1, pooledD); + int phend = min((offsetH + padH) / strideH + 1, pooledH); + int pwend = min((offsetW + padW) / strideW + 1, pooledW); real gradient = 0; - real input = inputData[index]; - - outData += ((frameNum * channels + offsetC) * pooledD * pooledH * pooledW); outGrad += ((frameNum * channels + offsetC) * pooledD * pooledH * pooledW); + maxPoolIdxData += + ((frameNum * channels + offsetC) * pooledD * pooledH * pooledW); for (int pd = pdstart; pd < pdend; ++pd) { for (int ph = phstart; ph < phend; ++ph) { for (int pw = pwstart; pw < pwend; ++pw) { - if (input == outData[(pd * pooledH + ph) * pooledW + pw]) + if (((offsetD * height + offsetH) * width + offsetW) == + maxPoolIdxData[(pd * pooledH + ph) * pooledW + pw]) gradient += outGrad[(pd * pooledH + ph) * pooledW + pw]; } } @@ -510,8 +517,6 @@ __global__ void KeMaxPool3DBackward(const int nthreads, } void hl_maxpool3D_backward(const int frameCnt, - const real* inputData, - const real* outData, const real* outGrad, const int channels, const int depth, @@ -532,13 +537,12 @@ void hl_maxpool3D_backward(const int frameCnt, real scaleA, real scaleB, real* targetGrad, + real* maxPoolIdxData, const int outStride) { int num_kernels = depth * height * width * channels * frameCnt; int blocks = (num_kernels + 1024 - 1) / 1024; KeMaxPool3DBackward<<>>(num_kernels, - inputData, - outData, outGrad, channels, depth, @@ -559,6 +563,7 @@ void hl_maxpool3D_backward(const int frameCnt, scaleA, scaleB, targetGrad, + maxPoolIdxData, outStride); CHECK_SYNC("hl_maxpool3D_backward"); } diff --git a/paddle/gserver/layers/Pool3DLayer.cpp b/paddle/gserver/layers/Pool3DLayer.cpp index 40a913ebfc..199f21adb1 100644 --- a/paddle/gserver/layers/Pool3DLayer.cpp +++ b/paddle/gserver/layers/Pool3DLayer.cpp @@ -72,9 +72,10 @@ size_t Pool3DLayer::getSize() { void Pool3DLayer::forward(PassType passType) { Layer::forward(passType); const MatrixPtr& inMat = inputLayers_[0]->getOutputValue(); - int batchSize = inMat->getHeight(); - int outWidth = getSize(); + size_t batchSize = inMat->getHeight(); + size_t outWidth = getSize(); resetOutput(batchSize, outWidth); + Matrix::resizeOrCreate(maxPoolIdx_, batchSize, outWidth, false, useGpu_); const MatrixPtr outMat = getOutputValue(); if (poolType_ == "avg") { @@ -97,6 +98,7 @@ void Pool3DLayer::forward(PassType passType) { paddingW_); } else if (poolType_ == "max") { outMat->maxPool3DForward(*inMat, + *maxPoolIdx_, channels_, imgSizeD_, imgSizeH_, @@ -149,9 +151,8 @@ void Pool3DLayer::backward(const UpdateCallback& callback) { 1.0, 1.0); } else if (poolType_ == "max") { - inGradMat->maxPool3DBackward(*inMat, - *outGradMat, - *outMat, + inGradMat->maxPool3DBackward(*outGradMat, + *maxPoolIdx_, imgSizeD_, imgSizeH_, imgSizeW_, diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 4f9216896c..54c2eae475 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -1191,6 +1191,7 @@ void GpuMatrix::avgPoolBackward(Matrix& outGrad, } void GpuMatrix::maxPool3DForward(Matrix& inputMat, + Matrix& maxPoolIdx, size_t channels, size_t imgSizeD, size_t imgSizeH, @@ -1210,6 +1211,7 @@ void GpuMatrix::maxPool3DForward(Matrix& inputMat, CHECK(inputMat.useGpu_) << "Matrix type are not correct"; real* inputData = inputMat.getData(); + real* maxPoolIdxData = maxPoolIdx.getData(); size_t num = inputMat.getHeight(); size_t width = imgSizeW; size_t height 
= imgSizeH; @@ -1237,12 +1239,12 @@ void GpuMatrix::maxPool3DForward(Matrix& inputMat, paddingH, paddingW, getData(), + maxPoolIdxData, getStride()); } -void GpuMatrix::maxPool3DBackward(Matrix& inputMat, - Matrix& outGrad, - Matrix& outV, +void GpuMatrix::maxPool3DBackward(Matrix& outGrad, + Matrix& maxPoolIdx, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, @@ -1260,26 +1262,21 @@ void GpuMatrix::maxPool3DBackward(Matrix& inputMat, size_t paddingW, real scaleTargets, real scaleOutput) { - CHECK(inputMat.useGpu_ && outGrad.useGpu_ && outV.useGpu_) - << "Matrix type are not equal"; + CHECK(outGrad.useGpu_ && maxPoolIdx.useGpu_) << "Matrix type are not equal"; - real* inputData = inputMat.getData(); - real* outData = outV.getData(); real* outDiff = outGrad.getData(); - size_t frameNum = inputMat.getHeight(); - size_t channels = outV.getWidth() / outputD / outputH / outputW; + real* maxPoolIdxData = maxPoolIdx.getData(); + size_t frameNum = getHeight(); + size_t channels = outGrad.getWidth() / outputD / outputH / outputW; size_t width = imgSizeW; size_t height = imgSizeH; size_t depth = imgSizeD; - CHECK(depth * height * width * channels == inputMat.getWidth()); - CHECK(height_ == inputMat.getHeight()); + CHECK(depth * height * width * channels == getWidth()); CHECK(width_ == depth * width * height * channels); - CHECK(outGrad.getHeight() == outV.getHeight() && - outGrad.getWidth() == outV.getWidth()); + CHECK(outGrad.getHeight() == maxPoolIdx.getHeight() && + outGrad.getWidth() == maxPoolIdx.getWidth()); hl_maxpool3D_backward(frameNum, - inputData, - outData, outDiff, channels, depth, @@ -1300,6 +1297,7 @@ void GpuMatrix::maxPool3DBackward(Matrix& inputMat, scaleTargets, scaleOutput, getData(), + maxPoolIdxData, outGrad.getStride()); } @@ -2148,6 +2146,7 @@ void CpuMatrix::avgPoolBackward(Matrix& input, } void CpuMatrix::maxPool3DForward(Matrix& inputMat, + Matrix& maxPoolIdx, size_t channels, size_t imgSizeD, size_t imgSizeH, @@ -2166,6 +2165,7 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, size_t paddingW) { real* inputData = inputMat.getData(); real* outData = getData(); + real* maxPoolIdxData = maxPoolIdx.getData(); size_t num = inputMat.getHeight(); size_t inWidth = imgSizeW; size_t inHeight = imgSizeH; @@ -2179,6 +2179,7 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, for (size_t i = 0; i < height_; i++) { for (size_t j = 0; j < width_; j++) { outData[(i)*outStride + j] = -(real)FLT_MAX; + maxPoolIdxData[(i)*outStride + j] = -1; } } @@ -2186,6 +2187,7 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, for (size_t n = 0; n < num; ++n) { // frame by frame if (!isContiguous()) { outData = getData() + n * outStride; + maxPoolIdxData = maxPoolIdx.getData() + n * outStride; } for (size_t c = 0; c < channels; ++c) { // channel by channel for (size_t pd = 0; pd < outputD; ++pd) { @@ -2200,6 +2202,7 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, dstart = std::max(dstart, 0); hstart = std::max(hstart, 0); wstart = std::max(wstart, 0); + int maxIdx = -1; real maxOutData = outData[(pd * outputH + ph) * outputW + pw]; for (int d = dstart; d < dend; ++d) { for (int h = hstart; h < hend; ++h) { @@ -2207,24 +2210,26 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, if (maxOutData < inputData[(d * inHeight + h) * inWidth + w]) { maxOutData = inputData[(d * inHeight + h) * inWidth + w]; + maxIdx = (d * inHeight + h) * inWidth + w; } } } } outData[(pd * outputH + ph) * outputW + pw] = maxOutData; + maxPoolIdxData[(pd * outputH + ph) * outputW + pw] = maxIdx; } } } // 
compute offset inputData += inDepth * inHeight * inWidth; outData += outputD * outputH * outputW; + maxPoolIdxData += outputD * outputH * outputW; } } } -void CpuMatrix::maxPool3DBackward(Matrix& image, - Matrix& outGrad, - Matrix& outV, +void CpuMatrix::maxPool3DBackward(Matrix& outGrad, + Matrix& maxPoolIdx, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, @@ -2242,59 +2247,38 @@ void CpuMatrix::maxPool3DBackward(Matrix& image, size_t paddingW, real scaleTargets, real scaleOutput) { - size_t num = image.getHeight(); + size_t num = getHeight(); size_t channels = size_t(width_ / imgSizeD / imgSizeH / imgSizeW); - CHECK(image.getWidth() == imgSizeD * imgSizeH * imgSizeW * channels); - CHECK(image.getHeight() == height_ && image.getWidth() == width_); - CHECK(outV.getHeight() == outGrad.getHeight() && - outV.getWidth() == outGrad.getWidth()); + CHECK(maxPoolIdx.getHeight() == outGrad.getHeight() && + maxPoolIdx.getWidth() == outGrad.getWidth()); real* tgtGrad = getData(); - real* inData = image.getData(); - real* otData = outV.getData(); real* otGrad = outGrad.getData(); + real* maxPoolIdxData = maxPoolIdx.getData(); - size_t outStride = outV.getStride(); + size_t outStride = outGrad.getStride(); ; for (size_t n = 0; n < num; ++n) { - if (!outV.isContiguous()) { - otData = outV.getData() + n * outStride; + if (!outGrad.isContiguous()) { otGrad = outGrad.getData() + n * outStride; + maxPoolIdxData = maxPoolIdx.getData() + n * outStride; } for (size_t c = 0; c < channels; ++c) { for (size_t pd = 0; pd < outputD; ++pd) { for (size_t ph = 0; ph < outputH; ++ph) { for (size_t pw = 0; pw < outputW; ++pw) { - int dstart = pd * strideD - paddingD; - int hstart = ph * strideH - paddingH; - int wstart = pw * strideW - paddingW; - int dend = std::min(dstart + sizeZ, imgSizeD); - int hend = std::min(hstart + sizeY, imgSizeH); - int wend = std::min(wstart + sizeX, imgSizeW); - dstart = std::max(dstart, 0); - hstart = std::max(hstart, 0); - wstart = std::max(wstart, 0); - for (int d = dstart; d < dend; ++d) { - for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { - tgtGrad[(d * imgSizeH + h) * imgSizeW + w] = - scaleTargets * - tgtGrad[(d * imgSizeH + h) * imgSizeW + w] + - scaleOutput * otGrad[(pd * outputH + ph) * outputW + pw] * - (inData[(d * imgSizeH + h) * imgSizeW + w] == - otData[(pd * outputH + ph) * outputW + pw]); - } - } - } + const size_t index = (pd * outputH + ph) * outputW + pw; + const size_t tgtIdx = static_cast(maxPoolIdxData[index]); + tgtGrad[tgtIdx] = + scaleTargets * tgtGrad[tgtIdx] + scaleOutput * otGrad[index]; } } } // offset - inData += imgSizeD * imgSizeH * imgSizeW; tgtGrad += imgSizeD * imgSizeH * imgSizeW; - otData += outputD * outputH * outputW; otGrad += outputD * outputH * outputW; + maxPoolIdxData += outputD * outputH * outputW; } } } diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index dec9702433..e674c1e9ab 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -933,6 +933,7 @@ public: * in the sizeX of value */ virtual void maxPool3DForward(Matrix& inputMat, + Matrix& maxPoolIdx, size_t channels, size_t imgSizeD, size_t imgSizeH, @@ -952,9 +953,8 @@ public: LOG(FATAL) << "Not implemeted"; } - virtual void maxPool3DBackward(Matrix& image, - Matrix& outGrad, - Matrix& outV, + virtual void maxPool3DBackward(Matrix& outGrad, + Matrix& maxPoolIdx, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, @@ -1436,6 +1436,7 @@ public: size_t paddingW); void maxPool3DForward(Matrix& inputMat, + Matrix& maxPoolIdx, size_t channels, 
size_t imgSizeD, size_t imgSizeH, @@ -1453,9 +1454,8 @@ public: size_t paddingH, size_t paddingW); - void maxPool3DBackward(Matrix& image, - Matrix& outGrad, - Matrix& outV, + void maxPool3DBackward(Matrix& outGrad, + Matrix& maxPoolIdx, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, @@ -1671,6 +1671,7 @@ public: size_t paddingW); void maxPool3DForward(Matrix& inputMat, + Matrix& maxPoolIdx, size_t channels, size_t imgSizeD, size_t imgSizeH, @@ -1688,9 +1689,8 @@ public: size_t paddingH, size_t paddingW); - void maxPool3DBackward(Matrix& image, - Matrix& outGrad, - Matrix& outV, + void maxPool3DBackward(Matrix& outGrad, + Matrix& maxPoolIdx, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp index 21ee8543cd..d7ad6f18ac 100644 --- a/paddle/math/tests/test_matrixCompare.cpp +++ b/paddle/math/tests/test_matrixCompare.cpp @@ -1204,202 +1204,398 @@ TEST(Matrix, warpCTC) { } } -void testMatrixPool3D(int depth, int height, int width) { - int channel = 3; - int filterX = 3, filterY = 4, filterZ = 5; - int strideX = 2, strideY = 2, strideZ = 2; - int padX = 1, padY = 1, padZ = 1; - - MatrixPtr cpuImage = - std::make_shared(1, channel * depth * height * width); - MatrixPtr gpuImage = - std::make_shared(1, channel * depth * height * width); - - int outD = outputSize(depth, filterZ, padZ, strideZ, true); - int outH = outputSize(height, filterY, padZ, strideY, true); - int outW = outputSize(width, filterX, padZ, strideX, true); - - int colBufWidth = outD * outH * outW; - MatrixPtr cpuOutput = std::make_shared(1, channel * colBufWidth); - MatrixPtr gpuOutput = std::make_shared(1, channel * colBufWidth); - - cpuImage->randomizeUniform(); - gpuImage->copyFrom(*cpuImage); - cpuOutput->maxPool3DForward(*cpuImage, - channel, - depth, - height, - width, - outD, - outH, - outW, - filterZ, - filterY, - filterX, - strideZ, - strideY, - strideX, - padZ, - padY, - padX); - gpuOutput->maxPool3DForward(*gpuImage, - channel, - depth, - height, - width, - outD, - outH, - outW, - filterZ, - filterY, - filterX, - strideZ, - strideY, - strideX, - padZ, - padY, - padX); - TensorCheckErr(*cpuOutput, *gpuOutput); +void testMaxPool3DFwdBwd(int numSamples, + int channels, + int imgSizeD, + int imgSizeH, + int imgSizeW, + int ksizeD, + int ksizeH, + int ksizeW, + int strideD, + int strideH, + int strideW, + int padD, + int padH, + int padW) { + int outD = outputSize(imgSizeD, ksizeD, padD, strideD, true); + int outH = outputSize(imgSizeH, ksizeH, padH, strideH, true); + int outW = outputSize(imgSizeW, ksizeW, padW, strideW, true); + + int inWidth = channels * imgSizeD * imgSizeH * imgSizeW; + MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false); + MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true); - cpuImage->randomizeUniform(); - gpuImage->copyFrom(*cpuImage); - cpuOutput->avgPool3DForward(*cpuImage, - channel, - depth, - height, - width, - outD, - outH, - outW, - filterZ, - filterY, - filterX, - strideZ, - strideY, - strideX, - padZ, - padY, - padX); - - gpuOutput->avgPool3DForward(*gpuImage, - channel, - depth, - height, - width, - outD, - outH, - outW, - filterZ, - filterY, - filterX, - strideZ, - strideY, - strideX, - padZ, - padY, - padX); - TensorCheckErr(*cpuOutput, *gpuOutput); - cpuImage->randomizeUniform(); - gpuImage->copyFrom(*cpuImage); - cpuOutput->randomizeUniform(); - gpuOutput->copyFrom(*cpuOutput); - cpuImage->avgPool3DBackward(*cpuOutput, - depth, - height, - width, 
- outD, - outH, - outW, - filterZ, - filterY, - filterX, - strideZ, - strideY, - strideX, - padZ, - padY, - padX, - 1.0, - 1.0); - - gpuImage->avgPool3DBackward(*gpuOutput, - depth, - height, - width, - outD, - outH, - outW, - filterZ, - filterY, - filterX, - strideZ, - strideY, - strideX, - padZ, - padY, - padX, - 1.0, - 1.0); - TensorCheckErr(*cpuImage, *gpuImage); - - cpuImage->randomizeUniform(); - gpuImage->copyFrom(*cpuImage); - cpuOutput->randomizeUniform(); - gpuOutput->copyFrom(*cpuOutput); - cpuImage->maxPool3DBackward(*cpuImage, - *cpuOutput, - *cpuOutput, - depth, - height, - width, + int outWidth = channels * outD * outH * outW; + MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false); + MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true); + MatrixPtr maxIdx = CpuMatrix::create(numSamples, outWidth, false, false); + MatrixPtr maxIdxGpu = GpuMatrix::create(numSamples, outWidth, false, true); + + input->randomizeUniform(); + target->randomizeUniform(); + inputGpu->copyFrom(*input); + targetGpu->copyFrom(*target); + + target->maxPool3DForward(*input, + *maxIdx, + channels, + imgSizeD, + imgSizeH, + imgSizeW, + outD, + outH, + outW, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW); + targetGpu->maxPool3DForward(*inputGpu, + *maxIdxGpu, + channels, + imgSizeD, + imgSizeH, + imgSizeW, outD, outH, outW, - filterZ, - filterY, - filterX, - strideZ, - strideY, - strideX, - padZ, - padY, - padX, - 1.0, - 1.0); - - gpuImage->maxPool3DBackward(*gpuImage, - *gpuOutput, - *gpuOutput, - depth, - height, - width, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW); + MatrixPtr targetCheck = CpuMatrix::create(numSamples, outWidth, false, false); + targetCheck->copyFrom(*targetGpu); + checkMatrixEqual(target, targetCheck); + + MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false); + MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true); + MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false); + MatrixPtr targetGpuGrad = + GpuMatrix::create(numSamples, outWidth, false, true); + + inputGrad->randomizeUniform(); + targetGrad->randomizeUniform(); + inputGpuGrad->copyFrom(*inputGrad); + targetGpuGrad->copyFrom(*targetGrad); + + inputGrad->maxPool3DBackward(*targetGrad, + *maxIdx, + imgSizeD, + imgSizeH, + imgSizeW, + outD, + outH, + outW, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW, + 1.0, + 1.0); + inputGpuGrad->maxPool3DBackward(*targetGpuGrad, + *maxIdxGpu, + imgSizeD, + imgSizeH, + imgSizeW, + outD, + outH, + outW, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW, + 1.0, + 1.0); + MatrixPtr targetBwdCheck = + CpuMatrix::create(numSamples, inWidth, false, false); + targetBwdCheck->copyFrom(*inputGpuGrad); + checkMatrixEqual(inputGrad, targetBwdCheck); +} + +void testAvgPool3DFwdBwd(int numSamples, + int channels, + int imgSizeD, + int imgSizeH, + int imgSizeW, + int ksizeD, + int ksizeH, + int ksizeW, + int strideD, + int strideH, + int strideW, + int padD, + int padH, + int padW) { + int outD = outputSize(imgSizeD, ksizeD, padD, strideD, true); + int outH = outputSize(imgSizeH, ksizeH, padH, strideH, true); + int outW = outputSize(imgSizeW, ksizeW, padW, strideW, true); + + int inWidth = imgSizeD * imgSizeH * imgSizeW * channels; + MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false); + MatrixPtr inputGpu = 
GpuMatrix::create(numSamples, inWidth, false, true); + + int outWidth = channels * outD * outH * outW; + MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false); + MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true); + + input->randomizeUniform(); + target->randomizeUniform(); + inputGpu->copyFrom(*input); + targetGpu->copyFrom(*target); + + target->avgPool3DForward(*input, + channels, + imgSizeD, + imgSizeH, + imgSizeW, + outD, + outH, + outW, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW); + + targetGpu->avgPool3DForward(*inputGpu, + channels, + imgSizeD, + imgSizeH, + imgSizeW, outD, outH, outW, - filterZ, - filterY, - filterX, - strideZ, - strideY, - strideX, - padZ, - padY, - padX, - 1.0, - 1.0); - TensorCheckErr(*cpuImage, *gpuImage); + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW); + + TensorCheckErr(*target, *targetGpu); + + MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false); + MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true); + MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false); + MatrixPtr targetGpuGrad = + GpuMatrix::create(numSamples, outWidth, false, true); + + inputGrad->randomizeUniform(); + targetGrad->randomizeUniform(); + inputGpuGrad->copyFrom(*inputGrad); + targetGpuGrad->copyFrom(*targetGrad); + + inputGrad->avgPool3DBackward(*targetGrad, + imgSizeD, + imgSizeH, + imgSizeW, + outD, + outH, + outW, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW, + 1.0, + 1.0); + + inputGpuGrad->avgPool3DBackward(*targetGpuGrad, + imgSizeD, + imgSizeH, + imgSizeW, + outD, + outH, + outW, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW, + 1.0, + 1.0); + TensorCheckErr(*inputGrad, *inputGpuGrad); } -TEST(Matrix, Pool3D) { - for (auto depth : {9, 16, 64}) { - for (auto height : {9, 11, 128}) { - for (auto width : {9, 32, 128}) { - VLOG(3) << "depth=" << depth << " height=" << height - << " width=" << width; - testMatrixPool3D(depth, height, width); +// TODO(yi): I noticed many such blindly combinatorial tests in this +// file. They are no help to locate defects at all. 
+TEST(Matrix, Pool3DFwdBwd) { + for (auto numSamples : {1, 3}) { + for (auto channels : {3}) { + for (auto imgSizeD : {9, 16}) { + for (auto imgSizeH : {9, 32}) { + for (auto imgSizeW : {9, 32}) { + for (auto sizeX : {3}) { + for (auto sizeY : {3}) { + for (auto sizeZ : {3}) { + for (auto sD : {2}) { + for (auto sH : {2}) { + for (auto sW : {2}) { + for (auto pD : {0, (sizeZ - 1) / 2}) { + for (auto pH : {0, (sizeY - 1) / 2}) { + for (auto pW : {0, (sizeX - 1) / 2}) { + VLOG(3) << " numSamples=" << numSamples + << " channels=" << channels + << " imgSizeD=" << imgSizeD + << " imgSizeH=" << imgSizeH + << " imgSizeW=" << imgSizeW + << " sizeX=" << sizeX + << " sizeY=" << sizeY + << " sizeZ=" << sizeZ << " strideD=" << sD + << " strideH=" << sH << " strideW=" << sW + << " padingD=" << pD << " padingH=" << pH + << " padingW=" << pW; + + testMaxPool3DFwdBwd(numSamples, + channels, + imgSizeD, + imgSizeH, + imgSizeW, + sizeX, + sizeY, + sizeZ, + sD, + sH, + sW, + pD, + pH, + pW); + testAvgPool3DFwdBwd(numSamples, + channels, + imgSizeD, + imgSizeH, + imgSizeW, + sizeX, + sizeY, + sizeZ, + sD, + sH, + sW, + pD, + pH, + pW); + } + } + } + } + } + } + } + } + } + } + } } } } + + // + // for (auto numSamples : {1, 3}) { + // for (auto channels : {1, 3}) { + // for (auto imgSizeD : {9,16}) { + // for (auto imgSizeH : {9, 32}) { + // for (auto imgSizeW : {9, 32}) { + // for (auto sizeX : {2, 3}) { + // for (auto sizeY : {2, 3}) { + // for (auto sizeZ : {2,3}){ + // for (auto sD : {1, 2}) { + // for (auto sH : {1, 2}) { + // for (auto sW : {1, 2}) { + // for (auto pD : {0, (sizeZ - 1) / 2}){ + // for (auto pH : {0, (sizeY - 1) / 2}) { + // for (auto pW : {0, (sizeX - 1) / 2}) { + // VLOG(3) << " numSamples=" << numSamples + // << " channels=" << channels + // << " imgSizeD=" << imgSizeD + // << " imgSizeH=" << imgSizeH + // << " imgSizeW=" << imgSizeW + // << " sizeX=" << sizeX + // << " sizeY=" << sizeY + // << " sizeZ=" << sizeZ + // << " strideD=" << sD + // << " strideH=" << sH + // << " strideW=" << sW + // << " padingD=" << pD + // << " padingH=" << pH + // << " padingW=" << pW; + // + // testMaxPool3DFwdBwd(numSamples, + // channels, + // imgSizeD, + // imgSizeH, + // imgSizeW, + // sizeX, + // sizeY, + // sizeZ, + // sD, + // sH, + // sW, + // pD, + // pH, + // pW); + // testAvgPool3DFwdBwd(numSamples, + // channels, + // imgSizeD, + // imgSizeH, + // imgSizeW, + // sizeX, + // sizeY, + // sizeZ, + // sD, + // sH, + // sW, + // pD, + // pH, + // pW); + // } + // } + // } + // } + // } + // } + // } + // } + // } + // } + // } + // } + // } + // } } #endif From 692259e071494d08f50cc3b1a4e34326249bc132 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Thu, 24 Aug 2017 23:42:32 +0800 Subject: [PATCH 094/170] Fix pre-commit-config.yaml --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a772125df6..83fe9af768 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,7 +22,7 @@ - id: clang-format-with-version-check name: clang-format description: Format files with ClangFormat. - entry: ./.clang_format.hook -i + entry: bash ./.clang_format.hook -i language: system files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto)$ - repo: https://github.com/PaddlePaddle/pre-commit-golang From 0dffe68ca9973c5cf7d95029e369330ffcfe0187 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Thu, 24 Aug 2017 23:45:17 +0800 Subject: [PATCH 095/170] Add NeonDepthwiseConvFunction. 
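
This patch adds a NEON-accelerated CPU forward path for depthwise convolution: a Padding helper in Im2Col.h zero-pads the input up front, DepthwiseConvKernel<3, 1> computes four adjacent output elements per step using overlapping vextq_f32 loads, and the function is registered as NeonDepthwiseConv and checked against GemmConv-CPU in DepthwiseConvOpTest.

For orientation only (not part of this patch; the function name and the layout comments below are illustrative, and it assumes stride 1 with an input that has already been zero-padded), the scalar computation that the NEON kernel vectorizes is roughly:

    // Hypothetical scalar reference for a single sample.
    // input:  [inputChannels, height, width]   (already padded)
    // filter: [outputChannels, kernelH, kernelW]
    // output: [outputChannels, outputH, outputW]
    void referenceDepthwiseConv(const float* input,
                                const float* filter,
                                float* output,
                                int inputChannels,
                                int height,
                                int width,
                                int outputChannels,
                                int outputH,
                                int outputW,
                                int kernelH,
                                int kernelW) {
      int filterMultiplier = outputChannels / inputChannels;
      for (int c = 0; c < outputChannels; ++c) {
        // Each output channel reads one input channel and owns one filter.
        const float* in = input + (c / filterMultiplier) * height * width;
        const float* k = filter + c * kernelH * kernelW;
        for (int oh = 0; oh < outputH; ++oh) {
          for (int ow = 0; ow < outputW; ++ow) {
            float sum = 0.f;
            for (int fh = 0; fh < kernelH; ++fh) {
              for (int fw = 0; fw < kernelW; ++fw) {
                sum += in[(oh + fh) * width + (ow + fw)] * k[fh * kernelW + fw];
              }
            }
            output[(c * outputH + oh) * outputW + ow] = sum;
          }
        }
      }
    }

The NEON kernel produces the same result for 3x3 filters by loading two float32x4_t registers per input row and shifting them with vextq_f32, so each inner step yields four output elements at once.
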
--- paddle/function/CMakeLists.txt | 2 + paddle/function/DepthwiseConvOpTest.cpp | 5 + paddle/function/Im2Col.h | 92 +++++++++ paddle/function/neon/NeonDepthwiseConv.cpp | 227 +++++++++++++++++++++ paddle/function/neon/NeonDepthwiseConv.h | 25 +++ paddle/function/neon/neon_util.h | 47 +++++ 6 files changed, 398 insertions(+) create mode 100644 paddle/function/neon/NeonDepthwiseConv.cpp create mode 100644 paddle/function/neon/NeonDepthwiseConv.h create mode 100644 paddle/function/neon/neon_util.h diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index c572a9d433..05f808a6a1 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -21,6 +21,8 @@ if(USE_NNPACK) endif() endif() +list(APPEND cpp_files neon/NeonDepthwiseConv.cpp) + add_library(paddle_function STATIC ${cpp_files} ${cu_objs}) add_dependencies(paddle_function ${external_project_dependencies}) add_dependencies(paddle_function paddle_proto) diff --git a/paddle/function/DepthwiseConvOpTest.cpp b/paddle/function/DepthwiseConvOpTest.cpp index f44ae0c342..bdace2c372 100644 --- a/paddle/function/DepthwiseConvOpTest.cpp +++ b/paddle/function/DepthwiseConvOpTest.cpp @@ -34,4 +34,9 @@ TEST(DepthwiseConv, BackwardFilter) { } #endif +TEST(DepthwiseConv, Forward) { + DepthwiseConvolution( + "GemmConv-CPU", "NeonDepthwiseConv-CPU", forward); +} + } // namespace paddle diff --git a/paddle/function/Im2Col.h b/paddle/function/Im2Col.h index 48e2e32f92..9b91e223a6 100644 --- a/paddle/function/Im2Col.h +++ b/paddle/function/Im2Col.h @@ -16,6 +16,7 @@ limitations under the License. */ #include "TensorShape.h" #include "TensorType.h" +#include "neon/neon_util.h" namespace paddle { @@ -93,4 +94,95 @@ public: int paddingWidth); }; +template +struct Padding { + static void run(const T* src, + T* dest, + int channels, + int inputHeight, + int inputWidth, + int paddingHeight, + int paddingWidth) { + const int destWidth = inputWidth + 2 * paddingWidth; + for (int c = 0; c < channels; c++) { + if (paddingHeight > 0) { + memset(dest, 0, destWidth * paddingHeight * sizeof(T)); + dest += destWidth * paddingHeight; + } + + for (int i = 0; i < inputHeight; i++) { + // padding head + for (int j = 0; j < paddingWidth; j++) { + *dest++ = T(0); + } + + memcpy(dest, src, inputWidth * sizeof(T)); + dest += inputWidth; + src += inputWidth; + + // padding tail + for (int j = 0; j < paddingWidth; j++) { + *dest++ = T(0); + } + } + + if (paddingHeight > 0) { + memset(dest, 0, destWidth * paddingHeight * sizeof(T)); + dest += destWidth * paddingHeight; + } + } + } +}; + +#if defined(__ARM_NEON__) || defined(__ARM_NEON) +template <> +struct Padding { + static void run(const float* src, + float* dest, + int channels, + int inputHeight, + int inputWidth, + int paddingHeight, + int paddingWidth) { + const int destWidth = inputWidth + 2 * paddingWidth; + for (int c = 0; c < channels; c++) { + if (paddingHeight > 0) { + memset(dest, 0, destWidth * paddingHeight * sizeof(float)); + dest += destWidth * paddingHeight; + } + + for (int i = 0; i < inputHeight; i++) { + // padding head + for (int j = 0; j < paddingWidth; j++) { + *dest++ = float(0); + } + + int step = inputWidth >> 2; + int remain = inputWidth & 3; + for (int s = 0; s < step; s++) { + float32x4_t s0 = vld1q_f32(src); + vst1q_f32(dest, s0); + src += 4; + dest += 4; + } + for (int r = 0; r < remain; r++) { + *dest++ = *src++; + } + + // padding tail + for (int j = 0; j < paddingWidth; j++) { + *dest++ = float(0); + } + } + + if (paddingHeight > 0) { + memset(dest, 0, 
destWidth * paddingHeight * sizeof(float)); + dest += destWidth * paddingHeight; + } + } + } +}; + +#endif + } // namespace paddle diff --git a/paddle/function/neon/NeonDepthwiseConv.cpp b/paddle/function/neon/NeonDepthwiseConv.cpp new file mode 100644 index 0000000000..16d94c976e --- /dev/null +++ b/paddle/function/neon/NeonDepthwiseConv.cpp @@ -0,0 +1,227 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "neon_util.h" +#include "paddle/function/ConvOp.h" +#include "paddle/function/Im2Col.h" + +namespace paddle { + +namespace neon { + +#if defined(__ARM_NEON__) || defined(__ARM_NEON) + +template +struct DepthwiseConvKernel {}; + +inline float32_t conv3x3(float32x4_t r0, + float32x4_t r1, + float32x4_t r2, + float32x4_t k0, + float32x4_t k1, + float32x4_t k2) { + float32x4_t tmp; + tmp = vmulq_f32(r0, k0); + tmp = vmlaq_f32(tmp, r1, k1); + tmp = vmlaq_f32(tmp, r2, k2); + return vaddvq_f32(tmp); +} + +/** + * Each step calculates four elements of the output. + * First step: + * R0[0, 1, 2, 3...] * K[0][0] + * R0[1, 2, 3, 4...] * K[0][1] + * R0[2, 3, 4, 5...] * K[0][2] + * R1[0, 1, 2, 3...] * K[1][0] + * R1[1, 2, 3, 4...] * K[1][1] + * R1[2, 3, 4, 5...] * K[1][2] + * R2[0, 1, 2, 3...] * K[2][0] + * R2[1, 2, 3, 4...] * K[2][1] + * + R2[2, 3, 4, 5...] 
* K[2][2] + * ------------------------------ + * Output[0, 1, 2, 3] + */ +template <> +struct DepthwiseConvKernel<3, 1> { + static void run(const float* inputData, + const float* filterData, + int inputHeight, + int inputWidth, + int outputChannels, + int outputHeight, + int outputWidth, + int filterMultiplier, + float* outputData) { + const int steps = outputWidth >> 2; + const int remain = outputWidth & 3; + for (int c = 0; c < outputChannels; c++, filterData += 9) { + // Load the filters + float32x4_t k[3]; + k[0] = vld1q_f32(filterData); + k[1] = vld1q_f32(filterData + 3); + k[2] = vld1q_f32(filterData + 6); + k[0] = vsetq_lane_f32(0.f, k[0], 3); + k[1] = vsetq_lane_f32(0.f, k[1], 3); + k[2] = vsetq_lane_f32(0.f, k[2], 3); + + const float* r0 = + inputData + (c / filterMultiplier) * (inputHeight * inputWidth); + const float* r1 = r0 + inputWidth; + const float* r2 = r0 + inputWidth * 2; + float32x4_t input[3][3]; + for (int h = 0; h < outputHeight; h++) { + for (int s = 0; s < steps; s++) { + // Load the inputs + float32x4_t tmp; + input[0][0] = vld1q_f32(r0); + tmp = vld1q_f32(r0 + 4); + input[0][1] = vextq_f32(input[0][0], tmp, 1); + input[0][2] = vextq_f32(input[0][0], tmp, 2); + input[1][0] = vld1q_f32(r1); + tmp = vld1q_f32(r1 + 4); + input[1][1] = vextq_f32(input[1][0], tmp, 1); + input[1][2] = vextq_f32(input[1][0], tmp, 2); + input[2][0] = vld1q_f32(r2); + tmp = vld1q_f32(r2 + 4); + input[2][1] = vextq_f32(input[2][0], tmp, 1); + input[2][2] = vextq_f32(input[2][0], tmp, 2); + + float32x4_t tmp1 = vdupq_n_f32(0.f); + float32x4_t tmp2 = vdupq_n_f32(0.f); + tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[1][0], k[1], 0); + tmp1 = vmlaq_laneq_f32(tmp1, input[1][1], k[1], 1); + tmp2 = vmlaq_laneq_f32(tmp2, input[1][2], k[1], 2); + tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); + tmp1 = vaddq_f32(tmp1, tmp2); + + vst1q_f32(outputData, tmp1); + r0 += 4; + r1 += 4; + r2 += 4; + outputData += 4; + } + + for (int r = 0; r < remain; r++) { + float32x4_t i0 = vld1q_f32(r0); + float32x4_t i1 = vld1q_f32(r1); + float32x4_t i2 = vld1q_f32(r2); + *outputData = conv3x3(i0, i1, i2, k[0], k[1], k[2]); + r0++; + r1++; + r2++; + outputData++; + } + + r0 += 2; + r1 += 2; + r2 += 2; + } + } + } +}; + +template +class NeonDepthwiseConvFunction : public ConvFunctionBase { +public: + void init(const FuncConfig& config) override { + ConvFunctionBase::init(config); + } + + void check(const BufferArgs& inputs, const BufferArgs& outputs) override { + const TensorShape& input = inputs[0].shape(); + const TensorShape& filter = inputs[1].shape(); + const TensorShape& output = outputs[0].shape(); + checkShape(input, filter, output); + } + + void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { + CHECK_EQ(numInputs_, inputs.size()); + CHECK_EQ(numOutputs_, outputs.size()); + check(inputs, outputs); + + const TensorShape& input = inputs[0].shape(); + const TensorShape& filter = inputs[1].shape(); + const TensorShape& output = outputs[0].shape(); + + size_t batchSize = input[0]; + size_t inputChannels = input[1]; + size_t inputHeight = input[2]; + size_t inputWidth = input[3]; + size_t filterHeight = getFilterHeight(filter); + size_t filterWidth = getFilterWidth(filter); + size_t outputChannels = output[1]; + size_t outputHeight = 
output[2]; + size_t outputWidth = output[3]; + size_t filterMultiplier = outputChannels / groups_; + CHECK_EQ(inputChannels, groups_); + + // only support + CHECK_EQ(strideH(), strideW()); + CHECK_EQ(filterHeight, filterWidth); + CHECK_EQ(filterHeight, size_t(3)); + CHECK_LT(strideH(), size_t(3)); + + float* inputData = inputs[0].data(); + float* filterData = inputs[1].data(); + float* outputData = outputs[0].data(); + + // padding the input + float* inputPadding = inputData; + if (paddingH() > 0 || paddingW() > 0) { + int newSize = batchSize * inputChannels * (inputHeight + 2 * paddingH()) * + (inputWidth + 2 * paddingW()); + resizeBuffer(newSize); + inputPadding = reinterpret_cast(memory_->getBuf()); + Padding::run(inputData, + inputPadding, + batchSize * inputChannels, + inputHeight, + inputWidth, + paddingH(), + paddingW()); + + // height and width of padding data + inputHeight += 2 * paddingH(); + inputWidth += 2 * paddingW(); + } + + for (size_t i = 0; i < batchSize; i++) { + DepthwiseConvKernel<3, 1>::run(inputPadding, + filterData, + inputHeight, + inputWidth, + outputChannels, + outputHeight, + outputWidth, + filterMultiplier, + outputData); + + inputPadding += inputChannels * inputHeight * inputWidth; + outputData += outputChannels * outputHeight * outputWidth; + } + } +}; + +REGISTER_TYPED_FUNC(NeonDepthwiseConv, CPU, NeonDepthwiseConvFunction); + +#endif + +} // namespace neon +} // namespace paddle diff --git a/paddle/function/neon/NeonDepthwiseConv.h b/paddle/function/neon/NeonDepthwiseConv.h new file mode 100644 index 0000000000..23e4be1921 --- /dev/null +++ b/paddle/function/neon/NeonDepthwiseConv.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +namespace paddle { + +namespace neon { + +template +struct DepthwiseConvKernel {}; + +} // namespace neon +} // namespace paddle diff --git a/paddle/function/neon/neon_util.h b/paddle/function/neon/neon_util.h new file mode 100644 index 0000000000..56b3febe2d --- /dev/null +++ b/paddle/function/neon/neon_util.h @@ -0,0 +1,47 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#if defined(__ARM_NEON__) || defined(__ARM_NEON) + +#include + +namespace paddle { + +namespace neon { + +inline float32x4_t vld1q_f32_aligned(const float* p) { + return vld1q_f32( + (const float*)__builtin_assume_aligned(p, sizeof(float32x4_t))); +} + +#ifndef __aarch64__ +inline float32_t vaddvq_f32(float32x4_t a) { + float32x2_t v = vadd_f32(vget_high_f32(a), vget_low_f32(a)); + return vget_lane_f32(vpadd_f32(v, v), 0); +} + +inline float32x4_t vmlaq_laneq_f32(float32x4_t a, + float32x4_t b, + float32x4_t v, + const int lane) { + return vmlaq_n_f32(a, b, vgetq_lane_f32(v, lane)); +} +#endif + +} // namespace neon +} // namespace paddle + +#endif From b7885b087b74a1ab446f8f34d1fd78085d8b4316 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Fri, 25 Aug 2017 00:47:51 +0800 Subject: [PATCH 096/170] Add DepthwiseConvKernel for filter size is 4. --- paddle/function/neon/NeonDepthwiseConv.cpp | 155 +++++++++++++++++++-- 1 file changed, 145 insertions(+), 10 deletions(-) diff --git a/paddle/function/neon/NeonDepthwiseConv.cpp b/paddle/function/neon/NeonDepthwiseConv.cpp index 16d94c976e..c017241c92 100644 --- a/paddle/function/neon/NeonDepthwiseConv.cpp +++ b/paddle/function/neon/NeonDepthwiseConv.cpp @@ -38,6 +38,22 @@ inline float32_t conv3x3(float32x4_t r0, return vaddvq_f32(tmp); } +inline float32_t conv4x4(float32x4_t r0, + float32x4_t r1, + float32x4_t r2, + float32x4_t r3, + float32x4_t k0, + float32x4_t k1, + float32x4_t k2, + float32x4_t k3) { + float32x4_t tmp; + tmp = vmulq_f32(r0, k0); + tmp = vmlaq_f32(tmp, r1, k1); + tmp = vmlaq_f32(tmp, r2, k2); + tmp = vmlaq_f32(tmp, r3, k3); + return vaddvq_f32(tmp); +} + /** * Each step calculates four elements of the output. * First step: @@ -137,6 +153,114 @@ struct DepthwiseConvKernel<3, 1> { } }; +/** + * Each step calculates four elements of the output. 
+ */ +template <> +struct DepthwiseConvKernel<4, 1> { + static void run(const float* inputData, + const float* filterData, + int inputHeight, + int inputWidth, + int outputChannels, + int outputHeight, + int outputWidth, + int filterMultiplier, + float* outputData) { + const int steps = outputWidth >> 2; + const int remain = outputWidth & 3; + for (int c = 0; c < outputChannels; c++, filterData += 16) { + // Load the filters + float32x4_t k[4]; + k[0] = vld1q_f32(filterData); + k[1] = vld1q_f32(filterData + 4); + k[2] = vld1q_f32(filterData + 8); + k[3] = vld1q_f32(filterData + 12); + + const float* r0 = + inputData + (c / filterMultiplier) * (inputHeight * inputWidth); + const float* r1 = r0 + inputWidth; + const float* r2 = r0 + inputWidth * 2; + const float* r3 = r0 + inputWidth * 3; + float32x4_t input[4][4]; + for (int h = 0; h < outputHeight; h++) { + for (int s = 0; s < steps; s++) { + // Load the inputs + float32x4_t tmp; + input[0][0] = vld1q_f32(r0); + tmp = vld1q_f32(r0 + 4); + input[0][1] = vextq_f32(input[0][0], tmp, 1); + input[0][2] = vextq_f32(input[0][0], tmp, 2); + input[0][3] = vextq_f32(input[0][0], tmp, 3); + + input[1][0] = vld1q_f32(r1); + tmp = vld1q_f32(r1 + 4); + input[1][1] = vextq_f32(input[1][0], tmp, 1); + input[1][2] = vextq_f32(input[1][0], tmp, 2); + input[1][3] = vextq_f32(input[1][0], tmp, 3); + + input[2][0] = vld1q_f32(r2); + tmp = vld1q_f32(r2 + 4); + input[2][1] = vextq_f32(input[2][0], tmp, 1); + input[2][2] = vextq_f32(input[2][0], tmp, 2); + input[2][3] = vextq_f32(input[2][0], tmp, 3); + + input[3][0] = vld1q_f32(r3); + tmp = vld1q_f32(r3 + 4); + input[3][1] = vextq_f32(input[3][0], tmp, 1); + input[3][2] = vextq_f32(input[3][0], tmp, 2); + input[3][3] = vextq_f32(input[3][0], tmp, 3); + + float32x4_t tmp1 = vdupq_n_f32(0.f); + float32x4_t tmp2 = vdupq_n_f32(0.f); + tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[0][3], k[0], 3); + tmp1 = vmlaq_laneq_f32(tmp1, input[1][0], k[1], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[1][1], k[1], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[1][2], k[1], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[1][3], k[1], 3); + tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[2][3], k[2], 3); + tmp1 = vmlaq_laneq_f32(tmp1, input[3][0], k[3], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[3][1], k[3], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[3][2], k[3], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[3][3], k[3], 3); + tmp1 = vaddq_f32(tmp1, tmp2); + + vst1q_f32(outputData, tmp1); + r0 += 4; + r1 += 4; + r2 += 4; + r3 += 4; + outputData += 4; + } + + for (int r = 0; r < remain; r++) { + float32x4_t i0 = vld1q_f32(r0); + float32x4_t i1 = vld1q_f32(r1); + float32x4_t i2 = vld1q_f32(r2); + float32x4_t i3 = vld1q_f32(r3); + *outputData = conv4x4(i0, i1, i2, i3, k[0], k[1], k[2], k[3]); + r0++; + r1++; + r2++; + r3++; + outputData++; + } + + r0 += 3; + r1 += 3; + r2 += 3; + r3 += 3; + } + } + } +}; + template class NeonDepthwiseConvFunction : public ConvFunctionBase { public: @@ -175,7 +299,6 @@ public: // only support CHECK_EQ(strideH(), strideW()); CHECK_EQ(filterHeight, filterWidth); - CHECK_EQ(filterHeight, size_t(3)); CHECK_LT(strideH(), size_t(3)); float* inputData = inputs[0].data(); @@ -203,15 +326,27 @@ public: } for (size_t i = 0; i < batchSize; 
i++) { - DepthwiseConvKernel<3, 1>::run(inputPadding, - filterData, - inputHeight, - inputWidth, - outputChannels, - outputHeight, - outputWidth, - filterMultiplier, - outputData); + if (filterWidth == 3) { + DepthwiseConvKernel<3, 1>::run(inputPadding, + filterData, + inputHeight, + inputWidth, + outputChannels, + outputHeight, + outputWidth, + filterMultiplier, + outputData); + } else if (filterWidth == 4) { + DepthwiseConvKernel<4, 1>::run(inputPadding, + filterData, + inputHeight, + inputWidth, + outputChannels, + outputHeight, + outputWidth, + filterMultiplier, + outputData); + } inputPadding += inputChannels * inputHeight * inputWidth; outputData += outputChannels * outputHeight * outputWidth; From a1ce705517fca1551029541e17cb0ac3ddb65677 Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Thu, 24 Aug 2017 10:35:50 -0700 Subject: [PATCH 097/170] update etcd graph for design doc --- .../cluster_train/src/paddle-etcd.graffle | Bin 5069 -> 5765 bytes doc/design/cluster_train/src/paddle-etcd.png | Bin 56296 -> 57495 bytes 2 files changed, 0 insertions(+), 0 deletions(-) diff --git a/doc/design/cluster_train/src/paddle-etcd.graffle b/doc/design/cluster_train/src/paddle-etcd.graffle index 56681ae5bbe11849116d621b066a6317e003e4ca..1b6611bccfb0034a10044f2f175b56c46a98f1ec 100644 GIT binary patch literal 5765 zcmV;07JBI)iwFP!000030PS5_bK1(be%|>NoIIR|dt&H8Ln}MUDXsg%B(@M9vMc}nc8ftR)ROT)z)rg=0a|PIyuSW=P_Iw_z3X|U8yXJW!2kRnmcjcH z^_`&Y`kl}3Pmb$rENXHZF{}+eyvie_WK^KR5p$` zN(bMzkB&<$j!I>7?|tcgH;VcnE0x>Z+p^82%1+SZf`iII81!iv-G5_|tucjiJ8HjY z$&6x8Wru~fT_^hT?vGE`^!^J^&UNgFJwF|ZeX8(q_SgMj&W#Z+M(eWsC}wTzEAXqZkrCi1#x^?Ib_+7XxqOE zrlMo(9q03?7DQ3d+qFZMgNck_yVIzzyugk`JDon!K+1e8iQSJ<*a|}TN8sBY%eU*t zEm)Kg13TW~B?;B4(U11lj(2_r0sUU{F1ELVs@AMEw~scv)$aZY&|&Lr$8$RC?00+r z1YUGoTc=)?Jzw9{kB`yL-xp_lpt^mwSv~rj$<*(>+x13q_P^1q=sDE`NpbuKHiOH}so*Ps44$P4A|ePw*Uwcs@~MLshl?*KiO?6ri4? z!TKS~;Y=+RCsebYYd+%p?RX?~9YJqelCdb48YIt?UjEM-vA~DY8pIm-`Jwa=GNAdP z#D2h^sg6I6Xx+01vUtXPldWgP;`Fo_OyjeTK=fN>e? 
[GIT binary patch data for doc/design/cluster_train/src/paddle-etcd.graffle omitted]
diff --git a/doc/design/cluster_train/src/paddle-etcd.png b/doc/design/cluster_train/src/paddle-etcd.png
index 4f9c9762b3a8c089dd5e9b2c07cb9dfc78296a21..4e5c3d886e65a654d734788afdabab3fd15e0632 100644
GIT binary patch literal 57495
[GIT binary patch data for doc/design/cluster_train/src/paddle-etcd.png omitted]
zbXml~Q9n+%r!eh00pv|gZH_~P!2RXsYN@Sr~x90)4s?#i8{z2qW1w6==g7SR0;HR+0N2bbzp>w?5*Idrt&@ z{qPkE@+FmFG?in{YeGfHtO#Yslw>JRfHvYmg!e(mJz^mVOm^?>*LGzc@Rpwycz3@x z@%jx+msLRJGx)IO@NJyFbdseJhXKgs9;QIA6Zx|ZDtki})V{ah0h3^Nu8y@;=jlj) zkZhQ433at7r5pzgcSoaNwOHH6>1F(`yoBE$OD!G*_KC^tC3Jt(2hPeetLY02tW4F?^{5j^?!pxD^?cm%aP<-%?@W6D0`d~%#qkKQt4LZ-(qnP7(vr*G zd2!$6=X4vl^pM}0wAO7F86I?@%P!lM!yT?6a~SYgENz`V`OcKO=7Ua(ki1t!1MhWf ziXwR(Jw2`A3HVJ(@r}&I=q`@7*aQ%(XsaK2?1bepR>={Gum_E%ao$_^)P#3Nj@^|P z{ARgU);WaP#KcS8tKC~`K=QAoppIC>n+u^Ba-?o;Lmym*P0LF&Kf6ic4c zhZ8+;P9?)?(Q7q&%IJkqPwNwcPI-b!Ynj|>7>b3{4Xnyhe%75;Iz6^L!&kCSh=3G8 z(Yu|-o$F2XDjXn@$Pm89rLL^-cn!-|-2SxTBBM58T$b9e9&^6*scu)qm=?^F;u!q2 zn>hU&Oo&r*mi26J{6%qkOeYtBU2(0U=OwW_+4v;GrKdxU?0Yt21r-~4EyOpin8slN zN=I=vFswY51;ST1QKG|XkoP_VAf9=TyQIz6hEhD`_$9KKLx}RXISD`_(gh+P$p0ox(9eAt!*cl`k(QSDL8^bur8 z+fy@Ur=Z^9jEq!qAxu|&F5t@Fa>z1l+cWH863(Gca`x^I2tm$aj)dw^F-v~ctHwQ= zz7u7RifbljrXL^PXO-zr5RCZl8&o_%@M1>ur8gShIH%a@wo zV}RfyC_;4fF1olR)m3!IxWDgXOBHUDi74j2K|DF0Vau3ZcC%jXVo=@)9*9_DgFyv& zL~>44`a6krSf54i6Nm3cn*@E#d?BF%BROBPcl> z(RE3KddG10H~FLRFnSQt(LcDaZ=2NgzauYRm;Cx5ew}OXe&t4_M)ZaWyAUm*COnFp+&5<)P1&PFiB_e}qE<;Qk;-wOBgBIZL%f1zQ;O-Cpsm8!{eALEg4wRR8* z69bRRnWCt1&tR_C@AB_3b}ToOt9_ts?^b(k25&GjLnKvHVx_@+U(66W`NO>rlrqlT|R9ELW70=h{M5P03vwq`RsrqxN(v@6S%ZA?uZu zd1jtr167Sj9a%@A3Q3AX5Zrqox7!XlRzdf>rMoC;&UlJdUIZeu?G3wxq1`h>x6rXI z&&C`rto|KT+R#$kX_A=q>ft5Ra^Y^z`fyf=77KLS2ZmJwP9Oshz^61Fynyx97DW5< zk^#vErv^()tOQPur=pscT(BRSJQb5Pyz=~5l>gm~O0|T-KWM_e6p9;cP16uQ@;uI zA(1c+_E0K2l`qG32n&wPEMQ|EVv1bHV)GK%wFfiVQnC1yz+Leh&lyXE&pIcYMlop0 zu4K|D=PiHaJ=!>uQV7S643N~nFyMgPf3}u+`YD~#mZ*z7Fw|=WhF`1TKKy-aQA9Cm z(ZG+~K>TOV@}P(|77ST(8WJnqsEr#G%r z2Gigvs$8_Kz9DBysgs!qn+&fhwV6mnbLM#A^y6f+%omY5nD#>>8r|x%m>BG`pI)Ci z)5rf483$#6vO+EEM-WGbTAG?>mYen>I@|Q+_q@i4dc{76F-BYRgs+*?x*v;*B$!>` z`*|g=N{N`S#VpmI1lEeULA>%5ltGU^v8X?}+$%vf76A!tFN?FfRnEOha}G0{n+ZHV zmv8+zYAerz&CPpP02=)8+j1yHe(dTifl-T-yF|zqUjv9aEtgb+in=Hwb+sLmWOYo4 zUvQUyZsC2~`6nj=yd)B?;WPKjq_T>W3r69~ZwVlYl`+XMea>L7m>hB{7WepjtOVMS zP%O?ihN5m<@WtQkvj>(C2$gtVBU`x|c_hz@(Hi*d)^u|=_mgP- zCwFxKWsg{$+5%<32~2GadwHT^9%|1W6C+sc#BDbj6_r4*lwTkrrBHH9Sp*pvk_bQq z?2Tx+BqnT(pp#CZ7Qq=5`omf6*!sS>J@32|!VzjsccTu^DcHA3;>pRQvk2xo1dp%1 zGN^gMZwPdVQck{Gu&aI`40))iQp1Gny{;hTTauVzibG9-(m}9x+TAG`6~mMO$HOzM zMG(l1TIUJFD}INUl>0DZ=2bkEg{j5>&XfAWCqj{P^;52b*|CK)a#0`f*WW%kMky&o z);AX&^jBkP;>G$%?#S`)@?lu!`n~vV?Z3?R8~bhMdT9H9WUdQ$9zb5>y%_WtbCdm( zTcX4O?b$15X#G~}t+TTs0NS(%=7(qCg%V5mn-ikQi2?e&9t%wo%1ZKlaSj`R$zU~Ar`kc629RGGO3Sz2T z=cN3w0LFo#@l^?)t+FQeLsb}u_u29n2bg_xLe%CYj_&Dn@2-$GRJS82zSndDBQ|u) zfC8bSvg`v`>~TlV-(pw0GDhRa^xdq?`rNYwA;#QZCU6WX{lVkF-G^~~Kv|H|coN!+ zuCu~Y#DIzdjn~J6PRL+OGl9+iu%d1PQ_9#-#8k7e$B|0pwFU|V0m`@l#>iegKTYf)56aO}bETcXHLpsBGHW-ldL~O^``Om}g4sI9H`Xk9tkRI?lv%^g>$Tod zYYMt*jtH$=DyC=vmN8QnPF)IygBB>*@dAbr$g}uAFspY}$@^b%gMFl}wyw(g*Ofh#C0pKM**ZGmh9^`)@rMt@DcU7j%aWKb0@5&BtU!k$n z#5qZn{B6fa5a-PpM%FCz>Oq+UmG%C-bCs&zV+{`DcDNE@+z0X-pl!1c}7vR5i9{oa)~s(Zb(Sc z^IJAY;b6X8E!1dW1)(nNeFvc2{oOH_?qLh|$LyRv#NY7+gr1V{;vH+nN4n@y4q3}Q zB;}dk>H;VZlO^xUWB=b(0|nV&d;|K{Tr&bL z={KZBxr6u)KfYB=EQRaQ(8IJOYEcWA1c4zIfVT+-x6ofokg_7?U0VcfUR7vMN+xq` z?Yzs-2g2=Q$xyH%uiyk;2?6#nF#b~@OVfU@N=I1kPXkB&^C3S8%v_iUz=;S{Sy}{C z{}DKt{Gk*uG(BElzF6I^4z^`bz{?wim}~rp3=#!Sa>QX#@z}qUtOH}$-03d@>>A2* z#BaZ{uI#c2!bA^Y)RI=aGV*_R!8{m_PUd-vNrC4c*X69H5Kd zvmb0KM+uyo%{AeT^xYL;`qe46^b(cD>vR!w4nM>9XaJPJ4UfN%XqD>XC!aY~4O|?q zf{bYX-~)YDG5}(}1F``?aeN%>mqWneeG1A@g~XOVSeJmR(WfSsVFOFq063W^7L4MY zA0zJp+Z-ZxYL3<=GcIfGgA{EtJebhuaSH)r)i^l&31ldZ?%RgWJ@~+>p8@NTa@D)$ zfGYLs{w2VkT0jN;$$jS__m}N<rjk-N9#M2(r#fQGrT8`k z#X^CMyykvO=DI$*#yYp;u|0`ikUPRN7Zq0y%N<^a9d3gfuMXhp2NN3B19tu0cfNpu 
z2#VRt83{1b65u8Ree*jSpy#w47M-r_c$)dh9hMLix%EO_}r7!$=ofq+35KxepL*d zqzY23Oa&05YY(7oz<{DA9z1PPj^cKfLO>qsk%UW$137TP^g2lFO-YG_uK=?`4N4nR zMWbo$0EBrt2N<@1HF|x0JoaKGoc@)_jaP4*{O5xUt8c|V=Xxc$s#n^Ikz6 zki#7lLLDuDUMfd-(-mb_V(?UP@bYTEO@=rUO>6mWj4))0+-C>gFcF~ZM%SAKZ*twjCB{_*;PIwG zExhmqPzu%~Jhvvoi`l#mmii6x6w!2rTPUu+$T_Alf<;Bu&x$AggnaVELwAy^m*MH1HH^%8% z3kLy|(|ehZ_M4CE?_CIjvbvy-_sL$>5Q|`sw+rsxRQa22hK2dMg=b8mfV0}OOOjLV zvL=gxp!z!Qd&fPB4`Ra!nfDsVT*7VI0rP2;>YONs<0iX)NK4?xSY+Z>>!|uA{cQosT~LTo&~T%{NR&XoWPz}2Y_LPC)!&EAu2^w zb2z zb>AnZC5x%rz}?7<2_BO9A%`pPzmKeNl7)bsF7f2;zJ$$%g{(Qr}ZJ+7P3> z6=rCVPx~d*XM-S~TlOeKft}JhOlMTyEhsKL87{R@e1aY^j7mNL9O>H3HAm1@m6!>;`9yGJyI$R#wzUa6IzEMQ+;s!aoY%TR_hzHazp&bly z0?9a}MDT^rFp)WrdPGUT69{_@-aJa7A>;=wE}ek0=EP2r3RoF;^oy;Tg=IEKrB;A3 z3Ek(fB}NQlHG6>JELWiYI-+B2(9_O5`$3BYG6YQBhB9ou-#c)nm8e#aS$eiB(#5-6tizSfyu&tL! z=E-oF()u9=WEV8BTzU^niKG{@!&YSHpuTQ;-=)&ZIIL618&Jqn@978NRRKHhycH{s zfvjyET8T5x1}7mkdkoytvU!&@BF4ki`r*3SK{Th@Q-T!|tQVBohIftFmOlB-2UMac zTdI%QW%1Ik9>&5cF!Jd32s{8QJ77EoaL?k~W z3Rq&Z7{tR#Rjhg?FlQLfAlK~lApq0Ngh;c6;yT&a+aXw~Ea5q&xADX-R7D{OPbNiE-=cV_>Rb2=bQrQAnDQlU`i7~X3$$oJ{ zF0>lP>t~35x`h%vjEiD@UeYDU(hw9pc(L@c_aVO#orG;kw>KGUTGCXCQZ!+JJ=W5v z&ccY6oui@1><)?^L)-V{j+2=lv*Du9saffD(=1o7Ew&uwbyLd>z^cbJ3pqtJ=tSxS ztyHh7F~~WQ^0hl5_Y9?Mp@e>Y4{@SUrhZ;6YmweFBwa-I0Kzksq&wlwHk81B-XWCs z7+2rK)Z38PW*o6e8u&6v#hh-xmCibP!5G$LawAuFP_!}Wg77;B7@*Y zF_h{RgqAM7$ZhhPk;8(h*O}6^NSuu5`%f~kuiXdxt1bXTbG{y^hdy0dOw!K|1#Cn0Xn4{fB1H6H3E)8R$Saz zq(c1W=xi*C4U#A-dA&Es;G4He2F`W06S*4)lTN|`YjP1uHgl1A!&>28RlS>)7L;gIx9P^By@))V@`)j)udyVP% zG*m&{#ate=ArymNwR_It=yw~C>j3^6cCXnRyFnkGVdV1llNdUw8`TF{`yTxq==b9@ z#SMvKWr>urT|XBnt#hQA2^1XQuhu>ez6RLh{gtt9h2XG`_Aa9xDX5ZYbGe2-lmxKX zdEXX%;`L=ML=6xdg!*Q84bL5a7XnBrNgO6Bf0{CyQ_5~cz7i_ zWE^zZNOBUHtziw=F`#a;@^3Bu6ubb8Y2+T=e1q^r#CZpT<{< zzx#cI$L?%vt%+3U-N03(Q38)arfR2#yqZ01X^6gfzgRo{gqL*eI9F&nO7o*wN*0`k zRlgBt91(pq$)HLE6(j_@>-@Ru?zJQQl@(+*tU$Zw=Uz{-1DQ@x9^ocBECBjT0$@nfep=^BB6E5UI||)k^eJw*y#5lQZ>Y@Si8>!59((hBevU}+omT??;Ih>T zB`?xptP$GR%G)n|Cyr|W(HR&~jm9F8qywtF zSf%dC^p0(@ItzEm4e8eLbJ44= zz9d{lv6QoC)@1lG9{hYHOeBx>?S5>4*yq7kuSKfaQDTMX18c6As6fC)u0zn!1Ob zQMNN;angG0;;V9MM4?EU=9M!aHn0QxS$4JocJ`e3dFV9Bi2Z8hIhpBEN8(y!lwdbo z*L|Tz|2U2#dk@n>vh6rd>^thReQ9PJjWG^m3gfhd2HC95&^w9k@A(E0@j6ppp0j^1 zL43#HuSVwt^(QYv1u5x!+;*v+45y4duTyiz`!SCQyiJEARqpDGKNM-NnbZ;>4rd>G zD$tWvr5z=Et>d}IW*N#E>Lab)JmpTVVD!-o6isHCs#DbjB!I)CWVbxa8^U!L%u-QgH z57}^=0Qn-c7T1J2DF$tl+l_SFekO*qmLf_%`K~+uCQL<)(hl$6orrWK(EIL<8ouUD z_JtQ!$y@40tVJ1OBC+nIL)yv;J`B)chWCr(QQXUZem0j_*5;)AB7A!0}j?k&j~bAKw0^n_UR<|wxc;e7lQM5(!(hEd?MiLAh4!1 zo;e2p?Lw9?QEjAba;H-F(n_1-y^n;(3vWEd-t60#h^KQBZQDaVbzN)S8(yM153FEz z7x7jh&OK%2Fk}aMcSlCFRp*Yk&KHJ%;Kn>Is6(Aki86!7l#YWde%`o7tOjMw*=Z=^ z&5>fP1mQdea{ucj@rxpWwowWp5+Rz8Y@Z}1Ni%>EaXgH>QV>{b3En^eC6K|wYNXBf zu9mj+wt0BllTEGckEmTGnB^{jTCvXH8BRiX!rzSiFvmiQ4T;8?XYdsVGZh zPio@z^!^7tOSSO-5dqT-P{Pix`i&O_CBj}{Upq4WFM&!kGg~^vqKin?MBF?MuyifWC--9jxF{WI13k zBrZ+=i!cU=QMQ}j14OllL0R1!;-B*s%raEmkP#Wx3zfwpVl#V#8vK6#h~<*yV4@`y z8v&Y)6_9}~YCJ=4#jM<8LI)$w{W=2`A9oFN zYTnWUv7SN}dh$BIs@AZ~As#835od!QlTzIpMl@cJyQMn(DX}xH#2Rm4JN1noUO(li zdfB!HNrE2MAU9c}ldubQ$g(++&J~~OJ%}{ zmjP(lAJ4{#A;qSylK#7SDXd0Wo1}UiOj=V~r?8rfs4%S1{-!K>mgF9MZHnb~m;B;% zypFSe1R8Cc$cq~Q<8mlF6@l=TKVgpLi)tdk?6a4Uf=mb(UIrFnti}Jn-{1&$D?m#v z$FhC`@*Iqw0;*?LxzSz#W^@FNsuHeAsW%`WXaazw6IwIq^I8PaInL*Mv|gYLNdb6L zDgG22fHqeHOap)czkbXSZv#AdPeu+WP)u(T)pMtLuO<_W=J6yuSl8phsPzTOl{PP^a1wprJS;8m2$qs}+FP$OM3jj(0di zAg(Xy{sFLa&J^Pr;El9^Rl#~|hLV}5N^}BK5yYIRkTXZHV)^F$dr5?zxn+bW$=_~o zwilB1F-VK~3nc6^K25}Lq)6R&XSsXgLH`^(s2%|pB`I4ZIQ z97sG4kDY1I$P5E~-Bm$1IB53T2Jjsy7%+dLLxaNlgJ8WvU6BHy9UD;g?4?R%5+1Ni 
z<^rVHe1Ea+mkfR8$rmjnD*YR9(w?8qtOE`tXC$6`%Rwh9un{x@!~-xnC(u<d*+hqixX;1`9sosG-Rl8V|`G%uDgrN|CXut~P<^cXHFROacP_y@O$>s*KU;>dh z?DYbm>&BStewVAqR+i6R$2n@+c%!?WY%AC)2g)(6I6A3~*sq<}nS9RF)cL?D!d^M~>}-`Db#~Y%gs9a0FIENfJiHilvSLVYgj3BiL5gAPfEiEK4+zD?kan5EAHJ_RzRhO=E007MJq)8aS1 zJL9zfv|VE{LO^lp$|c*E>FMk6x35i2DoXYlN z_G6?(z$d#RX_>K1H_@+@#z_t2f3^o6U1AZuK_FNSK=W;&xeLn#$ad|c9(Jg7Rp;aWNu!- zvg3e+r%0t!fkuT`iSHbS9rn~zyh*>3KMW6cuD!z8h{ZajtgyES{ zPlQ9K(>_`uL4-Y$)vQ?)bQU7`BCk#L(KyqYLY*J2qbdmo2I7ScudtfERGe4lmf}z& zMI!_xqEuads#drV3Mol;3HI;O%Nuf}n5WM)$v}-v(Nd+v4o6MwV`d>2n$7fdKPej{ zVe#SrqUtT6s@lHyZ{pA(4Tq9OknR-e?r!N2Ns%<@?rsDGRJubznnNR?f+Et0f&vyO z>VF>Z{oeQYj^S{4x#w*5UVHDg)|~TsK64GL-mBSYL06$*jE!?fp@Q5)Bnf%@to~)c z{43V3(|O};8n3a7FCJ)8np-XLC8F02C>F{=vCy)wn)&t-rv6*l>Wn_lF}&2sgTKeW zMBQ%yRTH;&{AL1e+S@$S{LaqXQ(=T#Sxi4Jhr;DHJqb46l;4q6>2t7kTAvO|?8wM` zrn#CgWSkMJ{D74`A^I6xgSzbRpNfg)=|Z%uIrAkPfmVq^RcF)YDkpK=bUWjuXbpvXs{#k^% zasQtBWHR#AJBvE2K0@r27;G}9_%7k{X);2n8FchGlx9up!K~zYP5LMZibSa#H@>X*LPvIl=w2N!Tb>UEVGCbwoGiL!+vyDJF3bM&?at6(H9br65tvW=GD26t8~)yI-(_x-b}aCYsY zg=BN^NJGI@q^!nbIM9&cQ{oA3MfC&CY32u}!2BrY9OEr}>=7mDm^& zVA_euqZT-Ocj?80fhSXUj(!cT4YNjtn|Mg;#4Xe@=g()A?X!`hGOhnCxgVZ|sD8YJ&9|-W^Bsrco__e_NAB_*4?0 zY-kC%LnqG~Gtr{)8odQ2fcvT)8*RJaVFf42d=w zDvcg!aRl4=ki8qvmrYpplkYkz61LozHn>=ar&>CxruQ*9zKvJ~+L#;G+yl z;rcq(s)HT2Zy{|VYvDI(CLI6NS%3tWVkAzwyYl&|%B^5hZptuHCp){RY(9=J<~NU> zJk^4?_-~Af>KJE0Ny;H5X;F2e1sgLVb}1>_1^HZv)^9Q-P4xv*Ja+77OB3rB!KLEx zO#=5`YHEEgLPgrKw=<}3DqevJ8B)(VzmROYDpOS)8Vol#+)Bu~toV$giTQBdO;OGx zk>9DvPJ^9nkQ+NiU-tBH@NUyj6-nJch8hUA5pMAbwPPISVM3u@2093Vynzln zZ*bf1x%ht(f(-?1tMk5?265r9>(lIYT|eg$eWsVggZ?<1!4bp{B4G)4rp#PAL~{NK zCgJB~pkzw7JU~qQZ>)*uDq%%;|B`xvIH1mQm!PDlyv3mfIIQd;@~oI{*igNs0s`A=&4H=Ylr>$Jz&f%-} zPu+VLgYp|NlDt}$jVPu#AJk%AR{vl412hlk*M(nUyVeFcN3hiMWu<(e__;`a3Q1NA z+>-j3712{bV2NZ=c;5RCvP_cGW!HR?Ppb=i99%?C=6yLWD1J3O`RMpl?wh&m-WC*L z!qizUZA8{13me#tX_Nt1G3AO*x+VAJEUkmQq0&jdSUq~V zwq+(CAYuXn!&TKs?HAw)y{)e|>H4Q#ymA5der@zb9(DoGR6E4&oK>@j@7@!?zBK=l zTe=&^^wGa3L3KHz?gG8zlgA(gsk}SyoF?Z?Swu)F)P>LDOv%MfL#VFm5a<|ZZ%tMX z(1UlP=Znv*`+0>ywv?T#OX^%p6L{>@=n}xNnM{IM5ls66fwfYY@X~UZpVNgtYD0`8%1$ND(?w;VjdShArn}k_@)n->Xbo3PoEm7{2BPN&V=14d#nYjV|s*^|9!XTmftmr9L#+H~B`$dWjNW zSJU1!0YXgEjpj}4P$G5SG0YWpX}#4Bg5!|{%C~=>G9Or*Mb^eEry$2PO(#xLG2XH# zcbzhcgqH@T=sz4p^F{qQzsAz07N7KBrpyHJHQT$c_|U3^>?qNk*xswpf0s$8=9x5ejTKFL^f~0 zQ6+Qt05$YP>f@o`F6RoUDh5M8Lg8NY_t#vayhiXHsY|Q_XTY}EQ59%Xn}Tw`v-v9N z?>SacVKPTN2qL#{Ai(~jGrL0_lSkPvzed;9Z$-F%1Ob}cN20q0(@_p<(-5~sKivfm zN<{AFb8oap28={LW~ZJae_~M!RobFIkx~#v$sR*t)=^930xoPpWv|jd6u3(Uc(O@X zYCxBa+d3Sr58^0j+QQs{B8cFdsDoHIm&bVbe$<7Nz|8O3c8qBH3TT^Vz=Nh4e|`sM zj{+qrV2a6p?81KD4!BTI9R4s;yRr&_hy|iH=+hcx$zDUrzgoZSE?~Ac(x^R1hqFhq zDBJk2OFvRS!O-X<%Q1*XWWFZJN?-?q37d9tHorAZ^G;W{Z1Du=Ljk)mp#`C2_l9&1 z>u!I}(j=O>&bj{Z5q6PzPL5qFHOH_+*3#qe@N!t5fBSQ{PN2VjT8p(*A3`S2=Z{^E zQMCOBG-)6Gu7y0{=tr0d`l#RLyHXK%4L9j6#5HKlRZ`mdlqW@N=5hT2;4$3_ih8}@ z$n4Uxx8BD@m}E;6*gt3Y2QpXhpI#7NNl_Pr@zi-kJyzA(9arQ75IY~pAgxqS16nMI zI|d-MQ8&{i_DQo?-KW)6t<6}@bg&Kk@=U${DX?NgVKd8j7oetX<3C0bOlHl^rvSRx zM$1%!>B0QHXDN4;1?9q9w!d(juaw}5PJhR#wqRd20VYja_O{Ey`#vzB=6iV^zf*0i z&Y4TyCuS5S|?=pj$eNPY2E2Us}0Q}wxx0sso5P@_LeWhpzpEx5W(;)PwGAEQzo>u zX6|w+??ZuD3JcSzyoG*Z%{rA&yz20NQeeSj`8@L0`e|@c^sliC!qZ1xqc5iNGREtF zDvpy2I(zJX*VdYk{3)jpGd)oPO*t#yo|+S6dP?7x&J%GyZy_~{``K>FQ&RA?;i9or zQi+Hf+1DHIGj2OL&uB3@`Q;8UM1E&5VMBOJ*dg<8CH*81dIL!}1Ed8HAA6&i&R6c{ zbUCecngIJK6H%=*`Z+0{3KFV^wV_usOoTg5SFWsI>^DY7O!VpjrKEel;~DF;w?rxUCXPx@?l$?9 zH#ZGzyq`7TYP7ba<-KdImW&(>N`IzIOq$&8fHCYli${y&BOTq?wV6m3XroJb2@^L& z*mprfr`MtepXD80o9c(p9ZrF7Eks506VjgL@#UKfclp1~j3HoLo%Uv?nsuzL*5P}! 
z%o40@Urm&8n~k?6_}2_Sdd^!76p3yZT_1W(N$l>pH~cWyod{69n!PjV>|khrgM?Bv zZ*x(Wsbj-Kf+pc*WJ<|s-HKcf zG-Ugkp1w@3eskyh8{!lupg>CB&{3;(mCNYlaW-rJBM%fRnmGox0)JFILl0#bwb#0Q zbFTg5=GnM8DXE0oad7yC$-o2Wwaw+$q}sDaF5UJ$9l+5o8-{xIRa%^rl0yYA&p7*N?*oHe+7*C5^nLDo*OV zzrv6BD&~0Y6GIRVyH!zco1EIPX{`aAk8pW|86~=?y#baA=>f zO!QUko_sZ5w7fn~tcx)55k3b=Cz;!E;5~3F`XP*6K^QNu?S_?v>=x+6?g%A= z3w#vG?_GEC*Pjd(vYYkKOs}tP#bo7)eVQh|hOOZtEQKxP?T#B}heRc5CU;uO^T`f< zsQujg#cZ2bH?A8jo&~Yp@9pqE?9|4HBYj9?C4$MzV{~>M4Wh;|-Xrws znI$zBEZKP3GHFArS&m(}rsEvs?aG@pvhp-`J$eXqZ|+WS@WeNfYFS%pJ&xyqWw2$ zl-C`PW25*B6qRqcb|nRigfQ6)D;*7vCF|?F&$WGt#7Qi`cuKtX7V&mE*}}P^0dFvk z!YN6<(eb<0hgxp4iNrZBbl9&M=42ax@os-X+|Y*>9@4Ql{Naar>R64t#9S@wWcTQ7 zhN(s#c&`ReOx1-I$;SFR91ItxJ{Vw|F^=dA8Pl5gC7xU1p?SGs8EG__mYEqMvZ7Gh z$fi9V&$_}teeK2kUcpj@Dp9Ybyo+qZB@#Z|-DiNX&F%dozvq1;I@RilfkBV%^3ue-ir)eh)r% z<8J`u=|+pUhy|}Rs?Gq7lJ22oR=v5k3RK7I);2@3;Qhx z0<)16YhIiu7kC*9bp|gEYi0IYu%o!Kj7bF8@u@zYNSldyYkDiP|II32NYnU|a>X1; z<6tDimaO_2wNoFj%)yk@6`Du0RUR`jQ!2KtDs!Z(=3lxnw#4 zm7`}Axf{)~XTMu+aIHKd3M;BFR>2bva^h|I`a{^Jd>OF3VE^s1zddwegFpHjjmih_+DrUAa@H4Km9&7In zZ*n)wc>E+<+x4ibLtx~J8e|-r0IgJkoWfUBwC+=eo#mholFeDOJLRS$uSrw(`SuCJ zPHr2?Y(wkHC9+?B`M(0GG*gld?CBjow_E&D9qb~BJeqL9Q5B=v@=Q+J_@*K7$)boC zLAO2B`=ruIYGhW$u9d)HRFTe?@X&>)kD5H{+%bBz*2+NcqgQEg@&c9nsQ;uU-s3S1 z*CJIB8;zF_M<2(ZCj~1_l&2$aj#R5+>1ef)b!!p` z#-NG2R6>}P3d{SIvx;NuRJUB!iWQEiH&QZ~6$l4E&WdoJ3yB<%xdn1g$)jJe$@3^u z`E@$RTj@iu1_!cUQjO43--;PivKIP6Q(NC5k@<b?9k-z^VXkOQ zbN(Z@VT&+cA%k*#G+B|LgLQBwirImg#I6syDCBc_O!(-Y)Ym0rzr%r?7wDAV({IFl zO;53|3UaL59s`SJJxKhzqiQVLrDfKL^;Q5Eh4uK$bs{kud4=1yeoji(_x(@TaJRpf z4BZ$WDE9G`s|oI{&K= zEejU0qR0Jo>dHocYC#+o+WTc)exDa{yvKys*1A1PIW@FgWkB33SH6IZcRrm?xgmm^ zIF96{Xy$1QI-ro`GoiZ4@QA!oa60(LrgYe>&Nis@_FevW$Py=tT1R)Ei45mg`!tqArPhRTFAYTXVf}8(A!pl+u|9 zE-x2sCT|f4f62-xRXGcVaVj~?yUx9{)V)_uG}~UD-#yV3%Uf_O3%}jkGG3g)UfN|5 zWs}1djx3xMZxm|hxlUhlh>DVkAo7mqf!tq_G1oh z^kpg}qvzow2Rpa;Pb9*{+nNTK3(41liHeSi7frT#h;1#lHoq9P94IJMlvnj~U6EB~ zXhN2ov+)wfv(nB<_+SNg+OrE$=_^d}-;v|tK^P+MF|)tk^0HQPD1J}z1J|#$oG2sc z;fBRanR49iFh`SF@s~EkJ6qb$%nh?ZVixX4^edm7I!2Oy=Dl)fqccVV&nD=n1F0Tm zx^0aLiwWX10X5{q)edU4st>dOrNJk$`$jK4qj7q%Ny$rFU1(7gpLV&4udmYqlQUS~ zD?lNBBQo4dt3sh{?BoUYyuXjg5GD_{-Wg%@aQHVB-A2?j`Fg2apj+Kf{+nNSJtnxy zPuh;5>p*nDz@KJBba(mnB6K=*L?1&omu54KInM~*F6Vf7w*HDzlrh{|r0`6)^b3kg zAHG<^7ZPH+#s79m%hVg4O4mq+DblewTV@*(Ij+!`l6JL9GH>Wi3A8rX8nR#UV8!Ud zKU-hE?38hLCRbHhp2BRo^Zl#w2N3mEpsNO#Y0$8A=K{F&+=yBk(D2+q{~lbI z=V0J8puptUx1wMzfl2^M859jVn-yL?`_-D;Ml|qA4EPn8zG&Y&%Qdtt@m;d;`#@CU_ZZpprva>>n8typ9hz4zz!6FO*?L^oa{`Q17_H43`+dtGF4E% z1PYP}cPlt9DQnF}(ege8HQ-eXvGzVR5;KEL;6yu!N7UP0(ekfyksJ`zVqno_TN*xn z7zqUq;NP4)J~qq|pTgrfB{S{W$+ZRn*iA?qf`ZO#X#_-m$@GnYDu7dhZ(+8-{|91% zrb#OW&lHY!Z9w-Y>JKC#0BR!0c{Bp!38S*$g)XOrBgEYE=T%4P35Deec8nb1C=>+l zZ|W$>c_QFEf1@Q%e;Nv`_^@Jtz+Fq$`1l70mMYktYv1``VgenNEImPrDo-&5{m2QT zT}%>vIA{w6{3jausHtEKgZnoN41aT@@61GU_WuFVXQJJ+2(kY)uYgZRWD=kxYn|1! 
z{xRL!BihIsz?h((OPs*Z z4`tyR+0x0;HusW@L{s+$f+k6YhNAS}KUO$P?#Qj2g)_+hstVJ}{@_zVB6^=24o}g@QFFEs@$I?~7=ImOVRKnd2~)Ob7Rua10%nqYFDAq$FlT)bO1E zFp+T0t_MXMpT;XWW(7#^y^0m(Qd2kb436hzw_TT&4m}2gRxTw%@_xnn*=5( zRCa>`3p86>NB^2ieO=zT7I{HijwkNV*x1nRy<|W2-URjFHob^*{0_aaqsrD;3NQ4) zg7QKHHlFu#)8OWr?42K9fQPnO+YF*pbgg!j7X}DT5d`&LOevZ@=$wc#Ns#gt(B76f z&@DoQ-V+R$fl>}b(I9LQont(^1&uh^O3_U~K*5@R9|Q#tq)$-npv;;BF;$805CI~- z8_g1d69O;uz0;!&NM(OmX>|i+biLT`z<2W-04Tl-ZoI%Zw_bu>=M-uOxBihlL_8os z;mCUgOhR|8bVV#=ZYt2bAwT>Xl$te#kiI6}`2w{ys~n5gdHrZK85(jZ_SG%Hx7YY* zIP+{y(84^>aDi2jvgXKF-2|;I19SD}5fte-aQkjS!ryxA{L&# za`*gr9F0+58pV}v|EIai6Ph%7@%QjU|F}yf0huw!-&|G!0Uz@*`)_G9T?}SMt`t2w zm<>g=!`qQ?n+8wcEIl$tvkmIdU2R4a-b3kc4XO|qq}DMWk5hgCe#jVcub2BQ}g zg(^~o)c6(umXgyEuv&23=ShMi<>Ir{_4*^-eh6RJmv{%j;aBZNBQr+>hh1CkB3 z0(?SrgB*O3B3r4@FYiEmQUUrl9S&y~JTqT=%=mNG@?3aXwmBnoJ=%j(`r87qYVbOo zbi$fFJcFAZcs9w(^iJ73~P7om2oBW2se~zusM{H{-C{`e{#!u}y-Fi$3IA)S0K{CrH~(LwGW67&X-PNu zSP7oR|0#q;w7GAxZmum&%KEhxB{F&`<*U04#E9aZYyu)8*YRT5N zs*&A?3VoY_)z@Q_n*c85meDK;sGn6WFUHzW0694F7Y5 z+yeZ=06568wcGj2h-rdLjZrm2D}GhbJt&k;U$w(kK|iDm)gFZS3USH?cx%HKj1O#dI(#OyOKK@KGK}g1|%Q z%E4j-BFC61^LE(MIIF|h$m&gk`1}71s6h*it=6~s$4+bpvxsdmzP-xE{v?IlFcPDC z77saLv?9bxXC&MgR6ymPDfxivXACH&LH|Ne7b z@_1=D<9ucoaeV|9%Moar5eC|Zrsd0Z6*4y0>+rMrBHW$PhzRu5DW&`i@B~j!Hm##*bqX|JPTaQR;`2Z$Fgh&4-iVlrD9AOATxddr6P30jl98o zGHm;x+4Tf}>zXOLvB#fZpY8>{@~AoRd}!7FUKf@ISi!hxLx2lVB<|S(^YV=3Te*;0 zBNY}4ELda1fQ|rbOh2eL8{1!4p;ZF04Bp5CfuV;f1Sx-h>~)^}`IE7$i|W|XO8{Q^cD5Wu1Vh4(=CzIV$6*Gi|z%VKa%PXlA(1ZLiCtN9asfSrpf6iIlU@HQB+ zkx@rLkI+=+tG_|A40`ZepwuvbNj&lbG~zCfoZ1DO*}8)#?5*w+SI_UPT~pH9H6`pJ zMqe%aeH`?96(6lo?n3kZBg}sC-poI4KWp81z z-1nj|+K*>X6*}BB(U>%C)X^+(s7#S)3HUB~GlsxgbSaJIONZqY+-o)iHbig&@i#X= zbDj?)vu&XGeO-+WBu#+S0y0&*I^G!iQGk|&Y+^d<+ch$PRV8a zD5w&B2bXfk7nsPmKsUb8e+?AszzI3&*9!$d01&Na-V_%vc}V-QL%XNz(0}DMgv}s_ z>48Z)>o+_I0474_X?1$Pfi0f;)WmgwfI)%uiuE19ltS%0=$rN?&<*}T&cI6M-Y3vF z!)BmqAFRJDuj*inkBv**RIkDqw7(GJnvR~0ufjg1m{*D<y`%o|)el;? 
zz(d*c6Mtk+5)8j!k;B;xT070Vbr9SNUUrDvcv8L#PkRwJ`X(=-uV{s05jyfWfXODKpk zZ@|VpclNC@h+F2BNpIDfptw0I8hx*^y)pSHgO9JKqE{q=3E%u*V}Ou%+_ief$CRA8a_Dx!wNhvB}uJF6}H>zX^OJ3&fLZ z=d2l8E-DKSEA}Q>p(XSYd<4Ee6CyP&oRr~qiuIPKv+1&33(L*Fkn?Utr>Dr6y%d@8 zTkaLnafsLoK;Pr=NI-L!-e4++5D&esd%i&P`^Z1Q(CN`x{#no#MDt#da|fO9 zgdh2*{mcvv_lHI}Qr{t;usT8@fop>9iw-;TLA zr|=c)+z*}^r3d7XteVmIvKJQ26g_ky$B)9uBltue*b!%VZ}9y{@C}Lm$U{R}^kRtl z2SIqb)whqldD}}&W}@EFV(9*k?X4+nybCJr_6ls-gq+U%%h7gWhvFl2Y`8mZfH6k1 zNgCpGq)1V{L1|VC!YZ1l z42FjdxNWr6>?TXFkv6ogDfTDq;O=}*L*{nw`<}dh^A|aj3tjw$KvksEJL*Uf1YH`r z4g#O$25{i$V&v|KG3}OE7TxDP&*`APR~1B=Kv27OrN`D&5!T!AiEp5s#ed|FUUOkS zOOH_4aHb*L1x{8VZaJ}+*78WOg5z{Tl8E0Hm)kw?^61O#bBI)0;U~IL&#{vm5{wFG z9)hnWn8EPsH}>28P+N@eh%ScCJY2Jib(XoaSqTy8v^KtjhU!c%Gl5UIl!!Xn!ve&G zdWnb%>RV)cvu3T-*sNxDU@WMTFqJac4yywqeM!6^ElV#`v7ZcCEhIc@k?5 z%FhJUtpcQf zXwN~*6h4+R1DHcR^7|Wl-d|R#!~e!6v#d?VsmGOEP!Q{ng?SF^itxi3)89LXOt1dk&iZh8xmf3u@ynAMuqY6yf{h zB@CzJ6wzkM+W@box6znYuri(@R-k3tnx~UBmdqb}M_`&CW1QJBNz&fVjVBD)UL0)> zkf-Qf@~`yUM{zZ{)mirbrWn%;mY4Kfi^^~up)%e;w_q&FWZ=H_?bF+`Vzb=WR}`I` zTOAVG3Q$5;o7Z1N;zz64Lr?7Kt$~7Rx(~g}2Y|#JF1bXucO6n+UY~R_hbZ4%)Gpo@ z;2?i1a~zYK+Ko!FbVwHZJZC8r6)D|ZfAM@D*e@AXq6)!qN^JoYbbOBcy??r<%0?ii z@n%8TMD%CEjvKw)u1<46gNwG#@1Jm2vM3!KSkvU*9Ci$RMPZkoYB%Wejs`pXwB+exh5B@xe=1af_C(HO;S%E=J$liU7F&1Snan6nX zHDUdcXV4sTc;U_K7Tzix^7zDox46iUM1vnof6AJ-cfJCckMEyNt4SYqlhHfO4Akbb z;zCDcS(*G!gPWXb#5QHqk|o`Di=lL0h$qB)X?VSx_Q^`?s+sqdCH)<u1Ig0q0KE!mN@C;2+ z3v%Sn*6Kk*rT(JYLC^TrG>b5CN6J@89+rGh7un3BpNf&rMB~t}MBvS;|LdigjPom{onQ<63`v2ZCK8Sz&xg-t7+;`()`h@aTs#k4nm z56``t@WO1$owbc;-d~HfN=?VOOmlpBV&^lNug}KFH%rYGnmD@%XR{SHvvJk?LI@pK z)(Zwg#Q8g4!_CQqZaTBQbxf1-7fE`jc^*n&(j4d{$4-$nkusjbp#F}!B9T^<>XJy( zbQk_JeSGn?@2KSD;lLPDB2p(M(>fzWtBrq;QDPFtWd4y#OXs^H@hxEMJ#Lozs}Q z=!lkL=p$!(pl)zT8=->LdzE}Cl06OIvmzwk!T(r4 z!7|CWB$%a67g6x*Eoy_mgN2ar69;Z@fO3@IwQ9y3oi7}UH&A@$)cz?)JQPV}$%Vz} zSU5w&?!zp1sksOBO^A0PuvNL>u;>_U<^Rx_wTS$nKR#SdUE~KMWCU6)B&!wE zl&Zj)qAoAc1r6m>J3jU;6|qQjSwh;Wnk8g+8cq-^bD@&$1(iM}JlGa&(3B_C{59BN z*{-UDF?uj|OKkxI0#6ne&s;rh*i|{Q4XlgY(8K2$s=;@4E5zUn#OG=w$Nqv$9e*fh zMpv5L<`_-rboXqN3>JZF(_M6lqD0{gInyRsBkPoSWl`KJc5wHNHJlh3eVx8B z#K?CDP}2K4_`Yre^SzcXSSD}Kgsnm$}>C#)R*l{|F!&+U~(yGOLmAY;X3$kKB>6%;l&-;YG9wgB3(=s8vXdF zrgSon9#Vd#LA21Kxgfjjq^@ZQ+Q7^|nqJ9oPo3C1)llBO;o$SvZA^NR@ta*s-BqlD z(Hm2+^9#&9G?w_c(!kAzb-SE#aS7(#(;n^@dH1cn%Ou@byM)cEH2U#IQ8SQ87PuJ{ zxZ*-vjq^?KL8kOQ=^H5uu8=skz0F)1yy4oUc0wTvB-wuEar5nbE13bJx?sP5=)DGJ z2neiZU(PUyUk{j9#A`k{41*l>nUHkr69?BVN(-K+`|`js#ML;tg6%U1u+3u~)*T3} z_9}5YJiyNJuC%`6RpP7DSD24y7tSWdry&WCrp$*ap%Ne;( zbeGVYYhl*Up&#n;Fy#*ixbh92gA;W+V1{r0BE{lYlqvOlCCCscMK3mhNum7P<)ukc z3-&NTCqnk)bB7<4rcA-KDthbu^tfC?E$5#{`vJ#5RBoL2r0xdii3`@Z*Zp_G0tfM? 
zV3HiCUG)kIrU+3xlAeWNw++C?BhsAKjNtW}*ehKQ1)+Et1Dd5-G_BOP!qva;v9Jva z^X*n8e0urIG2G=#01BSBfIK?8#a&RZe|>TD-(Z(C9NqIdW_$Yf_betu(&rcGM4}bA z-w$5XoJAAf(`=d*(x7yqMsoWoQP4)1Rfrk)4j43-M>^>sV0{C2ge(`MQw{D!yla;T zp*N6Wb@Xxo#MIy4J&XDj=a|p3M7*KFBfl}g^@%i(*OlmEW&9L`y|^Y=95 z@Xx)e#VMI1NgyVkm*TZ4j`L^@ZvA$#7J0+~f$yl_%`)kmT;|u-b83~))Y^C0j21ZF z(#ICTU%=&x_bsW>eAsVE ztGqO1%t8X4IGiHV8nZO6?vP&G)Y>*;Tr#wKTV2{}#LAekA*GUFae7k}P?c7~Xs z4yRl?aH4tk;Rd&xW@FQc7%4<#R%RA?_e@Zi8v{o@sy>u?kuG$5GPYB}lgMjkh%xc; z4j-DrYOu-u5&YOReG{F_4c)u+Zi+hw4gs9WslhS=)Oswh3eVJg8Jy2fafL(Cj*cZh zm>D)M#(Qt`3u1Pi^D(tvBC&whp|u^*7U#B2?(+beXU?7+VdWHJ;#JU9DhZgIzLQol z#XC^y-JbOO3h^E~D1vk-TfsbL&F zU(Ad$TI3xVdK1f@+e*2}W@nn!Lno7*bwT+9WRuYCrEex)gDDXyH<3m>^jmZN;$6>g zLlJ|$&Q6xXVG9_oF;+9n5_E%u{xV7KE#jHeCMQr?X=ws11vFBLx%uY$mLMC2cBA$4 zf5LQyL42yap+?i@l$Z24>Jr^jex*|(orEqt^Bh6=oOCS|N_ZvuVdmvtktN3et-^;R z$JXi>aDu_9m5_f)P{+(Rm@+r!V0fESgp^T=%q0ro&10nrYkgfi;B4>O2_RaN%fC2% zj>&lvw)cvqm(PXd^WL03k~fieerLolAsO1d(vx0W!YUl}(y~Ccs1sC0T9NqCoNjZW zbDFS$rxjW88e-S}V>y}`t)O^k%ZV%Wf-?8#)O=&QDkUi@o{6%A_R#s!_{)(MO%9nu zk8}t_iL&@=4$!#NTI~0e-y-pyXh&8pR|4({>lu;W%EZK`Zsh8c?IsrL2IuolFkkZ~ zrH;+bh~O~@me;1^3PquyBzb7m28jdsV<3bT9TrwLiL)%2Vd^e>!$gDdHIvVY+{<{z zPRV;%nWyBkqjiMAEAzr~ihP&)m4-SD^%H}qLkuIrcxx|gUnySWcRD$h1oc8C5bS%mckF$E0P3;B8IVN zC-_>9^M_Yeg`RZ37dc3BvxJ&!Z)-9|?a1fZd-oF9#_bf&n|UN69Yn;~pXypz2fqj# zk8>c?w4{DBcF$?c*~(d><6_M==aiw+cjqesSff>vfnMO}$nqg6{Y%Eu=S zq4gzeU#6!J#}3}A297;XzP<7kb$emA3HOzBjT@&_#g!{%B+?P$pt-M3Fr#O|4|%j#q>(EG zJ|`UmvUs@r$f_fY)^MW@4vZB@ppokoSL5IIQT)`xb9t5^4<&Mm@P$>IRf}cL4Ia*H zNyatPoJFjjq7{|=v8YNr2RTbUab74wp^JpY1Qx*t{wjo%1ngMW&n%+ZvoOn*4MaY( z-=}CA;Rr|+a#WfvAY^5|;*b-XKZ{YNlur2qG4$Nf|CEukg|m@@PA&ezMMo&Y{Q1D&lrOAcvL2( za>IO07bS8~i@4k(D|v&uebx^DHCs)JSru&q6^;07mBgS252psO+*n^|t7SA@^XbV| zy^n#ADiAMM#+34;RaL=YAi%uj=PA@65V;z%iFhvbc*vgI#A|7ootT;2^+GKrcC_^c zXTf)vH*U$rJR%_&z4p--alkIH_&klcNm_(Lgls*AbMDGbf`Xu_wPJ_ws3(olE+3~8 zWC|Y$-F2r|AvQE4$k{R*?sjGW=}RsWusC*6WQBmTP?0Q&xdIO-idm+-&RTW#{)grk zwl`7?=@S;S_*lbANkNPbsqV@P{%j|?0}quEl&^@M>t`X2rXmKv#GKyR2yWQTCbIM7 zQ1Hl4DH*{`c2~ZJE^FntAGl%b7`>=e`R83Y!nfy|SjQ#sd0ODPOQ1C3+tYljV5)jm z%XC*!yIVX>E2t;=c0V z{LS_y9{2U-7mS?;^^Y;GoO~1*^^ZI~d0Kd5W@zkl*`M=q>sHHFq_v_Pjn|NG=m@#` z{a`-Q{f}Kkc~0y6MZL)^!!NZpXiQlX6Y3MtlJ66QQ4rnSMNA3IfN)gkzc8l8^(I8jJ_*}>E<0D-Kq+i0zP zCu2%aGUhg##bi3dPJxYgPgpBQ?IlI%V9A*eiN^I7`J*f)cPar>pLQLu@UN)at^#Jv zw?eF8D8AE*@n@%NC60rRax6LPND1NXH--KMtanGgWW05?INBXNNO1n9ciqLBM~Oj0 zKQKKk$)RLBo05hJ?&#^G14)2>hs*v4zqwkz1v zo!mEJ9$YP0eD$s+E=O0dh~=t_-uj8^gKb@#iv-F-6#X;Xi9-FuxqdiY$-Ds}^HUS_ zN%d66seu7!|C_g$5W!ib624C!dW_T`6B7-@P$Lj&9#d7R8U6Y`hUPN5I?B1 zs1&G1^%@?Xwd+^G7OaK!erMqgZXSJ0a+cX>#dp(AmS%lsZd+NB@?F+`nhrJvnTLMWcFkE6HceBx--|C4Hw8kE!l1IV$C^Ec(;+V( zMf$M}_})qp`*M#ZKQVKH>(@);4si|FoWpYSadgCeKN}>5IQ{{gt+ySCB_()mxANkhP?+20ezfum3U6INJOUP1kT(_tH**lNjl5hm zD4r(x4Z;c={|Qde5uF`v44-NJf=XEk#gmS~$FS=LjU4?on>Qdfu?aoFl0LhB`QD>k zLCqsC+bQh#0P2rXxqFcP>6Mkt^)&ss3=MLu#50!6s(TQof?29L^c1NEetG^o;5h~& z$!SgD^V`lraLkqf$DT9!gy)M74J)CRJdfrHe+59x=zFw)>`bn<>hs|?kc+HkUS%|X zwvs9H+F8^wn*%Jy0G{Qe0f5zA6Y@YQZFD&onqj!@!``W7Wc%suveHk|My|{s0qF)|)1hqX4(Sf*P)gm1BHbuRmxL11A*HBDN{EPvGzbzF zsepod$Aa(koZtQX{&An@-t%}o&%w>wbImo^oMXJ>9q;@2`R{ZQ8xIz}YC3spwIJ;k zlLUFD)0oWNAHG2g$0#e#HittNQQMx_>9_E#^VN%t(jUZBYvi+%!g)ap?9t2}yTq-M z551?-&6G!`8a;dQL#D`NAH4BfQ2TIq;G}@G&|2&CdClJZ5zv!h6^CUB?RByiOOmP0 zYNn@K=J95ug!7)YecIHRKMPjI__i5UR8v2JW4Yz0Bfdu^RTDyblY1t?K_OJ=RT8`M z-U(TKN~^bal!Qq*X3RT8zj7FrE+}j=b z>mHcGnp<1Tq2mXrm0tj5rR(k25iwD622q?JF|kA3kB+e(jA2DEDz~*=agE4;Cg3Hy zO0`~M_vc5llJ@Y;J!0HQrtRfTk)WEZ5X3Za;miv86Qq%87w&qImQVUs91jv8Z}Izl 
z+2GgG!$14<#+R$_lA=k-0uXwn-mD)GpgV6Q3BI5E^IQ5A&c&GP+IOa;rR|tpnvG}6 z4Q-|;)*r8*_4CVEX(T$<1O(LC?Ush^0Crxx5Ep96ZDUNzrrf{)_37vL*#h`F@I#_; zgGy<3ua4AJYzD}Fu592NNNKkODG|QeDX$5^hBa`Y$?iJI%ZM21B;0$MPF91uyfy!x zy-Qw1`o`Mi!(qV&#AWubKZraubNm#jm!NyC ze4?f|Ps5j{McSJZ>ri{=){8%8%ywFd|Ky-EG~ES`H@6&16DUr-aoUMv#ew~5?^A<~ zHVt}gX>qgsAhEfeR*hZcN6OM)bI;nZrt7(|Cy1)Q52!uE(Qr$4nZatAvE53tS@s=+ z?U_Ja=QMgpE#P1dNLkDauo8P&O0cen7wczB5b>*dfp!EKaLzgRT>YU-q@ckPqElDzy`3tu$DqmJL@hsUz%fm62myyw56oe8FTY0 znndwLJ!#fz9H1Ow-0l3)gB|y}-bNd=yY-Wh(x1SK*KNCudi>b@tqYpOllm88wDZJ! z(I)1q@A(!DU)^icltRwC93uX<&9&DzjHA-tU0ZosN%gF-GpsT>8!Ta$V6c1ZFWbq+)a$ zVjW7c!EDLSx6b^gR0vRMSh&x*kuq-(RMw*975bQ0?D!t;*IuV66xqh?D^ms=|Cf?2 zEW#^5dI+Dwvq(3<=bc#Sjz=_VN0O}{W<@jdgP@(vQmRo)_mrues*7|}JOhaU7ai)| zeZlHul!Ui&N~KSegN(z#d$-P}?5#(HHMq&6gP!CyGqdbH)0h@1#<%)GpWfBHTKWT9 zoYhoIsE4cbNJ{hsp4L4`J0mn zGyoyg1DB$@dsc>O?GD4kX&%!V^y_=m)PbD6cnMVX0S6(dB#xmWnMKl6>h(&<;K~FS z=m^`}v<%BI!O0^Ll^wM0W`XM9Y2U}EIu#f?r|{)QcVV#FL(12xEh`|Dkf{-g<1Mlc zYT9q4Z2hpSO5viq)m=cHW(aCJsky?WFTqPryH1ieb^lmCY8nSG|GSf2?v&pa`_R)C zKPFt;DN)*ZwOJIT(hNQ-F>)mjtf%674lQXcvE5GA61(Tj@Fz)5aR_+8*~)iPM{B%j z+SCe@y9sKRQ<-zm_ZQE4GRFM5tjZ9W%7*f$e(oSd!M*G^Dey7jm6GB6%5N7tN@LFr zoZq0O;)n{k5Roi>8!{(*RPK`|`=A6TbrlaPxxUYI7gdz@#Db|U?K!tvaO%8X<}2nG z1nVq*cwcqoPPezm}B0NR}=+utLe7JE-&=@wOUj zNWbxzF;&G{U_Had$#MClVEonglAv?Eloqc9E>$E<=#?>6FxcW>Y*F!?Am8W3UGEFV zReBKfydyi!0KXzZU*&bsFUnQf`?Qk&#OUjJ@-2ehJm(3g>S=_0TD%DN*k%;&ij*Qm ziv#L;OPfGfCEDJ|^ zh*g~S>oCGpaUe+Mt3Sh_NqAZA9h_&16ZnE75I4oWjl=F4Q(w=x(UM5j_8p=F@q-*3 z8pm;>&<-+v9|GE~wOjTb!_KT~E{*I$q|eyLgw`5#RyzXDwY}!~!o>a}kG?8BX-HsO zcRqwIE*?wo(PN=$H*DT^rXh_->^R%0n(peJDe=em{0LaDeP*|>fYOZ&1qj^PAgsPn zYWsk~cUoVo^6P|jc9#-0H!i;Q!=e~|ve+(_%OWy~xV2TH#%JG=_HmNcwTmy>DV>Rs zc9`*v)foxDs5dh)!P*MCW85+vW@j6$st1FA|l^JNO1Q$@I_>X8fF+bLr zy-!0)n1-{==tQ^pF-F~Cq|#uw7-B_o7|L2~lKQ4X+{V*H6~2_sQ-K?O^@RRI4z=@_ z(to7A)DC%bHZpuLhqKIC$RxNN$S@78qspVQiWL;6PO&}_5>#g`9*7mGG`L_$M`F=Y zpmZs6wpF0*;~^If=}5)Bdm?-IxuU-;*$uRifmiw~)AE)h&<4zE585pDq+^8nJ=}cV z+90&OM^PHGYQ7M5;n) zhdkN2az?FR`DS-4&N8a8HNYEL4WafM*T>@Y>Re|a_sQCJn(e`5bf&O|HyJ7#&l_^5QC|f|z z+$i{FlRX(BZsmzrdC(%wX z5Y?`gA|O8-4QeR<`j+B-4T@vn88Vy*bX4Hp0*7@54l3QBz^J`#w*`}2G6VsN_F+BM>iX0?-anvLwfRGonk8yUep)wFJ+LJK~5?AcpNy8Vt0~P zpkPt_QYZile$YQ*D$p7Urko|Xh=4L8(ggpFTGE-In!5iC!3GSt^z zfJQjn`U;yW5FERILzF|3Rh`XDon8KknS#njPN|v$GCohp3%J=MhURzqFY5aHUYUt;ll!TvwFwh7}LylD;bpwl&?RP+>SW`gvs z;X9B`>myTzRfI-x0uW=eSKwC0@zJt_=htDGDuPIb3U3bF0U(J?6@7~rpo z*NvtBATjWmik2o6I_Tn}%mN=}LzLSKl%@Mq<=?yrH>{sR&trYx1BE`6%va@u0EtH4 z15tHu)HCsdS6g0BzK;pnyVjfNs0$r{L*judsRhSr?ihiG^!TSVkR)Ez;{B*H(ffU6 z+2UN@Fx+tSD#%2(VGUWFp}9}EtyN2$>RH_ybvj|P1fC?p<~JY+PY7Jp74r5f&ch1( z23TI}m%-5ic_OKO%8XPcHU6n3z%)Y{BYv)OxF*IOtEO0%AB0PpQ5+b#E|Dy zVV-8*{(9M_(OG5W`#U>D@Hbbgi78O}W^e9Dy`~gE9!8Y0fK=Uqb54+E|DgW-T8-|_ z%b?)%h{npi);?6YAb*+%MJMYkWZ4f?hHE=tVtq;crDONR+=!enynQLJ#c`{ zh4)a(Z_qLFlNZa5gyRoOBGb&e6k}oO1@tl?D&;vQ0Y?P zCu^7Hq{2uMauV96uAG0E{aR2bm9BukJtPC;PKaEOUw*mO*M>oRLQj=>l+fOpQabUq z+l*%+KMuQhOU@&lOsb$VCpXVWq#>~|g~F$DYeC8UhSD#TAA3)-1rlUAM`&zsPWp~{X*5oUL+ediT3WNcl$Gk!4qW@k& zoD8(5$kO}}rqOVrG>9bNxGwO{bit%cTjQZTwS=fMU)@*I&zvvl2k17fgv56!@>wdn zMr%L<{aiS~moN@Oo)0v2rldAbR6+~Gj72ei3+o8i9U2U?b4I3Ebu36V?&qb)@B?IF zL5S9IQ;B`Qc>p!x9WX#`LO;X5>1%=3RgR*w)GN9}#D6bxqPdHy;BZ5NJ&8gX+E;M) zczb*HbXPVB^HRni;v|se>tpF^!p`$?1O~dkCu*?Kh+R^&CFS<;>*nAtMhNuH6y39{+v)KZ9X8|=&dBypu5xvDqUmHR=AuP z*6)5*X8Gx_9V>}M&EiNBda79G_uJ;R<6Te3K84nHP2WHIqc2|nw2@UMRb%+;=s0&5_>**R!x`k_DuFW!FzGb-py=_R~Xd+o-l z%IFFXC0ehH!E&4=sUt?r9xp{-n+2(Oja75?pPmeVvTW&rmDi{LJ0Jr z0TI~aXyM%XTwZC>N@Wy$xZoj}qlWfwLJ>dQ1)Y9M#lwG${h$A+ 
zp)~)4Fz4kB8{hp@PBK`C6881F>4W|0sbBz&1V1$NxM#Vy;EjOU5gHp+X(7a8$4iQs?^w`MmfJt(2`KyDZWpM}b8hz%52T;w zdIDMQ?Cq!jH986-JOI99-3Q`IE$U%9P5qdDqRJNjtr;NgWY<}d0pXZfABB95c-hhw zj%sy->{(0OwJ@Z=AE%{`;X?g}KqM0!WV#cLZheE_9%<La2Q9-gR8A&omsi#FcB!M znb1M*2|%6lB|YhGa(=YCj&dPe0{O&(Y!x&5%GMfvD=Ur4Cm3R8IyODwiShBOR&0S?3UNC@1zG4`2 z-sOkaqqulX7Ddubv(AYC_p9P}-e3WQhvEX(J)G?ERakyR+DG$FA%{D0Ls`9&C6y01 zit#Mq@7$BEc$c+d@vOqvd%gJ2*E_M*LOlH&4n{__n^7bTML;E0Ryh?dNOjWBg?Uoo zuuw!n5UO!;FZ=H3pF(KW?-SL^T59+R%iLKBb7c-KJ6t^TQsep@;Mm%rA*yxKYy8O@ zSW^EPPN>Ep&fgh<2nJCb-P`A1I`aae2K;tS)@u0wYCyy(& zsJ(iZzJoCCMURx3cZI`Bn^Z9$B-BokGD(zS3suLrKoPaZ_6bv6Bea17^$6Lw54gvK zJ9ub6R=?wabY)vZaehXo`!YBT14#R#SuNObgC_;ye&2n^A>LWYo ze!KHgFVfY8|8kSlt!%mT9Vm&y@ci(px(mGk_O~)^oRsnyeNpAXu8n6XN5)yl?%Jn1 zaWqMIToip|c5hOC%7#Js4#!^>m9CkQqXb_A?AMN{oHs&S^nguHZfG@nF=5gE+ufRKh<_wO4K{1T7W@uh~d+e zU7AcJjL@`)#y?fQh}LMTnth0^xwzu>)I30Y~7VUNqP?;8-FEAr^yZG&`9 z49#Jtg%l^ibp!~6S)S*XPBY-zs+m<=XZc-iJB6H7{80LB`!=EtINK`F|4OE_b6emQ z2GQbTD1+tN^jW2Wml7LZu`bT-_n<-_1dPT!Xap~wd9!hizIc$hKMJuW%Y}A74Q}0! zl(Vr^4liE_oy_YUcD%JE*2~-gtSHN%Es8PO%TB-=gw8lE-Yvg!tt{1oL81?5mLCkZ zs?LI)WA7St$|tsQt4`v`lEr^(Pcv-z3`y6Yd^_j@T-|{Lr z0#bCPh%V7vj4`LqNDs{L=MYAmW4rT)!LAodn@nm~`%8xqDhvG#M3(9L8R1{uVJpF0 z+XD83BD7)|Uxs=bas*0I&t%vI_Z3S*l1iXHzvi$^UQ*vUlROTABIu3!jEGuF7Lp;B z$~#n1OGa;j4wJK_qd}DB)@YBK1qUK1F`*?-EXIoZA}v29aj+$9L-@0B!bL{fpD#CH zL~{-0msXm8yR4!f~w>O94}X64TVw8yNTt$CTj>)RB?>HK|rnk z5MxX>9IVQwcSW4J0zgiG3lLQjB&BB9u@`&gOJY`{{3NgVMP)xwya!hACtU;z^ zlB}0kR4ZK|W06aZeke}_G+jMP)EI0rNDCzJLT6MiwpLljoW!lvUFAA^{B^7U3Merg z_E7fTDo!3WovyJ{?2yu2(aBW28NBta6zZpoc~@s!!O{n6D;&bxyk>iK&gps>6TFaA8%IQzdM^Jv4hnu@?zYywCXQALI=5D_*2`8QmR-NTC&1(ha zDAMO>F(4OP>pAuGcb7nV_t?!XNb154VzV$lTJNmwSP@7DIZ8@AO5m$e;p(+$2R|fg z;nA!#VyIi~Mx-ksY4$_OikZ)Vzgw->dwCyLHOIDLbLhnS1ai$TwI6PA&hM%9f~i|_ zn}ds@<5oE4n86U1$kUIGat*=f4_TQ9;n zkvz4ej_;PGqlQ4Nkba&V?dM3~dV#{Pdr&7sHoRIU{Z#u@ipX)t z-9$~h(F6{TkRE~1Q|sH@!z7oVD~6VdJmIUdaQMb}k7lm5(&lkfv!7|5e(gt~cXnnk z%|vM8^K$aTQ)RjN$Uz6XB2zYHKdfgGf&kdeVsQtXkW(qMcSDrNRYXUN?aEw- zgh>!(Xba`Wx_GSB?a{jn92BqzT$m@?e!JD;9Cv)(!9FedPZJU4@{E(1)qT-;mYae0 zLF(y-(9Wa!keM2f^y(f!0HX51&w&m!SmKq1+C1+Z_U(agKo}-fuT#_( zz42@fI$~?uw&LzRbL=Koh|(dp&k=ROO3UW6;hH;b&~aV4yCQMSCRn&D8i$UDi^f0E zZ+>t$I)A6pBqlASY(->>?u|BaG;I)2^+jJM{*^k-nH#GivD>*nm}!)1 z;ivacfDGa^c!vFzgTe#B6*q|I;a$msn5G!ZvB1#=c0~*;ulU3X$m@q2JCC?OMUVRe zmzFtT`Z{)3)Vl5f2nP!$PH7r}9H*YF*)zrB$%CVm>rifAsGgg6x~NN_kD>(Qn;w<@ zv{=zEPl=w4l^+3{45*?2CW5!8_T%=T|5DrdCGa@)+U3QDkZxH{;)!FOFBx|9Wljuq zEqwd<=6F_QBRBc5|EO*d)?hE-3m zieK_JKT8b?M)*y6>b~0Z`;zj^|W<#0<(&1&Rmj)yh@rd6v^Xsb0e5nI}#YX zrp(`GX}SuwiPV1yQj)4BSvFEdqs#AsxDJt3qX1qY#r%EZj{z_5i3xDv1rw@VMmNmy zR${o3e*S`Q8G}0fPaTmUJ-L9pkRoru^ZueTnf{WBasGu_Va}?|A9;kk9d4Cj zmo1K3g(;y416Bq{$Ndr{<^=>UVx!R$Nm@3PG3WvEGikJ;N)&`sYgRF5-A*0ibm^!f z($IIE_k{FYY4Ui=`BlsB97P-|1uuwH_#3uZ6`i+r=wt&WpP`6W2Zf%8mx~-GEab>H z9b{J}pZ?)%^k9LT?eV2eR{2JV>fp5Hld2fh$GQBBknn@|u5j4X2eF4=Hi9ZjwO~>- z87WPd<2{Tu+3=S1*tD{^%`{r`Z+tpLfe|2BB9zbBR5RRUMTJciuYR8(>OAeC=Yj(; zgJ{#6cX;qxsgq`;uet~v>Q&XT!QL>^$w^ll!gM5Hg{1o@;3DZg`jP%RCKZS45g|wl z5yDM5;$jjhLdTIy>_lsj9cn&|LNt8d->{spD>~%M(@j}Ak67fFKB&O(iZ_j9Sp=Mq zjM-o|`9$F6g<6&3K3f}l9!2mlX@1BK)&HQdUkJ}x8$#v!Gv7V|dXK3C#8+U4vur2h zEJW(se;6vFVK%t6kPAnv$KEyml>YDV+UJgDsM!Bd(-3(Q1%NX0H)t_bJmSH_rGgP- zGRr|P4TB;^Bu|naHlC(@fsFWvfe|C}q+@W8Zo7LPWkCJUqr{*(kg?qn`c7)&HeEs?3wGLMX1o2y_R5?>!Gjn=nh*>?U%o77;TLCb)PbtjRHD@6 z3vLmsCd9l5RHDY9!v=%}roMlC)9d|ds`(NGeNyB0_#?1`_d^&6?no!U?;y}Qqj*## zL}f`oK#A(hM8gUqg9Uy60bj;9A%E)zx@BzfUHaKj|60x(iiGPIRJ83ps~RDKE5&#iaWAROLqPf!KKB6L@$8g)%71s83>5?^ZRH0Owsoca(RHb7|g~#;HGfLpP?z) 
z{yR{`5W_mDnSeK75>o|iBcw_Kk}NYI(45Nr{UHeD!J*ayQTPN&KjOCt^rHvoi~FIK z2hgJ_P!|JWEoJcHZOa&Pm}0uc(r*VEfdW1+@9Uhd?gA|q`2m6qtD6m7MPARTFnZvn z=c-UmRelEI%v$OS>o<_ym{yp6UA=ev3Oe4P3gEm@ z^9l$}QVJoegqi@V2(;9eb1ngr)dX%KEFWolQzdWyK0q3_>Wy0fbhG3JMEe^gE}L+m zUxY5L4_(GRBqyaBAUlYD3qB2VzP&8;HUX3aKf9CQR90?Pa>Oc?X(Ck!q^V7ONQSfU zDtJbAdWCdyv!BU%n@a0qWDuwo+YWLtF>AiTorvfZ6r>Rd=QHOHua0#|9khI@+wVih z9kavH3H+ojr~%#i1D)>(>V6TzMpy(>v;ibuGr-6S22GIN-#=dF$WLz>%urhNb zWMI;KR<7OG}Q zTOUL7H2M3bj)p5!-qJPY+9K7dZm?mh4%h|JhsF_oaNS)Rnr-!WU7N}Dh$3Y?zj-lE zpcfW(r?@*J*#f9N!}!}`ZF=dSd2VU29Nz)E4{%us<)4r>Az?Wp%mV3nR;HtpRb`Ev zf!d-`x{1y)c!MS(If3fa1y&dHa^~A#1OrqbLeHKLWl^rrF+aKYHxPE;lbPNyRRWF$ z$RxfZd_f5KR@$5?ZZCa&eed|JYcLXFBXbJt9#(E(d;{`+A)!C1`?ue=H@=$;sL7Zq z1y}XNVN&ZXewJ=_whf=^xT_5`NKcW7tHtN)`%TG;I z_TlG>s=$(IMojlUR8T%7##!Mj+r(Vt{}5@j_uKIaFGE^fUUjMer^>Flr{1;5HwP?(JFgU3ME7;sR z(vGlBh{%vo($75EBBlYDP!=y+Y+@3g|LG7&vlpE=?A@jc>O`oJ2B)J!mW&-95ePnc zNMx_$G57RLa3>VkxO3`Zo=X8s)D#d@WbQE z4<$Q$?xUbg_xoqOE3FNWeF`s~c$esWWN0ts&lhXuwal#E?+;*4p^6{8f|zj=g4@Rs zu+NZ**vND0+qGfE#5i8yUHm!SZxbYA71O>nUT3c;_3P5+x;yQKr(cR}t#4B(Cj^%o zU7^~Fk%K_dTTtBtGz0>I&l6m!i;ZTy&0XXGg*$xuOr2nbD3^Z6EPE%sEMY9k{wef{ zT{5&_DX|SnmN-#)WlwIEG?TrS)o*&_o#EgUS;Pec^doN<13+J_Qp zpvz6<3q~=XRxIe5ZYN{DKzpfS+WW;|t1j8JkIPVQ?A!F_u$w>?8h6ezx*J4UgL_*6 zGA{D7F}Okw>XwPT$a$g=>vZw$E%~*W2W;_`-fhYVE)i5rM1zc?1R$gNVc zVz`Wz&cF*n<{*(!*8B~IiUoJCHa!aEa1B3dJr}#e=3v%{VK#2dK{OTzhgDir<(x4W zkE!?k_n^n^^{J+KNu(7r#C6xVS??;ilbrVA6<(!^&lmC4LbF=~y;7h?4j=iII~pW`OG5haTegc~xiF5D?>4`MlglX5k4)y8wP z5qCJq&Oc|NHH4=}AY3_XO8SIz&#-KTzQV~|&kBPl0dX;x%_zjWDvniH?;rS7VmTu% zdIaXtkB`;#CaHX(K)vQHA6`Sb6gFcimW>N5EJ!$#bV82fstLU@_gYGu686vD4F3Gq zysuk0GUZv!n(D`Eh8I5?S~euHSv)NDwE;}%E6 zG{^jSa<>PU8I>_Hos4Z-?$^Xp5;Y-#UfHk?&RaS0xo@E&k2gSfp>nS+{A;#4nkoJk zXDM3YPS$YbJIRY=%#z}sR-0wNqeku4ZAjNoN|9gmCUuQ3&-w9{zJZ&URq=#~$CM!R zc7^EKVwqiRs6zldeP!b(D55u;OXv+jo_vy1g16P%veq8AnoiHAgk|druCOW}>6noA z2LdH8vSmTXMBm|!{ZAD&qGZTE;3s@o+2W)qmK>1m_=89FT!PNTm%fKT>gw%X8pKOYjupY>-JOvMws%Oj3NzvHq)v8c0eZ@c@8l|n4< zx#u^5I?pH7<2+7V)&Go@w|u|pLLmRJdC2f zAn+u*9_N?VtZK-WxexvEVv&MOf}B0PA84d3b>8tA2Kl#p#IKWkcgfy9O@J>|*E6Q> zHPVI_i=g=0?ISuX*LFj_+F=)cE=piYH9iIp?PSEh>@hiCrF*F5XOENatMQ3(BHb8cq?RCf%J0 zKaJvsFxQfAaI=5UAeeve+(s^?5#YMqz`Z{v;2MG=%v@Zhz{D=ahA4T}b+KgY=RT<& zyp4mSV?E@dEoT;bV<2}RYhryGOYh0hKp}QFRXeT!$FhJlV!=Tkd_xrtzS+@<$3K4OHgOt9CJPydg{f$Wg;I=!PNY>@cG0b` zWgf>`bL$(Zu!hAmDz1|Wby{b55e}9l<1Q1`JWJ^1x{@eH4K>D;9^#FpW9GCxJ&?T? 
zM$5R%iZlN1kWDa}dQ2jeY@4_=V_d97+cOemG>vw#uOJ}nUia}8!I0IGo zfMpIDEne&)CUvn$TtMr)`sxAUSX|@JYJcm~paj|R3 z?AjC3cPz>wfm3r-$0{r)VEia|lGkSB_C3TusOWArMne+ENo+wt*h!EzN8+r>Ma(J4 zXy=~q9!`ED19h87@&aqSn>twnK}gON{a4qEXL8+KMmo8*?3D5--hP$qRKou19*S;T z-U!^i)p&Zw&FE57Qab*_{ix9nJhOO*Sjw&l_AR zGr87KvO00famVzUTFWBSZEWzy^+p_$ccM@4K3BmvuND13k*m^jt-dB&m%(q^hs#WA z8*hYo-Mo5>(28=6bZtK~!$)Q-!~7J_Rr~3blM|^9&-R4wtTgr~%{?YEV=cLZk|+FxNmr~W|}b;HNgc~u*aUbJNd zShb4Oj&jJz{3(_lF+6`w4<++JNzTUKBMKcKaud%mZ}5w>R;j~$HHDXG0+-g9IWxP| zQh6PpYJgca)4&kH$&xM>;>7M2#Y!!mg|x(;tw3~P8KH=njjD+TM{|M_ha6e27AsSH zxs2Jc>x8FJK1*Lc7QE-`ar=4xLFX8eWsH`nD3dPS-rEs%7==nKLjx-_K*oeZVOQcG z8F^ffKphO`&^k06A&nHF=VM+vjxgmq*U6=iSI0Ox-cim?Lw!QyD`}mys+_Fs=}He- zei`?*(6AGMr`W`i*n3t_8P6KMt7UaTsbKn+aqD+!p3O3?f$w$o_ z$8KKeuX+|pF)+23)BDsI{1TAw#vqzQ6eZ#MU+Wz8VxGNF^JdERsgK9~j>{ zu08P=FpG;tu&wGOM>(U>qo#9&|HO;{bbemt^-!<}fUa4GyzP-NP#_NgQH=Y;P<|W~ zfv?P+SM2zd571g4a^a8f4eLwO{--90(9dn2PwA;#cXX#bV%* zjJWfeX~^F7{dCms4oEQ>vU(;krs}Hg^awSBB%c_GhA;teCq)-(&ctJ`0J8b=R*K&t zfcynn&)+s;W*7Xk@$uG||=qgV{`~ z)!8Te{b_Pz6tfvDPo_NgYql83K>KV^WY zArDzVL7^W9*C|~$3~4#!%zJ$XX^KpK0K5f_7)TLnpfvg$x?&yK@7t7F9{zIZ9bOo| zKw!%!do~n2MAz?#0<;j&IHbUTfc*Y)K-x3_RgmMp1QKzwh;rzg&W3%u7S72$WX+|X z45do808n{-s(C$uQM}x=3SmpH8v=z_aUHN+Fe^T7`7ow;8{rB7KgT>OMNjSlwA*=g zT-=1!SX>7v03hHDe$s){lmm>N(GMgUeML7o}~Gh^g|0TkS}(Ln;^(|iCJ+FQko zZz}FUqc-( z82Ax)avlO0SJ<7$tUpxn?<0Q^g-IO~vStr$D?z#x)u}#tK*9OmtHhJiSHy-iilJ7T zp?mz)bwr3qvoTY#EilaCV|A1FHK`Xu zq3ZswQ2Ds&_jn>)olJz^A#UrK*Ab&SwXy3E4rGuegk=4R&dMv)nS^|v;G~zmFmDe@ zV)t`E$4~Z8QGt9hvTtKhc(Ar41Jj$`hT#y=P0 zPh+{P2)I&(H|R4Kpz**0yO=1AC{U*>{$7cejo4sgju9!PTfodi1Ax5-zwi%0(8ZiL z8*pQpeXmCXdtX_?pW-UiJN_-DdqO^a{bZ@RgsB14MVKWU4}-maKqP_ifmvkds3i7G zZJ_BZJM9DV#&?O|aBP~Rh;^*xSdV_h##=j3fKJoRt+ z+yih_b=RE14jT<2mJ?m?JEQ$ES(}LA`W|l8bEu$C0!5r?Gzbd1V&~On^<5;+A4iIt`a^Cak{M8QA*mqx)SnZINctdrHBimD zEq$Y^QS15E%2&g>)8tGNtB84ndkd5h95SG6UW$NqpJ{Nd_=Lah0!SQ1f(7l~gv8oc zQUK1ck%nUd>_l*OARqWu#^;4ko<58oD6H@dwW2Y24IB7Kg*=mw*^z+gDIgY2aSfM% z9RN$OYD()sKKwP6j`hU~LDf`pHOi>ciJF^#en$jyJH7FUKxGaQ6LnkPBNdgHJ4GPi zNsg~4JbQxZR?;VcjH!MAI{UCHrV7B%lSbSQfuJtt6Jax4eRLRU-9=70z7{}>#$bP-DDSQS@#s&>9k0XgXhY$m)u(FM7P^|N90hpVC1{8Qk2!doya@@SPq;kH6s zG$|t**fYT7QE+zQR#So+`3a7vHae!v7dciKuo}7Y&(|5M&0VJX4&@X<8U1otty*Xv zXm2I3FAc`t|0VlXg5vQdy&z+kqkR!Wm>M?7(GeBMXrKh&>jqUd>`!#t-0Fq|i=+q5 z*okGeNr20Rx3{7{*QXFmCI6N4!#mhlL1sb^AGslvN$$3^}2ueDAK|+YFm4 zjMWZNWJUL1cSW>|592m0fsSBUM=8y4ru98707+YQQ|R z_wHGM3*A8Sd3j_lf%quARDoU3@VW03E_BR~*&b~|@OOVSXgx0ZSH6AY$jw&jP{%6U zXa&FU1->rr7e5AS&JvzJb6m!e}{W2A{v&g+{78ax&s?QolrVy?0ib*%kB6hDC}RY1pSU0>r7t$eN=EA zbVVikGr6OK1<7_;;n{7Bmmh7Z$eU5m16sRd(RPT_+|fFP9l*&H>t*t9t`LKEH=#nRLX> z@uR{&{||l!I0!7#Fq5?Z{30mr4E_BD$jBq%t>QB`IYb4Ic0{-Y^Z&oZ|HX6RLud@< nn7MKP;laVOJ&nsX_6J{$SfIc?WG@y6{-dd?r&6tCANIchX`=41 literal 56296 zcmdS>WmuKl_dN~+B8^hgvFYyaF6k1GZcsWTq`SKtkq`kXX#r`ZLrRd8mJ*TXzczTz z=Ucz)dfq2~$|Rx;hH5vbwvwv$%7zI5=Cevh(xvv$Aop za&Rz%E0|q8?OjbgnC)FC?{D(YeWWa0%$#i;U2PofA<+Apm^!$*3R6%(Kj`0o?#JnB zWBH#?vUmCWSl|I!p?|~5&cep}@4dlWg`lqrNIKX#I$OB7fcuMZ3*BA$-(LIA=iFbf zYVF|a0A|72#!TMc)xsIP+0_I(IuVY)@BV*Z@jvgS;%s99p8D?H?0?_=Utjxsy$~z( z!T-xd+|Tmet6-W%P=r|jT{01rsx^*97#J}ac`0#C57@mN&sL?cx3_+I>I97H#a}ed zX*4LLdxy-_>E!9CEW7IixaGsgyXhaOmG^%4L*9VxjnmT<7LZZ`QV5R*yQ4 z3SWMA84=lZyJ?&@d|mNz#;bC7Z^oy6>1BoA)$bXjTNN@)4A{Rva>8&{Z@Z zO}^c?db_1I;p5{BwM1rbzl-gXA!}W`NHHq#7K(zm9AE$g@-QS-9!INOeW$%d4p(br zw`V^amL`}8>N?Jj*7{;e1}mO0KC@kDWVg;>%OLZ+Vpq9c2_x%Fj@W-Tn94l!!HCRY z5$zqf=kdBk2#SHnN{HyqnOT$b4^pqK0yLXvG2vqC&`+}}N`!uz%QA z9YM?COt)`#4M)ka4-gcyc$h!MwJZms&ez!_QOd@iJVP_OnN$%Kc3c*V!e#En8*FX4 
zy_jhaMI&(jHOzZ3S4*psEA)GJhE=cLc0*X1IZ8|wPB(CaLE~Vm=ZDF++O`65qu?dxmPRsq<`yA&cnc9;ZtLV%hVLFWPS|7}Gc{hdMSB znf2-ynp_&}zMXF7`O#owqwX^p_D&J<+GoED`B?1rdxzg;gGud8_ttn}PMC_{`H!?2 z!`F7FdvmPqSI1U)KGVr1Fkf5Tb}d;R_>E-q*Sc(u1TU-Zz5mdb$?HHGp_Ik*>2kki z7R>Kj9I3Du4y$2yB;Vrxr$67LSljSz)EgaFOu9ldrs)(@d@fI&_#Kx!#Dfr+pVw`b zy{{Dbv*7Zf?czmAn%(|Flk3gZuTh8DsujV5R-BBB)f)XF?C`K@cQ zg1AgNs$e01jR<~t^>g0g)9+`ceK|s&#{r0>je9kN@q+nm55$8BY-ZnUf7lq#(rAzj zdz3_@L^0mrV8n;5YC-7zBSjm3YPv#ixX3qBtQYL#FFLs|l;EWxui#aAtn;ZAQqyJI zz-F+{|Fh(`q1Itw z;J<8u^V|M=S}%b~=pp8_w{nu@I4M+6Vnd;Gf4d4(>v z3T3_!3%-h$h0xIby8g37`St?rxOW<~u2|E_U{=@_?Uix_QigUvwC9ds+f9~fF`}uh zg9#l9LB%ul+Ah*I^rYUV6A^s1C-%VY;5!L@#H)c6M!XSjY>Ovnjm1h?aIn-|u3M_1 z(Px5QCv;`~{g90ikx9g2tONXrT;bPP)!q2=2TSd@`z;5Ry0tWAT#>$Ikw|Zq{6#I! zF#OV9sii=JQ9vUW+miN3UaRSOI`Q{pIwJVsrgf6Im0+%MTCFlo_KYj3&zhyGpY>x2 zI5*xzv2fVz$;_%0W*9trJ<5BXwdSCM95=o+L@+` z*`%tPzxCL&xhb2|>XA_^o&Q>2TvE^%afTRt_Dg_t9}GK$tLy+*(d&YlTuh-RA~dM#Cj7{? z>P=WNReeLN=Py%*wd1h@Nv=Q?t_CDD3HMB{=h%Gxd}z`ZUy{$kN+`AcqILUS4d3x2 zxX^VS{)ULioIxl5>0QiaMKad?T-WvTtUrM=`~eeouRvh&XT9!k$!dqDJZ9uBV9O|^ zu?Vk4LqZ+xSHidFKfWj7D;TC#&aO(E)m9{2L-b{D8M;hVV5zpyN8VF0Ay)A|T3zoP zlwl?|>GhK(Gzf`^Iz?u1=I*C{QK(aGN<)^1gN()|=)S*B`>BS9!QMoby^?w}h%RpY zN6=#t5EbzhDM#|SR`mtK?7OPgzcj5zGM1{>S*vw=DH?c~N_RcCOpf{GgcdwYllj98 zg;Hf)rV%b@)U7C{FDAtHud(#)t6yN+!pX)`%*^Alaeg?R9!;`QnUIBH?t-Y)K`2{%EmcE- zMoU=CVTmzk#mGPoN1zN6UROeyIM}wcm~=W-kVq3S>U81Crj1M{-I~Mso2|mQcnBTU zFWTc6-tYapb7v)0j6%o#l-(cUUyaj|zVQr?<8hbA9#z0mFkX>!4u`W}Z04@4WDAMF z<#4ZtIhyWk?2(D`_*3=4cs}Av<;wf~czh(4S?_R{pG+1bagLy0B)r8nddtCzJqy7y zKZTkt?6|SPmKjTD?QCF8vDk;0#LQ5iadAZGm!j2T2iJ344cyfT9SSCIsaV8RwOAx` zqyR|j3Yv{}40OYZ*&;zm;>F>|4x4{OTuC7$NQ$H}MVJoE`utgx?XNwm!0(bbUJ4f{ zpOn!_vdkiz7JK+R|KT(D9~kvQuTO1DNP4t7+~o6+Lud>U^-xu)2oWw*-z%moDAHJ? znqy!}Zv<%}2R+SShQayXAl9Om-W&AY^lR3~YgK;?GFK{-naa*|m$sXWmpsB3M(r#V zPjkX;wAp8uin1;1COUFuz#_BtN~wym^qB3pO&qk>ErY1ov>RH*8si*waB@gZpz2 zX$Y980XjfS7L6d<>e4aZB)~!avZYsAOfhPefj>>yF+nppsq;IOXk05thw(zJioXWI zq;ErPvP9)EYo~S6tmB&=A}u=BB0`K=Nt@H&fy zp@6|l+~(m3bTn)6j#lJBrzp*E;8&#dcy}0nWJ>crL$8#Y!PG~b%TXQ&vp{IWK-mP& zO}9h3O_x5@gThXvyKVU+xgG9%!L*@){XEOW7iR=O2PFb80D~vT! 
zKlRvH2#K?PI!Sz4q*EvpV_V_Sex2%6@V@_ZdUqR4?`nv@@!a|LWXXWL2$D@mQf$$7 zeaIG8IgVNX>?|7NI!S6tM_)#7HBt2ersP_evNHaFqVHIWDGaT1c<;DyF@9MxyTHtt z^vvdZWl@^P)UV4t>Q~Ea4U<%IyOdB2DK7&vGyDD1>$$ekQh37-4@Wkfha@zxQ}|(Y z#ARY*&np!l!7*x;C8H-eEuyJT+g8=mKQBdG&5&R=EOg>AW2O4EJwGmP0syeKYm?AEX^n3SNfG?16^X#+`U#}X=Te?Js50xoG)^f!;k^D1Nfi|CzbASLN( z$npQxCfe;>6w1I&ye4J0bkz!0kMta99*Wm0S1LwyJeT!~7a zWp!PBOaz?4gk}PF|DdL2C7P+d@9nkwMHuPvgu>S+ceDYC%Ku_TkOx3v-5T@#G|%P2 zlq5dr*oLGx1bU!wuTVD5Ydde}ep!OaJKUL;>P%3!o=srKYYYwISXH9cOhZ@=G!5=1 zI6xJ?3gJkJXs7a6BLF%|OMVv%46G;ZAJcCBj$aHP&b#keiKy7$o>~}i9#)O)CXRlE zQBCc8mIH8}5}b2nQ3RbLSWz}dSB1Z?W98i4P>!P?Rrv4lF#`Co5Wh=E}WQ3th-X&5YFi+*4F+@a?=V&s#gy4uu^|NOA-Ovjp&jl^J~IGdfS+gOI!;?-1-38S+;*qSwBGyv z{;CZyK4rDdBn>BiwB*UTz?cd; zO4SPY8`omNwMrQrMc`4dFaMBWIT{a4RX%qm0unpi#blc1P*QI#qSHi>|X==(GSTI z1=G`Nsb8tzR0G(A%mXLgR*xfaFuvnzNZdmm=;|N3(N%-J=}TzY&EAI#S=&0cw%?1&WyGou^6MFQ)+~;u|#Mbv9(UyTuU&hy{4Z z&XLgFDgqHZgeMjPrMxfu(Hv1j^oOj(`v-~=6utkh}ENk(i zVx@N*Mhu909jgHgvdB-G?$GdvmqA z_)CpWW=yi>*|Pg+1RTLvfCGOL&UGlCKMfhL{VhWWYtS$&PRk%a1 zp)*41&m49gfOLYq_=1~9Q!q<;F1kk)L| z{~k@j?=;|G{J+8qbi$!?ZUt8Zq6EzUj1u5P{f$5Wxe%=I|Gp4x!N1|;Z;}(szKc8m z>yH14JO8;5oCXUx5ST&u`kTtaIiYj+zmo{aHpp_xoIN9aYti704GzlP$F#%F;GH9{!xO_%UJKH|W-3#kF-6#i%zzeYtz0ffaG;9`lP?m`B0_R!`k z$J9E1$?tY?5dZJ(5@D8Nyd`OVjB%BsQ9<{0`cazrRg=rfmKv*>>fI>ip2B(a9;0U@ zOHSr<#6!~^^cv_aib6k5&J@fj_0hWJ^4L1gFF4K{j_`9qpNqOQ_R_mtM@0?WM)ZU_ zCz&&=Iw+C5A9eI>5W@ON`=1$*bAqnO`NRL8Ql>eWmir5R#FoRze2u@Q0CW|0uzUPt zkiOpU_U5oB+nv|pyJ89m($<5Fw75keM4R4afsVUFznDzJ1`hflYr8iHjf~oriZ&R8 ztzcW}BZJj>*nXGIvzp=0zJutmJen)~a;o$Qq|g!;q!@jWWxn4f*88NtD+;IAU~e~9 zV{vnJj*7>!01hk&UHpAjR-?JSF+`gKNhOLI91jp5q18U~1P34S5wSJ+)XUSKMz4Q< zH0z7y|EBi*4jIWnk7((ASjxi$KwJR?OZUB5cJH%8-|GvX!_SB*jM_Iq;oD;al8|A( zSb%&I4Y%FAYj-#%;Iy?3`VqRdmR~CkvO4JsuxXX^gY_S}(nI%rH92%$NAA(bC=wbW z7afS!fUxh+f7})Rvrr4fk38QCo4wiUSNQ$;1e}(xd$UIaw0Xp$zCzD8zd5vjbhitn zd<0U^Y=I=AGYSxg-ali0I^S}~xW$;D6Bkl{H*ve1a0l(TzG*B5&g=aq7e3oXX{Nu) zCm#VxX}@Y4-0C`-b3~`!HeT-x^q3q-$GFm>U*q9G2R`NWJMnP@LWE4@H}h`wSbQ;BURvQAPUAKr>dU1&b+>)v&dZQ3IMGDdMZ(p%9(>KB+@r z`Tw(A@js*I9W%LoG`UD0| z6T|`D+FNM)m0^{84hkXQt}Y;&gFQc-M5_YgHK+~Lf=U3F?d;)#%P2F@`+f&865FJ05m~e21ExdmzUsp%;>u)#S-(gG%g2%<9D#olyt>oH^1yI=zYd!7fZxD zlAJB*ZVxUT6}|PI_uI^ISKMehXww5w50nA`t0M*Rg*W5kF3y7m2J#x1V!0$K1fZ-t zd;q|qqTxGA6acX01}*h!g)%@7_T0#@QqB{JX#?!{8l5xN#}r`R00S9QoB|55!o-DA74_~&p~W0GHUldt7n6IXb=Y0%Bx%i z8YDq!$bP3V*eYQJ0GB*;!b5Wu0qpkqlcL2S8Y7d%66Rvth? zsy`^1u*;f{lY;Id13Q6f5J+U6Z_+EqB8dBd8hbXa=dgk!2+keYtr283Ztu0;%OsD+ zfxS5Y@F97Yrv{}D0CgXUB)bdK}J|KgbY|aE-`NNY>Xc7+r zy!r+&TY?Hd*a+q0OFnq-v9R}G>Ux07>P4wFZyNfE>y5_I7~Ah6Z5fI4&md* zl7QOTAjXm8TjW>0FMFvIrl3c$C-R9WycQCb#6BQCh-PkpSROQOlBT9(KJFd`;@V89 zT36%B#5b}Oy!$e303DzpxDUDXdj4>PFI$^t6snSHX0VySafIgtU~oYzr4v?5Z9Yu? zv=irZR(UymPNv^mJzX}2JOQY9OvE!}JJi1+2G|}w<9dRUShkQS$2r)BefCliOd`}p zr#=$g$~%e(eXu!bB$hZMqtSV+gkdse*i71+=v+?2WR5iHL1mHs*v;+9xq6xAyV62O z#~8rx(}shijrKtOL<%nB@CB3D!kf(n;ob7uY#+y>^CwcG2F?T2mEHz-tR%{RmuSds z##4SE8YSV;FZ4a{HxnY%9Yj0b5!fqB8X_dm(VBl=rkH9Q`FzAvEG64y8HGQeoj&~d z1b{)qP$S0T{DSA}x^G&``N+AF%iMU_XS&vgZ@}eN=qNN=zT*pz! 
zZJLt1pFQ;$e_e*rQB5KbnV6Zh>HKKI)_Kgo<1xZ++;83$xjO86U)lU+rZC)WlR9V> z$tdn0JAn%PtjJO3%gUoEsTbO*XitBh&ul^G74jL$Q79Eo4BJ*Yu|hC-tNPblf)sPr zn&HacJ6TtR8YqfYu7FZ*V@%l@VTd7A*T*|9!pX+rN*7ZclTq8%6K(x*pq_x7DWk4nafj#=0M|(FuT3)d zQIL3v(Y6l2!Q!iLb~ia?e{BTAH+e#yHK5=QihLHkuFCj^pS-vS;W1=vo@s(v3&5dT zE<`7acrNU7;fgYn5K8?~*JC-7J=f%71=l`Pp{M=v zg;uKq2)!c!9b|A=AOP~0)=TgWxn#`1@$@N;d`>b&-0|!Cs z6}aKVCbZ5ELJbvw5#1VcuDjF2Jf_%2e%EdxC{LmhC~6&7Iz4}FNaN4J-Iq(Ddsa#U zB5yth1w|lrFL~`&L}#3UJBA`KsOEP7fS(C6tZ6q=A+7JlHf^jCKL7%bdH(?dsBt!+ z@UElFpjE~Nutc-l?&-mj-`U}crg*Wj=f$Sz?Htf>bDdA;tbk?2?`XBB_#?nG*;aF@ zfEvZstQ_C75*tu`#!$hgQKSP%j$8z1g9)D_K_i?9i;fC~GZLtUz2H%VU!Q_9bnGW7 z&1+zV;IeQ@{@0Ha9uD)tMFfXCBKlgMil;d0Fh*9rckX?Dg-O+C7_&$K%%VAh;co}+tLG}#=+15}SM1WcP4T%21WJ%m``*v{_OWT<#%jRg_s75N2uBp9= zovEx#H#)MjsMz97&%v8TwIa&;k&Z z^+bWMX@bNd)E(o>tx50$1RJVH0r^YD;w`fBZKCI?NM1AMGCDMhsK{=NpFRWy@3ac6 zQNoSf^2wW`cT#Sk#J)oKvfz99vme5x3gE>k5Mx;knvK!tfz<+tb~?s!4X702qDVBj zE44U---{?J8CihDsv$A5@95aad9R=`9jj~Qy%Q-GJgEC4o(7sK~cW`qHVY6H@E z2J1skMNYx&@C)9hj5*AEfrWQ;3{0zB642A2Mj}k)DS$pUt?jnZGIaET)FO+IvCsC# zr78y!jq-#+6G*ne>(HY+n8K4F3BNS!t?W>M$pX|1wQ?QyER|0E#DgKUOqyg`Yu{!L z=iA^Y?dJErYP2Jl#DeEBDfC)TkQD)5nL(Bo zCdqkC%D*2H{{r5dHHa;2=ni%W(vHe7CN%*J8H|*Lo@f&-15a6)-8hs;JvjgIWzLfv zn`aqL5x0YuH6K*~-gTivV8cIAP7eY_Lv5=;ZJ;$}c26n&WO@4FNLy3V?YT z9(G<>p3*76J#@b{9kC9t6_O3$6F7qV`DCRp029v>bg$IS2th3eP|@-ye!;M0DB3x| zhnODI4vD04)9xIhLFg_ff0zPZY1~=2^s0U9cH@wsG zs}5B6jV%^PjH07}i+e;O0O$wX)wB9xHso$>%mjFb0Z`%N;`R|JSwRTs&bVxYpJ`Nb6@@TDs?(}M zUxJ`hjCMZl5E3h1$hV77jH!qpkA(b&o`;&^I8!NGiM`d#7S4Q+eKsK1V--2uVgvW( zBC;co)+~kfLZj12is?uZSvM_tt|GHJt|h|@%-_)|QNac`8+$sSLeTW%V)lpSNa9oI zF(HJ#*_4^3e)^(FC%9Zw5q?6t<2-MdY-*kGSEzjaNXO#@E}-On#=lKaBfvcALc znceb%q+Y1Sp{-@O3JO8@^Rlb+UuG87?()X4rowmz6i?^3h&C7VZB9!pA<0$+8SgtH zuxZ0lnolMa49@FRo|r48r&w1qK&@YLUT~ETRcvcTR2<hJUT%00EFp)|4Ph!VBdQT1A`E%#w@w2WUw_KEWrB5hfeL|BH(Tl>C31>5wKbm_^&WCW?nYm{P>)7WV^HM&&p5ocm97+j|r7L;}>_BB8>uHU0+4Kz3#ngpwRe zbbTuu-$?F6|B! zKys73gL#kw6gv12WMY&ob!X}Hk?OMioCaVR;G|wH+>xhHrSbHs$ zG=H@gCkpCwFHcJ>ROW%9VHVM8d34czX}D>mRrdpMrR4ja4GyVWac9_9!%Q{L|$Ya3Y(_54&kwzFfUYLr5%fa$+~{;`btaWII~jx@aA&MBUL z_x`1X0u=}VccuQkt%z@_Cz#J8mHXy^$@3x zV)ovCV8t@7f)&6+Yl%1*<7q#JJH+fefrF^d&JsIM%;%WTzuEEKjfQV!rz7A&HdhZU z+Sz6PdTW-yjZq~Is_PgHMAuCox)N~6XVk8t4G!Q~d%SbW<+>a`{v-UsWW;3s<1izJ zRvn;&*iu@GP|>D=SL8TgD3tB6!H9la59$ zNLTkLtv`9CD@-HTEEz+^+!+HqGX4RbY%H<7NVj7!W=+?2Jit!h>TIwtVO*-Pel9n! z@&-R>rBzHn1cI$7s22wF#E}T5%A`jDwB|I$>;4m4iDJs!{AdvdUpr!aCT zgOBMGeahkfb5u1++JuRB2<}UCFf&;?($~O`YU5hiCc_SJh_sOYGYhfarp(oD#VPWO zIPXv+Ai!HwHKH9CX$Hkx_3xIicy4wDJcrR1+n z;k#Db*egu5mwj$06j(7ch@L)}V{rluG%5d`s#d2fw`GGP`MAm2sDQ^IqYU}LyhOOGIg>tN4`SQ0^fMEXpHW6$@P4!h+Ft*B&4K?kAk45#G80tFOuwpRK+ zU8xxO?W^9Z-jl=?F`R?tc&|YzpP!rKP!*V{w(zZ&e|EZ<3o0Thhdm-;OTswn3Rrq9 zbO;kT0Ava#`Eo%bPD?aj_FwM|KM9ltCUq*ouJO5iUF877$9J{ZKO<4xDG$!=j_G2>!ibAj4%-e<7qmz<g51%VLr6Q{t(PlYIIw4!0jQmxnGGV+MS3D22(Pr*(@ba>t*Y47J$v2zTVa}6F^Cc|B703A2LJr{S2ENV?x8wBzK-AO`etb4q zQ>4;F`rSo6J{GLCg4adZtNX84r)2uQQR9n!|=z7!O}Lj}~0sbiT?WA4#RT?d?- zx77lLFH#rQX#s#c#r%hCNhy{F8jxY!^rz3l#`SolsZKV*_H*HfKesda=m){|hor}= z?kV7ySefeYktx!~nG4Y*RL99;VpE3NqTT6EpWeKm=j}(9vH@q740HJj{>J4PKStYb zI9?k__AWoZ{1|dCrlenEB9p!VYLe8$Y8inlmIJrI;BcH`q5${8e))r4NTjU!t0y#g z0sUSqfu$a}PGe_rikS+gL2RW}CCcgi+`?1~D8nYtHe|nVvL!vK!-Plg>)L^TvEN@f? z!9DNKdu+UF4(~lqwzo&sX(m`m$!zYDizuABl?+lOnB$7X4TEdA#Se)C#d)hFx>G@(e~U{yL$8-WPa)CO0Wn|7e839L=Ax^P2<}imF=-k@wOQ;J`)K{8g?X z^2iw3QhPk4l~g1~0-;41+pZnzYS0J=MPP7nOp9p<9KYxYlHuD&2?PIy;#@$-d{a;? z@Gmiv7Zc;4L#9!ayniS7GaM8wRJo?qTOWPPs<{l2MY822^Qg)1&qDyR;F2kz3q}b9=%E*V4x(X<^wwUX^B;wC+o0!npqzIexDhonzk$? 
zljNw-Ui`1K&Qu=UC$%nVMR$(^;K6q1+}O0w5FZBZ${~bz*;2#0oO?;%ij{~?O(cyy zH6?7VU-vxG_Snu!YqrjZ7gd-W-1_S?i1R;1f7-@BynyM%z>#I|RAo99XYywP$UDx7 zWX#a6knF53wUD=S4TJ&GkBFX2E%KG{${J|@9fx|1*a>T`CS}o)#4<3U5VH;iOuX%j zA<_n#0nmr+XR87@PCxC#ufUBd+is;onpx z#lvb23nEey5tVmo=Ab<^ph~3pkLV|W|7N4j!5J2d9 z)Qg_32vB_{|AsSt7s;d`u)v9t>YtR@XwYfaW8=5cP5k``U_ePfM=v%j%EB<2SmJ?d z?-O4j@BG>vm4#gZVlc7nkZyM=g&>2FskyV~*`bNAfXZys)|;q)aiOE6E_-Y2Nz83F zGWcOoR+l!{bn>lxt_U^X5aO^Nw|TRm z^lrO#js#frX9q;nMrn)SLa4@py)Nu|CtNpVU{l|;(6&(s6lMS=j#vyof!0j6g(L{J zjej{b{lJYRXhF>gAe;9DHF>_ZW>Clm#xf3$ek~@=$7&3eGSSNz;(8*Q0{}rKkvhZG zZWD>CpZ2Uk5 zO1ri};d61`7_xcEkDdG(T^0#*_b0oQi4Mw|8_HKn&8Y1lJAO@h8CQ@L3DKoYUXT~` zb8&=c(!@BCwFbOriL&qfS(k_CjjpyKM^>*YT zp8hg1TH35cKlu(pOpWba&8WyFT8w(Ls|{}83hnANXiln*N`Kz#E7_W07mQCiA^uaK z9&Mk;RvGj$t<#&p$J3LQh1GLsAw5r|qrNf%^4`ZDImd{m!3`V+&HiFZ>TvAUpa%&S zPTlFyI*6t`RD%NPs5{Z$G{{u`+tR4BXE%N{azb_C>O^w}sDJ+I##TWi5vT*+JgF6o zis+YU2~zP&MNd-{pFHbNa@_9@4~ig8a>#O{X6wxtswyAL<1(}FJQ@3HJC>Mk7ykQm z!Y0#Dkp}8Ue?pfhuO?y87?DG3z(^M%PFeKveRKqt91gnPj_V7)Ju-ZcDg=JSE+uiFrt_H+E7mJb%s!WB>QZEl zgdz-7qht@!A9L_feGBKrEDIaqpdqQFA`+^(iGon+k$!v9MK!tcHY496uum5@>^#zX zcek>IWtib_u^5906nJ|=YY*}Q8EMCmdoCQOBPm_S#G>$6X`W|rNlFW2=w^%h`344K zs4%_leCqn$a}yK4``CF9E)6$9m<+Tjt#pPQ5%>Tr9w2WG0aASJ7{mbhy~;GSU2OlP zSTs9$wAd~>W8^1{Da4~XfQ$N7@&xyJj2sE>udL33r&t%9HWO6S-D>#gQp}-qfJ0s) zSp`DKds~3Sbwakr&8|3>FBEnDGZl7(0V<6^S1$qN0OFO)jq!B7@Z;^J_V)LEdo4jJ z;Vx|beMuqR?9=(6#ST+N*lAU29kXFf5}Zxmm&F)dKt6Wk2`oTzm;w$lm1Rx_SOw%4 zT{s=$^+2OIMBT8d08)K+%4Nnm$RU+R2=m#d)9q}p-6LV zcgied^B2asj6zpS3}cl|0I)Jss~z_iTks}he~E*hFj42QKf;e#kJ)`eRmW0TC={EC zYXkQZrXVt2s?t>7)S`e2j$$}|v!j_Otk+;8mFuEoWN_Q|9oVF{xiyiONS%)8I(t7~#PdvNJ+N{;uN2ii|XSs}G!PpKUEK zFhHq(V?u0176Ap~l6JNVdPwFHEniB#-d4n5USE(J_hdY-Q+UA77`jtX8!aD3Pq6XK zSa3afTV?F{3($_(;2pzjvjZ2ED5s)UY2XsDmst_(3tf8MpCU*rt5Dw`0geP1KcPce zOh5M!c7Kc2wXd!(FqRvlme_qKhsfT`Qv(cywe?gnH0|9wJi=HyJ|62&Djx%hz)sff zNWp;q*Z7_TjgOy_U<6Rn`0Hi=Nt&Bac9hf&O7y5)6c>pQH| zI^;lz{9J*QpnD0ue9m{PAgq}2{^pVKJQzo>^)PSjzrOX-H}Z{rq_!HD)>J#pfEoJp zI%`bY*OdFS<&WEEfu-q7rMr&%Y)bm4-_7m^_JIXF_6u@14NJa*%ImP0DKA1 z4oi|-J{%taxe*_e5`*GT*5tzLgHOP1M=IpOb~FVVJo7t1VHo}8qdjx&EXK-43Z5bTR;;vU0t7u9#!%@jD6q=~BVz*-NvGc)aQFg$`A3lBlf41oa(eS)Br3T50Z?5 zEb+D)p_((^>Cduk3uyOcd{*9HR|riwXu4KC1sD) zJ{e8GHy{Y`C#f;8=E4+)@QJDj;N0wgoH3kyB$n}q5wwPzpT0Pr5E*$&oG=p3a%#6~ zi_L16*$m2*J5?rKtqs@)s7T#F5|!50{&-JaOjRJr7OKEo1)M%Z$JMiwiXpT&pb)z^ zfYXadfE*0mCh4Ct)9E$RIhR3UE|I*K{E^(->s*9?z6&5??VY1F%K(n9 zPURH9J2r=&Q%s}R0B46wEAo$~Zs#t3S}I_4G1;HbgPOCTF{~F6l$~ob3b4S!nH4&| zk}<9=1yxEH85HWS6O7AAV1(ZRWmABT52>4?-lOvFg4{S7;Od&y6N(n;UH^8~30e~s zbZ6NVFbSUrhB(kB0Nk>%>gMhU`e9V&>A@)?Z8Z4}M5J^B3H21hI+`*r!O#`bF^K@G~|NeNZYd zF1y+g>iy-31zf{<*h!j>4bqS@#|vd^sc8s9^!9LiR-B3z(@80wSuc>v5frZnJV1=+ z7QMaV4ExPvJ!ZT+UC~XNVKRY~xC`8);M`UfUF!HBG)hU|JlFR@(n`cM0FH5)2jhtUool?_QtSwROv9Lr z;nSO|33`;iW!Aoh8h9x%NMIi#NcRm-C4ycw){i@@JF7AK(Nj*IHI3c4a5xM4*2zFa z0xZhPI(ec-UR23d4>S`>u_tYBSkfEE$*#+29=$W5mGJbt%IuBVy@I zrcvq*mMjh6|IK=o0#Ocs{HPmj6-+Y`@T~)?)G4fR$Z}#1bMVePDFk?_6vC-A9HyzSBh zJ3r{6PD;)c2Bmh}N?!6EXNNSozW59rhdR`poC~)Vh$a3)aARThHk)?1~(A%9+T|W#HOCxiI}hc>P9Q z3)lP7`>Gu10|v~PXr#>#r7~NL7H79hmoaus`~k3~rdwN>?TGz9NW?Lc?(&s{)Qh;IOne$^)!d$|SA#HCPoUioQXQ1AKdqM^=0nZg@6_>J zP}M|0$<7@;-N(HO3mu=$A5`5%$s)|k#X&Zs%=H5OK>d{V1!4Pbm-f?&bfKi!#l?p&32au<{zvs zWL!d*C_Zvc%tRn36tdVXSlG+ghMkTlB%7yqBTM5WOAQ8GSI22k=prw8Zsyqd3Lc3v z4*e-eLR)f6XFi{#C>5Vdj(!_mJG?cqeZ3L4Pha+g8&*kmYO9nE^d*2faR zp7}5w?h&jbX{>q75hOjuoRlk2))4%EvjV7cCkz`R*O%LrOZT?|Odtc1xHJtFW^E!& z!Qj?X#|n`|p^Ml84mh=&QXN zRAW9Sl`)(cZ%w>E1#PszAbn7x`T(jo-2H^d0;=e`p!shPW*`h>57ry9o4w#{)0^yu 
zhWNm0Q6@zhq`a1o)lR`)fYQ*poLy14 zNgTCS<2Dcs7Tp?BvUKa9_sXDP=NiTSaI>1e!VN^z);4s3rw(|6LS!$mycwic@uIa1=E27c^y~s}Urxb48Zw1$m66IE1?D=` zkCAgjp(8^)1NOq=YVr}OeA2M|$VLZvc!asvnVqVD4erBc5^|W(`KY0NYdp4&pK?L) zO-@>kP^kSyxmOBpo3Hda&kuncpk9Vd3{_cnr{lN&<+tgFYdzIr|KP!b^yU|Nk>?f# z>J>C@i@!GMn87sv?H4HR`N{3<=4xF*tN$Cw0|Tkcf%W$wMD3ZeW9Tx97O6WNAwsMI8-84ACNxJ8&O+H$%7Dai);a12DbX^4totpyx;PhwB+ddzy}I(I*++!3i5_Vm zfc?={rkRKe{}1U>@$E*lvsBO5-nh7Pr6`;pj3wwu#i@ z&}{VJ_9ze=o_!E+xu6t#I??y0KaT{bs?fTbnPCbGIp0sdS;(eYgUImmhPnf*>u1B4t8lmUe3V%Qs5+|k+X1ii9V_}DreQVqS3~N_M ztT&Zir1TuqMm|6s4vuxFocZd~Y>&+}d&arzJs<~GimT2CaEtD2DU_k9+u-JviTT3G zq5!lj-gjg>-;=Y~>rRGHD2cS)_bSd9=&24CCDmXvdp_UQDL(??yu}6g11tqmzHF*R zsgjl?-}k9E#kr~P`1EjH4s*2gXO)5cPgQ-^X+>MOsMXZWx}_$=taI#D3}29dVzm)8 zG}y~8d8wA~EF$II7>3FoFivCGlk_R^AHo~@UZsf^k_E`MFef~J%JvcmD*N&6=w||n zY9Y>t-sr{|#NBRrtWyPgHv=lzfeUR<@*NgvyDx5)Xn}tNwu~*-qA%0&9TJ{7vTApt z+jXEYTmT6)X|27wns9~ySg2exV9B~6J-{2fbE~G(BEreyyw>n7?ZG;|XoYn?A1XHK z^ukUq)oS~-&&8MIu|AguzNL$&V;pW$$v7*xfpo}EN!Km$vOs~aT(?4y#G!{)7c&$t z4hTZuPELVoSk7;Qhn$jYGi<;{u>T{&WZ_&AJzA%BXe=wxa?Q)s={^;Eu!PLbd_rGV z3H1{fjRm_WCbF*Wd7y=2uyRcg>UX8NL71|T!*zZ-1jr6bNKCJyS$iVM;LA_ssc2;+{T7qDjJ9kprP-4!ocHsp>K||D3oEMF4 zutSUS*mmg!gIfAns>21^5E%YV{CcL^qhdtt9Vc}qLVGA3t;qeeax;MyI-`R5LgPI} zDAoKI3NE1nD)(Udf6WPX+I$gK1aw>i%IJuZ5BEJ)^XL!(M=UWqa96 zc#G`k&s>ko=E6*T-h>FDc-c+6=Kup4W$2dpACg8OMBzI;^L_y?m6`*wwO;iK^xYfL zYc~tN{yleaxl7iR;>n|yPuy+K7Pc8S`t+E4c-KGeq}owLhQ6{n>h~|=?t*g>8Z=V= z6F5Tb2=9^YMcN! Date: Thu, 24 Aug 2017 02:58:41 +0000 Subject: [PATCH 098/170] scatter check in --- paddle/operators/CMakeLists.txt | 1 + paddle/operators/scatter_op.cc | 76 +++++++++++++++++++ paddle/operators/scatter_op.cu | 20 +++++ paddle/operators/scatter_op.h | 60 +++++++++++++++ paddle/pybind/CMakeLists.txt | 1 + paddle/pybind/pybind.cc | 1 + .../paddle/v2/framework/tests/CMakeLists.txt | 1 + .../v2/framework/tests/test_gather_op.py | 3 - .../v2/framework/tests/test_scatter_op.py | 38 ++++++++++ 9 files changed, 198 insertions(+), 3 deletions(-) create mode 100644 paddle/operators/scatter_op.cc create mode 100644 paddle/operators/scatter_op.cu create mode 100644 paddle/operators/scatter_op.h create mode 100644 python/paddle/v2/framework/tests/test_scatter_op.py diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index f466dbc79a..f0fd12f1b5 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -47,6 +47,7 @@ cc_test(gather_test SRCS gather_test.cc DEPS tensor) op_library(gather_op SRCS gather_op.cc gather_op.cu) cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) +op_library(scatter_op SRCS scatter_op.cc scatter_op.cu) cc_library(net_op SRCS net_op.cc DEPS op_registry) cc_test(net_op_test SRCS net_op_test.cc DEPS net_op) diff --git a/paddle/operators/scatter_op.cc b/paddle/operators/scatter_op.cc new file mode 100644 index 0000000000..cf01ef6279 --- /dev/null +++ b/paddle/operators/scatter_op.cc @@ -0,0 +1,76 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/operators/scatter_op.h" +#include "paddle/framework/ddim.h" + +namespace paddle { +namespace operators { + +class ScatterOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + framework::DDim output_dims(ctx.Input("Ref")->dims()); + ctx.Output("Out")->Resize(output_dims); + } +}; + +class ScatterGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + auto Updates_grad = ctx.Output(framework::GradVarName("Updates")); + auto Updates = ctx.Input("Updates"); + auto Ref_grad = ctx.Output(framework::GradVarName("Ref")); + auto Ref = ctx.Input("Ref"); + + Ref_grad->Resize(Ref->dims()); + Updates_grad->Resize(Updates->dims()); + } +}; + +class ScatterOpMaker : public framework::OpProtoAndCheckerMaker { + public: + ScatterOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("Ref", "The source input of scatter op"); + AddInput("Index", + "The index input of scatter op where Ref will be updated"); + AddInput("Updates", "The updated value of updates op"); + AddOutput("Out", "The output of add op"); + AddComment(R"DOC( +Scatter Operator by selecting from the first axis, + +Out = Ref +Out[Index] = Ref[Index] + Updates +)DOC"); + } +}; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(scatter, ops::ScatterOp, ops::ScatterOpMaker, scatter_grad, + ops::ScatterGradOp); +REGISTER_OP_CPU_KERNEL(scatter, + ops::ScatterOpKernel); +REGISTER_OP_CPU_KERNEL( + scatter_grad, + ops::ScatterGradientOpKernel); diff --git a/paddle/operators/scatter_op.cu b/paddle/operators/scatter_op.cu new file mode 100644 index 0000000000..e6a6fa57d9 --- /dev/null +++ b/paddle/operators/scatter_op.cu @@ -0,0 +1,20 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#define EIGEN_USE_GPU +#include "paddle/operators/scatter_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(scatter, + ops::ScatterOpKernel); diff --git a/paddle/operators/scatter_op.h b/paddle/operators/scatter_op.h new file mode 100644 index 0000000000..c2db3ae37c --- /dev/null +++ b/paddle/operators/scatter_op.h @@ -0,0 +1,60 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "gather.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" +#include "scatter.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +template +class ScatterOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + auto *Ref = ctx.Input("Ref"); + auto *Index = ctx.Input("Index"); + auto *Updates = ctx.Input("Updates"); + auto *Out = ctx.Output("Out"); + + // In place output: Out = Ref, Out[Index] += Updates + Out->ShareDataWith(*Ref); + // Apply ScatterUpdate: Out[index] += Updates[:] + ScatterUpdate(ctx.GetPlace(), Updates, Index, Out); + } +}; + +template +class ScatterGradientOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + auto *dRef = ctx.Output(framework::GradVarName("Ref")); + auto *dUpdates = ctx.Output(framework::GradVarName("Updates")); + auto *Index = ctx.Input("Index"); + auto *dO = ctx.Input(framework::GradVarName("Out")); + + // In place gradient: dRef = dO + dRef->ShareDataWith(*dO); + dUpdates->mutable_data(ctx.GetPlace()); + // Gradient by Gather: dUpdates += dO[Index] + Gather(ctx.GetPlace(), dO, Index, dUpdates); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt index abb9c248ee..37e186a408 100644 --- a/paddle/pybind/CMakeLists.txt +++ b/paddle/pybind/CMakeLists.txt @@ -4,6 +4,7 @@ cc_library(paddle_pybind SHARED DEPS pybind python backward sgd_op gather_op + scatter_op add_op mul_op rowwise_add_op diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 8fa8be2cef..3bc150ccb7 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -47,6 +47,7 @@ USE_OP(scale); USE_OP_ITSELF(identity); USE_OP(minus); USE_CPU_ONLY_OP(gather); +USE_CPU_ONLY_OP(scatter); namespace paddle { namespace framework { diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index fb4686889a..661ebd8964 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -14,6 +14,7 @@ py_test(test_sigmoid_op SRCS test_sigmoid_op.py) py_test(test_softmax_op SRCS test_softmax_op.py) py_test(test_cross_entropy_op SRCS test_cross_entropy_op.py) py_test(test_gather_op SRCS test_gather_op.py) +py_test(test_scatter_op SRCS test_scatter_op.py) py_test(test_fill_zeros_like_op SRCS test_fill_zeros_like_op.py) py_test(gradient_checker SRCS gradient_checker.py) diff --git a/python/paddle/v2/framework/tests/test_gather_op.py b/python/paddle/v2/framework/tests/test_gather_op.py index e868983042..e3de3fd0a1 100644 --- a/python/paddle/v2/framework/tests/test_gather_op.py +++ b/python/paddle/v2/framework/tests/test_gather_op.py @@ -21,12 +21,9 @@ class TestGatherOp(unittest.TestCase): class TestGatherGradOp(GradientChecker): def test_gather_grad(self): - print 'creating op' op = create_op("gather") - print 'creating op done' xnp = numpy.random.random((10, 20)).astype("float32") inputs = {'X': xnp, 'Index': numpy.array([1, 3, 5]).astype("int32")} - print 'correct before check gradient' self.check_grad(op, inputs, set("X"), "Out") diff --git a/python/paddle/v2/framework/tests/test_scatter_op.py b/python/paddle/v2/framework/tests/test_scatter_op.py new file mode 100644 index 
0000000000..e7696844d5 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_scatter_op.py @@ -0,0 +1,38 @@ +import unittest +from op_test_util import OpTestMeta +from gradient_checker import GradientChecker, create_op +import numpy +import paddle.v2.framework.core as core +from paddle.v2.framework.op import Operator + + +class TestScatterOp(unittest.TestCase): + __metaclass__ = OpTestMeta + + def setUp(self): + self.type = "scatter" + ref_np = numpy.ones((3, 3)).astype("float32") + index_np = numpy.array([1, 2]).astype("int32") + updates_np = numpy.random.random((2, 3)).astype("float32") + output_np = numpy.copy(ref_np) + output_np[index_np] += updates_np + self.inputs = {'Ref': ref_np, 'Index': index_np, 'Updates': updates_np} + self.outputs = {'Out': output_np} + + +class TestScatterGradOp(GradientChecker): + def test_scatter_grad(self): + op = create_op("scatter") + # test data setup + ref_np = numpy.ones((3, 10)).astype("float32") + index_np = numpy.array([1, 2]).astype("int32") + updates_np = numpy.random.random((2, 10)).astype("float32") + output_np = numpy.copy(ref_np) + output_np[index_np] += updates_np + inputs = {'Ref': ref_np, 'Index': index_np, 'Updates': updates_np} + # check gradient + self.check_grad(op, inputs, set(["Updates", "Ref"]), "Out") + + +if __name__ == "__main__": + unittest.main() From 1dc62cd21ca8699c6740d71cf984a7f5d589b77a Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Thu, 24 Aug 2017 11:28:48 -0700 Subject: [PATCH 099/170] updated doc with implementation change of trainer --- doc/design/cluster_train/README.md | 25 +++++++++--------- .../cluster_train/src/paddle-etcd.graffle | Bin 5765 -> 5557 bytes doc/design/cluster_train/src/paddle-etcd.png | Bin 57495 -> 50387 bytes 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/doc/design/cluster_train/README.md b/doc/design/cluster_train/README.md index 74961f8005..177a5f5d54 100644 --- a/doc/design/cluster_train/README.md +++ b/doc/design/cluster_train/README.md @@ -54,17 +54,18 @@ The life cycle of a single task is illustrated below: 1. When a new pass of training starts, all tasks will be placed in the todo queue. -1. The master server will dispatch few tasks to each trainer at a time, puts them in the pending queue and waits for completion. -1. The trainer will work on its tasks and tell the master server once a task is completed. The master server will dispatch a new task to that trainer. -1. If a task timeout. the master server will move it back to the todo queue. The timeout count will increase by one. If the timeout count is above a threshold, the task is likely to cause a trainer to crash, so it will be discarded. +1. Upon trainer requests for new task, the master server will dispatch a task from todo queue to it, put the task in the pending queue and wait for completion. +1. The trainer will work on its task and tell the master server once the task is completed and ask for new task. The master server will dispatch a new task to that trainer. +1. If a task fails for any reason in trainer, or takes longer than a specific period of time, the master server will move the task back to the todo queue. The timeout count for that task will increase by one. If the timeout count is above a threshold, the task is likely to cause a trainer to crash, then it will be discarded. 1. The master server will move completed task to the done queue. 
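The todo/pending/done bookkeeping described in the list above can be sketched in a few lines of Python. This is an illustrative aside rather than part of the patch: the TaskQueues class and its method names are hypothetical and do not come from the PaddlePaddle code base; only the queue transitions and the timeout-count threshold mirror the design.

import time

class TaskQueues(object):
    """Illustrative todo/pending/done queues with a timeout threshold."""

    def __init__(self, tasks, timeout_sec=600, max_timeouts=3):
        self.todo = list(tasks)     # tasks waiting to be dispatched
        self.pending = {}           # task -> (trainer_id, dispatch_time)
        self.done = []              # completed tasks
        self.timeouts = {t: 0 for t in tasks}
        self.timeout_sec = timeout_sec
        self.max_timeouts = max_timeouts

    def dispatch(self, trainer_id):
        """Give one todo task to the requesting trainer and mark it pending."""
        if not self.todo:
            return None
        task = self.todo.pop(0)
        self.pending[task] = (trainer_id, time.time())
        return task

    def complete(self, task):
        """Trainer reported success: move the task to the done queue."""
        self.pending.pop(task, None)
        self.done.append(task)

    def check_timeouts(self):
        """Move timed-out tasks back to todo, or discard them after repeated failures."""
        now = time.time()
        for task, (_, started) in list(self.pending.items()):
            if now - started > self.timeout_sec:
                del self.pending[task]
                self.timeouts[task] += 1
                if self.timeouts[task] <= self.max_timeouts:
                    self.todo.append(task)  # retry in a later dispatch
                # otherwise the task is likely to crash trainers and is dropped

    def start_new_pass(self):
        """When todo and pending are empty, recycle the done queue for the next pass."""
        if not self.todo and not self.pending:
            self.todo, self.done = self.done, []
            self.timeouts = {t: 0 for t in self.todo}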
When the todo queue is empty, the master server will start a new pass by moving all tasks in the done queue to todo queue and reset the timeout counter of all tasks to zero. ### Trainer Process The trainer process will: -- Receive tasks from the master. -- Work on the tasks: calculate and upload gradient to parameter servers, and update local model by downloading new parameters from parameter servers. +- Request tasks from the master. +- Work on the tasks +- Upload gradient to parameter servers, and update local model by downloading new parameters from parameter servers. ### Parameter Server Process @@ -119,8 +120,8 @@ When the master is started by the Kubernetes, it executes the following steps at 1. Grabs a unique *master* lock in etcd, which prevents concurrent master instantiations. 1. Recovers the task queues from etcd if they already exist, otherwise, the master will create them. -1. Watches the trainer prefix keys `/trainer/` on etcd to find the live trainers. -1. Starts dispatching the tasks to the trainers, and updates task queue using an etcd transaction to ensure lock is held during the update. +1. Write its ip address to */master/addr* so that trainers can discover it. +1. Listens to trainers' request of task, dispatch one upon request, and updates task queue using an etcd transaction to ensure lock is held during the update. When the master server process is dead for any reason, Kubernetes will restart it. It will be online again with all states recovered from etcd in few minutes. @@ -128,13 +129,11 @@ When the master server process is dead for any reason, Kubernetes will restart i When the trainer is started by the Kubernetes, it executes the following steps at startup: -1. Watches the available parameter server prefix keys `/ps/` on etcd and waits until the count of parameter servers reaches the desired count. -1. Generates a unique ID, and sets key `/trainer/` with its contact address as value. The key will be deleted when the lease expires, so the master will be aware of the trainer being online and offline. -1. Waits for tasks from the master to start training. +1. Watches the available parameter server prefix keys `/ps/` on etcd and waits until the count of parameter servers reaches the desired count */ps_desired*. +1. Finds and watches */master/addr* to get master's address. +1. Requests for tasks from the master to start training. -If trainer's etcd lease expires, it will try set key `/trainer/` again so that the master server can discover the trainer again. - -When a trainer fails, Kuberentes would try to restart it. The recovered trainer would fetch tasks from the TODO queue and go on training. +When a trainer fails, Kuberentes would try to restart it. The recovered trainer would fetch tasks from master and go on training. 
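The trainer startup sequence above admits a similarly small sketch. Here kv stands in for an arbitrary etcd client exposing get(key) and get_prefix(prefix), and request_task/train_one_task are hypothetical callbacks; none of these names are taken from the actual trainer implementation.

import time

PS_PREFIX = "/ps/"              # one key per live parameter server
PS_DESIRED_KEY = "/ps_desired"  # desired number of parameter servers
MASTER_ADDR_KEY = "/master/addr"


def wait_for_parameter_servers(kv, poll_interval=1.0):
    """Block until the number of /ps/ keys reaches the desired count."""
    desired = int(kv.get(PS_DESIRED_KEY))
    while True:
        live = list(kv.get_prefix(PS_PREFIX))
        if len(live) >= desired:
            return live
        time.sleep(poll_interval)


def discover_master(kv, poll_interval=1.0):
    """Read /master/addr; retry until the master has written its address."""
    while True:
        addr = kv.get(MASTER_ADDR_KEY)
        if addr:
            return addr
        time.sleep(poll_interval)


def trainer_main(kv, request_task, train_one_task):
    """Startup sequence: wait for pservers, find the master, then loop over tasks."""
    wait_for_parameter_servers(kv)
    master_addr = discover_master(kv)
    while True:
        task = request_task(master_addr)  # ask the master for the next task
        if task is None:                  # no more tasks in this pass
            break
        train_one_task(task)              # compute/upload gradients, pull new parameters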
### Parameter Server Process
diff --git a/doc/design/cluster_train/src/paddle-etcd.graffle b/doc/design/cluster_train/src/paddle-etcd.graffle
index 1b6611bccfb0034a10044f2f175b56c46a98f1ec..b4be06a0b1c6ba4a84475d2e5d6217b6c259bdc5 100644
GIT binary patch
literal 5557
(binary data omitted)

literal 5765
(binary data omitted)
diff --git a/doc/design/cluster_train/src/paddle-etcd.png b/doc/design/cluster_train/src/paddle-etcd.png
index 4e5c3d886e65a654d734788afdabab3fd15e0632..dad67a277296ff1719a968abddafbcc1277721c7 100644
GIT binary patch
literal 50387
(binary data omitted)
z((oKGl$g!{8IR*NzqhKT#DT{xeXj8iwiayuy<+h>-KiuRg*v$Mrzbbx`z-3NCnl6k zTM<3#36|Ak*_i|}Yj{2l2A;1&B}GuvIuRDH2*M#)9)7YH>EZ=w)II@ zHjzlUXy?#9$or#Pj&Z2hi2n4O``+)EAPBWZ%SF1GDiCa%pG}f5Ldz|zkFcj7Ls3o+sa*lVXKTI+ibDZNP5cG z6N%v?cf=jq+#gbwTSLYKw5dfNwjC77p>KO&44hcY&Jm-?mqqr}vsGWo?R9|S_`)Ne zhUZlJHmvfu*fJN(iMd$itwc^#>FbkiqxEFu_C^8S$ZsZSWuNFq9+}a_;>f>x*FFBk zEO0sHM%!WJ;jGvGJL=At6w56N_Hu^A>5R({ZWip7%mETa2L)LsLLxt)U)+#m&WFPZzt#6o+6 zC^alGLs61~h2`hBP34C4$kvQ7{XG|{ck|!gyzp2(TQ2J8MxTI4hdpsC&QH-^RI*|= z{@#Y2(m7@Y6z2J7(I=f(wB-VLlZv7K%$A2__3NXS>cXWnj93f$6to|NW6H*T`MT{h z9Sgjo`&1}sJG#VDjaZpKtD*+Ei7sAkOAg0e^Ou})c%V-zc57wjp}Ge~Vww6woTEAV zXN4-7=8xfJaVi=YhjZ+~w!J=54BK?XvwhvD28x?YMRBEz943p&VGL(OOp0 z2F7NN#ogV$$~k~l=#Dsh z4Bknzw>_=7#+*~swFn2(Ikx5{$_nBTmIm^3JSeW=gWDZd8YeyutP5^GWMV~$b~Ssy ziB{Ns>BU5H)5I|>fZF-_HP%Xuh_Vk`gT^p9>raBB-e1fyDL&IrP+fN6A|*Ve1EJ+NX9{IahHu*@9=%_jx^@94ST8aoa{GQ#|oA zX^j40li8VlS=v2II5CSl+ucpLyna-xj7L|bIm_S!nC3Lv9=dS{GsP_>x_L!>dwL6~ zm)ow==ZhgaIBg0`6P=38%=tjJ zO1hs$){vSV8UKK8^xE_J70rR`kz6y>sR!DY&f4xYI6Ui8PaDkn7F9X1fF;kLg}qpU z!p4_Qk~^Gw$4IK7K?jRQ7MJQnG{J41K6_L8C>=dD3lZfA=~vrQ-PI23v|?V7+toPa z&YTTtSn@~DKZ!+=DhOH~WzSx{B312v0|&Fx6WG*Z6sU%?#tFr}z+;OSxO~c3DX(?9 zdT1<(BMi8cXiP8I^j^>7KuLYdrq6RW6}`7_JLv5wGgJjY#UCAfeVwo-*MwWq6RI5g zA~@5JjvTBKeGJ!8z5T1ce3&hc%z#)|{Kfe5ndh{7|M4Xbi0q2{KSCCEY_$h7#4HIC z4Z7bTCilI-oW|!(wfY(a1ie$mijt!_=tr6AUOiuPg^3(2oxU_KOtT7-B7(R)`Cfn2ageLzRQKTk$&|PGt;6*f&rnvd;cV+Cy1I1)V{Z$^etTprLH3cM z8mO;XdXA$4kJAm%5|th2HUR3b6Y%zBO+VTEyyO8495_bOZ;?Tg03d|UIhx~745MBB zQaSCPBB5*zTz7y4Kn$!1oSi>}SOGVOQl3IRml!gq28uC>il={m&&&mZhttY2-2|We zGxS##B)OdZFAaNmgrxiH{Q$xjNszkjoIH$yAk}(nY!RSfw&S7RTkuV4KftXgkk)(S zT*V9WH$uvC<|v63rE$nDb&$@=!QhrmV}9z>=}PFFKn@XNo<|K&S4KM^sZ%_*(8@>4 zQX*6qA-7QTjXcm)i(4N54g`pcDDHP^{JO%o2_tnzA6zoo@F^%%$oa1J>r>w!sDG!$ zU-t_~P4tPz6Qsyckbu1{y8RR!0XO$`5UBb%P)^kq96$<0qIzJJBe!usr~5j*cph?6 zGTgZo??w+#7MmHHpGnU!H#2Ja0(P#<`sT|1P9l*6$z#g4jcOQ;qZ1+x1fgXNJiyPv zDJ9V&F7^*go8rj11a3H8Hfh(m#kKeX%y$x*HeCm>Y-TdecCDc_3KL-ymU< zN&9TK>-x4B&nZ{`9PVq-!!djglOX!mu>|b8@5c{BMG&Uzq`v%#d7R_SYqM{VpBAB! 
z#(3spKkzfmo=v<1(ld!dWhbkzf<# z4!F}n`5J=+u16!RM3pIB(=?r^Y|M0o_({)Rd@=8=UnBS3mJ{kz$AQ z?#G{oE2o{I5w%kcNi9hRVV$5;kpU7kCZ!Tze1evOi!^_LSK;}n#ym^eplLP&76n2r z#-2H-dmnP$sY4uvZ$q`N+WPe*klvSR=bKfWkT^Z}xE0Aw{t~Wvzj~s z?PnlR(~VlRkE0ZTH&UUdMi-jX_}M%!1C!}%X#(rcX;sEc9K(9Qp>p*V_&a#+2(NV5 z^Go7>3B4m5TGFwf0B%Fu!`RVv;wtFo<@ZBiLc?fmWL(52G7c)Q<{2l{s&}2;fQZ^G z#H{fhI9cC|Pb)H$G;o|1{wzwRc^#?O&~#CnquwcH?+YY0ZB15PCFVX8EvIvXxF}L1 zP1$c=z3Vp^)8aLJp@P)jxs*?M0#D+=HmemTj(%NGQwP{RJ&pAMmY@lDdbdKH z<7Y4QzbjM??&%P|9nnfP&Q?;Xw6*TK2V^-)+QB+?7<+zd5l&Wh!=y}p2 zeT$R6T5;y6wQy^^8Ht@=W_^BU=#kfAd;=vQu zO7fYL52+!sMW%RFo6goRZ37?aN*JAEd~wc}VSUCh0sREMH0_H3MCkE;;v=-A#Yt=p z383z6YY}oXlaV(LzC%>Qv>fxg{`~TB9A`$I+q})e>;;j!f4dw{9@ftMK)LS{U2Y!b z8@@&+$x5kfAy=v1NXYg1_!pZorXDTI_EZwa*B9AiWWt9;O#4B*6*hhwRXJpoaO%EW zU9!&e?sD5}LF`avZ+uFdu(JUKWeefZ-%L| zkGKW!R8$NYn7Ls(l}Wek z(>~*H&EW}H)J@`YZny4P# zQC2t&^}R2IBNbj$?`aCY^*^tWl5BI4ZdVMzxT*Wdd&FuX$@nO(8+@Wa7n0n)U<0-C9KccbnR_dkb)=n2V zSLHO@sNulKHIqzY9YICjWp=16=rdipKtxhubzVs0-u~HSh54q`v(Qn_;ZM#I+cw(} zv#?BkZSuF}<|2fH8%2J5$oggJ%yb1t!&XU=Vd>ElHnljmk(;d=U!eCQUe`d_rAU^a zskgOE>CB=^B4gG?@F)%<)KfoMM4RvpnAgWn23K$~UuKfQb)~#P!zgvc>FSo%-!A5E zq&Z^pM3fYZGk9ZlfVr1djn$D;f?Qo?aS9L(Y_}KftCUOaE)oZZ;*q-Jy)kMtA;$0c z(7C;5D{m%Oz!+x}>Z+Kem1AB1K`Oha@dADvr6$G0GhrDb(c=B=j9q!SEDWL!__w3S zsY9;6J9LqV-s#WKkl7{4V`Dw@F2Uut?#xlr;EcuEwaz7u`_3jl%!L#yBrG>kW<%(K zq?@7rVxccx&NQ#}W0|Io{O5YG;jqf#yPIm}8tBTgOvm=JF=i9vGlixGo~m=&#PAO2 z)r#7-t1x7iW>y=Ljnfqxu3(Y5 zw?z@>bx$iP-b}u@cKliUbyEcMqkG#X(b9|hWbD&gZY3nsR;Y+b?D%t+3m#UC9OBq^ zcwIuK^ zslZ>?cWS7q&2*V|BOf(*JzB|j#`OKsAxR$1ws--h2|ACC2a^zOXob8cd3(AW$&4iu zjg;6P9JqPZHR^Kl_`ZGeX{KEjgkh={O$KDQ2xqmjq|y0VXN%X2zlSn+Y=vY#_<%7z zc=IwY_-?nO^EdW0JcjM*-Yf!v;!+-L4)PX>&Z95X_wS|)+;sl)^=fKPP?MFDEmKbn z&F=kVa~BgQ!X;{3fwKl4#B&^EWmn^?hCjxnB#TkbXtO^qLU-vmp*tXC7X6V+58ZnQ@4U{ z2E*F%D>2-8cmKN75})7L70L>oV0V>l6S@@3j|o^$o)PYT`l41M^;^}6qI$SLn1x`> zyE|LtMzxfnP95icH-US+?@^U}ndae9VxQ1K>Pk)`%{Y3Dp7_PM->43fnMrVaf(N|Y zFf#{^dG$J-A@_>;bfS60aIKQf9eRHa9m$8&DpW_S@S4sehlw`vu$xa@Y*$HV|0Qgp zCK_id@|ZqQ*rwj}dl$Fcz zDLPoFBvaZMzOS1)z{-1nk6ZKgR@h+7)n71MYe_6IG_N9Wb8(-9sy=Es^y0V^OVYAz z&t8~gw3auqn2Kck;|52aMC1P2c3-bcs>1OU7*v|Ru`<)0&FV^hZY_!svmN`y?;D}G z^(-CyxT0~c_gZdJ2xRpLRU6@qs0J%7O~vT<~Igg&`bTyTdy$w&WIN z5|J0(MjDs(PdcB#(UBYUBIIL&N6^qN(JmF~!;D=nCXeJ{Ej&k#a>K_XTK|Lt5d^|N zt0V3>Pvt}?ZtA~_Y|1SP)EPC9cXjTfUrhY_nZFBv8Lnuf(^BeiH|P8<43m&=Y^mXzgf!{Tub*&4Vl7fRqRoUBZT$c%nCFGpD>_ap@IxFSerX%%2b9R*2FVXD zC0w3CJ~=fK{7ZHH&|U)p7&1~PyWL^$B}Y4I@Z8JKC0`-x1yKzJ4InDFnx|*AHCe9X zKmm9Z1V_FQ@kfTGK-uz2$snZz)av#A# z+y$M_UQZ2pRNY1-rdH#=$;Fg3zjlMsw#dNJ>sI-0m5+-L5Ab{sS_1w&z7X5CM>EA_4LbgGe@mI5WuRqYm$%tXu%;nn8s#=+tIFl#Kp~kVw$i zg`a>l|AL3I%<6HLjfTQ9Si$8E`jdC=i-C@nQ_d|R!yK0bsbO52^E2waRfiG&X=H6H~6zV^myec>gc{EGl)6qOsmdfhQ;~cZj(7ECWiNMTd@9ovH@*?Q|Rd$ zoKa{*A9CtnQ-J!=8A{&HfJcbxOJAXqG=^$RiGLUb5Ulx6n_oc+Gb>X~Z9iLF_#x;- za{{vT$*wP3z=zR4L3W0yGmxKBsE9b?pR@={L`Jp2}YTEn74yS z?Kgt$6Q!LX945LFbO?DdDZKr;%x>}g`87q%^kKCaL_W`tWc38DA^rHr3Z?AqV7!oG zRtQEg71GX4PGl@XQy$wAA#ht+W@Rq9jl<9Zbfxo9-%yrGB{iqodi|Xy)10U1)L4wv zt2AXeRDn=nqRV*^&3STj_Sk^zt;YVv4!dV7VLnBFKuEwksK-{xKcRf0mX@fo<{fo- zbZHDpGG!+D#zIwcV(Hm*{+-||5&~4>_cM{2X!C7?IH~a9CVHR1d$#2;c3~9#EcxS& z7q1{LIg&K=9zxsj{M z-uL&O(7kKn2CG1Q&_b?d*I5RSjw;qo=-;uDVcod2S~{9cb&2hV)x3U-tI zX(m^#)BA>Drysn_z3PxYr9V$^J_kZU3_z@37Q(OLkg33A#CAhap zmhcHjFL?q{pVDQNAn`$yokppW|$bFuH~56p`p6G|+vbm{TI z$VbBWB+E=ZTPMRVr=PQG{U%fM=g;Y!RLxfLZN7d1@iFM$F!&y1h=Eq|=7m+QqmK)J zdc}en*cMV(zmJS$_C{Fpc}-Rb?QBDB$wM717Ti(pZv4R3Q;;f3#Vwaq{x4@Y{O# z?seY5xp*dJAofb$Llvfdt(+ZQB16k$^QC4g+WX?WkXFZ~8;57iauLlWnKi^{Ds@on z!He8yp zTzQ&Z)Ex$Mkqyb6!BnkF(n>^L)w`x>z7~;X2osv{bJ2@@%!D2m;>^3s8W$uzP=@i- 
z_@+>Y^J~vQy*vJNHz#_^C7sYfE^8sheD>P@6>lzO&j^R@bUtCcEp8q=HSYUZDk4ZBk&$%ZP?@S`Nm2uoO6KF6%p29 zN9Dz|x${K72`@XMDdrC{v2nk7U#PoPir1?2IoC=qeg@$WHldr>p4A5J8AqDT-ll!C z>x;&zy<(Dq|BFZG2V+OAS9|F zUsi~gIo-{8wJ6ga)z3GtNKGt4qao6Xj=9H@;+S~(3Avn~O2cv~``&}au|545J_4fI z%Q4a&WT<;OP8wrayT3gm-onn7@KIzHN!;?bWEs+OTjW3#U&z2y9;N}++`z09Ux=di zgpuj9%Uz~2s_QNuv?4Ud8UpxF1}dHtQT8Wdm0!9@Rv{EQwB*v&{9(yI#rllzm?7drT}&nf{vvu;j>+|Fb3FW^STno#Mx0MPs@6Z3-%S<&l*@s|>~W zxS0Ej%j_5Ta-Cm3Wt(^tEy?x*|Hlb<&9XHjusnsnLf(B^^L}cVGA(_$t1l-K?l#>} zlHzBu60^QqM0vj`RL^3h`w@m{r9fxmej+piA(R+7sTJWFL-lyEVwnNL37K;+7Hs_S69XwK@zB_v% zq(IriK&+glYpNwo{n4}vp{*q=9ucDu1CG1~8S@*Sa-zS5604!`7g4vL57fp=5HW~( z$%e11e7nhD8%nU=)QLYr#L#8lSq35gpFsl_Q@<~xB7prYji(2XpivC8pPeNOEZGqc z5)JM#(Z{~n#^Q_p$oUnE)jhhNP;AkMffMr?%KlDAm#}%dYpHM|7z!4N`BvwcUWXU) zV^Gl1 z)0}R`;Gi~|R(h{D+lhm87}V3K;2OxFp*_du8C~8Ax-9@hZzFv8{oC&K)(g*FmE&S< zpIqC<7e1729juX-h4v}+^esBR>+X>UDUoK2+51mRnLBf9cT{3@-i0N`n8qwQK4CH? zD2;adVjnV1#8$T<`HtkimG`H*!MO*Ux6nRp2B@D=zhle~(1fHoI_3^tA<@f99x0<5 z%7*B(;q)oC(Ji&~e@ap!s}q?o)5lqOw=Z(4h_Pm&B~RWH>y~0q^pfe}o@t+^P7o)a zLA?(*k9+o|cIy?cAxB3HZGyN?okdluq6My$>Ud@4;BCwq=NC8!h-GKOoCa#12wjlY1LycBaUQV>|wEff!=Y7 zq|Q?F?46-OPW^M(x^FjP^wsxeV)xn;R9qhHU+~|1T#tWq)V6l<)!>O!9MxjuZif=h zo0RvW8I>F+Z^t$om?xAcq+jW?%FD~$HZe_<+Y;b@QLCMxYA)cw8AiBrzV}n*zxxF( zR}!v{@z(@QsbW*ES;@9xb`6xx@0R}x#>OUa=@qN zP0PpqtMn0(yPUL{4}n} zh|jy?8#?n`xp$4Xfl_o>$tfT7rJ%D_>%4VUC{g1Dm{0gVJ#I_iWtIoDlfSm)t^gx= zkD*r}GerJK0fZ`G?L{|kg(AQGon!1MuwgNNoIHK3pksWi>8leb$Baw1s?IV1%qED1 z;PwbuTSmfuVLQ;5){CgogCTf;Gm(@u|BQ1XO)w?#>)nk}QI$>5Z%rfJi<%ez*eVIT zcQ=K)-_h@xNQpwcTcyAdl{^~%&aFWPE(6DcFE8ssTE8HBzU|{gI5YUy9bb3^oR}cV zpycmfRR>Xh-qXxr_Ce-`BMF; zK_-4|b(m(P8$-AG`ROatTtr?Cqf>9Z)8s}7q9mj$0tb3{2-%kSuU0{JsGJX9=hPnv zT~of0n#|zKt($OVpP_YNd%rxf1G{``e0cw?$yR2f&zBB zK(IIAd>HOTl;hwDi85RhMB-9iIv|yC7Ys8Edk~o!k;l}`;c;*ssZCc=de2vLU53!5 z@N-X!MBOJU3{FozDZoriS^D45>_zn8zkmHuZ%0O-5T8|2D6&B8%+U3OU)8CogQLpJ zgTohNR#c&+`~q36EnsUx0Oz~?S%bW%rb`iWNHPfg^%cpP@WXV-90e!#LzoXfW$w&G zqtODhc99fQ6I1huoVPllpT|~{cxPac_5rxnTT3JajrRocTZ8b|5d^!obakq_e9pAs zRX!ugvE>fP6*|-=GcSPdMMaQWMw^vlZ*&6GOTa7TP?d2f$UQ-Hr!KZQE#Vyqi2&w# zm&nLdaSGf3id@(D=8#}zSJS|J6<*dWXc@OiIf!v|H6x+;yir~oOA%cI_er4&j8dG2 z0;OM(2)L7PPwzV}T|fOv8(07>@> z<8o`TCX1c;i8p1Q2&<>#z7i|ExS9=0c=L`?YEu!Wx;-$_?SMz#FUpL!ye92@S&oInD3}0c4KQs&$ottpR`E52_#G zCbKzaAEvcr_brw;h&MS;^P6fpEF&@&M+648!Nm)&A_s>s*iU7>Owo}RP^t3B8}6Lz zoj42LY1g)%jndj!lNdZk&;7~Tcnujqt6r^NB+*Z!3( zKn1S03D5c8QJYWS!+bm*PJtj$DA;2EruIbJq)_$QX|%w5O$O6C<1MpVHM^5Ewecz9 z4x%DPEzDBka;BMcR$D=L&>~(@NLLva=X*5BGUj3BLDTgk+At~H_}iCxFycd^i!Nj0 z^Y798NyVd&Z9#&}a%&5&ngb2L@f5fYLj)^J*Fk5QNFDk)<}miO)+-_}Tu)Wi#bI_n z@~7>q3BBseA?6!`#@1=l78PTiJY+=H%^U?Yj>uYGqr#FI*nlM0!!Y+3>B5rj7e{l` zlLNyX-e0(X65_~%`)X>!uJ^Y{*C&t=HbA1Wp7>jpv;;0^?mYhK-Z4#lDbqk7*6Q}s zdpIrREByk+O`T+pkqNQgsd+BEC#flOL{HgLN%bx`Iim+rHX76KK@4)uuo6wA?Q6P_ zni~4#pHzYg7iJ$$fiJH;F_b0-Jj}8B6VJw2vQ-t&(?{<;wy=!3N^z0(?T*e^M_N&L zvsiT!%XI)s#l3Pz-}fyP4MdxUa+W*xtt$#p#5qGRi1Z8|a=OpICdl}S#hjM zP(-yTrwODgo3Gi}H_^-Z@esz?1WH;%vo}QvW#R|^zNlp+kx*VqC!dGU5UHUpKMNJ{ z0Lf=>ypBG{Cad3NYNfMFV=5XElY2>xgfIBE@4_{DWNS*5OKSdUGvN}?$1Ad({c0m(EV5i zClkf`?y9>W=!BJ_9Ybw=rL{}s2lT;9Ttkh2f{7%$n{er?S?zRp`*du;Q*=}KzRR6vK$FtQEhPU`AVMKBXAw{CiFv`S8N7yM6*9p0s>6Q{R!RGj9~SOk-sfTT!o^7V+E8RMdeH9Y=j zrE;%$|K2G;XAU&>NswM&7#F)C9*;avh11nP>79B18Svm*VaWPc3<7+JkkFnzai}N$ zUxcG5^wmlFvg_2jvMNS^FZlj-*4dMIESLVSPOsbKX6YX#0tH5jBlCs>&55q*(Fqf)^$6KbE}6dGt!R#^#NM&_jpqP zEETtWuxfnI_y0gE*jiW5sb!}p7yU25=IcYx*YHAK*m_Oh$zQc1?u+im*#ACHh!1hU zB09h^z_2hXA>{?k2GHhzk4#@52k?M}lI453k#|7V%r?b!{`-F{AsLYGriF3&UuOx| z%36%iffP=$>;E~$3xTUCwq23_b5b|`!1XsNom<(=(@@#z@HcOO(nSVGCkI$ffP{4e 
z(WDLX6)EMzb|>g(r+=2eJ7k>Z&W;=gqe*==Fpn**a6C0S06kyu1Be+p$D;sK&BL40 zs4qyk8AFHoMRmqG=gdGYw_9M>+epT`I==Wi>=%?2s;)lWf%%D9eJTX28o>cQy>B+X zVPj7~iG-dq%Fb`MGRp@YA-m80h5(Mt?>C)C$8*xPAI?3-%}G?8&6x2^c=nnSAD8}V z-SjRfx@TV9zfYz_i396uIJ4t_E=4v~Kj$r}uh&}65IE@BKplhuklgoQu4|V!#C0%! zU(?LxAj)q$E+`Zt$yzw~ z>>lD_|~yr?Oihc`!e;1Dhgc+ zD(Aj(j|WbWZXLcya&DQ@g$n1x-H)q^&u(41xqtT!)Vt4|BlYmCHUBXh*g#2X`tOBv z&}E?&Q(S-PdycZdpaTZY)qzaS`PDr}m zrjw}->i_-ogp%Sy0MnSSz`=jKy+&6gOrYwE`xvV7EX~NEdB~wa3hpOV zpEK+q{uz1iQ*S7yugD_h{8fJgtg%L+(Nkfee~&BSk5UY_X$ zypR*KeB2;-b7EVAh{_us#c?hGLgA%13d4%&_ZYr^p1Nq^U!;W-C_{1+ZplE8$G@G5 zEyU`^k*;3OD!wj6=K*XcQ~OE4m1Wb%6rf{h`7vX}$f-%tPgW=-2OlMCMI|Ig`#|w- zdtCV4Z186zrMmF+M!0Lxs{)!3fz{yb2_nER@x?pPl6wK-@XS+thVd=rA#C4=M`fWaL6K(dzZX3AVls{KfR zwXjT5Q4ix9yc>7n#U}akSlMi9A_Z8?65y(Boz+R>^lZ+tb=)oHuM_LDzPs``arV!E zFs;w0a(M>pc5z+y_MF%T;_&Lt{d>TOF3lCd=)6w8Q>rWC2Dey{}v_@qsWDQDgCw9a8^c6;SEtlFhx@j^Xqr|p3iHFK7 zLEhRhn4PS}=hI~S*GdZl*N2aNPM?I|b~=HjA>av&oEB+WQ)r7@3vC~E?{Oy6V(Yx^W4uZH7AH`mPi5KI_^X>#QY-`uOB_S)IYshF2(q}o)R~N^_{xsqi z+cohaVR$?zf<*e*{URYG~!IjNB#7U(Y0SSt*>cx^07W)Es$zK-eRg}m99xntFjSz9nvw*l}0my1)wqN5&8fCZAO zOyC}?DbtspkX8Wrfcy7>H76!{BcmcxdfD(2dOor5;RjUINs)|gR9tG;zMd>pZudh_ zQboz()GTNYlOANC(}&Pbf+hu!Qu;&QM$wzRD$o2!dms z98l17q3RI62kx{B-NWYdwBqm4M1tZlaKr|S$PyXKGHj1Q4T)Xi;ymB#WuLQkjRT&a zfxBF)pHsfL3gH9}Rtuah5WX&$WLbuZzHru*?)ZWvYv*zE`e*Jd#^^J zY=Lk^mHWe4A{gRqY8H>qEQ$r{V_bp`Hsv;!y03EOyr{K+A}QQjVb-E7_p~##LHAJ= z=&yUH*I=-d0DCosVUS?~In~*Rku}kRZ6PWLw)tp<^dI=nY?OjOm+m?Z^Fl^x`9E~b z5;BN;0P#?kroZ3s@diV!;C6R3McQ&qblhGym7YVMLdh^nt{*Ag43`P-kvHHjy@LrC z^AMzRf$o_%m*~u;5+Zm?ZWP}fGs8VBoHm6RnJr7Z(MSrGpM(vZ>WsX@N^S4~sg38y zdpowz?T(>s-BH!gmvW#Y-#hnOoD#2jfGf8RC^F+ls-!)EeMZzq6qo&nte{t-DG_te zN~iuq4QX%>{OHRjAMTys2ak7fMGk1qvVsHb^7LTdyc%^qQDm;j_Cs&RjF=v2AonTe zX0o;3)DSn<3>g0@zydJD>=(&6P2dWd$mU^wgfJGb(0(BxgIhr$dAy*Pf&urDA5 zI!WMWw4$YH(sAcV8KZN_p0V6fiC`}rs!9F*^IKPjCW+3?K_9(3&&OBOBjzrBQ?n@g z3rNzaj*OJaxvbh*H3{DrLWqH%x#!)5Ym3=~heea(WEUyVCXs%6qH00mcRoQrZYpvu ztTw<2yU;^!ZY>^vv4{WRROR5^B^ifR78%(il9)zi)(`(t9 z&aG`imD_hTC7%$j8>xzlR+d1N1E-~Rx*`*<)MHT@>(4InsvODTir2WsevOQ~W;OAL zzjX$FM0$RuUuMG0qjRt)Oe-OMHT;H|IrVvU=?%VHvCoq-QcQA>8DP+Y(=dxal3k-sBAerC(Pnf?0K ziqqqDPJpd}N%?|M3jUB6Txafi4`=!}7mNP0xn~4Fov4s@b{lX(B%C_PUmspQ;JIjEZdZkZiHrdKgaU!4rkCicC7*X$~TRqc9;a2BluszN$S-CBFg5Dmm z4&X8Y6$h_ysni9b7>5boisJo$LsXb2By>&Ir2JI&v4>&4f4mAa*6YWUW(9)W;%d;+B5dUrmCjbadA9 zEbQqwp3a+PT@wt&4If?nvnDoQPZQj~xDWWu*%67QVR#c2zqDcBP(of8voP#YD0X6_ z_D2p^pCA|kOnw7c_gJlU_{XpPR~-<(0|K)@3nmcIMQj;fOf@hl6OgnWWAq#YmzR-` z5WsqazZr^3LE19`{ffT8A3c*;iiWUR{9^1)JYrg8YIzch!q<;lL&a0+1-S7GH@~97 z0o0}p>Vka2=xmrh>XeU1%EQ9x+v9$4k};d`nh0aT8U00&^IWzLd0pP2YawLRf*XGY z8l|3#D@}9&8JPci=#9a$5gxwcxrs@RKsGr=0J)4aRla&ce-32+0>%`SPx$2OVcwZ6 z%1JML)e=SpVu%=^&>bvOKI!iXa6G`zmMVo31!d3>-W71zLOz<);B1+j2vt`*32^IB zlLn7L+}(m_DU>7w$#MEWf~lGQ68#90R%Ws5I-p{1{aeF;BxHS7D`(1d~n*D5nG(!$=-2nIo z@1*I_JPLH?#Ds1n$OsXzrJY#=flz;d_V8_*TwT)@x$JzsT+c}uFCU47`mGHrN$9KI z)+bVA9t9|niK%)1ltO~qQAa`28^mq6BFHSZx$aKZCWxe>F`KTRdRr*Orcn|~P_(@N zUa_R7T$>g13EWpcEA^EJ<(^h(6#d+N-7lc}EfsNB56#G|=&C?tUxZTE@g@qYm|Gy2 z^n+D|LhNy-*}3!Q%?aP8jHC}V@r=A?PW35N=FEL$1CDl|mx5*4_@Rmq@)=TFzL2VU zSCBrI)jhw+pl5|ybb^D8Ol--EMHufFQK?RFSD-TEsMaeyAqluHuV*D zkB~jcvwB>Rq1641nlarX-rfri1fOgGoO~;EXCD#GhLBfzDlL}4xhe| zYf{(*YcEuxcN$2fSAbD)4=u@=qZdfJ&_9f&pMWl9I$8XBPd;iLU*LnJh}P}8WAdJM zO0@S9uit|vSNhqyo@s#^*#^VpJ%PvZm>|;MTiqz}hnK4|qap?r4bNXjHNf<0?Txw^X2Zx(ANM>E3K2o==p^fIphD$)t>8F{WXQO;b zP{ro2?YIClEZ*sa(mpsvhIu#=hRV}H*(pe)GjeZd@`JHt8|AieZ!KGEmc;c%g=-}B3G78kxEm~)V%FB)15J9_EQ|WGcVWT zyo0Qh0kS46l_0G4`@&=*WltpF#Kp$pZ?+}otz9H+79Y>IIM-a)oFYsiUvO`v(1(q^G#jOh$bxD4+gU%`>ip_Xn!-g&MC 
z;~*-ED?C!cGHLn-*YMPJyT4usqK5ij^UPNittF8N)sAAFcNB(_ButHw%9&_zG2!0jVc(=OP=jFh+__zdPGcTs7R zge7DpnoYX66Y?AowgF=^V7K{qgBP4yAPHF@4`Ww0R61fQ16oKLfRsFLptNOEN zV|QXxd0Ijo@lIify#C&I>-bTk0V>%$LE_YGS@#}f9MQk9i%r@5vHGNIwm!_gyDe7S z3%d#H_2TV{kU5w63C%kEU;PKI-m~cNMkmN14!8_F;n?>psrl&fj+M&35pQH4A zQ#fW4MHu97cs{QYqj#ZQ@yxZu{Y)HmN%%I;NJ1B55tq|Jo9LEI_vDZhtDm>Xvbn{z zx5~aL04DiYceyXR7u4MK#y_11$lcLn*uoL-xk%k{u1`dIl!g@hm z9(Clqw22Xn!OS-mXlAlH`MzSvy3={09lT_5R&agD&3XyxqsrUdm7iwXw9WO4>G^KQ zcR!g^*v%WcIevZB*RVF8S+~8w+gHaYM48z98z(1W2!;GCyRQjr+?Fp(VKCj@G!Ne` z@?o`{Yh?B0J*lm~N?-f#iqSK)b^A-7e>dtD?nfxC5B0{2RkU7Ry<>Rw7 z;!Ai@TZj$us`7%etny;QMob}*5+BdN+2HO|u^CaB6HxJud5WiV#WY&R2u9#9+dqq>Pw>OOOD>AhbvxeA9p)XkYiu*ZekbA< zS3lcrT6b3>i+#ddz2z8DOess--Aq2JtPpKZz?zz%_l%ClDAq%JS79^X zm2UB{u+kdcY3rvg9bMhZZPJl+MrK}&t2OV~yiDKbZs#sFd~g@gND5-M7@qKZgxe#M z|DoFC#j~;7;tM%l_JiC*?4um&RkD^8-l27@{h`OnP9L){Z8|N-t$uY@$d&%HFV|aD ztu0hn9T`)ut2d?a)d@5LV=6eXC@!ztN@|5TglrJnvbIEm0;y)NU2rsPLJ*6sogjpY ziB*Aevdrfk4W%{BrD84lc;@i~(X;t74?fdhR?l5&TNL49Jc7qTXJrw8RRuTuWU44_ zf#xxD#G_Ft(0N^Oni9lwSm0gQ4k@mbyCv5|z>;QAJ65T#pu^&EMBIADbdkr$uqRR| zd0T!u@OJViR>f8q-<4;n-83=D6iZa@geUk59Fy#PJUShsK9M>1o{ba0J0f1)pBp#z z(AU}mpJgU$!L02@??LRfo=qBDx{{>h{H%n7S{k9LrS|e|;wdPX#zjdV6O%n7p2_7U z-B8`PTH|Nb^Qhj~%4ZLe3S57NpOi>QU>yC*)Hm&CA~vtK6^?AL&gsOVN$99UeS}F< z9;dKMje`Ek){d44wvptG_&C28B+5LaEOG}u5wvo?Rg=uzv@xLr53f<=jjCAPku8c0 zr`z%7I(KDNaVOd8r73-2c*?fa6!Umi^634KMvf!Y1muDP<1B%cPpPm&r!4NCCzf*i z)-5cJgC*#usAu%NnXtQYxH&WJ=V|&+l=S_43elV^mmj&en^4AlFDmHP3i*W+)ggjq zpdfh#qfFd(d%b9G#I4Rp6fT%N$x7N|TPD?hy)fWP#=4lFxk991DX2$r;!+(!VGo3; zm~yY?K+c4VdE7UjnEJ*U>ltX^Mhnnm{d{!<@yi&wNFOE9O#3UiB^wpIcsJ~l-(%+;(&I4mIogSp>1gw=@p<5%1>{#RU#PSXhErpoY+n2*T+IKVX&GJ7;jo}u=7wiqEX z5W?c;YE;sXE!Sx_id48GVOhiBq+L zIOd}=B0@J4SkxxYBwj7KB(Z>Nq3k2M6fNUU@ojv%^~TPG-r(KzWk<3QJQ6wT8e;M= z65)>HLy0QXN_xWW!hQuLQWDoLN^-o6_QlF)6ceP!x<1}+C+)Gepg_cN;ac8T_7?=& z`NsMNgB2Z9-{W5~wC}yG{&qurMl_p0e}#IZovyyBYr?|70$0d9qV1?q2&1iK__sp5 z#-TTdZi1X!?LnV_sU9QYFQ-Vwhlg1-NiNU9wc5hjcw_#VE# z!0t{JfxMP6FSdQCcbT5=opO9{yj}lcH3^3AoRi^C7-@H@W1LE=O#Lt`Ukr5z zPtM)pB?+Or$hAj8;Tus>H9RZlw3suNf381EJARRRd9Z)iz&tHCie0&+XSi?OC0wIC z(?BC&)hXqb;>u&shU7IIHU8j`FQ(7N>DXU5vUaK_kFOeD5gFzS0;KL-+O!M6(Jzfz zrp#znbt>32*RK8&f>#WebwM&1IcrFcv*Gn{ro7|m*=J5+XyJ8J z4vOHmS808Arq$N^F3#NdSf5Jz_4S|SM+72T*i@P1=hw^}69{oj&TjKid*8zaG^6rf z+Ru0P&v-_#xpdT8{BkfQ^-oT`fIkT_c;hv9?wd8mA#A0R5XiE{-Cf5+U?rCm>w8Jq zK0NuiwkhT}JXf-9mS+4H2(fj8$jpyva$jsqYt zfOtc;6aj%Bm8brJ)yA*(8wj+%#C!gaH&A@1=|72u!|p$lY;%6YnD8}q_}WCb!A3gB zsB_+&QB2iCuwhODTr=k3Mmw7p8T`_YRxQ|cGY8=yg!Am}5-iR@jYG^6(e1qGMPbZx zl~H%?-;rXWTRQroT_awGiW~-od+Z4Zy4eq?N75;t0@WN9fE_Wbc2gKMIb!I#-?xeu zz{mOD88`)Jn)?r(+ARJC{l6d5BS_f6uHY*BSU+5#VY{nw4WLvHD%#5C43{-XND<{)^f66n#~Pg!g|dc{WQ_RxD}2j%CH2&|tZ6(1{uQfTx?z?8G5N4Id6h03NHRucCBp!r@RY{O60-dKCLEC)l+7VEj=5 z(N2h`2qFd3_;bA$dANiMGOaE_q+uF|*e_9#r2KsCEguFDpdo+{&@3Y&!Y`PlPNrMy z+rXXQn3AyQLFUDzXKtjT);N_lLp*>UDTNz7MLGED=kjm8gQ7&40x*K6W7!9-GGG9k5PrsmMthmpAI|95XSA^R$zG8b z$e|udWIKKUVDb%vx);o(R3E(?;W-PunlzaFgUwxYEisAa=*Z^l`PioO=pk(=P-^K zTjBFkTNx#bPK+Wp#E;;MQjr2+k79+sxj4~s4HpO4+z)^-{eowP)G?_|2o!bQ1Lbfz zfJ2KCOyE?US`vBW0=Oh6i|#=IC#s``iq=iI?G7s{+iplrH5Z}$NV|qRMR%jU+045T z%|Id*oKI5a+gA`rKC0jC4b`qHqL?@(rpMSMBL&Fu7wTC7c4ZpnVd@9M`3)F;k~g#i zZwpG5A283&P%jKHHXc+y4}hYPR(u0$i&2md{96;ucsmRvX&->!#HEHXY63~%**`o4 zI5I20cC%#-wSWMPeD3YomjE@30LFQ*9h}TyafGkDc!1o72R|S+A)u+JU7iEx9*c=v z_VLBL@@Gv>kPiZ_&dZkk>MPvzNoC594p4KPGl~k7sh6U4Zo-3^ssopzDtnOfO~ZVF z?H2Vg%|iYKNjroSi0AeQ-j;;tz?c7=>jw0F{Q7YsY63iGV|;P|+;)b_=CXZWd~Eep zsJwBsYU@20N)gqt@TmvytXXFuQd<8>te86Qa-?(BWzq_WVvc!(WK7XFT;bQBZBk4H z!%Wt*g=Sr;`! 
z7#)GD6$9U|18M1DmOIB5WTM?$v~Y2NoH!zX(f$b(1K#0|GNp|lq!`fmfdX*W^s^CQ zNj;hy&O~p|cX`YpUpemGYl&xh)z51VFbhS~cxd4|1KR=$ikxk73cXMlPxC^6QNKVa+=|VkX=N$E%0{JDsiIrIb*AJ+K9ArooYcFB5 zgp7~^fzDFxDM4Nd-b$ogB2s0Le(Yh(hd{Fuaj-w5DB6XxNsEWWkB?7?GV`~(h#d&W z9q!+UD)7ok9@;C0;((<0Yalp387>TdYE?sl0Z#hc8i4Tfm$HB4$qrfpMaNB`bj|~! zytKL${etj*hfiF(|96n}BVQ&rdgw})IrRt>821rQ=NTEupM=>zfITp9)DK{BA#ql} z2H&8g7&F>l_z+2d1ZoCZ8P;sxn;jfJs+=k~5J-!0Z5EDY0&*;KUBb|Ho1gy#{!H@Ajw3+q8ou}Y**LHzhI+?A^^cMP~ zd=Ju)5qy5M2f?l50EEjb{Au7N?H@4j%G^w@&^O>pox z+c3NSko~?vaC3c?Gx@=_G{#4%&Y42LUMlVVFa{Y7IvxxvhK|0O5ybgmCMx=oPS<~s zC48RVW{UYDy@oozab-zq0-w6k6aVIBs1Z{h(y&d)F5lX|wQhu*kba&=T37)b~Y@Hm0a7 zy*)tLjj1zLzUClLtu55KHTrm^pGhf?{rouvLk&=@b-AdhGgZ8;8vc>zw8*xc)Tw4* z9jaQx0x-HrzQbWmo77xzw1ViZC@uffJuca2$&_(nfD(@X{ zS7APnl$(r3j%e1Q3d~xsz^D8g4mY{Vo6QWlj54{ySfGS?Ea{6)C_#M5hpemVAu=f; zl|zvf0Umk_$BYsA+~?Qe9U zk_FMl)9ow8CpCHhC6*=7J@nF-3V|7~{iQWMS`-CK_QEJ0onrZzIhwfMt)8$uV*Qg+h( zEIWJnF{AyhnX@pD$%va+BqMwKs`cmY@c{2L7c%|b?L0`!{iteO&~F%~z)}=sA2PK=|VJ5m~EfQEZ zVoAf$VEWIu96&QrLYH>*?_`77E6m`8)}J%@aPrdN=54GnhmZLG$6%HwuH}8ifxbnh z=PIFL&Y!!LDWw?HvWa0l-!Ne}eUgAWInYH+@q%yzw2ZNH#5qUudVlM+@85~3IR%ts zY`NH>&K@n7NthRB|Gf?90QdWX(-5Cq4@{>2y*^w6!g#44UM7FDh;>-wHI|Og{@q_< zF#S+*eEv^|!gv`kxaf3sz0KcWok@qeE+^~A`m=k`t{*B}pECS?p1|X9hAv?fli_nC ze+LGYPu6-M^Zi%b(EyLsVXepI{ojEN#zNub`G(0q`9{2q5tWur{mw$;m%jtEg3h>q zj}~-uIpwf&X8t=J=urD#PY10)YYB9ky>Gno@61ESrOC{jw(~LIHZrS$61#y|Mk7~3 z@k=-w8iD%ha6;@^ym)zRydRz}zbZ3Tniz;JP-G7ojvHt)j1tqc5@p8mUCZ72+}^nD z(3Cr)R*CU4uPFx=Y*6GdyA5h_f=TXzR5?ez*V!gsZ9=$JmZ7|{0Tu22>n|U3pRZQJ z!ou-TRgl#=JOgA>aQ`8GaM2hK)CVqzA>Ql;L3r(~^{HXFcPLZ?88}j*Hj!!@@^O|u zTRdDe@tIhkI@1REJYPcs5!<<#(h{cj*4Z&$W{qOrP|0|^C!MBZZE`LD0?J3BkxotG z)n|%blZQYAn$@6}1kAP3_cgLgX#Vq=G^{#Fvi}@K9I!jm%<3=y-5pTI`i2RG_DYp=cbTyxF1<~4V?vf>kT6cQ9TI5>0}X$e(0I0R}qILHbT z1bk9daf%H7fOl1WA_iABdVdrA1KCMh#}y6^6&LnDJlyLHA~4{Kjk>m*wu1a4Ge-v& z6LUvX3l=X2ComcgPSEQS_^X43n+c_tgZ)$2M_xkIw<8{bzr#LerKY?c;$|mAt*xL; zDemZELCMX+#ll7{j6z9CDd=Ku`AAhl>d)oiUqaN@Zf;JGSXn(iJy|?CSsY!gSlRjc z`B~XGSUEVD!3bto@274iUd&HjX?{=gcOD4~S2GtICpR0%r?~}o|K}CR6oiN9rtN-T_|L0svE;bh6sc)}l|8wxftSmcjsQTe2x%bbEv7ZZR4f`|e zqr-quUwP1NrGs8^eXEDIx!UZ0P*EXA3KN?5ATVO*K-H!u0Se$un8j3^##NCQdnv-7X7XqrpgSI_}3DB_8JOQ^JGi)>ijnIQgR6zZNC&1 zyz%=np{BHEe|5HNb+HJ>PKL zLufSIzStRzdJmJ_UJ|PNVdJomqHV8nyTT=!LWDeU5}kJ*tHv$LkV&h|ey*0;>axjl zz@?9Nd2XGMV;+bK+urw>HX-5V;`qS?p^mkHSXAl1DZ&K` z6;Oz%S)yI(b+|HHW9KvG5~eU@dN6sEHS4{e$)CY(mps7WX}dSyP~LX)C^X%nHchfj zukPv3AE%4Y&kb6ho$7Pk0ZJf2iHb;DT?MD30kru#Ml^-RK=@)Qu)OuhU@n#@L7{f# z%J+1~(TB^w;-6L_hk`}b5J{w-CpPG5l$!JQdKv4?vIv; z>$m!byFY{-9b!757b?w+`iK(kO8(w2p*8Kgnihlh*FP`8#LALrc)1ZY2Cu0^;=#; zlRta;bH)6(ybo~O-G73|Zv1&RSE(3H%8OAGw6DH7o+lG3CmQMN63x4QJkX(QaHjtPd={?ryci_ zI$o^&eYA>Cg`+M5#HHfFw+BC^eJ#aE*lk0Zlx5!P&2w$voTpzKll>V2`k@o}p(V>V z?Fg7$4zr1D>AUi8gfrL3L(oZkhaZ<15gK}#wE6uUru~kxKUqquS$uiCp2DOxDv4vv z`DW`&@!N4C)stQ(C`_PfH2!gh989iW8dcz&$U{m-*<8QNzr%d%HyS>ecLPq|Cz(0> z>gY)40B}-q@q0?d(SPwjKmq$3l;Cf!$yXk!OZV5P#hxR^>Ku&>UKJN&{4;0&?J(Nv zqZ^zegn!0xnU*pQe{?%nGyJbP`z8dAQ*&F;*541pMESrW2tIBs)cALqw-PWq`>Z*S z{@GP5MPL?}n^|m={u)%#1>D%-!7I7{yQKdsrT;(aQUC|>*5mi8d9VN=r&FT@=xa?VfE)0Vw!->TE-cMdQ7QA}g#y%lDV3N`CF8@Do+``WUmWj)!GV*M1S^p%tCw`g zd|ypP<=?AYZ5iGAoT}lzZ3yhBiNd}%2eVWGyG%b*uFef}T>eOiz~Q6#zg7EpW~pNZ z!O8wM!%*<|g#I0*c2VGWu^gIW_>3XkPcefpt|yHEsvRp~XaW2-Eb0lMrlL&Kpe20t zjJGd_;>BLWx|B$P_U%`&Zzq74&+Aoc_$|6ycXSuripUs+=|>;M3&F!~++y|js=zer zJ#Fa8f0yYp$l$r)O&`h3Z9nbQA4@g4icKTM^>n`8v?s!SYl4{1DLcKn)@^fqG*e*z z!)HZ3hng=sR+*Azoz(9Z{unsXAHXW*y|I~=y}n$3L&{|>S{#BwHe2g32aj-9-+L|T zKDqa|I1o!%HMp!P@+~7my62ty$ksDG7jCZhMSL#~fOkIKnW=VMZ0)YD@Je`~@IXHG 
zV~fvu(_W+9LbLbRMt2%RihmCY2O+IXbtj1QcSFw9tu*aP)_a7?GQ4sk;W(g(`1be{3;f zEI!+29yZ9Y;#MDz3N)F++|lnJ2@oLQRJVPA^;)s#scMKb45_q9l)I0Uza_h&qvTyrSOzffBO=VP`BZNke46Jw&3GuF0i zocHDwg2l1=7nYYl!u2rO!3Mb;P8s?z3u{NQsY1kG86ptv9k8VRb%K--6`&E4J*vua z@59-CxLzicwjc58k}Ed9PQ;lG&YKV?V>8U;ceKO|9IpC7D_8Y~#~vH1C3O z#vzDSh(X-Izn&4OygiE7p z^$1U~L6gTsqMl=m`-InRYfjpQ!?T>D&DJn-wqE_9hC~9%*4SzgJ+&WzevO9Bqr8JSS!STOgEaB0llox0yp6@ERS=E0 z)?Cx%tIk$5Y=yLnlxVC!!|QbUB`6;R^h6R4y~a;?rTAM8<_V%+*TK~@c3(8w5kG3% zO_m^cMKjxJ!Y^8Gz5erjcr@_&Zb9$n;&rGRuzs83+lA#|#||@oU@E%#Jr(#J6a?Zm zX_lbZRDb2CYz_1WWjCTo0u~lO9yQ1E-(`3tz=n!7e?2!yB3f%^7*fy=fAdN#lWaETIB-8OnHLxDoJBphyJ(e+3iRbEhoUn>9qP@ za#(N_27gP}Fs_Skyww3>Dy$F#FV1mxeBBbns_K>x=9-lYMq72af)x{;ERnEga3II( zA3Yn7*YS2@Dh!9MB-RyTG9Apjjy2&gMje~En@3lTvYDk~^yg5CBCh=Tbfu97spJ1lMVP2Qkl#w@JBfedqmG4TffC^iEq{gOkJK$m87Sw{qwxZpcDyl8 z(kYH}T$1@99lL~49VWgiX%NGbR_wR^y0KS(`v@hh9augi(H0dg*n+a9k*z;gNV7IK zGDEy&je$kBmpVHh6Ir*p`eF}!UM@B8i0UbNng4)+esZvaok~pw>R4O|NGPv?-nppO zFv11|P}76+GDPYhx~L@bGtm@vTva;4LHW!H=i*y}(&(e@k_ddJ1(zY_WqXsY(jOp8 zRRUglTG=CknwIS^9VBvkPBJ6exo1!#ori3(HW~L|w04KcJZcZW~x@`tvGfSdjCto{Jz6 zeAbCfpU7(DH~voM=hb-|NIbnQVwx%xq?>=;TpgcLyh!HCmoQVv;B#JT53qq&Pe=UN zsUCZFwsSdaSDwUTFe{+kFLgrk{Nxkx&r_v(o*?ECfCBFTi96$8jpEe57CN5?zK&!? z=EVs2RJ|8UotGjyDbH|^E9oz=^|TKb>?m?sR!Xl~TQ55E`kKFuf94@HDpF_IGru{? zp(dnYYA5Y!z1o8z0J<-MyN20ZE*Frsd!K&MDsMT{wYwgme{&611u2k>B-n2`Cr8oG%3&c-$ckJ9+GWrT zJ5@F?UG#W&NYGy-E$DX!?(fAZA-9gwfyHgsi{{s{`y|=49PBpClL0^W=}0nW?rGC5 z3jm=u*74FY5l01&<<&V*dZ(rxTQ5U=-h!yOzZ?W~p-)Qah=iVT!LM9WCg}^_CFeqn zdv{LIqn+O4I!GF0NJU3sB&fSc{ru26^F_kd=W?Sq^0Xc!;e{i!VC=-G2oU2GBpZj4 zd1QNU2nJ15Sqwp4&UR*KyQJ*2F@#P&0w3ua$y{swIUn7Kz*@V?KbP8W5uAgMiE+E{ z((}2W@4n%*Qk*y$e(iQ>m==sa-s5ATx!C&L;5v~(O~0$rV|Qz%KjqtV6AF)ed9JR1I`3$ zThVnt3$-_$hbWDtn>oxwIFTm>sMZdjg4Pu6VmlnMe z;PkKzNxtj|ym$O|vBk&JaFXFN)$EncbVbh420B!y3Q_Ie%cxNJv9FOVjn>Dxc z-y1K{sTTfq?OOERz3utc8CPo5JR87%y0(Mq?}x%O@DC|3agGUg>ABBTSug~#llb`L zKS`bUI^+Bpjfl2yWA0uZUFpr?%iwj);-;Zo1BQO|?JI1Q*|CJIS4-jc!5#UjkrXw5t3Hu=aXcV=9-{l%#$m*}r^LaG>4)5bVsM);l zUi~UYEB3+|zLtK6(111NgJMkK3x7iVJ?d66%VLB$Zx&M(@ss0tC!iDe@RB^WCyaqJ zP{zIxN&cl=Q&K_Ng~x`+HOVlgV6Pr^;S`2k;IcNTBz#HxoeqcXYO~^nNQNm3J`dxE z$jeAC)W=i~t(V`+xF8SlsgWu5*@O@K53|-PgN{+&GE28CwD_pKJueBaq&@7eGJQ|- zDbN9Kpl3A#($XbbsmkFi?m=$G6~eJe6TEafHo=CWqPrnz%j?MY4X+c`HS>!f@D65& zuMhU`;jy)oTNa$uNkF1d*p2({u?Ku`0QwcKbHoCFPE{6Ee**l?2uaLIUl&gtU@|(& zDKzNvt5rn|5mYFQ^aKSbsGnTuS^Z;iN2<7|)MZ=}XpDp56#y09%?Tx6`4m|oI7Yf- zrNT`xLu~HWAYFaF;KSKy)#I{pnh~88f6~wQS=*eJ@bwtJ7+TCPxELSX?##5!^Zv*B zBShw0%hnDb7rl_!T6_JE04982X{A}NYq!Jy3Z!CQ4x))hUdzGLyI4{@jnRQ3`D(3C z3bLLtzRqf>n?3{PT;MJ``-L;BLOKt+8KZ9Wt3F1?{nWw^U`ca85Reo&)IkG364Vw} z_q5k@p30I`A`1QT(BoB@$(IliZsSJ>D}|%OU7|cL;d^qQDb(CBFhO#KA;t&gupjDM zN)+stBz2d_>QKtXTH#1am(NzVm&?1~`%2OjgIL^+Zb|N2XC>sq=1{+T;2ikOUO`-@ zw3WzMxLPGCIGsNDrv1nF$hv*{fr~`HH~gkqgzZh>r}ka|mjZ;cQaxXa17nm{*j`tf z-H=Kf4Z_JhGk{ZENd2ye1%y-Y;{5NTe@5b5$R&EWbce(&o)^J!uGzMU2oXgI34?g2 zOD-i2LyA}U`aoQ2K2}Xo%r}@amB_y5M{s+R)axC;JS94~!YuO>@?Xnhvv8K5pAvKR-{i2GLx zmlyB|NBUNLGG%h_)sm{Lbg6{iQ4EEWn;4u3C#HHJEn0ry@k)d>23<4Vd{Bsqo2`$rD%QYb zCKyUoT%yfEJ>;?AZDqtvH1NoCFDel4l`@~?@~^!b=1Ux(bctwdvg-j~n;kY=*(@u0 z(i272CzPsTo%glG`{o77N6NK~8y8>}6Ee`ORGqBe!*)-l(TTKf(MVd0W zi$Y8-@s@2OEMZ?!U>8pZiN;P)UHgnATq?QJtS`6G{sB#^@`bSpvTLc;H;kUi0ws#U zCr!#^YfaTG{>o-GckZIFM^%1A5NEcO?@SgApn38N%1&3N^&vElUxC2~GCnE_)+vOTQeydA*wA ztl%P@ul`9kd_6x@r!%1FH+21!lkn7Z?9g*j*%E*w=6GJhs(ZZ7*bRTVYF`grt?xJ|xwO@?@?%V26d zA;<>BA%KEKoCs|htaw6jitihtE%H4mDRDToJ5NH<d7ZUwpb)I2qMF-zq& zS45xYXAGz07<#&30@2Qb#>jZ7`Q)ZcPE+nT%ypqYI=@qbZ)l!ZV z;Ve<*;nHqK-Vx|jS3M71GQ}^J4Z>idQ5nl^zo=J(5BB=GVekV7K{$F%fr7H4c>w9& 
zJ2bvErfv*`SzPWG>OVLcWf0=Gbo|c9+z0=_cRUEZPPNMmd;k|l^EOUtCtdM4R3WqI zIFt0QC=rMFy+{9(@x*e2lzc`s`i%llkqX(L55bmE$0DTlybLJ0?p_&s?_^vGKkunM zOPM5Am5|~fRHYUNtHk|I?2+&T?q66|dcbYd=Sc_jpiW0Yd&CkA{*%vhJfWQ9h)iOB z128l-MSi*@mgk2(geC6Y0M*D-4LkY`Gv4OBQpo0dmX!qj#g8g;JsDd}kF}EBj#y&S zWNijPxeKIzl#usW#!3|cGs2|l47f+$|ASlMHJ1sgrWPd=P1OchF?m(j_tPn(u||K7 zE#RnIw~C5A<{WryHO=;QYrlW+qP{xc7jc;VFj=Zs-N~6ARr$DpDVmHQzzG+l0{d9F zO2{D8cK62b7Y?u=Dgk(0J}}oiJY9zFC5I!zY*5VwVCn%*Fb~RVd*998;(I(<0P$@@ z8cXZNN`veABd&Rk9l3;Dq2Z+bzmV2DX*f?4E`paKR=DwDX{*4-Xy6H04yOV?bs%B( zCPp#yQ6ue?pv6*yZh}b)8 zu;BZ?R|xWkhZi8qZB{XXI-rhwfK|W|K((TM;uNdA7qyI@2P-}~J>U8gnf@K%lvbR= z3&_(Fn7chKJ-hu|%Uz+Epa3lJWB8-|Yp(DXgPMDC$&Pp$^tV#R67?4n^b(xuEw{>W zz8gULv>u+g`>x&ar0Ey(BU6j^*X?^Bf?&Q5Jb8*01!;zB|D=^Y@ewe^E8fG)2};8J z)*mU)rYWO_#XeMi-DPRo>7){9;mGQG_2Do@l;ydLBxNzjc9NFid#!TvA&G{C6{FyY z_1N^ghZ00h+Y~RZJ9;?)Y-(mEplD*T_EbE1Mw8k4MOxK;(LXq2rzjk{IZ0$M8+MkJ z4uC+tGL~rrv6lhFYgRBTjwy69spnWTY3f!8z`B*Fn4bGi?eJqFznTZ2XMR1c#)mtC zLpL@VV(#o&;!+bt1uLu_gP#^Nu==;MsR$U}h}kZ-wrqPedFe z<&LnuxxTzd=7vRznbzAmfBNYOMW)wEI7`YO^@M>F+K9~#Y@}a6$PE&l;immoQl-@I zBk5XY`o!rF0;pz*2b4AHyYME1aM z0f4$ph{s8FTxe!!!Yf5RVN_j_f-L;$%E$}jc%u&jb_SsGYJKEBC62!c^vj!DdalOK zh!#Mu$ri!E9G3#8J_X{}0PM&_684i7Rqb07KuL>uz|-roMsf@)w?}+kOvP4XS>mA> zbXml~Q9n+%r!eh00pv|gZH_~P!2RXsYN@Sr~x90)4s?#i8{z2qW1w6==g7SR0;HR+0N2bbzp>w?5*Idrt&@ z{qPkE@+FmFG?in{YeGfHtO#Yslw>JRfHvYmg!e(mJz^mVOm^?>*LGzc@Rpwycz3@x z@%jx+msLRJGx)IO@NJyFbdseJhXKgs9;QIA6Zx|ZDtki})V{ah0h3^Nu8y@;=jlj) zkZhQ433at7r5pzgcSoaNwOHH6>1F(`yoBE$OD!G*_KC^tC3Jt(2hPeetLY02tW4F?^{5j^?!pxD^?cm%aP<-%?@W6D0`d~%#qkKQt4LZ-(qnP7(vr*G zd2!$6=X4vl^pM}0wAO7F86I?@%P!lM!yT?6a~SYgENz`V`OcKO=7Ua(ki1t!1MhWf ziXwR(Jw2`A3HVJ(@r}&I=q`@7*aQ%(XsaK2?1bepR>={Gum_E%ao$_^)P#3Nj@^|P z{ARgU);WaP#KcS8tKC~`K=QAoppIC>n+u^Ba-?o;Lmym*P0LF&Kf6ic4c zhZ8+;P9?)?(Q7q&%IJkqPwNwcPI-b!Ynj|>7>b3{4Xnyhe%75;Iz6^L!&kCSh=3G8 z(Yu|-o$F2XDjXn@$Pm89rLL^-cn!-|-2SxTBBM58T$b9e9&^6*scu)qm=?^F;u!q2 zn>hU&Oo&r*mi26J{6%qkOeYtBU2(0U=OwW_+4v;GrKdxU?0Yt21r-~4EyOpin8slN zN=I=vFswY51;ST1QKG|XkoP_VAf9=TyQIz6hEhD`_$9KKLx}RXISD`_(gh+P$p0ox(9eAt!*cl`k(QSDL8^bur8 z+fy@Ur=Z^9jEq!qAxu|&F5t@Fa>z1l+cWH863(Gca`x^I2tm$aj)dw^F-v~ctHwQ= zz7u7RifbljrXL^PXO-zr5RCZl8&o_%@M1>ur8gShIH%a@wo zV}RfyC_;4fF1olR)m3!IxWDgXOBHUDi74j2K|DF0Vau3ZcC%jXVo=@)9*9_DgFyv& zL~>44`a6krSf54i6Nm3cn*@E#d?BF%BROBPcl> z(RE3KddG10H~FLRFnSQt(LcDaZ=2NgzauYRm;Cx5ew}OXe&t4_M)ZaWyAUm*COnFp+&5<)P1&PFiB_e}qE<;Qk;-wOBgBIZL%f1zQ;O-Cpsm8!{eALEg4wRR8* z69bRRnWCt1&tR_C@AB_3b}ToOt9_ts?^b(k25&GjLnKvHVx_@+U(66W`NO>rlrqlT|R9ELW70=h{M5P03vwq`RsrqxN(v@6S%ZA?uZu zd1jtr167Sj9a%@A3Q3AX5Zrqox7!XlRzdf>rMoC;&UlJdUIZeu?G3wxq1`h>x6rXI z&&C`rto|KT+R#$kX_A=q>ft5Ra^Y^z`fyf=77KLS2ZmJwP9Oshz^61Fynyx97DW5< zk^#vErv^()tOQPur=pscT(BRSJQb5Pyz=~5l>gm~O0|T-KWM_e6p9;cP16uQ@;uI zA(1c+_E0K2l`qG32n&wPEMQ|EVv1bHV)GK%wFfiVQnC1yz+Leh&lyXE&pIcYMlop0 zu4K|D=PiHaJ=!>uQV7S643N~nFyMgPf3}u+`YD~#mZ*z7Fw|=WhF`1TKKy-aQA9Cm z(ZG+~K>TOV@}P(|77ST(8WJnqsEr#G%r z2Gigvs$8_Kz9DBysgs!qn+&fhwV6mnbLM#A^y6f+%omY5nD#>>8r|x%m>BG`pI)Ci z)5rf483$#6vO+EEM-WGbTAG?>mYen>I@|Q+_q@i4dc{76F-BYRgs+*?x*v;*B$!>` z`*|g=N{N`S#VpmI1lEeULA>%5ltGU^v8X?}+$%vf76A!tFN?FfRnEOha}G0{n+ZHV zmv8+zYAerz&CPpP02=)8+j1yHe(dTifl-T-yF|zqUjv9aEtgb+in=Hwb+sLmWOYo4 zUvQUyZsC2~`6nj=yd)B?;WPKjq_T>W3r69~ZwVlYl`+XMea>L7m>hB{7WepjtOVMS zP%O?ihN5m<@WtQkvj>(C2$gtVBU`x|c_hz@(Hi*d)^u|=_mgP- zCwFxKWsg{$+5%<32~2GadwHT^9%|1W6C+sc#BDbj6_r4*lwTkrrBHH9Sp*pvk_bQq z?2Tx+BqnT(pp#CZ7Qq=5`omf6*!sS>J@32|!VzjsccTu^DcHA3;>pRQvk2xo1dp%1 zGN^gMZwPdVQck{Gu&aI`40))iQp1Gny{;hTTauVzibG9-(m}9x+TAG`6~mMO$HOzM zMG(l1TIUJFD}INUl>0DZ=2bkEg{j5>&XfAWCqj{P^;52b*|CK)a#0`f*WW%kMky&o 
z);AX&^jBkP;>G$%?#S`)@?lu!`n~vV?Z3?R8~bhMdT9H9WUdQ$9zb5>y%_WtbCdm( zTcX4O?b$15X#G~}t+TTs0NS(%=7(qCg%V5mn-ikQi2?e&9t%wo%1ZKlaSj`R$zU~Ar`kc629RGGO3Sz2T z=cN3w0LFo#@l^?)t+FQeLsb}u_u29n2bg_xLe%CYj_&Dn@2-$GRJS82zSndDBQ|u) zfC8bSvg`v`>~TlV-(pw0GDhRa^xdq?`rNYwA;#QZCU6WX{lVkF-G^~~Kv|H|coN!+ zuCu~Y#DIzdjn~J6PRL+OGl9+iu%d1PQ_9#-#8k7e$B|0pwFU|V0m`@l#>iegKTYf)56aO}bETcXHLpsBGHW-ldL~O^``Om}g4sI9H`Xk9tkRI?lv%^g>$Tod zYYMt*jtH$=DyC=vmN8QnPF)IygBB>*@dAbr$g}uAFspY}$@^b%gMFl}wyw(g*Ofh#C0pKM**ZGmh9^`)@rMt@DcU7j%aWKb0@5&BtU!k$n z#5qZn{B6fa5a-PpM%FCz>Oq+UmG%C-bCs&zV+{`DcDNE@+z0X-pl!1c}7vR5i9{oa)~s(Zb(Sc z^IJAY;b6X8E!1dW1)(nNeFvc2{oOH_?qLh|$LyRv#NY7+gr1V{;vH+nN4n@y4q3}Q zB;}dk>H;VZlO^xUWB=b(0|nV&d;|K{Tr&bL z={KZBxr6u)KfYB=EQRaQ(8IJOYEcWA1c4zIfVT+-x6ofokg_7?U0VcfUR7vMN+xq` z?Yzs-2g2=Q$xyH%uiyk;2?6#nF#b~@OVfU@N=I1kPXkB&^C3S8%v_iUz=;S{Sy}{C z{}DKt{Gk*uG(BElzF6I^4z^`bz{?wim}~rp3=#!Sa>QX#@z}qUtOH}$-03d@>>A2* z#BaZ{uI#c2!bA^Y)RI=aGV*_R!8{m_PUd-vNrC4c*X69H5Kd zvmb0KM+uyo%{AeT^xYL;`qe46^b(cD>vR!w4nM>9XaJPJ4UfN%XqD>XC!aY~4O|?q zf{bYX-~)YDG5}(}1F``?aeN%>mqWneeG1A@g~XOVSeJmR(WfSsVFOFq063W^7L4MY zA0zJp+Z-ZxYL3<=GcIfGgA{EtJebhuaSH)r)i^l&31ldZ?%RgWJ@~+>p8@NTa@D)$ zfGYLs{w2VkT0jN;$$jS__m}N<rjk-N9#M2(r#fQGrT8`k z#X^CMyykvO=DI$*#yYp;u|0`ikUPRN7Zq0y%N<^a9d3gfuMXhp2NN3B19tu0cfNpu z2#VRt83{1b65u8Ree*jSpy#w47M-r_c$)dh9hMLix%EO_}r7!$=ofq+35KxepL*d zqzY23Oa&05YY(7oz<{DA9z1PPj^cKfLO>qsk%UW$137TP^g2lFO-YG_uK=?`4N4nR zMWbo$0EBrt2N<@1HF|x0JoaKGoc@)_jaP4*{O5xUt8c|V=Xxc$s#n^Ikz6 zki#7lLLDuDUMfd-(-mb_V(?UP@bYTEO@=rUO>6mWj4))0+-C>gFcF~ZM%SAKZ*twjCB{_*;PIwG zExhmqPzu%~Jhvvoi`l#mmii6x6w!2rTPUu+$T_Alf<;Bu&x$AggnaVELwAy^m*MH1HH^%8% z3kLy|(|ehZ_M4CE?_CIjvbvy-_sL$>5Q|`sw+rsxRQa22hK2dMg=b8mfV0}OOOjLV zvL=gxp!z!Qd&fPB4`Ra!nfDsVT*7VI0rP2;>YONs<0iX)NK4?xSY+Z>>!|uA{cQosT~LTo&~T%{NR&XoWPz}2Y_LPC)!&EAu2^w zb2z zb>AnZC5x%rz}?7<2_BO9A%`pPzmKeNl7)bsF7f2;zJ$$%g{(Qr}ZJ+7P3> z6=rCVPx~d*XM-S~TlOeKft}JhOlMTyEhsKL87{R@e1aY^j7mNL9O>H3HAm1@m6!>;`9yGJyI$R#wzUa6IzEMQ+;s!aoY%TR_hzHazp&bly z0?9a}MDT^rFp)WrdPGUT69{_@-aJa7A>;=wE}ek0=EP2r3RoF;^oy;Tg=IEKrB;A3 z3Ek(fB}NQlHG6>JELWiYI-+B2(9_O5`$3BYG6YQBhB9ou-#c)nm8e#aS$eiB(#5-6tizSfyu&tL! 
z=E-oF()u9=WEV8BTzU^niKG{@!&YSHpuTQ;-=)&ZIIL618&Jqn@978NRRKHhycH{s zfvjyET8T5x1}7mkdkoytvU!&@BF4ki`r*3SK{Th@Q-T!|tQVBohIftFmOlB-2UMac zTdI%QW%1Ik9>&5cF!Jd32s{8QJ77EoaL?k~W z3Rq&Z7{tR#Rjhg?FlQLfAlK~lApq0Ngh;c6;yT&a+aXw~Ea5q&xADX-R7D{OPbNiE-=cV_>Rb2=bQrQAnDQlU`i7~X3$$oJ{ zF0>lP>t~35x`h%vjEiD@UeYDU(hw9pc(L@c_aVO#orG;kw>KGUTGCXCQZ!+JJ=W5v z&ccY6oui@1><)?^L)-V{j+2=lv*Du9saffD(=1o7Ew&uwbyLd>z^cbJ3pqtJ=tSxS ztyHh7F~~WQ^0hl5_Y9?Mp@e>Y4{@SUrhZ;6YmweFBwa-I0Kzksq&wlwHk81B-XWCs z7+2rK)Z38PW*o6e8u&6v#hh-xmCibP!5G$LawAuFP_!}Wg77;B7@*Y zF_h{RgqAM7$ZhhPk;8(h*O}6^NSuu5`%f~kuiXdxt1bXTbG{y^hdy0dOw!K|1#Cn0Xn4{fB1H6H3E)8R$Saz zq(c1W=xi*C4U#A-dA&Es;G4He2F`W06S*4)lTN|`YjP1uHgl1A!&>28RlS>)7L;gIx9P^By@))V@`)j)udyVP% zG*m&{#ate=ArymNwR_It=yw~C>j3^6cCXnRyFnkGVdV1llNdUw8`TF{`yTxq==b9@ z#SMvKWr>urT|XBnt#hQA2^1XQuhu>ez6RLh{gtt9h2XG`_Aa9xDX5ZYbGe2-lmxKX zdEXX%;`L=ML=6xdg!*Q84bL5a7XnBrNgO6Bf0{CyQ_5~cz7i_ zWE^zZNOBUHtziw=F`#a;@^3Bu6ubb8Y2+T=e1q^r#CZpT<{< zzx#cI$L?%vt%+3U-N03(Q38)arfR2#yqZ01X^6gfzgRo{gqL*eI9F&nO7o*wN*0`k zRlgBt91(pq$)HLE6(j_@>-@Ru?zJQQl@(+*tU$Zw=Uz{-1DQ@x9^ocBECBjT0$@nfep=^BB6E5UI||)k^eJw*y#5lQZ>Y@Si8>!59((hBevU}+omT??;Ih>T zB`?xptP$GR%G)n|Cyr|W(HR&~jm9F8qywtF zSf%dC^p0(@ItzEm4e8eLbJ44= zz9d{lv6QoC)@1lG9{hYHOeBx>?S5>4*yq7kuSKfaQDTMX18c6As6fC)u0zn!1Ob zQMNN;angG0;;V9MM4?EU=9M!aHn0QxS$4JocJ`e3dFV9Bi2Z8hIhpBEN8(y!lwdbo z*L|Tz|2U2#dk@n>vh6rd>^thReQ9PJjWG^m3gfhd2HC95&^w9k@A(E0@j6ppp0j^1 zL43#HuSVwt^(QYv1u5x!+;*v+45y4duTyiz`!SCQyiJEARqpDGKNM-NnbZ;>4rd>G zD$tWvr5z=Et>d}IW*N#E>Lab)JmpTVVD!-o6isHCs#DbjB!I)CWVbxa8^U!L%u-QgH z57}^=0Qn-c7T1J2DF$tl+l_SFekO*qmLf_%`K~+uCQL<)(hl$6orrWK(EIL<8ouUD z_JtQ!$y@40tVJ1OBC+nIL)yv;J`B)chWCr(QQXUZem0j_*5;)AB7A!0}j?k&j~bAKw0^n_UR<|wxc;e7lQM5(!(hEd?MiLAh4!1 zo;e2p?Lw9?QEjAba;H-F(n_1-y^n;(3vWEd-t60#h^KQBZQDaVbzN)S8(yM153FEz z7x7jh&OK%2Fk}aMcSlCFRp*Yk&KHJ%;Kn>Is6(Aki86!7l#YWde%`o7tOjMw*=Z=^ z&5>fP1mQdea{ucj@rxpWwowWp5+Rz8Y@Z}1Ni%>EaXgH>QV>{b3En^eC6K|wYNXBf zu9mj+wt0BllTEGckEmTGnB^{jTCvXH8BRiX!rzSiFvmiQ4T;8?XYdsVGZh zPio@z^!^7tOSSO-5dqT-P{Pix`i&O_CBj}{Upq4WFM&!kGg~^vqKin?MBF?MuyifWC--9jxF{WI13k zBrZ+=i!cU=QMQ}j14OllL0R1!;-B*s%raEmkP#Wx3zfwpVl#V#8vK6#h~<*yV4@`y z8v&Y)6_9}~YCJ=4#jM<8LI)$w{W=2`A9oFN zYTnWUv7SN}dh$BIs@AZ~As#835od!QlTzIpMl@cJyQMn(DX}xH#2Rm4JN1noUO(li zdfB!HNrE2MAU9c}ldubQ$g(++&J~~OJ%}{ zmjP(lAJ4{#A;qSylK#7SDXd0Wo1}UiOj=V~r?8rfs4%S1{-!K>mgF9MZHnb~m;B;% zypFSe1R8Cc$cq~Q<8mlF6@l=TKVgpLi)tdk?6a4Uf=mb(UIrFnti}Jn-{1&$D?m#v z$FhC`@*Iqw0;*?LxzSz#W^@FNsuHeAsW%`WXaazw6IwIq^I8PaInL*Mv|gYLNdb6L zDgG22fHqeHOap)czkbXSZv#AdPeu+WP)u(T)pMtLuO<_W=J6yuSl8phsPzTOl{PP^a1wprJS;8m2$qs}+FP$OM3jj(0di zAg(Xy{sFLa&J^Pr;El9^Rl#~|hLV}5N^}BK5yYIRkTXZHV)^F$dr5?zxn+bW$=_~o zwilB1F-VK~3nc6^K25}Lq)6R&XSsXgLH`^(s2%|pB`I4ZIQ z97sG4kDY1I$P5E~-Bm$1IB53T2Jjsy7%+dLLxaNlgJ8WvU6BHy9UD;g?4?R%5+1Ni z<^rVHe1Ea+mkfR8$rmjnD*YR9(w?8qtOE`tXC$6`%Rwh9un{x@!~-xnC(u<d*+hqixX;1`9sosG-Rl8V|`G%uDgrN|CXut~P<^cXHFROacP_y@O$>s*KU;>dh z?DYbm>&BStewVAqR+i6R$2n@+c%!?WY%AC)2g)(6I6A3~*sq<}nS9RF)cL?D!d^M~>}-`Db#~Y%gs9a0FIENfJiHilvSLVYgj3BiL5gAPfEiEK4+zD?kan5EAHJ_RzRhO=E007MJq)8aS1 zJL9zfv|VE{LO^lp$|c*E>FMk6x35i2DoXYlN z_G6?(z$d#RX_>K1H_@+@#z_t2f3^o6U1AZuK_FNSK=W;&xeLn#$ad|c9(Jg7Rp;aWNu!- zvg3e+r%0t!fkuT`iSHbS9rn~zyh*>3KMW6cuD!z8h{ZajtgyES{ zPlQ9K(>_`uL4-Y$)vQ?)bQU7`BCk#L(KyqYLY*J2qbdmo2I7ScudtfERGe4lmf}z& zMI!_xqEuads#drV3Mol;3HI;O%Nuf}n5WM)$v}-v(Nd+v4o6MwV`d>2n$7fdKPej{ zVe#SrqUtT6s@lHyZ{pA(4Tq9OknR-e?r!N2Ns%<@?rsDGRJubznnNR?f+Et0f&vyO z>VF>Z{oeQYj^S{4x#w*5UVHDg)|~TsK64GL-mBSYL06$*jE!?fp@Q5)Bnf%@to~)c z{43V3(|O};8n3a7FCJ)8np-XLC8F02C>F{=vCy)wn)&t-rv6*l>Wn_lF}&2sgTKeW zMBQ%yRTH;&{AL1e+S@$S{LaqXQ(=T#Sxi4Jhr;DHJqb46l;4q6>2t7kTAvO|?8wM` 
zrn#CgWSkMJ{D74`A^I6xgSzbRpNfg)=|Z%uIrAkPfmVq^RcF)YDkpK=bUWjuXbpvXs{#k^% zasQtBWHR#AJBvE2K0@r27;G}9_%7k{X);2n8FchGlx9up!K~zYP5LMZibSa#H@>X*LPvIl=w2N!Tb>UEVGCbwoGiL!+vyDJF3bM&?at6(H9br65tvW=GD26t8~)yI-(_x-b}aCYsY zg=BN^NJGI@q^!nbIM9&cQ{oA3MfC&CY32u}!2BrY9OEr}>=7mDm^& zVA_euqZT-Ocj?80fhSXUj(!cT4YNjtn|Mg;#4Xe@=g()A?X!`hGOhnCxgVZ|sD8YJ&9|-W^Bsrco__e_NAB_*4?0 zY-kC%LnqG~Gtr{)8odQ2fcvT)8*RJaVFf42d=w zDvcg!aRl4=ki8qvmrYpplkYkz61LozHn>=ar&>CxruQ*9zKvJ~+L#;G+yl z;rcq(s)HT2Zy{|VYvDI(CLI6NS%3tWVkAzwyYl&|%B^5hZptuHCp){RY(9=J<~NU> zJk^4?_-~Af>KJE0Ny;H5X;F2e1sgLVb}1>_1^HZv)^9Q-P4xv*Ja+77OB3rB!KLEx zO#=5`YHEEgLPgrKw=<}3DqevJ8B)(VzmROYDpOS)8Vol#+)Bu~toV$giTQBdO;OGx zk>9DvPJ^9nkQ+NiU-tBH@NUyj6-nJch8hUA5pMAbwPPISVM3u@2093Vynzln zZ*bf1x%ht(f(-?1tMk5?265r9>(lIYT|eg$eWsVggZ?<1!4bp{B4G)4rp#PAL~{NK zCgJB~pkzw7JU~qQZ>)*uDq%%;|B`xvIH1mQm!PDlyv3mfIIQd;@~oI{*igNs0s`A=&4H=Ylr>$Jz&f%-} zPu+VLgYp|NlDt}$jVPu#AJk%AR{vl412hlk*M(nUyVeFcN3hiMWu<(e__;`a3Q1NA z+>-j3712{bV2NZ=c;5RCvP_cGW!HR?Ppb=i99%?C=6yLWD1J3O`RMpl?wh&m-WC*L z!qizUZA8{13me#tX_Nt1G3AO*x+VAJEUkmQq0&jdSUq~V zwq+(CAYuXn!&TKs?HAw)y{)e|>H4Q#ymA5der@zb9(DoGR6E4&oK>@j@7@!?zBK=l zTe=&^^wGa3L3KHz?gG8zlgA(gsk}SyoF?Z?Swu)F)P>LDOv%MfL#VFm5a<|ZZ%tMX z(1UlP=Znv*`+0>ywv?T#OX^%p6L{>@=n}xNnM{IM5ls66fwfYY@X~UZpVNgtYD0`8%1$ND(?w;VjdShArn}k_@)n->Xbo3PoEm7{2BPN&V=14d#nYjV|s*^|9!XTmftmr9L#+H~B`$dWjNW zSJU1!0YXgEjpj}4P$G5SG0YWpX}#4Bg5!|{%C~=>G9Or*Mb^eEry$2PO(#xLG2XH# zcbzhcgqH@T=sz4p^F{qQzsAz07N7KBrpyHJHQT$c_|U3^>?qNk*xswpf0s$8=9x5ejTKFL^f~0 zQ6+Qt05$YP>f@o`F6RoUDh5M8Lg8NY_t#vayhiXHsY|Q_XTY}EQ59%Xn}Tw`v-v9N z?>SacVKPTN2qL#{Ai(~jGrL0_lSkPvzed;9Z$-F%1Ob}cN20q0(@_p<(-5~sKivfm zN<{AFb8oap28={LW~ZJae_~M!RobFIkx~#v$sR*t)=^930xoPpWv|jd6u3(Uc(O@X zYCxBa+d3Sr58^0j+QQs{B8cFdsDoHIm&bVbe$<7Nz|8O3c8qBH3TT^Vz=Nh4e|`sM zj{+qrV2a6p?81KD4!BTI9R4s;yRr&_hy|iH=+hcx$zDUrzgoZSE?~Ac(x^R1hqFhq zDBJk2OFvRS!O-X<%Q1*XWWFZJN?-?q37d9tHorAZ^G;W{Z1Du=Ljk)mp#`C2_l9&1 z>u!I}(j=O>&bj{Z5q6PzPL5qFHOH_+*3#qe@N!t5fBSQ{PN2VjT8p(*A3`S2=Z{^E zQMCOBG-)6Gu7y0{=tr0d`l#RLyHXK%4L9j6#5HKlRZ`mdlqW@N=5hT2;4$3_ih8}@ z$n4Uxx8BD@m}E;6*gt3Y2QpXhpI#7NNl_Pr@zi-kJyzA(9arQ75IY~pAgxqS16nMI zI|d-MQ8&{i_DQo?-KW)6t<6}@bg&Kk@=U${DX?NgVKd8j7oetX<3C0bOlHl^rvSRx zM$1%!>B0QHXDN4;1?9q9w!d(juaw}5PJhR#wqRd20VYja_O{Ey`#vzB=6iV^zf*0i z&Y4TyCuS5S|?=pj$eNPY2E2Us}0Q}wxx0sso5P@_LeWhpzpEx5W(;)PwGAEQzo>u zX6|w+??ZuD3JcSzyoG*Z%{rA&yz20NQeeSj`8@L0`e|@c^sliC!qZ1xqc5iNGREtF zDvpy2I(zJX*VdYk{3)jpGd)oPO*t#yo|+S6dP?7x&J%GyZy_~{``K>FQ&RA?;i9or zQi+Hf+1DHIGj2OL&uB3@`Q;8UM1E&5VMBOJ*dg<8CH*81dIL!}1Ed8HAA6&i&R6c{ zbUCecngIJK6H%=*`Z+0{3KFV^wV_usOoTg5SFWsI>^DY7O!VpjrKEel;~DF;w?rxUCXPx@?l$?9 zH#ZGzyq`7TYP7ba<-KdImW&(>N`IzIOq$&8fHCYli${y&BOTq?wV6m3XroJb2@^L& z*mprfr`MtepXD80o9c(p9ZrF7Eks506VjgL@#UKfclp1~j3HoLo%Uv?nsuzL*5P}! 
z%o40@Urm&8n~k?6_}2_Sdd^!76p3yZT_1W(N$l>pH~cWyod{69n!PjV>|khrgM?Bv zZ*x(Wsbj-Kf+pc*WJ<|s-HKcf zG-Ugkp1w@3eskyh8{!lupg>CB&{3;(mCNYlaW-rJBM%fRnmGox0)JFILl0#bwb#0Q zbFTg5=GnM8DXE0oad7yC$-o2Wwaw+$q}sDaF5UJ$9l+5o8-{xIRa%^rl0yYA&p7*N?*oHe+7*C5^nLDo*OV zzrv6BD&~0Y6GIRVyH!zco1EIPX{`aAk8pW|86~=?y#baA=>f zO!QUko_sZ5w7fn~tcx)55k3b=Cz;!E;5~3F`XP*6K^QNu?S_?v>=x+6?g%A= z3w#vG?_GEC*Pjd(vYYkKOs}tP#bo7)eVQh|hOOZtEQKxP?T#B}heRc5CU;uO^T`f< zsQujg#cZ2bH?A8jo&~Yp@9pqE?9|4HBYj9?C4$MzV{~>M4Wh;|-Xrws znI$zBEZKP3GHFArS&m(}rsEvs?aG@pvhp-`J$eXqZ|+WS@WeNfYFS%pJ&xyqWw2$ zl-C`PW25*B6qRqcb|nRigfQ6)D;*7vCF|?F&$WGt#7Qi`cuKtX7V&mE*}}P^0dFvk z!YN6<(eb<0hgxp4iNrZBbl9&M=42ax@os-X+|Y*>9@4Ql{Naar>R64t#9S@wWcTQ7 zhN(s#c&`ReOx1-I$;SFR91ItxJ{Vw|F^=dA8Pl5gC7xU1p?SGs8EG__mYEqMvZ7Gh z$fi9V&$_}teeK2kUcpj@Dp9Ybyo+qZB@#Z|-DiNX&F%dozvq1;I@RilfkBV%^3ue-ir)eh)r% z<8J`u=|+pUhy|}Rs?Gq7lJ22oR=v5k3RK7I);2@3;Qhx z0<)16YhIiu7kC*9bp|gEYi0IYu%o!Kj7bF8@u@zYNSldyYkDiP|II32NYnU|a>X1; z<6tDimaO_2wNoFj%)yk@6`Du0RUR`jQ!2KtDs!Z(=3lxnw#4 zm7`}Axf{)~XTMu+aIHKd3M;BFR>2bva^h|I`a{^Jd>OF3VE^s1zddwegFpHjjmih_+DrUAa@H4Km9&7In zZ*n)wc>E+<+x4ibLtx~J8e|-r0IgJkoWfUBwC+=eo#mholFeDOJLRS$uSrw(`SuCJ zPHr2?Y(wkHC9+?B`M(0GG*gld?CBjow_E&D9qb~BJeqL9Q5B=v@=Q+J_@*K7$)boC zLAO2B`=ruIYGhW$u9d)HRFTe?@X&>)kD5H{+%bBz*2+NcqgQEg@&c9nsQ;uU-s3S1 z*CJIB8;zF_M<2(ZCj~1_l&2$aj#R5+>1ef)b!!p` z#-NG2R6>}P3d{SIvx;NuRJUB!iWQEiH&QZ~6$l4E&WdoJ3yB<%xdn1g$)jJe$@3^u z`E@$RTj@iu1_!cUQjO43--;PivKIP6Q(NC5k@<b?9k-z^VXkOQ zbN(Z@VT&+cA%k*#G+B|LgLQBwirImg#I6syDCBc_O!(-Y)Ym0rzr%r?7wDAV({IFl zO;53|3UaL59s`SJJxKhzqiQVLrDfKL^;Q5Eh4uK$bs{kud4=1yeoji(_x(@TaJRpf z4BZ$WDE9G`s|oI{&K= zEejU0qR0Jo>dHocYC#+o+WTc)exDa{yvKys*1A1PIW@FgWkB33SH6IZcRrm?xgmm^ zIF96{Xy$1QI-ro`GoiZ4@QA!oa60(LrgYe>&Nis@_FevW$Py=tT1R)Ei45mg`!tqArPhRTFAYTXVf}8(A!pl+u|9 zE-x2sCT|f4f62-xRXGcVaVj~?yUx9{)V)_uG}~UD-#yV3%Uf_O3%}jkGG3g)UfN|5 zWs}1djx3xMZxm|hxlUhlh>DVkAo7mqf!tq_G1oh z^kpg}qvzow2Rpa;Pb9*{+nNTK3(41liHeSi7frT#h;1#lHoq9P94IJMlvnj~U6EB~ zXhN2ov+)wfv(nB<_+SNg+OrE$=_^d}-;v|tK^P+MF|)tk^0HQPD1J}z1J|#$oG2sc z;fBRanR49iFh`SF@s~EkJ6qb$%nh?ZVixX4^edm7I!2Oy=Dl)fqccVV&nD=n1F0Tm zx^0aLiwWX10X5{q)edU4st>dOrNJk$`$jK4qj7q%Ny$rFU1(7gpLV&4udmYqlQUS~ zD?lNBBQo4dt3sh{?BoUYyuXjg5GD_{-Wg%@aQHVB-A2?j`Fg2apj+Kf{+nNSJtnxy zPuh;5>p*nDz@KJBba(mnB6K=*L?1&omu54KInM~*F6Vf7w*HDzlrh{|r0`6)^b3kg zAHG<^7ZPH+#s79m%hVg4O4mq+DblewTV@*(Ij+!`l6JL9GH>Wi3A8rX8nR#UV8!Ud zKU-hE?38hLCRbHhp2BRo^Zl#w2N3mEpsNO#Y0$8A=K{F&+=yBk(D2+q{~lbI z=V0J8puptUx1wMzfl2^M859jVn-yL?`_-D;Ml|qA4EPn8zG&Y&%Qdtt@m;d;`#@CU_ZZpprva>>n8typ9hz4zz!6FO*?L^oa{`Q17_H43`+dtGF4E% z1PYP}cPlt9DQnF}(ege8HQ-eXvGzVR5;KEL;6yu!N7UP0(ekfyksJ`zVqno_TN*xn z7zqUq;NP4)J~qq|pTgrfB{S{W$+ZRn*iA?qf`ZO#X#_-m$@GnYDu7dhZ(+8-{|91% zrb#OW&lHY!Z9w-Y>JKC#0BR!0c{Bp!38S*$g)XOrBgEYE=T%4P35Deec8nb1C=>+l zZ|W$>c_QFEf1@Q%e;Nv`_^@Jtz+Fq$`1l70mMYktYv1``VgenNEImPrDo-&5{m2QT zT}%>vIA{w6{3jausHtEKgZnoN41aT@@61GU_WuFVXQJJ+2(kY)uYgZRWD=kxYn|1! 
z{xRL!BihIsz?h((OPs*Z z4`tyR+0x0;HusW@L{s+$f+k6YhNAS}KUO$P?#Qj2g)_+hstVJ}{@_zVB6^=24o}g@QFFEs@$I?~7=ImOVRKnd2~)Ob7Rua10%nqYFDAq$FlT)bO1E zFp+T0t_MXMpT;XWW(7#^y^0m(Qd2kb436hzw_TT&4m}2gRxTw%@_xnn*=5( zRCa>`3p86>NB^2ieO=zT7I{HijwkNV*x1nRy<|W2-URjFHob^*{0_aaqsrD;3NQ4) zg7QKHHlFu#)8OWr?42K9fQPnO+YF*pbgg!j7X}DT5d`&LOevZ@=$wc#Ns#gt(B76f z&@DoQ-V+R$fl>}b(I9LQont(^1&uh^O3_U~K*5@R9|Q#tq)$-npv;;BF;$805CI~- z8_g1d69O;uz0;!&NM(OmX>|i+biLT`z<2W-04Tl-ZoI%Zw_bu>=M-uOxBihlL_8os z;mCUgOhR|8bVV#=ZYt2bAwT>Xl$te#kiI6}`2w{ys~n5gdHrZK85(jZ_SG%Hx7YY* zIP+{y(84^>aDi2jvgXKF-2|;I19SD}5fte-aQkjS!ryxA{L&# za`*gr9F0+58pV}v|EIai6Ph%7@%QjU|F}yf0huw!-&|G!0Uz@*`)_G9T?}SMt`t2w zm<>g=!`qQ?n+8wcEIl$tvkmIdU2R4a-b3kc4XO|qq}DMWk5hgCe#jVcub2BQ}g zg(^~o)c6(umXgyEuv&23=ShMi<>Ir{_4*^-eh6RJmv{%j;aBZNBQr+>hh1CkB3 z0(?SrgB*O3B3r4@FYiEmQUUrl9S&y~JTqT=%=mNG@?3aXwmBnoJ=%j(`r87qYVbOo zbi$fFJcFAZcs9w(^iJ73~P7om2oBW2se~zusM{H{-C{`e{#!u}y-Fi$3IA)S0K{CrH~(LwGW67&X-PNu zSP7oR|0#q;w7GAxZmum&%KEhxB{F&`<*U04#E9aZYyu)8*YRT5N zs*&A?3VoY_)z@Q_n*c85meDK;sGn6WFUHzW0694F7Y5 z+yeZ=06568wcGj2h-rdLjZrm2D}GhbJt&k;U$w(kK|iDm)gFZS3USH?cx%HKj1O#dI(#OyOKK@KGK}g1|%Q z%E4j-BFC61^LE(MIIF|h$m&gk`1}71s6h*it=6~s$4+bpvxsdmzP-xE{v?IlFcPDC z77saLv?9bxXC&MgR6ymPDfxivXACH&LH|Ne7b z@_1=D<9ucoaeV|9%Moar5eC|Zrsd0Z6*4y0>+rMrBHW$PhzRu5DW&`i@B~j!Hm##*bqX|JPTaQR;`2Z$Fgh&4-iVlrD9AOATxddr6P30jl98o zGHm;x+4Tf}>zXOLvB#fZpY8>{@~AoRd}!7FUKf@ISi!hxLx2lVB<|S(^YV=3Te*;0 zBNY}4ELda1fQ|rbOh2eL8{1!4p;ZF04Bp5CfuV;f1Sx-h>~)^}`IE7$i|W|XO8{Q^cD5Wu1Vh4(=CzIV$6*Gi|z%VKa%PXlA(1ZLiCtN9asfSrpf6iIlU@HQB+ zkx@rLkI+=+tG_|A40`ZepwuvbNj&lbG~zCfoZ1DO*}8)#?5*w+SI_UPT~pH9H6`pJ zMqe%aeH`?96(6lo?n3kZBg}sC-poI4KWp81z z-1nj|+K*>X6*}BB(U>%C)X^+(s7#S)3HUB~GlsxgbSaJIONZqY+-o)iHbig&@i#X= zbDj?)vu&XGeO-+WBu#+S0y0&*I^G!iQGk|&Y+^d<+ch$PRV8a zD5w&B2bXfk7nsPmKsUb8e+?AszzI3&*9!$d01&Na-V_%vc}V-QL%XNz(0}DMgv}s_ z>48Z)>o+_I0474_X?1$Pfi0f;)WmgwfI)%uiuE19ltS%0=$rN?&<*}T&cI6M-Y3vF z!)BmqAFRJDuj*inkBv**RIkDqw7(GJnvR~0ufjg1m{*D<y`%o|)el;? 
zz(d*c6Mtk+5)8j!k;B;xT070Vbr9SNUUrDvcv8L#PkRwJ`X(=-uV{s05jyfWfXODKpk zZ@|VpclNC@h+F2BNpIDfptw0I8hx*^y)pSHgO9JKqE{q=3E%u*V}Ou%+_ief$CRA8a_Dx!wNhvB}uJF6}H>zX^OJ3&fLZ z=d2l8E-DKSEA}Q>p(XSYd<4Ee6CyP&oRr~qiuIPKv+1&33(L*Fkn?Utr>Dr6y%d@8 zTkaLnafsLoK;Pr=NI-L!-e4++5D&esd%i&P`^Z1Q(CN`x{#no#MDt#da|fO9 zgdh2*{mcvv_lHI}Qr{t;usT8@fop>9iw-;TLA zr|=c)+z*}^r3d7XteVmIvKJQ26g_ky$B)9uBltue*b!%VZ}9y{@C}Lm$U{R}^kRtl z2SIqb)whqldD}}&W}@EFV(9*k?X4+nybCJr_6ls-gq+U%%h7gWhvFl2Y`8mZfH6k1 zNgCpGq)1V{L1|VC!YZ1l z42FjdxNWr6>?TXFkv6ogDfTDq;O=}*L*{nw`<}dh^A|aj3tjw$KvksEJL*Uf1YH`r z4g#O$25{i$V&v|KG3}OE7TxDP&*`APR~1B=Kv27OrN`D&5!T!AiEp5s#ed|FUUOkS zOOH_4aHb*L1x{8VZaJ}+*78WOg5z{Tl8E0Hm)kw?^61O#bBI)0;U~IL&#{vm5{wFG z9)hnWn8EPsH}>28P+N@eh%ScCJY2Jib(XoaSqTy8v^KtjhU!c%Gl5UIl!!Xn!ve&G zdWnb%>RV)cvu3T-*sNxDU@WMTFqJac4yywqeM!6^ElV#`v7ZcCEhIc@k?5 z%FhJUtpcQf zXwN~*6h4+R1DHcR^7|Wl-d|R#!~e!6v#d?VsmGOEP!Q{ng?SF^itxi3)89LXOt1dk&iZh8xmf3u@ynAMuqY6yf{h zB@CzJ6wzkM+W@box6znYuri(@R-k3tnx~UBmdqb}M_`&CW1QJBNz&fVjVBD)UL0)> zkf-Qf@~`yUM{zZ{)mirbrWn%;mY4Kfi^^~up)%e;w_q&FWZ=H_?bF+`Vzb=WR}`I` zTOAVG3Q$5;o7Z1N;zz64Lr?7Kt$~7Rx(~g}2Y|#JF1bXucO6n+UY~R_hbZ4%)Gpo@ z;2?i1a~zYK+Ko!FbVwHZJZC8r6)D|ZfAM@D*e@AXq6)!qN^JoYbbOBcy??r<%0?ii z@n%8TMD%CEjvKw)u1<46gNwG#@1Jm2vM3!KSkvU*9Ci$RMPZkoYB%Wejs`pXwB+exh5B@xe=1af_C(HO;S%E=J$liU7F&1Snan6nX zHDUdcXV4sTc;U_K7Tzix^7zDox46iUM1vnof6AJ-cfJCckMEyNt4SYqlhHfO4Akbb z;zCDcS(*G!gPWXb#5QHqk|o`Di=lL0h$qB)X?VSx_Q^`?s+sqdCH)<u1Ig0q0KE!mN@C;2+ z3v%Sn*6Kk*rT(JYLC^TrG>b5CN6J@89+rGh7un3BpNf&rMB~t}MBvS;|LdigjPom{onQ<63`v2ZCK8Sz&xg-t7+;`()`h@aTs#k4nm z56``t@WO1$owbc;-d~HfN=?VOOmlpBV&^lNug}KFH%rYGnmD@%XR{SHvvJk?LI@pK z)(Zwg#Q8g4!_CQqZaTBQbxf1-7fE`jc^*n&(j4d{$4-$nkusjbp#F}!B9T^<>XJy( zbQk_JeSGn?@2KSD;lLPDB2p(M(>fzWtBrq;QDPFtWd4y#OXs^H@hxEMJ#Lozs}Q z=!lkL=p$!(pl)zT8=->LdzE}Cl06OIvmzwk!T(r4 z!7|CWB$%a67g6x*Eoy_mgN2ar69;Z@fO3@IwQ9y3oi7}UH&A@$)cz?)JQPV}$%Vz} zSU5w&?!zp1sksOBO^A0PuvNL>u;>_U<^Rx_wTS$nKR#SdUE~KMWCU6)B&!wE zl&Zj)qAoAc1r6m>J3jU;6|qQjSwh;Wnk8g+8cq-^bD@&$1(iM}JlGa&(3B_C{59BN z*{-UDF?uj|OKkxI0#6ne&s;rh*i|{Q4XlgY(8K2$s=;@4E5zUn#OG=w$Nqv$9e*fh zMpv5L<`_-rboXqN3>JZF(_M6lqD0{gInyRsBkPoSWl`KJc5wHNHJlh3eVx8B z#K?CDP}2K4_`Yre^SzcXSSD}Kgsnm$}>C#)R*l{|F!&+U~(yGOLmAY;X3$kKB>6%;l&-;YG9wgB3(=s8vXdF zrgSon9#Vd#LA21Kxgfjjq^@ZQ+Q7^|nqJ9oPo3C1)llBO;o$SvZA^NR@ta*s-BqlD z(Hm2+^9#&9G?w_c(!kAzb-SE#aS7(#(;n^@dH1cn%Ou@byM)cEH2U#IQ8SQ87PuJ{ zxZ*-vjq^?KL8kOQ=^H5uu8=skz0F)1yy4oUc0wTvB-wuEar5nbE13bJx?sP5=)DGJ z2neiZU(PUyUk{j9#A`k{41*l>nUHkr69?BVN(-K+`|`js#ML;tg6%U1u+3u~)*T3} z_9}5YJiyNJuC%`6RpP7DSD24y7tSWdry&WCrp$*ap%Ne;( zbeGVYYhl*Up&#n;Fy#*ixbh92gA;W+V1{r0BE{lYlqvOlCCCscMK3mhNum7P<)ukc z3-&NTCqnk)bB7<4rcA-KDthbu^tfC?E$5#{`vJ#5RBoL2r0xdii3`@Z*Zp_G0tfM? 
zV3HiCUG)kIrU+3xlAeWNw++C?BhsAKjNtW}*ehKQ1)+Et1Dd5-G_BOP!qva;v9Jva z^X*n8e0urIG2G=#01BSBfIK?8#a&RZe|>TD-(Z(C9NqIdW_$Yf_betu(&rcGM4}bA z-w$5XoJAAf(`=d*(x7yqMsoWoQP4)1Rfrk)4j43-M>^>sV0{C2ge(`MQw{D!yla;T zp*N6Wb@Xxo#MIy4J&XDj=a|p3M7*KFBfl}g^@%i(*OlmEW&9L`y|^Y=95 z@Xx)e#VMI1NgyVkm*TZ4j`L^@ZvA$#7J0+~f$yl_%`)kmT;|u-b83~))Y^C0j21ZF z(#ICTU%=&x_bsW>eAsVE ztGqO1%t8X4IGiHV8nZO6?vP&G)Y>*;Tr#wKTV2{}#LAekA*GUFae7k}P?c7~Xs z4yRl?aH4tk;Rd&xW@FQc7%4<#R%RA?_e@Zi8v{o@sy>u?kuG$5GPYB}lgMjkh%xc; z4j-DrYOu-u5&YOReG{F_4c)u+Zi+hw4gs9WslhS=)Oswh3eVJg8Jy2fafL(Cj*cZh zm>D)M#(Qt`3u1Pi^D(tvBC&whp|u^*7U#B2?(+beXU?7+VdWHJ;#JU9DhZgIzLQol z#XC^y-JbOO3h^E~D1vk-TfsbL&F zU(Ad$TI3xVdK1f@+e*2}W@nn!Lno7*bwT+9WRuYCrEex)gDDXyH<3m>^jmZN;$6>g zLlJ|$&Q6xXVG9_oF;+9n5_E%u{xV7KE#jHeCMQr?X=ws11vFBLx%uY$mLMC2cBA$4 zf5LQyL42yap+?i@l$Z24>Jr^jex*|(orEqt^Bh6=oOCS|N_ZvuVdmvtktN3et-^;R z$JXi>aDu_9m5_f)P{+(Rm@+r!V0fESgp^T=%q0ro&10nrYkgfi;B4>O2_RaN%fC2% zj>&lvw)cvqm(PXd^WL03k~fieerLolAsO1d(vx0W!YUl}(y~Ccs1sC0T9NqCoNjZW zbDFS$rxjW88e-S}V>y}`t)O^k%ZV%Wf-?8#)O=&QDkUi@o{6%A_R#s!_{)(MO%9nu zk8}t_iL&@=4$!#NTI~0e-y-pyXh&8pR|4({>lu;W%EZK`Zsh8c?IsrL2IuolFkkZ~ zrH;+bh~O~@me;1^3PquyBzb7m28jdsV<3bT9TrwLiL)%2Vd^e>!$gDdHIvVY+{<{z zPRV;%nWyBkqjiMAEAzr~ihP&)m4-SD^%H}qLkuIrcxx|gUnySWcRD$h1oc8C5bS%mckF$E0P3;B8IVN zC-_>9^M_Yeg`RZ37dc3BvxJ&!Z)-9|?a1fZd-oF9#_bf&n|UN69Yn;~pXypz2fqj# zk8>c?w4{DBcF$?c*~(d><6_M==aiw+cjqesSff>vfnMO}$nqg6{Y%Eu=S zq4gzeU#6!J#}3}A297;XzP<7kb$emA3HOzBjT@&_#g!{%B+?P$pt-M3Fr#O|4|%j#q>(EG zJ|`UmvUs@r$f_fY)^MW@4vZB@ppokoSL5IIQT)`xb9t5^4<&Mm@P$>IRf}cL4Ia*H zNyatPoJFjjq7{|=v8YNr2RTbUab74wp^JpY1Qx*t{wjo%1ngMW&n%+ZvoOn*4MaY( z-=}CA;Rr|+a#WfvAY^5|;*b-XKZ{YNlur2qG4$Nf|CEukg|m@@PA&ezMMo&Y{Q1D&lrOAcvL2( za>IO07bS8~i@4k(D|v&uebx^DHCs)JSru&q6^;07mBgS252psO+*n^|t7SA@^XbV| zy^n#ADiAMM#+34;RaL=YAi%uj=PA@65V;z%iFhvbc*vgI#A|7ootT;2^+GKrcC_^c zXTf)vH*U$rJR%_&z4p--alkIH_&klcNm_(Lgls*AbMDGbf`Xu_wPJ_ws3(olE+3~8 zWC|Y$-F2r|AvQE4$k{R*?sjGW=}RsWusC*6WQBmTP?0Q&xdIO-idm+-&RTW#{)grk zwl`7?=@S;S_*lbANkNPbsqV@P{%j|?0}quEl&^@M>t`X2rXmKv#GKyR2yWQTCbIM7 zQ1Hl4DH*{`c2~ZJE^FntAGl%b7`>=e`R83Y!nfy|SjQ#sd0ODPOQ1C3+tYljV5)jm z%XC*!yIVX>E2t;=c0V z{LS_y9{2U-7mS?;^^Y;GoO~1*^^ZI~d0Kd5W@zkl*`M=q>sHHFq_v_Pjn|NG=m@#` z{a`-Q{f}Kkc~0y6MZL)^!!NZpXiQlX6Y3MtlJ66QQ4rnSMNA3IfN)gkzc8l8^(I8jJ_*}>E<0D-Kq+i0zP zCu2%aGUhg##bi3dPJxYgPgpBQ?IlI%V9A*eiN^I7`J*f)cPar>pLQLu@UN)at^#Jv zw?eF8D8AE*@n@%NC60rRax6LPND1NXH--KMtanGgWW05?INBXNNO1n9ciqLBM~Oj0 zKQKKk$)RLBo05hJ?&#^G14)2>hs*v4zqwkz1v zo!mEJ9$YP0eD$s+E=O0dh~=t_-uj8^gKb@#iv-F-6#X;Xi9-FuxqdiY$-Ds}^HUS_ zN%d66seu7!|C_g$5W!ib624C!dW_T`6B7-@P$Lj&9#d7R8U6Y`hUPN5I?B1 zs1&G1^%@?Xwd+^G7OaK!erMqgZXSJ0a+cX>#dp(AmS%lsZd+NB@?F+`nhrJvnTLMWcFkE6HceBx--|C4Hw8kE!l1IV$C^Ec(;+V( zMf$M}_})qp`*M#ZKQVKH>(@);4si|FoWpYSadgCeKN}>5IQ{{gt+ySCB_()mxANkhP?+20ezfum3U6INJOUP1kT(_tH**lNjl5hm zD4r(x4Z;c={|Qde5uF`v44-NJf=XEk#gmS~$FS=LjU4?on>Qdfu?aoFl0LhB`QD>k zLCqsC+bQh#0P2rXxqFcP>6Mkt^)&ss3=MLu#50!6s(TQof?29L^c1NEetG^o;5h~& z$!SgD^V`lraLkqf$DT9!gy)M74J)CRJdfrHe+59x=zFw)>`bn<>hs|?kc+HkUS%|X zwvs9H+F8^wn*%Jy0G{Qe0f5zA6Y@YQZFD&onqj!@!``W7Wc%suveHk|My|{s0qF)|)1hqX4(Sf*P)gm1BHbuRmxL11A*HBDN{EPvGzbzF zsepod$Aa(koZtQX{&An@-t%}o&%w>wbImo^oMXJ>9q;@2`R{ZQ8xIz}YC3spwIJ;k zlLUFD)0oWNAHG2g$0#e#HittNQQMx_>9_E#^VN%t(jUZBYvi+%!g)ap?9t2}yTq-M z551?-&6G!`8a;dQL#D`NAH4BfQ2TIq;G}@G&|2&CdClJZ5zv!h6^CUB?RByiOOmP0 zYNn@K=J95ug!7)YecIHRKMPjI__i5UR8v2JW4Yz0Bfdu^RTDyblY1t?K_OJ=RT8`M z-U(TKN~^bal!Qq*X3RT8zj7FrE+}j=b z>mHcGnp<1Tq2mXrm0tj5rR(k25iwD622q?JF|kA3kB+e(jA2DEDz~*=agE4;Cg3Hy zO0`~M_vc5llJ@Y;J!0HQrtRfTk)WEZ5X3Za;miv86Qq%87w&qImQVUs91jv8Z}Izl 
[base85-encoded binary patch data omitted]

From bb749f675833c11063b9d221456f2789d67ecc73 Mon Sep 17 00:00:00 2001
From: Xi Chen
Date: Thu, 24 Aug 2017 11:36:54 -0700
Subject: [PATCH 100/170] fix alignments in graph

---
 .../cluster_train/src/paddle-etcd.graffle    | Bin 5557 -> 5578 bytes
 doc/design/cluster_train/src/paddle-etcd.png | Bin 50387 -> 50377 bytes
 2 files changed, 0 insertions(+), 0 deletions(-)

diff --git a/doc/design/cluster_train/src/paddle-etcd.graffle b/doc/design/cluster_train/src/paddle-etcd.graffle
index b4be06a0b1c6ba4a84475d2e5d6217b6c259bdc5..f973dc9b9dbf72e9bc31e2d32822916cd281f8d9 100644
GIT binary patch
delta 5458
[base85-encoded binary delta omitted]

delta 5415
[base85-encoded binary delta omitted]

diff --git a/doc/design/cluster_train/src/paddle-etcd.png b/doc/design/cluster_train/src/paddle-etcd.png
index dad67a277296ff1719a968abddafbcc1277721c7..57981ceb4b94f0f7d6dfa63f3d28c0402bf9cc31 100644
GIT binary patch
delta 21109
[base85-encoded binary delta omitted]
z7o3@<%Mpn>2M1$|BdN*l*`H18mB=OTd=;K})$pnDhGe~B5e+_joq-tvq60;z_yjuP z0&FG}5ivmEk0i#XfXngzntF@@K!1D`Fv?OM=ZZrQeVNt<2lskciC1Ot)%<6-moVIR z;m58)42R(RXRu~MCvkymOxxfkj9=HW&x*^;Ctd@X!xf@*Xe17W;JlG1LB&sSzR;p{ z{2l2mKMhBxtH2F!;@@RlDi6v>`sh_9$jqmG`wlYL4~=!T=3 zai6sycpHQnUVs4CV<5F|?rp}+U}p6ACgueE0%`X<;QqRkPlB1W@ET+fGT>sCS4nce&yt)hCcG2m9B*sR~n z1BxoQhD_80h~Wx^bHcUr0>D}u0&}0zneli2dpky^H+GI%G0YF+=&;qWdVq_b$X%D> z^kzpsXs;$oeOzD^p?~eBU-JH!{-7Cr*GLpO`9olJ$ZrAODdZ&U6c1KO7AwYBCs+&~ z2O*sq@Xb9q1Ma~oVjY3F!Ua%zfoS;DUdD*0a0~bmz!V0XOcRLA!EIucBm6XAI?QwL zC*5k$=tep(fSq2$6*TH$fgJTprfJ|Wkh9a(GY`k*%gw?{RqmjHK&@gXm}9PohxB<$ z^F-R4d&=_5EPmG7C))gbzZQCP$T(eJgx>>o7;r?4{`|i4yXNhZm|nMk(T^i!c4+2O z%9x=S(%w?!cJf=-g$}-a_ZL1UJYR+Y4wdVq_o}K>M zc8=q}_dW33dw=)2&;8x|{2us+mb`8a861iSod^mAajJ%~op%-!9wbsI4`;<>)z1}m z&*t7HN6f||*Ej}@aa+uGhl>HPUWjwwd~&SC^~BnVF9ysa&q?k1%zaM#&WrbURv(Pl z#Gp9EDyA>h%`nxh@4XK?q@8~}XyH`m9Vx%}tghc>Wui#=5-R(My1fR;wf@C3^G_;d zHCSl6d+ots^@~!*g0Z8yg++oit1c~Ujk~JrY~xVr7L&)%l3GR_ zCpTO@7jBW?FH5zsS;_h1V^tHoC^Hw{7$o_gqZU#%7H3WRP)N%-Y*YY=g}Q`b(d>zW zfGl2HoaD0Ndqab%k#Ra|y36KCIjYjAt6ADS?K`Rh$ESv5g?49BAQ}wj(Hwzx9_7`R znlLgA2^HPgJOn&cLvdn{f2rI=@SYmh*;gD?tO&9YfO2xPq@SBh6qi4cZ$eZGY~#%j~~J;+hpZsUN?6nGCVM71rd!fIZ%ol zPu8EB6Xi3UW?hCxaqTF4jhTcf#jfpJijjp0KqOtd&xI3m$&h88Dyj=XCQjN+RL)t% zK+k}#M>7SP&;`kUrBocnF=&l~02A{SZ{gf*0R{xM+U;9i7C_G04Ls6wN|`)41zKZ2 ze|+ZyfU*$X&YMW2t{?R;pU%L(SQ>aJYg=;06y+{wbs93sDguA>hivM0+OpKvlW=H0 zmcZBLYyFAvw>Jsv#*y6bo`^n@aB*iQ)Cv{qxF4h4isEhG*9ez1m`oyTvaB-;4sBDJ z+!z~MvutW5T+ju`zT0RU5BzdNF!0&Qp8fa`$=NbsSg9dgE0Ac^%*o(7R7vB!;a91U zL@%Eu9g9;qF`Wt+wlV!MYs|IaCxzj2liorWtg|u-y}>NAKir*v+M9cAn@S%NI(3<+ zm!Fws+g(w0)3kafH{Xp`)t1~ohd8}Zm_}V4=vT!6`pqLOpWN~MamNm8!=GXPb% zG*m)|=8i4^qwN)yTVNn`IKi$N82IW7T zSWvbO3m&RqT2swe((dCut1o~uLRyv4cGwWp+0IOEq$Ki;#XP!QeQLJZ6@7FMDJ z_qj8I$mjxsy@AN+0)l}OA*wmYpBQWfnu}k z1#axE6a1qIk&(iWWDqF^u}^Qcj&Cu*3@;2PJ4>k`KtQY>&Nu0xQ(*6x$@kM+9Jead zucddU54b4#RiVh6I-KfPaS>g(`vgSNM5FidgF(F5j>8Fp3rif%ZqD^7z*IGc-qwrW zw2BZrJ3Z?)2T-|Zc6dMKzh73uacy7)XX-8`+E6%zmCip)i2Hk!)<(SfvilY@Hy z#-`PEM$2skiYM8vM6xFDK%~6fW#rvTq8FwKAdLj2`LIZ&iIwZ{MFN$rzq^jaBPm2j z&hEAd+}p)MzZ)4NkueF?lMrKl@EZd Date: Fri, 25 Aug 2017 00:44:29 +0000 Subject: [PATCH 101/170] with in-place option --- .../v2/framework/tests/gradient_checker.py | 21 ++++++++++++------- .../v2/framework/tests/test_scatter_op.py | 3 ++- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index 8eb9f3f073..ac37671c77 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -32,7 +32,8 @@ def get_numeric_gradient(op, output_name, input_to_check, delta=0.005, - local_scope=None): + local_scope=None, + in_place=False): """ Get Numeric Gradient for an operator's input. @@ -90,9 +91,10 @@ def get_numeric_gradient(op, # we only compute gradient of one element each time. # we use a for loop to compute the gradient of every element. for i in xrange(tensor_size): - for var_name in input_values: - tensor_ = local_scope.find_var(var_name).get_tensor() - tensor_.set(numpy.copy(input_values[var_name]), core.CPUPlace()) + if in_place: + for var_name in input_values: + tensor_ = local_scope.find_var(var_name).get_tensor() + tensor_.set(numpy.copy(input_values[var_name]), core.CPUPlace()) # get one input element throw it's index i. 
origin = tensor_to_check.get_float_element(i) @@ -102,9 +104,10 @@ def get_numeric_gradient(op, y_pos = get_output() # plus delta to this element, run op and get the sum of the result tensor. - for var_name in input_values: - tensor_ = local_scope.find_var(var_name).get_tensor() - tensor_.set(numpy.copy(input_values[var_name]), core.CPUPlace()) + if in_place: + for var_name in input_values: + tensor_ = local_scope.find_var(var_name).get_tensor() + tensor_.set(numpy.copy(input_values[var_name]), core.CPUPlace()) x_neg = origin - delta tensor_to_check.set_float_element(i, x_neg) y_neg = get_output() @@ -257,6 +260,7 @@ class GradientChecker(unittest.TestCase): output_name, no_grad_set=None, only_cpu=False, + in_place=False, max_relative_error=0.005): """ :param forward_op: used to create backward_op @@ -289,7 +293,8 @@ class GradientChecker(unittest.TestCase): # get numerical gradients numeric_grads = [ - get_numeric_gradient(forward_op, input_vars, output_name, name) + get_numeric_gradient( + forward_op, input_vars, output_name, name, in_place=in_place) for name in inputs_to_check ] diff --git a/python/paddle/v2/framework/tests/test_scatter_op.py b/python/paddle/v2/framework/tests/test_scatter_op.py index e7696844d5..861fe6cf89 100644 --- a/python/paddle/v2/framework/tests/test_scatter_op.py +++ b/python/paddle/v2/framework/tests/test_scatter_op.py @@ -31,7 +31,8 @@ class TestScatterGradOp(GradientChecker): output_np[index_np] += updates_np inputs = {'Ref': ref_np, 'Index': index_np, 'Updates': updates_np} # check gradient - self.check_grad(op, inputs, set(["Updates", "Ref"]), "Out") + self.check_grad( + op, inputs, set(["Updates", "Ref"]), "Out", in_place=True) if __name__ == "__main__": From f22ece9273b54f1a248f7a787e252eb04a5acea3 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Thu, 24 Aug 2017 19:44:19 -0700 Subject: [PATCH 102/170] Add a document on building using Docker --- Dockerfile | 4 +- doc/howto/dev/build_en.md | 83 ++++++++++++++++++++++++++++++++++ paddle/scripts/docker/build.sh | 6 +-- 3 files changed, 87 insertions(+), 6 deletions(-) create mode 100644 doc/howto/dev/build_en.md diff --git a/Dockerfile b/Dockerfile index 98f61ba586..136db772cc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,13 +10,11 @@ RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ub ARG WITH_GPU ARG WITH_AVX ARG WITH_DOC -ARG WITH_STYLE_CHECK ENV WOBOQ OFF -ENV WITH_GPU=${WITH_GPU:-OFF} +ENV WITH_GPU=${WITH_GPU:-ON} ENV WITH_AVX=${WITH_AVX:-ON} ENV WITH_DOC=${WITH_DOC:-OFF} -ENV WITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} ENV HOME /root # Add bash enhancements diff --git a/doc/howto/dev/build_en.md b/doc/howto/dev/build_en.md new file mode 100644 index 0000000000..80488a147d --- /dev/null +++ b/doc/howto/dev/build_en.md @@ -0,0 +1,83 @@ +# Build PaddlePaddle from Source Code and Run Unit Test + +## What Developers Need + +To contribute to PaddlePaddle, you need + +1. A computer -- Linux, BSD, Windows, MacOS, and +1. Docker. + +Nothing else. Not even Python and GCC, because you can install all build tools into a Docker image. + +## General Process + +1. Retrieve source code. + + ```bash + git clone https://github.com/paddlepaddle/paddle + ``` + +2. Install build tools. + + ```bash + cd paddle; docker build -t paddle:dev . + ``` + +3. Build from source. + + ```bash + docker run -v $PWD:/paddle paddle:dev + ``` + +4. Run unit tests. + + ```bash + docker run -v $PWD:/paddle paddle:dev "cd/build; ctest" + ``` + + +## Docker, Or Not? + +- What is Docker? 
+ + If you haven't heard of it, consider it something like Python's virtualenv. + +- Docker or virtual machine? + + Some people compare Docker with VMs, but Docker doesn't virtualize any hardware, and it doesn't run a guest OS. + +- Why Docker? + + Using a Docker image of build tools standardize the building environment, and easier for others to reproduce your problem, if there is any, and help. + + Also, some build tools don't run on Windows or Mac or BSD, but Docker runs almost everywhere, so developers can use whatever computer they want. + +- Can I don't use Docker? + + Sure, you don't have to install build tools into a Docker image; instead, you can install them onto your local computer. This document exists because Docker would make the development way easier. + +- How difficult is it to learn Docker? + + It takes you ten minutes to read https://docs.docker.com/get-started/ and saves you more than one hour to install all required build tools, configure them, and upgrade them when new versions of PaddlePaddle require some new tools. + +- Docker requires sudo + + An owner of a computer has the administrative privilege, a.k.a., sudo. If you use a shared computer for development, please ask the administrator to install and configure Docker. We will do our best to support rkt, another container technology that doesn't require sudo. + +- Can I use my favorite IDE? + + Yes, of course. The source code resides on your local computer, and you can edit it using whatever editor you like. + + Many PaddlePaddle developers are using Emacs. They add the following few lines into their `~/.emacs` configure file: + + ```emacs + (global-set-key "\C-cc" 'compile) + (setq compile-command + "docker run --rm -it -v $(git rev-parse --show-toplevel):/paddle paddle:dev") + ``` + + so they could type `Ctrl-C` and `c` to build PaddlePaddle from source. + +- How many parallel building processes does the Docker container run? + + Our building Docker image runs a Bash script https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh, which calls `make -j$(nproc)` to starts as many processes as the number of your processors. diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 2941662f34..7bab814ae8 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -38,7 +38,7 @@ Configuring cmake in /paddle/build ... -DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} -DCUDNN_ROOT=/usr/ -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} - -DWITH_TESTING=${WITH_TESTING:-OFF} + -DWITH_TESTING=${WITH_TESTING:-ON} -DCMAKE_EXPORT_COMPILE_COMMANDS=ON ======================================== EOF @@ -56,8 +56,8 @@ cmake .. \ -DWITH_C_API=${WITH_C_API:-OFF} \ -DWITH_PYTHON=${WITH_PYTHON:-ON} \ -DCUDNN_ROOT=/usr/ \ - -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} \ - -DWITH_TESTING=${WITH_TESTING:-OFF} \ + -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-ON} \ + -DWITH_TESTING=${WITH_TESTING:-ON} \ -DCMAKE_EXPORT_COMPILE_COMMANDS=ON cat < Date: Fri, 25 Aug 2017 11:36:38 +0800 Subject: [PATCH 103/170] Neon depthwise conv with filterSize = 3 and stride = 2. 
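For orientation, the NEON kernel added below computes an ordinary depthwise convolution with a 3x3 filter and stride 2; the vld2q_f32/vextq_f32 pair only gathers the even, odd, and shifted-even input columns so that four output positions can be accumulated per iteration. The scalar sketch here is not part of the patch: it assumes, as the surrounding NeonDepthwiseConv code does, that the input has already been zero-padded and that output channel c reads input channel c / filterMultiplier; the helper name is made up for illustration.

```cpp
// Illustrative scalar reference for the vectorized 3x3, stride-2 kernel.
// Assumes "input" is already zero-padded; not part of the patch itself.
void depthwiseConv3x3Stride2Reference(const float* input,
                                      const float* filter,
                                      int inputHeight,
                                      int inputWidth,
                                      int outputChannels,
                                      int outputHeight,
                                      int outputWidth,
                                      int filterMultiplier,
                                      float* output) {
  for (int c = 0; c < outputChannels; ++c) {
    // Each output channel has its own 3x3 filter and maps back to one
    // input channel, the same layout DepthwiseConvKernel<3, 2> uses.
    const float* in = input + (c / filterMultiplier) * inputHeight * inputWidth;
    const float* k = filter + c * 9;
    for (int h = 0; h < outputHeight; ++h) {
      for (int w = 0; w < outputWidth; ++w) {
        float sum = 0.f;
        for (int kh = 0; kh < 3; ++kh) {
          for (int kw = 0; kw < 3; ++kw) {
            // Stride 2: output (h, w) reads input rows 2h..2h+2 and
            // columns 2w..2w+2.
            sum += in[(2 * h + kh) * inputWidth + 2 * w + kw] * k[kh * 3 + kw];
          }
        }
        *output++ = sum;  // outputs are written channel by channel, row-major
      }
    }
  }
}
```

The vectorized version in this commit produces the same values; vld2q_f32 de-interleaves eight consecutive floats into even and odd lanes, which is exactly the column pattern a stride-2 kernel needs, so four values of w are processed per loop iteration.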
--- paddle/function/neon/NeonDepthwiseConv.cpp | 115 ++++++++++++++++++++- 1 file changed, 114 insertions(+), 1 deletion(-) diff --git a/paddle/function/neon/NeonDepthwiseConv.cpp b/paddle/function/neon/NeonDepthwiseConv.cpp index c017241c92..53d14d9833 100644 --- a/paddle/function/neon/NeonDepthwiseConv.cpp +++ b/paddle/function/neon/NeonDepthwiseConv.cpp @@ -153,6 +153,109 @@ struct DepthwiseConvKernel<3, 1> { } }; +/** + * Each step calculates four elements of the output. + * First step: + * R0[0, 2, 4, 6...] * K[0][0] + * R0[1, 3, 5, 7...] * K[0][1] + * R0[2, 4, 6, 8...] * K[0][2] + * R1[0, 2, 4, 6...] * K[1][0] + * R1[1, 3, 5, 7...] * K[1][1] + * R1[2, 4, 6, 8...] * K[1][2] + * R2[0, 2, 4, 6...] * K[2][0] + * R2[1, 3, 5, 7...] * K[2][1] + * R2[2, 4, 6, 8...] * K[2][2] + * ------------------------------ + * Output[0, 1, 2, 3] + */ +template <> +struct DepthwiseConvKernel<3, 2> { + static void run(const float* inputData, + const float* filterData, + int inputHeight, + int inputWidth, + int outputChannels, + int outputHeight, + int outputWidth, + int filterMultiplier, + float* outputData) { + const int steps = outputWidth >> 2; + const int remain = outputWidth & 3; + for (int c = 0; c < outputChannels; c++, filterData += 9) { + // Load the filters + float32x4_t k[3]; + k[0] = vld1q_f32(filterData); + k[1] = vld1q_f32(filterData + 3); + k[2] = vld1q_f32(filterData + 6); + k[0] = vsetq_lane_f32(0.f, k[0], 3); + k[1] = vsetq_lane_f32(0.f, k[1], 3); + k[2] = vsetq_lane_f32(0.f, k[2], 3); + + const float* start = + inputData + (c / filterMultiplier) * (inputHeight * inputWidth); + float32x4_t input[3][3]; + for (int h = 0; h < outputHeight; h++) { + const float* r0 = start + 2 * h * inputWidth; + const float* r1 = start + (2 * h + 1) * inputWidth; + const float* r2 = start + (2 * h + 2) * inputWidth; + for (int s = 0; s < steps; s++) { + // Load the inputs + float32x4_t data1; + float32x4x2_t data2; + + data2 = vld2q_f32(r0); + input[0][0] = data2.val[0]; + input[0][1] = data2.val[1]; + data1 = vld1q_f32(r0 + 8); + input[0][2] = vextq_f32(data2.val[0], data1, 1); + + data2 = vld2q_f32(r1); + input[1][0] = data2.val[0]; + input[1][1] = data2.val[1]; + data1 = vld1q_f32(r1 + 8); + input[1][2] = vextq_f32(data2.val[0], data1, 1); + + data2 = vld2q_f32(r2); + input[2][0] = data2.val[0]; + input[2][1] = data2.val[1]; + data1 = vld1q_f32(r2 + 8); + input[2][2] = vextq_f32(data2.val[0], data1, 1); + + float32x4_t tmp1 = vdupq_n_f32(0.f); + float32x4_t tmp2 = vdupq_n_f32(0.f); + tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[1][0], k[1], 0); + tmp1 = vmlaq_laneq_f32(tmp1, input[1][1], k[1], 1); + tmp2 = vmlaq_laneq_f32(tmp2, input[1][2], k[1], 2); + tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); + tmp1 = vaddq_f32(tmp1, tmp2); + + vst1q_f32(outputData, tmp1); + r0 += 8; + r1 += 8; + r2 += 8; + outputData += 4; + } + + for (int r = 0; r < remain; r++) { + float32x4_t i0 = vld1q_f32(r0); + float32x4_t i1 = vld1q_f32(r1); + float32x4_t i2 = vld1q_f32(r2); + *outputData = conv3x3(i0, i1, i2, k[0], k[1], k[2]); + r0 += 2; + r1 += 2; + r2 += 2; + outputData++; + } + } + } + } +}; + /** * Each step calculates four elements of the output. 
*/ @@ -326,7 +429,7 @@ public: } for (size_t i = 0; i < batchSize; i++) { - if (filterWidth == 3) { + if (filterWidth == 3 && strideH() == 1) { DepthwiseConvKernel<3, 1>::run(inputPadding, filterData, inputHeight, @@ -336,6 +439,16 @@ public: outputWidth, filterMultiplier, outputData); + } else if (filterWidth == 3 && strideH() == 2) { + DepthwiseConvKernel<3, 2>::run(inputPadding, + filterData, + inputHeight, + inputWidth, + outputChannels, + outputHeight, + outputWidth, + filterMultiplier, + outputData); } else if (filterWidth == 4) { DepthwiseConvKernel<4, 1>::run(inputPadding, filterData, From 9fdf3970d0de568db4a9a3b757335604430ca137 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Thu, 24 Aug 2017 20:37:39 -0700 Subject: [PATCH 104/170] Update unit test running and CUDA --- doc/howto/dev/build_en.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/doc/howto/dev/build_en.md b/doc/howto/dev/build_en.md index 80488a147d..de0733f963 100644 --- a/doc/howto/dev/build_en.md +++ b/doc/howto/dev/build_en.md @@ -29,12 +29,25 @@ Nothing else. Not even Python and GCC, because you can install all build tools docker run -v $PWD:/paddle paddle:dev ``` + This builds a CUDA-enabled version and writes all binary outputs to directory `./build` of the local computer, other than the Docker container. If we want to build only the CPU part, we can type + + ```bash + docker run -e WITH_GPU=OFF -v $PWD:/paddle paddle:dev + ``` + 4. Run unit tests. + To run all unit tests using the first GPU of a node: + ```bash - docker run -v $PWD:/paddle paddle:dev "cd/build; ctest" + NV_GPU=0 nvidia-docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest" ``` + If we used `WITH_GPU=OFF` at build time, it generates only CPU-based unit tests, and we don't need nvidia-docker to run them. We can just run + + ```bash + docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest" + ``` ## Docker, Or Not? From f00c4112d2ca1d42c60d154002b2347ba2de5cd9 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Fri, 25 Aug 2017 11:53:45 +0800 Subject: [PATCH 105/170] Neon depthwise conv with filterSize = 4 and stride = 2. --- paddle/function/neon/NeonDepthwiseConv.cpp | 122 ++++++++++++++++++++- 1 file changed, 121 insertions(+), 1 deletion(-) diff --git a/paddle/function/neon/NeonDepthwiseConv.cpp b/paddle/function/neon/NeonDepthwiseConv.cpp index 53d14d9833..3fe28b1de3 100644 --- a/paddle/function/neon/NeonDepthwiseConv.cpp +++ b/paddle/function/neon/NeonDepthwiseConv.cpp @@ -364,6 +364,116 @@ struct DepthwiseConvKernel<4, 1> { } }; +/** + * Each step calculates four elements of the output. 
+ */ +template <> +struct DepthwiseConvKernel<4, 2> { + static void run(const float* inputData, + const float* filterData, + int inputHeight, + int inputWidth, + int outputChannels, + int outputHeight, + int outputWidth, + int filterMultiplier, + float* outputData) { + const int steps = outputWidth >> 2; + const int remain = outputWidth & 3; + for (int c = 0; c < outputChannels; c++, filterData += 16) { + // Load the filters + float32x4_t k[4]; + k[0] = vld1q_f32(filterData); + k[1] = vld1q_f32(filterData + 4); + k[2] = vld1q_f32(filterData + 8); + k[3] = vld1q_f32(filterData + 12); + + const float* start = + inputData + (c / filterMultiplier) * (inputHeight * inputWidth); + float32x4_t input[4][4]; + for (int h = 0; h < outputHeight; h++) { + const float* r0 = start + 2 * h * inputWidth; + const float* r1 = start + (2 * h + 1) * inputWidth; + const float* r2 = start + (2 * h + 2) * inputWidth; + const float* r3 = start + (2 * h + 3) * inputWidth; + for (int s = 0; s < steps; s++) { + // Load the inputs + float32x4x2_t data1; + float32x4x2_t data2; + + data1 = vld2q_f32(r0); + data2 = vld2q_f32(r0 + 8); + input[0][0] = data1.val[0]; + input[0][1] = data1.val[1]; + input[0][2] = vextq_f32(data1.val[0], data2.val[0], 1); + input[0][3] = vextq_f32(data1.val[1], data2.val[1], 1); + + data1 = vld2q_f32(r1); + data2 = vld2q_f32(r1 + 8); + input[1][0] = data1.val[0]; + input[1][1] = data1.val[1]; + input[1][2] = vextq_f32(data1.val[0], data2.val[0], 1); + input[1][3] = vextq_f32(data1.val[1], data2.val[1], 1); + + data1 = vld2q_f32(r2); + data2 = vld2q_f32(r2 + 8); + input[2][0] = data1.val[0]; + input[2][1] = data1.val[1]; + input[2][2] = vextq_f32(data1.val[0], data2.val[0], 1); + input[2][3] = vextq_f32(data1.val[1], data2.val[1], 1); + + data1 = vld2q_f32(r3); + data2 = vld2q_f32(r3 + 8); + input[3][0] = data1.val[0]; + input[3][1] = data1.val[1]; + input[3][2] = vextq_f32(data1.val[0], data2.val[0], 1); + input[3][3] = vextq_f32(data1.val[1], data2.val[1], 1); + + float32x4_t tmp1 = vdupq_n_f32(0.f); + float32x4_t tmp2 = vdupq_n_f32(0.f); + tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[0][3], k[0], 3); + tmp1 = vmlaq_laneq_f32(tmp1, input[1][0], k[1], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[1][1], k[1], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[1][2], k[1], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[1][3], k[1], 3); + tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[2][3], k[2], 3); + tmp1 = vmlaq_laneq_f32(tmp1, input[3][0], k[3], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[3][1], k[3], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[3][2], k[3], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[3][3], k[3], 3); + tmp1 = vaddq_f32(tmp1, tmp2); + + vst1q_f32(outputData, tmp1); + r0 += 8; + r1 += 8; + r2 += 8; + r3 += 8; + outputData += 4; + } + + for (int r = 0; r < remain; r++) { + float32x4_t i0 = vld1q_f32(r0); + float32x4_t i1 = vld1q_f32(r1); + float32x4_t i2 = vld1q_f32(r2); + float32x4_t i3 = vld1q_f32(r3); + *outputData = conv4x4(i0, i1, i2, i3, k[0], k[1], k[2], k[3]); + r0 += 2; + r1 += 2; + r2 += 2; + r3 += 2; + outputData++; + } + } + } + } +}; + template class NeonDepthwiseConvFunction : public ConvFunctionBase { public: @@ -449,7 +559,7 @@ public: outputWidth, filterMultiplier, outputData); - } else if 
(filterWidth == 4) { + } else if (filterWidth == 4 && strideH() == 1) { DepthwiseConvKernel<4, 1>::run(inputPadding, filterData, inputHeight, @@ -459,6 +569,16 @@ public: outputWidth, filterMultiplier, outputData); + } else if (filterWidth == 4 && strideH() == 2) { + DepthwiseConvKernel<4, 2>::run(inputPadding, + filterData, + inputHeight, + inputWidth, + outputChannels, + outputHeight, + outputWidth, + filterMultiplier, + outputData); } inputPadding += inputChannels * inputHeight * inputWidth; From 06fad3fe9deccdc8ee4721ff028753f53c7ab87f Mon Sep 17 00:00:00 2001 From: gongweibao Date: Fri, 25 Aug 2017 12:01:57 +0800 Subject: [PATCH 106/170] hidden capi symbols (#3636) hidden capi symbols --- paddle/capi/CMakeLists.txt | 3 +++ paddle/capi/export.map | 6 ++++++ paddle/capi/export.sym | 0 3 files changed, 9 insertions(+) create mode 100644 paddle/capi/export.map create mode 100644 paddle/capi/export.sym diff --git a/paddle/capi/CMakeLists.txt b/paddle/capi/CMakeLists.txt index 11022d1754..dde99ab340 100644 --- a/paddle/capi/CMakeLists.txt +++ b/paddle/capi/CMakeLists.txt @@ -53,7 +53,10 @@ add_custom_target(paddle_capi_whole ALL set_target_properties(paddle_capi_whole PROPERTIES IMPORTED_LOCATION ${CMAKE_CURRENT_BINARY_DIR}/${capi_whole_library}) +set(LINK_FLAGS " -Wl,--retain-symbols-file ${CMAKE_CURRENT_SOURCE_DIR}/export.sym -Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/export.map") +# TODO: merge mkl into paddle_capi_shared add_library(paddle_capi_shared SHARED ${CAPI_SOURCES}) +set_target_properties(paddle_capi_shared PROPERTIES LINK_FLAGS "${LINK_FLAGS}") target_include_directories(paddle_capi_shared PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) link_paddle_exe(paddle_capi_shared) diff --git a/paddle/capi/export.map b/paddle/capi/export.map new file mode 100644 index 0000000000..8d673f675d --- /dev/null +++ b/paddle/capi/export.map @@ -0,0 +1,6 @@ +{ + global: + paddle_*; + local: + *; +}; diff --git a/paddle/capi/export.sym b/paddle/capi/export.sym new file mode 100644 index 0000000000..e69de29bb2 From 1e61d91f24e9213ab43edc62cf2c6f9e47a62d1f Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Thu, 24 Aug 2017 21:38:13 -0700 Subject: [PATCH 107/170] Update index and add Chinese version --- doc/howto/dev/build_cn.md | 100 ++++++++++++++++++++++++++++++++++++++ doc/howto/dev/build_en.md | 6 ++- doc/howto/index_cn.rst | 1 + doc/howto/index_en.rst | 1 + 4 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 doc/howto/dev/build_cn.md diff --git a/doc/howto/dev/build_cn.md b/doc/howto/dev/build_cn.md new file mode 100644 index 0000000000..dc372de9fa --- /dev/null +++ b/doc/howto/dev/build_cn.md @@ -0,0 +1,100 @@ +# 编译PaddlePaddle和运行单元测试 + +## 需要的软硬件 + +为了开发PaddlePaddle,我们需要 + +1. 一台电脑,可以装的是 Linux, BSD, Windows 或者 MacOS 操作系统,以及 +1. Docker。 + +不需要其他任何软件了。即便是 Python 和 GCC 都不需要,因为我们会把所有编译工具都安装进一个 Docker image 里。 + +## 总体流程 + +1. 获取源码 + + ```bash + git clone https://github.com/paddlepaddle/paddle + ``` + +2. 安装工具 + + ```bash + cd paddle; docker build -t paddle:dev . + ``` + +3. 编译 + + ```bash + docker run -v $PWD:/paddle paddle:dev + ``` + + 这个命令编译出一个 CUDA-enabled 版本。所有二进制文件会被写到本机的 `./build` 目录,而不是写到 Docker container 里。如果我们只需要编译一个只支持 CPU 的版本,可以用 + + ```bash + docker run -e WITH_GPU=OFF -v $PWD:/paddle paddle:dev + ``` + +4. 
运行单元测试 + + 用本机的第一个 GPU 来运行包括 GPU 单元测试在内的所有单元测试: + + ```bash + NV_GPU=0 nvidia-docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest" + ``` + + 如果编译的时候我们用了 `WITH_GPU=OFF` 选项,那么编译过程只会产生 CPU-based 单元测试,那么我们也就不需要 nvidia-docker 来运行单元测试了。我们只需要: + + ```bash + docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest" + ``` + +## 为什么要 Docker 呀? + +- 什么是 Docker? + + 如果您没有听说 Docker,可以把它想象为一个类似 virtualenv 的系统,但是虚拟的不仅仅是 Python 的运行环境。 + +- Docker 还是虚拟机? + + 有人用虚拟机来类比 Docker。需要强调的是:Docker 不会虚拟任何硬件,Docker container 里运行的编译工具实际上都是在本机的 CPU 和操作系统上直接运行的,性能和把编译工具安装在本机运行基本一样。 + +- 为什么用 Docker? + + 把工具和配置都安装在一个 Docker image 里可以标准化编译环境。这样如果遇到问题,其他人可以复现问题以便帮助。 + + 另外,对于习惯使用Windows和MacOS的开发者来说,使用Docker就不用配置交叉编译环境了。 + +- 我可以选择不用Docker吗? + + 当然可以。大家可以用把开发工具安装进入 Docker image 一样的方式,把这些工具安装到本机。这篇文档介绍基于 Docker 的开发流程,是因为这个流程比其他方法都更简便。 + +- 学习 Docker 有多难? + + 理解 Docker 并不难,大概花十分钟看一遍 https://zhuanlan.zhihu.com/p/19902938 即可。这可以帮您省掉花一小时安装和配置各种开发工具,以及切换机器时需要新安装的辛苦。别忘了 PaddlePaddle 更新可能导致需要新的开发工具。更别提简化问题复现带来的好处了。 + +- Docker 需要 sudo + + 如果用自己的电脑开发,自然也就有管理员权限(sudo)了。如果用公用的电脑开发,需要请管理员安装和配置好 Docker。此外,PaddlePaddle 项目在努力开始支持其他不需要 sudo 的集装箱技术,比如 rkt。 + +- 我可以用 IDE 吗? + + 当然可以,因为源码就在本机上。IDE 默认调用 make 之类的程序来编译源码,我们只需要配置 IDE 来调用 Docker 命令编译源码即可。 + + 很多 PaddlePaddle 开发者使用 Emacs。他们在自己的 `~/.emacs` 配置文件里加两行 + + ```emacs + (global-set-key "\C-cc" 'compile) + (setq compile-command + "docker run --rm -it -v $(git rev-parse --show-toplevel):/paddle paddle:dev") + ``` + + 就可以按 `Ctrl-C` 和 `c` 键来启动编译了。 + +- 可以并行编译吗? + + 是的。我们的 Docker image 运行一个 Bash 脚本 https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh 。这个脚本调用 `make -j$(nproc)` 来启动和 CPU 核一样多的进程来并行编译。 + +- Docker on Windows/MacOS? + + Docker 在 Windows 和 MacOS 都可以运行。不过实际上是运行在一个 Linux 虚拟机上。可能需要注意给这个虚拟机多分配一些 CPU 和内存,以保证编译高效。具体做法请参考 https://github.com/PaddlePaddle/Paddle/issues/627 。 diff --git a/doc/howto/dev/build_en.md b/doc/howto/dev/build_en.md index de0733f963..640d126018 100644 --- a/doc/howto/dev/build_en.md +++ b/doc/howto/dev/build_en.md @@ -91,6 +91,10 @@ Nothing else. Not even Python and GCC, because you can install all build tools so they could type `Ctrl-C` and `c` to build PaddlePaddle from source. -- How many parallel building processes does the Docker container run? +- Does Docker do parallel building? Our building Docker image runs a Bash script https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh, which calls `make -j$(nproc)` to starts as many processes as the number of your processors. + +- Docker on Windows/MacOS? + + On Windows and MacOS, Docker containers run in a Linux VM. You might want to give this VM some more memory and CPUs so to make the building efficient. Please refer to https://github.com/PaddlePaddle/Paddle/issues/627 for details. diff --git a/doc/howto/index_cn.rst b/doc/howto/index_cn.rst index 26449a6365..0608aa3096 100644 --- a/doc/howto/index_cn.rst +++ b/doc/howto/index_cn.rst @@ -19,6 +19,7 @@ .. toctree:: :maxdepth: 1 + dev/build_cn.rst dev/write_docs_cn.rst dev/contribute_to_paddle_cn.md diff --git a/doc/howto/index_en.rst b/doc/howto/index_en.rst index 1fbfcd260b..1b6034be4e 100644 --- a/doc/howto/index_en.rst +++ b/doc/howto/index_en.rst @@ -18,6 +18,7 @@ Development .. toctree:: :maxdepth: 1 + dev/build_en.rst dev/new_layer_en.rst dev/contribute_to_paddle_en.md From 7a42c92d49cbcf05bb7c8fc698b923a09503d22e Mon Sep 17 00:00:00 2001 From: caoying03 Date: Thu, 24 Aug 2017 10:53:00 +0800 Subject: [PATCH 108/170] fix a bug that memory does not clean. 
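The patch below swaps `zero()` for `zeroMem()` and adds an explicit clear of `expandedPathScores_[i]`: the matrices involved are obtained through `Matrix::resizeOrCreate` and reused across forward passes, so without clearing them first, values left over from the previous batch survive into the current one. The hazard is generic; a minimal standalone C++ sketch (illustrative code, not Paddle's Matrix API):

```cpp
// Sketch: a buffer reused across calls must be cleared before values are
// accumulated into it, or results from the previous call leak into this one.
#include <algorithm>
#include <cstdio>
#include <vector>

void accumulate(std::vector<float>& buf, const std::vector<float>& in) {
  if (buf.size() < in.size()) buf.resize(in.size());  // reuse when big enough
  std::fill(buf.begin(), buf.end(), 0.f);  // the "zeroMem()" step of the patch
  for (size_t i = 0; i < in.size(); ++i) buf[i] += in[i];
}

int main() {
  std::vector<float> buf;
  accumulate(buf, {1.f, 2.f, 3.f});
  accumulate(buf, {4.f, 5.f});  // with the fill: {4, 5, 0}; without: {5, 7, 3}
  for (float v : buf) std::printf("%g ", v);
  return 0;
}
```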
--- .../gserver/layers/CrossEntropyOverBeam.cpp | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.cpp b/paddle/gserver/layers/CrossEntropyOverBeam.cpp index f7736f0ce9..b7c2a44626 100644 --- a/paddle/gserver/layers/CrossEntropyOverBeam.cpp +++ b/paddle/gserver/layers/CrossEntropyOverBeam.cpp @@ -53,8 +53,8 @@ size_t CostForOneSequence::initLastExpansion() { candidates->getData() + height * beamSize_, [](const real& val) { return val != -1; }); /* - * if the gold sequence falls off the beam during search, - * add the gold sequence as the last path into all expanded paths. + * if the gold sequence falls off the beam during search, add the gold + * sequence as the last path into the all expanded candidates. */ if (goldAsExtraPath_) goldIdsInFinalExpansion_ = pathCount++; @@ -133,7 +133,7 @@ real CostForOneSequence::globallyNormalizedScore() { Matrix::resizeOrCreate( softmaxOut_, 1, pathRowIdsInEachBeam_[0].size(), false, false); - softmaxOut_->zero(); + softmaxOut_->zeroMem(); MatrixPtr tmp = Matrix::create( softmaxOut_->getData(), softmaxOut_->getWidth(), 1, false, false); @@ -143,6 +143,8 @@ real CostForOneSequence::globallyNormalizedScore() { 1, false, false); + expandedPathScores_[i]->zeroMem(); + IVectorPtr rowIds = IVector::create(pathRowIdsInEachBeam_[i].data(), pathRowIdsInEachBeam_[i].size(), false); @@ -217,13 +219,16 @@ void CrossEntropyOverBeam::checkInputs() { const Argument& goldSeq = getInput(i * 3 + 2); if (i) { - CHECK(scores.hasSubseq()) << "Beam expansion expect the first one, " - "should be a nested sequence"; + CHECK(scores.hasSubseq()) << "input " << i << " " + << inputLayers_[i * 3]->getName() + << " should be a nested sequence"; CHECK_EQ(getInputValue(i * 3 + 1)->getWidth(), beamSize_); CHECK_EQ(scores.getNumSequences(), batchSize_); CHECK_EQ(scores.getNumSubSequences(), selCandidates.getBatchSize()); } else { - CHECK(scores.hasSeq()) << "The first beam expansion should be a sequence"; + CHECK(scores.hasSeq()) << "input " << i << " " + << inputLayers_[i]->getName() + << " should be a sequence"; batchSize_ = scores.getNumSequences(); beamSize_ = getInputValue(i * 3 + 1)->getWidth(); CHECK_EQ(batchSize_, selCandidates.getBatchSize()); @@ -332,7 +337,7 @@ void CrossEntropyOverBeam::splitBatchBeams() { void CrossEntropyOverBeam::resizeOutput() { Matrix::resizeOrCreate(output_.value, batchSize_, 1, false, false); - output_.value->zero(); + output_.value->zeroMem(); for (size_t i = 0; i < beamExpanCount_; ++i) { MatrixPtr inGrad = getInputGrad(i * 3); @@ -344,7 +349,7 @@ void CrossEntropyOverBeam::resizeOutput() { false); } else candidateScoreGrad_[i] = std::move(inGrad); - candidateScoreGrad_[i]->zero(); + candidateScoreGrad_[i]->zeroMem(); } } From 818a64f41ffacca0d3ff07928a19ac47021ccac1 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Fri, 25 Aug 2017 13:56:55 +0800 Subject: [PATCH 109/170] Fix img_pool_layer bug. 
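The bug fixed below is purely an ordering problem: the type assertion on `pool_type` ran before the `None` default was applied, so calling the layer without an explicit `pool_type` tripped the check even though `None` is meant to fall back to max pooling. A stripped-down Python sketch of the corrected order (hypothetical classes, not the real trainer_config_helpers code):

```python
# Validate pool_type only after defaults/normalization have been applied.
class MaxPooling(object):
    name = 'max'

class AvgPooling(object):
    name = 'avg'

def img_pool_layer_sketch(pool_type=None):
    # 1. Apply the default first ...
    if pool_type is None:
        pool_type = MaxPooling()
    elif isinstance(pool_type, AvgPooling):
        pool_type.name = 'avg'
    # 2. ... then check the (possibly defaulted) value.
    assert type(pool_type) in [AvgPooling, MaxPooling], \
        "only AvgPooling and MaxPooling are supported in this sketch"
    return pool_type.name + '-projection'

print(img_pool_layer_sketch())              # 'max-projection'
print(img_pool_layer_sketch(AvgPooling()))  # 'avg-projection'
```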
--- python/paddle/trainer_config_helpers/layers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index f323b017c0..862265f2cd 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -2607,15 +2607,15 @@ def img_pool_layer(input, assert input.num_filters is not None num_channels = input.num_filters - assert type(pool_type) in [AvgPooling, MaxPooling, CudnnAvgPooling, - CudnnMaxPooling], \ - "only (Cudnn)AvgPooling, (Cudnn)MaxPooling are supported" - if pool_type is None: pool_type = MaxPooling() elif isinstance(pool_type, AvgPooling): pool_type.name = 'avg' + assert type(pool_type) in [AvgPooling, MaxPooling, CudnnAvgPooling, + CudnnMaxPooling], \ + "only (Cudnn)AvgPooling, (Cudnn)MaxPooling are supported" + type_name = pool_type.name + '-projection' \ if ( isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \ From aa28d046fb828814b9849aa1ebfc868be2db98f9 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Fri, 25 Aug 2017 14:11:36 +0800 Subject: [PATCH 110/170] fix a bug of sequence_slice layer when batch_size=1 --- paddle/gserver/layers/SequenceSliceLayer.cpp | 18 ++++++++++-------- .../gserver/tests/test_SeqSliceLayerGrad.cpp | 4 +++- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/paddle/gserver/layers/SequenceSliceLayer.cpp b/paddle/gserver/layers/SequenceSliceLayer.cpp index 5d72d37304..aab44c4646 100644 --- a/paddle/gserver/layers/SequenceSliceLayer.cpp +++ b/paddle/gserver/layers/SequenceSliceLayer.cpp @@ -130,6 +130,8 @@ void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts, CHECK(starts || ends) << "At least one of the start or end indices " << "should be given."; + bool hasSubseq = getInput(0).hasSubseq(); + outSeqStartPos_.resize(1, 0); outSubSeqStartPos_.resize(1, 0); selectedRows_.clear(); @@ -151,14 +153,13 @@ void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts, int seqLen = endPos - begPos + 1; CHECK_GT(seqLen, 0U); for (int m = begPos; m <= endPos; ++m) selectedRows_.push_back(m); - inputSeqInfoVec_.size() > 1 + hasSubseq ? outSubSeqStartPos_.push_back(outSubSeqStartPos_.back() + seqLen) : outSeqStartPos_.push_back(outSeqStartPos_.back() + seqLen); } rowIdx++; } - if (inputSeqInfoVec_.size() > 1) - outSeqStartPos_.push_back(outSubSeqStartPos_.back()); + if (hasSubseq) outSeqStartPos_.push_back(outSubSeqStartPos_.back()); } if (useGpu_) { @@ -175,7 +176,7 @@ void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts, output_.sequenceStartPositions->copyFrom( outSeqStartPos_.data(), outSeqStartPos_.size(), false); - if (inputSeqInfoVec_.size() > 1) { + if (hasSubseq) { ICpuGpuVector::resizeOrCreate( output_.subSequenceStartPositions, outSubSeqStartPos_.size(), false); output_.subSequenceStartPositions->copyFrom( @@ -203,10 +204,11 @@ void SequenceSliceLayer::forward(PassType passType) { } else copySliceIdsToCpu(); - // calculate the selected row indices in a batch, - // and build the output sequence information. - calSelectedRows(startIdsOnCpu_ ? startIdsOnCpu_ : nullptr, - endIdsOnCpu_ ? endIdsOnCpu_ : nullptr); + /* + * calculate the selected row indices in a batch, and build the output + * sequence information. 
+ */ + calSelectedRows(startIdsOnCpu_, endIdsOnCpu_); resetOutput(selectedRows_.size(), getSize()); diff --git a/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp b/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp index d560ca650b..e1d4ae1617 100644 --- a/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp +++ b/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp @@ -30,6 +30,8 @@ const int MAX_SEQ_NUM = 17; const int MAX_SEQ_LEN = 23; const int MAX_BEAM_SIZE = 13; +const size_t SEED = (size_t)(time(NULL)); + vector randSampling(real range, int n) { CHECK_GE(range, n); vector num(range); @@ -46,7 +48,7 @@ void genSeqInfo(vector& seqStartPos, vector& subSeqStartPos) { seqStartPos.resize(1, 0); subSeqStartPos.resize(1, 0); - srand((size_t)(time(NULL))); + srand(SEED); int seqNum = 1 + (rand() % MAX_SEQ_NUM); for (int i = 0; i < seqNum; ++i) { int subSeqNum = 1 + (rand() % MAX_SEQ_NUM); From 4cc57836f393ada9b65cfeef444662afc34f1109 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 25 Aug 2017 17:20:28 +0800 Subject: [PATCH 111/170] enable reorder --- paddle/gserver/layers/MKLDNNFcLayer.cpp | 39 +++++------------ paddle/math/MKLDNNMatrix.cpp | 57 +++++++++++++++++++++++++ paddle/math/MKLDNNMatrix.h | 33 ++++++++++++-- 3 files changed, 97 insertions(+), 32 deletions(-) diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index a5555c4618..ad50c15a7d 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -61,39 +61,20 @@ void MKLDNNFcLayer::convertWeightsFromPaddle() { return; } - // TODO(TJ): dst format should get from wgtVal_ - int dstFmt = PARAM_FORMAT_MKLDNN_OI; - int srcFmt = weight_->getParameterPtr()->getHeaderFormat(); - if (srcFmt == dstFmt) { - return; - } - - // The weight_ is transposed from initial paddle weight - MatrixPtr paddleWgt = Matrix::create( - weight_->getW()->getData(), iLayerSize_, oc_, false, false); - - // TODO(TJ): remove this print when do not need differ weights - std::ostringstream ostr; - paddleWgt->print(ostr); - VLOG(MKLDNN_ALL) << "Initial Weight from paddle: " << std::endl << ostr.str(); - - // The mkldnn weight is transposed from initial paddle matrix - MatrixPtr paddleWgtT; - paddleWgt->transpose(paddleWgtT, true); - weight_->getW()->copyFrom(*paddleWgtT); - weight_->getParameterPtr()->setHeaderFormat(dstFmt); + CHECK(wgtVal_) << "should have been initialized"; + bool hasNoSpatial_ = ih_ == 1 && iw_ == 1; + auto targetDim = wgtVal_->getDims(); + auto srcFmt = hasNoSpatial_ ? memory::format::io : memory::format::ihwo; + wgtVal_->reorderDataFrom(wgtVal_, srcFmt, targetDim); hasInitedWgt_ = true; } void MKLDNNFcLayer::convertWeightsToPaddle() { - MatrixPtr dnnWgt = weight_->getW(); - MatrixPtr paddleWgt; - dnnWgt->transpose(paddleWgt, true); - - // copy paddle weight and override on weight_ - MatrixPtr dnnWgtT = Matrix::create( - dnnWgt->getData(), dnnWgt->getWidth(), dnnWgt->getHeight(), false, false); - dnnWgtT->copyFrom(*paddleWgt); + CHECK(wgtVal_) << "should have been initialized"; + bool hasNoSpatial_ = ih_ == 1 && iw_ == 1; + auto targetDim = wgtVal_->getDims(); + auto dstFmt = hasNoSpatial_ ? 
memory::format::io : memory::format::ihwo; + wgtVal_->reorderDataTo(wgtVal_, dstFmt, targetDim); } void MKLDNNFcLayer::reshape() { diff --git a/paddle/math/MKLDNNMatrix.cpp b/paddle/math/MKLDNNMatrix.cpp index 94df9c1550..32ae3b1bcf 100644 --- a/paddle/math/MKLDNNMatrix.cpp +++ b/paddle/math/MKLDNNMatrix.cpp @@ -56,6 +56,63 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, return create(m, pd); } +void MKLDNNMatrix::reorderDataFrom(const MKLDNNMatrixPtr& m, + memory::format srcFmt, + memory::dims targetDim) { + memory::format dstFmt = getFormat(); + if (srcFmt == dstFmt) { + return; + } + CHECK_EQ(getElementCnt(), m->getElementCnt()) << "size should equal"; + real* srcData = getData(); + real* dstData = m->getData(); + reorderOnce(srcData, dstData, srcFmt, dstFmt, targetDim); +} + +void MKLDNNMatrix::reorderDataTo(const MKLDNNMatrixPtr& m, + memory::format dstFmt, + memory::dims targetDim) { + memory::format srcFmt = getFormat(); + if (srcFmt == dstFmt) { + return; + } + CHECK_EQ(getElementCnt(), m->getElementCnt()) << "size should equal"; + real* srcData = getData(); + real* dstData = m->getData(); + reorderOnce(srcData, dstData, srcFmt, dstFmt, targetDim); +} + +void MKLDNNMatrix::reorderOnce(void* srcData, + void* dstData, + memory::format srcFmt, + memory::format dstFmt, + memory::dims dm) { + CHECK(srcData); + CHECK(dstData); + MatrixPtr tmpSrc; + if (dstData == srcData) { + // inplace data + size_t sz = 1; + for (size_t i = 0; i < dm.size(); ++i) { + sz *= dm[i]; + } + tmpSrc = Matrix::create(sz, 1, false, false); + tmpSrc->copyFrom((real*)srcData, sz); + srcData = tmpSrc->getData(); + } + + auto dtype = this->getDtype(); + auto srcMD = memory::desc(dm, dtype, srcFmt); + auto dstMD = memory::desc(dm, dtype, dstFmt); + + auto eg = this->getEngine(); + auto src = memory(memory::primitive_desc(srcMD, eg), srcData); + auto dst = memory(memory::primitive_desc(dstMD, eg), dstData); + + auto r = reorder(src, dst); + stream(stream::kind::eager).submit({r}).wait(); +} + void MKLDNNMatrix::downSpatial() { int fmt = getFormat(); if (!(fmt == memory::format::nchw || fmt == memory::format::oihw)) { diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h index 05adc867c2..ea3fd7d461 100644 --- a/paddle/math/MKLDNNMatrix.h +++ b/paddle/math/MKLDNNMatrix.h @@ -21,9 +21,6 @@ limitations under the License. */ namespace paddle { -static const std::map PARAM_FOARMAT_MAP = - {{mkldnn::memory::format::oi, PARAM_FORMAT_MKLDNN_OI}}; - class MKLDNNMatrix; typedef std::shared_ptr MKLDNNMatrixPtr; @@ -57,6 +54,26 @@ public: mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32); public: + /** + * Reorder this MKLDNNMatrix from other format. + * Support inplace reorder + * Pay attention: this function would only reorder the data layout. + * will NOT change this original dim or format info + */ + void reorderDataFrom(const MKLDNNMatrixPtr& m, + memory::format srcFmt, + memory::dims targetDim); + + /** + * Reorder this MKLDNNMatrix to other format. + * Support inplace reorder + * Pay attention: this function would only reorder the data layout. + * will NOT change the dst dim or format info + */ + void reorderDataTo(const MKLDNNMatrixPtr& m, + memory::format dstFmt, + memory::dims targetDim); + /** * Dimensionality reduction. * Change format "nchw --> nc" or "oihw --> oi" if the h and w are both 1 @@ -113,6 +130,16 @@ public: * Get engine. */ mkldnn::engine getEngine() { return getPD().get_engine(); } + +protected: + /** + * Do once reorder supported inplace. 
+ */ + void reorderOnce(void* srcData, + void* dstData, + memory::format srcFmt, + memory::format dstFmt, + memory::dims dm); }; } // namespace paddle From 7035bb63e91a2dcf1f91df5e440d2c3e45bdd2e8 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Fri, 25 Aug 2017 20:44:04 +0800 Subject: [PATCH 112/170] fix a bug. --- paddle/parameter/Argument.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp index 2b945de18a..b0e9e740c8 100644 --- a/paddle/parameter/Argument.cpp +++ b/paddle/parameter/Argument.cpp @@ -677,6 +677,7 @@ void Argument::reorganizeSeqInfo( const ICpuGpuVectorPtr subSeqStartPos, std::vector>& reorganizedSeqInfo) { CHECK(seqStartPos); + reorganizedSeqInfo.clear(); int seqNum = seqStartPos->getSize() - 1; int* seqStarts = seqStartPos->getMutableData(false); From c8d0c9af865cd0ac47d1cd7461c24793d833eeff Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Fri, 25 Aug 2017 11:24:48 -0700 Subject: [PATCH 113/170] In response to comments from Luo Tao --- doc/howto/dev/build_cn.md | 6 +++--- doc/howto/dev/build_en.md | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/howto/dev/build_cn.md b/doc/howto/dev/build_cn.md index dc372de9fa..7c95579636 100644 --- a/doc/howto/dev/build_cn.md +++ b/doc/howto/dev/build_cn.md @@ -71,7 +71,7 @@ - 学习 Docker 有多难? - 理解 Docker 并不难,大概花十分钟看一遍 https://zhuanlan.zhihu.com/p/19902938 即可。这可以帮您省掉花一小时安装和配置各种开发工具,以及切换机器时需要新安装的辛苦。别忘了 PaddlePaddle 更新可能导致需要新的开发工具。更别提简化问题复现带来的好处了。 + 理解 Docker 并不难,大概花十分钟看一下[这篇文章](https://zhuanlan.zhihu.com/p/19902938)。这可以帮您省掉花一小时安装和配置各种开发工具,以及切换机器时需要新安装的辛苦。别忘了 PaddlePaddle 更新可能导致需要新的开发工具。更别提简化问题复现带来的好处了。 - Docker 需要 sudo @@ -93,8 +93,8 @@ - 可以并行编译吗? - 是的。我们的 Docker image 运行一个 Bash 脚本 https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh 。这个脚本调用 `make -j$(nproc)` 来启动和 CPU 核一样多的进程来并行编译。 + 是的。我们的 Docker image 运行一个 [Bash 脚本](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh)。这个脚本调用 `make -j$(nproc)` 来启动和 CPU 核一样多的进程来并行编译。 - Docker on Windows/MacOS? - Docker 在 Windows 和 MacOS 都可以运行。不过实际上是运行在一个 Linux 虚拟机上。可能需要注意给这个虚拟机多分配一些 CPU 和内存,以保证编译高效。具体做法请参考 https://github.com/PaddlePaddle/Paddle/issues/627 。 + Docker 在 Windows 和 MacOS 都可以运行。不过实际上是运行在一个 Linux 虚拟机上。可能需要注意给这个虚拟机多分配一些 CPU 和内存,以保证编译高效。具体做法请参考[这个issue](https://github.com/PaddlePaddle/Paddle/issues/627)。 diff --git a/doc/howto/dev/build_en.md b/doc/howto/dev/build_en.md index 640d126018..3be2405ea7 100644 --- a/doc/howto/dev/build_en.md +++ b/doc/howto/dev/build_en.md @@ -71,7 +71,7 @@ Nothing else. Not even Python and GCC, because you can install all build tools - How difficult is it to learn Docker? - It takes you ten minutes to read https://docs.docker.com/get-started/ and saves you more than one hour to install all required build tools, configure them, and upgrade them when new versions of PaddlePaddle require some new tools. + It takes you ten minutes to read [an introductory article](https://docs.docker.com/get-started) and saves you more than one hour to install all required build tools, configure them, and upgrade them when new versions of PaddlePaddle require some new tools. - Docker requires sudo @@ -93,8 +93,8 @@ Nothing else. Not even Python and GCC, because you can install all build tools - Does Docker do parallel building? 
- Our building Docker image runs a Bash script https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh, which calls `make -j$(nproc)` to starts as many processes as the number of your processors. + Our building Docker image runs a [Bash script](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh), which calls `make -j$(nproc)` to starts as many processes as the number of your processors. - Docker on Windows/MacOS? - On Windows and MacOS, Docker containers run in a Linux VM. You might want to give this VM some more memory and CPUs so to make the building efficient. Please refer to https://github.com/PaddlePaddle/Paddle/issues/627 for details. + On Windows and MacOS, Docker containers run in a Linux VM. You might want to give this VM some more memory and CPUs so to make the building efficient. Please refer to [this issue](https://github.com/PaddlePaddle/Paddle/issues/627) for details. From f71f3935e3ce05a8e90edc971f5ab08d71ed2966 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Fri, 25 Aug 2017 11:51:53 -0700 Subject: [PATCH 114/170] In response to comments from Chen Xi --- doc/howto/dev/build_cn.md | 20 +++++++++++++------- doc/howto/dev/build_en.md | 34 ++++++++++++++++++++-------------- 2 files changed, 33 insertions(+), 21 deletions(-) diff --git a/doc/howto/dev/build_cn.md b/doc/howto/dev/build_cn.md index 7c95579636..0077d90118 100644 --- a/doc/howto/dev/build_cn.md +++ b/doc/howto/dev/build_cn.md @@ -23,13 +23,17 @@ cd paddle; docker build -t paddle:dev . ``` + 请注意这个命令结尾处的 `.`;它表示 `docker build` 应该读取当前目录下的 [`Dockerfile`文件](https://github.com/PaddlePaddle/Paddle/blob/develop/Dockerfile),按照其内容创建一个名为 `paddle:dev` 的 Docker image,并且把各种开发工具安装进去。 + 3. 编译 + 以下命令启动一个 Docker container 来执行 `paddle:dev` 这个 Docker image,同时把当前目录(源码树根目录)映射为 container 里的 `/paddle` 目录,并且运行 `Dockerfile` 描述的默认入口程序 [`build.sh`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh)。这个脚本调用 `cmake` 和 `make` 来编译 `/paddle` 里的源码,结果输出到 `/paddle/build`,也就是本地的源码树根目录里的 `build` 子目录。 + ```bash docker run -v $PWD:/paddle paddle:dev ``` - 这个命令编译出一个 CUDA-enabled 版本。所有二进制文件会被写到本机的 `./build` 目录,而不是写到 Docker container 里。如果我们只需要编译一个只支持 CPU 的版本,可以用 + 上述命令编译出一个 CUDA-enabled 版本。如果我们只需要编译一个只支持 CPU 的版本,可以用 ```bash docker run -e WITH_GPU=OFF -v $PWD:/paddle paddle:dev @@ -57,7 +61,7 @@ - Docker 还是虚拟机? - 有人用虚拟机来类比 Docker。需要强调的是:Docker 不会虚拟任何硬件,Docker container 里运行的编译工具实际上都是在本机的 CPU 和操作系统上直接运行的,性能和把编译工具安装在本机运行基本一样。 + 有人用虚拟机来类比 Docker。需要强调的是:Docker 不会虚拟任何硬件,Docker container 里运行的编译工具实际上都是在本机的 CPU 和操作系统上直接运行的,性能和把编译工具安装在本机运行一样。 - 为什么用 Docker? @@ -73,10 +77,6 @@ 理解 Docker 并不难,大概花十分钟看一下[这篇文章](https://zhuanlan.zhihu.com/p/19902938)。这可以帮您省掉花一小时安装和配置各种开发工具,以及切换机器时需要新安装的辛苦。别忘了 PaddlePaddle 更新可能导致需要新的开发工具。更别提简化问题复现带来的好处了。 -- Docker 需要 sudo - - 如果用自己的电脑开发,自然也就有管理员权限(sudo)了。如果用公用的电脑开发,需要请管理员安装和配置好 Docker。此外,PaddlePaddle 项目在努力开始支持其他不需要 sudo 的集装箱技术,比如 rkt。 - - 我可以用 IDE 吗? 当然可以,因为源码就在本机上。IDE 默认调用 make 之类的程序来编译源码,我们只需要配置 IDE 来调用 Docker 命令编译源码即可。 @@ -95,6 +95,12 @@ 是的。我们的 Docker image 运行一个 [Bash 脚本](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh)。这个脚本调用 `make -j$(nproc)` 来启动和 CPU 核一样多的进程来并行编译。 -- Docker on Windows/MacOS? 
+## 可能碰到的问题 + +- Docker 需要 sudo + + 如果用自己的电脑开发,自然也就有管理员权限(sudo)了。如果用公用的电脑开发,需要请管理员安装和配置好 Docker。此外,PaddlePaddle 项目在努力开始支持其他不需要 sudo 的集装箱技术,比如 rkt。 + +- 在 Windows/MacOS 上编译很慢 Docker 在 Windows 和 MacOS 都可以运行。不过实际上是运行在一个 Linux 虚拟机上。可能需要注意给这个虚拟机多分配一些 CPU 和内存,以保证编译高效。具体做法请参考[这个issue](https://github.com/PaddlePaddle/Paddle/issues/627)。 diff --git a/doc/howto/dev/build_en.md b/doc/howto/dev/build_en.md index 3be2405ea7..95752beba0 100644 --- a/doc/howto/dev/build_en.md +++ b/doc/howto/dev/build_en.md @@ -7,7 +7,7 @@ To contribute to PaddlePaddle, you need 1. A computer -- Linux, BSD, Windows, MacOS, and 1. Docker. -Nothing else. Not even Python and GCC, because you can install all build tools into a Docker image. +Nothing else. Not even Python and GCC, because you can install all build tools into a Docker image. We run all the tools by running this image. ## General Process @@ -17,19 +17,23 @@ Nothing else. Not even Python and GCC, because you can install all build tools git clone https://github.com/paddlepaddle/paddle ``` -2. Install build tools. +2. Install build tools into a Docker image. ```bash cd paddle; docker build -t paddle:dev . ``` + Please be aware of the `.` at the end of the command, which refers to the [`./Dockerfile` file](https://github.com/PaddlePaddle/Paddle/blob/develop/Dockerfile). `docker build` follows instructions in this file to create a Docker image named `paddle:dev`, and installs building tools into it. + 3. Build from source. + This following command starts a Docker container that executes the Docker image `paddle:dev`, mapping the current directory to `/paddle/` in the container, and runs the default entry-point [`build.sh`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh) as specified in the Dockefile. `build.sh` invokes `cmake` and `make` to build PaddlePaddle source code, which had been mapped to `/paddle`, and writes outputs to `/paddle/build`, which maps to `build` in the current source directory on the computer. + ```bash docker run -v $PWD:/paddle paddle:dev ``` - This builds a CUDA-enabled version and writes all binary outputs to directory `./build` of the local computer, other than the Docker container. If we want to build only the CPU part, we can type + Above command builds a CUDA-enabled version. If we want to build a CPU-only version, we can type ```bash docker run -e WITH_GPU=OFF -v $PWD:/paddle paddle:dev @@ -57,25 +61,21 @@ Nothing else. Not even Python and GCC, because you can install all build tools - Docker or virtual machine? - Some people compare Docker with VMs, but Docker doesn't virtualize any hardware, and it doesn't run a guest OS. + Some people compare Docker with VMs, but Docker doesn't virtualize any hardware nor running a guest OS, which means there is no compromise on the performance. - Why Docker? - Using a Docker image of build tools standardize the building environment, and easier for others to reproduce your problem, if there is any, and help. + Using a Docker image of build tools standardizes the building environment, which makes it easier for others to reproduce your problems and to help. Also, some build tools don't run on Windows or Mac or BSD, but Docker runs almost everywhere, so developers can use whatever computer they want. -- Can I don't use Docker? +- Can I choose not to use Docker? - Sure, you don't have to install build tools into a Docker image; instead, you can install them onto your local computer. This document exists because Docker would make the development way easier. 
+ Sure, you don't have to install build tools into a Docker image; instead, you can install them in your local computer. This document exists because Docker would make the development way easier. - How difficult is it to learn Docker? - It takes you ten minutes to read [an introductory article](https://docs.docker.com/get-started) and saves you more than one hour to install all required build tools, configure them, and upgrade them when new versions of PaddlePaddle require some new tools. - -- Docker requires sudo - - An owner of a computer has the administrative privilege, a.k.a., sudo. If you use a shared computer for development, please ask the administrator to install and configure Docker. We will do our best to support rkt, another container technology that doesn't require sudo. + It takes you ten minutes to read [an introductory article](https://docs.docker.com/get-started) and saves you more than one hour to install all required build tools, configure them, especially when new versions of PaddlePaddle require some new tools. Not even to mention the time saved when other people trying to reproduce the issue you have. - Can I use my favorite IDE? @@ -93,8 +93,14 @@ Nothing else. Not even Python and GCC, because you can install all build tools - Does Docker do parallel building? - Our building Docker image runs a [Bash script](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh), which calls `make -j$(nproc)` to starts as many processes as the number of your processors. + Our building Docker image runs a [Bash script](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh), which calls `make -j$(nproc)` to starts as many processes as the number of your CPU cores. + +## Some Gotchas + +- Docker requires sudo + + An owner of a computer has the administrative privilege, a.k.a., sudo, and Docker requires this privilege to work properly. If you use a shared computer for development, please ask the administrator to install and configure Docker. We will do our best to support rkt, another container technology that doesn't require sudo. -- Docker on Windows/MacOS? +- Docker on Windows/MacOS builds slowly On Windows and MacOS, Docker containers run in a Linux VM. You might want to give this VM some more memory and CPUs so to make the building efficient. Please refer to [this issue](https://github.com/PaddlePaddle/Paddle/issues/627) for details. From 4b0235c1f2792cdecfe7d8f3e0bb1d0c57c6f361 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Fri, 25 Aug 2017 14:31:02 -0700 Subject: [PATCH 115/170] Update build.sh --- paddle/scripts/docker/build.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 7bab814ae8..1798642022 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -63,12 +63,11 @@ cmake .. \ cat < Date: Fri, 25 Aug 2017 14:43:29 -0700 Subject: [PATCH 116/170] Run a specific test --- doc/howto/dev/build_cn.md | 6 ++++++ doc/howto/dev/build_en.md | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/doc/howto/dev/build_cn.md b/doc/howto/dev/build_cn.md index 0077d90118..79b4ff9d5a 100644 --- a/doc/howto/dev/build_cn.md +++ b/doc/howto/dev/build_cn.md @@ -53,6 +53,12 @@ docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest" ``` + 有时候我们只想运行一个特定的单元测试,比如 `memory_test`,我们可以 + + ```bash + docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest -V -R memory_test" + ``` + ## 为什么要 Docker 呀? 
- 什么是 Docker? diff --git a/doc/howto/dev/build_en.md b/doc/howto/dev/build_en.md index 95752beba0..e1b55929f9 100644 --- a/doc/howto/dev/build_en.md +++ b/doc/howto/dev/build_en.md @@ -53,6 +53,12 @@ Nothing else. Not even Python and GCC, because you can install all build tools docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest" ``` + Sometimes we want to run a specific unit test, say `memory_test`, we can run + + ```bash + docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest -V -R memory_test" + ``` + ## Docker, Or Not? - What is Docker? From 97649bf9b251707803b2665dedf1ef8f929d8c88 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Fri, 25 Aug 2017 22:08:24 +0000 Subject: [PATCH 117/170] fix codes in scatter --- paddle/operators/scatter_op.cc | 26 +++++++++++++------ paddle/operators/scatter_op.h | 6 ++--- .../v2/framework/tests/gradient_checker.py | 13 +++++----- .../v2/framework/tests/test_scatter_op.py | 1 - 4 files changed, 28 insertions(+), 18 deletions(-) diff --git a/paddle/operators/scatter_op.cc b/paddle/operators/scatter_op.cc index cf01ef6279..f901edefa2 100644 --- a/paddle/operators/scatter_op.cc +++ b/paddle/operators/scatter_op.cc @@ -24,8 +24,18 @@ class ScatterOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - framework::DDim output_dims(ctx.Input("Ref")->dims()); - ctx.Output("Out")->Resize(output_dims); + PADDLE_ENFORCE_EQ(ctx.Input("Index")->dims().size(), 1, + "Update Index should be 1-D."); + PADDLE_ENFORCE_EQ(ctx.Input("Ref")->dims().size(), + ctx.Input("Updates")->dims().size(), + "Reference and Updates should have the same shape size"); + PADDLE_ENFORCE_EQ(ctx.Input("Updates")->dims()[0], + ctx.Input("Index")->dims()[0], + "Updates and Index should have same batch-size."); + framework::DDim data_dim(ctx.Input("Updates")->dims()); + for (int i = 1; i < data_dim.size(); ++i) + PADDLE_ENFORCE_EQ(data_dim[i], ctx.Input("Updates")->dims()[i]); + ctx.Output("Out")->Resize(ctx.Input("Ref")->dims()); } }; @@ -35,13 +45,13 @@ class ScatterGradOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - auto Updates_grad = ctx.Output(framework::GradVarName("Updates")); - auto Updates = ctx.Input("Updates"); - auto Ref_grad = ctx.Output(framework::GradVarName("Ref")); - auto Ref = ctx.Input("Ref"); + auto *dUpdates = ctx.Output(framework::GradVarName("Updates")); + auto *Updates = ctx.Input("Updates"); + auto *dRef = ctx.Output(framework::GradVarName("Ref")); + auto *Ref = ctx.Input("Ref"); - Ref_grad->Resize(Ref->dims()); - Updates_grad->Resize(Updates->dims()); + dRef->Resize(Ref->dims()); + dUpdates->Resize(Updates->dims()); } }; diff --git a/paddle/operators/scatter_op.h b/paddle/operators/scatter_op.h index c2db3ae37c..e9595638a8 100644 --- a/paddle/operators/scatter_op.h +++ b/paddle/operators/scatter_op.h @@ -46,13 +46,13 @@ class ScatterGradientOpKernel : public framework::OpKernel { auto *dRef = ctx.Output(framework::GradVarName("Ref")); auto *dUpdates = ctx.Output(framework::GradVarName("Updates")); auto *Index = ctx.Input("Index"); - auto *dO = ctx.Input(framework::GradVarName("Out")); + auto *dOut = ctx.Input(framework::GradVarName("Out")); // In place gradient: dRef = dO - dRef->ShareDataWith(*dO); + dRef->ShareDataWith(*dOut); dUpdates->mutable_data(ctx.GetPlace()); // Gradient by Gather: dUpdates += dO[Index] - Gather(ctx.GetPlace(), dO, Index, dUpdates); + Gather(ctx.GetPlace(), 
dOut, Index, dUpdates); } }; diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index ac37671c77..abe0b5391a 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -82,6 +82,11 @@ def get_numeric_gradient(op, def product(dim): return reduce(lambda a, b: a * b, dim, 1) + def copy_tensor(): + for var_name in input_values: + tensor_ = local_scope.find_var(var_name).get_tensor() + tensor_.set(numpy.copy(input_values[var_name]), core.CPUPlace()) + # get the input tensor that we want to get it's numeric gradient. tensor_to_check = local_scope.find_var(input_to_check).get_tensor() tensor_size = product(tensor_to_check.get_dims()) @@ -92,9 +97,7 @@ def get_numeric_gradient(op, # we use a for loop to compute the gradient of every element. for i in xrange(tensor_size): if in_place: - for var_name in input_values: - tensor_ = local_scope.find_var(var_name).get_tensor() - tensor_.set(numpy.copy(input_values[var_name]), core.CPUPlace()) + copy_tensor() # get one input element throw it's index i. origin = tensor_to_check.get_float_element(i) @@ -105,9 +108,7 @@ def get_numeric_gradient(op, # plus delta to this element, run op and get the sum of the result tensor. if in_place: - for var_name in input_values: - tensor_ = local_scope.find_var(var_name).get_tensor() - tensor_.set(numpy.copy(input_values[var_name]), core.CPUPlace()) + copy_tensor() x_neg = origin - delta tensor_to_check.set_float_element(i, x_neg) y_neg = get_output() diff --git a/python/paddle/v2/framework/tests/test_scatter_op.py b/python/paddle/v2/framework/tests/test_scatter_op.py index 861fe6cf89..c1f9444889 100644 --- a/python/paddle/v2/framework/tests/test_scatter_op.py +++ b/python/paddle/v2/framework/tests/test_scatter_op.py @@ -30,7 +30,6 @@ class TestScatterGradOp(GradientChecker): output_np = numpy.copy(ref_np) output_np[index_np] += updates_np inputs = {'Ref': ref_np, 'Index': index_np, 'Updates': updates_np} - # check gradient self.check_grad( op, inputs, set(["Updates", "Ref"]), "Out", in_place=True) From 6f235553fd923d4b0b225fdc4a521570b03fbc24 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Fri, 25 Aug 2017 22:20:20 +0000 Subject: [PATCH 118/170] scatter op fixed --- paddle/operators/scatter_op.cc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/paddle/operators/scatter_op.cc b/paddle/operators/scatter_op.cc index f901edefa2..09a2f94dde 100644 --- a/paddle/operators/scatter_op.cc +++ b/paddle/operators/scatter_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ #include "paddle/operators/scatter_op.h" #include "paddle/framework/ddim.h" From 852f341615808b6a5e6249b3b7c1f5f20fd22ec9 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Fri, 25 Aug 2017 16:48:52 -0700 Subject: [PATCH 119/170] Add clean build section --- doc/howto/dev/build_cn.md | 10 +++++++++- doc/howto/dev/build_en.md | 10 +++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/doc/howto/dev/build_cn.md b/doc/howto/dev/build_cn.md index 79b4ff9d5a..d9d520893f 100644 --- a/doc/howto/dev/build_cn.md +++ b/doc/howto/dev/build_cn.md @@ -56,7 +56,15 @@ 有时候我们只想运行一个特定的单元测试,比如 `memory_test`,我们可以 ```bash - docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest -V -R memory_test" + nvidia-docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest -V -R memory_test" + ``` + +5. 清理 + + 有时候我们会希望清理掉已经下载的第三方依赖以及已经编译的二进制文件。此时只需要: + + ```bash + rm -rf build ``` ## 为什么要 Docker 呀? diff --git a/doc/howto/dev/build_en.md b/doc/howto/dev/build_en.md index e1b55929f9..318bf3d384 100644 --- a/doc/howto/dev/build_en.md +++ b/doc/howto/dev/build_en.md @@ -56,7 +56,15 @@ Nothing else. Not even Python and GCC, because you can install all build tools Sometimes we want to run a specific unit test, say `memory_test`, we can run ```bash - docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest -V -R memory_test" + nvidia-docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest -V -R memory_test" + ``` + +5. Clean Build. + + Sometimes, we might want to clean all thirt-party dependents and built binaries. To do so, just + + ```bash + rm -rf build ``` ## Docker, Or Not? From bfeecfd3d25c5c04dbd7d000490e77454944d044 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Fri, 25 Aug 2017 23:54:46 +0000 Subject: [PATCH 120/170] fix problems --- paddle/operators/scatter_op.cc | 18 +++++++++--------- paddle/operators/scatter_op.cu | 18 +++++++++--------- .../v2/framework/tests/gradient_checker.py | 6 +++--- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/paddle/operators/scatter_op.cc b/paddle/operators/scatter_op.cc index 09a2f94dde..f901edefa2 100644 --- a/paddle/operators/scatter_op.cc +++ b/paddle/operators/scatter_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/scatter_op.h" #include "paddle/framework/ddim.h" diff --git a/paddle/operators/scatter_op.cu b/paddle/operators/scatter_op.cu index e6a6fa57d9..6716b47883 100644 --- a/paddle/operators/scatter_op.cu +++ b/paddle/operators/scatter_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU #include "paddle/operators/scatter_op.h" diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index abe0b5391a..9a7a7fbf5e 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -82,7 +82,7 @@ def get_numeric_gradient(op, def product(dim): return reduce(lambda a, b: a * b, dim, 1) - def copy_tensor(): + def restore_inputs(): for var_name in input_values: tensor_ = local_scope.find_var(var_name).get_tensor() tensor_.set(numpy.copy(input_values[var_name]), core.CPUPlace()) @@ -97,7 +97,7 @@ def get_numeric_gradient(op, # we use a for loop to compute the gradient of every element. for i in xrange(tensor_size): if in_place: - copy_tensor() + restore_inputs() # get one input element throw it's index i. origin = tensor_to_check.get_float_element(i) @@ -108,7 +108,7 @@ def get_numeric_gradient(op, # plus delta to this element, run op and get the sum of the result tensor. if in_place: - copy_tensor() + restore_inputs() x_neg = origin - delta tensor_to_check.set_float_element(i, x_neg) y_neg = get_output() From ec5e20c9f12e89e13b52978b8bb27997c77f059c Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Fri, 25 Aug 2017 17:14:28 -0700 Subject: [PATCH 121/170] Remove stopped containers and dangling images --- doc/howto/dev/build_cn.md | 18 +++++++++++------- doc/howto/dev/build_en.md | 4 ++++ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/doc/howto/dev/build_cn.md b/doc/howto/dev/build_cn.md index d9d520893f..0b911f7b75 100644 --- a/doc/howto/dev/build_cn.md +++ b/doc/howto/dev/build_cn.md @@ -7,7 +7,7 @@ 1. 一台电脑,可以装的是 Linux, BSD, Windows 或者 MacOS 操作系统,以及 1. 
Docker。 -不需要其他任何软件了。即便是 Python 和 GCC 都不需要,因为我们会把所有编译工具都安装进一个 Docker image 里。 +不需要依赖其他任何软件了。即便是 Python 和 GCC 都不需要,因为我们会把所有编译工具都安装进一个 Docker image 里。 ## 总体流程 @@ -17,7 +17,7 @@ git clone https://github.com/paddlepaddle/paddle ``` -2. 安装工具 +2. 安装开发工具到 Docker image 里 ```bash cd paddle; docker build -t paddle:dev . @@ -30,13 +30,13 @@ 以下命令启动一个 Docker container 来执行 `paddle:dev` 这个 Docker image,同时把当前目录(源码树根目录)映射为 container 里的 `/paddle` 目录,并且运行 `Dockerfile` 描述的默认入口程序 [`build.sh`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh)。这个脚本调用 `cmake` 和 `make` 来编译 `/paddle` 里的源码,结果输出到 `/paddle/build`,也就是本地的源码树根目录里的 `build` 子目录。 ```bash - docker run -v $PWD:/paddle paddle:dev + docker run --rm -v $PWD:/paddle paddle:dev ``` 上述命令编译出一个 CUDA-enabled 版本。如果我们只需要编译一个只支持 CPU 的版本,可以用 ```bash - docker run -e WITH_GPU=OFF -v $PWD:/paddle paddle:dev + docker run --rm -e WITH_GPU=OFF -v $PWD:/paddle paddle:dev ``` 4. 运行单元测试 @@ -44,19 +44,19 @@ 用本机的第一个 GPU 来运行包括 GPU 单元测试在内的所有单元测试: ```bash - NV_GPU=0 nvidia-docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest" + NV_GPU=0 nvidia-docker run --rm -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest" ``` 如果编译的时候我们用了 `WITH_GPU=OFF` 选项,那么编译过程只会产生 CPU-based 单元测试,那么我们也就不需要 nvidia-docker 来运行单元测试了。我们只需要: ```bash - docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest" + docker run --rm -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest" ``` 有时候我们只想运行一个特定的单元测试,比如 `memory_test`,我们可以 ```bash - nvidia-docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest -V -R memory_test" + nvidia-docker run --rm -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest -V -R memory_test" ``` 5. 清理 @@ -118,3 +118,7 @@ - 在 Windows/MacOS 上编译很慢 Docker 在 Windows 和 MacOS 都可以运行。不过实际上是运行在一个 Linux 虚拟机上。可能需要注意给这个虚拟机多分配一些 CPU 和内存,以保证编译高效。具体做法请参考[这个issue](https://github.com/PaddlePaddle/Paddle/issues/627)。 + +- 磁盘不够 + + 本文中的例子里,`docker run` 命令里都用了 `--rm` 参数,这样保证运行结束之后的 containers 不会保留在磁盘上。可以用 `docker ps -a` 命令看到停止后但是没有删除的 containers。`docker build` 命令有时候会产生一些中间结果,是没有名字的 images,也会占用磁盘。可以参考[这篇文章](https://zaiste.net/posts/removing_docker_containers/)来清理这些内容。 diff --git a/doc/howto/dev/build_en.md b/doc/howto/dev/build_en.md index 318bf3d384..d0048e3714 100644 --- a/doc/howto/dev/build_en.md +++ b/doc/howto/dev/build_en.md @@ -118,3 +118,7 @@ Nothing else. Not even Python and GCC, because you can install all build tools - Docker on Windows/MacOS builds slowly On Windows and MacOS, Docker containers run in a Linux VM. You might want to give this VM some more memory and CPUs so to make the building efficient. Please refer to [this issue](https://github.com/PaddlePaddle/Paddle/issues/627) for details. + +- Not enough disk space + + Examples in this article uses option `--rm` with the `docker run` command. This option ensures that stopped containers do not exist on hard disks. We can use `docker ps -a` to list all containers, including stopped. Sometimes `docker build` generates some intermediate dangling images, which also take disk space. To clean them, please refer to [this article](https://zaiste.net/posts/removing_docker_containers/). 
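Both versions of the document above defer the actual cleanup commands to an external article; with the stock Docker CLI the sequence typically looks like the following (these commands are not part of the patch, and the removals are destructive, so review the listed items first):

```bash
# Containers, including stopped ones left behind by `docker run` without --rm.
docker ps -a
docker rm $(docker ps -aq --filter status=exited)

# Dangling (unnamed intermediate) images produced by repeated `docker build`.
docker images -f dangling=true
docker rmi $(docker images -f dangling=true -q)
```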
From 721b5020fae92600a0aa2c4093dbeeac63b597c6 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Sat, 26 Aug 2017 16:34:33 -0700 Subject: [PATCH 122/170] change predict size to label class_num --- python/paddle/v2/framework/tests/mnist.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index 9a0b109850..9b2dbed25c 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -181,7 +181,7 @@ images = data_layer(name='pixel', dims=[BATCH_SIZE, 784]) labels = data_layer(name='label', dims=[BATCH_SIZE]) fc1 = fc_layer(net=forward_net, input=images, size=100, act="sigmoid") fc2 = fc_layer(net=forward_net, input=fc1, size=100, act="sigmoid") -predict = fc_layer(net=forward_net, input=fc2, size=100, act="softmax") +predict = fc_layer(net=forward_net, input=fc2, size=10, act="softmax") cost = cross_entropy_layer(net=forward_net, input=predict, label=labels) init_net.complete_add_op(True) @@ -223,7 +223,7 @@ def test(cost_name): sum(error) / float(len(error)))) -PASS_NUM = 1 +PASS_NUM = 10 init_net.run(scope, dev_ctx) for pass_id in range(PASS_NUM): From 787cb8ce5d829435e5d45e8bc6bc51cfdcf49272 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Sat, 26 Aug 2017 16:36:28 -0700 Subject: [PATCH 123/170] reset pass num to 1 --- python/paddle/v2/framework/tests/mnist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index 9b2dbed25c..a68f302f9c 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -223,7 +223,7 @@ def test(cost_name): sum(error) / float(len(error)))) -PASS_NUM = 10 +PASS_NUM = 1 init_net.run(scope, dev_ctx) for pass_id in range(PASS_NUM): From 3120ee5cfbbe6ecf3550b6a338a4c14afe6e4ebd Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Sat, 26 Aug 2017 18:46:06 -0700 Subject: [PATCH 124/170] fix backward doc --- paddle/framework/backward.md | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/paddle/framework/backward.md b/paddle/framework/backward.md index 74c001b06a..c8fa3fefe5 100644 --- a/paddle/framework/backward.md +++ b/paddle/framework/backward.md @@ -21,18 +21,32 @@ grad_op_builder(fengjiayi) given a forward network, it generates the backward network. We only care about the Gradients—`OutputGradients`,`InputGradients`. -1. bla bla bla (yuyang) +1. Op + + when the input forward network is a Op, return its gradient Operator Immediately. 2. NetOp - when the input forward network is a NetOp, it need to call the sub NetOp/Operators backward function recursively and ensure them done. During the process, we need to collect the `OutputGradients` name. + when the input forward network is a NetOp, it need to call the sub NetOp/Operators backward function recursively. During the process, we need to collect the `OutputGradients` name according to forward NetOp. + + **shared variable**. As illustrated in the pictures, two operator's `Output` `Gradient` will overwirte their shared input variable. + +

+
+ + 1. shared variable in two operators. + +

+ + Sharing a variable between operators, or using the same input variable in multiple operators, leads to duplicate gradient variables. As the demo above shows, we need to rename the gradient names recursively and add a generic add operator to replace the overwrite links. + +

+
- We share variable in the same scope, as a result, duplicate operator `OutputGradients` will overwirte then duplicate variable. + 2. replace shared variable gradient with `Add` Operator - ![./images/duplicate_op]() +

- Share variable between operators or same input variable used in multiple operators lead to a duplicate gradient variable. As demo show above, we need to rename gradient name recursively, and add a generic add operator instead. -![./images/duplicate_op2]() -​ Then collect the sub graph OutputGradients/InputGradients as the NetOp's and return it. +​ Then collect the sub graph `OutputGradients`/`InputGradients` as the NetOp's and return it. From bb5c656b574b1e518da981d781db0e1e0a0e4d75 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Sat, 26 Aug 2017 19:15:31 -0700 Subject: [PATCH 125/170] test --- paddle/framework/backward.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/framework/backward.md b/paddle/framework/backward.md index c717c2f30b..d5dbd57d19 100644 --- a/paddle/framework/backward.md +++ b/paddle/framework/backward.md @@ -6,7 +6,7 @@ In Neural Network, the backpropagation algorithm follows the chain rule, so we n ## Backward Operator Registry -A backward network is built up with several backward operators. Backward operators take forward operators' inputs, outputs and output gradients, and then calculate its input gradients. In most cases, there is a one-to-one correspondence between forward and backward operators. We use registry mechanism to save these correspondences, which is quite similar with operator registry itself. +A backward network is built up with several backward operators. Backward operators take forward operators' inputs, outputs and output gradients and then calculate its input gradients. In most cases, there is a one-to-one correspondence between forward and backward operators. We use registry mechanism to save these correspondences, which is quite similar with operator registry itself. For example, we have got a `add_two_op`, and is registered by the following code: From f646f7991ae49eff00370a03beb958fc88ac62ad Mon Sep 17 00:00:00 2001 From: qingqing01 Date: Sun, 27 Aug 2017 12:01:46 +0800 Subject: [PATCH 126/170] Add chinese doc about how to write new operators. --- doc/howto/dev/new_op_cn.md | 300 +++++++++++++++++++++++++++++++++++++ 1 file changed, 300 insertions(+) create mode 100644 doc/howto/dev/new_op_cn.md diff --git a/doc/howto/dev/new_op_cn.md b/doc/howto/dev/new_op_cn.md new file mode 100644 index 0000000000..df20c15ec6 --- /dev/null +++ b/doc/howto/dev/new_op_cn.md @@ -0,0 +1,300 @@ +# 如何写新的Operator + + - [概念简介](#概念简介) + - [实现C++类](#实现C++类) + - [定义ProtoMaker类](#定义ProtoMaker类) + - [定义Operator类](#定义Operator类) + - [定义`OpKernel`类](#定义`OpKernel`类) + - [注册类](#注册类) + - [编译](#编译) + - [绑定Python](#绑定Python) + - [实现单元测试](#实现单元测试) + + +## 概念简介 + +简单介绍需要用到基类,详细介绍请参考设计文档。 + +- `framework::OperatorBase`: Operator(简写,Op)基类。 +- `framework::OpKernel`: Op计算函数的基类,称作Kernel。 +- `framework::OperatorWithKernel`:继承自OperatorBase,Op有计算函数,称作有Kernel。 +- `class OpProtoAndCheckerMaker`:描述该Op的输入、输出、属性、注释,主要用于Python API接口生成 + +依据是否包含kernel,将Op分为两种:包含Kernel的Op和不包含kernel的Op,前者Op的定义继承自`OperatorBase`,后者继承自`OperatorWithKernel`。本教程主要介绍带Kernel的Op如何写,简单总结如下: + +Forward Op需要包含: + + - OpProtoMake定义 + - Op定义 + - Kernel实现 + +与之对应的Backward Op包含: + + - Op定义 + - Kernel实现 + +下面以矩阵乘操作,即[MulOp](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc)为例来介绍如何写带Kernel的Operator。 + + +## 实现C++类 + + +### 1. 
定义ProtoMaker类 + +矩阵乘的公式:$$Out = X * Y$$ ,可见该计算由两个输入,一个输出组成。首先定义`ProtoMaker`来描述该Op的输入、输出及注释: + + + + ``` + class MulOpMaker : public framework::OpProtoAndCheckerMaker { + public: + MulOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "The first input of mul op"); + AddInput("Y", "The second input of mul op"); + AddOutput("Out", "The output of mul op"); + AddComment(R"DOC( + Two Element Mul Operator. + The equation is: Out = X * Y + )DOC"); + } + }; + ``` + +[`MulOpMaker`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc#L43)继承自`framework::OpProtoAndCheckerMaker`,构造函数包括2个: + + - `framework::OpProto` : 前者存储Op的输入输出和参数属性,将用于Python API接口的生成。 + - `framework::OpAttrChecker` :后者用于检查参数属性的合法性。 + +构造函数里通过`AddInput`添加输入参数,通过`AddOutput`添加输出参数,通过`AddComment`添加该Op的注释,这些函数会将对应内容添加到`OpProto`中。 + +在`MulOp`中添加两个输入`X`和`Y`,添加了一个输出`Out`,并解释了各自含义,该命名尽可能的规范。 + + +再举个[`ScaleOp`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/scale_op.cc#L37)的例子: + +```C++ + template +class ScaleOpMaker : public framework::OpProtoAndCheckerMaker { + public: + ScaleOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "The input tensor of scale operator.").NotInGradient(); + AddOutput("Out", "The output tensor of scale operator.").NotInGradient(); + AddComment(R"DOC(Scale operator +The equation is: Out = scale*X +)DOC"); + AddAttr("scale", "scale of scale operator.").SetDefault(1.0); + } +}; +``` + + 在这个例子里,两处不同: + + - `AddInput("X","...").NotInGradient()` : 表示`X`这个输入不参与`ScaleOp`对应的梯度Op计算之中。 + - `AddAttr("scale", "...").SetDefault(1.0);` : 增加`scale`系数,作为参数属性,并且设置默认值为1.0。 + + +### 2. 定义Operator类 + + + ```C++ + class MulOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + auto dim0 = ctx.Input("X")->dims(); + auto dim1 = ctx.Input("Y")->dims(); + PADDLE_ENFORCE_EQ(dim0.size(), 2, + "input X(%s) should be a tensor with 2 dims, a matrix", + ctx.op_.Input("X")); + PADDLE_ENFORCE_EQ(dim1.size(), 2, + "input Y(%s) should be a tensor with 2 dims, a matrix", + ctx.op_.Input("Y")); + PADDLE_ENFORCE_EQ( + dim0[1], dim1[0], + "First matrix's width must be equal with second matrix's height."); + ctx.Output("Out")->Resize({dim0[0], dim1[1]}); + } + }; + ``` + +[`MulOp`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc#L22)继承自`OperatorWithKernel`。`public`成员: + +```C++ +using framework::OperatorWithKernel::OperatorWithKernel; +``` + +这句表示使用基类`OperatorWithKernel`的构造函数,也可写成: + +```C++ + MulOp(const std::string &type, const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorWithKernel(type, inputs, outputs, attrs) {} +``` + +还需要重写`InferShape`接口。`InferShape`为const函数,不能修改Op的成员变量,参数为`const framework::InferShapeContext &ctx`,通过该参数可获取到输入输出以及属性。它的功能是: + - 1). 做检查, 尽早报错:检查输入数据维度、类型等是否合法 + - 2). 设置输出Tensor的形状 + +通常`OpProtoMaker`和`Op`类的定义写在`.cc`文件中,和要讲到的注册函数一起放在`.cc`中 + +### 3. 
定义`OpKernel`类 + +```C++ +template +class MulKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* X = context.Input("X"); + auto* Y = context.Input("Y"); + auto* Z = context.Output("Out"); + Z->mutable_data(context.GetPlace()); + auto* device_context = + const_cast(context.device_context_); + math::matmul(*X, false, *Y, false, 1, Z, 0, device_context); + } +}; +``` + +`MulKernel`继承自`framework::OpKernel`,带有模板参数: + + - `typename Place`: 表示设备类型,不同设备(CPU、GPU)共享同一个Kernel时,需加该模板参数,不共享则不加,一个不共享的例子是[`OnehotCrossEntropyOpKernel`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/cross_entropy_op.h#L43)。 + + - `typename T` : 表示数据类型,如`float`, `double`等。 + +`MulKernel`需要重写`Compute`接口,该接口参数为`const framework::ExecutionContext& context`, `ExecutionContext`相比`InferShapeContext`增加了设备类型,同样可获取到输入输出和属性参数,`Compute`函数里写具体实现时。 + +注意,不同设备(CPU、GPU)共享一个Op定义,是否则共享同一个`OpKernel`,取决于`Compute`调用的函数是否支持不同设备。`MulOp`的CPU、GPU实现共享同一个`Kernel`,`OpKernel`不共享的例子可以参考[`OnehotCrossEntropyOpKernel`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/cross_entropy_op.h#L43)。 + +到此前向Op实现完成,需要在`.cc`文件中注册该op和kernel。反向Op类的定义和Kernel定义与前向Op类似,这里不再重复。但注意,反向Op没有`ProtoMaker`。 + +### 4. 注册类 + +在`.cc`文件中注册前向、反向Op类,注册CPU Kernel。 + + ```C++ + namespace ops = paddle::operators; + REGISTER_OP(mul, ops::MulOp, ops::MulOpMaker, mul_grad, ops::MulOpGrad); + REGISTER_OP_CPU_KERNEL(mul, ops::MulKernel); + REGISTER_OP_CPU_KERNEL(mul_grad, + ops::MulGradKernel); + ``` + + - `REGISTER_OP` : 注册`ops::MulOp`类,类型名为`mul`,该类的`ProtoMaker`为`ops::MulOpMaker`,注册`ops::MulOpGrad`,类型名为`mul_grad`, + - `REGISTER_OP_WITHOUT_GRADIENT` : 用于注册没有反向的Op。 + - `REGISTER_OP_CPU_KERNEL` :注册`ops::MulKernel`类,并特化模板参数为`paddle::platform::CPUPlace`和`float`类型,同理,注册`ops::MulKernel`类。 + +在 `.cu`文件中注册GPU Kernel。 + + ``` + namespace ops = paddle::operators; + REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel); + REGISTER_OP_GPU_KERNEL(mul_grad, + ops::MulGradKernel); + ``` + +### 5. 
编译 + +在[paddle/operators/CMakeLists.txt](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/CMakeLists.txt)文件中添加编译。 + + ``` + op_library(mul_op SRCS mul_op.cc mul_op.cu DEPS math_function) + ``` + +下面命令可以编译: + + ``` + make mul_op + ``` + +## 绑定Python + + - 绑定Python + + 在 [`paddle/pybind/pybind.cc +`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/pybind.cc)文件中添加该类: + + ``` + USE_OP(mul); + ``` + 如果只实现了CPU版本,则使用`USE_CPU_ONLY_OP`: + + ``` + USE_CPU_ONLY_OP(gather); + ``` + + 使用`USE_OP`告知编译器需要链接该Op的目标文件,具体解释参考[代码注释](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/op_registry.h#L81)。 + + + - 生成库 + + 在 [`paddle/pybind/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/CMakeLists.txt)文件添加类到`DEPS`中。 + + ``` + if(WITH_PYTHON) +cc_library(paddle_pybind SHARED + SRCS pybind.cc + DEPS pybind python backward + mul_op + minus_op) +endif(WITH_PYTHON) + ``` + +## 实现单元测试 + +单测包括对比前向Op不同设备(CPU、GPU)的实现、对比反向OP不同设备(CPU、GPU)的实现、反向Op的梯度测试。下面介绍介绍[`MulOp`的单测](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/framework/tests/test_mul_op.py)。 + +- 前向Op单测 + +前向Op单测继承自`unittest.TestCase`,并定义元类`__metaclass__ = OpTestMeta`,具体单测流程在`OpTestMeta`里完成。需在`setUp`函数定义输入输出和属性参数,以及Python对比的输出值。 + +``` +import unittest +import numpy as np +from gradient_checker import GradientChecker, create_op +from op_test_util import OpTestMeta + +class TestMulOp(unittest.TestCase): + __metaclass__ = OpTestMeta + + def setUp(self): + self.type = "mul" + self.inputs = { + 'X': np.random.random((32, 84)).astype("float32"), + 'Y': np.random.random((84, 100)).astype("float32") + } + self.outputs = {'Out': np.dot(self.inputs['X'], self.inputs['Y'])} +``` + 首先需要`import`必要的包,下面详细解释其他值: + + - `self.type = "mul" ` : 定义类型,和注册的类型一致。 + - `self.inputs` : 定义输入,类型为Numpy.array,并初始化。 + - `self.outputs` : 定义输出,并得到Python结算结果。 + + + - 反向Op单测 + +反向Op单测继承自`GradientChecker`,而`GradientChecker`集成自`unittest.TestCase`,所以反向单测函数需要`test_`开头。 + + ``` + class MulGradOpTest(GradientChecker): + def test_mul(self): + op = create_op("mul") + inputs = { + 'X': np.random.random((32, 84)).astype("float32"), + 'Y': np.random.random((84, 100)).astype("float32") + } + self.compare_grad(op, inputs) + # mul op will enlarge the relative error + self.check_grad( + op, inputs, set(["X", "Y"]), "Out", max_relative_error=0.5) + ``` + + - 调用`create_op("mul")`创建反向Op对应的前向Op。 + - 定义输入`inputs`。 + - 调用`compare_grad`函数对比CPU、GPU计算结果。 + - 调用`check_grad`检查梯度稳定性。 From d78521d491d8c6625146137406f3b7402aebe143 Mon Sep 17 00:00:00 2001 From: qingqing01 Date: Sun, 27 Aug 2017 12:11:15 +0800 Subject: [PATCH 127/170] fix doc format. --- doc/howto/dev/new_op_cn.md | 160 ++++++++++++++++++------------------- 1 file changed, 80 insertions(+), 80 deletions(-) diff --git a/doc/howto/dev/new_op_cn.md b/doc/howto/dev/new_op_cn.md index df20c15ec6..ebd2cf3ff0 100644 --- a/doc/howto/dev/new_op_cn.md +++ b/doc/howto/dev/new_op_cn.md @@ -4,11 +4,13 @@ - [实现C++类](#实现C++类) - [定义ProtoMaker类](#定义ProtoMaker类) - [定义Operator类](#定义Operator类) - - [定义`OpKernel`类](#定义`OpKernel`类) + - [定义OpKernel类](#定义OpKernel类) - [注册类](#注册类) - [编译](#编译) - [绑定Python](#绑定Python) - [实现单元测试](#实现单元测试) + - [前向Operator单测](#前向Operator单测) + - [反向Operator单测](#反向Operator单测) ## 概念简介 @@ -41,25 +43,23 @@ Forward Op需要包含: ### 1. 
定义ProtoMaker类 -矩阵乘的公式:$$Out = X * Y$$ ,可见该计算由两个输入,一个输出组成。首先定义`ProtoMaker`来描述该Op的输入、输出及注释: - +矩阵乘的公式:$Out = X * Y$, 可见该计算由两个输入,一个输出组成。首先定义`ProtoMaker`来描述该Op的输入、输出及注释: - - ``` - class MulOpMaker : public framework::OpProtoAndCheckerMaker { - public: - MulOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "The first input of mul op"); - AddInput("Y", "The second input of mul op"); - AddOutput("Out", "The output of mul op"); - AddComment(R"DOC( - Two Element Mul Operator. - The equation is: Out = X * Y - )DOC"); - } - }; - ``` +``` +class MulOpMaker : public framework::OpProtoAndCheckerMaker { + public: + MulOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "The first input of mul op"); + AddInput("Y", "The second input of mul op"); + AddOutput("Out", "The output of mul op"); + AddComment(R"DOC( +Two Element Mul Operator. +The equation is: Out = X * Y +)DOC"); + } +}; +``` [`MulOpMaker`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc#L43)继承自`framework::OpProtoAndCheckerMaker`,构造函数包括2个: @@ -73,8 +73,8 @@ Forward Op需要包含: 再举个[`ScaleOp`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/scale_op.cc#L37)的例子: -```C++ - template +``` +template class ScaleOpMaker : public framework::OpProtoAndCheckerMaker { public: ScaleOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) @@ -98,42 +98,42 @@ The equation is: Out = scale*X ### 2. 定义Operator类 - ```C++ - class MulOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(const framework::InferShapeContext &ctx) const override { - auto dim0 = ctx.Input("X")->dims(); - auto dim1 = ctx.Input("Y")->dims(); - PADDLE_ENFORCE_EQ(dim0.size(), 2, - "input X(%s) should be a tensor with 2 dims, a matrix", - ctx.op_.Input("X")); - PADDLE_ENFORCE_EQ(dim1.size(), 2, - "input Y(%s) should be a tensor with 2 dims, a matrix", - ctx.op_.Input("Y")); - PADDLE_ENFORCE_EQ( - dim0[1], dim1[0], - "First matrix's width must be equal with second matrix's height."); - ctx.Output("Out")->Resize({dim0[0], dim1[1]}); - } - }; - ``` +```c++ +class MulOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + auto dim0 = ctx.Input("X")->dims(); + auto dim1 = ctx.Input("Y")->dims(); + PADDLE_ENFORCE_EQ(dim0.size(), 2, + "input X(%s) should be a tensor with 2 dims, a matrix", + ctx.op_.Input("X")); + PADDLE_ENFORCE_EQ(dim1.size(), 2, + "input Y(%s) should be a tensor with 2 dims, a matrix", + ctx.op_.Input("Y")); + PADDLE_ENFORCE_EQ( + dim0[1], dim1[0], + "First matrix's width must be equal with second matrix's height."); + ctx.Output("Out")->Resize({dim0[0], dim1[1]}); + } +}; +``` [`MulOp`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc#L22)继承自`OperatorWithKernel`。`public`成员: -```C++ +```c++ using framework::OperatorWithKernel::OperatorWithKernel; ``` 这句表示使用基类`OperatorWithKernel`的构造函数,也可写成: -```C++ - MulOp(const std::string &type, const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : OperatorWithKernel(type, inputs, outputs, attrs) {} +```c++ +MulOp(const std::string &type, const framework::VariableNameMap 
&inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorWithKernel(type, inputs, outputs, attrs) {} ``` 还需要重写`InferShape`接口。`InferShape`为const函数,不能修改Op的成员变量,参数为`const framework::InferShapeContext &ctx`,通过该参数可获取到输入输出以及属性。它的功能是: @@ -142,7 +142,7 @@ using framework::OperatorWithKernel::OperatorWithKernel; 通常`OpProtoMaker`和`Op`类的定义写在`.cc`文件中,和要讲到的注册函数一起放在`.cc`中 -### 3. 定义`OpKernel`类 +### 3. 定义OpKernel类 ```C++ template @@ -176,13 +176,13 @@ class MulKernel : public framework::OpKernel { 在`.cc`文件中注册前向、反向Op类,注册CPU Kernel。 - ```C++ - namespace ops = paddle::operators; - REGISTER_OP(mul, ops::MulOp, ops::MulOpMaker, mul_grad, ops::MulOpGrad); - REGISTER_OP_CPU_KERNEL(mul, ops::MulKernel); - REGISTER_OP_CPU_KERNEL(mul_grad, - ops::MulGradKernel); - ``` +```c++ +namespace ops = paddle::operators; +REGISTER_OP(mul, ops::MulOp, ops::MulOpMaker, mul_grad, ops::MulOpGrad); +REGISTER_OP_CPU_KERNEL(mul, ops::MulKernel); +REGISTER_OP_CPU_KERNEL(mul_grad, + ops::MulGradKernel); +``` - `REGISTER_OP` : 注册`ops::MulOp`类,类型名为`mul`,该类的`ProtoMaker`为`ops::MulOpMaker`,注册`ops::MulOpGrad`,类型名为`mul_grad`, - `REGISTER_OP_WITHOUT_GRADIENT` : 用于注册没有反向的Op。 @@ -190,32 +190,32 @@ class MulKernel : public framework::OpKernel { 在 `.cu`文件中注册GPU Kernel。 - ``` - namespace ops = paddle::operators; - REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel); - REGISTER_OP_GPU_KERNEL(mul_grad, - ops::MulGradKernel); - ``` +```c++ +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel); +REGISTER_OP_GPU_KERNEL(mul_grad, + ops::MulGradKernel); +``` ### 5. 编译 在[paddle/operators/CMakeLists.txt](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/CMakeLists.txt)文件中添加编译。 - ``` - op_library(mul_op SRCS mul_op.cc mul_op.cu DEPS math_function) - ``` +``` +op_library(mul_op SRCS mul_op.cc mul_op.cu DEPS math_function) +``` 下面命令可以编译: - ``` - make mul_op - ``` +``` +make mul_op +``` ## 绑定Python - - 绑定Python +- 绑定Python - 在 [`paddle/pybind/pybind.cc + 在 [`paddle/pybind/pybind.cc `](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/pybind.cc)文件中添加该类: ``` @@ -232,23 +232,23 @@ class MulKernel : public framework::OpKernel { - 生成库 - 在 [`paddle/pybind/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/CMakeLists.txt)文件添加类到`DEPS`中。 + 在 [`paddle/pybind/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/CMakeLists.txt)文件添加类到`DEPS`中,使得该Op可以链接到生成的lib库中。 ``` if(WITH_PYTHON) -cc_library(paddle_pybind SHARED - SRCS pybind.cc - DEPS pybind python backward - mul_op - minus_op) -endif(WITH_PYTHON) + cc_library(paddle_pybind SHARED + SRCS pybind.cc + DEPS pybind python backward + mul_op + minus_op) + endif(WITH_PYTHON) ``` ## 实现单元测试 单测包括对比前向Op不同设备(CPU、GPU)的实现、对比反向OP不同设备(CPU、GPU)的实现、反向Op的梯度测试。下面介绍介绍[`MulOp`的单测](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/framework/tests/test_mul_op.py)。 -- 前向Op单测 +### 前向Operator单测 前向Op单测继承自`unittest.TestCase`,并定义元类`__metaclass__ = OpTestMeta`,具体单测流程在`OpTestMeta`里完成。需在`setUp`函数定义输入输出和属性参数,以及Python对比的输出值。 @@ -276,7 +276,7 @@ class TestMulOp(unittest.TestCase): - `self.outputs` : 定义输出,并得到Python结算结果。 - - 反向Op单测 +### 反向Operator单测 反向Op单测继承自`GradientChecker`,而`GradientChecker`集成自`unittest.TestCase`,所以反向单测函数需要`test_`开头。 From 4a83dde594d0aa6d19aeff7471b040277a8a839f Mon Sep 17 00:00:00 2001 From: caoying03 Date: Sun, 27 Aug 2017 11:28:05 +0800 Subject: [PATCH 128/170] save parameters into ordered dict. 
--- python/paddle/v2/parameters.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/paddle/v2/parameters.py b/python/paddle/v2/parameters.py index b8af5abaea..475067ef22 100644 --- a/python/paddle/v2/parameters.py +++ b/python/paddle/v2/parameters.py @@ -14,6 +14,7 @@ import numpy as np from paddle.proto.ParameterConfig_pb2 import ParameterConfig +from collections import OrderedDict import paddle.trainer.config_parser as cp import struct import tarfile @@ -62,7 +63,7 @@ class Parameters(object): """ def __init__(self): - self.__param_conf__ = dict() + self.__param_conf__ = OrderedDict() self.__gradient_machines__ = [] self.__tmp_params__ = dict() @@ -231,6 +232,9 @@ class Parameters(object): :rtype: np.ndarray """ import py_paddle.swig_paddle as api + if self.__param_conf__[key].is_static: + return np.zeros(self.__param_conf__[key].size, dtype=np.float32) + return self.__getter_inner(key, api.PARAMETER_GRADIENT) def set(self, parameter_name, value): From 4590f793f111dd4fc5134ca9bbd0a213b41962b7 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Sun, 27 Aug 2017 17:37:41 -0700 Subject: [PATCH 129/170] Update backward document --- paddle/framework/backward.md | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/paddle/framework/backward.md b/paddle/framework/backward.md index b4205fed2e..133b17c7be 100644 --- a/paddle/framework/backward.md +++ b/paddle/framework/backward.md @@ -2,32 +2,24 @@ ## Motivation -In Neural Network, the backpropagation algorithm follows the chain rule, so we need to compound the fundmental gradient operators/expressions together with chain rule . Every forward network need a backward network to construct the full computation lineage, the operator/ expression's Backward feature will generate the backward pass respect to forward pass. - +In Neural Network, the backpropagation algorithm follows the chain rule, so we need to compound the fundmental gradient operators/expressions together with chain rule . Every forward network need a backward network to construct the full computation lineage, the operator/expression's backward pass will be generated respect to forward pass. + ## Backward Operator Registry -A backward network is built up with several backward operators. Backward operators take forward operators' inputs, outputs and output gradients and then calculate its input gradients. In most cases, there is a one-to-one correspondence between forward and backward operators. We use registry mechanism to save these correspondences, which is quite similar with operator registry itself. +A backward network is built up with several backward operators. Backward operators take forward operators' inputs, outputs and output gradients and then calculate its input gradients. In most cases, there is a one-to-one correspondence between forward and backward operators. We use registry mechanism to save these correspondences. For example, we have got a `add_two_op`, and is registered by the following code: ```cpp -REGISTER_OP(add_two, AddTwoOp, AddTwoOpMaker); +REGISTER_OP(add_two, AddTwoOp, AddTwoOpMaker, add_two_grad, AddTwoGradOp); ``` `add_two` is the operator's type. `AddTwoOp` and `AddTwoOpMaker` are the operator class and the operator maker class respectively. -Assume that we have also got the backward operator of `add_two_op`, which calculating the gradients of `add_two_op`'s inputs. 
Then we register it by the following way: - -```cpp -REGISTER_GRADIENT_OP(add_two, add_two_grad, AddTwoGradOp); -``` - `add_two_grad` is the type of backward operator, and `AddTwoGradOp` is its class name. ## Backward Opeartor Creating -### Usage - Given a certain forward operator, we can get its corresponding backward opeartor by calling: ```cpp @@ -36,13 +28,13 @@ OperatorBase* bwd_op = BuildGradOp(const OperatorBase* fwd_op); The function `BuildGradOp` will sequentially execute following processes: -1. Getting the `type_` of given forward operator, and then creating the corresponding backward operator. +1. Get the `type_` of given forward operator, and then get the corresponding backward operator's type by looking up the `OpInfoMap`. -2. Copying all the attributes of forward operator expect `input_format` and `output_format`(if it has), for their elements differ between forward and backward operators. +2. Build two maps named `inputs` and `outputs` to temporary storage backward operator's inputs and outputs. Copy forward operator's `inputs_` and `outputs_` to map `inputs`, except these are not necessary for gradient computing. -3. Copying forward operator's `inputs_` and `outputs_` to backward operator's `inputs_`. And adding forward inputs' gradient variables into backward `output_`, adding forward outputs' gradient variables into backward `input_`. +3. Add forward inputs' gradient variables into map `output`, adding forward outputs' gradient variables into map `input`. -4. Building backward operator's `input_format`, `output_format` (if necessary) and `in_out_idxs_` according to its `inputs_` and `outputs_` just created. +4. Building backward operator with `inputs`, `outputs` and forward operator's attributes. ## Backward Network Building From 98b7c6736445de1f287156e933b0d625f648e6da Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Mon, 28 Aug 2017 09:52:58 +0800 Subject: [PATCH 130/170] add todo --- paddle/gserver/layers/MKLDNNFcLayer.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index ad50c15a7d..d38e6a2099 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -184,15 +184,14 @@ void MKLDNNFcLayer::resetBwd() { const MatrixPtr& wgt = weight_->getWGrad(); const MatrixPtr& bias = hasBias ? biases_->getWGrad() : nullptr; + // TODO(TJ): merge topdiffs if (nextIsMKLDNN()) { // can not directly cast outputgrad to mkldnnmatrix, // since each layer can not write the inputgrad to mkldnn inputgrad. // So just create from matrix with outputvalue format. 
const MatrixPtr& out = getOutput(MKLDNN_DEVICE).grad; outGrad_ = MKLDNNMatrix::create(out, outVal_->getPD()); - // TODO: maybe need merge topdiffs } else { - // TODO: merge topdiffs const MatrixPtr& out = getOutput(CPU_DEVICE).grad; // fc do not need to convert from cpu device since output always nc // only need create from cpu device @@ -234,8 +233,7 @@ void MKLDNNFcLayer::resetBwd() { return; } if (getInput(0, MKLDNN_DEVICE).getAllCount() > 1) { - // TODO: many mkldnn bots - // add sum handle + // TODO(TJ): use outputMaps_ ways when merge topdiff done } else { inGrad_ = MKLDNNMatrix::create(in, inVal_->getPD()); } @@ -245,8 +243,7 @@ void MKLDNNFcLayer::resetBwd() { return; } if (getInput(0, CPU_DEVICE).getAllCount() > 1) { - // TODO: many bots - // add sum handle + // TODO(TJ): use outputMaps_ ways when merge topdiff done } else { inGrad_ = MKLDNNMatrix::create(in, inVal_->getPD()); } From be4c0123c4c6cccfaa8fafa9063ce84415854c28 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Mon, 28 Aug 2017 10:11:54 +0800 Subject: [PATCH 131/170] follow comments. --- python/paddle/v2/parameters.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/python/paddle/v2/parameters.py b/python/paddle/v2/parameters.py index 475067ef22..cc3adf6f48 100644 --- a/python/paddle/v2/parameters.py +++ b/python/paddle/v2/parameters.py @@ -43,9 +43,26 @@ def create(layers): class Parameters(object): """ - Parameters is a dictionary contains Paddle's parameter. The key of - Parameters is the name of parameter. The value of Parameters is a plain - :code:`numpy.ndarry` . + `Parameters` manages all the learnable parameters in a neural network. + It stores parameters' information in an OrderedDict, key of which is + the name of a parameter, and value related to a key is a parameter's + configuration, such as initialization mean and std, its size, whether it is + a static parameter, and so on. + + :param __param_conf__: this member stores the configurations of learnable + parameters in a network in an OrderedDict. The parameters are added by + following their creation order in the neural network one by one: + parameters of the previous layers in a network are careted first. + When a user iterates over this dict, he can visit parameters in the + network from button to up. + :type __param_conf__: OrderedDict + :param __gradient_machines__: all of the parameters in a neural network are + appended to a Paddle gradient machine, which is used internally to copy + the parameter values between the C++ and Python end. + :type __gradient_machines__: list + :param __tmp_params__: a dict to store dummy parameters if no + __gradient_machines__ is appended to `Parameters`. 
+ :type __tmp_params__: dict Basically usage is From 346630f413a2e9aa9cbbdf2af4595a461ec09ac0 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Mon, 28 Aug 2017 11:19:53 +0800 Subject: [PATCH 132/170] Remove "About" tab in "Documentation" --- doc/about/index_cn.md | 11 ----------- doc/about/index_en.rst | 14 -------------- doc/index_en.rst | 1 - 3 files changed, 26 deletions(-) delete mode 100644 doc/about/index_cn.md delete mode 100644 doc/about/index_en.rst diff --git a/doc/about/index_cn.md b/doc/about/index_cn.md deleted file mode 100644 index 3bf030004d..0000000000 --- a/doc/about/index_cn.md +++ /dev/null @@ -1,11 +0,0 @@ -关于PaddlePaddle -================ - -PaddlePaddle是一个最早由百度科学家和工程师共同研发的并行分布式深度学习平台,兼备易用性、高效性、灵活性和可扩展性,目前已被百度内部多个产品线广泛使用。 -PaddlePaddle目前已经开放源码, 但是远未完善,我们希望能在这个基础上不断的改进、扩展和延伸。 -同时我们希望广大开发者积极提供反馈和贡献源代码,建立一个活跃的开源社区。 - -致谢 --------- - -在此,特别感谢PaddlePaddle的[所有贡献者](https://github.com/PaddlePaddle/Paddle/graphs/contributors)。 diff --git a/doc/about/index_en.rst b/doc/about/index_en.rst deleted file mode 100644 index 065c430cde..0000000000 --- a/doc/about/index_en.rst +++ /dev/null @@ -1,14 +0,0 @@ -ABOUT -======= - -PaddlPaddle is an easy-to-use, efficient, flexible and scalable deep learning platform, -which is originally developed by Baidu scientists and engineers for the purpose of applying deep learning to many products at Baidu. - -PaddlePaddle is now open source but far from complete, which is intended to be built upon, improved, scaled, and extended. -We hope to build an active open source community both by providing feedback and by actively contributing to the source code. - - -Credits --------- - -We owe many thanks to `all contributors and developers `_ of PaddlePaddle! diff --git a/doc/index_en.rst b/doc/index_en.rst index 168c7667c6..64684b8b9b 100644 --- a/doc/index_en.rst +++ b/doc/index_en.rst @@ -7,4 +7,3 @@ PaddlePaddle Documentation getstarted/index_en.rst howto/index_en.rst api/index_en.rst - about/index_en.rst From fe51f726a2da85b0cb96734bd9b156760b044cf9 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Mon, 28 Aug 2017 10:44:31 +0800 Subject: [PATCH 133/170] fix cmake --- paddle/math/CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/paddle/math/CMakeLists.txt b/paddle/math/CMakeLists.txt index 8afe6b509d..68b5296228 100644 --- a/paddle/math/CMakeLists.txt +++ b/paddle/math/CMakeLists.txt @@ -16,10 +16,10 @@ file(GLOB MATH_HEADERS . *.h) file(GLOB MATH_SOURCES . *.cpp) if(NOT WITH_MKLDNN) - file(GLOB_RECURSE DNN_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.h") - file(GLOB_RECURSE DNN_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.cpp") - list(REMOVE_ITEM MATH_HEADERS ${DNN_HEADER}) - list(REMOVE_ITEM MATH_SOURCES ${DNN_SOURCES}) + set(DNN_HEADER "${CMAKE_CURRENT_SOURCE_DIR}/MKLDNNMatrix.h") + set(DNN_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/MKLDNNMatrix.cpp") + list(REMOVE_ITEM MATH_HEADERS "${DNN_HEADER}") + list(REMOVE_ITEM MATH_SOURCES "${DNN_SOURCE}") message(STATUS "Skip compiling with MKLDNNMatrix") else() message(STATUS "Compile with MKLDNNMatrix") From f241773c4f1803631bba968bca1d5621a0d3ced5 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Wed, 23 Aug 2017 19:43:57 +0800 Subject: [PATCH 134/170] Support to use clang for Android cross-compiling. 
--- Dockerfile.android | 4 +- cmake/cblas.cmake | 4 + cmake/external/warpctc.cmake | 1 + paddle/cuda/include/hl_cpu_gru.cuh | 166 ++++++++++++------------- paddle/function/MulOp.cpp | 37 +++--- paddle/math/MathFunctions.cpp | 4 + paddle/math/MathFunctions.h | 23 +++- paddle/math/Matrix.cpp | 18 ++- paddle/scripts/docker/build_android.sh | 51 ++++++-- 9 files changed, 181 insertions(+), 127 deletions(-) diff --git a/Dockerfile.android b/Dockerfile.android index aa95abb366..6013215d9d 100644 --- a/Dockerfile.android +++ b/Dockerfile.android @@ -47,8 +47,8 @@ RUN mkdir /opt/android-ndk-tmp && \ wget -q https://dl.google.com/android/repository/android-ndk-r14b-linux-x86_64.zip && \ unzip -q android-ndk-r14b-linux-x86_64.zip && \ mv android-ndk-r14b ${ANDROID_NDK_HOME} && \ - ${ANDROID_NDK_HOME}/build/tools/make-standalone-toolchain.sh --arch=arm --platform=android-21 --install-dir=${ANDROID_ARM_STANDALONE_TOOLCHAIN} && \ - ${ANDROID_NDK_HOME}/build/tools/make-standalone-toolchain.sh --arch=arm64 --platform=android-21 --install-dir=${ANDROID_ARM64_STANDALONE_TOOLCHAIN} && \ + ${ANDROID_NDK_HOME}/build/tools/make-standalone-toolchain.sh --arch=arm --platform=android-23 --install-dir=${ANDROID_ARM_STANDALONE_TOOLCHAIN} && \ + ${ANDROID_NDK_HOME}/build/tools/make-standalone-toolchain.sh --arch=arm64 --platform=android-23 --install-dir=${ANDROID_ARM64_STANDALONE_TOOLCHAIN} && \ rm -rf /opt/android-ndk-tmp && \ rm -rf ${ANDROID_NDK_HOME} diff --git a/cmake/cblas.cmake b/cmake/cblas.cmake index 854066fd1d..ab111eccc0 100644 --- a/cmake/cblas.cmake +++ b/cmake/cblas.cmake @@ -13,6 +13,10 @@ # system paths. # +if(USE_EIGEN_FOR_BLAS) + return() +endif(USE_EIGEN_FOR_BLAS) + set(CBLAS_FOUND OFF) ## Find MKLML First. diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake index 2d7daed9bc..3cc652bed5 100644 --- a/cmake/external/warpctc.cmake +++ b/cmake/external/warpctc.cmake @@ -41,6 +41,7 @@ IF(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "App ELSE() SET(USE_OMP ON) ENDIF() +SET(USE_OMP OFF FORCE) ExternalProject_Add( extern_warpctc diff --git a/paddle/cuda/include/hl_cpu_gru.cuh b/paddle/cuda/include/hl_cpu_gru.cuh index c0a37ced2a..732799a28b 100644 --- a/paddle/cuda/include/hl_cpu_gru.cuh +++ b/paddle/cuda/include/hl_cpu_gru.cuh @@ -20,11 +20,11 @@ limitations under the License. 
*/ #include "paddle/math/MathFunctions.h" -#ifndef PADDLE_TYPE_DOUBLE -#define CBLAS_GEMM paddle::gemm -#else -#define CBLAS_GEMM paddle::gemm -#endif +// #ifndef PADDLE_TYPE_DOUBLE +// #define CBLAS_GEMM paddle::gemm +// #else +// #define CBLAS_GEMM paddle::gemm +// #endif template void hl_naive_gru_forward_reset_output(OpResetOutput opResetOutput, @@ -219,37 +219,37 @@ void hl_cpu_gru_forward(OpResetOutput opResetOutput, hl_activation_mode_t active_node, hl_activation_mode_t active_gate) { if (value.prevOutValue) { - CBLAS_GEMM(CblasNoTrans, - CblasNoTrans, - batchSize, - 2 * frameSize, - frameSize, - 1, - value.prevOutValue, - frameSize, - value.gateWeight, - frameSize * 2, - 1, - value.gateValue, - frameSize * 3); +// CBLAS_GEMM(CblasNoTrans, +// CblasNoTrans, +// batchSize, +// 2 * frameSize, +// frameSize, +// 1, +// value.prevOutValue, +// frameSize, +// value.gateWeight, +// frameSize * 2, +// 1, +// value.gateValue, +// frameSize * 3); } forward_reset_output(opResetOutput, value, frameSize, batchSize, active_gate); if (value.prevOutValue) { - CBLAS_GEMM(CblasNoTrans, - CblasNoTrans, - batchSize, - frameSize, - frameSize, - 1, - value.resetOutputValue, - frameSize, - value.stateWeight, - frameSize, - 1, - value.gateValue + frameSize * 2, - frameSize * 3); +// CBLAS_GEMM(CblasNoTrans, +// CblasNoTrans, +// batchSize, +// frameSize, +// frameSize, +// 1, +// value.resetOutputValue, +// frameSize, +// value.stateWeight, +// frameSize, +// 1, +// value.gateValue + frameSize * 2, +// frameSize * 3); } forward_final_output(opFinalOutput, value, frameSize, batchSize, active_node); @@ -538,34 +538,34 @@ void hl_cpu_gru_backward(OpStateGrad opStateGrad, frameSize, batchSize, active_node); if (value.prevOutValue && grad.prevOutGrad) { - CBLAS_GEMM(CblasNoTrans, - CblasTrans, - batchSize, - frameSize, - frameSize, - 1, - grad.gateGrad + frameSize * 2, - frameSize * 3, - value.stateWeight, - frameSize, - 0, - grad.resetOutputGrad, - frameSize); +// CBLAS_GEMM(CblasNoTrans, +// CblasTrans, +// batchSize, +// frameSize, +// frameSize, +// 1, +// grad.gateGrad + frameSize * 2, +// frameSize * 3, +// value.stateWeight, +// frameSize, +// 0, +// grad.resetOutputGrad, +// frameSize); if (grad.stateWeightGrad) { - CBLAS_GEMM(CblasTrans, - CblasNoTrans, - frameSize, - frameSize, - batchSize, - 1, - value.resetOutputValue, - frameSize, - grad.gateGrad + frameSize * 2, - frameSize * 3, - 1, - grad.stateWeightGrad, - frameSize); +// CBLAS_GEMM(CblasTrans, +// CblasNoTrans, +// frameSize, +// frameSize, +// batchSize, +// 1, +// value.resetOutputValue, +// frameSize, +// grad.gateGrad + frameSize * 2, +// frameSize * 3, +// 1, +// grad.stateWeightGrad, +// frameSize); } } @@ -573,34 +573,34 @@ void hl_cpu_gru_backward(OpStateGrad opStateGrad, frameSize, batchSize, active_gate); if (grad.prevOutGrad && value.prevOutValue) { - CBLAS_GEMM(CblasNoTrans, - CblasTrans, - batchSize, - frameSize, - frameSize * 2, - 1, - grad.gateGrad, - frameSize * 3, - value.gateWeight, - frameSize * 2, - 1, - grad.prevOutGrad, - frameSize); +// CBLAS_GEMM(CblasNoTrans, +// CblasTrans, +// batchSize, +// frameSize, +// frameSize * 2, +// 1, +// grad.gateGrad, +// frameSize * 3, +// value.gateWeight, +// frameSize * 2, +// 1, +// grad.prevOutGrad, +// frameSize); if (grad.gateWeightGrad) { - CBLAS_GEMM(CblasTrans, - CblasNoTrans, - frameSize, - frameSize * 2, - batchSize, - 1, - value.prevOutValue, - frameSize, - grad.gateGrad, - frameSize * 3, - 1, - grad.gateWeightGrad, - frameSize * 2); +// CBLAS_GEMM(CblasTrans, +// 
CblasNoTrans, +// frameSize, +// frameSize * 2, +// batchSize, +// 1, +// value.prevOutValue, +// frameSize, +// grad.gateGrad, +// frameSize * 3, +// 1, +// grad.gateWeightGrad, +// frameSize * 2); } } } diff --git a/paddle/function/MulOp.cpp b/paddle/function/MulOp.cpp index 91b4b8ed91..25e41edad5 100644 --- a/paddle/function/MulOp.cpp +++ b/paddle/function/MulOp.cpp @@ -13,18 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "MulOp.h" -/// todo(tianbing), delete it -#include -#include "paddle/math/MathFunctions.h" +#include "GemmFunctor.h" #include "paddle/math/SIMDFunctions.h" #include "paddle/utils/ThreadLocal.h" -#ifndef PADDLE_TYPE_DOUBLE -#define GEMM paddle::gemm -#else -#define GEMM paddle::gemm -#endif - namespace { inline void vecAddTo(real* a, const real* b, real scaleB, size_t len) { for (unsigned int i = 0; i < len; ++i) { @@ -114,19 +106,20 @@ void MulOp(CpuMatrix& out, real scaleT, bool aTrans, bool bTrans) { - GEMM(aTrans ? CblasTrans : CblasNoTrans, - bTrans ? CblasTrans : CblasNoTrans, - out.getHeight(), - out.getWidth(), - !aTrans ? a.getWidth() : a.getHeight(), - scaleAB, - a.getData(), - a.getStride(), - b.getData(), - b.getStride(), - scaleT, - out.getData(), - out.getStride()); + BlasGemm::compute( + aTrans, + bTrans, + out.getHeight(), + out.getWidth(), + !aTrans ? a.getWidth() : a.getHeight(), + scaleAB, + a.getData(), + a.getStride(), + b.getData(), + b.getStride(), + scaleT, + out.getData(), + out.getStride()); } /// dense matrix (+)= sparse matrix * dense matrix diff --git a/paddle/math/MathFunctions.cpp b/paddle/math/MathFunctions.cpp index c8ba1074a1..c2f17beeb8 100644 --- a/paddle/math/MathFunctions.cpp +++ b/paddle/math/MathFunctions.cpp @@ -84,6 +84,7 @@ LAPACK_ROUTINE_EACH(DYNAMIC_LOAD_LAPACK_WRAP) namespace paddle { +#ifndef PADDLE_USE_EIGEN_FOR_BLAS template <> void gemm(const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB, @@ -143,6 +144,7 @@ void gemm(const CBLAS_TRANSPOSE transA, C, ldc); } +#endif template <> int getrf(const CBLAS_ORDER order, @@ -182,6 +184,7 @@ int getri(const CBLAS_ORDER order, return dynload::PADDLE_DGETRI(order, N, A, lda, ipiv); } +#ifndef PADDLE_USE_EIGEN_FOR_BLAS template <> void axpy(const int n, const float alpha, const float* x, float* y) { cblas_saxpy(n, alpha, x, 1, y, 1); @@ -201,6 +204,7 @@ template <> double dotProduct(const int n, const double* x, const double* y) { return cblas_ddot(n, x, 1, y, 1); } +#endif #if defined(PADDLE_USE_MKL) || defined(PADDLE_USE_MKLML) diff --git a/paddle/math/MathFunctions.h b/paddle/math/MathFunctions.h index 637643838f..9297ae78c2 100644 --- a/paddle/math/MathFunctions.h +++ b/paddle/math/MathFunctions.h @@ -40,7 +40,14 @@ extern "C" { #ifndef LAPACK_FOUND extern "C" { +#ifndef PADDLE_USE_EIGEN_FOR_BLAS #include +#else +typedef enum CBLAS_ORDER { + CblasRowMajor = 101, + CblasColMajor = 102 +} CBLAS_ORDER; +#endif int LAPACKE_sgetrf( int matrix_layout, int m, int n, float* a, int lda, int* ipiv); int LAPACKE_dgetrf( @@ -56,6 +63,7 @@ int LAPACKE_dgetri( namespace paddle { +#ifndef PADDLE_USE_EIGEN_FOR_BLAS template void gemm(const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB, @@ -70,6 +78,7 @@ void gemm(const CBLAS_TRANSPOSE transA, const T beta, T* C, const int ldc); +#endif template int getrf(const CBLAS_ORDER Order, @@ -84,10 +93,20 @@ int getri( const CBLAS_ORDER Order, const int N, T* A, const int lda, const int* ipiv); template -void axpy(const int n, const T alpha, const T* x, T* y); +void 
axpy(const int n, const T alpha, const T* x, T* y) { + /// y = y + alpha * x + for (int i = 0; i < n; i++) { + y[i] = y[i] + alpha * x[i]; + } +} template -T dotProduct(const int n, const T* x, const T* y); +T dotProduct(const int n, const T* x, const T* y) { + T result = static_cast(0); + for (int i = 0; i < n; i++) { + result += x[i] * y[i]; + } +} template void vExp(const int n, const T* a, T* r); diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 27f7d95b75..fbf3accc9a 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -28,6 +28,7 @@ limitations under the License. */ #include "hl_top_k.h" #include "paddle/utils/Logging.h" +#include "paddle/function/GemmFunctor.h" #include "paddle/utils/ThreadLocal.h" #include "SIMDFunctions.h" @@ -2222,24 +2223,29 @@ void CpuMatrix::mul(CpuMatrix* a, CpuMatrix* b, real scaleAB, real scaleT) { CHECK(!isTransposed()) << "Not supported"; size_t a_col, b_col, a_row, b_row; - CBLAS_TRANSPOSE a_trans, b_trans; + // CBLAS_TRANSPOSE a_trans, b_trans; + bool a_trans, b_trans; if (!a->isTransposed()) { a_col = a->getWidth(); a_row = a->getHeight(); - a_trans = CblasNoTrans; + // a_trans = CblasNoTrans; + a_trans = false; } else { a_col = a->getHeight(); a_row = a->getWidth(); - a_trans = CblasTrans; + // a_trans = CblasTrans; + a_trans = true; } if (!b->isTransposed()) { b_col = b->getWidth(); b_row = b->getHeight(); - b_trans = CblasNoTrans; + // b_trans = CblasNoTrans; + b_trans = false; } else { b_col = b->getHeight(); b_row = b->getWidth(); - b_trans = CblasTrans; + // b_trans = CblasTrans; + b_trans = true; } CHECK_EQ(a_col, b_row); @@ -2256,7 +2262,7 @@ void CpuMatrix::mul(CpuMatrix* a, CpuMatrix* b, real scaleAB, real scaleT) { int lda = a->getStride(); int ldb = b->getStride(); int ldc = getStride(); - gemm( + BlasGemm::compute( a_trans, b_trans, M, N, K, scaleAB, A, lda, B, ldb, scaleT, C, ldc); } diff --git a/paddle/scripts/docker/build_android.sh b/paddle/scripts/docker/build_android.sh index 593ae28e49..a61c7c40e9 100644 --- a/paddle/scripts/docker/build_android.sh +++ b/paddle/scripts/docker/build_android.sh @@ -2,11 +2,31 @@ set -xe -mkdir -p /paddle/build_android/$ANDROID_ABI -cd /paddle/build_android/$ANDROID_ABI -rm -rf /paddle/install 2>/dev/null || true +COMPILER=gcc +USE_EIGEN=ON +if [ $COMPILER == clang ]; then + SUFFIX=_clang + C_COMPILER=clang + CXX_COMPILER=clang++ +else + SUFFIX=_gcc + C_COMPILER=gcc + CXX_COMPILER=g++ +fi +if [ $USE_EIGEN == ON ]; then + SUFFIX=${SUFFIX}_eigen +else + SUFFIX=${SUFFIX}_openblas +fi -THIRD_PARTY_PATH=/paddle/third_party_android/$ANDROID_ABI +BUILD_ROOT=/paddle/build_android$SUFFIX +DEST_ROOT=/paddle/install$SUFFIX + +rm -rf $BUILD_ROOT 2>/dev/null || true +mkdir -p $BUILD_ROOT +cd $BUILD_ROOT + +THIRD_PARTY_PATH=/paddle/third_party_android$SUFFIX/$ANDROID_ABI if [ $ANDROID_ABI == "armeabi-v7a" ]; then cmake -DCMAKE_SYSTEM_NAME=Android \ @@ -14,27 +34,34 @@ if [ $ANDROID_ABI == "armeabi-v7a" ]; then -DANDROID_ABI=$ANDROID_ABI \ -DANDROID_ARM_NEON=ON \ -DANDROID_ARM_MODE=ON \ + -DCMAKE_C_COMPILER=$ANDROID_ARM_STANDALONE_TOOLCHAIN/bin/arm-linux-androideabi-${C_COMPILER} \ + -DCMAKE_CXX_COMPILER=$ANDROID_ARM_STANDALONE_TOOLCHAIN/bin/arm-linux-androideabi-${CXX_COMPILER} \ -DHOST_C_COMPILER=/usr/bin/gcc \ -DHOST_CXX_COMPILER=/usr/bin/g++ \ - -DCMAKE_INSTALL_PREFIX=/paddle/install \ + -DCMAKE_INSTALL_PREFIX=$DEST_ROOT \ -DTHIRD_PARTY_PATH=$THIRD_PARTY_PATH \ -DCMAKE_BUILD_TYPE=Release \ + -DUSE_EIGEN_FOR_BLAS=${USE_EIGEN} \ -DWITH_C_API=ON \ -DWITH_SWIG_PY=OFF \ - /paddle -elif 
[ $ANDROID_ABI == "arm64-v7a" ]; then + -DWITH_STYLE_CHECK=OFF \ + .. +elif [ $ANDROID_ABI == "arm64-v8a" ]; then cmake -DCMAKE_SYSTEM_NAME=Android \ -DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_ARM64_STANDALONE_TOOLCHAIN \ -DANDROID_ABI=$ANDROID_ABI \ -DANDROID_ARM_MODE=ON \ + -DCMAKE_C_COMPILER=$ANDROID_ARM64_STANDALONE_TOOLCHAIN/bin/aarch64-linux-android-${C_COMPILER} \ + -DCMAKE_CXX_COMPILER=$ANDROID_ARM64_STANDALONE_TOOLCHAIN/bin/aarch64-linux-android-${CXX_COMPILER} \ -DHOST_C_COMPILER=/usr/bin/gcc \ -DHOST_CXX_COMPILER=/usr/bin/g++ \ - -DCMAKE_INSTALL_PREFIX=/paddle/install \ + -DCMAKE_INSTALL_PREFIX=$DEST_ROOT \ -DTHIRD_PARTY_PATH=$THIRD_PARTY_PATH \ -DCMAKE_BUILD_TYPE=Release \ + -DUSE_EIGEN_FOR_BLAS=${USE_EIGEN} \ -DWITH_C_API=ON \ -DWITH_SWIG_PY=OFF \ - /paddle + .. elif [ $ANDROID_ABI == "armeabi" ]; then cmake -DCMAKE_SYSTEM_NAME=Android \ -DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_ARM_STANDALONE_TOOLCHAIN \ @@ -47,10 +74,10 @@ elif [ $ANDROID_ABI == "armeabi" ]; then -DCMAKE_BUILD_TYPE=Release \ -DWITH_C_API=ON \ -DWITH_SWIG_PY=OFF \ - /paddle + .. else echo "Invalid ANDROID_ABI: $ANDROID_ABI" fi -make -j `nproc` -make install -j `nproc` +make VERBOSE=1 -j2 +make install -j2 From f0b25c4cfb21b41e8bc7222d44f05a9818dc9b47 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Mon, 28 Aug 2017 12:20:28 +0800 Subject: [PATCH 135/170] follow comments to refine the comments. --- python/paddle/v2/parameters.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/python/paddle/v2/parameters.py b/python/paddle/v2/parameters.py index cc3adf6f48..4cfd91882e 100644 --- a/python/paddle/v2/parameters.py +++ b/python/paddle/v2/parameters.py @@ -44,21 +44,20 @@ def create(layers): class Parameters(object): """ `Parameters` manages all the learnable parameters in a neural network. - It stores parameters' information in an OrderedDict, key of which is - the name of a parameter, and value related to a key is a parameter's - configuration, such as initialization mean and std, its size, whether it is - a static parameter, and so on. - - :param __param_conf__: this member stores the configurations of learnable - parameters in a network in an OrderedDict. The parameters are added by - following their creation order in the neural network one by one: - parameters of the previous layers in a network are careted first. - When a user iterates over this dict, he can visit parameters in the - network from button to up. + It stores parameters' information in an OrderedDict. The key is + the name of a parameter, and value is a parameter's configuration(in + protobuf format), such as initialization mean and std, its size, whether it + is a static parameter, and so on. + + :param __param_conf__: store the configurations of learnable parameters in + the network in an OrderedDict. Parameter is added one by one into the + dict by following their created order in the network: parameters of + the previous layers in a network are careted first. You can visit the + parameters from bottom to top by iterating over this dict. :type __param_conf__: OrderedDict :param __gradient_machines__: all of the parameters in a neural network are - appended to a Paddle gradient machine, which is used internally to copy - the parameter values between the C++ and Python end. + appended to a PaddlePaddle gradient machine, which is used internally to + copy parameter values between C++ and Python end. 
:type __gradient_machines__: list :param __tmp_params__: a dict to store dummy parameters if no __gradient_machines__ is appended to `Parameters`. @@ -271,7 +270,7 @@ class Parameters(object): append gradient machine to parameters. This method is used internally in Trainer.train. - :param gradient_machine: Paddle C++ GradientMachine object. + :param gradient_machine: PaddlePaddle C++ GradientMachine object. :type gradient_machine: api.GradientMachine :return: """ From 227fdfb65dcb45921398690610886ebdb9b34d98 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Mon, 28 Aug 2017 13:35:51 +0800 Subject: [PATCH 136/170] Refine NeonDepthwiseConvFunction. --- paddle/function/neon/NeonDepthwiseConv.cpp | 70 ++++++++-------------- 1 file changed, 26 insertions(+), 44 deletions(-) diff --git a/paddle/function/neon/NeonDepthwiseConv.cpp b/paddle/function/neon/NeonDepthwiseConv.cpp index 3fe28b1de3..f09e98587d 100644 --- a/paddle/function/neon/NeonDepthwiseConv.cpp +++ b/paddle/function/neon/NeonDepthwiseConv.cpp @@ -509,10 +509,9 @@ public: size_t filterMultiplier = outputChannels / groups_; CHECK_EQ(inputChannels, groups_); - // only support + // only support strideH() == strideW() and filterHeight == filterWidth. CHECK_EQ(strideH(), strideW()); CHECK_EQ(filterHeight, filterWidth); - CHECK_LT(strideH(), size_t(3)); float* inputData = inputs[0].data(); float* filterData = inputs[1].data(); @@ -538,49 +537,32 @@ public: inputWidth += 2 * paddingW(); } - for (size_t i = 0; i < batchSize; i++) { - if (filterWidth == 3 && strideH() == 1) { - DepthwiseConvKernel<3, 1>::run(inputPadding, - filterData, - inputHeight, - inputWidth, - outputChannels, - outputHeight, - outputWidth, - filterMultiplier, - outputData); - } else if (filterWidth == 3 && strideH() == 2) { - DepthwiseConvKernel<3, 2>::run(inputPadding, - filterData, - inputHeight, - inputWidth, - outputChannels, - outputHeight, - outputWidth, - filterMultiplier, - outputData); - } else if (filterWidth == 4 && strideH() == 1) { - DepthwiseConvKernel<4, 1>::run(inputPadding, - filterData, - inputHeight, - inputWidth, - outputChannels, - outputHeight, - outputWidth, - filterMultiplier, - outputData); - } else if (filterWidth == 4 && strideH() == 2) { - DepthwiseConvKernel<4, 2>::run(inputPadding, - filterData, - inputHeight, - inputWidth, - outputChannels, - outputHeight, - outputWidth, - filterMultiplier, - outputData); - } + std::function + DepthWiseConv; + + if (filterWidth == 3 && strideW() == 1) { + DepthWiseConv = DepthwiseConvKernel<3, 1>::run; + } else if (filterWidth == 3 && strideW() == 2) { + DepthWiseConv = DepthwiseConvKernel<3, 2>::run; + } else if (filterWidth == 4 && strideW() == 1) { + DepthWiseConv = DepthwiseConvKernel<4, 1>::run; + } else if (filterWidth == 4 && strideW() == 2) { + DepthWiseConv = DepthwiseConvKernel<4, 2>::run; + } else { + LOG(FATAL) << "Not supported"; + } + for (size_t i = 0; i < batchSize; i++) { + DepthWiseConv(inputPadding, + filterData, + inputHeight, + inputWidth, + outputChannels, + outputHeight, + outputWidth, + filterMultiplier, + outputData); inputPadding += inputChannels * inputHeight * inputWidth; outputData += outputChannels * outputHeight * outputWidth; } From 3a75b4b70cd21449691eaca82f1805759622e640 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Mon, 28 Aug 2017 14:49:11 +0800 Subject: [PATCH 137/170] Fix CMakeLists.text --- paddle/function/CMakeLists.txt | 2 +- paddle/function/DepthwiseConvOpTest.cpp | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/paddle/function/CMakeLists.txt 
b/paddle/function/CMakeLists.txt index 05f808a6a1..f43f15e5ca 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -44,11 +44,11 @@ if(WITH_GPU) add_simple_unittest(RowConvOpTest) add_simple_unittest(BlockExpandOpTest) add_simple_unittest(CropOpTest) - add_simple_unittest(DepthwiseConvOpTest) endif() add_simple_unittest(Im2ColTest) add_simple_unittest(GemmConvOpTest) +add_simple_unittest(DepthwiseConvOpTest) endif() add_style_check_target(paddle_function ${h_files}) diff --git a/paddle/function/DepthwiseConvOpTest.cpp b/paddle/function/DepthwiseConvOpTest.cpp index bdace2c372..d8e8c889d5 100644 --- a/paddle/function/DepthwiseConvOpTest.cpp +++ b/paddle/function/DepthwiseConvOpTest.cpp @@ -34,9 +34,13 @@ TEST(DepthwiseConv, BackwardFilter) { } #endif +#if defined(__ARM_NEON__) || defined(__ARM_NEON) + TEST(DepthwiseConv, Forward) { DepthwiseConvolution( "GemmConv-CPU", "NeonDepthwiseConv-CPU", forward); } +#endif + } // namespace paddle From 34a92ab41a407679d454f437f1f3118b81dd1b34 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Mon, 28 Aug 2017 14:58:00 +0800 Subject: [PATCH 138/170] ExpandConvLayer adds support of arm-neon acceleration. --- paddle/gserver/layers/ExpandConvLayer.cpp | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/paddle/gserver/layers/ExpandConvLayer.cpp b/paddle/gserver/layers/ExpandConvLayer.cpp index 0ece279931..0e84581769 100644 --- a/paddle/gserver/layers/ExpandConvLayer.cpp +++ b/paddle/gserver/layers/ExpandConvLayer.cpp @@ -29,6 +29,10 @@ namespace paddle { REGISTER_LAYER(exconv, ExpandConvLayer); REGISTER_LAYER(exconvt, ExpandConvLayer); +inline bool isDepthwiseConv(int channels, int groups) { + return channels == groups; +} + bool ExpandConvLayer::init(const LayerMap &layerMap, const ParameterMap ¶meterMap) { /* Initialize the basic convolutional parent class */ @@ -47,14 +51,23 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, std::vector paddings = {(size_t)paddingY_[i], (size_t)padding_[i]}; std::vector strides = {(size_t)strideY_[i], (size_t)stride_[i]}; - if (useGpu_ && (size_t)groups_[i] == (size_t)channels_[i] && !isDeconv_) { + // Convolution Layer uses the GemmConv function by default. 
+ convType = "GemmConv"; + convGradInputType = "GemmConvGradInput"; + convGradFilterType = "GemmConvGradFilter"; + + // If depth wise convolution and useGpu == true + if (useGpu_ && isDepthwiseConv(channels_[i], groups_[i]) && !isDeconv_) { convType = "DepthwiseConv"; convGradInputType = "DepthwiseConvGradInput"; convGradFilterType = "DepthwiseConvGradFilter"; - } else { - convType = "GemmConv"; - convGradInputType = "GemmConvGradInput"; - convGradFilterType = "GemmConvGradFilter"; + } + + // If depth wise convolution and useGpu == false and ARM-NEON + if (!useGpu_ && isDepthwiseConv(channels_[i], groups_[i]) && !isDeconv_) { +#if defined(__ARM_NEON__) || defined(__ARM_NEON) + convType = "NeonDepthwiseConv"; +#endif } if (FLAGS_use_nnpack && !isDeconv_) { From 2710584ff1d5d299361c1b4492d3368ccbdb0378 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Wed, 23 Aug 2017 22:05:50 +0800 Subject: [PATCH 139/170] fix above comments --- python/paddle/trainer/config_parser.py | 212 ++++++------------ .../paddle/trainer_config_helpers/layers.py | 76 ++----- .../configs/conv3d_deconv3d_test_config.py | 97 ++++---- 3 files changed, 130 insertions(+), 255 deletions(-) diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 49b3c430e7..c0843a7357 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -901,20 +901,14 @@ class Conv3D(Cfg): padding_z=None, stride_z=None): self.add_keys(locals()) - if filter_size_y is None: - self.filter_size_y = filter_size - if padding_y is None: - self.padding_y = padding - if stride_y is None: - self.stride_y = stride + self.filter_size_y = filter_size_y if filter_size_y else filter_size + self.filter_size_z = filter_size_z if filter_size_z else filter_size + self.padding_y = padding_y if padding_y else padding + self.padding_z = padding_z if padding_z else padding + self.stride_y = stride_y if stride_y else stride + self.stride_z = stride_z if stride_z else stride if output_x is not None: config_assert(output_x <= 0) - if filter_size_z is None: - self.filter_size_z = filter_size - if padding_z is None: - self.padding_z = padding - if stride_z is None: - self.stride_z = stride @config_class @@ -1206,10 +1200,10 @@ def get_img_size(input_layer_name, channels): def get_img3d_size(input_layer_name, channels): input = g_layer_map[input_layer_name] img_pixels = input.size / channels - img_size = input.width if input.width > 0 else int(img_pixels**0.5) - img_size_y = input.height if input.height > 0 else int(img_pixels / - img_size) - img_size_z = input.depth if input.depth > 1 else 1 + img_size = input.width + img_size_y = input.height + img_size_z = input.depth + config_assert( img_size * img_size_y * img_size_z == img_pixels, "Input layer %s: Incorrect input image size %d * %d * %d for input image pixels %d" @@ -2000,8 +1994,10 @@ class ConvLayer(ConvLayerBase): layer_type = 'cudnn_conv' -@config_layer('conv_3d') -class Conv3DLayerBase(LayerBase): +@config_layer('convt') +class ConvTransLayerBase(LayerBase): + layer_type = 'convt' + def __init__(self, name, inputs=[], @@ -2009,7 +2005,7 @@ class Conv3DLayerBase(LayerBase): num_filters=None, shared_biases=False, **xargs): - super(Conv3DLayerBase, self).__init__( + super(ConvTransLayerBase, self).__init__( name, self.layer_type, 0, inputs=inputs, **xargs) if num_filters is not None: @@ -2018,12 +2014,17 @@ class Conv3DLayerBase(LayerBase): use_gpu = int(g_command_config_args.get("use_gpu", 0)) parallel_nn = 
int(g_command_config_args.get("parallel_nn", 0)) - # Automatically select cudnn_type for GPU and exconv for CPU - # if set type=conv, but still reserve the way user specify - # exconv or cudnn_conv manually. - if self.layer_type == "cudnn_conv3d": - config_assert(use_gpu, "cudnn_conv3d only support GPU") + # Automatically select cudnn_type for GPU and exconvt for CPU + # if set type=exconvt, but still reserve the way user specify + # exconvt or cudnn_convt manually. + if self.layer_type == "cudnn_convt": + config_assert(use_gpu, "cudnn_convt only support GPU") + if (use_gpu == 1 and self.layer_type != "exconvt" and + (parallel_nn == 0 or self.config.device > -1)): + self.layer_type = "cudnn_convt" + else: + self.layer_type = "exconvt" # need to specify layer in config self.config.type = self.layer_type @@ -2032,15 +2033,17 @@ class Conv3DLayerBase(LayerBase): for input_index in xrange(len(self.inputs)): input_layer = self.get_input_layer(input_index) + parse_conv( + self.inputs[input_index].conv, + input_layer.name, + self.config.inputs[input_index].conv_conf, + num_filters, + trans=True) conv_conf = self.config.inputs[input_index].conv_conf - parse_conv3d( - self.inputs[input_index].conv, input_layer.name, conv_conf, - num_filters - ) # for z-axis pad:0, strid:1, filter_size:1, img_size:1 psize = self.calc_parameter_size(conv_conf) self.create_input_parameter(input_index, psize) - self.set_cnn_layer(name, conv_conf.output_z, conv_conf.output_y, - conv_conf.output_x, self.config.num_filters) + self.set_cnn_layer(name, conv_conf.img_size_y, conv_conf.img_size, + self.config.num_filters) psize = self.config.size if shared_biases: @@ -2048,62 +2051,42 @@ class Conv3DLayerBase(LayerBase): self.create_bias_parameter(bias, psize, [psize, 1]) def calc_parameter_size(self, conv_conf): - return self.config.num_filters * conv_conf.filter_channels \ - * (conv_conf.filter_size * conv_conf.filter_size_y \ - * conv_conf.filter_size_z) + return conv_conf.channels * conv_conf.filter_channels \ + * (conv_conf.filter_size * conv_conf.filter_size_y) - def set_layer_height_width(self, depth, height, width): - self.config.depth = depth - self.config.height = height - self.config.width = width - def set_cnn_layer(self, - input_layer_name, - depth, - height, - width, - channels, - is_print=True): - size = depth * height * width * channels - self.set_layer_size(size) - self.set_layer_height_width(depth, height, width) - if is_print: - print("output for %s: c = %d, d = %d, h = %d, w = %d, size = %d" % - (input_layer_name, channels, depth, height, width, size)) +@config_layer('exconvt') +class ConvTransLayer(ConvTransLayerBase): + layer_type = 'exconvt' -@config_layer('conv3d') -class Conv3DLayer(Conv3DLayerBase): - layer_type = 'conv3d' +@config_layer('cudnn_convt') +class ConvTransLayer(ConvTransLayerBase): + layer_type = 'cudnn_convt' -@config_layer('convt_3d') -class Conv3DTransLayerBase(LayerBase): +@config_layer('conv_3d') +class Conv3DLayerBase(LayerBase): def __init__(self, name, inputs=[], bias=True, num_filters=None, - shared_biases=False, + shared_biases=True, **xargs): - super(Conv3DTransLayerBase, self).__init__( + super(Conv3DLayerBase, self).__init__( name, self.layer_type, 0, inputs=inputs, **xargs) if num_filters is not None: self.config.num_filters = num_filters - use_gpu = int(g_command_config_args.get("use_gpu", 0)) - parallel_nn = int(g_command_config_args.get("parallel_nn", 0)) - - # Automatically select cudnn_type for GPU and exconv for CPU - # if set type=conv, but still reserve the way user 
specify - # exconv or cudnn_conv manually. - if self.layer_type == "cudnn_deconv3d": - config_assert(use_gpu, "cudnn_conv3d only support GPU") - # need to specify layer in config self.config.type = self.layer_type + trans = False + if self.config.type == "deconv3d": + trans = True + if shared_biases is not None: self.config.shared_biases = shared_biases @@ -2115,12 +2098,17 @@ class Conv3DTransLayerBase(LayerBase): input_layer.name, conv_conf, num_filters, - trans=True + trans=trans ) # for z-axis pad:0, strid:1, filter_size:1, img_size:1 psize = self.calc_parameter_size(conv_conf) self.create_input_parameter(input_index, psize) - self.set_cnn_layer(name, conv_conf.img_size_z, conv_conf.img_size_y, - conv_conf.img_size, self.config.num_filters) + if trans: + self.set_cnn_layer(name, conv_conf.img_size_z, + conv_conf.img_size_y, conv_conf.img_size, + self.config.num_filters) + else: + self.set_cnn_layer(name, conv_conf.output_z, conv_conf.output_y, + conv_conf.output_x, self.config.num_filters) psize = self.config.size if shared_biases: @@ -2132,11 +2120,6 @@ class Conv3DTransLayerBase(LayerBase): * (conv_conf.filter_size * conv_conf.filter_size_y \ * conv_conf.filter_size_z) - def set_layer_height_width(self, depth, height, width): - self.config.depth = depth - self.config.height = height - self.config.width = width - def set_cnn_layer(self, input_layer_name, depth, @@ -2146,86 +2129,21 @@ class Conv3DTransLayerBase(LayerBase): is_print=True): size = depth * height * width * channels self.set_layer_size(size) - self.set_layer_height_width(depth, height, width) + self.set_layer_height_width(height, width) + self.set_layer_depth(depth) if is_print: print("output for %s: c = %d, d = %d, h = %d, w = %d, size = %d" % (input_layer_name, channels, depth, height, width, size)) -@config_layer('deconv3d') -class DeConv3DLayer(Conv3DTransLayerBase): - layer_type = 'deconv3d' - - -@config_layer('convt') -class ConvTransLayerBase(LayerBase): - layer_type = 'convt' - - def __init__(self, - name, - inputs=[], - bias=True, - num_filters=None, - shared_biases=False, - **xargs): - super(ConvTransLayerBase, self).__init__( - name, self.layer_type, 0, inputs=inputs, **xargs) - - if num_filters is not None: - self.config.num_filters = num_filters - - use_gpu = int(g_command_config_args.get("use_gpu", 0)) - parallel_nn = int(g_command_config_args.get("parallel_nn", 0)) - - # Automatically select cudnn_type for GPU and exconvt for CPU - # if set type=exconvt, but still reserve the way user specify - # exconvt or cudnn_convt manually. 
- if self.layer_type == "cudnn_convt": - config_assert(use_gpu, "cudnn_convt only support GPU") - - if (use_gpu == 1 and self.layer_type != "exconvt" and - (parallel_nn == 0 or self.config.device > -1)): - self.layer_type = "cudnn_convt" - else: - self.layer_type = "exconvt" - # need to specify layer in config - self.config.type = self.layer_type - - if shared_biases is not None: - self.config.shared_biases = shared_biases - - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - parse_conv( - self.inputs[input_index].conv, - input_layer.name, - self.config.inputs[input_index].conv_conf, - num_filters, - trans=True) - conv_conf = self.config.inputs[input_index].conv_conf - psize = self.calc_parameter_size(conv_conf) - self.create_input_parameter(input_index, psize) - self.set_cnn_layer(name, conv_conf.img_size_y, conv_conf.img_size, - self.config.num_filters) - - psize = self.config.size - if shared_biases: - psize = self.config.num_filters - self.create_bias_parameter(bias, psize, [psize, 1]) - - def calc_parameter_size(self, conv_conf): - return conv_conf.channels * conv_conf.filter_channels \ - * (conv_conf.filter_size * conv_conf.filter_size_y) - - -@config_layer('exconvt') -class ConvTransLayer(ConvTransLayerBase): - layer_type = 'exconvt' +@config_layer('conv3d') +class Conv3DLayer(Conv3DLayerBase): + layer_type = 'conv3d' -@config_layer('cudnn_convt') -class ConvTransLayer(ConvTransLayerBase): - layer_type = 'cudnn_convt' +@config_layer('deconv3d') +class Conv3DLayer(Conv3DLayerBase): + layer_type = 'deconv3d' @config_layer('norm') diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 6953f134c5..e3ae81459f 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -6161,12 +6161,6 @@ def img_conv3d_layer(input, param_attr=None, shared_biases=True, layer_attr=None, - filter_size_y=None, - stride_y=None, - padding_y=None, - filter_size_z=None, - stride_z=None, - padding_z=None, trans=False, layer_type=None): """ @@ -6175,7 +6169,7 @@ def img_conv3d_layer(input, .. code-block:: python - conv = img_conv3d_layer(input=data, filter_size=1, filter_size_y=1, + conv = img_conv3d_layer(input=data, filter_size=1, num_channels=8, num_filters=16, stride=1, bias_attr=False, @@ -6185,13 +6179,8 @@ def img_conv3d_layer(input, :type name: basestring :param input: Layer Input. :type input: LayerOutput - :param filter_size: The x dimension of a filter kernel. Or input a tuple for - two image dimension. + :param filter_size: The x dimension of a filter kernel. Or input a list. :type filter_size: int|tuple|list - :param filter_size_y: The y dimension of a filter kernel. Since PaddlePaddle - currently supports rectangular filters, the filter's - shape will be (filter_size, filter_size_y). - :type filter_size_y: int|None :param num_filters: Each filter group's number of filter :param act: Activation type. Default is tanh :type act: BaseActivation @@ -6200,13 +6189,9 @@ def img_conv3d_layer(input, :param stride: The x dimension of the stride. Or input a tuple for two image dimension. :type stride: int|tuple|list - :param stride_y: The y dimension of the stride. - :type stride_y: int :param padding: The x dimension of the padding. Or input a tuple for two image dimension :type padding: int|tuple|list - :param padding_y: The y dimension of the padding. - :type padding_y: int :param bias_attr: Convolution bias attribute. None means default bias. 
False means no bias. :type bias_attr: ParameterAttribute|False @@ -6233,47 +6218,26 @@ def img_conv3d_layer(input, assert input.num_filters is not None num_channels = input.num_filters - if filter_size_y is None: - if isinstance(filter_size, collections.Sequence): - assert len(filter_size) == 2 - filter_size, filter_size_y = filter_size - else: - filter_size_y = filter_size - - if filter_size_z is None: - if isinstance(filter_size, collections.Sequence): - assert len(filter_size) == 2 - filter_size, filter_size_z = filter_size - else: - filter_size_z = filter_size - - if stride_y is None: - if isinstance(stride, collections.Sequence): - assert len(stride) == 2 - stride, stride_y = stride - else: - stride_y = stride - - if stride_z is None: - if isinstance(stride, collections.Sequence): - assert len(stride) == 2 - stride, stride_z = stride - else: - stride_z = stride + if isinstance(filter_size, collections.Sequence): + assert len(filter_size) == 3 + filter_size, filter_size_y, filter_size_z = filter_size + else: + filter_size_y = filter_size + filter_size_z = filter_size - if padding_y is None: - if isinstance(padding, collections.Sequence): - assert len(padding) == 2 - padding, padding_y = padding - else: - padding_y = padding + if isinstance(stride, collections.Sequence): + assert len(stride) == 3 + stride, stride_y, stride_z = stride + else: + stride_y = stride + stride_z = stride - if padding_z is None: - if isinstance(padding, collections.Sequence): - assert len(padding) == 2 - padding, padding_z = padding - else: - padding_z = padding + if isinstance(padding, collections.Sequence): + assert len(padding) == 3 + padding, padding_y, padding_z = padding + else: + padding_y = padding + padding_z = padding if param_attr.attr.get('initial_smart'): # special initial for conv layers. 
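A brief, hedged illustration of the simplified `img_conv3d_layer` interface from this patch: `filter_size`, `stride`, and `padding` now accept either a single int or a three-element `[x, y, z]` list, replacing the separate `*_y`/`*_z` keyword arguments. The sketch below only mirrors the calling conventions visible in this diff and in the test config that follows; the layer names, sizes, and channel count are illustrative assumptions, and it presumes a trainer-config environment where `data_layer` accepts `height`, `width`, and `depth`.

```python
# Minimal sketch (illustrative values) of the two equivalent calling styles.
from paddle.trainer_config_helpers import *

num_channels = 3  # assumed value, not taken from the patch
data = data_layer(
    name='in', size=12096 * num_channels, height=48, width=42, depth=6)

# Scalar form: one value is reused for the x, y and z dimensions.
conv_a = img_conv3d_layer(
    input=data,
    num_channels=num_channels,
    num_filters=16,
    filter_size=3,
    stride=2,
    padding=1,
    layer_type="conv3d",
    act=LinearActivation())

# List form: per-dimension [x, y, z] values passed in a single argument.
conv_b = img_conv3d_layer(
    input=data,
    num_channels=num_channels,
    num_filters=16,
    filter_size=[3, 3, 3],
    stride=[2, 2, 2],
    padding=[1, 1, 1],
    layer_type="conv3d",
    act=LinearActivation())
```

When a list is given, the helper asserts it has exactly three elements and unpacks them in x, y, z order, as the parsing code in the hunk above shows.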
diff --git a/python/paddle/trainer_config_helpers/tests/configs/conv3d_deconv3d_test_config.py b/python/paddle/trainer_config_helpers/tests/configs/conv3d_deconv3d_test_config.py index da0d23d057..15f7c1d271 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/conv3d_deconv3d_test_config.py +++ b/python/paddle/trainer_config_helpers/tests/configs/conv3d_deconv3d_test_config.py @@ -14,23 +14,44 @@ padding_y = 1 padding_z = 1 groups = 1 -data = data_layer( - name='data1', size=12096 * num_channels, height=48, width=42, depth=6) +data1 = data_layer(name='data1', size=2016 * num_channels, height=48, width=42) -conv3d = img_conv3d_layer( +img_conv_layer( + input=data1, + filter_size=filter_size, + num_channels=num_channels, + num_filters=16, + stride=stride, + padding=padding, + act=LinearActivation(), + bias_attr=False) + +data = data_layer( + name='data', size=12096 * num_channels, height=48, width=42, depth=6) +# first +conv3d_1 = img_conv3d_layer( input=data, name='conv3d_1', num_filters=16, num_channels=num_channels, filter_size=filter_size, - filter_size_y=filter_size, - filter_size_z=filter_size, stride=stride, - stride_y=stride_y, - stride_z=stride_z, padding=padding, - padding_y=padding_y, - padding_z=padding_z, + groups=groups, + bias_attr=True, + shared_biases=True, + trans=False, + layer_type="conv3d", + act=LinearActivation()) +# second +conv3d_2 = img_conv3d_layer( + input=data, + name='conv3d_2', + num_filters=16, + num_channels=num_channels, + filter_size=[filter_size, filter_size_y, filter_size_z], + stride=[stride, stride_y, stride_z], + padding=[padding, padding_y, padding_z], groups=groups, bias_attr=True, shared_biases=True, @@ -38,61 +59,33 @@ conv3d = img_conv3d_layer( layer_type="conv3d", act=LinearActivation()) -deconv3d = img_conv3d_layer( +# first +deconv3d_1 = img_conv3d_layer( input=data, name='deconv3d_1', num_filters=16, num_channels=num_channels, filter_size=filter_size, - filter_size_y=filter_size, - filter_size_z=filter_size, stride=stride, - stride_y=stride_y, - stride_z=stride_z, padding=padding, - padding_y=padding_y, - padding_z=padding_z, groups=groups, bias_attr=True, shared_biases=True, - trans=True, + trans=False, layer_type="deconv3d", act=LinearActivation()) - -data = data_layer(name="input", size=8 * 16 * 16) -conv1 = img_conv_layer( - input=data, - filter_size=1, - filter_size_y=1, - num_channels=8, - num_filters=16, - stride=1, - bias_attr=False, - act=ReluActivation(), - layer_type="exconv") -conv2 = img_conv_layer( - input=data, - filter_size=1, - filter_size_y=1, - num_channels=8, - num_filters=16, - stride=1, - bias_attr=False, - act=ReluActivation(), - layer_type="exconv") - -concat = concat_layer(input=[conv1, conv2]) - -conv = img_conv_layer( +# second +deconv3d_2 = img_conv3d_layer( input=data, - filter_size=1, - filter_size_y=1, - num_channels=8, + name='deconv3d_2', num_filters=16, - stride=1, + num_channels=num_channels, + filter_size=[filter_size, filter_size_y, filter_size_z], + stride=[stride, stride_y, stride_z], + padding=[padding, padding_y, padding_z], + groups=groups, bias_attr=True, - act=LinearActivation(), - groups=2, - layer_type="exconv") - -outputs(concat, conv) + shared_biases=True, + trans=False, + layer_type="deconv3d", + act=LinearActivation()) From e63ad0a6bdb36967d417633a074e0e966ca55e78 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Mon, 28 Aug 2017 15:15:26 +0800 Subject: [PATCH 140/170] HuberRegressionLoss and HuberTwoClassification support multi-dimension data --- paddle/gserver/layers/CostLayer.cpp | 67 
++++++++++++++++++----------- 1 file changed, 41 insertions(+), 26 deletions(-) diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp index 7f648070f2..aa4a26a83f 100644 --- a/paddle/gserver/layers/CostLayer.cpp +++ b/paddle/gserver/layers/CostLayer.cpp @@ -611,22 +611,26 @@ void HuberRegressionLoss::forwardImp(Matrix& output, Matrix& target) { HuberCost::forwardImp(output, label, target); size_t numSamples = target.getHeight(); + size_t dim = output.getWidth(); CHECK(label.value); CHECK_EQ((*label.value).getHeight(), numSamples); CHECK_EQ(output.getHeight(), numSamples); - CHECK_EQ(output.getWidth(), (*label.value).getWidth()); + CHECK_EQ(dim, (*label.value).getWidth()); CHECK_EQ(target.getWidth(), (size_t)1); real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData(); real* lbl = useGpu_ ? tmpCpuInput_[1].value->getData() : (*label.value).getData(); - std::vector cost(numSamples); + std::vector cost(numSamples, 0); for (size_t i = 0; i < numSamples; ++i) { - real a = std::abs(lbl[i] - out[i]); - if (a <= delta_) - cost[i] = a * a / 2; - else - cost[i] = delta_ * (a - delta_ / 2); + for (size_t j = 0; j < dim; ++j) { + int index = i * dim + j; + real a = std::abs(lbl[index] - out[index]); + if (a <= delta_) + cost[i] += a * a / 2; + else + cost[i] += delta_ * (a - delta_ / 2); + } } target.copyFrom(cost.data(), numSamples); } @@ -635,18 +639,22 @@ void HuberRegressionLoss::backwardImp(Matrix& output, Argument& label, Matrix& outputG) { size_t numSamples = output.getHeight(); + size_t dim = output.getWidth(); real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData(); real* lbl = useGpu_ ? tmpCpuInput_[1].value->getData() : (*label.value).getData(); real* grad = useGpu_ ? tmpCpuInput_[0].grad->getData() : outputG.getData(); for (size_t i = 0; i < numSamples; ++i) { - real a = lbl[i] - out[i]; - if (std::abs(a) <= delta_) - grad[i] += -a; - else - grad[i] += a > 0 ? -delta_ : delta_; + for (size_t j = 0; j < dim; ++j) { + int index = i * dim + j; + real a = lbl[index] - out[index]; + if (std::abs(a) <= delta_) + grad[index] += -a; + else + grad[index] += a > 0 ? -delta_ : delta_; + } } - if (useGpu_) outputG.copyFrom(grad, numSamples); + if (useGpu_) outputG.copyFrom(grad, numSamples * dim); } // @@ -664,23 +672,25 @@ void HuberTwoClassification::forwardImp(Matrix& output, Matrix& target) { HuberCost::forwardImp(output, label, target); size_t numSamples = target.getHeight(); + size_t dim = output.getWidth(); CHECK(label.ids); CHECK_EQ((*label.ids).getSize(), numSamples); CHECK_EQ(output.getHeight(), numSamples); - CHECK_EQ(output.getWidth(), (size_t)1); CHECK_EQ(target.getWidth(), (size_t)1); real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData(); int* lbl = useGpu_ ? 
tmpCpuInput_[1].ids->getData() : (*label.ids).getData(); - std::vector cost(numSamples); + std::vector cost(numSamples, 0); for (size_t i = 0; i < numSamples; ++i) { int y = 2 * lbl[i] - 1; - if (out[i] * y < -1) - cost[i] = -4 * out[i] * y; - else if (out[i] * y < 1) - cost[i] = (1 - out[i] * y) * (1 - out[i] * y); - else - cost[i] = 0; + for (size_t j = 0; j < dim; ++j) { + int index = i * dim + j; + real a = out[index] * y; + if (a < -1) + cost[i] += -4 * a; + else if (a < 1) + cost[i] += (1 - a) * (1 - a); + } } target.copyFrom(cost.data(), numSamples); } @@ -689,17 +699,22 @@ void HuberTwoClassification::backwardImp(Matrix& output, Argument& label, Matrix& outputG) { size_t numSamples = output.getHeight(); + size_t dim = output.getWidth(); real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData(); int* lbl = useGpu_ ? tmpCpuInput_[1].ids->getData() : (*label.ids).getData(); real* grad = useGpu_ ? tmpCpuInput_[0].grad->getData() : outputG.getData(); for (size_t i = 0; i < numSamples; ++i) { int y = 2 * lbl[i] - 1; - if (y * out[i] < -1) - grad[i] += -4 * y; - else if (y * out[i] < 1) - grad[i] += -2 * (1 - y * out[i]) * y; + for (size_t j = 0; j < dim; ++j) { + int index = i * dim + j; + real a = out[index] * y; + if (a < -1) + grad[index] += -4 * y; + else if (a < 1) + grad[index] += -2 * (1 - a) * y; + } } - if (useGpu_) outputG.copyFrom(grad, numSamples); + if (useGpu_) outputG.copyFrom(grad, numSamples * dim); } /** * This cost layer compute the sum of its input as loss. From b1c0bad9fe8258ac9c12141c07fddb8600f781c5 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Sat, 26 Aug 2017 13:09:05 +0800 Subject: [PATCH 141/170] Add config parser for pooling3D --- paddle/math/Matrix.cpp | 2 - proto/ModelConfig.proto | 1 + python/paddle/trainer/config_parser.py | 120 +++++++++++++- .../paddle/trainer_config_helpers/layers.py | 146 +++++++++++++++++- .../tests/configs/test_pooling3D_layer.py | 38 +++++ .../tests/layers_test.py | 2 +- 6 files changed, 304 insertions(+), 5 deletions(-) create mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_pooling3D_layer.py diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 54c2eae475..e93a154556 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -2255,9 +2255,7 @@ void CpuMatrix::maxPool3DBackward(Matrix& outGrad, real* tgtGrad = getData(); real* otGrad = outGrad.getData(); real* maxPoolIdxData = maxPoolIdx.getData(); - size_t outStride = outGrad.getStride(); - ; for (size_t n = 0; n < num; ++n) { if (!outGrad.isContiguous()) { diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 42cf10e9d3..259f3c33c3 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -495,6 +495,7 @@ message LayerConfig { // to indicate rectangle image data optional uint64 height = 50; optional uint64 width = 51; + optional uint64 depth = 57 [ default = 1 ]; // blank label used in ctc loss optional uint32 blank = 52 [ default = 0 ]; diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index b7b696ef0c..405c5e1f13 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -903,6 +903,31 @@ class Pool(Cfg): self.add_keys(locals()) +@config_class +class Pool3d(Cfg): + def __init__( + self, + pool_type, + channels, + size_x, + size_y=None, + size_z=None, + start=None, + stride=None, # 1 by defalut in protobuf + stride_y=None, + stride_z=None, + padding=None, # 0 by defalut in protobuf + padding_y=None, + 
padding_z=None): + self.add_keys(locals()) + self.filter_size_y = size_y if size_y else size_x + self.filter_size_z = size_z if size_z else size_x + self.padding_y = padding_y if padding_y else padding + self.padding_z = padding_z if padding_z else padding + self.stride_y = stride_y if stride_y else stride + self.stride_z = stride_z if stride_z else stride + + @config_class class SpatialPyramidPool(Cfg): def __init__(self, pool_type, pyramid_height, channels): @@ -1167,6 +1192,20 @@ def get_img_size(input_layer_name, channels): return img_size, img_size_y +def get_img3d_size(input_layer_name, channels): + input = g_layer_map[input_layer_name] + img_pixels = input.size / channels + img_size = input.width + img_size_y = input.height + img_size_z = input.depth + + config_assert( + img_size * img_size_y * img_size_z == img_pixels, + "Input layer %s: Incorrect input image size %d * %d * %d for input image pixels %d" + % (input_layer_name, img_size, img_size_y, img_size_z, img_pixels)) + return img_size, img_size_y, img_size_z + + def parse_bilinear(bilinear, input_layer_name, bilinear_conf): parse_image(bilinear, input_layer_name, bilinear_conf.image_conf) bilinear_conf.out_size_x = bilinear.out_size_x @@ -1204,6 +1243,45 @@ def parse_pool(pool, input_layer_name, pool_conf, ceil_mode): pool_conf.stride_y, not ceil_mode) +def parse_pool3d(pool, input_layer_name, pool_conf, ceil_mode): + pool_conf.pool_type = pool.pool_type + config_assert(pool.pool_type in ['max-projection', 'avg-projection'], + "pool-type %s is not in " + "['max-projection', 'avg-projection']" % pool.pool_type) + + pool_conf.channels = pool.channels + + pool_conf.size_x = pool.size_x + pool_conf.stride = pool.stride + pool_conf.padding = pool.padding + + pool_conf.size_y = default(pool.size_y, pool_conf.size_x) + pool_conf.size_z = default(pool.size_z, pool_conf.size_x) + pool_conf.stride_y = default(pool.stride_y, pool_conf.stride) + pool_conf.stride_z = default(pool.stride_z, pool_conf.stride) + pool_conf.padding_y = default(pool.padding_y, pool_conf.padding) + pool_conf.padding_z = default(pool.padding_z, pool_conf.padding) + + pool_conf.img_size, pool_conf.img_size_y, pool_conf.img_size_z = \ + get_img3d_size(input_layer_name, pool.channels) + + config_assert(not pool.start, "start is deprecated in pooling.") + + if pool.padding is not None: + pool_conf.padding = pool.padding + pool_conf.padding_y = default(pool.padding_y, pool_conf.padding) + pool_conf.padding_z = default(pool.padding_z, pool_conf.padding) + pool_conf.output_x = cnn_output_size(pool_conf.img_size, pool_conf.size_x, + pool_conf.padding, pool_conf.stride, + not ceil_mode) + pool_conf.output_y = cnn_output_size(pool_conf.img_size_y, pool_conf.size_y, + pool_conf.padding_y, + pool_conf.stride_y, not ceil_mode) + pool_conf.output_z = cnn_output_size(pool_conf.img_size_z, pool_conf.size_z, + pool_conf.padding_z, + pool_conf.stride_z, not ceil_mode) + + def parse_spp(spp, input_layer_name, spp_conf): parse_image(spp, input_layer_name, spp_conf.image_conf) spp_conf.pool_type = spp.pool_type @@ -1580,6 +1658,9 @@ class LayerBase(object): self.config.height = height self.config.width = width + def set_layer_depth(self, depth): + self.config.depth = depth + def set_cnn_layer(self, input_layer_name, height, @@ -1763,11 +1844,19 @@ class DetectionOutputLayer(LayerBase): @config_layer('data') class DataLayer(LayerBase): - def __init__(self, name, size, height=None, width=None, device=None): + def __init__(self, + name, + size, + depth=None, + height=None, + width=None, 
+ device=None): super(DataLayer, self).__init__( name, 'data', size, inputs=[], device=device) if height and width: self.set_layer_height_width(height, width) + if depth: + self.set_layer_depth(depth) ''' @@ -1995,6 +2084,35 @@ class PoolLayer(LayerBase): pool_conf.channels) +@config_layer('pool3d') +class Pool3DLayer(LayerBase): + def __init__(self, name, inputs, ceil_mode=True, **xargs): + super(Pool3DLayer, self).__init__( + name, 'pool3d', 0, inputs=inputs, **xargs) + for input_index in xrange(len(self.inputs)): + input_layer = self.get_input_layer(input_index) + pool_conf = self.config.inputs[input_index].pool_conf + parse_pool3d(self.inputs[input_index].pool, input_layer.name, + pool_conf, ceil_mode) + self.set_cnn_layer(name, pool_conf.output_z, pool_conf.output_y, + pool_conf.output_x, pool_conf.channels) + + def set_cnn_layer(self, + input_layer_name, + depth, + height, + width, + channels, + is_print=True): + size = depth * height * width * channels + self.set_layer_size(size) + self.set_layer_height_width(height, width) + self.set_layer_depth(depth) + if is_print: + print("output for %s: c = %d, d = %d, h = %d, w = %d, size = %d" % + (input_layer_name, channels, depth, height, width, size)) + + @config_layer('spp') class SpatialPyramidPoolLayer(LayerBase): def __init__(self, name, inputs, **xargs): diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 1bc55c8696..5c5e737b56 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -133,6 +133,7 @@ __all__ = [ 'clip_layer', 'slice_projection', 'kmax_sequence_score_layer', + 'img_pool3d_layer', ] @@ -161,6 +162,7 @@ class LayerType(object): EXCONVTRANS_LAYER = 'exconvt' CUDNNCONV_LAYER = 'cudnn_conv' POOL_LAYER = 'pool' + POOL3D_LAYER = 'pool3d' BATCH_NORM_LAYER = 'batch_norm' NORM_LAYER = 'norm' SUM_TO_ONE_NORM_LAYER = 'sum_to_one_norm' @@ -878,7 +880,8 @@ def mixed_layer(size=0, @layer_support() -def data_layer(name, size, height=None, width=None, layer_attr=None): +def data_layer(name, size, depth=None, height=None, width=None, + layer_attr=None): """ Define DataLayer For NeuralNetwork. @@ -905,6 +908,7 @@ def data_layer(name, size, height=None, width=None, layer_attr=None): type=LayerType.DATA, name=name, size=size, + depth=depth, height=height, width=width, **ExtraLayerAttribute.to_kwargs(layer_attr)) @@ -2610,6 +2614,146 @@ def img_pool_layer(input, size=l.config.size) +@wrap_name_default("pool3d") +@layer_support() +def img_pool3d_layer(input, + pool_size, + name=None, + num_channels=None, + pool_type=None, + stride=1, + padding=0, + layer_attr=None, + pool_size_y=None, + stride_y=None, + padding_y=None, + pool_size_z=None, + stride_z=None, + padding_z=None, + ceil_mode=True): + """ + Image pooling Layer. + + The details of pooling layer, please refer ufldl's pooling_ . + + .. _pooling: http://ufldl.stanford.edu/tutorial/supervised/Pooling/ + + - ceil_mode=True: + + .. math:: + + w = 1 + int(ceil(input\_width + 2 * padding - pool\_size) / float(stride)) + h = 1 + int(ceil(input\_height + 2 * padding\_y - pool\_size\_y) / float(stride\_y)) + d = 1 + int(ceil(input\_depth + 2 * padding\_z - pool\_size\_z) / float(stride\_z)) + + - ceil_mode=False: + + .. 
math:: + + w = 1 + int(floor(input\_width + 2 * padding - pool\_size) / float(stride)) + h = 1 + int(floor(input\_height + 2 * padding\_y - pool\_size\_y) / float(stride\_y)) + d = 1 + int(floor(input\_depth + 2 * padding\_z - pool\_size\_z) / float(stride\_z)) + + The example usage is: + + .. code-block:: python + + maxpool = img_pool3d_layer(input=conv, + pool_size=3, + num_channels=8, + stride=1, + padding=1, + pool_type=MaxPooling()) + + :param padding: pooling padding width. + :type padding: int|tuple|list + :param name: name of pooling layer + :type name: basestring. + :param input: layer's input + :type input: LayerOutput + :param pool_size: pooling window width + :type pool_size: int|tuple|list + :param num_channels: number of input channel. + :type num_channels: int + :param pool_type: pooling type. MaxPooling or AvgPooling. Default is + MaxPooling. + :type pool_type: BasePoolingType + :param stride: stride width of pooling. + :type stride: int|tuple|list + :param layer_attr: Extra Layer attribute. + :type layer_attr: ExtraLayerAttribute + :param ceil_mode: Wether to use ceil mode to calculate output height and with. + Defalut is True. If set false, Otherwise use floor. + + :type ceil_mode: bool + :return: LayerOutput object. + :rtype: LayerOutput + """ + if num_channels is None: + assert input.num_filters is not None + num_channels = input.num_filters + + if pool_type is None: + pool_type = MaxPooling() + elif isinstance(pool_type, AvgPooling): + pool_type.name = 'avg' + + type_name = pool_type.name + '-projection' \ + if ( + isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \ + else pool_type.name + + if isinstance(pool_size, collections.Sequence): + assert len(pool_size) == 3 + pool_size, pool_size_y, pool_size_z = pool_size + else: + pool_size_y = pool_size + pool_size_z = pool_size + + if isinstance(stride, collections.Sequence): + assert len(stride) == 3 + stride, stride_y, stride_z = stride + else: + stride_y = stride + stride_z = stride + + if isinstance(padding, collections.Sequence): + assert len(padding) == 3 + padding, padding_y, padding_y = padding + else: + padding_y = padding + padding_z = padding + + l = Layer( + name=name, + type=LayerType.POOL3D_LAYER, + inputs=[ + Input( + input.name, + pool=Pool3d( + pool_type=type_name, + channels=num_channels, + size_x=pool_size, + start=None, + stride=stride, + padding=padding, + size_y=pool_size_y, + stride_y=stride_y, + padding_y=padding_y, + size_z=pool_size_z, + stride_z=stride_z, + padding_z=padding_z)) + ], + ceil_mode=ceil_mode, + **ExtraLayerAttribute.to_kwargs(layer_attr)) + return LayerOutput( + name, + LayerType.POOL_LAYER, + parents=[input], + num_filters=num_channels, + size=l.config.size) + + @wrap_name_default("spp") @layer_support() def spp_layer(input, diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_pooling3D_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_pooling3D_layer.py new file mode 100644 index 0000000000..0dbb921d41 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_pooling3D_layer.py @@ -0,0 +1,38 @@ +from paddle.trainer_config_helpers import * + +settings(batch_size=100, learning_rate=1e-5) + +data_2d = data_layer(name='data_2d', size=6000, height=20, width=10) + +pool_2d = img_pool_layer( + name="pool___2d", + input=data_2d, + num_channels=30, + pool_size=5, + stride=3, + padding=1, + pool_type=AvgPooling()) +outputs(pool_2d) + +data_3d = data_layer( + name='data_3d_1', size=60000, depth=10, height=20, width=10) 
+ +pool_3d_1 = img_pool3d_layer( + name="pool_3d_1", + input=data_3d, + num_channels=30, + pool_size=5, + stride=3, + padding=1, + pool_type=AvgPooling()) +outputs(pool_3d_1) + +pool_3d_2 = img_pool3d_layer( + name="pool_3d_2", + input=data_3d, + num_channels=30, + pool_size=[5, 5, 5], + stride=[3, 3, 3], + padding=[1, 1, 1], + pool_type=MaxPooling()) +outputs(pool_3d_2) diff --git a/python/paddle/trainer_config_helpers/tests/layers_test.py b/python/paddle/trainer_config_helpers/tests/layers_test.py index 05902ea293..52218972bf 100644 --- a/python/paddle/trainer_config_helpers/tests/layers_test.py +++ b/python/paddle/trainer_config_helpers/tests/layers_test.py @@ -16,4 +16,4 @@ from paddle.trainer.config_parser import parse_config_and_serialize if __name__ == '__main__': parse_config_and_serialize( - 'trainer_config_helpers/tests/layers_test_config.py', '') + 'trainer_config_helpers/tests/configs/test_pooling3D_layer.py', '') From 6053f7e36b19a06da14c970a1e4f25a02d1dbcaf Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Mon, 28 Aug 2017 18:10:44 +0800 Subject: [PATCH 142/170] fix previous comments(c++) --- paddle/cuda/include/hl_matrix.h | 2 +- paddle/gserver/layers/Conv3DLayer.cpp | 6 ------ paddle/gserver/layers/DeConv3DLayer.cpp | 6 ------ 3 files changed, 1 insertion(+), 13 deletions(-) diff --git a/paddle/cuda/include/hl_matrix.h b/paddle/cuda/include/hl_matrix.h index a37921b749..c7f2510997 100644 --- a/paddle/cuda/include/hl_matrix.h +++ b/paddle/cuda/include/hl_matrix.h @@ -241,7 +241,7 @@ extern void hl_matrix_rotate( * @param[in] paddingD padding in the depth. * @param[in] paddingH padding in the height. * @param[in] paddingW padding in the width. - * @param[out] matDst output matrix. + * @param[out] dataDst output matrix. * */ extern void hl_matrix_vol2Col(const real* dataSrc, diff --git a/paddle/gserver/layers/Conv3DLayer.cpp b/paddle/gserver/layers/Conv3DLayer.cpp index db907bbab1..7cc9937cce 100644 --- a/paddle/gserver/layers/Conv3DLayer.cpp +++ b/paddle/gserver/layers/Conv3DLayer.cpp @@ -53,18 +53,12 @@ bool Conv3DLayer::init(const LayerMap &layerMap, size_t Conv3DLayer::getSize() { CHECK_NE(inputLayers_.size(), 0UL); - // imgSizeH_.clear(); - // imgSizeW_.clear(); - // imgSizeD_.clear(); outputH_.clear(); outputW_.clear(); outputD_.clear(); N_.clear(); size_t layerSize = 0; for (size_t i = 0; i < inputLayers_.size(); ++i) { - // imgSizeH_.push_back(inputLayers_[i]->getOutput().getFrameHeight()); - // imgSizeW_.push_back(inputLayers_[i]->getOutput().getFrameWidth()); - // imgSizeD_.push_back(inputLayers_[i]->getOutput().getFrameDepth()); outputW_.push_back(outputSize( imgSizeW_[i], filterSize_[i], padding_[i], stride_[i], true)); outputH_.push_back(outputSize( diff --git a/paddle/gserver/layers/DeConv3DLayer.cpp b/paddle/gserver/layers/DeConv3DLayer.cpp index b18c06e36c..7d5c772c89 100644 --- a/paddle/gserver/layers/DeConv3DLayer.cpp +++ b/paddle/gserver/layers/DeConv3DLayer.cpp @@ -53,9 +53,6 @@ bool DeConv3DLayer::init(const LayerMap &layerMap, size_t DeConv3DLayer::getSize() { CHECK_NE(inputLayers_.size(), 0UL); - // imgSizeH_.clear(); - // imgSizeW_.clear(); - // imgSizeD_.clear(); outputH_.clear(); outputW_.clear(); outputD_.clear(); @@ -63,9 +60,6 @@ size_t DeConv3DLayer::getSize() { NOut_.clear(); size_t layerSize = 0; for (size_t i = 0; i < inputLayers_.size(); ++i) { - // imgSizeH_.push_back(inputLayers_[i]->getOutput().getFrameHeight()); - // imgSizeW_.push_back(inputLayers_[i]->getOutput().getFrameWidth()); - // 
imgSizeD_.push_back(inputLayers_[i]->getOutput().getFrameDepth()); outputW_.push_back( imageSize(imgSizeW_[i], filterSize_[i], padding_[i], stride_[i], true)); outputH_.push_back(imageSize( From 5df384d67ff498c9438b2ef7dc9566af7d50c97a Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Mon, 28 Aug 2017 19:36:18 +0800 Subject: [PATCH 143/170] Remove NeonDepthwiseConv.h --- paddle/function/neon/NeonDepthwiseConv.h | 25 ------------------------ 1 file changed, 25 deletions(-) delete mode 100644 paddle/function/neon/NeonDepthwiseConv.h diff --git a/paddle/function/neon/NeonDepthwiseConv.h b/paddle/function/neon/NeonDepthwiseConv.h deleted file mode 100644 index 23e4be1921..0000000000 --- a/paddle/function/neon/NeonDepthwiseConv.h +++ /dev/null @@ -1,25 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -namespace paddle { - -namespace neon { - -template -struct DepthwiseConvKernel {}; - -} // namespace neon -} // namespace paddle From 4f0c071e4909ff041f3a86c3a40c482becf50845 Mon Sep 17 00:00:00 2001 From: qijun Date: Mon, 28 Aug 2017 22:18:11 +0800 Subject: [PATCH 144/170] refine backward --- paddle/framework/backward.cc | 5 ++++- paddle/operators/net_op.cc | 9 ++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index bfda18724c..6b4c612cd8 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -124,6 +124,9 @@ static std::unique_ptr BackwardRecursive( std::list insert_position; for (auto& dup_output_op : dup_output_ops) { const std::string& name = dup_output_op.first; + // duplicate @Empty@ don't need to be added + if (name == kEmptyVarName) continue; + auto& dup_op = dup_output_op.second; // no duplicate output if (dup_op.size() == 1) continue; @@ -209,7 +212,7 @@ std::unique_ptr Backward( const OperatorBase& forwardOp, const std::unordered_set& no_grad_vars) { std::unordered_set no_grad_names; - no_grad_names.reserve(no_grad_vars.size()); + no_grad_names.reserve(no_grad_vars.size() + 1); no_grad_names.insert(std::string(kEmptyVarName) + kGradVarSuffix); diff --git a/paddle/operators/net_op.cc b/paddle/operators/net_op.cc index 44d925f0b0..78b5e27678 100644 --- a/paddle/operators/net_op.cc +++ b/paddle/operators/net_op.cc @@ -31,10 +31,13 @@ void NetOp::CompleteAddOp(bool calc) { for (auto& op : ops_) { for (auto& ipt : op->Inputs()) { for (auto& var_name : ipt.second) { - if (!Contains(output_set, var_name)) { // Not other op's output - input_set.insert(var_name); - } else { + // If input variable has been in output set, then it will be + // added into intermediate_outputs_. Otherwise, it will be + // added into input set. 
+ if (Contains(output_set, var_name)) { intermediate_outputs_.insert(var_name); + } else { + input_set.insert(var_name); } } } From 980edfa69a72f57dea689d1d5b1bff6b388e7a71 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 28 Aug 2017 11:34:24 -0700 Subject: [PATCH 145/170] Refine backward document --- paddle/framework/backward.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/paddle/framework/backward.md b/paddle/framework/backward.md index 133b17c7be..ce324a73f0 100644 --- a/paddle/framework/backward.md +++ b/paddle/framework/backward.md @@ -6,9 +6,16 @@ In Neural Network, the backpropagation algorithm follows the chain rule, so we n ## Backward Operator Registry -A backward network is built up with several backward operators. Backward operators take forward operators' inputs, outputs and output gradients and then calculate its input gradients. In most cases, there is a one-to-one correspondence between forward and backward operators. We use registry mechanism to save these correspondences. +A backward network is built up with several backward operators. Backward operators take forward operators' inputs, outputs and output gradients and then calculate its input gradients. -For example, we have got a `add_two_op`, and is registered by the following code: +-| | forward operator | backward operator +-| ---------------------- | ---------------- |------------------------- | +-| **Operator::inputs_** | Inputs | Inputs, Outputs, OutputGradients | +-| **Operator::outputs_** | Outputs | InputGradients | + + In most cases, there is a one-to-one correspondence between forward and backward operators. These correspondences are recorded by a global hash map(`OpInfoMap`). To follow the philosophy of minimum core and make operators pluggable, the registry mechanism is introduced. + +For example, we have got a `add_two_op`, and we can register it's information and corresponding backward operator by the following macro: ```cpp REGISTER_OP(add_two, AddTwoOp, AddTwoOpMaker, add_two_grad, AddTwoGradOp); From eaeb69f98f70bbea4fe4aae9f7c7b830f75959c5 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 28 Aug 2017 13:47:37 -0700 Subject: [PATCH 146/170] Follow reviewer's comments --- paddle/framework/backward.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/paddle/framework/backward.md b/paddle/framework/backward.md index ce324a73f0..8aa6728a95 100644 --- a/paddle/framework/backward.md +++ b/paddle/framework/backward.md @@ -2,28 +2,28 @@ ## Motivation -In Neural Network, the backpropagation algorithm follows the chain rule, so we need to compound the fundmental gradient operators/expressions together with chain rule . Every forward network need a backward network to construct the full computation lineage, the operator/expression's backward pass will be generated respect to forward pass. +In Neural Network, the backpropagation algorithm follows the chain rule, so we need to compound the fundmental gradient operators/expressions together with chain rule . Every forward network need a backward network to construct the full computation graph, the operator/expression's backward pass will be generated respect to forward pass. ## Backward Operator Registry A backward network is built up with several backward operators. Backward operators take forward operators' inputs, outputs and output gradients and then calculate its input gradients. 
--| | forward operator | backward operator --| ---------------------- | ---------------- |------------------------- | --| **Operator::inputs_** | Inputs | Inputs, Outputs, OutputGradients | --| **Operator::outputs_** | Outputs | InputGradients | +| | forward operator | backward operator +| ---------------------- | ---------------- |------------------------- | +| **Operator::inputs_** | Inputs | Inputs, Outputs, OutputGradients | +| **Operator::outputs_** | Outputs | InputGradients | In most cases, there is a one-to-one correspondence between forward and backward operators. These correspondences are recorded by a global hash map(`OpInfoMap`). To follow the philosophy of minimum core and make operators pluggable, the registry mechanism is introduced. -For example, we have got a `add_two_op`, and we can register it's information and corresponding backward operator by the following macro: +For example, we have got a `mul_op`, and we can register it's information and corresponding backward operator by the following macro: ```cpp -REGISTER_OP(add_two, AddTwoOp, AddTwoOpMaker, add_two_grad, AddTwoGradOp); +REGISTER_OP(mul, MulOp, MulOpMaker, mul_grad, MulOpGrad); ``` -`add_two` is the operator's type. `AddTwoOp` and `AddTwoOpMaker` are the operator class and the operator maker class respectively. +`mul` is the operator's type. `MulOp` and `MulOpMaker` are the operator class and the operator maker class respectively. -`add_two_grad` is the type of backward operator, and `AddTwoGradOp` is its class name. +`mul_grad` is the type of backward operator, and `MulOpGrad` is its class name. ## Backward Opeartor Creating From c19eae4c8e7923aa52dc05560dcc91b8b6d58de8 Mon Sep 17 00:00:00 2001 From: qingqing01 Date: Tue, 29 Aug 2017 15:46:52 +0800 Subject: [PATCH 147/170] update doc about how to write new operators. 
--- doc/howto/dev/new_op_cn.md | 56 +++++++++++++------ .../v2/framework/tests/gradient_checker.py | 2 +- 2 files changed, 41 insertions(+), 17 deletions(-) diff --git a/doc/howto/dev/new_op_cn.md b/doc/howto/dev/new_op_cn.md index ebd2cf3ff0..228b3fd643 100644 --- a/doc/howto/dev/new_op_cn.md +++ b/doc/howto/dev/new_op_cn.md @@ -5,12 +5,13 @@ - [定义ProtoMaker类](#定义ProtoMaker类) - [定义Operator类](#定义Operator类) - [定义OpKernel类](#定义OpKernel类) - - [注册类](#注册类) + - [注册Operator](#注册Operator) - [编译](#编译) - [绑定Python](#绑定Python) - [实现单元测试](#实现单元测试) - [前向Operator单测](#前向Operator单测) - [反向Operator单测](#反向Operator单测) + - [编译和执行](#编译和执行) ## 概念简介 @@ -22,19 +23,17 @@ - `framework::OperatorWithKernel`:继承自OperatorBase,Op有计算函数,称作有Kernel。 - `class OpProtoAndCheckerMaker`:描述该Op的输入、输出、属性、注释,主要用于Python API接口生成 -依据是否包含kernel,将Op分为两种:包含Kernel的Op和不包含kernel的Op,前者Op的定义继承自`OperatorBase`,后者继承自`OperatorWithKernel`。本教程主要介绍带Kernel的Op如何写,简单总结如下: +依据是否包含kernel,将Op分为两种:包含Kernel的Op和不包含kernel的Op,前者Op的定义继承自`OperatorBase`,后者继承自`OperatorWithKernel`。本教程主要介绍带Kernel的Op如何写,简单总结Op需要包含的内容如下: -Forward Op需要包含: - - - OpProtoMake定义 - - Op定义 - - Kernel实现 + + 内容 | 定义位置 +-------------- | :---------------------- +OpProtoMake定义 | `.cc`文件,Backward Op不需要定义OpProtoMake +Op定义 | `.cc`文件 +Kernel实现 | CPU、GPU共享Kernel在`.h`文件,否则,CPU可以在`.cc`文件,GPU可在`.cu`文件。 +注册Op | Op注册在`.cc`文件;Kernel注册CPU在`.cc`文件,GPU在`.cu`文件 + -与之对应的Backward Op包含: - - - Op定义 - - Kernel实现 - 下面以矩阵乘操作,即[MulOp](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc)为例来介绍如何写带Kernel的Operator。 @@ -137,8 +136,9 @@ MulOp(const std::string &type, const framework::VariableNameMap &inputs, ``` 还需要重写`InferShape`接口。`InferShape`为const函数,不能修改Op的成员变量,参数为`const framework::InferShapeContext &ctx`,通过该参数可获取到输入输出以及属性。它的功能是: - - 1). 做检查, 尽早报错:检查输入数据维度、类型等是否合法 - - 2). 设置输出Tensor的形状 + + - 1). 做检查, 尽早报错:检查输入数据维度、类型等是否合法。 + - 2). 设置输出Tensor的形状。 通常`OpProtoMaker`和`Op`类的定义写在`.cc`文件中,和要讲到的注册函数一起放在`.cc`中 @@ -172,7 +172,7 @@ class MulKernel : public framework::OpKernel { 到此前向Op实现完成,需要在`.cc`文件中注册该op和kernel。反向Op类的定义和Kernel定义与前向Op类似,这里不再重复。但注意,反向Op没有`ProtoMaker`。 -### 4. 注册类 +### 4. 注册Operator 在`.cc`文件中注册前向、反向Op类,注册CPU Kernel。 @@ -297,4 +297,28 @@ class TestMulOp(unittest.TestCase): - 调用`create_op("mul")`创建反向Op对应的前向Op。 - 定义输入`inputs`。 - 调用`compare_grad`函数对比CPU、GPU计算结果。 - - 调用`check_grad`检查梯度稳定性。 + - 调用`check_grad`检查梯度稳定性,这里采用数值法检测梯度正确性。 + - 第一个参数`op` : 前向op。 + - 第二个参数`inputs` : 输入词典,词典的Key和`ProtoMaker`定义保持一致。 + - 第三个参数`set(["X", "Y"])` : 指定对输入变量`X`、`Y`做梯度检测。 + - 第四个参数`"Out"` : 指定前向网络最终的输出目标变量`Out` + + +### 编译和执行 + +单测完成之后,在[`python/paddle/v2/framework/tests/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/framework/tests/CMakeLists.txt)里添加编译: + +``` +py_test(test_mul_op SRCS test_mul_op.py) +``` + +编译完成之后即可执行单测: + +``` +make test ARGS="-R test_mul_op -V" +``` +或者: + +``` +ctest -R test_mul_op +``` diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index 9a7a7fbf5e..02cfb9b2c4 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -268,7 +268,7 @@ class GradientChecker(unittest.TestCase): :param input_vars: numpy value of input variable. The following computation will use these variables. :param inputs_to_check: inputs var names that should check gradient. - :param output_name: output name that used to + :param output_name: the final output variable name. :param max_relative_error: The relative tolerance parameter. 
:param no_grad_set: used when create backward ops :param only_cpu: only compute and check gradient on cpu kernel. From b336119424d3fc0d9ffa39688612a83c23c6e10e Mon Sep 17 00:00:00 2001 From: qingqing01 Date: Tue, 29 Aug 2017 16:03:07 +0800 Subject: [PATCH 148/170] Add WITH_TESTING=ON for cmake in the operators writing guide doc. --- doc/howto/dev/new_op_cn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/howto/dev/new_op_cn.md b/doc/howto/dev/new_op_cn.md index 228b3fd643..7f8da2da5a 100644 --- a/doc/howto/dev/new_op_cn.md +++ b/doc/howto/dev/new_op_cn.md @@ -312,7 +312,7 @@ class TestMulOp(unittest.TestCase): py_test(test_mul_op SRCS test_mul_op.py) ``` -编译完成之后即可执行单测: +编译时需要打开`WITH_TESTING`, 即 `cmake paddle_dir -DWITH_TESTING=ON`,编译成功之后执行单测命令为: ``` make test ARGS="-R test_mul_op -V" From b709af616f99c7f4e3ab300297608054638886a8 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Tue, 29 Aug 2017 16:21:45 +0800 Subject: [PATCH 149/170] HuberTwoClassification only support one dimension --- paddle/gserver/layers/CostLayer.cpp | 31 +++++++++++------------------ 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp index aa4a26a83f..ce071323ff 100644 --- a/paddle/gserver/layers/CostLayer.cpp +++ b/paddle/gserver/layers/CostLayer.cpp @@ -672,10 +672,10 @@ void HuberTwoClassification::forwardImp(Matrix& output, Matrix& target) { HuberCost::forwardImp(output, label, target); size_t numSamples = target.getHeight(); - size_t dim = output.getWidth(); CHECK(label.ids); CHECK_EQ((*label.ids).getSize(), numSamples); CHECK_EQ(output.getHeight(), numSamples); + CHECK_EQ(output.getWidth(), (size_t)1); CHECK_EQ(target.getWidth(), (size_t)1); real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData(); @@ -683,14 +683,11 @@ void HuberTwoClassification::forwardImp(Matrix& output, std::vector cost(numSamples, 0); for (size_t i = 0; i < numSamples; ++i) { int y = 2 * lbl[i] - 1; - for (size_t j = 0; j < dim; ++j) { - int index = i * dim + j; - real a = out[index] * y; - if (a < -1) - cost[i] += -4 * a; - else if (a < 1) - cost[i] += (1 - a) * (1 - a); - } + real a = out[i] * y; + if (a < -1) + cost[i] = -4 * a; + else if (a < 1) + cost[i] = (1 - a) * (1 - a); } target.copyFrom(cost.data(), numSamples); } @@ -699,22 +696,18 @@ void HuberTwoClassification::backwardImp(Matrix& output, Argument& label, Matrix& outputG) { size_t numSamples = output.getHeight(); - size_t dim = output.getWidth(); real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData(); int* lbl = useGpu_ ? tmpCpuInput_[1].ids->getData() : (*label.ids).getData(); real* grad = useGpu_ ? tmpCpuInput_[0].grad->getData() : outputG.getData(); for (size_t i = 0; i < numSamples; ++i) { int y = 2 * lbl[i] - 1; - for (size_t j = 0; j < dim; ++j) { - int index = i * dim + j; - real a = out[index] * y; - if (a < -1) - grad[index] += -4 * y; - else if (a < 1) - grad[index] += -2 * (1 - a) * y; - } + real a = out[i] * y; + if (a < -1) + grad[i] += -4 * y; + else if (a < 1) + grad[i] += -2 * (1 - a) * y; } - if (useGpu_) outputG.copyFrom(grad, numSamples * dim); + if (useGpu_) outputG.copyFrom(grad, numSamples); } /** * This cost layer compute the sum of its input as loss. From 751d8533e830eae0ca6b9ee6e62e771a4b72a14b Mon Sep 17 00:00:00 2001 From: qingqing01 Date: Tue, 29 Aug 2017 16:45:20 +0800 Subject: [PATCH 150/170] follow wuyi's comments. 
--- python/paddle/v2/framework/tests/gradient_checker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index 02cfb9b2c4..518f828bac 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -268,7 +268,7 @@ class GradientChecker(unittest.TestCase): :param input_vars: numpy value of input variable. The following computation will use these variables. :param inputs_to_check: inputs var names that should check gradient. - :param output_name: the final output variable name. + :param output_name: the output variable name of forward network. :param max_relative_error: The relative tolerance parameter. :param no_grad_set: used when create backward ops :param only_cpu: only compute and check gradient on cpu kernel. From bfbd066fdd1c4a81266864bf837d89742b3f2ad6 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Tue, 29 Aug 2017 19:55:44 +0800 Subject: [PATCH 151/170] refine --- paddle/gserver/layers/MKLDNNFcLayer.cpp | 117 ++++++++++++------------ paddle/gserver/layers/MKLDNNFcLayer.h | 2 + paddle/gserver/layers/MKLDNNLayer.h | 48 +++++++--- paddle/math/MKLDNNMatrix.cpp | 25 ++--- paddle/math/MKLDNNMatrix.h | 29 +++--- 5 files changed, 118 insertions(+), 103 deletions(-) diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index d38e6a2099..a08cca318e 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -77,6 +77,24 @@ void MKLDNNFcLayer::convertWeightsToPaddle() { wgtVal_->reorderDataTo(wgtVal_, dstFmt, targetDim); } +void MKLDNNFcLayer::convertOutputToOtherDevice() { + copyOutputInfoToOtherDevice(); + // find other cpu device and reorder output to cpu device + int cnt = 0; + for (size_t i = 0; i < outputOtherDevice_.size(); i++) { + if (outputOtherDevice_[i].deviceId == CPU_DEVICE) { + // fc cpu output value do not need convert + // just share point + outputOtherDevice_[i].value = output_.value; + ++cnt; + } + } + + if (cnt > 1) { + LOG(WARNING) << "should not have more than one CPU devie"; + } +} + void MKLDNNFcLayer::reshape() { const Argument& input = getInput(0, getPrev(0)->getDeviceId()); int batchSize = input.getBatchSize(); @@ -116,7 +134,7 @@ void MKLDNNFcLayer::resetFwd() { const MatrixPtr& bias = hasBias ? biases_->getW() : nullptr; const MatrixPtr& out = output_.value; - if (prevIsMKLDNN()) { + if (prevIsOnlyMKLDNN()) { const MatrixPtr& in = getInputValue(0); inVal_ = std::dynamic_pointer_cast(in); CHECK(inVal_) << "Input should be MKLDNNMatrix"; @@ -136,30 +154,21 @@ void MKLDNNFcLayer::resetFwd() { // change original output value to mkldnn output value output_.value = std::dynamic_pointer_cast(outVal_); - if (!nextIsMKLDNN()) { - Argument cpuOutput; - for (size_t i = 0; i < outputOtherDevice_.size(); i++) { - if (outputOtherDevice_[i].deviceId == CPU_DEVICE) { - cpuOutput = outputOtherDevice_[i]; - } - } - cpuOutput.setFrameHeight(output_.getFrameHeight()); - cpuOutput.setFrameWidth(output_.getFrameWidth()); - - // fc cpu output value do not need convert - cpuOutput.value = output_.value; + if (!nextIsOnlyMKLDNN()) { + convertOutputToOtherDevice(); } // create forward handle prop_kind pk = prop_kind::forward; - fc_fwd::desc fwdDesc = - hasBias ? 
fc_fwd::desc(pk, - inVal_->getMD(), - wgtVal_->getMD(), - biasVal_->getMD(), - outVal_->getMD()) - : fc_fwd::desc( - pk, inVal_->getMD(), wgtVal_->getMD(), outVal_->getMD()); + fc_fwd::desc fwdDesc = hasBias ? fc_fwd::desc(pk, + inVal_->getMemoryDesc(), + wgtVal_->getMemoryDesc(), + biasVal_->getMemoryDesc(), + outVal_->getMemoryDesc()) + : fc_fwd::desc(pk, + inVal_->getMemoryDesc(), + wgtVal_->getMemoryDesc(), + outVal_->getMemoryDesc()); fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); if (hasBias) { fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_)); @@ -184,36 +193,38 @@ void MKLDNNFcLayer::resetBwd() { const MatrixPtr& wgt = weight_->getWGrad(); const MatrixPtr& bias = hasBias ? biases_->getWGrad() : nullptr; - // TODO(TJ): merge topdiffs - if (nextIsMKLDNN()) { + // TODO(TJ): merge outgrad + if (nextIsOnlyMKLDNN()) { // can not directly cast outputgrad to mkldnnmatrix, // since each layer can not write the inputgrad to mkldnn inputgrad. // So just create from matrix with outputvalue format. const MatrixPtr& out = getOutput(MKLDNN_DEVICE).grad; - outGrad_ = MKLDNNMatrix::create(out, outVal_->getPD()); + outGrad_ = MKLDNNMatrix::create(out, outVal_->getPrimitiveDesc()); } else { const MatrixPtr& out = getOutput(CPU_DEVICE).grad; // fc do not need to convert from cpu device since output always nc // only need create from cpu device - outGrad_ = MKLDNNMatrix::create(out, outVal_->getPD()); + outGrad_ = MKLDNNMatrix::create(out, outVal_->getPrimitiveDesc()); } - wgtGrad_ = MKLDNNMatrix::create(wgt, wgtVal_->getPD()); - biasGrad_ = hasBias ? MKLDNNMatrix::create(bias, biasVal_->getPD()) : nullptr; + wgtGrad_ = MKLDNNMatrix::create(wgt, wgtVal_->getPrimitiveDesc()); + biasGrad_ = hasBias ? MKLDNNMatrix::create(bias, biasVal_->getPrimitiveDesc()) + : nullptr; // create memory primitive desc fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward, - inVal_->getMD(), - wgtGrad_->getMD(), - outGrad_->getMD()); + inVal_->getMemoryDesc(), + wgtGrad_->getMemoryDesc(), + outGrad_->getMemoryDesc()); fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); - fc_bwdWgt::desc bwdWgtDesc = - hasBias ? fc_bwdWgt::desc(inVal_->getMD(), - wgtGrad_->getMD(), - biasGrad_->getMD(), - outGrad_->getMD()) - : fc_bwdWgt::desc( - inVal_->getMD(), wgtGrad_->getMD(), outGrad_->getMD()); + fc_bwdWgt::desc bwdWgtDesc = hasBias + ? fc_bwdWgt::desc(inVal_->getMemoryDesc(), + wgtGrad_->getMemoryDesc(), + biasGrad_->getMemoryDesc(), + outGrad_->getMemoryDesc()) + : fc_bwdWgt::desc(inVal_->getMemoryDesc(), + wgtGrad_->getMemoryDesc(), + outGrad_->getMemoryDesc()); fc_bwdWgt::primitive_desc bwdWgtPD = fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, fwdPD); @@ -227,30 +238,20 @@ void MKLDNNFcLayer::resetBwd() { pipelineBwd_.push_back(*bwdWgt_); /// backward data - if (prevIsMKLDNN()) { - const MatrixPtr& in = getInputGrad(0, MKLDNN_DEVICE); - if (in == nullptr) { - return; - } - if (getInput(0, MKLDNN_DEVICE).getAllCount() > 1) { - // TODO(TJ): use outputMaps_ ways when merge topdiff done - } else { - inGrad_ = MKLDNNMatrix::create(in, inVal_->getPD()); - } + int device = prevIsOnlyMKLDNN() ? 
MKLDNN_DEVICE : CPU_DEVICE; + const MatrixPtr& in = getInputGrad(0, device); + if (in == nullptr) { + return; + } + if (getInput(0, device).getAllCount() > 1) { + // TODO(TJ): use outputMaps_ ways when merge outgrad done } else { - const MatrixPtr& in = getInputGrad(0, CPU_DEVICE); - if (in == nullptr) { - return; - } - if (getInput(0, CPU_DEVICE).getAllCount() > 1) { - // TODO(TJ): use outputMaps_ ways when merge topdiff done - } else { - inGrad_ = MKLDNNMatrix::create(in, inVal_->getPD()); - } + inGrad_ = MKLDNNMatrix::create(in, inVal_->getPrimitiveDesc()); } - fc_bwdData::desc bwdDataDesc = - fc_bwdData::desc(inVal_->getMD(), wgtGrad_->getMD(), outGrad_->getMD()); + fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(inVal_->getMemoryDesc(), + wgtGrad_->getMemoryDesc(), + outGrad_->getMemoryDesc()); fc_bwdData::primitive_desc bwdDataPD = fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD); diff --git a/paddle/gserver/layers/MKLDNNFcLayer.h b/paddle/gserver/layers/MKLDNNFcLayer.h index e2657a8d5e..e138a6faf1 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.h +++ b/paddle/gserver/layers/MKLDNNFcLayer.h @@ -72,6 +72,8 @@ protected: * only would be called when needed */ void resetBwd(); + + void convertOutputToOtherDevice() override; }; } // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index 3dd17a36ff..8fe9630e82 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -86,10 +86,7 @@ public: CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." << "Please set WITH_MKLDNN=ON " << "and set use_mkldnn=True"; - if (useGpu_ == true) { - LOG(WARNING) << "Do not support GPU yet, will change to useGpu = false"; - useGpu_ = false; - } + CHECK(!useGpu_) << "Do not support GPU yet"; // set device id before Layer::init setDevice(MKLDNN_DEVICE); @@ -116,6 +113,12 @@ public: */ virtual void convertWeightsToPaddle() {} + /** + * convert MKLDNN output to other device. + * only support CPU device yet + */ + virtual void convertOutputToOtherDevice() {} + /** * print info about sizes */ @@ -147,22 +150,25 @@ public: protected: /** - * If next layer only has MKLDNN type. - * Otherwise, only support otherdevice CPU device. + * copy image size and sequence info to other device */ - bool nextIsMKLDNN() { + void copyOutputInfoToOtherDevice() { for (size_t i = 0; i < outputOtherDevice_.size(); i++) { - CHECK_EQ(outputOtherDevice_[i].deviceId, CPU_DEVICE) - << "Only support other device is CPU yet"; + outputOtherDevice_[i].setFrameHeight(output_.getFrameHeight()); + outputOtherDevice_[i].setFrameWidth(output_.getFrameWidth()); + outputOtherDevice_[i].sequenceStartPositions = + output_.sequenceStartPositions; + outputOtherDevice_[i].subSequenceStartPositions = + output_.subSequenceStartPositions; + outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims; } - return outputOtherDevice_.size() == 0; } /** - * Is previous layer MKLDNN type. - * Otherwise, only support otherdevice CPU device. + * Is previous layer only has MKLDNN type. + * Otherwise, only support the previous layer using CPU device. */ - bool prevIsMKLDNN(int index = 0) { + bool prevIsOnlyMKLDNN(int index = 0) { int prevDevice = getPrev(index)->getDeviceId(); if (prevDevice == MKLDNN_DEVICE) { return true; @@ -173,11 +179,23 @@ protected: } } + /** + * If output only has MKLDNN device. + * Otherwise, other devices should only using CPU device. 
+ */ + bool nextIsOnlyMKLDNN() { + for (size_t i = 0; i < outputOtherDevice_.size(); i++) { + CHECK_EQ(outputOtherDevice_[i].deviceId, CPU_DEVICE) + << "Only support other device is CPU yet"; + } + return outputOtherDevice_.size() == 0; + } + /** * Sync input value data */ void syncInputValue() { - if (prevIsMKLDNN()) { + if (prevIsOnlyMKLDNN()) { return; } real* iData = getInputValue(0, CPU_DEVICE)->getData(); @@ -190,7 +208,7 @@ protected: * Sync output grad data */ void syncOutputGrad() { - if (nextIsMKLDNN()) { + if (nextIsOnlyMKLDNN()) { return; } diff --git a/paddle/math/MKLDNNMatrix.cpp b/paddle/math/MKLDNNMatrix.cpp index 32ae3b1bcf..0a355e2644 100644 --- a/paddle/math/MKLDNNMatrix.cpp +++ b/paddle/math/MKLDNNMatrix.cpp @@ -31,7 +31,6 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, memory::primitive_desc pd) { if (m == nullptr) { size_t height = dims[0]; size_t width = cnts / dims[0]; - // LOG(INFO) << height << "," << width; m = Matrix::create(height, width, false, false); } @@ -40,10 +39,8 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, memory::primitive_desc pd) { CHECK(cpuMatrix) << "Only support create from CPU matrix yet"; CHECK_EQ(cnts, m->getElementCnt()) << "Count size does not match"; - size_t width = m->getWidth(); - size_t height = m->getHeight(); - real* data = m->getData(); - return std::make_shared(data, height, width, pd); + return std::make_shared( + m->getData(), m->getHeight(), m->getWidth(), pd); } MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, @@ -51,9 +48,7 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, memory::format fmt, engine& eg, mkldnn::memory::data_type dtype) { - memory::desc md = memory::desc(dims, dtype, fmt); - memory::primitive_desc pd = memory::primitive_desc(md, eg); - return create(m, pd); + return create(m, memory::primitive_desc(memory::desc(dims, dtype, fmt), eg)); } void MKLDNNMatrix::reorderDataFrom(const MKLDNNMatrixPtr& m, @@ -64,9 +59,7 @@ void MKLDNNMatrix::reorderDataFrom(const MKLDNNMatrixPtr& m, return; } CHECK_EQ(getElementCnt(), m->getElementCnt()) << "size should equal"; - real* srcData = getData(); - real* dstData = m->getData(); - reorderOnce(srcData, dstData, srcFmt, dstFmt, targetDim); + reorderOnce(getData(), m->getData(), srcFmt, dstFmt, targetDim); } void MKLDNNMatrix::reorderDataTo(const MKLDNNMatrixPtr& m, @@ -77,9 +70,7 @@ void MKLDNNMatrix::reorderDataTo(const MKLDNNMatrixPtr& m, return; } CHECK_EQ(getElementCnt(), m->getElementCnt()) << "size should equal"; - real* srcData = getData(); - real* dstData = m->getData(); - reorderOnce(srcData, dstData, srcFmt, dstFmt, targetDim); + reorderOnce(getData(), m->getData(), srcFmt, dstFmt, targetDim); } void MKLDNNMatrix::reorderOnce(void* srcData, @@ -120,8 +111,9 @@ void MKLDNNMatrix::downSpatial() { return; } - memory::dims srcDims = getDims(); + // TODO(TJ): change H(height) and W(width) if support nhwc or more const int H = 2, W = 3; + memory::dims srcDims = getDims(); if (srcDims[H] != 1 || srcDims[W] != 1) { // can not down spatial return; @@ -141,13 +133,12 @@ void MKLDNNMatrix::downSpatial() { } memory::desc md = memory::desc(dstDims, getDtype(), dstFmt); memory::primitive_desc pd = memory::primitive_desc(md, getEngine()); - void* data = getData(); mkldnn_primitive_t result; mkldnn::error::wrap_c_api( mkldnn_primitive_create(&result, pd.get(), nullptr, nullptr), "could not create a memory primitive"); reset(result); - set_data_handle(data); + set_data_handle(getData()); } } // namespace paddle diff --git a/paddle/math/MKLDNNMatrix.h 
b/paddle/math/MKLDNNMatrix.h index ea3fd7d461..e50f698b49 100644 --- a/paddle/math/MKLDNNMatrix.h +++ b/paddle/math/MKLDNNMatrix.h @@ -56,9 +56,9 @@ public: public: /** * Reorder this MKLDNNMatrix from other format. - * Support inplace reorder - * Pay attention: this function would only reorder the data layout. - * will NOT change this original dim or format info + * Support inplace reorder. + * @note: this function would only reorder the data layout. + * will NOT change this original dim or format info */ void reorderDataFrom(const MKLDNNMatrixPtr& m, memory::format srcFmt, @@ -66,9 +66,9 @@ public: /** * Reorder this MKLDNNMatrix to other format. - * Support inplace reorder - * Pay attention: this function would only reorder the data layout. - * will NOT change the dst dim or format info + * Support inplace reorder. + * @note: this function would only reorder the data layout. + * will NOT change the dst dim or format info */ void reorderDataTo(const MKLDNNMatrixPtr& m, memory::format dstFmt, @@ -90,18 +90,20 @@ public: /** * Get primitive descriptor. */ - mkldnn::memory::primitive_desc getPD() { return this->get_primitive_desc(); } + mkldnn::memory::primitive_desc getPrimitiveDesc() { + return this->get_primitive_desc(); + } /** * Get memory descriptor. */ - mkldnn::memory::desc getMD() { return getPD().desc(); } + mkldnn::memory::desc getMemoryDesc() { return getPrimitiveDesc().desc(); } /** * Get dimensions. */ mkldnn::memory::dims getDims() { - mkldnn::memory::desc md = getMD(); + mkldnn::memory::desc md = getMemoryDesc(); const int* src = md.data.dims; int ndims = md.data.ndims; mkldnn::memory::dims dst; @@ -116,24 +118,25 @@ public: * Get format. */ mkldnn::memory::format getFormat() { - return (mkldnn::memory::format)(getMD().data.format); + return (mkldnn::memory::format)(getMemoryDesc().data.format); } /** * Get memory data type. */ mkldnn::memory::data_type getDtype() { - return (mkldnn::memory::data_type)(getMD().data.data_type); + return (mkldnn::memory::data_type)(getMemoryDesc().data.data_type); } /** * Get engine. */ - mkldnn::engine getEngine() { return getPD().get_engine(); } + mkldnn::engine getEngine() { return getPrimitiveDesc().get_engine(); } protected: /** - * Do once reorder supported inplace. + * Do reorder once. + * Can support inplace. */ void reorderOnce(void* srcData, void* dstData, From 34f4f763f9cf52d6c6326613ed839d00ac7c6eb0 Mon Sep 17 00:00:00 2001 From: chengduo Date: Wed, 30 Aug 2017 10:19:08 +0800 Subject: [PATCH 152/170] Update networks.py --- python/paddle/trainer_config_helpers/networks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py index 28a71cf788..34be203ee2 100644 --- a/python/paddle/trainer_config_helpers/networks.py +++ b/python/paddle/trainer_config_helpers/networks.py @@ -1406,7 +1406,7 @@ def inputs(layers, *args): if len(args) != 0: layers.extend(args) - Inputs(* [l.name for l in layers]) + Inputs(*[l.name for l in layers]) def outputs(layers, *args): @@ -1456,7 +1456,7 @@ def outputs(layers, *args): assert len(layers) > 0 if HasInputsSet(): # input already set - Outputs(* [l.name for l in layers]) + Outputs(*[l.name for l in layers]) return # just return outputs. if len(layers) != 1: From 168707caddf9c0ed67a2d87074a5f05b7a63a5c9 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Wed, 30 Aug 2017 11:35:19 +0800 Subject: [PATCH 153/170] Fix a small bug. 
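The Neon depthwise kernels only cover square 3x3 or 4x4 filters with equal strides of 1 or 2 (see the DepthwiseConvKernel<3, 1>, <3, 2>, <4, 1> and <4, 2> specializations later in this series), so ExpandConvLayer now falls back to the generic convolution path when those conditions do not hold. The added condition, rewritten here purely as an illustrative standalone predicate (the helper name is hypothetical; the patch below inlines the check):

    // Sketch of the guard added in ExpandConvLayer::init; not part of the patch.
    inline bool supportsNeonDepthwiseConv(int filterSizeX, int filterSizeY,
                                          int strideX, int strideY) {
      // Square 3x3 or 4x4 filter, equal strides of 1 or 2.
      return filterSizeX == filterSizeY &&
             (filterSizeX == 3 || filterSizeX == 4) &&
             strideX == strideY && (strideX == 1 || strideX == 2);
    }
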
--- paddle/gserver/layers/ExpandConvLayer.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/paddle/gserver/layers/ExpandConvLayer.cpp b/paddle/gserver/layers/ExpandConvLayer.cpp index 0e84581769..20de475fc3 100644 --- a/paddle/gserver/layers/ExpandConvLayer.cpp +++ b/paddle/gserver/layers/ExpandConvLayer.cpp @@ -66,7 +66,11 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, // If depth wise convolution and useGpu == false and ARM-NEON if (!useGpu_ && isDepthwiseConv(channels_[i], groups_[i]) && !isDeconv_) { #if defined(__ARM_NEON__) || defined(__ARM_NEON) - convType = "NeonDepthwiseConv"; + if ((filterSize_[i] == filterSizeY_[i]) && + (filterSize_[i] == 3 || filterSize_[i] == 4) && + (stride_[i] == strideY_[i]) && (stride_[i] == 1 || stride_[i] == 2)) { + convType = "NeonDepthwiseConv"; + } #endif } From c5183caa04557628340983d17a64097f939db132 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Wed, 30 Aug 2017 13:37:51 +0800 Subject: [PATCH 154/170] rename --- paddle/gserver/layers/MKLDNNFcLayer.cpp | 29 +++++++++++-------------- paddle/gserver/layers/MKLDNNLayer.h | 12 +++++----- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index a08cca318e..8318c8c519 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -134,7 +134,7 @@ void MKLDNNFcLayer::resetFwd() { const MatrixPtr& bias = hasBias ? biases_->getW() : nullptr; const MatrixPtr& out = output_.value; - if (prevIsOnlyMKLDNN()) { + if (inputIsOnlyMKLDNN()) { const MatrixPtr& in = getInputValue(0); inVal_ = std::dynamic_pointer_cast(in); CHECK(inVal_) << "Input should be MKLDNNMatrix"; @@ -154,7 +154,7 @@ void MKLDNNFcLayer::resetFwd() { // change original output value to mkldnn output value output_.value = std::dynamic_pointer_cast(outVal_); - if (!nextIsOnlyMKLDNN()) { + if (!outputIsOnlyMKLDNN()) { convertOutputToOtherDevice(); } @@ -194,19 +194,16 @@ void MKLDNNFcLayer::resetBwd() { const MatrixPtr& bias = hasBias ? biases_->getWGrad() : nullptr; // TODO(TJ): merge outgrad - if (nextIsOnlyMKLDNN()) { - // can not directly cast outputgrad to mkldnnmatrix, - // since each layer can not write the inputgrad to mkldnn inputgrad. - // So just create from matrix with outputvalue format. - const MatrixPtr& out = getOutput(MKLDNN_DEVICE).grad; - outGrad_ = MKLDNNMatrix::create(out, outVal_->getPrimitiveDesc()); - } else { - const MatrixPtr& out = getOutput(CPU_DEVICE).grad; - // fc do not need to convert from cpu device since output always nc - // only need create from cpu device - outGrad_ = MKLDNNMatrix::create(out, outVal_->getPrimitiveDesc()); - } - + int device = outputIsOnlyMKLDNN() ? MKLDNN_DEVICE : CPU_DEVICE; + // for MKLDNN device: + // can not directly cast outputgrad to mkldnnmatrix, + // since each layer can not write the inputgrad to mkldnn inputgrad. + // So just create from matrix with outputvalue format. + // for CPU device: + // fc do not need to convert from cpu device since output is always nc format + // only need create from cpu device + const MatrixPtr& out = getOutput(device).grad; + outGrad_ = MKLDNNMatrix::create(out, outVal_->getPrimitiveDesc()); wgtGrad_ = MKLDNNMatrix::create(wgt, wgtVal_->getPrimitiveDesc()); biasGrad_ = hasBias ? MKLDNNMatrix::create(bias, biasVal_->getPrimitiveDesc()) : nullptr; @@ -238,7 +235,7 @@ void MKLDNNFcLayer::resetBwd() { pipelineBwd_.push_back(*bwdWgt_); /// backward data - int device = prevIsOnlyMKLDNN() ? 
MKLDNN_DEVICE : CPU_DEVICE; + device = inputIsOnlyMKLDNN() ? MKLDNN_DEVICE : CPU_DEVICE; const MatrixPtr& in = getInputGrad(0, device); if (in == nullptr) { return; diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index 8fe9630e82..b983b833d5 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -151,6 +151,8 @@ public: protected: /** * copy image size and sequence info to other device + * @note: can not directly use Layer::copyOutputToOtherDevice since here only + * copy base info and do not copy data value */ void copyOutputInfoToOtherDevice() { for (size_t i = 0; i < outputOtherDevice_.size(); i++) { @@ -165,10 +167,10 @@ protected: } /** - * Is previous layer only has MKLDNN type. + * If input only has MKLDNN device. * Otherwise, only support the previous layer using CPU device. */ - bool prevIsOnlyMKLDNN(int index = 0) { + bool inputIsOnlyMKLDNN(int index = 0) { int prevDevice = getPrev(index)->getDeviceId(); if (prevDevice == MKLDNN_DEVICE) { return true; @@ -183,7 +185,7 @@ protected: * If output only has MKLDNN device. * Otherwise, other devices should only using CPU device. */ - bool nextIsOnlyMKLDNN() { + bool outputIsOnlyMKLDNN() { for (size_t i = 0; i < outputOtherDevice_.size(); i++) { CHECK_EQ(outputOtherDevice_[i].deviceId, CPU_DEVICE) << "Only support other device is CPU yet"; @@ -195,7 +197,7 @@ protected: * Sync input value data */ void syncInputValue() { - if (prevIsOnlyMKLDNN()) { + if (inputIsOnlyMKLDNN()) { return; } real* iData = getInputValue(0, CPU_DEVICE)->getData(); @@ -208,7 +210,7 @@ protected: * Sync output grad data */ void syncOutputGrad() { - if (nextIsOnlyMKLDNN()) { + if (outputIsOnlyMKLDNN()) { return; } From 31632a694c718ac31b890b1b46788f9d70d570c8 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Wed, 30 Aug 2017 14:48:03 +0800 Subject: [PATCH 155/170] remove unused ubuntu Debian install doc --- doc/getstarted/build_and_install/index_cn.rst | 4 +- doc/getstarted/build_and_install/index_en.rst | 3 +- .../build_and_install/ubuntu_install_cn.rst | 71 ------------------- .../build_and_install/ubuntu_install_en.rst | 25 ------- 4 files changed, 2 insertions(+), 101 deletions(-) delete mode 100644 doc/getstarted/build_and_install/ubuntu_install_cn.rst delete mode 100644 doc/getstarted/build_and_install/ubuntu_install_en.rst diff --git a/doc/getstarted/build_and_install/index_cn.rst b/doc/getstarted/build_and_install/index_cn.rst index a24df6c518..dd9923697a 100644 --- a/doc/getstarted/build_and_install/index_cn.rst +++ b/doc/getstarted/build_and_install/index_cn.rst @@ -6,14 +6,12 @@ 安装流程 ++++++++ -PaddlePaddle提供数个预编译的二进制来进行安装,包括Docker镜像,ubuntu的deb安装包等。我们推荐使用Docker镜像来部署环境,同时欢迎贡献更多的安装包。 +PaddlePaddle提供Docker镜像来部署环境。 .. toctree:: :maxdepth: 1 docker_install_cn.rst - ubuntu_install_cn.rst - 编译流程 diff --git a/doc/getstarted/build_and_install/index_en.rst b/doc/getstarted/build_and_install/index_en.rst index 1bfd4f75c0..8a53588e04 100644 --- a/doc/getstarted/build_and_install/index_en.rst +++ b/doc/getstarted/build_and_install/index_en.rst @@ -8,14 +8,13 @@ Install PaddlePaddle :maxdepth: 1 docker_install_en.rst - ubuntu_install_en.rst Build from Source ----------------- .. warning:: - Please use :code:`deb` package or :code:`docker` image to install paddle. The building guide is used for hacking or contributing PaddlePaddle source code. + Please use :code:`docker` image to install paddle. The building guide is used for hacking or contributing PaddlePaddle source code. .. 
toctree:: :maxdepth: 1 diff --git a/doc/getstarted/build_and_install/ubuntu_install_cn.rst b/doc/getstarted/build_and_install/ubuntu_install_cn.rst deleted file mode 100644 index 9e39ccb00f..0000000000 --- a/doc/getstarted/build_and_install/ubuntu_install_cn.rst +++ /dev/null @@ -1,71 +0,0 @@ -Ubuntu部署PaddlePaddle -=================================== - -PaddlePaddle提供了ubuntu 14.04 deb安装包。 - -安装 ------- - -安装包的下载地址是\: https://github.com/PaddlePaddle/Paddle/releases - -它包含四个版本\: - -* cpu版本: 支持主流x86处理器平台, 使用了avx指令集。 - -* cpu-noavx版本:支持主流x86处理器平台,没有使用avx指令集。 - -* gpu版本:支持主流x86处理器平台,支持nvidia cuda平台,使用了avx指令集。 - -* gpu-noavx版本:支持主流x86处理器平台,支持nvidia cuda平台,没有使用avx指令集。 - -下载完相关安装包后,执行: - -.. code-block:: shell - - sudo apt-get install gdebi - gdebi paddle-*-cpu.deb - -或者: - -.. code-block:: shell - - dpkg -i paddle-*-cpu.deb - apt-get install -f - - -在 :code:`dpkg -i` 的时候如果报一些依赖未找到的错误是正常的, -在 :code:`apt-get install -f` 里会继续安装 PaddlePaddle。 - -安装完成后,可以使用命令 :code:`paddle version` 查看安装后的paddle 版本: - -.. code-block:: shell - - PaddlePaddle 0.8.0b1, compiled with - with_avx: ON - with_gpu: OFF - with_double: OFF - with_python: ON - with_rdma: OFF - with_timer: OFF - with_predict_sdk: - - -可能遇到的问题 --------------- - -libcudart.so/libcudnn.so找不到 -++++++++++++++++++++++++++++++ - -安装完成后,运行 :code:`paddle train` 报错\: - -.. code-block:: shell - - 0831 12:36:04.151525 1085 hl_dso_loader.cc:70] Check failed: nullptr != *dso_handle For Gpu version of PaddlePaddle, it couldn't find CUDA library: libcudart.so Please make sure you already specify its path.Note: for training data on Cpu using Gpu version of PaddlePaddle,you must specify libcudart.so via LD_LIBRARY_PATH. - -原因是未设置cuda运行时环境变量。 如果使用GPU版本的PaddlePaddle,请安装CUDA 7.5 和CUDNN 5到本地环境中,并设置: - -.. code-block:: shell - - export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib:$LD_LIBRARY_PATH - export PATH=/usr/local/cuda/bin:$PATH - diff --git a/doc/getstarted/build_and_install/ubuntu_install_en.rst b/doc/getstarted/build_and_install/ubuntu_install_en.rst deleted file mode 100644 index ea8042085b..0000000000 --- a/doc/getstarted/build_and_install/ubuntu_install_en.rst +++ /dev/null @@ -1,25 +0,0 @@ -Debian Package installation guide -================================= - -PaddlePaddle supports :code:`deb` pacakge. The installation of this :code:`deb` package is tested in ubuntu 14.04, but it should be support other debian based linux, too. - -There are four versions of debian package, :code:`cpu`, :code:`gpu`, :code:`cpu-noavx`, :code:`gpu-noavx`. And :code:`noavx` version is used to support CPU which does not contain :code:`AVX` instructions. The download url of :code:`deb` package is \: https://github.com/baidu/Paddle/releases/ - - -After downloading PaddlePaddle deb packages, you can use :code:`gdebi` install. - -.. code-block:: bash - - gdebi paddle-*.deb - -If :code:`gdebi` is not installed, you can use :code:`sudo apt-get install gdebi` to install it. - -Or you can use following commands to install PaddlePaddle. - -.. code-block:: bash - - dpkg -i paddle-*.deb - apt-get install -f - -And if you use GPU version deb package, you need to install CUDA toolkit and cuDNN, and set related environment variables(such as LD_LIBRARY_PATH) first. It is normal when `dpkg -i` get errors. `apt-get install -f` will continue install paddle, and install dependences. 
- From 64791188952437852ad549914a70baea3320f827 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Wed, 30 Aug 2017 17:36:22 +0800 Subject: [PATCH 156/170] fix download mklml error --- cmake/external/mklml.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmake/external/mklml.cmake b/cmake/external/mklml.cmake index 51fafb9479..77ea244900 100644 --- a/cmake/external/mklml.cmake +++ b/cmake/external/mklml.cmake @@ -54,7 +54,8 @@ ExternalProject_Add( ${EXTERNAL_PROJECT_LOG_ARGS} PREFIX ${MKLML_SOURCE_DIR} DOWNLOAD_DIR ${MKLML_DOWNLOAD_DIR} - DOWNLOAD_COMMAND wget --no-check-certificate -qO- ${MKLML_URL} | tar xz -C ${MKLML_DOWNLOAD_DIR} + DOWNLOAD_COMMAND wget --no-check-certificate ${MKLML_URL} -c -O ${MKLML_VER}.tgz + && tar zxf ${MKLML_DOWNLOAD_DIR}/${MKLML_VER}.tgz DOWNLOAD_NO_PROGRESS 1 UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLML_INSTALL_ROOT} From 2563e32bb12b363c41d608bf0f6f1060ea769f8b Mon Sep 17 00:00:00 2001 From: qijun Date: Wed, 30 Aug 2017 17:57:26 +0800 Subject: [PATCH 157/170] fix clang build error --- paddle/gserver/layers/CostLayer.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/paddle/gserver/layers/CostLayer.h b/paddle/gserver/layers/CostLayer.h index 0ce72ef40a..0f655b48ee 100644 --- a/paddle/gserver/layers/CostLayer.h +++ b/paddle/gserver/layers/CostLayer.h @@ -318,7 +318,9 @@ public: void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; - void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad) {} + void backwardImp(Matrix& outputValue, + Argument& label, + Matrix& outputGrad) override {} }; /** From f557b0c4c5e1ac97fdc092ed85993c4dda72fd2d Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Thu, 31 Aug 2017 00:11:44 +0800 Subject: [PATCH 158/170] fix data_layer for 3D data --- python/paddle/trainer_config_helpers/layers.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index ebb6f36504..c92764e1f9 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -929,11 +929,13 @@ def data_layer(name, size, depth=None, height=None, width=None, width=width, **ExtraLayerAttribute.to_kwargs(layer_attr)) + if depth is None: + depth = 1 num_filters = None if height is not None and width is not None: - num_filters = size / (width * height) - assert num_filters * width * height == size, \ - "size=%s width=%s height=%s" % (size, width, height) + num_filters = size / (width * height * depth) + assert num_filters * width * height * depth == size, \ + "size=%s width=%s height=%s depth=%s" % (size, width, height, depth) return LayerOutput(name, LayerType.DATA, size=size, num_filters=num_filters) From 2ae37a4ea2f4b02ffe6b773590ed05c77675e6f5 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Thu, 31 Aug 2017 00:28:01 +0800 Subject: [PATCH 159/170] fix data_layer for 3D data --- python/paddle/trainer_config_helpers/layers.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 42bf1c19d1..2aa86850d1 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -926,16 +926,18 @@ def data_layer(name, size, height=None, width=None, depth=None, type=LayerType.DATA, name=name, size=size, + depth=depth, height=height, width=width, - depth=depth, 
**ExtraLayerAttribute.to_kwargs(layer_attr)) + if depth is None: + depth = 1 num_filters = None if height is not None and width is not None: - num_filters = size / (width * height) - assert num_filters * width * height == size, \ - "size=%s width=%s height=%s" % (size, width, height) + num_filters = size / (width * height * depth) + assert num_filters * width * height*depth == size, \ + "size=%s width=%s height=%s depth=%s" % (size, width, height, depth) return LayerOutput(name, LayerType.DATA, size=size, num_filters=num_filters) From 09e903eb9417745952ced6db532594fd4a759d74 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Tue, 29 Aug 2017 13:44:51 +0800 Subject: [PATCH 160/170] fix v2 infer interface. --- paddle/gserver/layers/CrossEntropyOverBeam.cpp | 1 - python/paddle/v2/inference.py | 7 +++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.cpp b/paddle/gserver/layers/CrossEntropyOverBeam.cpp index 500cd6ff8c..bffcc30154 100644 --- a/paddle/gserver/layers/CrossEntropyOverBeam.cpp +++ b/paddle/gserver/layers/CrossEntropyOverBeam.cpp @@ -39,7 +39,6 @@ void CostForOneSequence::calValidExpandStep() { if (start + beamSize_ == findEnd) return; goldColIds_[i] = findEnd - start; } - if (goldColIds_[beams_->expansionCount - 1] != -1) goldAsExtraPath_ = false; } diff --git a/python/paddle/v2/inference.py b/python/paddle/v2/inference.py index 4dcc3ab57e..8acea6155c 100644 --- a/python/paddle/v2/inference.py +++ b/python/paddle/v2/inference.py @@ -70,7 +70,7 @@ class Inference(object): item = [each_result[each_field] for each_field in field] yield item - def infer(self, input, field='value', **kwargs): + def infer(self, input, field='value', flatten_result=True, **kwargs): """ Infer a data by model. :param input: input data batch. Should be python iterable object. 
@@ -83,7 +83,10 @@ class Inference(object): retv = [[] for i in xrange(len(result))] for i, item in enumerate(result): retv[i].append(item) - retv = [numpy.concatenate(out) for out in retv] + + if flatten_result: + retv = [numpy.concatenate(out) for out in retv] + if len(retv) == 1: return retv[0] else: From 2e8d47dd09001da94015fb4a96f21452631fcbad Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Thu, 31 Aug 2017 11:01:03 +0800 Subject: [PATCH 161/170] simplify and make quiet in the download of mklml.cmake --- cmake/external/mklml.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/external/mklml.cmake b/cmake/external/mklml.cmake index 77ea244900..74f3279831 100644 --- a/cmake/external/mklml.cmake +++ b/cmake/external/mklml.cmake @@ -54,8 +54,8 @@ ExternalProject_Add( ${EXTERNAL_PROJECT_LOG_ARGS} PREFIX ${MKLML_SOURCE_DIR} DOWNLOAD_DIR ${MKLML_DOWNLOAD_DIR} - DOWNLOAD_COMMAND wget --no-check-certificate ${MKLML_URL} -c -O ${MKLML_VER}.tgz - && tar zxf ${MKLML_DOWNLOAD_DIR}/${MKLML_VER}.tgz + DOWNLOAD_COMMAND wget --no-check-certificate ${MKLML_URL} -c -q -O ${MKLML_VER}.tgz + && tar zxf ${MKLML_VER}.tgz DOWNLOAD_NO_PROGRESS 1 UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLML_INSTALL_ROOT} From 2e97045c2354ea8a6ae39ee17e93098a2ec930d4 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Thu, 31 Aug 2017 14:10:40 +0800 Subject: [PATCH 162/170] fix layers_test.py --- .../tests/configs/file_list.sh | 2 +- ...3d_test_config.py => test_conv3d_layer.py} | 44 +--------------- .../tests/configs/test_deconv3d_layer.py | 50 +++++++++++++++++++ .../tests/layers_test.py | 3 +- 4 files changed, 53 insertions(+), 46 deletions(-) rename python/paddle/trainer_config_helpers/tests/configs/{conv3d_deconv3d_test_config.py => test_conv3d_layer.py} (51%) create mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_deconv3d_layer.py diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index 1ca5c8a07e..729e8e67c2 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -9,6 +9,6 @@ test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer test_kmax_seq_socre_layer test_seq_select_layers test_scale_shift_layer -test_seq_slice_layer) +test_seq_slice_layer test_conv3d_layer test_deconv3d_layer) export whole_configs=(test_split_datasource) diff --git a/python/paddle/trainer_config_helpers/tests/configs/conv3d_deconv3d_test_config.py b/python/paddle/trainer_config_helpers/tests/configs/test_conv3d_layer.py similarity index 51% rename from python/paddle/trainer_config_helpers/tests/configs/conv3d_deconv3d_test_config.py rename to python/paddle/trainer_config_helpers/tests/configs/test_conv3d_layer.py index 15f7c1d271..aa0a2c0d5f 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/conv3d_deconv3d_test_config.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_conv3d_layer.py @@ -14,18 +14,6 @@ padding_y = 1 padding_z = 1 groups = 1 -data1 = data_layer(name='data1', size=2016 * num_channels, height=48, width=42) - -img_conv_layer( - input=data1, - filter_size=filter_size, - num_channels=num_channels, - num_filters=16, - stride=stride, - padding=padding, - act=LinearActivation(), - 
bias_attr=False) - data = data_layer( name='data', size=12096 * num_channels, height=48, width=42, depth=6) # first @@ -58,34 +46,4 @@ conv3d_2 = img_conv3d_layer( trans=False, layer_type="conv3d", act=LinearActivation()) - -# first -deconv3d_1 = img_conv3d_layer( - input=data, - name='deconv3d_1', - num_filters=16, - num_channels=num_channels, - filter_size=filter_size, - stride=stride, - padding=padding, - groups=groups, - bias_attr=True, - shared_biases=True, - trans=False, - layer_type="deconv3d", - act=LinearActivation()) -# second -deconv3d_2 = img_conv3d_layer( - input=data, - name='deconv3d_2', - num_filters=16, - num_channels=num_channels, - filter_size=[filter_size, filter_size_y, filter_size_z], - stride=[stride, stride_y, stride_z], - padding=[padding, padding_y, padding_z], - groups=groups, - bias_attr=True, - shared_biases=True, - trans=False, - layer_type="deconv3d", - act=LinearActivation()) +outputs(conv3d_2) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_deconv3d_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_deconv3d_layer.py new file mode 100644 index 0000000000..a113279fc1 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_deconv3d_layer.py @@ -0,0 +1,50 @@ +from paddle.trainer_config_helpers import * + +settings(batch_size=1000, learning_rate=1e-5) + +num_channels = 3 +filter_size = 3 +filter_size_y = 3 +filter_size_z = 3 +stride = 2 +stride_y = 2 +stride_z = 2 +padding = 1 +padding_y = 1 +padding_z = 1 +groups = 1 + +data = data_layer( + name='data', size=12096 * num_channels, height=48, width=42, depth=6) + +# first +deconv3d_1 = img_conv3d_layer( + input=data, + name='deconv3d_1', + num_filters=16, + num_channels=num_channels, + filter_size=filter_size, + stride=stride, + padding=padding, + groups=groups, + bias_attr=True, + shared_biases=True, + trans=True, + layer_type="deconv3d", + act=LinearActivation()) +# second +deconv3d_2 = img_conv3d_layer( + input=data, + name='deconv3d_2', + num_filters=16, + num_channels=num_channels, + filter_size=[filter_size, filter_size_y, filter_size_z], + stride=[stride, stride_y, stride_z], + padding=[padding, padding_y, padding_z], + groups=groups, + bias_attr=True, + shared_biases=True, + trans=True, + layer_type="deconv3d", + act=LinearActivation()) +outputs(deconv3d_2) diff --git a/python/paddle/trainer_config_helpers/tests/layers_test.py b/python/paddle/trainer_config_helpers/tests/layers_test.py index 44d1c1c9b2..b3dd8f8fc7 100644 --- a/python/paddle/trainer_config_helpers/tests/layers_test.py +++ b/python/paddle/trainer_config_helpers/tests/layers_test.py @@ -16,6 +16,5 @@ from paddle.trainer.config_parser import parse_config_and_serialize if __name__ == '__main__': parse_config_and_serialize( - 'trainer_config_helpers/tests/configs/conv3d_deconv3d_test_config.py', - '') + 'trainer_config_helpers/tests/layers_test_config.py', '') # layers_test_config.py From 36f0aa7390e3044b8e26d1787f99ed5edaf27ed0 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Thu, 31 Aug 2017 13:06:22 +0800 Subject: [PATCH 163/170] fix code style to pass CI. 
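Besides adding braces to single-statement if/else branches, the change marks the BeamExpansion(int) constructor explicit, so an int no longer converts implicitly to BeamExpansion and splitBatchBeams has to construct the fill value itself. A short illustrative sketch of that effect (the demo function and the stripped-down struct are illustrative only, not part of the patch):

    #include <cstddef>
    #include <vector>

    struct BeamExpansion {
      explicit BeamExpansion(int n) : expansionCount(n) {}
      size_t expansionCount;
    };

    void resizeBeams(std::vector<BeamExpansion>& beams,
                     size_t batchSize,
                     int beamExpanCount) {
      // beams.resize(batchSize, beamExpanCount);              // rejected: explicit ctor
      beams.resize(batchSize, BeamExpansion(beamExpanCount));  // ok
    }
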
--- paddle/gserver/layers/CrossEntropyOverBeam.cpp | 11 +++++++---- paddle/gserver/layers/CrossEntropyOverBeam.h | 6 +++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.cpp b/paddle/gserver/layers/CrossEntropyOverBeam.cpp index bffcc30154..4acc077035 100644 --- a/paddle/gserver/layers/CrossEntropyOverBeam.cpp +++ b/paddle/gserver/layers/CrossEntropyOverBeam.cpp @@ -28,8 +28,9 @@ void CostForOneSequence::calValidExpandStep() { start, start + goldRowIds_[i - 1] * beamSize_ + goldColIds_[i - 1], [](const real& val) { return val != -1.; }); - } else + } else { goldRowIds_[i] = 0; + } real* start = beams_->candidateIds[i]->getData() + goldRowIds_[i] * beamSize_; @@ -288,7 +289,7 @@ void CrossEntropyOverBeam::copyInputsToCpu() { void CrossEntropyOverBeam::splitBatchBeams() { beamCosts_.resize(batchSize_); - beamPerSeq_.resize(batchSize_, beamExpanCount_); + beamPerSeq_.resize(batchSize_, BeamExpansion(beamExpanCount_)); for (size_t i = 0; i < beamExpanCount_; ++i) { int* seqStarts = @@ -300,8 +301,9 @@ void CrossEntropyOverBeam::splitBatchBeams() { subSeqStarts = getInput(i * 3).subSequenceStartPositions->getMutableData(false); maxLen = getInput(i * 3).subSequenceStartPositions->getSize() - 1; - } else + } else { maxLen = getInput(i).sequenceStartPositions->getSize() - 1; + } for (size_t j = 0; j < batchSize_; ++j) { beamPerSeq_[j].scores[i] = @@ -348,8 +350,9 @@ void CrossEntropyOverBeam::resizeOutput() { inGrad->getWidth(), false, false); - } else + } else { candidateScoreGrad_[i] = std::move(inGrad); + } candidateScoreGrad_[i]->zeroMem(); } } diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.h b/paddle/gserver/layers/CrossEntropyOverBeam.h index 5d0cffee3c..5643556f43 100644 --- a/paddle/gserver/layers/CrossEntropyOverBeam.h +++ b/paddle/gserver/layers/CrossEntropyOverBeam.h @@ -31,7 +31,7 @@ struct BeamExpansion { size_t expansionCount; - BeamExpansion(int n) { + explicit BeamExpansion(int n) { expansionCount = n; scores.resize(expansionCount); seqInfo.resize(expansionCount); @@ -39,7 +39,7 @@ struct BeamExpansion { scoreGrad.resize(expansionCount); gold.resize(expansionCount); - }; + } }; typedef std::shared_ptr BeamExpansionPtr; @@ -74,7 +74,7 @@ private: CHECK_GT(beams_->seqInfo[beamId]->getSize() - 1, rowId); int* starts = beams_->seqInfo[beamId]->getData(); return starts[rowId] - starts[0]; - }; + } size_t beamSize_; size_t validExpansionCount_; From d747c5d5119b7e564b9b7dcc7d7528ac91972712 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Thu, 31 Aug 2017 13:57:59 +0800 Subject: [PATCH 164/170] fix layers_test.py --- paddle/cuda/src/hl_cuda_cnn.cu | 5 +++-- paddle/parameter/Argument.h | 3 +++ .../paddle/trainer_config_helpers/tests/configs/file_list.sh | 2 +- python/paddle/trainer_config_helpers/tests/layers_test.py | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/paddle/cuda/src/hl_cuda_cnn.cu b/paddle/cuda/src/hl_cuda_cnn.cu index 95440c9446..9ba3d14261 100644 --- a/paddle/cuda/src/hl_cuda_cnn.cu +++ b/paddle/cuda/src/hl_cuda_cnn.cu @@ -1,8 +1,11 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -350,7 +353,6 @@ void hl_avgpool_backward(const int frameCnt, CHECK_SYNC("hl_avgpool_backward failed"); } -///////////////// __global__ void KeMaxPool3DForward(const int nthreads, const real* inputData, const int channels, @@ -777,7 +779,6 @@ void hl_avgpool3D_backward(const int frameCnt, outStride); CHECK_SYNC("hl_avgpool3D_backward failed"); } -///////////////// __global__ void KeBilinearInterpFw(const real* in, const size_t inImgH, diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h index 7b59199dde..9ed63462b1 100644 --- a/paddle/parameter/Argument.h +++ b/paddle/parameter/Argument.h @@ -1,8 +1,11 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index 1ca5c8a07e..e7dc08c6dd 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -9,6 +9,6 @@ test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer test_kmax_seq_socre_layer test_seq_select_layers test_scale_shift_layer -test_seq_slice_layer) +test_seq_slice_layer test_pooling3D_layer) export whole_configs=(test_split_datasource) diff --git a/python/paddle/trainer_config_helpers/tests/layers_test.py b/python/paddle/trainer_config_helpers/tests/layers_test.py index 52218972bf..05902ea293 100644 --- a/python/paddle/trainer_config_helpers/tests/layers_test.py +++ b/python/paddle/trainer_config_helpers/tests/layers_test.py @@ -16,4 +16,4 @@ from paddle.trainer.config_parser import parse_config_and_serialize if __name__ == '__main__': parse_config_and_serialize( - 'trainer_config_helpers/tests/configs/test_pooling3D_layer.py', '') + 'trainer_config_helpers/tests/layers_test_config.py', '') From 3e1f56fa2bfdaddc42dc716c099ffdce229a7068 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Thu, 31 Aug 2017 15:44:24 +0800 Subject: [PATCH 165/170] don't need to specify the path of libwarpctc.so, refine the python api doc --- python/paddle/trainer_config_helpers/layers.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index e73098910c..b2ba16333b 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -5065,17 +5065,6 @@ def warp_ctc_layer(input, building process, PaddlePaddle will clone the source codes, build and install it to :code:`third_party/install/warpctc` directory. 
- To use warp_ctc layer, you need to specify the path of :code:`libwarpctc.so`, - using following methods: - - 1. Set it in :code:`paddle.init` (python api) or :code:`paddle_init` (c api), - such as :code:`paddle.init(use_gpu=True, - warpctc_dir=your_paddle_source_dir/third_party/install/warpctc/lib)`. - - 2. Set environment variable LD_LIBRARY_PATH on Linux or DYLD_LIBRARY_PATH - on Mac OS. For instance, :code:`export - LD_LIBRARY_PATH=your_paddle_source_dir/third_party/install/warpctc/lib:$LD_LIBRARY_PATH`. - More details of CTC can be found by referring to `Connectionist Temporal Classification: Labelling Unsegmented Sequence Data with Recurrent Neural Networks Date: Thu, 31 Aug 2017 16:33:01 +0800 Subject: [PATCH 166/170] Add test_conv3d_layer.protostr,test_deconv3d_layer.protostr --- .../protostr/test_conv3d_layer.protostr | 132 ++++++++++++++++++ .../protostr/test_deconv3d_layer.protostr | 132 ++++++++++++++++++ 2 files changed, 264 insertions(+) create mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_conv3d_layer.protostr create mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_deconv3d_layer.protostr diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_conv3d_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_conv3d_layer.protostr new file mode 100644 index 0000000000..9fe2bc29d3 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_conv3d_layer.protostr @@ -0,0 +1,132 @@ +type: "nn" +layers { + name: "data" + type: "data" + size: 36288 + active_type: "" + height: 48 + width: 42 + depth: 6 +} +layers { + name: "conv3d_1" + type: "conv3d" + size: 24192 + active_type: "" + inputs { + input_layer_name: "data" + input_parameter_name: "_conv3d_1.w0" + conv_conf { + filter_size: 3 + channels: 3 + stride: 2 + padding: 1 + groups: 1 + filter_channels: 3 + output_x: 21 + img_size: 42 + caffe_mode: true + filter_size_y: 3 + padding_y: 1 + stride_y: 2 + output_y: 24 + img_size_y: 48 + filter_size_z: 3 + padding_z: 1 + stride_z: 2 + output_z: 3 + img_size_z: 6 + } + } + bias_parameter_name: "_conv3d_1.wbias" + num_filters: 16 + shared_biases: true + height: 24 + width: 21 + depth: 3 +} +layers { + name: "conv3d_2" + type: "conv3d" + size: 24192 + active_type: "" + inputs { + input_layer_name: "data" + input_parameter_name: "_conv3d_2.w0" + conv_conf { + filter_size: 3 + channels: 3 + stride: 2 + padding: 1 + groups: 1 + filter_channels: 3 + output_x: 21 + img_size: 42 + caffe_mode: true + filter_size_y: 3 + padding_y: 1 + stride_y: 2 + output_y: 24 + img_size_y: 48 + filter_size_z: 3 + padding_z: 1 + stride_z: 2 + output_z: 3 + img_size_z: 6 + } + } + bias_parameter_name: "_conv3d_2.wbias" + num_filters: 16 + shared_biases: true + height: 24 + width: 21 + depth: 3 +} +parameters { + name: "_conv3d_1.w0" + size: 1296 + initial_mean: 0.0 + initial_std: 0.272165526976 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_conv3d_1.wbias" + size: 16 + initial_mean: 0.0 + initial_std: 0.0 + dims: 16 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_conv3d_2.w0" + size: 1296 + initial_mean: 0.0 + initial_std: 0.272165526976 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_conv3d_2.wbias" + size: 16 + initial_mean: 0.0 + initial_std: 0.0 + dims: 16 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "data" +output_layer_names: "conv3d_2" +sub_models { + name: 
"root" + layer_names: "data" + layer_names: "conv3d_1" + layer_names: "conv3d_2" + input_layer_names: "data" + output_layer_names: "conv3d_2" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_deconv3d_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_deconv3d_layer.protostr new file mode 100644 index 0000000000..7bf409731c --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_deconv3d_layer.protostr @@ -0,0 +1,132 @@ +type: "nn" +layers { + name: "data" + type: "data" + size: 36288 + active_type: "" + height: 48 + width: 42 + depth: 6 +} +layers { + name: "deconv3d_1" + type: "deconv3d" + size: 1387760 + active_type: "" + inputs { + input_layer_name: "data" + input_parameter_name: "_deconv3d_1.w0" + conv_conf { + filter_size: 3 + channels: 3 + stride: 2 + padding: 1 + groups: 1 + filter_channels: 16 + output_x: 42 + img_size: 83 + caffe_mode: true + filter_size_y: 3 + padding_y: 1 + stride_y: 2 + output_y: 48 + img_size_y: 95 + filter_size_z: 3 + padding_z: 1 + stride_z: 2 + output_z: 6 + img_size_z: 11 + } + } + bias_parameter_name: "_deconv3d_1.wbias" + num_filters: 16 + shared_biases: true + height: 95 + width: 83 + depth: 11 +} +layers { + name: "deconv3d_2" + type: "deconv3d" + size: 1387760 + active_type: "" + inputs { + input_layer_name: "data" + input_parameter_name: "_deconv3d_2.w0" + conv_conf { + filter_size: 3 + channels: 3 + stride: 2 + padding: 1 + groups: 1 + filter_channels: 16 + output_x: 42 + img_size: 83 + caffe_mode: true + filter_size_y: 3 + padding_y: 1 + stride_y: 2 + output_y: 48 + img_size_y: 95 + filter_size_z: 3 + padding_z: 1 + stride_z: 2 + output_z: 6 + img_size_z: 11 + } + } + bias_parameter_name: "_deconv3d_2.wbias" + num_filters: 16 + shared_biases: true + height: 95 + width: 83 + depth: 11 +} +parameters { + name: "_deconv3d_1.w0" + size: 6912 + initial_mean: 0.0 + initial_std: 0.272165526976 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_deconv3d_1.wbias" + size: 16 + initial_mean: 0.0 + initial_std: 0.0 + dims: 16 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_deconv3d_2.w0" + size: 6912 + initial_mean: 0.0 + initial_std: 0.272165526976 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_deconv3d_2.wbias" + size: 16 + initial_mean: 0.0 + initial_std: 0.0 + dims: 16 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "data" +output_layer_names: "deconv3d_2" +sub_models { + name: "root" + layer_names: "data" + layer_names: "deconv3d_1" + layer_names: "deconv3d_2" + input_layer_names: "data" + output_layer_names: "deconv3d_2" + is_recurrent_layer_group: false +} + From d394a1447125af9f6fc2b43c936d36a61662dc0e Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Thu, 31 Aug 2017 17:05:39 +0800 Subject: [PATCH 167/170] Remove unused incluing file net/if_arp.h. --- paddle/pserver/LightNetwork.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/paddle/pserver/LightNetwork.cpp b/paddle/pserver/LightNetwork.cpp index 8616fd2d5a..4203f26164 100644 --- a/paddle/pserver/LightNetwork.cpp +++ b/paddle/pserver/LightNetwork.cpp @@ -22,7 +22,6 @@ limitations under the License. 
*/ #include #include -#include #include #include From 3bafa42b1afe10aa6ab712d1d258bb079ac814ea Mon Sep 17 00:00:00 2001 From: qijun Date: Thu, 31 Aug 2017 18:08:44 +0800 Subject: [PATCH 168/170] fix tensor copyfrom bug --- paddle/framework/tensor_impl.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h index 7d7263b899..7893e233b7 100644 --- a/paddle/framework/tensor_impl.h +++ b/paddle/framework/tensor_impl.h @@ -117,6 +117,8 @@ inline void Tensor::CopyFrom(const Tensor& src, memory::Copy(boost::get(dst_place), dst_ptr, boost::get(src_place), src_ptr, size, 0); } + PADDLE_ENFORCE(cudaStreamSynchronize(0), + "cudaStreamSynchronize failed in Tensor CopyFrom"); #endif } From c54c7d91a0c098bf22ba399aee15ebb421de1bfb Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Fri, 1 Sep 2017 16:01:53 +0800 Subject: [PATCH 169/170] Use template to deliver const argument instead, to remove the compiling error "argument to __builtin_neon_vgetq_lane_f32 must be a constant integer". --- paddle/function/neon/NeonDepthwiseConv.cpp | 100 ++++++++++----------- paddle/function/neon/neon_util.h | 4 +- 2 files changed, 52 insertions(+), 52 deletions(-) diff --git a/paddle/function/neon/NeonDepthwiseConv.cpp b/paddle/function/neon/NeonDepthwiseConv.cpp index f09e98587d..14e5198e1b 100644 --- a/paddle/function/neon/NeonDepthwiseConv.cpp +++ b/paddle/function/neon/NeonDepthwiseConv.cpp @@ -116,15 +116,15 @@ struct DepthwiseConvKernel<3, 1> { float32x4_t tmp1 = vdupq_n_f32(0.f); float32x4_t tmp2 = vdupq_n_f32(0.f); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][0], k[1], 0); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][1], k[1], 1); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][2], k[1], 2); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[0][0], k[0]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[0][1], k[0]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[0][2], k[0]); + tmp2 = vmlaq_laneq_f32<0>(tmp2, input[1][0], k[1]); + tmp1 = vmlaq_laneq_f32<1>(tmp1, input[1][1], k[1]); + tmp2 = vmlaq_laneq_f32<2>(tmp2, input[1][2], k[1]); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[2][0], k[2]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[2][1], k[2]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[2][2], k[2]); tmp1 = vaddq_f32(tmp1, tmp2); vst1q_f32(outputData, tmp1); @@ -223,15 +223,15 @@ struct DepthwiseConvKernel<3, 2> { float32x4_t tmp1 = vdupq_n_f32(0.f); float32x4_t tmp2 = vdupq_n_f32(0.f); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][0], k[1], 0); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][1], k[1], 1); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][2], k[1], 2); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[0][0], k[0]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[0][1], k[0]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[0][2], k[0]); + tmp2 = vmlaq_laneq_f32<0>(tmp2, input[1][0], k[1]); + tmp1 = vmlaq_laneq_f32<1>(tmp1, input[1][1], k[1]); + tmp2 = vmlaq_laneq_f32<2>(tmp2, input[1][2], k[1]); + tmp1 = 
vmlaq_laneq_f32<0>(tmp1, input[2][0], k[2]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[2][1], k[2]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[2][2], k[2]); tmp1 = vaddq_f32(tmp1, tmp2); vst1q_f32(outputData, tmp1); @@ -316,22 +316,22 @@ struct DepthwiseConvKernel<4, 1> { float32x4_t tmp1 = vdupq_n_f32(0.f); float32x4_t tmp2 = vdupq_n_f32(0.f); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][3], k[0], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][0], k[1], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][1], k[1], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][2], k[1], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][3], k[1], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][3], k[2], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[3][0], k[3], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[3][1], k[3], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[3][2], k[3], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[3][3], k[3], 3); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[0][0], k[0]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[0][1], k[0]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[0][2], k[0]); + tmp2 = vmlaq_laneq_f32<3>(tmp2, input[0][3], k[0]); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[1][0], k[1]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[1][1], k[1]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[1][2], k[1]); + tmp2 = vmlaq_laneq_f32<3>(tmp2, input[1][3], k[1]); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[2][0], k[2]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[2][1], k[2]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[2][2], k[2]); + tmp2 = vmlaq_laneq_f32<3>(tmp2, input[2][3], k[2]); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[3][0], k[3]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[3][1], k[3]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[3][2], k[3]); + tmp2 = vmlaq_laneq_f32<3>(tmp2, input[3][3], k[3]); tmp1 = vaddq_f32(tmp1, tmp2); vst1q_f32(outputData, tmp1); @@ -431,22 +431,22 @@ struct DepthwiseConvKernel<4, 2> { float32x4_t tmp1 = vdupq_n_f32(0.f); float32x4_t tmp2 = vdupq_n_f32(0.f); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][3], k[0], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][0], k[1], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][1], k[1], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][2], k[1], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][3], k[1], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][3], k[2], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[3][0], k[3], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[3][1], k[3], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[3][2], k[3], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[3][3], k[3], 3); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[0][0], k[0]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[0][1], k[0]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[0][2], k[0]); + tmp2 = vmlaq_laneq_f32<3>(tmp2, input[0][3], k[0]); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[1][0], k[1]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[1][1], k[1]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[1][2], k[1]); + tmp2 = vmlaq_laneq_f32<3>(tmp2, input[1][3], 
k[1]); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[2][0], k[2]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[2][1], k[2]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[2][2], k[2]); + tmp2 = vmlaq_laneq_f32<3>(tmp2, input[2][3], k[2]); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[3][0], k[3]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[3][1], k[3]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[3][2], k[3]); + tmp2 = vmlaq_laneq_f32<3>(tmp2, input[3][3], k[3]); tmp1 = vaddq_f32(tmp1, tmp2); vst1q_f32(outputData, tmp1); diff --git a/paddle/function/neon/neon_util.h b/paddle/function/neon/neon_util.h index 56b3febe2d..dbe017170b 100644 --- a/paddle/function/neon/neon_util.h +++ b/paddle/function/neon/neon_util.h @@ -33,10 +33,10 @@ inline float32_t vaddvq_f32(float32x4_t a) { return vget_lane_f32(vpadd_f32(v, v), 0); } +template inline float32x4_t vmlaq_laneq_f32(float32x4_t a, float32x4_t b, - float32x4_t v, - const int lane) { + float32x4_t v) { return vmlaq_n_f32(a, b, vgetq_lane_f32(v, lane)); } #endif From 8b15ac82fa831f95493c2bd218b93655db0d739e Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Fri, 1 Sep 2017 17:50:01 +0800 Subject: [PATCH 170/170] Move the definition of hl_cpu_gru_forward and hl_cpu_gru_backward to function/GruFunctor.h. --- paddle/cuda/include/hl_cpu_gru.cuh | 134 --------------------- paddle/function/GruFunctor.h | 160 +++++++++++++++++++++++++ paddle/gserver/layers/GruCompute.cpp | 32 ++--- paddle/scripts/docker/build_android.sh | 25 +--- 4 files changed, 181 insertions(+), 170 deletions(-) create mode 100644 paddle/function/GruFunctor.h diff --git a/paddle/cuda/include/hl_cpu_gru.cuh b/paddle/cuda/include/hl_cpu_gru.cuh index 732799a28b..347b038598 100644 --- a/paddle/cuda/include/hl_cpu_gru.cuh +++ b/paddle/cuda/include/hl_cpu_gru.cuh @@ -18,14 +18,6 @@ limitations under the License. 
*/ #ifndef __NVCC__ -#include "paddle/math/MathFunctions.h" - -// #ifndef PADDLE_TYPE_DOUBLE -// #define CBLAS_GEMM paddle::gemm -// #else -// #define CBLAS_GEMM paddle::gemm -// #endif - template void hl_naive_gru_forward_reset_output(OpResetOutput opResetOutput, real *gateValue, @@ -210,51 +202,6 @@ inline void forward_final_output(OpFinalOutput opFinalOutput, } } -template -void hl_cpu_gru_forward(OpResetOutput opResetOutput, - OpFinalOutput opFinalOutput, - hl_gru_value value, - int frameSize, - int batchSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate) { - if (value.prevOutValue) { -// CBLAS_GEMM(CblasNoTrans, -// CblasNoTrans, -// batchSize, -// 2 * frameSize, -// frameSize, -// 1, -// value.prevOutValue, -// frameSize, -// value.gateWeight, -// frameSize * 2, -// 1, -// value.gateValue, -// frameSize * 3); - } - - forward_reset_output(opResetOutput, value, frameSize, batchSize, active_gate); - - if (value.prevOutValue) { -// CBLAS_GEMM(CblasNoTrans, -// CblasNoTrans, -// batchSize, -// frameSize, -// frameSize, -// 1, -// value.resetOutputValue, -// frameSize, -// value.stateWeight, -// frameSize, -// 1, -// value.gateValue + frameSize * 2, -// frameSize * 3); - } - - forward_final_output(opFinalOutput, value, frameSize, batchSize, active_node); -} - template void hl_naive_gru_backward_state_grad(OpStateGrad opStateGrad, real *gateValue, @@ -524,87 +471,6 @@ inline void backward_reset_grad(OpResetGrad opResetGrad, } } } - -template -void hl_cpu_gru_backward(OpStateGrad opStateGrad, - OpResetGrad opResetGrad, - hl_gru_value value, - hl_gru_grad grad, - int frameSize, - int batchSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate) { - backward_state_grad(opStateGrad, value, grad, - frameSize, batchSize, active_node); - - if (value.prevOutValue && grad.prevOutGrad) { -// CBLAS_GEMM(CblasNoTrans, -// CblasTrans, -// batchSize, -// frameSize, -// frameSize, -// 1, -// grad.gateGrad + frameSize * 2, -// frameSize * 3, -// value.stateWeight, -// frameSize, -// 0, -// grad.resetOutputGrad, -// frameSize); - - if (grad.stateWeightGrad) { -// CBLAS_GEMM(CblasTrans, -// CblasNoTrans, -// frameSize, -// frameSize, -// batchSize, -// 1, -// value.resetOutputValue, -// frameSize, -// grad.gateGrad + frameSize * 2, -// frameSize * 3, -// 1, -// grad.stateWeightGrad, -// frameSize); - } - } - - backward_reset_grad(opResetGrad, value, grad, - frameSize, batchSize, active_gate); - - if (grad.prevOutGrad && value.prevOutValue) { -// CBLAS_GEMM(CblasNoTrans, -// CblasTrans, -// batchSize, -// frameSize, -// frameSize * 2, -// 1, -// grad.gateGrad, -// frameSize * 3, -// value.gateWeight, -// frameSize * 2, -// 1, -// grad.prevOutGrad, -// frameSize); - - if (grad.gateWeightGrad) { -// CBLAS_GEMM(CblasTrans, -// CblasNoTrans, -// frameSize, -// frameSize * 2, -// batchSize, -// 1, -// value.prevOutValue, -// frameSize, -// grad.gateGrad, -// frameSize * 3, -// 1, -// grad.gateWeightGrad, -// frameSize * 2); - } - } -} - #endif #endif // HL_CPU_GRU_CUH_ diff --git a/paddle/function/GruFunctor.h b/paddle/function/GruFunctor.h new file mode 100644 index 0000000000..11f6174dbd --- /dev/null +++ b/paddle/function/GruFunctor.h @@ -0,0 +1,160 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "GemmFunctor.h"
+#include "GruFunctor.h"
+#include "hl_cpu_gru.cuh"
+
+namespace paddle {
+
+template <DeviceType Device, class T>
+struct GruFunctor {
+  template <class OpResetOutput, class OpFinalOutput>
+  static void compute(OpResetOutput opResetOutput,
+                      OpFinalOutput opFinalOutput,
+                      hl_gru_value value,
+                      int frameSize,
+                      int batchSize,
+                      hl_activation_mode_t active_node,
+                      hl_activation_mode_t active_gate) {
+#ifndef __NVCC__
+    if (value.prevOutValue) {
+      BlasGemm<Device, T>::compute(false,
+                                   false,
+                                   batchSize,
+                                   2 * frameSize,
+                                   frameSize,
+                                   1,
+                                   value.prevOutValue,
+                                   frameSize,
+                                   value.gateWeight,
+                                   frameSize * 2,
+                                   1,
+                                   value.gateValue,
+                                   frameSize * 3);
+    }
+
+    forward_reset_output(
+        opResetOutput, value, frameSize, batchSize, active_gate);
+
+    if (value.prevOutValue) {
+      BlasGemm<Device, T>::compute(false,
+                                   false,
+                                   batchSize,
+                                   frameSize,
+                                   frameSize,
+                                   1,
+                                   value.resetOutputValue,
+                                   frameSize,
+                                   value.stateWeight,
+                                   frameSize,
+                                   1,
+                                   value.gateValue + frameSize * 2,
+                                   frameSize * 3);
+    }
+
+    forward_final_output(
+        opFinalOutput, value, frameSize, batchSize, active_node);
+#endif
+  }
+};
+
+template <DeviceType Device, class T>
+struct GruGradFunctor {
+  template <class OpStateGrad, class OpResetGrad>
+  static void compute(OpStateGrad opStateGrad,
+                      OpResetGrad opResetGrad,
+                      hl_gru_value value,
+                      hl_gru_grad grad,
+                      int frameSize,
+                      int batchSize,
+                      hl_activation_mode_t active_node,
+                      hl_activation_mode_t active_gate) {
+#ifndef __NVCC__
+    backward_state_grad(
+        opStateGrad, value, grad, frameSize, batchSize, active_node);
+
+    if (value.prevOutValue && grad.prevOutGrad) {
+      BlasGemm<Device, T>::compute(false,
+                                   true,
+                                   batchSize,
+                                   frameSize,
+                                   frameSize,
+                                   1,
+                                   grad.gateGrad + frameSize * 2,
+                                   frameSize * 3,
+                                   value.stateWeight,
+                                   frameSize,
+                                   0,
+                                   grad.resetOutputGrad,
+                                   frameSize);
+
+      if (grad.stateWeightGrad) {
+        BlasGemm<Device, T>::compute(true,
+                                     false,
+                                     frameSize,
+                                     frameSize,
+                                     batchSize,
+                                     1,
+                                     value.resetOutputValue,
+                                     frameSize,
+                                     grad.gateGrad + frameSize * 2,
+                                     frameSize * 3,
+                                     1,
+                                     grad.stateWeightGrad,
+                                     frameSize);
+      }
+    }
+
+    backward_reset_grad(
+        opResetGrad, value, grad, frameSize, batchSize, active_gate);
+
+    if (grad.prevOutGrad && value.prevOutValue) {
+      BlasGemm<Device, T>::compute(false,
+                                   true,
+                                   batchSize,
+                                   frameSize,
+                                   frameSize * 2,
+                                   1,
+                                   grad.gateGrad,
+                                   frameSize * 3,
+                                   value.gateWeight,
+                                   frameSize * 2,
+                                   1,
+                                   grad.prevOutGrad,
+                                   frameSize);
+
+      if (grad.gateWeightGrad) {
+        BlasGemm<Device, T>::compute(true,
+                                     false,
+                                     frameSize,
+                                     frameSize * 2,
+                                     batchSize,
+                                     1,
+                                     value.prevOutValue,
+                                     frameSize,
+                                     grad.gateGrad,
+                                     frameSize * 3,
+                                     1,
+                                     grad.gateWeightGrad,
+                                     frameSize * 2);
+      }
+    }
+#endif
+  }
+};
+
+}  // namespace paddle
diff --git a/paddle/gserver/layers/GruCompute.cpp b/paddle/gserver/layers/GruCompute.cpp
index 06907768e9..148516391c 100644
--- a/paddle/gserver/layers/GruCompute.cpp
+++ b/paddle/gserver/layers/GruCompute.cpp
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #include "GruCompute.h"
 #include "hl_recurrent_apply.cuh"
+#include "paddle/function/GruFunctor.h"
 #include "paddle/utils/Util.h"
 
 namespace paddle {
@@ -25,13 +26,13 @@ void GruCompute::init(LayerConfig &config) {
 
 template <>
 void GruCompute::forward<0>(hl_gru_value value, int frameSize, int batchSize) {
-  hl_cpu_gru_forward(hppl::forward::gru_resetOutput(),
-                     hppl::forward::gru_finalOutput(),
-                     value,
-                     frameSize,
-                     batchSize,
-                     activeNode_,
-                     activeGate_);
+  GruFunctor<DEVICE_TYPE_CPU, real>::compute(hppl::forward::gru_resetOutput(),
+                                             hppl::forward::gru_finalOutput(),
+                                             value,
+                                             frameSize,
+                                             batchSize,
+                                             activeNode_,
+                                             activeGate_);
 }
 
 template <>
@@ -39,14 +40,15 @@ void GruCompute::backward<0>(hl_gru_value value,
                              hl_gru_grad grad,
                              int frameSize,
                              int batchSize) {
-  hl_cpu_gru_backward(hppl::backward::gru_stateGrad(),
-                      hppl::backward::gru_resetGrad(),
-                      value,
-                      grad,
-                      frameSize,
-                      batchSize,
-                      activeNode_,
-                      activeGate_);
+  GruGradFunctor<DEVICE_TYPE_CPU, real>::compute(
+      hppl::backward::gru_stateGrad(),
+      hppl::backward::gru_resetGrad(),
+      value,
+      grad,
+      frameSize,
+      batchSize,
+      activeNode_,
+      activeGate_);
 }
 
 }  // namespace paddle
diff --git a/paddle/scripts/docker/build_android.sh b/paddle/scripts/docker/build_android.sh
index a61c7c40e9..34e31f1394 100644
--- a/paddle/scripts/docker/build_android.sh
+++ b/paddle/scripts/docker/build_android.sh
@@ -2,25 +2,8 @@
 
 set -xe
 
-COMPILER=gcc
-USE_EIGEN=ON
-if [ $COMPILER == clang ]; then
-  SUFFIX=_clang
-  C_COMPILER=clang
-  CXX_COMPILER=clang++
-else
-  SUFFIX=_gcc
-  C_COMPILER=gcc
-  CXX_COMPILER=g++
-fi
-if [ $USE_EIGEN == ON ]; then
-  SUFFIX=${SUFFIX}_eigen
-else
-  SUFFIX=${SUFFIX}_openblas
-fi
-
-BUILD_ROOT=/paddle/build_android$SUFFIX
-DEST_ROOT=/paddle/install$SUFFIX
+BUILD_ROOT=/paddle/build_android
+DEST_ROOT=/paddle/install
 
 rm -rf $BUILD_ROOT 2>/dev/null || true
 mkdir -p $BUILD_ROOT
@@ -41,7 +24,7 @@ if [ $ANDROID_ABI == "armeabi-v7a" ]; then
           -DCMAKE_INSTALL_PREFIX=$DEST_ROOT \
           -DTHIRD_PARTY_PATH=$THIRD_PARTY_PATH \
           -DCMAKE_BUILD_TYPE=Release \
-          -DUSE_EIGEN_FOR_BLAS=${USE_EIGEN} \
+          -DUSE_EIGEN_FOR_BLAS=ON \
          -DWITH_C_API=ON \
          -DWITH_SWIG_PY=OFF \
          -DWITH_STYLE_CHECK=OFF \
@@ -58,7 +41,7 @@ elif [ $ANDROID_ABI == "arm64-v8a" ]; then
          -DCMAKE_INSTALL_PREFIX=$DEST_ROOT \
          -DTHIRD_PARTY_PATH=$THIRD_PARTY_PATH \
          -DCMAKE_BUILD_TYPE=Release \
-          -DUSE_EIGEN_FOR_BLAS=${USE_EIGEN} \
+          -DUSE_EIGEN_FOR_BLAS=OFF \
          -DWITH_C_API=ON \
          -DWITH_SWIG_PY=OFF \
          ..