From 0e45f952a29d9e9e02545e06cf81218c2992cc11 Mon Sep 17 00:00:00 2001
From: hedaoyuan
Date: Wed, 14 Jun 2017 13:44:05 +0800
Subject: [PATCH 001/542] Add a NNPACKConvFunction.

---
 paddle/function/nnpack/NNPACKConvOp.cpp | 224 ++++++++++++++++++++++++
 1 file changed, 224 insertions(+)
 create mode 100644 paddle/function/nnpack/NNPACKConvOp.cpp

diff --git a/paddle/function/nnpack/NNPACKConvOp.cpp b/paddle/function/nnpack/NNPACKConvOp.cpp
new file mode 100644
index 0000000000..57a6681f29
--- /dev/null
+++ b/paddle/function/nnpack/NNPACKConvOp.cpp
@@ -0,0 +1,224 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "ConvOp.h"
+#include "nnpack.h"
+
+DEFINE_bool(nnpack_allocate_outside,
+            false,
+            "Allocate and free workspace memory outside the NNPACK interface.");
+DEFINE_int32(nnpack_num_threads,
+             0,
+             "The number of nnpack threads; "
+             "default: 0 (0 disables the threadpool).");
+
+namespace paddle {
+
+nnp_convolution_algorithm get_nnp_convolution_algorithm(
+    const std::string& algorithm) {
+  if (algorithm == "auto") {
+    return nnp_convolution_algorithm_auto;
+  } else if (algorithm == "ft8x8") {
+    return nnp_convolution_algorithm_ft8x8;
+  } else if (algorithm == "ft16x16") {
+    return nnp_convolution_algorithm_ft16x16;
+  } else if (algorithm == "wt8x8") {
+    return nnp_convolution_algorithm_wt8x8;
+  } else if (algorithm == "implicit-gemm") {
+    return nnp_convolution_algorithm_implicit_gemm;
+  } else if (algorithm == "direct") {
+    return nnp_convolution_algorithm_direct;
+  } else {
+    return nnp_convolution_algorithm_auto;
+  }
+}
+
+template <DeviceType Device>
+class NNPACKConvFunction : public ConvFunctionBase {
+public:
+  void init(const FuncConfig& config) override {
+    ConvFunctionBase::init(config);
+    CHECK_EQ(groups_, (size_t)1);
+    algorithm_ = get_nnp_convolution_algorithm(config.get<std::string>("algo"));
+    transform_strategy_ = nnp_convolution_transform_strategy_compute;
+    nnp_status status = nnp_initialize();
+    CHECK_EQ(status, nnp_status_success);
+    workspaceBuffer_ = nullptr;
+    workspaceSize_ = 0;
+
+    threadpool_ = nullptr;
+    if (FLAGS_nnpack_num_threads) {
+      threadpool_ = pthreadpool_create(FLAGS_nnpack_num_threads);
+      VLOG(3) << "Number of threads "
+              << pthreadpool_get_threads_count(threadpool_);
+    }
+  }
+
+  ~NNPACKConvFunction() {
+    if (threadpool_) {
+      pthreadpool_destroy(threadpool_);
+    }
+    // Release any workspace allocated on the slow path.
+    if (workspaceBuffer_) {
+      free(workspaceBuffer_);
+    }
+  }
+
+  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
+    CHECK_EQ(numInputs_, inputs.size());
+    CHECK_EQ(numOutputs_, outputs.size());
+    CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO);
+    const TensorShape& input = inputs[0].shape();
+    const TensorShape& filter = inputs[1].shape();
+    const TensorShape& output = outputs[0].shape();
+    check(input, filter, output);
+
+    size_t batchSize = input[0];
+    size_t inputChannels = input[1];
+    size_t inputHeight = input[2];
+    size_t inputWidth = input[3];
+    size_t filterHeight = getFilterHeight(filter);
+    size_t filterWidth = getFilterWidth(filter);
+    size_t outputChannels = output[1];
+    // size_t outputHeight = output[2];
+    // size_t outputWidth = output[3];
+
+    nnp_size inputSize = {.width = inputWidth, .height = inputHeight};
+    nnp_padding padding = {.top = paddingH(),
+                           .right = paddingW(),
+                           .bottom = paddingH(),
+                           .left = paddingW()};
+    nnp_size kernelSize = {.width = filterWidth, .height = filterHeight};
+    nnp_size outputSubsampling = {.width = strideW(), .height = strideH()};
+
+    float* inputData = inputs[0].data<float>();
+    float* filterData = inputs[1].data<float>();
+    float* outputData = outputs[0].data<float>();
+
+    void* bufferPtr = nullptr;
+    size_t* sizePtr = nullptr;
+    size_t needSize;
+    if (FLAGS_nnpack_allocate_outside) {
+      if (batchSize == 1) {
+        nnp_status status = nnp_convolution_inference(algorithm_,
+                                                      transform_strategy_,
+                                                      inputChannels,
+                                                      outputChannels,
+                                                      inputSize,
+                                                      padding,
+                                                      kernelSize,
+                                                      outputSubsampling,
+                                                      nullptr,
+                                                      nullptr,
+                                                      nullptr,
+                                                      nullptr,
+                                                      nullptr,
+                                                      &needSize,
+                                                      nnp_activation_identity,
+                                                      nullptr,
+                                                      nullptr,
+                                                      nullptr);
+        CHECK_EQ(status, nnp_status_success);
+      } else {
+        // only supports stride = 1
+        CHECK_EQ(stride_, 1);
+        nnp_status status = nnp_convolution_output(algorithm_,
+                                                   batchSize,
+                                                   inputChannels,
+                                                   outputChannels,
+                                                   inputSize,
+                                                   padding,
+                                                   kernelSize,
+                                                   nullptr,
+                                                   nullptr,
+                                                   nullptr,
+                                                   nullptr,
+                                                   nullptr,
+                                                   &needSize,
+                                                   nnp_activation_identity,
+                                                   nullptr,
+                                                   nullptr,
+                                                   nullptr);
+        CHECK_EQ(status, nnp_status_success);
+      }
+
+      LOG(INFO) << "workspace size is " << needSize;
+      if (needSize > workspaceSize_) {
+        workspaceSize_ = needSize;
+        // Free any smaller buffer first, then allocate a 64-byte aligned
+        // workspace large enough for this call; freeing without
+        // reallocating would leave workspaceBuffer_ dangling.
+        if (workspaceBuffer_) {
+          free(workspaceBuffer_);
+          workspaceBuffer_ = nullptr;
+        }
+        CHECK_EQ(posix_memalign(&workspaceBuffer_, 64, needSize), 0);
+      }
+
+      if (needSize) {
+        bufferPtr = workspaceBuffer_;
+        sizePtr = &needSize;
+      }
+    }
+
+    if (batchSize == 1) {
+      nnp_status status =
+          nnp_convolution_inference(algorithm_,
+                                    transform_strategy_,
+                                    inputChannels,
+                                    outputChannels,
+                                    inputSize,
+                                    padding,
+                                    kernelSize,
+                                    outputSubsampling,
+                                    inputData,
+                                    filterData,
+                                    nullptr, /* bias */
+                                    outputData,
+                                    bufferPtr,
+                                    sizePtr,
+                                    nnp_activation_identity,
+                                    nullptr,
+                                    threadpool_, /* threadpool */
+                                    nullptr);
+      CHECK_EQ(status, nnp_status_success);
+    } else {
+      // only supports stride = 1
+      CHECK_EQ(stride_, 1);
+      nnp_status status = nnp_convolution_output(algorithm_,
+                                                 batchSize,
+                                                 inputChannels,
+                                                 outputChannels,
+                                                 inputSize,
+                                                 padding,
+                                                 kernelSize,
+                                                 inputData,
+                                                 filterData,
+                                                 nullptr, /* bias */
+                                                 outputData,
+                                                 bufferPtr,
+                                                 sizePtr,
+                                                 nnp_activation_identity,
+                                                 nullptr,
+                                                 threadpool_, /* threadpool */
+                                                 nullptr);
+      CHECK_EQ(status, nnp_status_success);
+    }
+  }
+
+private:
+  nnp_convolution_algorithm algorithm_;
+  nnp_convolution_transform_strategy transform_strategy_;
+  void* workspaceBuffer_;
+  size_t workspaceSize_;
+  pthreadpool_t threadpool_;
+};
+
+REGISTER_TYPED_FUNC(NNPACKConv, CPU, NNPACKConvFunction);
+
+}  // namespace paddle
From 65969dad641a95a1ac0f744b11c1166a173d169b Mon Sep 17 00:00:00 2001
From: yangyaming
Date: Fri, 16 Jun 2017 16:29:08 +0800
Subject: [PATCH 002/542] Add DetectionOutputLayer and MultiBoxLossLayer.
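As a quick, hypothetical sketch of how the new Python helpers introduced by this patch fit together (`conv_loc`, `conv_conf`, `prior`, and `label` below are placeholder upstream layers, not part of the patch):

```python
# Hypothetical SSD head wiring using the helpers this patch introduces.
# conv_loc / conv_conf are assumed convolution outputs, prior a
# priorbox_layer output, and label the ground-truth data layer.
cost = multibox_loss_layer(input_loc=conv_loc,
                           input_conf=conv_conf,
                           priorbox=prior,
                           label=label,
                           num_classes=21)

detections = detection_output_layer(input_loc=conv_loc,
                                    input_conf=conv_conf,
                                    priorbox=prior,
                                    num_classes=21,
                                    nms_threshold=0.45,
                                    keep_top_k=200)
```

Only `detection_output_layer` is needed at inference time; `multibox_loss_layer` supplies the training objective.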
--- .../gserver/layers/DetectionOutputLayer.cpp | 154 ++++++++ paddle/gserver/layers/DetectionOutputLayer.h | 81 ++++ paddle/gserver/layers/MultiBoxLossLayer.cpp | 365 ++++++++++++++++++ paddle/gserver/layers/MultiBoxLossLayer.h | 103 +++++ paddle/gserver/tests/CMakeLists.txt | 7 + paddle/gserver/tests/LayerGradUtil.cpp | 25 ++ paddle/gserver/tests/LayerGradUtil.h | 18 +- paddle/gserver/tests/test_DetectionOutput.cpp | 191 +++++++++ paddle/gserver/tests/test_LayerGrad.cpp | 64 +++ proto/ModelConfig.proto | 25 ++ python/paddle/trainer/config_parser.py | 46 +++ .../paddle/trainer_config_helpers/layers.py | 161 ++++++++ 12 files changed, 1239 insertions(+), 1 deletion(-) create mode 100644 paddle/gserver/layers/DetectionOutputLayer.cpp create mode 100644 paddle/gserver/layers/DetectionOutputLayer.h create mode 100644 paddle/gserver/layers/MultiBoxLossLayer.cpp create mode 100644 paddle/gserver/layers/MultiBoxLossLayer.h create mode 100644 paddle/gserver/tests/test_DetectionOutput.cpp diff --git a/paddle/gserver/layers/DetectionOutputLayer.cpp b/paddle/gserver/layers/DetectionOutputLayer.cpp new file mode 100644 index 0000000000..2a4d7f8b5b --- /dev/null +++ b/paddle/gserver/layers/DetectionOutputLayer.cpp @@ -0,0 +1,154 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "DetectionOutputLayer.h" + +namespace paddle { + +REGISTER_LAYER(detection_output, DetectionOutputLayer); + +bool DetectionOutputLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + Layer::init(layerMap, parameterMap); + auto& layerConf = config_.inputs(0).detection_output_conf(); + numClasses_ = layerConf.num_classes(); + inputNum_ = layerConf.input_num(); + nmsThreshold_ = layerConf.nms_threshold(); + confidenceThreshold_ = layerConf.confidence_threshold(); + nmsTopK_ = layerConf.nms_top_k(); + keepTopK_ = layerConf.keep_top_k(); + backgroundId_ = layerConf.background_id(); + return true; +} + +void DetectionOutputLayer::forward(PassType passType) { + Layer::forward(passType); + size_t batchSize = getInputValue(*getLocInputLayer(0))->getHeight(); + + locSizeSum_ = 0; + confSizeSum_ = 0; + for (size_t n = 0; n < inputNum_; ++n) { + const MatrixPtr inLoc = getInputValue(*getLocInputLayer(n)); + const MatrixPtr inConf = getInputValue(*getConfInputLayer(n)); + locSizeSum_ += inLoc->getElementCnt(); + confSizeSum_ += inConf->getElementCnt(); + } + + Matrix::resizeOrCreate(locTmpBuffer_, 1, locSizeSum_, false, useGpu_); + Matrix::resizeOrCreate( + confTmpBuffer_, confSizeSum_ / numClasses_, numClasses_, false, useGpu_); + locBuffer_ = locTmpBuffer_; + confBuffer_ = confTmpBuffer_; + + size_t locOffset = 0; + size_t confOffset = 0; + auto& layerConf = config_.inputs(0).detection_output_conf(); + for (size_t n = 0; n < inputNum_; ++n) { + const MatrixPtr inLoc = getInputValue(*getLocInputLayer(n)); + const MatrixPtr inConf = getInputValue(*getConfInputLayer(n)); + + size_t height = getInput(*getLocInputLayer(n)).getFrameHeight(); + if (!height) height = layerConf.height(); + size_t width = getInput(*getLocInputLayer(n)).getFrameWidth(); + if (!width) width = layerConf.width(); + locOffset += appendWithPermute(*inLoc, + height, + width, + locSizeSum_, + locOffset, + batchSize, + *locBuffer_, + kNCHWToNHWC); + confOffset += appendWithPermute(*inConf, + height, + width, + confSizeSum_, + confOffset, + batchSize, + *confBuffer_, + kNCHWToNHWC); + } + CHECK_EQ(locOffset, locSizeSum_ / batchSize); + CHECK_EQ(confOffset, confSizeSum_ / batchSize); + + MatrixPtr priorValue; + if (useGpu_) { + Matrix::resizeOrCreate(locCpuBuffer_, 1, locSizeSum_, false, false); + Matrix::resizeOrCreate( + confCpuBuffer_, confSizeSum_ / numClasses_, numClasses_, false, false); + MatrixPtr priorTmpValue = getInputValue(*getPriorBoxLayer()); + Matrix::resizeOrCreate( + priorCpuValue_, 1, priorTmpValue->getElementCnt(), false, false); + + locCpuBuffer_->copyFrom(*locTmpBuffer_); + confCpuBuffer_->copyFrom(*confTmpBuffer_); + priorCpuValue_->copyFrom(*priorTmpValue); + + locBuffer_ = locCpuBuffer_; + confBuffer_ = confCpuBuffer_; + priorValue = priorCpuValue_; + } else { + priorValue = getInputValue(*getPriorBoxLayer()); + } + confBuffer_->softmax(*confBuffer_); + + size_t numPriors = priorValue->getElementCnt() / 8; + vector> allDecodedBBoxes; + for (size_t n = 0; n < batchSize; ++n) { + vector decodedBBoxes; + for (size_t i = 0; i < numPriors; ++i) { + size_t priorOffset = i * 8; + size_t locPredOffset = n * numPriors * 4 + i * 4; + vector priorBBoxVec; + getBBoxFromPriorData( + priorValue->getData() + priorOffset, 1, priorBBoxVec); + vector> priorBBoxVar; + getBBoxVarFromPriorData( + priorValue->getData() + priorOffset, 1, priorBBoxVar); + vector locPredData; + for (size_t j = 0; j < 4; ++j) + locPredData.push_back(*(locBuffer_->getData() + locPredOffset + j)); + NormalizedBBox bbox 
=
+          decodeBBoxWithVar(priorBBoxVec[0], priorBBoxVar[0], locPredData);
+      decodedBBoxes.push_back(bbox);
+    }
+    allDecodedBBoxes.push_back(decodedBBoxes);
+  }
+
+  vector<map<size_t, vector<size_t>>> allIndices;
+  size_t numKept = getDetectionIndices(confBuffer_->getData(),
+                                       numPriors,
+                                       numClasses_,
+                                       backgroundId_,
+                                       batchSize,
+                                       confidenceThreshold_,
+                                       nmsTopK_,
+                                       nmsThreshold_,
+                                       keepTopK_,
+                                       allDecodedBBoxes,
+                                       &allIndices);
+
+  resetOutput(numKept, 7);
+  MatrixPtr outV = getOutputValue();
+  getDetectionOutput(confBuffer_->getData(),
+                     numKept,
+                     numPriors,
+                     numClasses_,
+                     batchSize,
+                     allIndices,
+                     allDecodedBBoxes,
+                     *outV);
+}
+
+}  // namespace paddle
diff --git a/paddle/gserver/layers/DetectionOutputLayer.h b/paddle/gserver/layers/DetectionOutputLayer.h
new file mode 100644
index 0000000000..38271cb054
--- /dev/null
+++ b/paddle/gserver/layers/DetectionOutputLayer.h
@@ -0,0 +1,81 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <map>
+#include <vector>
+#include "DetectionUtil.h"
+#include "Layer.h"
+
+using std::vector;
+using std::map;
+using std::pair;
+
+namespace paddle {
+
+/**
+ * The detection output layer for an SSD detection task. This layer applies
+ * non-maximum suppression to all predicted bounding boxes and keeps the
+ * top-k bounding boxes.
+ * - Input: This layer needs three input layers: the first input layer
+ *          is the priorbox layer. The remaining two input layers are
+ *          convolution layers that generate the bbox location offsets
+ *          and the classification confidences.
+ * - Output: The predicted bounding box locations.
+ */
+
+class DetectionOutputLayer : public Layer {
+public:
+  explicit DetectionOutputLayer(const LayerConfig& config) : Layer(config) {}
+
+  bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
+
+  void forward(PassType passType);
+
+  void backward(const UpdateCallback& callback = nullptr) {}
+
+protected:
+  inline LayerPtr getPriorBoxLayer() { return inputLayers_[0]; }
+
+  inline LayerPtr getLocInputLayer(size_t index) {
+    return inputLayers_[1 + index];
+  }
+
+  inline LayerPtr getConfInputLayer(size_t index) {
+    return inputLayers_[1 + inputNum_ + index];
+  }
+
+private:
+  size_t numClasses_;  // number of classes
+  size_t inputNum_;    // number of input layers
+  real nmsThreshold_;
+  real confidenceThreshold_;
+  size_t nmsTopK_;
+  size_t keepTopK_;
+  size_t backgroundId_;
+
+  size_t locSizeSum_;
+  size_t confSizeSum_;
+
+  MatrixPtr locBuffer_;
+  MatrixPtr confBuffer_;
+  MatrixPtr locTmpBuffer_;
+  MatrixPtr confTmpBuffer_;
+  MatrixPtr priorCpuValue_;
+  MatrixPtr locCpuBuffer_;
+  MatrixPtr confCpuBuffer_;
+};
+
+}  // namespace paddle
diff --git a/paddle/gserver/layers/MultiBoxLossLayer.cpp b/paddle/gserver/layers/MultiBoxLossLayer.cpp
new file mode 100644
index 0000000000..27a2cc3fa4
--- /dev/null
+++ b/paddle/gserver/layers/MultiBoxLossLayer.cpp
@@ -0,0 +1,365 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "MultiBoxLossLayer.h" +#include +#include +#include "DataLayer.h" + +using std::vector; +using std::map; +using std::pair; + +namespace paddle { + +REGISTER_LAYER(multibox_loss, MultiBoxLossLayer); + +bool MultiBoxLossLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + Layer::init(layerMap, parameterMap); + + auto layerConf = config_.inputs(0).multibox_loss_conf(); + numClasses_ = layerConf.num_classes(); + inputNum_ = layerConf.input_num(); + overlapThreshold_ = layerConf.overlap_threshold(); + negPosRatio_ = layerConf.neg_pos_ratio(); + negOverlap_ = layerConf.neg_overlap(); + backgroundId_ = layerConf.background_id(); + return true; +} + +void MultiBoxLossLayer::forward(PassType passType) { + Layer::forward(passType); + size_t batchSize = getInputValue(*getLocInputLayer(0))->getHeight(); + resetOutput(batchSize, 1); + + // all location data and confidence score data + locSizeSum_ = 0; + confSizeSum_ = 0; + for (size_t n = 0; n < inputNum_; ++n) { + const MatrixPtr inLoc = getInputValue(*getLocInputLayer(n)); + const MatrixPtr inConf = getInputValue(*getConfInputLayer(n)); + locSizeSum_ += inLoc->getElementCnt(); + confSizeSum_ += inConf->getElementCnt(); + } + + // locBuffer layout: + // | xmin1 | ymin1 | xmax1 | ymax1 | xmin2 ...... + Matrix::resizeOrCreate(locTmpBuffer_, 1, locSizeSum_, false, useGpu_); + locBuffer_ = locTmpBuffer_; + + // confBuffer layout: + // | class1 score | class2 score | ... |classN score | class1 score | ...... + Matrix::resizeOrCreate(confTmpBuffer_, 1, confSizeSum_, false, useGpu_); + confBuffer_ = confTmpBuffer_; + + // concate location data and confidence score data + size_t locOffset = 0; + size_t confOffset = 0; + auto& layerConf = config_.inputs(0).multibox_loss_conf(); + for (size_t n = 0; n < inputNum_; ++n) { + const MatrixPtr inLoc = getInputValue(*getLocInputLayer(n)); + const MatrixPtr inConf = getInputValue(*getConfInputLayer(n)); + size_t height = getInput(*getLocInputLayer(n)).getFrameHeight(); + if (!height) height = layerConf.height(); + size_t width = getInput(*getLocInputLayer(n)).getFrameWidth(); + if (!width) width = layerConf.width(); + locOffset += appendWithPermute(*inLoc, + height, + width, + locSizeSum_, + locOffset, + batchSize, + *locBuffer_, + kNCHWToNHWC); + confOffset += appendWithPermute(*inConf, + height, + width, + confSizeSum_, + confOffset, + batchSize, + *confBuffer_, + kNCHWToNHWC); + } + CHECK_EQ(locOffset, locSizeSum_ / batchSize); + CHECK_EQ(confOffset, confSizeSum_ / batchSize); + + // priorValue layout: + // | xmin1 | ymin1 | xmax1 | ymax1 | xmin1Var | ymin1Var | xmax1Var | ymax1Var + // | xmin2 | ...... + MatrixPtr priorValue; + + // labelValue layout: + // | class1_1 | xmin1_1 | ymin1_1 | xmax1_1 | ymax1_1 | difficult1_1 | ...... 
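+  // Each ground-truth record takes 6 values; records are grouped per image
+  // by the label layer's sequence start positions.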
+ MatrixPtr labelValue; + + // Copy data from GPU to CPU if use GPU + if (useGpu_) { + Matrix::resizeOrCreate(locCpuBuffer_, 1, locSizeSum_, false, false); + Matrix::resizeOrCreate(confCpuBuffer_, 1, confSizeSum_, false, false); + MatrixPtr priorTmpValue = getInputValue(*getPriorBoxLayer()); + Matrix::resizeOrCreate( + priorCpuValue_, 1, priorTmpValue->getElementCnt(), false, false); + MatrixPtr labelTmpValue = getInputValue(*getLabelLayer()); + Matrix::resizeOrCreate(labelCpuValue_, + labelTmpValue->getHeight(), + labelTmpValue->getWidth(), + false, + false); + + locCpuBuffer_->copyFrom(*locTmpBuffer_); + confCpuBuffer_->copyFrom(*confTmpBuffer_); + priorCpuValue_->copyFrom(*priorTmpValue); + labelCpuValue_->copyFrom(*labelTmpValue); + + locBuffer_ = locCpuBuffer_; + confBuffer_ = confCpuBuffer_; + priorValue = priorCpuValue_; + labelValue = labelCpuValue_; + } else { + priorValue = getInputValue(*getPriorBoxLayer()); + labelValue = getInputValue(*getLabelLayer()); + } + + // Get max scores for each prior bbox. Used in negative mining + vector> allMaxConfScore; + numPriors_ = priorValue->getElementCnt() / 8; + getMaxConfidenceScores(confBuffer_->getData(), + batchSize, + numPriors_, + numClasses_, + backgroundId_, + &allMaxConfScore); + + // Match prior bbox to groundtruth bbox + Argument label = getInput(*getLabelLayer()); + const int* labelIndex = label.sequenceStartPositions->getData(false); + size_t seqNum = label.getNumSequences(); + numMatches_ = 0; + numNegs_ = 0; + allMatchIndices_.clear(); + allNegIndices_.clear(); + + pair retPair = generateMatchIndices(*priorValue, + numPriors_, + *labelValue, + labelIndex, + seqNum, + allMaxConfScore, + batchSize, + overlapThreshold_, + negOverlap_, + negPosRatio_, + &allMatchIndices_, + &allNegIndices_); + numMatches_ = retPair.first; + numNegs_ = retPair.second; + + // BBox location L1 smooth loss + locLoss_ = 0.0; + if (numMatches_ >= 1) { + size_t count = 0; + MatrixPtr locLossOutput; + Matrix::resizeOrCreate(locLossOutput, numMatches_ * 4, 1, false, false); + Matrix::resizeOrCreate(locGTData_, numMatches_ * 4, 1, false, false); + Matrix::resizeOrCreate(locDiff_, numMatches_ * 4, 1, false, false); + locDiff_->zeroMem(); + vector locGTData; + + for (size_t n = 0; n < batchSize; ++n) { + for (size_t i = 0; i < numPriors_; ++i) { + if (allMatchIndices_[n][i] == -1) continue; // match none + size_t locOffset = + n * (locBuffer_->getElementCnt() / batchSize) + i * 4; + locDiff_->getData()[count++] = (locBuffer_->getData() + locOffset)[0]; + locDiff_->getData()[count++] = (locBuffer_->getData() + locOffset)[1]; + locDiff_->getData()[count++] = (locBuffer_->getData() + locOffset)[2]; + locDiff_->getData()[count++] = (locBuffer_->getData() + locOffset)[3]; + + const int gtIdx = allMatchIndices_[n][i]; + size_t priorOffset = i * 8; + vector priorBBoxVec; + getBBoxFromPriorData( + priorValue->getData() + priorOffset, 1, priorBBoxVec); + vector> priorBBoxVar; + getBBoxVarFromPriorData( + priorValue->getData() + priorOffset, 1, priorBBoxVar); + size_t labelOffset = (labelIndex[n] + gtIdx) * 6; + vector gtBBoxVec; + getBBoxFromLabelData(labelValue->getData() + labelOffset, 1, gtBBoxVec); + vector gtEncode; + encodeBBoxWithVar( + priorBBoxVec[0], priorBBoxVar[0], gtBBoxVec[0], gtEncode); + locGTData.insert(locGTData.end(), gtEncode.begin(), gtEncode.end()); + } + } + locGTData_->copyFrom(&locGTData[0], numMatches_ * 4); + locLossOutput->smoothL1(*locDiff_, *locGTData_, 0.0); + locLoss_ = locLossOutput->getSum() / numMatches_; + } + + // BBox confidence 
softmax loss + confLoss_ = 0; + numConf_ = numMatches_ + numNegs_; + if (numConf_ >= 1) { + Matrix::resizeOrCreate(confProb_, numConf_, numClasses_, false, false); + IVector::resizeOrCreate(confGTData_, numConf_, false); + confProb_->zeroMem(); + size_t count = 0; + + vector confPredData; + for (size_t n = 0; n < batchSize; ++n) { + for (size_t i = 0; i < numPriors_; ++i) { + if (allMatchIndices_[n][i] == -1) continue; + size_t labelOffset = (labelIndex[n] + allMatchIndices_[n][i]) * 6; + const int gtLabel = (labelValue->getData() + labelOffset)[0]; + confGTData_->getData()[count] = gtLabel; + size_t confOffset = n * numPriors_ * numClasses_ + i * numClasses_; + for (size_t j = 0; j < numClasses_; ++j) { + confProb_->getData()[count * numClasses_ + j] = + (confBuffer_->getData() + confOffset)[j]; + confPredData.push_back((confBuffer_->getData() + confOffset)[j]); + } + ++count; + } + // Negative mining samples + for (size_t i = 0; i < allNegIndices_[n].size(); ++i) { + confGTData_->getData()[count] = backgroundId_; + size_t confOffset = + n * numPriors_ * numClasses_ + allNegIndices_[n][i] * numClasses_; + for (size_t j = 0; j < numClasses_; ++j) { + confProb_->getData()[count * numClasses_ + j] = + (confBuffer_->getData() + confOffset)[j]; + confPredData.push_back((confBuffer_->getData() + confOffset)[j]); + } + count++; + } + } + confProb_->softmax(*confProb_); + MatrixPtr confLossOutput; + Matrix::resizeOrCreate(confLossOutput, numConf_, 1, false, false); + confLossOutput->oneHotCrossEntropy(*confProb_, *confGTData_); + confLoss_ = confLossOutput->getSum() / numMatches_; + } + real loss = locLoss_ + confLoss_; + MatrixPtr outV = getOutputValue(); + vector tmp(batchSize, loss); + outV->copyFrom(&tmp[0], batchSize); +} + +void MultiBoxLossLayer::backward(const UpdateCallback& callback) { + size_t batchSize = getInputValue(*getLocInputLayer(0))->getHeight(); + locBuffer_->zeroMem(); + confBuffer_->zeroMem(); + + // Back propagate on location prediction + if (numMatches_ >= 1) { + MatrixPtr locDiffBuffer; + Matrix::resizeOrCreate(locDiffBuffer, numMatches_ * 4, 1, false, false); + locDiffBuffer->smoothL1Bp(*locDiff_, *locGTData_, 0.0); + locDiff_->copyFrom(*locDiffBuffer); + // scale gradient + for (size_t i = 0; i < numMatches_ * 4; ++i) + locDiff_->getData()[i] *= (1. / numMatches_); + // Copy gradient back + size_t count = 0; + for (size_t n = 0; n < batchSize; ++n) + for (size_t i = 0; i < numPriors_; ++i) { + if (allMatchIndices_[n][i] == -1) continue; + real* locDiffData = locBuffer_->getData() + n * numPriors_ * 4 + i * 4; + locDiffData[0] = (locDiff_->getData() + count * 4)[0]; + locDiffData[1] = (locDiff_->getData() + count * 4)[1]; + locDiffData[2] = (locDiff_->getData() + count * 4)[2]; + locDiffData[3] = (locDiff_->getData() + count * 4)[3]; + ++count; + } + CHECK_EQ(count, numMatches_); + } + + if (numConf_ >= 1) { + for (size_t i = 0; i < numConf_; ++i) + confProb_->getData()[i * numClasses_ + confGTData_->getData()[i]] -= 1; + for (size_t i = 0; i < numConf_ * numClasses_; ++i) + confProb_->getData()[i] *= (1. 
/ numMatches_);
+    size_t count = 0;
+    for (size_t n = 0; n < batchSize; ++n) {
+      for (size_t i = 0; i < numPriors_; ++i) {
+        if (allMatchIndices_[n][i] == -1) continue;
+        real* confDiffData = confBuffer_->getData() +
+                             n * numPriors_ * numClasses_ + i * numClasses_;
+        for (size_t j = 0; j < numClasses_; ++j)
+          confDiffData[j] = (confProb_->getData() + count * numClasses_)[j];
+        ++count;
+      }
+      for (size_t i = 0; i < allNegIndices_[n].size(); ++i) {
+        int idx = allNegIndices_[n][i];
+        real* confDiffData = confBuffer_->getData() +
+                             n * numPriors_ * numClasses_ + idx * numClasses_;
+        for (size_t j = 0; j < numClasses_; ++j)
+          confDiffData[j] = (confProb_->getData() + count * numClasses_)[j];
+        ++count;
+      }
+    }
+    CHECK_EQ(count, numConf_);
+  }
+  if (useGpu_) {
+    locTmpBuffer_->copyFrom(*locCpuBuffer_);
+    confTmpBuffer_->copyFrom(*confCpuBuffer_);
+    locBuffer_ = locTmpBuffer_;
+    confBuffer_ = confTmpBuffer_;
+  }
+  // copy back
+  size_t locOffset = 0;
+  size_t confOffset = 0;
+  auto layerConf = config_.inputs(0).multibox_loss_conf();
+  for (size_t n = 0; n < inputNum_; ++n) {
+    const MatrixPtr inLocG = getInputGrad(*getLocInputLayer(n));
+    const MatrixPtr inConfG = getInputGrad(*getConfInputLayer(n));
+    size_t height = getInput(*getLocInputLayer(n)).getFrameHeight();
+    if (!height) height = layerConf.height();
+    size_t width = getInput(*getLocInputLayer(n)).getFrameWidth();
+    if (!width) width = layerConf.width();
+
+    // NHWC to NCHW
+    MatrixPtr locGBuffer;
+    Matrix::resizeOrCreate(
+        locGBuffer, inLocG->getHeight(), inLocG->getWidth(), false, useGpu_);
+    MatrixPtr confGBuffer;
+    Matrix::resizeOrCreate(
+        confGBuffer, inConfG->getHeight(), inConfG->getWidth(), false, useGpu_);
+
+    locOffset += decomposeWithPermute(*locBuffer_,
+                                      height,
+                                      width,
+                                      locSizeSum_,
+                                      locOffset,
+                                      batchSize,
+                                      *locGBuffer,
+                                      kNHWCToNCHW);
+    inLocG->add(*locGBuffer);
+    confOffset += decomposeWithPermute(*confBuffer_,
+                                       height,
+                                       width,
+                                       confSizeSum_,
+                                       confOffset,
+                                       batchSize,
+                                       *confGBuffer,
+                                       kNHWCToNCHW);
+    inConfG->add(*confGBuffer);
+  }
+  CHECK_EQ(locOffset, locSizeSum_ / batchSize);
+  CHECK_EQ(confOffset, confSizeSum_ / batchSize);
+}
+
+}  // namespace paddle
diff --git a/paddle/gserver/layers/MultiBoxLossLayer.h b/paddle/gserver/layers/MultiBoxLossLayer.h
new file mode 100644
index 0000000000..9767fed7f1
--- /dev/null
+++ b/paddle/gserver/layers/MultiBoxLossLayer.h
@@ -0,0 +1,103 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <vector>
+#include "CostLayer.h"
+#include "DataLayer.h"
+#include "DetectionUtil.h"
+#include "Layer.h"
+
+using std::vector;
+using std::pair;
+
+namespace paddle {
+
+/**
+ * The multibox loss layer for an SSD detection task.
+ * The loss is composed of the location loss and the confidence loss.
+ * The location loss is a smooth L1 loss and the confidence loss is
+ * a softmax loss.
+ * - Input: This layer needs four input layers: the first input layer
+ *          is the priorbox layer and the second is the label layer.
+ * The rest two input layers are convolution layers for generating + * bbox location offset and the classification confidence. + * - Output: The Single Shot Multibox Detection loss value. + * Reference: + * Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, + * Cheng-Yang Fu, Alexander C. Berg. SSD: Single Shot MultiBox Detector + */ + +class MultiBoxLossLayer : public CostLayer { +public: + explicit MultiBoxLossLayer(const LayerConfig& config) : CostLayer(config) {} + + bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + + void forward(PassType passType); + + void backward(const UpdateCallback& callback = nullptr); + + void forwardImp(Matrix& output, Argument& label, Matrix& cost) {} + + void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad) {} + +protected: + inline LayerPtr getPriorBoxLayer() { return inputLayers_[0]; } + inline LayerPtr getLabelLayer() { return inputLayers_[1]; } + inline LayerPtr getLocInputLayer(size_t index) { + return inputLayers_[2 + index]; + } + inline LayerPtr getConfInputLayer(size_t index) { + return inputLayers_[2 + inputNum_ + index]; + } + +protected: + size_t numClasses_; + real overlapThreshold_; + real negPosRatio_; + real negOverlap_; + size_t inputNum_; + size_t backgroundId_; + + real locLoss_; + real confLoss_; + + size_t numPriors_; + size_t numMatches_; + size_t numNegs_; + size_t numConf_; + size_t locSizeSum_; + size_t confSizeSum_; + + vector> allMatchIndices_; + vector> allNegIndices_; + MatrixPtr locGTData_; + IVectorPtr confGTData_; + + MatrixPtr locBuffer_; + MatrixPtr confBuffer_; + MatrixPtr locDiff_; + MatrixPtr confProb_; + + MatrixPtr labelCpuValue_; + MatrixPtr priorCpuValue_; + MatrixPtr locCpuBuffer_; + MatrixPtr confCpuBuffer_; + MatrixPtr locTmpBuffer_; + MatrixPtr confTmpBuffer_; +}; + +} // namespace paddle diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index 3c4128b5b8..92f6cbcfe5 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -45,6 +45,13 @@ add_unittest_without_exec(test_PriorBox add_test(NAME test_PriorBox COMMAND test_PriorBox) +################# test_DetectionOutput ####################### +add_unittest_without_exec(test_DetectionOutput + test_DetectionOutput.cpp + LayerGradUtil.cpp) + +add_test(NAME test_DetectionOutput + COMMAND test_DetectionOutput) ################# test_ConvUnify ####################### add_unittest_without_exec(test_ConvUnify test_ConvUnify.cpp diff --git a/paddle/gserver/tests/LayerGradUtil.cpp b/paddle/gserver/tests/LayerGradUtil.cpp index a0b1cd471d..e3591ba4df 100644 --- a/paddle/gserver/tests/LayerGradUtil.cpp +++ b/paddle/gserver/tests/LayerGradUtil.cpp @@ -387,6 +387,31 @@ void initDataLayer(TestConfig testConf, data.value->sigmoid(*data.value); data.grad->zeroMem(); break; + case INPUT_SELF_DEFINE_DATA: { + size_t height = testConf.inputDefs[i].selfDefinedData->getHeight(); + size_t width = testConf.inputDefs[i].selfDefinedData->getWidth(); + CHECK_GT(static_cast(height), 0); + CHECK_GT(static_cast(width), 0); + data.value = Matrix::create(height, width, false, useGpu); + data.grad = Matrix::create(height, width, false, useGpu); + data.value->copyFrom(*testConf.inputDefs[i].selfDefinedData); + data.grad->zeroMem(); + + const std::vector& labelSeqStartPositions = + testConf.inputDefs[i].labelSeqStartPositions; + if (labelSeqStartPositions.size() != 0) { + CHECK(!sequenceStartPositions); + CHECK_GE(static_cast(labelSeqStartPositions.size()), 2); 
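+        // The positions mark sequence boundaries, so a non-empty
+        // description holds at least a begin and an end offset.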
+ + sequenceStartPositions = + ICpuGpuVector::create(labelSeqStartPositions.size(), useGpu); + sequenceStartPositions->copyFrom(labelSeqStartPositions.data(), + labelSeqStartPositions.size(), + useGpu); + data.sequenceStartPositions = sequenceStartPositions; + } + break; + } default: LOG(FATAL) << " unknown inputType "; return; diff --git a/paddle/gserver/tests/LayerGradUtil.h b/paddle/gserver/tests/LayerGradUtil.h index 9f68eb64d0..18a6525a14 100644 --- a/paddle/gserver/tests/LayerGradUtil.h +++ b/paddle/gserver/tests/LayerGradUtil.h @@ -31,7 +31,8 @@ enum InputType { INPUT_SEQUENCE_LABEL, INPUT_SPARSE_NON_VALUE_DATA, INPUT_SPARSE_FLOAT_VALUE_DATA, - INPUT_DENSE_DIM_DATA, // using sequence length to init dense data + INPUT_DENSE_DIM_DATA, // using sequence length to init dense data + INPUT_SELF_DEFINE_DATA, // support customizing for input value }; struct ParaSparse { @@ -66,6 +67,7 @@ struct InputDef { bool isStatic; std::vector labelInitValue; std::vector labelSeqStartPositions; + MatrixPtr selfDefinedData; InputDef(InputType type, string nameIn, size_t dimIn, size_t sizeIn) { inputType = type; @@ -76,6 +78,20 @@ struct InputDef { isStatic = false; } + InputDef(InputType type, + string nameIn, + MatrixPtr selfDefinedData, + std::vector selfDefinedSeqStartPos = {}) + : labelSeqStartPositions(selfDefinedSeqStartPos), + selfDefinedData(selfDefinedData) { + inputType = type; + name = nameIn; + dim = 0; + sparse = {""}; + paraSize = 0; + isStatic = false; + } + InputDef(InputType type, string nameIn, size_t dimIn, diff --git a/paddle/gserver/tests/test_DetectionOutput.cpp b/paddle/gserver/tests/test_DetectionOutput.cpp new file mode 100644 index 0000000000..8ec7a28450 --- /dev/null +++ b/paddle/gserver/tests/test_DetectionOutput.cpp @@ -0,0 +1,191 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include +#include +#include + +#include "LayerGradUtil.h" +#include "paddle/testing/TestUtil.h" + +using namespace paddle; // NOLINT +using namespace std; // NOLINT + +// Do one forward pass of priorBox layer and check to see if its output +// matches the given result +void doOneDetectionOutputTest(MatrixPtr& inputLoc, + MatrixPtr& inputConf, + MatrixPtr& inputPriorBox, + size_t feature_map_width, + size_t feature_map_height, + real nms_threshold, + bool use_gpu, + MatrixPtr& result) { + // Setting up the detection output layer + TestConfig configt; + configt.layerConfig.set_type("detection_output"); + LayerInputConfig* input = configt.layerConfig.add_inputs(); + configt.layerConfig.add_inputs(); + configt.layerConfig.add_inputs(); + + DetectionOutputConfig* detOutput = input->mutable_detection_output_conf(); + detOutput->set_width(feature_map_width); + detOutput->set_height(feature_map_height); + detOutput->set_nms_threshold(nms_threshold); + detOutput->set_num_classes(2); + detOutput->set_nms_top_k(20); + detOutput->set_keep_top_k(10); + detOutput->set_background_id(0); + detOutput->set_confidence_threshold(0.01); + detOutput->set_input_num(1); + configt.inputDefs.push_back({INPUT_DATA_TARGET, "priorbox", 32, 0}); + configt.inputDefs.push_back({INPUT_DATA, "input_loc", 16, 0}); + configt.inputDefs.push_back({INPUT_DATA, "input_conf", 8, 0}); + + // data layer initialize + std::vector dataLayers; + LayerMap layerMap; + vector datas; + initDataLayer( + configt, &dataLayers, &datas, &layerMap, "priorbox", 1, false, use_gpu); + + dataLayers[0]->getOutputValue()->copyFrom(*inputPriorBox); + dataLayers[1]->getOutputValue()->copyFrom(*inputLoc); + dataLayers[2]->getOutputValue()->copyFrom(*inputConf); + + // test layer initialize + std::vector parameters; + LayerPtr detectionOutputLayer; + initTestLayer(configt, &layerMap, ¶meters, &detectionOutputLayer); + detectionOutputLayer->forward(PASS_GC); + checkMatrixEqual(detectionOutputLayer->getOutputValue(), result); +} + +TEST(Layer, detectionOutputLayerFwd) { + bool useGpu = false; + // CPU case 1. + MatrixPtr inputLoc; + MatrixPtr inputConf; + MatrixPtr inputPriorBox; + MatrixPtr result, result2, result3, result4; + real nmsTreshold = 0.01; + real inputLocData[] = {0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1}; + real inputConfData[] = {0.1, 0.9, 0.2, 0.8, 0.3, 0.7, 0.4, 0.6}; + real inputPriorBoxData[] = {0.1, 0.1, 0.5, 0.5, 0.1, 0.1, 0.2, 0.2, + 0.2, 0.2, 0.6, 0.6, 0.1, 0.1, 0.2, 0.2, + 0.3, 0.3, 0.7, 0.7, 0.1, 0.1, 0.2, 0.2, + 0.4, 0.4, 0.8, 0.8, 0.1, 0.1, 0.2, 0.2}; + real resultData[] = { + 0, 1, 0.68997443, 0.099959746, 0.099959746, 0.50804031, 0.50804031}; + inputLoc = Matrix::create(1, 16, false, useGpu); + inputConf = Matrix::create(1, 8, false, useGpu); + inputPriorBox = Matrix::create(1, 32, false, useGpu); + result = Matrix::create(1, 7, false, useGpu); + inputLoc->setData(inputLocData); + inputConf->setData(inputConfData); + inputPriorBox->setData(inputPriorBoxData); + result->setData(resultData); + doOneDetectionOutputTest(inputLoc, + inputConf, + inputPriorBox, + /* feature_map_width */ 1, + /* feature_map_height */ 1, + nmsTreshold, + useGpu, + result); + + // CPU case 2. 
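+  // With the larger NMS threshold, the second, heavily overlapping box is
+  // no longer suppressed, so two detections are expected.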
+ nmsTreshold = 0.2; + result2 = Matrix::create(2, 7, false, useGpu); + real resultData2[] = {0, + 1, + 0.68997443, + 0.099959746, + 0.099959746, + 0.50804031, + 0.50804031, + 0, + 1, + 0.59868765, + 0.29995975, + 0.29995975, + 0.70804024, + 0.70804024}; + result2->setData(resultData2); + doOneDetectionOutputTest(inputLoc, + inputConf, + inputPriorBox, + /* feature_map_width */ 1, + /* feature_map_height */ 1, + nmsTreshold, + useGpu, + result2); + +#ifndef PADDLE_ONLY_CPU + // GPU case 1. + useGpu = true; + inputLoc = Matrix::create(1, 16, false, useGpu); + inputConf = Matrix::create(1, 8, false, useGpu); + inputPriorBox = Matrix::create(1, 32, false, useGpu); + inputLoc->copyFrom(inputLocData, 16); + inputConf->copyFrom(inputConfData, 8); + inputPriorBox->copyFrom(inputPriorBoxData, 32); + + nmsTreshold = 0.01; + result3 = Matrix::create(1, 7, false, useGpu); + result3->copyFrom(resultData, 7); + doOneDetectionOutputTest(inputLoc, + inputConf, + inputPriorBox, + /* feature_map_width */ 1, + /* feature_map_height */ 1, + nmsTreshold, + useGpu, + result3); + + // GPU case 2. + nmsTreshold = 0.2; + result4 = Matrix::create(2, 7, false, useGpu); + result4->copyFrom(resultData2, 14); + doOneDetectionOutputTest(inputLoc, + inputConf, + inputPriorBox, + /* feature_map_width */ 1, + /* feature_map_height */ 1, + nmsTreshold, + useGpu, + result4); +#endif +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + initMain(argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 6adffcf53b..9c79bd19ee 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -1689,6 +1689,70 @@ TEST(Layer, smooth_l1) { } } +TEST(Layer, multibox_loss) { + TestConfig config; + config.layerConfig.set_type("multibox_loss"); + config.biasSize = 0; + LayerInputConfig* input = config.layerConfig.add_inputs(); + MultiBoxLossConfig* multiboxLoss = input->mutable_multibox_loss_conf(); + multiboxLoss->set_num_classes(21); + multiboxLoss->set_input_num(1); + multiboxLoss->set_overlap_threshold(0.5); + multiboxLoss->set_neg_pos_ratio(3); + multiboxLoss->set_neg_overlap(0.5); + multiboxLoss->set_background_id(0); + multiboxLoss->set_height(3); + multiboxLoss->set_width(3); + + size_t gtNum = 1; + MatrixPtr labelValue = Matrix::create(gtNum, 6, false, false); + labelValue->randomizeUniform(); + labelValue->add(-0.5); + labelValue->sigmoid(*labelValue); + real* labelData = labelValue->getData(); + size_t labelWidth = labelValue->getWidth(); + for (size_t i = 0; i < gtNum; ++i) { + *(labelData + i * labelWidth) = std::rand() % 20 + 1; + *(labelData + i * labelWidth + 1) = 0.400259; + *(labelData + i * labelWidth + 2) = 0.377857; + *(labelData + i * labelWidth + 3) = 0.525712; + *(labelData + i * labelWidth + 4) = 0.519368; + } + vector seqStartPositions(gtNum + 1, 0); + for (size_t i = 1; i <= gtNum; ++i) { + seqStartPositions[i] = i; + } + + // Ensure at lease one matched bbox + MatrixPtr priorValue = Matrix::create(1, 72, false, false); + priorValue->randomizeUniform(); + priorValue->add(-0.5); + priorValue->sigmoid(*priorValue); + real* priorData = priorValue->getData(); + *(priorData) = 0.424811; + *(priorData + 1) = 0.397059; + *(priorData + 2) = 0.538905; + *(priorData + 3) = 0.447091; + *(priorData + 4) = 0.425720; + *(priorData + 5) = 0.515228; + *(priorData + 6) = 0.519452; + *(priorData + 7) = 0.591065; + + config.inputDefs.push_back( + {INPUT_SELF_DEFINE_DATA, 
"priorbox", priorValue, {}}); + config.inputDefs.push_back( + {INPUT_SELF_DEFINE_DATA, "label", labelValue, seqStartPositions}); + config.inputDefs.push_back({INPUT_DATA, "locPred", 36, 0}); + config.inputDefs.push_back({INPUT_DATA, "confPred", 189, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "multibox_loss", 1, false, useGpu, false); + } +} + TEST(Layer, TransLayer) { TestConfig config; const int height = 128; diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 29270829bb..3d01c23bf9 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -266,6 +266,29 @@ message PadConfig { repeated uint32 pad_w = 4; } +message MultiBoxLossConfig { + required uint32 num_classes = 1; + required float overlap_threshold = 2; + required float neg_pos_ratio = 3; + required float neg_overlap = 4; + required uint32 background_id = 5; + required uint32 input_num = 6; + optional uint32 height = 7 [default = 1]; + optional uint32 width = 8 [default = 1]; +} + +message DetectionOutputConfig { + required uint32 num_classes = 1; + required float nms_threshold = 2; + required uint32 nms_top_k = 3; + required uint32 background_id = 4; + required uint32 input_num = 5; + required uint32 keep_top_k = 6; + required float confidence_threshold = 7; + optional uint32 height = 8 [default = 1]; + optional uint32 width = 9 [default = 1]; +} + message LayerInputConfig { required string input_layer_name = 1; optional string input_parameter_name = 2; @@ -284,6 +307,8 @@ message LayerInputConfig { optional PriorBoxConfig priorbox_conf = 13; optional PadConfig pad_conf = 14; optional RowConvConfig row_conv_conf = 15; + optional MultiBoxLossConfig multibox_loss_conf = 16; + optional DetectionOutputConfig detection_output_conf = 17; } message LayerConfig { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index fc2e3bbcde..c46b335d99 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1676,6 +1676,52 @@ class PriorBoxLayer(LayerBase): self.config.size = size +@config_layer('multibox_loss') +class MultiBoxLossLayer(LayerBase): + def __init__(self, name, inputs, input_num, num_classes, overlap_threshold, + neg_pos_ratio, neg_overlap, background_id): + super(MultiBoxLossLayer, self).__init__(name, 'multibox_loss', 0, + inputs) + config_assert( + len(inputs) == (input_num * 2 + 2), + 'MultiBoxLossLayer does not have enough inputs') + config_assert(num_classes > background_id, + 'Classes number must greater than background ID') + self.config.inputs[0].multibox_loss_conf.num_classes = num_classes + self.config.inputs[ + 0].multibox_loss_conf.overlap_threshold = overlap_threshold + self.config.inputs[0].multibox_loss_conf.neg_pos_ratio = neg_pos_ratio + self.config.inputs[0].multibox_loss_conf.neg_overlap = neg_overlap + self.config.inputs[0].multibox_loss_conf.background_id = background_id + self.config.inputs[0].multibox_loss_conf.input_num = input_num + self.config.size = 1 + + +@config_layer('detection_output') +class DetectionOutputLayer(LayerBase): + def __init__(self, name, inputs, size, input_num, num_classes, + nms_threshold, nms_top_k, keep_top_k, confidence_threshold, + background_id): + super(DetectionOutputLayer, self).__init__(name, 'detection_output', 0, + inputs) + config_assert( + len(inputs) == (input_num * 2 + 1), + 'DetectionOutputLayer does not have enough inputs') + 
config_assert(num_classes > background_id, + 'Classes number must greater than background ID') + self.config.inputs[0].detection_output_conf.num_classes = num_classes + self.config.inputs[ + 0].detection_output_conf.nms_threshold = nms_threshold + self.config.inputs[0].detection_output_conf.nms_top_k = nms_top_k + self.config.inputs[0].detection_output_conf.keep_top_k = keep_top_k + self.config.inputs[ + 0].detection_output_conf.confidence_threshold = confidence_threshold + self.config.inputs[ + 0].detection_output_conf.background_id = background_id + self.config.inputs[0].detection_output_conf.input_num = input_num + self.config.size = size + + @config_layer('data') class DataLayer(LayerBase): def __init__(self, name, size, height=None, width=None, device=None): diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 2d8ddbb900..770559dc77 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -115,6 +115,8 @@ __all__ = [ 'print_layer', 'priorbox_layer', 'cross_channel_norm_layer', + 'multibox_loss_layer', + 'detection_output_layer', 'spp_layer', 'pad_layer', 'eos_layer', @@ -195,6 +197,8 @@ class LayerType(object): PRINT_LAYER = 'print' PRIORBOX_LAYER = 'priorbox' + MULTIBOX_LOSS_LAYER = 'multibox_loss' + DETECTION_OUTPUT_LAYER = 'detection_output' CTC_LAYER = 'ctc' WARP_CTC_LAYER = 'warp_ctc' @@ -1052,6 +1056,163 @@ def priorbox_layer(input, size=size) +@wrap_name_default("multibox_loss") +def multibox_loss_layer(input_loc, + input_conf, + priorbox, + label, + num_classes, + overlap_threshold=0.5, + neg_pos_ratio=3.0, + neg_overlap=0.5, + background_id=0, + name=None): + """ + Compute the location loss and the confidence loss for ssd. + + :param name: The Layer Name. + :type name: basestring + :param input_loc: The input predict location. + :type input_loc: LayerOutput + :param input_conf: The input priorbox confidence. + :type input_conf: LayerOutput + :param priorbox: The input priorbox location and the variance. + :type priorbox: LayerOutput + :param label: The input label. + :type label: LayerOutput + :param num_classes: The number of the classification. + :type num_classes: int + :param overlap_threshold: The threshold of the overlap. + :type overlap_threshold: float + :param neg_pos_ratio: The ratio of the negative bbox to the positive bbox. + :type neg_pos_ratio: float + :param neg_overlap: The negative bbox overlap threshold. + :type neg_overlap: float + :param background_id: The background class index. + :type background_id: int + :return: LayerOutput + """ + input_loc_num = 0 + input_conf_num = 0 + + if isinstance(input_loc, LayerOutput): + input_loc = [input_loc] + assert isinstance(input_loc, collections.Sequence) # list or tuple + for each in input_loc: + assert isinstance(each, LayerOutput) + input_loc_num += 1 + + if isinstance(input_conf, LayerOutput): + input_conf = [input_conf] + assert isinstance(input_conf, collections.Sequence) # list or tuple + for each in input_conf: + assert isinstance(each, LayerOutput) + input_conf_num += 1 + # Check the input layer number. 
+ assert input_loc_num == input_conf_num + + inputs = [priorbox.name, label.name] + inputs.extend([l.name for l in input_loc]) + inputs.extend([l.name for l in input_conf]) + parents = [priorbox, label] + parents.extend(input_loc) + parents.extend(input_conf) + + Layer( + name=name, + type=LayerType.MULTIBOX_LOSS_LAYER, + inputs=inputs, + input_num=input_loc_num, + num_classes=num_classes, + overlap_threshold=overlap_threshold, + neg_pos_ratio=neg_pos_ratio, + neg_overlap=neg_overlap, + background_id=background_id) + return LayerOutput( + name, LayerType.MULTIBOX_LOSS_LAYER, parents=parents, size=1) + + +@wrap_name_default("detection_output") +def detection_output_layer(input_loc, + input_conf, + priorbox, + num_classes, + nms_threshold=0.45, + nms_top_k=400, + keep_top_k=200, + confidence_threshold=0.01, + background_id=0, + name=None): + """ + Apply the NMS to the output of network and compute the predict bounding + box location. + + :param name: The Layer Name. + :type name: basestring + :param input_loc: The input predict location. + :type input_loc: LayerOutput + :param input_conf: The input priorbox confidence. + :type input_conf: LayerOutput + :param priorbox: The input priorbox location and the variance. + :type priorbox: LayerOutput + :param num_classes: The number of the classification. + :type num_classes: int + :param nms_threshold: The Non-maximum suppression threshold. + :type nms_threshold: float + :param nms_top_k: The bbox number kept of the NMS's output + :type nms_top_k: int + :param keep_top_k: The bbox number kept of the layer's output + :type keep_top_k: int + :param confidence_threshold: The classification confidence threshold + :type confidence_threshold: float + :param background_id: The background class index. + :type background_id: int + :return: LayerOutput + """ + input_loc_num = 0 + input_conf_num = 0 + + if isinstance(input_loc, LayerOutput): + input_loc = [input_loc] + assert isinstance(input_loc, collections.Sequence) # list or tuple + for each in input_loc: + assert isinstance(each, LayerOutput) + input_loc_num += 1 + + if isinstance(input_conf, LayerOutput): + input_conf = [input_conf] + assert isinstance(input_conf, collections.Sequence) # list or tuple + for each in input_conf: + assert isinstance(each, LayerOutput) + input_conf_num += 1 + # Check the input layer number. + assert input_loc_num == input_conf_num + + inputs = [priorbox.name] + inputs.extend([l.name for l in input_loc]) + inputs.extend([l.name for l in input_conf]) + parents = [priorbox] + parents.extend(input_loc) + parents.extend(input_conf) + + size = keep_top_k * 7 + + Layer( + name=name, + type=LayerType.DETECTION_OUTPUT_LAYER, + inputs=inputs, + size=size, + input_num=input_loc_num, + num_classes=num_classes, + nms_threshold=nms_threshold, + nms_top_k=nms_top_k, + keep_top_k=keep_top_k, + confidence_threshold=confidence_threshold, + background_id=background_id) + return LayerOutput( + name, LayerType.DETECTION_OUTPUT_LAYER, parents=parents, size=size) + + @wrap_name_default("cross_channel_norm") def cross_channel_norm_layer(input, name=None, param_attr=None): """ From b233ed135352de1260b644112f939938798048ec Mon Sep 17 00:00:00 2001 From: yangyaming Date: Mon, 19 Jun 2017 14:53:59 +0800 Subject: [PATCH 003/542] Set FLAGS_use_gpu in test_DetectionOutput. 
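initTestLayer() builds the layer and its parameters according to the global
FLAGS_use_gpu rather than the use_gpu argument passed to the test helper, so
the flag is saved, forced to the device under test while the layer is
constructed, and restored afterwards (presumably to avoid leaking the setting
into other tests in the same binary).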
---
 paddle/gserver/tests/test_DetectionOutput.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/paddle/gserver/tests/test_DetectionOutput.cpp b/paddle/gserver/tests/test_DetectionOutput.cpp
index 8ec7a28450..af43dc51fa 100644
--- a/paddle/gserver/tests/test_DetectionOutput.cpp
+++ b/paddle/gserver/tests/test_DetectionOutput.cpp
@@ -65,9 +65,12 @@ void doOneDetectionOutputTest(MatrixPtr& inputLoc,
   dataLayers[2]->getOutputValue()->copyFrom(*inputConf);
 
   // test layer initialize
+  bool store_FLAGS_use_gpu = FLAGS_use_gpu;
+  FLAGS_use_gpu = use_gpu;
   std::vector<ParameterPtr> parameters;
   LayerPtr detectionOutputLayer;
   initTestLayer(configt, &layerMap, &parameters, &detectionOutputLayer);
+  FLAGS_use_gpu = store_FLAGS_use_gpu;
   detectionOutputLayer->forward(PASS_GC);
   checkMatrixEqual(detectionOutputLayer->getOutputValue(), result);
 }
From bcac91a463bd90867974ed6d39b1eda5accdac25 Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Wed, 21 Jun 2017 18:21:58 +0800
Subject: [PATCH 004/542] scope design doc

---
 doc/refactor/scope.md | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 doc/refactor/scope.md

diff --git a/doc/refactor/scope.md b/doc/refactor/scope.md
new file mode 100644
index 0000000000..15759d6217
--- /dev/null
+++ b/doc/refactor/scope.md
@@ -0,0 +1,30 @@
+# Scope
+
+### Define
+
+Scope is a context to manage Variables. It mainly contains a map from Variable name to Variable. A Net gets and updates variables through the scope.
+
+```cpp
+class Scope {
+  Variable GetVar();
+
+private:
+  // var_name -> var
+  std::map<std::string, Variable> var_map_;
+  Scope* parent_scope_;
+}
+```
+
+You need to specify a scope to run a Net. One net can run in different scopes and update different variables in each scope. If you do not specify one, it will run in a default scope.
+```python
+with ScopeGuard(scope):
+    Net net = Net();
+    Net.run()
+```
+
+### Chain structure
+
+Scope has a pointer to its parent scope; this is mainly used in RNN, which needs to create many step nets.
+
+
+### Scope Guard
\ No newline at end of file
From dbfe58ca47e8ff59d8d020739529ed6f02f5b9f0 Mon Sep 17 00:00:00 2001
From: gongweibao
Date: Wed, 21 Jun 2017 19:27:12 +0800
Subject: [PATCH 005/542] add local recordio reader interface

---
 python/paddle/v2/reader/creator.py            | 21 +++++++++++++++++-
 python/paddle/v2/reader/tests/creator_test.py |  9 ++++++++
 .../v2/reader/tests/test_recordio_creator.dat | Bin 0 -> 88 bytes
 3 files changed, 29 insertions(+), 1 deletion(-)
 create mode 100644 python/paddle/v2/reader/tests/test_recordio_creator.dat

diff --git a/python/paddle/v2/reader/creator.py b/python/paddle/v2/reader/creator.py
index 07142056f8..994062096f 100644
--- a/python/paddle/v2/reader/creator.py
+++ b/python/paddle/v2/reader/creator.py
@@ -16,7 +16,7 @@
 Creator package contains some simple reader creator, which could be used in user program.
""" -__all__ = ['np_array', 'text_file'] +__all__ = ['np_array', 'text_file', "RecordIO"] def np_array(x): @@ -55,3 +55,22 @@ def text_file(path): f.close() return reader + + +def RecordIO(path): + """ + Creates a data reader that outputs record one one by one from given recordio file + :path: path of recordio file + :returns: data reader of recordio file + """ + + def reader(): + f = recordio.reader(path) + while True: + r = f.read() + if r is None: + break + yield r + f.close() + + return reader diff --git a/python/paddle/v2/reader/tests/creator_test.py b/python/paddle/v2/reader/tests/creator_test.py index 9f8d7133b8..dd84fbb002 100644 --- a/python/paddle/v2/reader/tests/creator_test.py +++ b/python/paddle/v2/reader/tests/creator_test.py @@ -36,5 +36,14 @@ class TestTextFile(unittest.TestCase): self.assertEqual(e, str(idx * 2) + " " + str(idx * 2 + 1)) +class TestRecordIO(unittest.TestCase): + def test_RecordIO(self): + path = os.path.join( + os.path.dirname(__file__), "test_recordio_creator.dat") + reader = paddle.v2.reader.creator.RecordIO(path) + for idx, r in enumerate(reader()): + self.assertSequenceEqual(r, str(idx)) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/reader/tests/test_recordio_creator.dat b/python/paddle/v2/reader/tests/test_recordio_creator.dat new file mode 100644 index 0000000000000000000000000000000000000000..17aa89b6796184407e83246d3f342a55a66b4a69 GIT binary patch literal 88 zcmZQ!W@2QOHw Date: Wed, 21 Jun 2017 21:12:09 +0800 Subject: [PATCH 006/542] Rearrange docs --- doc/{refactor => design}/scope.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename doc/{refactor => design}/scope.md (97%) diff --git a/doc/refactor/scope.md b/doc/design/scope.md similarity index 97% rename from doc/refactor/scope.md rename to doc/design/scope.md index 15759d6217..c8ca62688c 100644 --- a/doc/refactor/scope.md +++ b/doc/design/scope.md @@ -27,4 +27,4 @@ with ScopeGuard(scope): Scope has a pointer point to it's parent scope, this is mainly used in RNN when it need to create many stepNet. -### Scope Guard \ No newline at end of file +### Scope Guard From 674b1d34625bb876299c4500c5a85b0aad8ef808 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 21 Jun 2017 21:31:27 +0800 Subject: [PATCH 007/542] Update code --- doc/design/scope.md | 48 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/doc/design/scope.md b/doc/design/scope.md index c8ca62688c..b8390a3714 100644 --- a/doc/design/scope.md +++ b/doc/design/scope.md @@ -5,21 +5,49 @@ Scope is a context to manage Variables. It mainly contains a map from Variable name to Variable. Net will get and update variable throw scope. ```cpp -class Scope { - Variable GetVar(); +class Variable; +using VariablePtr = std::shared_ptr; + +class Scope final { +public: + Scope(); + Scope(const std::shared_ptr& parent); + + //! Get Variable in this scope. + //! @return nullptr if no such variable. + const VariablePtr& getVar(const std::string& name) const; + + //! Create or get a variable in this scope. + VariablePtr& createOrGetVar(const std::string& name); private: - // var_name -> var - std::map var_map_; - Scope* parent_scope_; -} + /// variable name -> variable + std::unordered_map vars_; + std::shared_ptr parent_{nullptr}; +}; ``` You need to specify a scope to run a Net. One net can run in different scopes and update different variable in the scope. If you did not specify one, It will run in a default scope. 
-```python -with ScopeGuard(scope): - Net net = Net(); - Net.run() + +```cpp +Scope global; +auto x = newVar("X"); // x is created in scope global, implicitly. +auto y = newVar("Y"); +Net net1; +net1.addOp("add", {x, y}, {x}); // x = x + y; +net1.run(); + +for (size_t i=0; i<10; ++i) { + Scope local; + auto tmp = newVar("tmp"); // tmp is created in scope local. + Net net2; + net2.addOp("add", {x, y}, {tmp}); + net2.run(); // tmp = x + y; +} + +Net net3; +net3.addOp("add", {x, y}, {"tmp"}); // error! cannot found "tmp" in global scope. + ``` ### Chain structure From 7a4850771006937d264039ae782fe7e302545362 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 21 Jun 2017 23:45:26 +0800 Subject: [PATCH 008/542] fix code style --- doc/design/scope.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/doc/design/scope.md b/doc/design/scope.md index b8390a3714..a0b0be50dc 100644 --- a/doc/design/scope.md +++ b/doc/design/scope.md @@ -9,16 +9,16 @@ class Variable; using VariablePtr = std::shared_ptr; class Scope final { -public: + public: Scope(); Scope(const std::shared_ptr& parent); //! Get Variable in this scope. //! @return nullptr if no such variable. - const VariablePtr& getVar(const std::string& name) const; + const VariablePtr& GetVar(const std::string& name) const; //! Create or get a variable in this scope. - VariablePtr& createOrGetVar(const std::string& name); + VariablePtr& GetOrCreateVar(const std::string& name); private: /// variable name -> variable @@ -31,22 +31,22 @@ You need to specify a scope to run a Net. One net can run in different scopes an ```cpp Scope global; -auto x = newVar("X"); // x is created in scope global, implicitly. -auto y = newVar("Y"); +auto x = NewVar("X"); // x is created in scope global, implicitly. +auto y = NewVar("Y"); Net net1; -net1.addOp("add", {x, y}, {x}); // x = x + y; -net1.run(); +net1.AddOp("add", {x, y}, {x}); // x = x + y; +net1.Run(); for (size_t i=0; i<10; ++i) { Scope local; - auto tmp = newVar("tmp"); // tmp is created in scope local. + auto tmp = NewVar("tmp"); // tmp is created in scope local. Net net2; - net2.addOp("add", {x, y}, {tmp}); - net2.run(); // tmp = x + y; + net2.AddOp("add", {x, y}, {tmp}); + net2.Run(); // tmp = x + y; } Net net3; -net3.addOp("add", {x, y}, {"tmp"}); // error! cannot found "tmp" in global scope. +net3.AddOp("add", {x, y}, {"tmp"}); // error! cannot found "tmp" in global scope. 
``` From 5128714c004f3b3c54d4c389131599c08e5413fd Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Thu, 22 Jun 2017 03:59:51 +0800 Subject: [PATCH 009/542] "integrate go and optimizer library" --- go/pserver/cclient/cclient.go | 12 +++----- go/pserver/optimizer.c | 58 ----------------------------------- go/pserver/optimizer.go | 20 +++++------- go/pserver/optimizer.h | 22 ------------- go/pserver/service.go | 18 ++++++++--- 5 files changed, 26 insertions(+), 104 deletions(-) delete mode 100644 go/pserver/optimizer.c delete mode 100644 go/pserver/optimizer.h diff --git a/go/pserver/cclient/cclient.go b/go/pserver/cclient/cclient.go index 92a41b7f54..6aaaff7409 100644 --- a/go/pserver/cclient/cclient.go +++ b/go/pserver/cclient/cclient.go @@ -123,9 +123,8 @@ func paddle_begin_init_params(client C.paddle_pserver_client) C.int { func paddle_init_param(client C.paddle_pserver_client, param C.paddle_parameter, param_config unsafe.Pointer, config_len C.int) C.int { et := pserver.ElementType(param.element_type) name := C.GoString(param.name) - content := cArrayToSlice(unsafe.Pointer(param.content), int(param.content_len)) pc := pserver.ParameterWithConfig{ - Param: pserver.Parameter{Name: name, ElementType: et, Content: content}, + Param: pserver.Parameter{Name: name, ElementType: et, Content: param.content, Length: para.content_len}, Config: cArrayToSlice(param_config, int(config_len)), } c := get(client) @@ -167,8 +166,7 @@ func paddle_send_grads(client C.paddle_pserver_client, grads **C.paddle_gradient grad := *(**C.paddle_gradient)(unsafe.Pointer((uintptr(unsafe.Pointer(grads)) + uintptr(i)*unsafe.Sizeof(*grads)))) et := pserver.ElementType(grad.element_type) name := C.GoString(grad.name) - content := cArrayToSlice(unsafe.Pointer(grad.content), int(grad.content_len)) - gs = append(gs, pserver.Gradient{Name: name, ElementType: et, Content: content}) + gs = append(gs, pserver.Gradient{Name: name, ElementType: et, Content: grad.content, Length: grad.content_len}) } c := get(client) @@ -225,14 +223,14 @@ func paddle_get_params(client C.paddle_pserver_client, dst **C.paddle_parameter, } if unsafe.Pointer(param.content) != nullPtr { - if int(param.content_len) != len(p.Content) { + if int(param.content_len) != p.Length { log.Errorf("the pre-allocated content len does not match parameter content len. 
Pre-allocated len: %d, returned len: %d", param.content_len, len(p.Content)) return C.PSERVER_ERROR } } - C.memcpy(unsafe.Pointer(param.content), unsafe.Pointer(&p.Content[0]), C.size_t(len(p.Content))) - param.content_len = C.int(len(p.Content)) + C.memcpy(unsafe.Pointer(param.content), unsafe.Pointer(p.Content), C.size_t(p.Length)) + param.content_len = C.int(p.Length) param.element_type = C.paddle_element_type(p.ElementType) } diff --git a/go/pserver/optimizer.c b/go/pserver/optimizer.c deleted file mode 100644 index f16ba2cbf8..0000000000 --- a/go/pserver/optimizer.c +++ /dev/null @@ -1,58 +0,0 @@ -#include - -#include "optimizer.h" - -typedef int (*update_func)(void*, void*, paddle_element_type, const void*, int); -typedef void (*release_func)(void*); - -typedef struct paddle_optimizer { - update_func update; - release_func release; - void* optimizer; -} paddle_optimizer; - -void paddle_release_optimizer(paddle_optimizer* o) { - o->release(o->optimizer); - free(o); -} - -int paddle_update_parameter(paddle_optimizer* o, - void* buffer, - paddle_element_type element_type, - const void* gradient, - int num_bytes) { - return o->update(o->optimizer, buffer, element_type, gradient, num_bytes); -} - -typedef struct { double learning_rate; } SGD_optimizer; - -int update_SGD(void* optimizer, - void* buffer, - paddle_element_type element_type, - const void* gradient, - int num_bytes) { - SGD_optimizer* o = (SGD_optimizer*)optimizer; - float* parameter = (float*)buffer; - float* grad = (float*)gradient; - - int i; - for (i = 0; i < num_bytes / sizeof(float); ++i) { - parameter[i] -= o->learning_rate * grad[i]; - } - return 0; -} - -void release_SGD(void* optimizer) { - SGD_optimizer* o = (SGD_optimizer*)optimizer; - // nothing allocated on heap -} - -paddle_optimizer* paddle_create_SGD_optimizer(double learning_rate) { - SGD_optimizer* impl = (SGD_optimizer*)malloc(sizeof(SGD_optimizer)); - impl->learning_rate = learning_rate; - paddle_optimizer* opt = (paddle_optimizer*)malloc(sizeof(paddle_optimizer)); - opt->update = update_SGD; - opt->release = release_SGD; - opt->optimizer = impl; - return opt; -} diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go index 417f8c5093..5abbca538f 100644 --- a/go/pserver/optimizer.go +++ b/go/pserver/optimizer.go @@ -1,7 +1,7 @@ package pserver /* -#include "optimizer.h" +#include "paddle/optimizer/optimizer.h" */ import "C" import ( @@ -9,34 +9,30 @@ import ( "unsafe" ) -type optimizerType int - -const ( - sgd optimizerType = iota -) - var nullPtr = unsafe.Pointer(uintptr(0)) type optimizer struct { opt *C.struct_paddle_optimizer } -func newOptimizer(t optimizerType, learning_rate float64) *optimizer { +func newOptimizer(paramWithConfigs ParameterWithConfig) *optimizer { o := &optimizer{} - o.opt = C.paddle_create_SGD_optimizer(C.double(learning_rate)) + p := paramWithConfigs.Param + c := paramWithConfigs.Config + o.opt = C.paddle_create_optimizer(C.uchar(c), C.int(len(c)), unsafe.Pointer(p.Content), c.int(p.Length), nullPtr, 0) return o } func (o *optimizer) UpdateParameter(p Parameter, g Gradient) error { - if len(p.Content) != len(g.Content) { - return fmt.Errorf("Name: %s, parameter and gradient length not match, parameter: %d, gradient: %d", p.Name, len(p.Content), len(g.Content)) + if p.Length != g.Length { + return fmt.Errorf("Name: %s, parameter and gradient length not match, parameter: %d, gradient: %d", p.Name, p.Length, g.Length) } if p.ElementType != g.ElementType { return fmt.Errorf("Name: %s, parameter and gradient element type not 
match, parameter: %v, gradient: %v", p.Name, p.ElementType, g.ElementType) } - r := C.paddle_update_parameter(o.opt, unsafe.Pointer(&p.Content[0]), C.paddle_element_type(p.ElementType), unsafe.Pointer(&g.Content[0]), C.int(len(g.Content))) + r := C.paddle_update_parameter(o.opt, C.paddle_element_type(p.ElementType), unsafe.Pointer(g.Content), C.int(g.Length)) if r != 0 { return fmt.Errorf("optimizer update returned error code: %d", r) } diff --git a/go/pserver/optimizer.h b/go/pserver/optimizer.h deleted file mode 100644 index a7e3ff0530..0000000000 --- a/go/pserver/optimizer.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef PADDLE_PSERVER_OPTIMIZER_H -#define PADDLE_PSERVER_OPTIMIZER_H - -typedef enum { - PADDLE_ELEMENT_TYPE_INT32 = 0, - PADDLE_ELEMENT_TYPE_UINT32 = 1, - PADDLE_ELEMENT_TYPE_INT64 = 2, - PADDLE_ELEMENT_TYPE_UINT64 = 3, - PADDLE_ELEMENT_TYPE_FLOAT32 = 4, - PADDLE_ELEMENT_TYPE_FLOAT64 = 5, -} paddle_element_type; - -struct paddle_optimizer; -struct paddle_optimizer* paddle_create_SGD_optimizer(double learning_rate); -void paddle_release_optimizer(struct paddle_optimizer* o); -int paddle_update_parameter(struct paddle_optimizer* o, - void* buffer, - paddle_element_type element_type, - const void* gradient, - int num_bytes); - -#endif /* PADDLE_PSERVER_OPTIMIZER_H */ diff --git a/go/pserver/service.go b/go/pserver/service.go index 78a2bfaf63..c721388b6a 100644 --- a/go/pserver/service.go +++ b/go/pserver/service.go @@ -28,7 +28,8 @@ const ( type Parameter struct { Name string ElementType ElementType - Content []byte + Content *byte + Length int } // ParameterWithConfig contains the parameter and the configuration. @@ -44,14 +45,16 @@ type Gradient Parameter type Service struct { initialized chan struct{} - mu sync.Mutex - opt *optimizer + mu sync.Mutex + // injection from parameter to optimizer + optMap map[string]*optimizer paramMap map[string]Parameter } // NewService creates a new service. func NewService() *Service { - s := &Service{opt: newOptimizer(sgd, 0.005)} + s := &Service{} + s.optMap = make(map[string]*optimizer) s.paramMap = make(map[string]Parameter) s.initialized = make(chan struct{}) return s @@ -74,6 +77,7 @@ func (s *Service) InitParam(paramWithConfigs ParameterWithConfig, dummy *int) er // properly memory aligned, if not, make copy to a memory // aligned region. s.paramMap[paramWithConfigs.Param.Name] = paramWithConfigs.Param + s.optMap[paramWithConfigs.Param.Name] = newOptimizer(paramWithConfigs) return nil } @@ -106,8 +110,12 @@ func (s *Service) SendGrad(g Gradient, dummy *int) error { if !ok { return fmt.Errorf("parameter: %s does not exist", g.Name) } + o, ok := s.optMap[g.Name] + if !ok { + return fmt.Errorf("optimizer: %s does not exist", g.Name) + } - return s.opt.UpdateParameter(p, g) + return o.UpdateParameter(p, g) } // GetParam gets parameters from the parameter server. 
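
The structural change in the patch above is that the pserver no longer shares one hard-coded SGD routine across all parameters: `InitParam` now builds one optimizer per parameter from the serialized config it receives, and `SendGrad` dispatches through the per-name `optMap`, surfacing the new `"optimizer: %s does not exist"` error on a miss. The Go code in the diff is the actual implementation; what follows is only a minimal C++ sketch of the same bookkeeping, with every name in it (`Optimizer`, `ParameterServer`) invented for illustration.

```cpp
#include <cstddef>
#include <cstdint>
#include <memory>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>

// Stand-in for one optimizer instance built from a serialized config,
// mirroring what newOptimizer() does in optimizer.go above.
class Optimizer {
 public:
  explicit Optimizer(std::vector<uint8_t> config) : config_(std::move(config)) {}
  void Update(const void* gradient, size_t num_bytes) {
    // ... apply the update rule encoded in config_ to the parameter ...
  }

 private:
  std::vector<uint8_t> config_;
};

// The per-parameter dispatch that service.go now performs: one optimizer
// per parameter name, created at init time and looked up per gradient.
class ParameterServer {
 public:
  void InitParam(const std::string& name, std::vector<uint8_t> config) {
    optimizers_[name] = std::make_unique<Optimizer>(std::move(config));
  }

  void SendGrad(const std::string& name, const void* grad, size_t len) {
    auto it = optimizers_.find(name);
    if (it == optimizers_.end()) {
      throw std::runtime_error("optimizer: " + name + " does not exist");
    }
    it->second->Update(grad, len);  // each parameter has its own update rule
  }

 private:
  std::unordered_map<std::string, std::unique_ptr<Optimizer>> optimizers_;
};
```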
From 7cb68a8d9315bd3c3c769e47ee3752867854ee12 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Wed, 21 Jun 2017 13:19:40 -0700 Subject: [PATCH 010/542] Add paddle/memory/README.md --- paddle/README.md | 141 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 paddle/README.md diff --git a/paddle/README.md b/paddle/README.md new file mode 100644 index 0000000000..24af37987e --- /dev/null +++ b/paddle/README.md @@ -0,0 +1,141 @@ +In my mind, the memory package works like the following: + +## Design + +### Usage + +To allocate 4KB CPU memory: + +```cpp +p = memory::Alloc(platform::CPUPlace(), 4*1024); +``` + +To allocate 4KB memory on the 3rd GPU: + +```cpp +p = memory::Alloc(platform::GPUPlace(2), 4*1024); +``` + +To free memory and check the so-far used amount of memory on a place: + +```cpp +auto pl = platform::GPUPlace(0); +p = memory::Alloc(pl, 4*1024); +cout << memory::Used(pl); +memory::Free(pl, p); +``` + +### The API + +In `paddle/memory/memory.h` we have: + +```cpp +template void* Alloc(Place, size_t); +template void Free(Place, void*); +} +``` + +These function templates have specializations on either `platform::CPUPlace` or `platform::GPUPlace`: + +```cpp +template<> +void Alloc(CPUPlace p, size_t size) { + return GetCPUBuddyAllocator()->Alloc(size); +} +``` + +and + +```cpp +template<> +void Alloc(GPUPlace)(GPUPlace p, size_t size) { + return GetGPUBuddyAllocator(p.id)->Alloc(size); +} +``` + +### The Implementation + +`GetCPUBuddyAllocator` and `GetGPUBuddyAllocator` are singletions. + +```cpp +BuddyAllocator* GetCPUBuddyAllocator() { + static BuddyAllocator* a = NULL; + if (a == NULL) { + a = new BuddyAllocator(new CPUAllocator /*backup allocator*/, ...); + } + return a; +} + +BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { + static BuddyAllocator* as = NULL; + if (as == NULL) { + as = new BuddyAllocator*[platform::NumGPUs()]; + for (int gpu = 0; gpu < platform::NumGPUs(); gpu++) { + as[gpu] = new BuddyAllocator(new GPUAllocator(gpu) /* backup allocator */, ...); + } + } + return as[gpu_id); +``` + +#### `BuddyAllocator` + +`BuddyAllocator` implements the buddy allocation algorithm. Its constructor takes parameters only related with the algorithm: + +```cpp +BuddyAllocator::BuddyAllocator(initial_pool_size, max_pool_size) { + ... +} +``` + +Please be aware that **`BuddyAllocator` always allocate aligned memory**, aligned on 32-bytes, which can hold a `BuddyAllocator::Block` object: + +```cpp +class BuddyAllocator { + private: + struct Block { + size_t size; + Blobk* left, right; + }; + ... +}; +``` + +#### System Allocators + +The `GPUAllocator` and `CPUAllocator` are calls *system allocators*. They hold information about the device, including the amount of memory has been allocated. So that we can call + +- `GPUAllocator::Used` and +- `CPUAllocator::Used` + +to get the amount of memory that has been allocated so far. + + +## Why Such a Design + +I got inspiration from Majel and Caffe2, though above design look different from both. + +### Caffe2 + +In Caffe2, `Tensor::mutable_data()` allocates the memroy. In particular, [`Tensor::mutable_data`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/tensor.h#L523) calls [`Tensor::raw_mutable_data`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/tensor.h#L459), which in turn calls [`Context::New`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/tensor.h#L479). + +There are two implementations of `Context`: + +1. 
[`CPUContext`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.h#L105), whose [`New` method](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.h#L131) calls [`g_cpu_allocator.get()->New(size_t)`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.cc#L15) to allocate the memory. + +1. [`CUDAContext`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.h#L99), which has a data member [`int gpu_id_`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.h#L202). This looks very similar to class `majel::GPUPlace`, who also has an `int id_` data member. `CUDAContext::New(size_t)` calls [`g_cub_allocator->DeviceAllocate(&ptr, nbytes)`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.cu#L355) to allocate the memory. + +### Majel + +In Majel, there are basically two allocator types: + +1. `cpu::SystemAllocator`, which has similar functionality to `caffe2::CPUContext::New/Delete`. +1. `gpu::SystemAllocator`, which has similar functionality to `caffe2::CUDAContext::New/Delete`. + +However, memory allocation is not via these two allocators. Instead, these two allocators are defined in hidden namespaces. + +In Majel there are hidden global variables like: + +1. `cpu::SystemAllocator g_cpu_allocator`, and +1. `vector g_gpu_allocators(NUM_GPUS)`. + +Programs allocate memory via a BuddyAllocator, which can take the `g_cpu_allocator` or a `g_gpu_allocators[gpu_id]` as its *fallback allocator*, so that if BuddyAllocator cannot find a block in its memory pool, it extends its memory pool by calling the fallback allocator's `New(size_t)`. From 0a92908b5ea68daa040155a7088b7f520c16c51d Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Wed, 21 Jun 2017 17:02:30 -0700 Subject: [PATCH 011/542] Has to auto format networks.py because CI complains about it. --- python/paddle/trainer_config_helpers/networks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py index 1bf59ed484..67154a8d7d 100755 --- a/python/paddle/trainer_config_helpers/networks.py +++ b/python/paddle/trainer_config_helpers/networks.py @@ -1381,7 +1381,7 @@ def inputs(layers, *args): if len(args) != 0: layers.extend(args) - Inputs(*[l.name for l in layers]) + Inputs(* [l.name for l in layers]) def outputs(layers, *args): @@ -1424,7 +1424,7 @@ def outputs(layers, *args): assert len(layers) > 0 if HasInputsSet(): # input already set - Outputs(*[l.name for l in layers]) + Outputs(* [l.name for l in layers]) return # just return outputs. 
if len(layers) != 1: From 8bffa4a72fb28f4ca019e7dead41773731a2e33c Mon Sep 17 00:00:00 2001 From: gongweibao Date: Thu, 22 Jun 2017 11:09:00 +0800 Subject: [PATCH 012/542] fix bugs --- python/paddle/v2/reader/creator.py | 6 ++++-- python/paddle/v2/reader/tests/creator_test.py | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/python/paddle/v2/reader/creator.py b/python/paddle/v2/reader/creator.py index 994062096f..8888faae36 100644 --- a/python/paddle/v2/reader/creator.py +++ b/python/paddle/v2/reader/creator.py @@ -57,15 +57,17 @@ def text_file(path): return reader -def RecordIO(path): +def recordio(path): """ Creates a data reader that outputs record one one by one from given recordio file :path: path of recordio file :returns: data reader of recordio file """ + import recordio as rec + def reader(): - f = recordio.reader(path) + f = rec.reader(path) while True: r = f.read() if r is None: diff --git a/python/paddle/v2/reader/tests/creator_test.py b/python/paddle/v2/reader/tests/creator_test.py index dd84fbb002..e20af9e5e4 100644 --- a/python/paddle/v2/reader/tests/creator_test.py +++ b/python/paddle/v2/reader/tests/creator_test.py @@ -37,10 +37,10 @@ class TestTextFile(unittest.TestCase): class TestRecordIO(unittest.TestCase): - def test_RecordIO(self): + def test_recordio(self): path = os.path.join( os.path.dirname(__file__), "test_recordio_creator.dat") - reader = paddle.v2.reader.creator.RecordIO(path) + reader = paddle.v2.reader.creator.recordio(path) for idx, r in enumerate(reader()): self.assertSequenceEqual(r, str(idx)) From 0adb9e01e9bd9ed662ef653b6a62417b18dd1937 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Thu, 22 Jun 2017 11:12:26 +0800 Subject: [PATCH 013/542] fix bugs --- python/paddle/v2/reader/creator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/v2/reader/creator.py b/python/paddle/v2/reader/creator.py index 8888faae36..9f888b16d6 100644 --- a/python/paddle/v2/reader/creator.py +++ b/python/paddle/v2/reader/creator.py @@ -16,7 +16,7 @@ Creator package contains some simple reader creator, which could be used in user program. 
""" -__all__ = ['np_array', 'text_file', "RecordIO"] +__all__ = ['np_array', 'text_file', "recordio"] def np_array(x): From 1f217f0ab319254b901d9f8df8be447e0bed17a6 Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Thu, 22 Jun 2017 12:26:47 +0800 Subject: [PATCH 014/542] "add c testing, python testing TODO" --- go/pserver/cclient/cclient.go | 9 +-------- go/pserver/cclient/test/dump_optimizer_proto.py | 13 +++++++++++++ go/pserver/cclient/test/main.c | 13 ++++++++++++- go/pserver/cclient/test/optimizer.pb.txt | Bin 0 -> 51 bytes go/pserver/cclient/test/test_train.py | 2 ++ go/pserver/client_test.go | 8 ++++++-- python/paddle/v2/optimizer.py | 2 ++ 7 files changed, 36 insertions(+), 11 deletions(-) create mode 100644 go/pserver/cclient/test/dump_optimizer_proto.py create mode 100644 go/pserver/cclient/test/optimizer.pb.txt diff --git a/go/pserver/cclient/cclient.go b/go/pserver/cclient/cclient.go index 6aaaff7409..ba2a235de3 100644 --- a/go/pserver/cclient/cclient.go +++ b/go/pserver/cclient/cclient.go @@ -121,14 +121,7 @@ func paddle_begin_init_params(client C.paddle_pserver_client) C.int { //export paddle_init_param func paddle_init_param(client C.paddle_pserver_client, param C.paddle_parameter, param_config unsafe.Pointer, config_len C.int) C.int { - et := pserver.ElementType(param.element_type) - name := C.GoString(param.name) - pc := pserver.ParameterWithConfig{ - Param: pserver.Parameter{Name: name, ElementType: et, Content: param.content, Length: para.content_len}, - Config: cArrayToSlice(param_config, int(config_len)), - } - c := get(client) - err := c.InitParam(pc) + et if err != nil { if err.Error() == pserver.AlreadyInitialized { diff --git a/go/pserver/cclient/test/dump_optimizer_proto.py b/go/pserver/cclient/test/dump_optimizer_proto.py new file mode 100644 index 0000000000..2ed4db97f9 --- /dev/null +++ b/go/pserver/cclient/test/dump_optimizer_proto.py @@ -0,0 +1,13 @@ +import OptimizerConfig_pb2 as pb + +config = pb.OptimizerConfig() +config.clip_norm = 0.1 +config.lr_policy = pb.OptimizerConfig.Const +config.optimizer = pb.OptimizerConfig.SGD +config.sgd.momentum = 0.0 +config.sgd.decay = 0.0 +config.sgd.nesterov = False +config.const_lr.learning_rate = 0.1 +s = config.SerializeToString() +with open("optimizer.pb.txt", 'w') as f: + f.write(s) diff --git a/go/pserver/cclient/test/main.c b/go/pserver/cclient/test/main.c index 03f749d4e4..7d26127b60 100644 --- a/go/pserver/cclient/test/main.c +++ b/go/pserver/cclient/test/main.c @@ -45,9 +45,20 @@ void getParams(paddle_pserver_client c) { } } + + int main() { char addr[] = "localhost:3000"; paddle_pserver_client c = paddle_new_pserver_client(addr, 1); + char config_proto[1024]; + size_t config_proto_len = 0; + ssize_t nread; + FILE *fp = fopen("optimizer.pb.txt", "r"); + if(!fp) { fail(); } + while((nread = getline(&config_proto, &config_proto_len, fp)) != -1) { + printf("%s", config_proto); + } + fclose(fp); retry: if (paddle_begin_init_params(c)) { paddle_parameter param; @@ -59,7 +70,7 @@ retry: param.name = name_a; param.content = content_a; param.content_len = 2000; - int error = paddle_init_param(c, param, NULL, 0); + int error = paddle_init_param(c, param, config_proto, config_proto_len); if (error != 0) { goto retry; } diff --git a/go/pserver/cclient/test/optimizer.pb.txt b/go/pserver/cclient/test/optimizer.pb.txt new file mode 100644 index 0000000000000000000000000000000000000000..27c8a584df40ab714edfd730f0ff7b7bd3783964 GIT binary patch literal 51 lcmd;JloDUb$N&X9;j9CU3=s@ToSd^}g1}Dum25B;7XZ}t4FdoG literal 0 HcmV?d00001 
diff --git a/go/pserver/cclient/test/test_train.py b/go/pserver/cclient/test/test_train.py index 3f8d5d793b..68e1d9b269 100644 --- a/go/pserver/cclient/test/test_train.py +++ b/go/pserver/cclient/test/test_train.py @@ -22,6 +22,8 @@ def main(): # create optimizer optimizer = paddle.optimizer.Momentum(momentum=0) + #TODO(zhihong) : replace optimizer with new OptimizerConfig + trainer = paddle.trainer.SGD(cost=cost, parameters=parameters, update_equation=optimizer, diff --git a/go/pserver/client_test.go b/go/pserver/client_test.go index d0371a26a1..c5d38e4112 100644 --- a/go/pserver/client_test.go +++ b/go/pserver/client_test.go @@ -75,7 +75,9 @@ func TestClientFull(t *testing.T) { var p pserver.Parameter p.Name = "p_" + strconv.Itoa(i) p.ElementType = pserver.Float32 - p.Content = make([]byte, (i+1)*100) + ElementValue := make([]byte, (i+1)*100) + p.Content = &ElementValue[0] + p.Length = len(ElementValue) err := c.InitParam(pserver.ParameterWithConfig{Param: p}) if err != nil { t.Fatal(err) @@ -92,7 +94,9 @@ func TestClientFull(t *testing.T) { var g pserver.Gradient g.Name = "p_" + strconv.Itoa(i) g.ElementType = pserver.Float32 - g.Content = make([]byte, (i+1)*100) + ElementValue := make([]byte, (i+1)*100) + g.Content = &ElementValue[0] + g.Length = len(ElementValue) grads = append(grads, g) } diff --git a/python/paddle/v2/optimizer.py b/python/paddle/v2/optimizer.py index 1ef2dceca9..8124e219ba 100644 --- a/python/paddle/v2/optimizer.py +++ b/python/paddle/v2/optimizer.py @@ -5,6 +5,8 @@ import paddle.trainer_config_helpers.optimizers as v1_optimizers """ Optimizers(update equation) for SGD method. +TODO(zhihong) : create new optimizer with proto config, add new optimizer here + TODO(yuyang18): Complete comments. """ From cc487c09f73e2dc0e29d2ab07401061facd9b782 Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Thu, 22 Jun 2017 12:52:18 +0800 Subject: [PATCH 015/542] "fix typo delete" --- go/pserver/cclient/cclient.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/go/pserver/cclient/cclient.go b/go/pserver/cclient/cclient.go index ba2a235de3..6aaaff7409 100644 --- a/go/pserver/cclient/cclient.go +++ b/go/pserver/cclient/cclient.go @@ -121,7 +121,14 @@ func paddle_begin_init_params(client C.paddle_pserver_client) C.int { //export paddle_init_param func paddle_init_param(client C.paddle_pserver_client, param C.paddle_parameter, param_config unsafe.Pointer, config_len C.int) C.int { - et + et := pserver.ElementType(param.element_type) + name := C.GoString(param.name) + pc := pserver.ParameterWithConfig{ + Param: pserver.Parameter{Name: name, ElementType: et, Content: param.content, Length: para.content_len}, + Config: cArrayToSlice(param_config, int(config_len)), + } + c := get(client) + err := c.InitParam(pc) if err != nil { if err.Error() == pserver.AlreadyInitialized { From 3e099787b803b30a4f76cbd0c6738a81e1d16c93 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 22 Jun 2017 14:21:37 +0800 Subject: [PATCH 016/542] Add scope doc --- doc/design/scope.md | 66 ++++++++++++++------------------------------- 1 file changed, 20 insertions(+), 46 deletions(-) diff --git a/doc/design/scope.md b/doc/design/scope.md index b8390a3714..b7e0a10d03 100644 --- a/doc/design/scope.md +++ b/doc/design/scope.md @@ -1,58 +1,32 @@ -# Scope +# What is a scope. -### Define +## Overview -Scope is a context to manage Variables. It mainly contains a map from Variable name to Variable. Net will get and update variable throw scope. 
+Expected usage scenarios.
-```cpp
-class Variable;
-using VariablePtr = std::shared_ptr<Variable>;
+This leads to the two properties of Scope:
+  1. Scope is a container of Variables
+  2. Scope can be shared
-class Scope final {
-public:
-  Scope();
-  Scope(const std::shared_ptr<Scope>& parent);
+## Scope is a container of Variables
-  //! Get Variable in this scope.
-  //! @return nullptr if no such variable.
-  const VariablePtr& getVar(const std::string& name) const;
+Explain why Scope is the container of Variables. Explain the reasons for each of the following points.
-  //! Create or get a variable in this scope.
-  VariablePtr& createOrGetVar(const std::string& name);
+  * It only contains variables
+  * Every variable belongs to exactly one Scope
+  * When a Scope is destructed, its variables are destructed with it
+  * A Variable can only be created through a Scope.
+  * A Variable can only be retrieved through a Scope.
-private:
-  /// variable name -> variable
-  std::unordered_map<std::string, VariablePtr> vars_;
-  std::shared_ptr<Scope> parent_{nullptr};
-};
-```
+## Scope can be inherited, or in other words, shared
-You need to specify a scope to run a Net. One net can run in different scopes and update different variable in the scope. If you did not specify one, It will run in a default scope.
+Explain how a Scope is shared, and the algorithm for looking up a Variable.
+  * A Scope always looks for a Variable locally first; if it is not found there, the search continues in its parent Scope
+  * There is no limit on the nesting depth.
-```cpp
-Scope global;
-auto x = newVar("X"); // x is created in scope global, implicitly.
-auto y = newVar("Y");
-Net net1;
-net1.addOp("add", {x, y}, {x}); // x = x + y;
-net1.run();
+# Interface implementation
-for (size_t i=0; i<10; ++i) {
-  Scope local;
-  auto tmp = newVar("tmp"); // tmp is created in scope local.
-  Net net2;
-  net2.addOp("add", {x, y}, {tmp});
-  net2.run(); // tmp = x + y;
-}
+C++ code.
-Net net3;
-net3.addOp("add", {x, y}, {"tmp"}); // error! cannot found "tmp" in global scope.
-```
-
-### Chain structure
-
-Scope has a pointer point to it's parent scope, this is mainly used in RNN when it need to create many stepNet.
-
-
-### Scope Guard
+## What each interface means, and why it is designed this way

From 04ad9b6b02f0ca69c6b8b879397906edf1fec61f Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Thu, 22 Jun 2017 15:10:37 +0800
Subject: [PATCH 017/542] Add Scope Parent & Local section

---
 doc/design/scope.md | 31 +++++++++++++++++++++++++++----
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/doc/design/scope.md b/doc/design/scope.md
index e73d3c231c..3ebbd26338 100644
--- a/doc/design/scope.md
+++ b/doc/design/scope.md
@@ -17,11 +17,34 @@
  * When a Scope is destructed, its variables are destructed with it
  * A Variable can only be created through a Scope.
  * A Variable can only be retrieved through a Scope.
-## Scope can be inherited, or in other words, shared
-Explain how a Scope is shared, and the algorithm for looking up a Variable.
-  * A Scope always looks for a Variable locally first; if it is not found there, the search continues in its parent Scope
-  * There is no limit on the nesting depth.
+## Parent scope and local scope
+
+Just like [scope](https://en.wikipedia.org/wiki/Scope_(computer_science)) in programming languages, `Scope` in the neural network also can be local. There are two attributes about local scope.
+
+* We can create local variables in a local scope, and when that local scope are destroyed, all local variables should also be destroyed.
+* Variables in a parent scope can be retrieved from that parent scope's local scope, i.e., when user get a variable from a scope, it will search this variable in current scope firstly. If there is no such variable in local scope, `scope` will keep searching from its parent, until the variable is found or there is no parent.
+ +```cpp +class Scope { +public: + Scope(const std::shared_ptr& scope): parent_(scope) {} + + Variable* Get(const std::string& name) const { + Variable* var = GetVarLocally(name); + if (var != nullptr) { + return var; + } else if (parent_ != nullptr) { + return parent_->Get(name); + } else { + return nullptr; + } + } + +private: + std::shared_ptr parent_ {nullptr}; +}; +``` # 接口实现 From 581e4c1cbd2208896dbbc6facef4099a607bfc8c Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 22 Jun 2017 15:27:16 +0800 Subject: [PATCH 018/542] Parent & local scope done --- doc/design/scope.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/doc/design/scope.md b/doc/design/scope.md index 3ebbd26338..27ceed961f 100644 --- a/doc/design/scope.md +++ b/doc/design/scope.md @@ -20,10 +20,10 @@ ## Parent scope and local scope -Just like [scope](https://en.wikipedia.org/wiki/Scope_(computer_science)) in programming languages, `Scope` in the neural network also can be local. There are two attributes about local scope. +Just like [scope](https://en.wikipedia.org/wiki/Scope_(computer_science)) in programming languages, `Scope` in the neural network also can be local. There are two attributes about local scope. -* We can create local variables in a local scope, and when that local scope are destroyed, all local variables should also be destroyed. -* Variables in a parent scope can be retrieved from that parent scope's local scope, i.e., when user get a variable from a scope, it will search this variable in current scope firstly. If there is no such variable in local scope, `scope` will keep searching from its parent, until the variable is found or there is no parent. +1. We can create local variables in a local scope, and when that local scope are destroyed, all local variables should also be destroyed. +2. Variables in a parent scope can be retrieved from that parent scope's local scope, i.e., when user get a variable from a scope, it will search this variable in current scope firstly. If there is no such variable in local scope, `scope` will keep searching from its parent, until the variable is found or there is no parent. ```cpp class Scope { @@ -46,6 +46,10 @@ private: }; ``` +In `Scope` class, there is a private data member called `parent_`. `parent_` is a smart pointer to its parent scope. When user `Get` a variable by its `name`, the `name` will be searched locally inside the current scope. If the variable cannot be found locally and parent scope is not a `nullptr`, the variable will be searched inside that parent scope. `parent_` pointer's default value is `nullptr`. It means that the scope is a global scope when `parent_` is nullptr. + +A local scope is very useful when we implement Recurrent Neural Network. Each timestep of an RNN should be a `Net`. Each `Net` of timestep (`StepNet` for short) should use an independent local scope. Just like each variable in a while loop is inside a local scope in programming languages. By using a single `StepNet` and changing local scope, we can implement an RNN easily. 
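
To make the step-scope idea concrete, here is a small self-contained sketch of the lookup rule and the per-timestep usage described above. The `CreateVariable`/`GetVariable` names follow the later patches in this series; the `main` function and everything inside it are illustrative only, not part of the patch.

```cpp
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>

struct Variable {};  // placeholder; the real Variable would hold a tensor etc.

// Minimal Scope with the lookup rule described above: search locally
// first, then fall back to the parent chain.
class Scope {
 public:
  explicit Scope(std::shared_ptr<Scope> parent = nullptr)
      : parent_(std::move(parent)) {}

  Variable* CreateVariable(const std::string& name) {
    auto& slot = vars_[name];
    if (!slot) slot.reset(new Variable);
    return slot.get();
  }

  Variable* GetVariable(const std::string& name) const {
    auto it = vars_.find(name);
    if (it != vars_.end()) return it->second.get();
    return parent_ ? parent_->GetVariable(name) : nullptr;
  }

 private:
  std::shared_ptr<Scope> parent_;
  std::unordered_map<std::string, std::unique_ptr<Variable>> vars_;
};

int main() {
  auto global = std::make_shared<Scope>();
  global->CreateVariable("W");  // a weight shared by every timestep

  for (int t = 0; t < 3; ++t) {
    Scope step(global);        // fresh local scope for this timestep
    step.CreateVariable("h");  // per-step state lives only in `step`
    // The shared weight resolves through the parent chain:
    std::cout << "step " << t << " sees W: "
              << (step.GetVariable("W") != nullptr) << "\n";
  }  // `step` destroyed here, and its local variables with it
  return 0;
}
```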
+ # 接口实现 # 各个接口是啥意思,为啥这么设计 From 0b70361a0ee71ed04ca6925a25bbdd3e434c2bfe Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 22 Jun 2017 15:35:00 +0800 Subject: [PATCH 019/542] Refining english --- doc/design/scope.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/design/scope.md b/doc/design/scope.md index 27ceed961f..341ec16d79 100644 --- a/doc/design/scope.md +++ b/doc/design/scope.md @@ -20,10 +20,10 @@ ## Parent scope and local scope -Just like [scope](https://en.wikipedia.org/wiki/Scope_(computer_science)) in programming languages, `Scope` in the neural network also can be local. There are two attributes about local scope. +Just like [scope](https://en.wikipedia.org/wiki/Scope_(computer_science)) in programming languages, `Scope` in the neural network can also be local. There are two attributes about local scope. -1. We can create local variables in a local scope, and when that local scope are destroyed, all local variables should also be destroyed. -2. Variables in a parent scope can be retrieved from that parent scope's local scope, i.e., when user get a variable from a scope, it will search this variable in current scope firstly. If there is no such variable in local scope, `scope` will keep searching from its parent, until the variable is found or there is no parent. +1. We can create local variables in a local scope. When that local scope are destroyed, all local variables should also be destroyed. +2. Variables in a parent scope can be retrieved from that parent scope's local scopes, i.e., when user get a variable from a scope, it will try to search this variable in current scope. If there is no such variable in the local scope, `scope` will keep searching from its parent, until the variable is found or there is no parent. ```cpp class Scope { @@ -46,9 +46,9 @@ private: }; ``` -In `Scope` class, there is a private data member called `parent_`. `parent_` is a smart pointer to its parent scope. When user `Get` a variable by its `name`, the `name` will be searched locally inside the current scope. If the variable cannot be found locally and parent scope is not a `nullptr`, the variable will be searched inside that parent scope. `parent_` pointer's default value is `nullptr`. It means that the scope is a global scope when `parent_` is nullptr. +In `Scope` class, there is a private data member called `parent_`. `parent_` is a smart pointer to its parent scope. When user `Get` a variable by its `name`, the `name` will be searched inside the current scope. If the variable cannot be found locally and parent scope is not a `nullptr`, the variable will be searched inside that parent scope. `parent_` pointer's default value is `nullptr`. It means that the scope is a global scope when `parent_` is nullptr. -A local scope is very useful when we implement Recurrent Neural Network. Each timestep of an RNN should be a `Net`. Each `Net` of timestep (`StepNet` for short) should use an independent local scope. Just like each variable in a while loop is inside a local scope in programming languages. By using a single `StepNet` and changing local scope, we can implement an RNN easily. +A local scope is very useful when we implement Recurrent Neural Network. Each timestep of an RNN should be a `Net`. Each `Net` of timestep (`StepNet` for short) should use an independent local scope. Just like variables in a while loop is inside a local scope in programming languages. By using a single `StepNet` and changing local scope, we can implement an RNN easily. 
# 接口实现 From 76e2a3cd95236ab29fb1b5562d5aab52afa99470 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 22 Jun 2017 15:40:08 +0800 Subject: [PATCH 020/542] Refine English --- doc/design/scope.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/design/scope.md b/doc/design/scope.md index 341ec16d79..695426b2f2 100644 --- a/doc/design/scope.md +++ b/doc/design/scope.md @@ -20,10 +20,10 @@ ## Parent scope and local scope -Just like [scope](https://en.wikipedia.org/wiki/Scope_(computer_science)) in programming languages, `Scope` in the neural network can also be local. There are two attributes about local scope. +Just like [scope](https://en.wikipedia.org/wiki/Scope_(computer_science)) in programming languages, `Scope` in the neural network can also be a local scope. There are two attributes about local scope. 1. We can create local variables in a local scope. When that local scope are destroyed, all local variables should also be destroyed. -2. Variables in a parent scope can be retrieved from that parent scope's local scopes, i.e., when user get a variable from a scope, it will try to search this variable in current scope. If there is no such variable in the local scope, `scope` will keep searching from its parent, until the variable is found or there is no parent. +2. Variables in a parent scope can be retrieved from local scopes of that parent scope, i.e., when user get a variable from a scope, it will try to search this variable in current scope. If there is no such variable in the local scope, `scope` will keep searching from its parent, until the variable is found or there is no parent. ```cpp class Scope { From 8282138047cd443aa579d04320bb10ffd5bde5ca Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 22 Jun 2017 15:49:52 +0800 Subject: [PATCH 021/542] some properties of scope --- doc/design/scope.md | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/doc/design/scope.md b/doc/design/scope.md index e73d3c231c..b0ee744535 100644 --- a/doc/design/scope.md +++ b/doc/design/scope.md @@ -8,15 +8,29 @@ 1. Scope是Variable的Container 2. Scope可以共享 -## Scope 是一个Variable的Container +## Scope is a Container of Variables. -解释下为啥Scope是Variable的container。解释下面几个小点的原因。 + * Scope contains Variables as it's data member. + * Scope contains methods that are used to manage Variables, such as Create/Get/Delete. + * every variable only belong to one certain Scope. + * Scope should destruct all Variables within it when itself is destructed. + * Variable can only be created by Scope. + * Variable can only be got from Scope. + + * Scope do not contains Operators and have no information to run them. 
+ +```cpp +class Scope { + public: + Variable* CreateVariable(const std::string& name); + const Variable* GetVariable(const std::string& name) const; + bool DeleteVariable(const std::string& name); + + private: + std::unordered_map> variable_map_; +}; +``` - * 他只包含variable - * 每一个variable也只属于一个Scope - * 每一个Scope析构的时候,会同时析构variable - * 只能通过Scope创建Vairable。 - * 只能通过Scope获取Variable。 ## Scope 可以被继承或者叫共享 解释下Scope如何被共享,如何查找Variable的算法。 From d7aca775c5bc5b9d914435cd4b4648c2e7cbd687 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 22 Jun 2017 16:31:03 +0800 Subject: [PATCH 022/542] Update API --- doc/design/scope.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/design/scope.md b/doc/design/scope.md index 695426b2f2..6694e275b6 100644 --- a/doc/design/scope.md +++ b/doc/design/scope.md @@ -30,7 +30,7 @@ class Scope { public: Scope(const std::shared_ptr& scope): parent_(scope) {} - Variable* Get(const std::string& name) const { + Variable* GetVar(const std::string& name) const { Variable* var = GetVarLocally(name); if (var != nullptr) { return var; From 2d5507fab26fe990e88cbe3569b5a1f1a810b6b8 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 22 Jun 2017 16:46:41 +0800 Subject: [PATCH 023/542] Add interfaces --- doc/design/scope.md | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/doc/design/scope.md b/doc/design/scope.md index 6694e275b6..68395435dd 100644 --- a/doc/design/scope.md +++ b/doc/design/scope.md @@ -50,6 +50,44 @@ In `Scope` class, there is a private data member called `parent_`. `parent_` is A local scope is very useful when we implement Recurrent Neural Network. Each timestep of an RNN should be a `Net`. Each `Net` of timestep (`StepNet` for short) should use an independent local scope. Just like variables in a while loop is inside a local scope in programming languages. By using a single `StepNet` and changing local scope, we can implement an RNN easily. -# 接口实现 +# Interface Design -# 各个接口是啥意思,为啥这么设计 +```cpp +class Variable { +private: + Variable() = default; + friend class Scope; +}; + +using VariablePtr = std::weak_ptr; + +class Scope { +public: + Scope(const std::shared_ptr& parent = nullptr); + + // return nullptr if not found. + VariablePtr GetVariable(const std::string& name) const; + + // return Error if already contains same name variable. + Error CreateVariable(const std::string& name); + +private: + std::shared_ptr parent_; + std::unordered_map> attrs_; +}; +``` +## Only scope can create a variable + +To ensure `only scope can create a variable`, we should mark `Variable`'s constructor as a private member function, and Scope is a friend class of Variable. And then only `CreateVariable` can construct `Variable`. + +## When scope destroyed, all variables inside this scope should be destroyed together + +The `VariablePtr` is a `weak_ptr`. `Net` and `Op` can only get a Variable from `Scope`, but cannot hold it. When scope is destroyed, all `VariablePtr`s belong to this Scope will be changed to `nullptr`. + +## Sharing a parent scope + +Local scope contains a `parent_` pointer. It is a linked-list for scopes. Using a `shared_ptr` because when a local scope is using, its parents cannot be destroyed. + +## Orthogonal interface + +`GetVariable` will return `nullptr` when `name` is not found. It can be used as `Contains` method. `CreateVariable` will return a `Error` when there is a name conflict locally. Combine `GetVariable` and `CreateVariable`, we can implement `CreateOrGetVariable` easily. 
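
The orthogonality argument that closes the patch above (`GetVariable` returning `nullptr` doubles as a containment test, while `CreateVariable` reports local name conflicts) is easy to check in code. A minimal sketch, with `bool` standing in for whatever `Error` type Paddle settles on:

```cpp
#include <memory>
#include <string>
#include <unordered_map>

class Variable {
  friend class Scope;  // only Scope can construct a Variable
 private:
  Variable() = default;
};

class Scope {
 public:
  // Returns nullptr when `name` is not found, so it doubles as Contains().
  Variable* GetVariable(const std::string& name) const {
    auto it = vars_.find(name);
    return it == vars_.end() ? nullptr : it->second.get();
  }

  // Fails on a local name conflict (bool standing in for Error).
  bool CreateVariable(const std::string& name) {
    if (vars_.count(name)) return false;
    vars_[name] = std::unique_ptr<Variable>(new Variable);
    return true;
  }

  // The composition mentioned in the patch: built entirely from the two
  // primitives, so it does not need to be a primitive itself.
  Variable* CreateOrGetVariable(const std::string& name) {
    Variable* var = GetVariable(name);
    if (var == nullptr) {
      CreateVariable(name);
      var = GetVariable(name);
    }
    return var;
  }

 private:
  std::unordered_map<std::string, std::unique_ptr<Variable>> vars_;
};
```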
From 73b1c5bd96d5bae8e8558839681f955261f4d197 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Thu, 22 Jun 2017 16:51:08 +0800 Subject: [PATCH 024/542] add overview for scope design doc --- doc/design/scope.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/design/scope.md b/doc/design/scope.md index de2e67d327..5023d4b0e4 100644 --- a/doc/design/scope.md +++ b/doc/design/scope.md @@ -2,11 +2,12 @@ ## Overview -预期使用场景。 +Scope is an important concept in programming languages, which defines a program region that a set of bindings between names and entities applies. In a specific scope, a valid name is uniquely associated with an entity, such as a variable. And in another scope, this name may refer to other entity or nothing at all. It clearly restricts the visibility and validity of names in a program. Hence **Scope** is introduced to PaddlePaddle to manage variables in context. But different from the original abstract concept, Scope now becomes an object with two important attributes: -引出Scope的两个属性。 - 1. Scope是Variable的Container - 2. Scope可以共享 +- Scope is a container of variables +- Scope can be inherited or shared + +A detailed explanation of these two attributes goes as following. ## Scope is a Container of Variables. From 1f0056b242f79f34fa472dcf93014866a787753c Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 22 Jun 2017 16:52:35 +0800 Subject: [PATCH 025/542] Update interface --- doc/design/scope.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/doc/design/scope.md b/doc/design/scope.md index 74bb6242e4..76af6c30c1 100644 --- a/doc/design/scope.md +++ b/doc/design/scope.md @@ -76,9 +76,12 @@ private: using VariablePtr = std::weak_ptr; class Scope { -public: +private: Scope(const std::shared_ptr& parent = nullptr); +public: + static std::shared_ptr Create(const std::shared_ptr& parent = nullptr); + // return nullptr if not found. VariablePtr GetVariable(const std::string& name) const; @@ -102,6 +105,8 @@ The `VariablePtr` is a `weak_ptr`. `Net` and `Op` can only get a Variable from ` Local scope contains a `parent_` pointer. It is a linked-list for scopes. Using a `shared_ptr` because when a local scope is using, its parents cannot be destroyed. +Also, as the parent scope is a `shared_ptr`, we can only `Create()` a scope shared pointer. We cannot construct a scope variable, because it cannot be passed to other scope as `parent` pointer. + ## Orthogonal interface `GetVariable` will return `nullptr` when `name` is not found. It can be used as `Contains` method. `CreateVariable` will return a `Error` when there is a name conflict locally. Combine `GetVariable` and `CreateVariable`, we can implement `CreateOrGetVariable` easily. From 17eed332af53894b7525ef7584e37e622bca3f4d Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 22 Jun 2017 16:58:03 +0800 Subject: [PATCH 026/542] Update key attributes --- doc/design/scope.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/design/scope.md b/doc/design/scope.md index 5ba3deb847..1bd36beb5a 100644 --- a/doc/design/scope.md +++ b/doc/design/scope.md @@ -4,8 +4,8 @@ Scope is an important concept in programming languages, which defines a program region that a set of bindings between names and entities applies. In a specific scope, a valid name is uniquely associated with an entity, such as a variable. And in another scope, this name may refer to other entity or nothing at all. It clearly restricts the visibility and validity of names in a program. 
Hence **Scope** is introduced to PaddlePaddle to manage variables in context. But different from the original abstract concept, Scope now becomes an object with two important attributes: -- Scope is a container of variables -- Scope can be inherited or shared +- Scope is an association of a name to variable. +- Variables in a parent scope can be retrieved from local scope. A detailed explanation of these two attributes goes as following. From 37fd48bf159711a525a3f3d67ee055b5a05a5d3a Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 22 Jun 2017 17:01:59 +0800 Subject: [PATCH 027/542] some detailed explaination of the Scope properties --- doc/design/scope.md | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/doc/design/scope.md b/doc/design/scope.md index 1bd36beb5a..fac9643b4b 100644 --- a/doc/design/scope.md +++ b/doc/design/scope.md @@ -9,16 +9,30 @@ Scope is an important concept in programming languages, which defines a program A detailed explanation of these two attributes goes as following. + ## Scope is a Container of Variables. - * Scope contains Variables as it's data member. - * Scope contains methods that are used to manage Variables, such as Create/Get/Delete. - * every variable only belong to one certain Scope. - * Scope should destruct all Variables within it when itself is destructed. - * Variable can only be created by Scope. - * Variable can only be got from Scope. +Scope is used to provide a running environment for Net. + +1. Scope mainly has Variables as it's data member. + Scope is a running environment for Net. Net should get all it need to do computation from a scope, such as data buffer, state(momentum) etc. + All these data/state can be abstracted and create as variable in Paddle, so the only thing Scope need to care about is Variable. +1. Variable can only be created by Scope. +1. Variable can only be got from Scope. +1. Scope contains methods that are used to manage Variables, such as Create/Get/Delete. + Because we only need to care about Variable, we only need method to manage the lifecycle of Variable. + - `Create` is used to create a Variable by its name and add the mapping relation. + - `Get` is used to find a Variable by name. + - `Delete` is used to remove a Variable because sometimes we want to release memory or other resources. + +1. Every variable only belongs to one certain Scope. + Variable can not be shared between nets, if we want to use variables from different scope we can use `Parent scope`. + +1. Scope should destruct all Variables within it when itself is destructed. + Because Variable can only be got from Scope, when destroying Scope, we also need to destroy all the Vars in it. - * Scope do not contains Operators and have no information to run them. +1. Scope do not contain Operators and have no information to run them. + Net is designed to drive the computation, Scope is only used to provide a running environment. ```cpp class Scope { From c3a4b8bc4407b2496378b57dfea54be0dd05f745 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 22 Jun 2017 17:03:56 +0800 Subject: [PATCH 028/542] refine style of markdown --- doc/design/scope.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/design/scope.md b/doc/design/scope.md index fac9643b4b..a255b361d0 100644 --- a/doc/design/scope.md +++ b/doc/design/scope.md @@ -15,23 +15,28 @@ A detailed explanation of these two attributes goes as following. Scope is used to provide a running environment for Net. 1. 
Scope mainly has Variables as it's data member. + Scope is a running environment for Net. Net should get all it need to do computation from a scope, such as data buffer, state(momentum) etc. All these data/state can be abstracted and create as variable in Paddle, so the only thing Scope need to care about is Variable. 1. Variable can only be created by Scope. 1. Variable can only be got from Scope. 1. Scope contains methods that are used to manage Variables, such as Create/Get/Delete. + Because we only need to care about Variable, we only need method to manage the lifecycle of Variable. - `Create` is used to create a Variable by its name and add the mapping relation. - `Get` is used to find a Variable by name. - `Delete` is used to remove a Variable because sometimes we want to release memory or other resources. 1. Every variable only belongs to one certain Scope. + Variable can not be shared between nets, if we want to use variables from different scope we can use `Parent scope`. 1. Scope should destruct all Variables within it when itself is destructed. + Because Variable can only be got from Scope, when destroying Scope, we also need to destroy all the Vars in it. 1. Scope do not contain Operators and have no information to run them. + Net is designed to drive the computation, Scope is only used to provide a running environment. ```cpp From db96c0eef9dbe860f8ee080d91b157c77bf4ddbc Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 22 Jun 2017 17:10:47 +0800 Subject: [PATCH 029/542] Use unique_ptr instead of shared_ptr/weak_ptr. But user can not hold this pointers. --- doc/design/scope.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/doc/design/scope.md b/doc/design/scope.md index 76af6c30c1..8d5744c227 100644 --- a/doc/design/scope.md +++ b/doc/design/scope.md @@ -73,8 +73,6 @@ private: friend class Scope; }; -using VariablePtr = std::weak_ptr; - class Scope { private: Scope(const std::shared_ptr& parent = nullptr); @@ -83,14 +81,14 @@ public: static std::shared_ptr Create(const std::shared_ptr& parent = nullptr); // return nullptr if not found. - VariablePtr GetVariable(const std::string& name) const; + Variable* GetVariable(const std::string& name) const; // return Error if already contains same name variable. Error CreateVariable(const std::string& name); private: std::shared_ptr parent_; - std::unordered_map> attrs_; + std::unordered_map> attrs_; }; ``` ## Only scope can create a variable @@ -99,7 +97,7 @@ To ensure `only scope can create a variable`, we should mark `Variable`'s constr ## When scope destroyed, all variables inside this scope should be destroyed together -The `VariablePtr` is a `weak_ptr`. `Net` and `Op` can only get a Variable from `Scope`, but cannot hold it. When scope is destroyed, all `VariablePtr`s belong to this Scope will be changed to `nullptr`. +The scope hold unique pointers for all variables. User can `GetVariable` from scope, but he should not hold this pointer as a member variable. Because when scope is destroyed, all variables inside this scope will be destroyed together. 
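
The lifetime rule stated here can be shown concretely. A small illustrative sketch, not part of the patch: because the scope owns its variables through `unique_ptr`, a raw `Variable*` obtained from `GetVariable` dangles the moment the owning scope is destroyed.

```cpp
#include <memory>
#include <string>
#include <unordered_map>

struct Variable { int data = 0; };

struct Scope {
  // unique_ptr ownership: variables die exactly when the scope dies.
  std::unordered_map<std::string, std::unique_ptr<Variable>> vars;

  Variable* GetVariable(const std::string& name) {
    auto it = vars.find(name);
    return it == vars.end() ? nullptr : it->second.get();
  }
};

int main() {
  Variable* kept = nullptr;
  {
    Scope scope;
    scope.vars["x"] = std::make_unique<Variable>();
    Variable* x = scope.GetVariable("x");  // fine while `scope` is alive
    x->data = 42;
    kept = x;  // WRONG: keeping the raw pointer past the scope's lifetime
  }            // `scope` destroyed here, and "x" destroyed with it
  // `kept` now dangles; dereferencing it would be undefined behavior.
  return 0;
}
```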
## Sharing a parent scope

From f104ce2cd2f047f59d09422643d8791feeba483c Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Thu, 22 Jun 2017 17:12:57 +0800
Subject: [PATCH 030/542] fix a mistake share by nets -> share by scopes

---
 doc/design/scope.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/design/scope.md b/doc/design/scope.md
index a255b361d0..dec033ad63 100644
--- a/doc/design/scope.md
+++ b/doc/design/scope.md
@@ -29,7 +29,7 @@ Scope is used to provide a running environment for Net.

 1. Every variable only belongs to one certain Scope.

-   Variable can not be shared between nets, if we want to use variables from different scope we can use `Parent scope`.
+   Variable can not be shared between scopes, if we want to use variables from different scope we can use `Parent scope`.

 1. Scope should destruct all Variables within it when itself is destructed.

From eab0e5229f1d020c8b495dab4187221517dbfb67 Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Thu, 22 Jun 2017 17:20:48 +0800
Subject: [PATCH 031/542] To google code style

---
 doc/design/scope.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/doc/design/scope.md b/doc/design/scope.md
index f2869bc254..76616cc6a6 100644
--- a/doc/design/scope.md
+++ b/doc/design/scope.md
@@ -61,7 +61,7 @@ Just like [scope](https://en.wikipedia.org/wiki/Scope_(computer_science)) in pro

 ```cpp
 class Scope {
-public:
+ public:
   Scope(const std::shared_ptr<Scope>& scope): parent_(scope) {}

   Variable* GetVar(const std::string& name) const {
@@ -75,7 +75,7 @@ public:
     }
   }

-private:
+ private:
   std::shared_ptr<Scope> parent_ {nullptr};
 };
 ```
@@ -88,16 +88,16 @@ A local scope is very useful when we implement Recurrent Neural Network. Each ti

 ```cpp
 class Variable {
-private:
+ private:
   Variable() = default;
   friend class Scope;
 };

 class Scope {
-private:
+ private:
   Scope(const std::shared_ptr<Scope>& parent = nullptr);

-public:
+ public:
   static std::shared_ptr<Scope> Create(const std::shared_ptr<Scope>& parent = nullptr);

   // return nullptr if not found.
@@ -106,7 +106,7 @@ public:
   // return Error if already contains same name variable.
   Error CreateVariable(const std::string& name);

-private:
+ private:
   std::shared_ptr<Scope> parent_;
   std::unordered_map> attrs_;
 };
 ```

From 63a56b4a5182d31bdcbabdd15573dfec54a04bb9 Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Thu, 22 Jun 2017 17:24:30 +0800
Subject: [PATCH 032/542] Change typo

---
 doc/design/scope.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/design/scope.md b/doc/design/scope.md
index 76616cc6a6..cd92ba2aa8 100644
--- a/doc/design/scope.md
+++ b/doc/design/scope.md
@@ -108,7 +108,7 @@ class Scope {

  private:
   std::shared_ptr<Scope> parent_;
-  std::unordered_map> attrs_;
+  std::unordered_map> attrs_;
 };
 ```
 ## Only scope can create a variable

From 921fa13eef21c63455bfbb9972ac7f16fb345c51 Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Thu, 22 Jun 2017 17:25:19 +0800
Subject: [PATCH 033/542] Remove delete

---
 doc/design/scope.md | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/doc/design/scope.md b/doc/design/scope.md
index cd92ba2aa8..e08aebfb01 100644
--- a/doc/design/scope.md
+++ b/doc/design/scope.md
@@ -20,12 +20,11 @@ Scope is used to provide a running environment for Net.
    All these data/state can be abstracted and create as variable in Paddle, so the only thing Scope need to care about is Variable.
 1. Variable can only be created by Scope.
 1. Variable can only be got from Scope.
-1. Scope contains methods that are used to manage Variables, such as Create/Get/Delete.
+1. Scope contains methods that are used to manage Variables, such as Create/Get.

    Because we only need to care about Variable, we only need method to manage the lifecycle of Variable.

    - `Create` is used to create a Variable by its name and add the mapping relation.
    - `Get` is used to find a Variable by name.
-   - `Delete` is used to remove a Variable because sometimes we want to release memory or other resources.

 1. Every variable only belongs to one certain Scope.
@@ -44,10 +43,9 @@ class Scope {
  public:
   Variable* CreateVariable(const std::string& name);
   const Variable* GetVariable(const std::string& name) const;
-  bool DeleteVariable(const std::string& name);

  private:
-  std::unordered_map> variable_map_;
+  std::unordered_map> vars_;
 };
 ```
@@ -108,7 +106,7 @@ class Scope {

  private:
   std::shared_ptr<Scope> parent_;
-  std::unordered_map> attrs_;
+  std::unordered_map> vars_;
 };
 ```
 ## Only scope can create a variable

From 5d88249125039a5fcc5f1a33fd84d7accbd2c526 Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Thu, 22 Jun 2017 17:26:15 +0800
Subject: [PATCH 034/542] Typo

---
 doc/design/scope.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/design/scope.md b/doc/design/scope.md
index e08aebfb01..23893308b2 100644
--- a/doc/design/scope.md
+++ b/doc/design/scope.md
@@ -62,7 +62,7 @@ class Scope {
  public:
   Scope(const std::shared_ptr<Scope>& scope): parent_(scope) {}

-  Variable* GetVar(const std::string& name) const {
+  Variable* GetVariable(const std::string& name) const {
     Variable* var = GetVarLocally(name);
     if (var != nullptr) {
       return var;

From f8a209c2dee0ce16dbf12857ae88556c9917e458 Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Thu, 22 Jun 2017 17:48:19 +0800
Subject: [PATCH 035/542] Rearrange description.

---
 doc/design/scope.md | 31 ++++++++++++++-----------------
 1 file changed, 14 insertions(+), 17 deletions(-)

diff --git a/doc/design/scope.md b/doc/design/scope.md
index 23893308b2..1740019f3d 100644
--- a/doc/design/scope.md
+++ b/doc/design/scope.md
@@ -10,33 +10,30 @@ Scope is an important concept in programming languages, which defines a program

 A detailed explanation of these two attributes goes as following.

-## Scope is a Container of Variables.
+## Scope is an association of a name to variable.

-Scope is used to provide a running environment for Net.
+Scope is an association of a name to variable. All variables belong to `Scope`. You need to specify a scope to run a Net, i.e., `net.Run(&scope)`. One net can run in different scopes and update different variable in the scope.

-1. Scope mainly has Variables as it's data member.
-   Scope is a running environment for Net. Net should get all it need to do computation from a scope, such as data buffer, state(momentum) etc.
-   All these data/state can be abstracted and create as variable in Paddle, so the only thing Scope need to care about is Variable.
-1. Variable can only be created by Scope.
-1. Variable can only be got from Scope.
-1. Scope contains methods that are used to manage Variables, such as Create/Get.
+1. Scope only contains a map of a name to variable.

+   All parameters, data, states in a Net should be variables and stored inside a scope. Each op should get inputs and outputs to do computation from a scope, such as data buffer, state(momentum) etc.
+
+1. Variable can only be created by Scope and a variable can only be got from Scope. User cannot create or get a variable outside a scope. This is a constraints of our framework, and will keep our framework simple and clear.
+
+1. Scope only contains methods that are used to Create and Get Variables. Scope do not contain Operators and have no information to run them.
+
+   `Net` is designed to drive the computation and Scope only contains a map of variables. There is no computation logic inside a `Scope`. Scope just handles the lifetime management of variables.

    - `Create` is used to create a Variable by its name and add the mapping relation.
    - `Get` is used to find a Variable by name.

 1. Every variable only belongs to one certain Scope.

-   Variable can not be shared between scopes, if we want to use variables from different scope we can use `Parent scope`.
-
-1. Scope should destruct all Variables within it when itself is destructed.
-
-   Because Variable can only be got from Scope, when destroying Scope, we also need to destroy all the Vars in it.
+   Variable can not belong to many scopes. If you want to use variables from parent scope, you can use `parent scope`.

-1. Scope do not contain Operators and have no information to run them.
+1. Scope should destruct all Variables inside it when itself is destructed. User can never store `Variable` pointer somewhere else.

-   Net is designed to drive the computation, Scope is only used to provide a running environment.
+   Because Variable can only be got from Scope. When destroying Scope, we also need to destroy all the Variables in it. If user store `Variable` pointer to private data member or some global variable, the pointer will be a invalid pointer when associated `Scope` is destroyed.

 ```cpp
 class Scope {
@@ -45,7 +42,7 @@ class Scope {
   const Variable* GetVariable(const std::string& name) const;

  private:
-  std::unordered_map> vars_;
+  std::unordered_map> vars_;
 };
 ```

From c5ad89a24d54f934bb40ec586f3f42fdf007ee16 Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Thu, 22 Jun 2017 17:51:04 +0800
Subject: [PATCH 036/542] Change title

---
 doc/design/scope.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/design/scope.md b/doc/design/scope.md
index 1740019f3d..d00ab37da4 100644
--- a/doc/design/scope.md
+++ b/doc/design/scope.md
@@ -1,4 +1,4 @@
-# What is a scope.
+# Design of Scope in Paddle

 ## Overview

From 237efc2f263b7cdf7404a065c95b7e45260f4dda Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Thu, 22 Jun 2017 18:00:28 +0800
Subject: [PATCH 037/542] Fix markdown

---
 doc/design/scope.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/doc/design/scope.md b/doc/design/scope.md
index d00ab37da4..149e1ea1b9 100644
--- a/doc/design/scope.md
+++ b/doc/design/scope.md
@@ -22,8 +22,7 @@ Scope is an association of a name to variable. All variables belong to `Scope`.

 1. Variable can only be created by Scope and a variable can only be got from Scope. User cannot create or get a variable outside a scope. This is a constraints of our framework, and will keep our framework simple and clear.

 1. Scope only contains methods that are used to Create and Get Variables. Scope do not contain Operators and have no information to run them.
-
-   `Net` is designed to drive the computation and Scope only contains a map of variables. There is no computation logic inside a `Scope`. Scope just handles the lifetime management of variables.
+   `Net` is designed to drive the computation and Scope only contains a map of variables. There is no computation logic inside a `Scope`. Scope just handles the lifetime management of variables.
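To make the name-to-variable association in these commits concrete, here is a minimal, self-contained sketch of the lookup-with-parent-fallback rule the design doc describes. It is an illustration only: `ToyScope` and its members are hypothetical names, not Paddle's actual headers.

```cpp
#include <memory>
#include <string>
#include <unordered_map>

// A stand-in for the real Variable, which would carry tensor data.
struct Variable {};

// A scope is just a map from name to variable, plus an optional parent
// scope that is consulted when a local lookup misses.
class ToyScope {
 public:
  explicit ToyScope(std::shared_ptr<ToyScope> parent = nullptr)
      : parent_(std::move(parent)) {}

  // Creates the variable if absent; returns the existing one otherwise.
  Variable* CreateVariable(const std::string& name) {
    auto& slot = vars_[name];
    if (slot == nullptr) slot.reset(new Variable);
    return slot.get();
  }

  // Local lookup first, then fall back to the parent chain.
  Variable* GetVariable(const std::string& name) const {
    auto it = vars_.find(name);
    if (it != vars_.end()) return it->second.get();
    return parent_ != nullptr ? parent_->GetVariable(name) : nullptr;
  }

 private:
  std::shared_ptr<ToyScope> parent_;
  std::unordered_map<std::string, std::unique_ptr<Variable>> vars_;
};
```

With such a structure, `net.Run(&scope)` needs nothing beyond the scope pointer: every read goes through `GetVariable`, falling back to the parent scope on a local miss, which is how the shared parent scope described above behaves.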
   - `Create` is used to create a Variable by its name and add the mapping relation.
   - `Get` is used to find a Variable by name.

 1. Every variable only belongs to one certain Scope.

From 3bac2d0d7e50f67fd82f96bccaf3bfdeabd930f7 Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Thu, 22 Jun 2017 22:00:25 +0800
Subject: [PATCH 038/542] Typo

---
 doc/design/scope.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/design/scope.md b/doc/design/scope.md
index 149e1ea1b9..2ff416f06e 100644
--- a/doc/design/scope.md
+++ b/doc/design/scope.md
@@ -63,7 +63,7 @@ class Scope {
     if (var != nullptr) {
       return var;
     } else if (parent_ != nullptr) {
-      return parent_->Get(name);
+      return parent_->GetVariable(name);
     } else {
       return nullptr;
     }
@@ -102,7 +102,7 @@ class Scope {

  private:
   std::shared_ptr<Scope> parent_;
-  std::unordered_map> vars_;
+  std::unordered_map> vars_;
 };
 ```
 ## Only scope can create a variable

From d3e2db4b4f3efa537a2b85bb88d8d8f3e780f09c Mon Sep 17 00:00:00 2001
From: Yi Wang
Date: Thu, 22 Jun 2017 08:12:10 -0700
Subject: [PATCH 039/542] Revert changes made by misleading errors from Travis CI

---
 python/paddle/trainer_config_helpers/networks.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py
index 67154a8d7d..1bf59ed484 100755
--- a/python/paddle/trainer_config_helpers/networks.py
+++ b/python/paddle/trainer_config_helpers/networks.py
@@ -1381,7 +1381,7 @@ def inputs(layers, *args):
     if len(args) != 0:
         layers.extend(args)

-    Inputs(* [l.name for l in layers])
+    Inputs(*[l.name for l in layers])


 def outputs(layers, *args):
@@ -1424,7 +1424,7 @@ def outputs(layers, *args):
     assert len(layers) > 0

     if HasInputsSet():  # input already set
-        Outputs(* [l.name for l in layers])
+        Outputs(*[l.name for l in layers])
         return  # just return outputs.
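    # A short aside, not part of the patches themselves: this revert and the
    # later "Fix travis-ci in new image" commit only toggle the whitespace
    # after the `*`. `Inputs(* [l.name for l in layers])` and
    # `Inputs(*[l.name for l in layers])` are the same call, since `*` unpacks
    # the list into positional arguments:
    #
    #   names = [l.name for l in layers]
    #   Inputs(*names)  # same as Inputs(names[0], names[1], ...)
    #
    # The back-and-forth presumably comes from different formatter versions
    # used by the CI image (an assumption based on the commit messages).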
if len(layers) != 1: From eaed87b00ff4254c261136b4ed6de8eb794b8090 Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Thu, 22 Jun 2017 23:33:47 +0800 Subject: [PATCH 040/542] "fix optimizer include error" --- go/pserver/cclient/CMakeLists.txt | 5 +++-- go/pserver/optimizer.go | 1 + go/pserver/optimizer_test.go | 6 ++++++ go/pserver/service_test.go | 8 ++++++-- 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/go/pserver/cclient/CMakeLists.txt b/go/pserver/cclient/CMakeLists.txt index fff7ae7858..65a38ba1ad 100644 --- a/go/pserver/cclient/CMakeLists.txt +++ b/go/pserver/cclient/CMakeLists.txt @@ -10,5 +10,6 @@ include(golang) include(flags) go_library(paddle_pserver_cclient STATIC) - -add_subdirectory(test) +if(WITH_TESTING) + add_subdirectory(test) +endif() diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go index 5abbca538f..3ee4c74652 100644 --- a/go/pserver/optimizer.go +++ b/go/pserver/optimizer.go @@ -1,6 +1,7 @@ package pserver /* +#cgo CFLAGS: -I ../../ #include "paddle/optimizer/optimizer.h" */ import "C" diff --git a/go/pserver/optimizer_test.go b/go/pserver/optimizer_test.go index 64d6d092aa..4930f0d95f 100644 --- a/go/pserver/optimizer_test.go +++ b/go/pserver/optimizer_test.go @@ -3,6 +3,12 @@ package pserver import "testing" func TestSGDCreateRelease(t *testing.T) { + param := pserver.ParameterWithConfig{ + Param : pserver.Parameter{Name : "a", + ElementType: , + Content: , + Length : } + } o := newOptimizer(sgd, 1) o.Cleanup() } diff --git a/go/pserver/service_test.go b/go/pserver/service_test.go index b746d13e1c..1b2626f7db 100644 --- a/go/pserver/service_test.go +++ b/go/pserver/service_test.go @@ -13,7 +13,9 @@ func TestFull(t *testing.T) { s := pserver.NewService() var p pserver.Parameter p.Name = "param_a" - p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0} + ElementValue := []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0} + p.Content = &ElementValue[0] + p.Length = len(ElementValue) p.ElementType = pserver.Int32 err := s.InitParam(pserver.ParameterWithConfig{Param: p, Config: nil}, nil) if err != nil { @@ -22,7 +24,9 @@ func TestFull(t *testing.T) { var p1 pserver.Parameter p1.Name = "param_b" - p1.Content = []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + ElementValue = []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + p1.Content = &ElementValue[0] + p1.Length = len(ElementValue) p1.ElementType = pserver.Float32 err = s.InitParam(pserver.ParameterWithConfig{Param: p1, Config: nil}, nil) if err != nil { From 7386b06ccdec68f71dc71cdcef1588cc0ff68cb3 Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Fri, 23 Jun 2017 01:01:15 +0800 Subject: [PATCH 041/542] "add optimizer naive link option" --- go/pserver/cclient/test/CMakeLists.txt | 4 - go/pserver/cclient/test/main.c | 104 ---------------- go/pserver/cclient/test/test_cclient.c | 159 ++++++++++++------------- go/pserver/optimizer.go | 3 + 4 files changed, 76 insertions(+), 194 deletions(-) delete mode 100644 go/pserver/cclient/test/main.c diff --git a/go/pserver/cclient/test/CMakeLists.txt b/go/pserver/cclient/test/CMakeLists.txt index 1a3dd7e5e9..722bd45d2f 100644 --- a/go/pserver/cclient/test/CMakeLists.txt +++ b/go/pserver/cclient/test/CMakeLists.txt @@ -1,7 +1,5 @@ cmake_minimum_required(VERSION 3.0) -add_executable(main main.c) -add_dependencies(main paddle_pserver_cclient) add_executable(test_cclient test_cclient.c) add_dependencies(test_cclient paddle_pserver_cclient) @@ -13,10 +11,8 @@ endif() if(PROJ_ROOT) include_directories(${CMAKE_CURRENT_BINARY_DIR}/..) 
- target_link_libraries(main ${CMAKE_CURRENT_BINARY_DIR}/../libpaddle_pserver_cclient.a pthread) target_link_libraries(test_cclient ${CMAKE_CURRENT_BINARY_DIR}/../libpaddle_pserver_cclient.a pthread) else(PROJ_ROOT) include_directories(${CMAKE_BINARY_DIR}) - target_link_libraries(main ${CMAKE_BINARY_DIR}/libpaddle_pserver_cclient.a pthread) target_link_libraries(test_cclient ${CMAKE_BINARY_DIR}/libpaddle_pserver_cclient.a pthread) endif(PROJ_ROOT) diff --git a/go/pserver/cclient/test/main.c b/go/pserver/cclient/test/main.c deleted file mode 100644 index 7d26127b60..0000000000 --- a/go/pserver/cclient/test/main.c +++ /dev/null @@ -1,104 +0,0 @@ -#include -#include - -#include "libpaddle_pserver_cclient.h" - -// TODO(helin): Fix: gtest using cmake is not working, using this -// hacky way for now. -#define fail() \ - fprintf(stderr, "info: %s:%d: ", __FILE__, __LINE__); \ - exit(-1); - -void sendGrads(paddle_pserver_client c) { - unsigned char grad_a[2000] = {2}; - unsigned char grad_b[3000] = {3}; - paddle_gradient grad1 = { - "param_a", PADDLE_ELEMENT_TYPE_FLOAT32, grad_a, 2000}; - paddle_gradient grad2 = { - "param_b", PADDLE_ELEMENT_TYPE_FLOAT32, grad_b, 3000}; - paddle_gradient* grads[2] = {&grad1, &grad2}; - if (paddle_send_grads(c, grads, 2)) { - fail(); - } -} - -void getParams(paddle_pserver_client c) { - paddle_parameter param_a; - paddle_parameter param_b; - char name_a[] = "param_a"; - char name_b[] = "param_b"; - // Must pre-allocate the prameter content before calling paddle_get_params. - unsigned char content_a[2000] = {}; - unsigned char content_b[3000] = {}; - param_a.element_type = PADDLE_ELEMENT_TYPE_FLOAT32; - param_a.name = name_a; - param_a.content = content_a; - param_a.content_len = 2000; - param_b.element_type = PADDLE_ELEMENT_TYPE_FLOAT32; - param_b.name = name_b; - param_b.content = content_b; - param_b.content_len = 3000; - - paddle_parameter* params[2] = {¶m_a, ¶m_b}; - if (paddle_get_params(c, params, 2)) { - fail(); - } -} - - - -int main() { - char addr[] = "localhost:3000"; - paddle_pserver_client c = paddle_new_pserver_client(addr, 1); - char config_proto[1024]; - size_t config_proto_len = 0; - ssize_t nread; - FILE *fp = fopen("optimizer.pb.txt", "r"); - if(!fp) { fail(); } - while((nread = getline(&config_proto, &config_proto_len, fp)) != -1) { - printf("%s", config_proto); - } - fclose(fp); -retry: - if (paddle_begin_init_params(c)) { - paddle_parameter param; - char name_a[] = "param_a"; - char name_b[] = "param_b"; - unsigned char content_a[2000] = {1}; - unsigned char content_b[3000] = {0}; - param.element_type = PADDLE_ELEMENT_TYPE_FLOAT32; - param.name = name_a; - param.content = content_a; - param.content_len = 2000; - int error = paddle_init_param(c, param, config_proto, config_proto_len); - if (error != 0) { - goto retry; - } - - param.element_type = PADDLE_ELEMENT_TYPE_FLOAT32; - param.name = name_b; - param.content = content_b; - param.content_len = 3000; - error = paddle_init_param(c, param, NULL, 0); - if (error != 0) { - goto retry; - } - - error = paddle_finish_init_params(c); - if (error != 0) { - goto retry; - } - } - - int i; - for (i = 0; i < 100; i++) { - sendGrads(c); - getParams(c); - } - - if (paddle_save_model(c, "/tmp/")) { - fail(); - } - - return 0; -} diff --git a/go/pserver/cclient/test/test_cclient.c b/go/pserver/cclient/test/test_cclient.c index 0f9c2ef801..7d26127b60 100644 --- a/go/pserver/cclient/test/test_cclient.c +++ b/go/pserver/cclient/test/test_cclient.c @@ -3,113 +3,100 @@ #include "libpaddle_pserver_cclient.h" 
-typedef float real; - -void fail() { - // TODO(helin): fix: gtest using cmake is not working, using this - // hacky way for now. - printf("test failed.\n"); +// TODO(helin): Fix: gtest using cmake is not working, using this +// hacky way for now. +#define fail() \ + fprintf(stderr, "info: %s:%d: ", __FILE__, __LINE__); \ exit(-1); + +void sendGrads(paddle_pserver_client c) { + unsigned char grad_a[2000] = {2}; + unsigned char grad_b[3000] = {3}; + paddle_gradient grad1 = { + "param_a", PADDLE_ELEMENT_TYPE_FLOAT32, grad_a, 2000}; + paddle_gradient grad2 = { + "param_b", PADDLE_ELEMENT_TYPE_FLOAT32, grad_b, 3000}; + paddle_gradient* grads[2] = {&grad1, &grad2}; + if (paddle_send_grads(c, grads, 2)) { + fail(); + } } -void print_parameter(paddle_gradient* param) { - if (param == NULL) { - printf("param is NULL!!\n"); - } else { - printf("==== parameter ====\n"); - printf("name: %s\n", param->name); - printf("content_len: %d\n", param->content_len); - printf("content_type: %d\n", param->element_type); - int i; - for (i = 0; i < param->content_len / (int)sizeof(real); ++i) { - printf("%f ", ((float*)param->content)[i]); - } - printf("\n\n"); +void getParams(paddle_pserver_client c) { + paddle_parameter param_a; + paddle_parameter param_b; + char name_a[] = "param_a"; + char name_b[] = "param_b"; + // Must pre-allocate the prameter content before calling paddle_get_params. + unsigned char content_a[2000] = {}; + unsigned char content_b[3000] = {}; + param_a.element_type = PADDLE_ELEMENT_TYPE_FLOAT32; + param_a.name = name_a; + param_a.content = content_a; + param_a.content_len = 2000; + param_b.element_type = PADDLE_ELEMENT_TYPE_FLOAT32; + param_b.name = name_b; + param_b.content = content_b; + param_b.content_len = 3000; + + paddle_parameter* params[2] = {¶m_a, ¶m_b}; + if (paddle_get_params(c, params, 2)) { + fail(); } } + + int main() { char addr[] = "localhost:3000"; paddle_pserver_client c = paddle_new_pserver_client(addr, 1); - - char* names[] = {"param_a", "param_b"}; - + char config_proto[1024]; + size_t config_proto_len = 0; + ssize_t nread; + FILE *fp = fopen("optimizer.pb.txt", "r"); + if(!fp) { fail(); } + while((nread = getline(&config_proto, &config_proto_len, fp)) != -1) { + printf("%s", config_proto); + } + fclose(fp); retry: - printf("init parameter to pserver:\n"); - - real param_content1[] = {0.1, 0.2, 0.3}; - real param_content2[] = {0.4, 0.5, 0.6}; - paddle_parameter** params = - (paddle_parameter**)malloc(sizeof(paddle_parameter*) * 2); - params[0] = (paddle_parameter*)malloc(sizeof(paddle_parameter)); - params[0]->name = names[0]; - params[0]->content = (unsigned char*)param_content1; - params[0]->content_len = 3 * sizeof(real); - params[0]->element_type = PADDLE_ELEMENT_TYPE_FLOAT32; - - params[1] = (paddle_parameter*)malloc(sizeof(paddle_parameter)); - params[1]->name = names[1]; - params[1]->content = (unsigned char*)param_content2; - params[1]->content_len = 3 * sizeof(real); - params[1]->element_type = PADDLE_ELEMENT_TYPE_INT32; - if (paddle_begin_init_params(c)) { - if (paddle_init_param(c, *params[0], NULL, 0) != 0) { + paddle_parameter param; + char name_a[] = "param_a"; + char name_b[] = "param_b"; + unsigned char content_a[2000] = {1}; + unsigned char content_b[3000] = {0}; + param.element_type = PADDLE_ELEMENT_TYPE_FLOAT32; + param.name = name_a; + param.content = content_a; + param.content_len = 2000; + int error = paddle_init_param(c, param, config_proto, config_proto_len); + if (error != 0) { goto retry; } - if (paddle_init_param(c, *params[1], NULL, 0) != 
0) { + + param.element_type = PADDLE_ELEMENT_TYPE_FLOAT32; + param.name = name_b; + param.content = content_b; + param.content_len = 3000; + error = paddle_init_param(c, param, NULL, 0); + if (error != 0) { goto retry; } - if (paddle_finish_init_params(c) != 0) { + + error = paddle_finish_init_params(c); + if (error != 0) { goto retry; } - } else { - fail(); - } - - printf("get inited parameters from pserver:\n"); - // get parameters again by reusing the allocated parameter buffers. - if (paddle_get_params(c, params, 2) != 0) { - fail(); - } - print_parameter(params[0]); - print_parameter(params[1]); - - printf("send gradient to pserver:\n"); - real gradient_content1[] = {0.01, 0.02, 0.03}; - real gradinet_content2[] = {0.04, 0.05, 0.06}; - - paddle_gradient** grads = - (paddle_gradient**)malloc(sizeof(paddle_gradient*) * 2); - grads[0] = (paddle_gradient*)malloc(sizeof(paddle_gradient)); - grads[0]->name = names[0]; - grads[0]->content = (unsigned char*)gradient_content1; - grads[0]->content_len = 3 * sizeof(real); - grads[0]->element_type = PADDLE_ELEMENT_TYPE_FLOAT32; - - grads[1] = (paddle_gradient*)malloc(sizeof(paddle_gradient)); - grads[1]->name = names[1]; - grads[1]->content = (unsigned char*)gradinet_content2; - grads[1]->content_len = 3 * sizeof(real); - grads[1]->element_type = PADDLE_ELEMENT_TYPE_INT32; - - printf("print gradient sent to pserver:\n"); - print_parameter(grads[0]); - print_parameter(grads[1]); - - if (paddle_send_grads(c, grads, 2) != 0) { - fail(); } - printf("get updated parameters from pserver:\n"); - // get parameters again by reusing the allocated parameter buffers. - if (paddle_get_params(c, params, 2) != 0) { - fail(); + int i; + for (i = 0; i < 100; i++) { + sendGrads(c); + getParams(c); } - print_parameter(params[0]); - print_parameter(params[1]); - if (paddle_save_model(c, "/tmp/") != 0) { + if (paddle_save_model(c, "/tmp/")) { fail(); } diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go index 3ee4c74652..df2219aa84 100644 --- a/go/pserver/optimizer.go +++ b/go/pserver/optimizer.go @@ -1,7 +1,10 @@ package pserver /* +// TODO(zhihong): move compile flags to cmake go_library +#cgo pkg-config: protobuf #cgo CFLAGS: -I ../../ +#cgo LDFLAGS: ../../build/paddle/optimizer/libpaddle_optimizer.a ../../build/proto/libpaddle_proto.a ../../third_party/install/glog/lib/libglog.a ../../third_party/install/gtest/lib/libgtest.a ../../third_party/install/gflags/lib/libgflags.a ../../third_party/install/openblas/lib/libopenblas.a -I/usr/local/lib/ -lprotobuf #include "paddle/optimizer/optimizer.h" */ import "C" From 8cfa48dc88c0c702b30094ca558bf2182e00faba Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Thu, 22 Jun 2017 10:27:36 -0700 Subject: [PATCH 042/542] Move README.md from paddle/ to paddle/memory/ --- paddle/{ => memory}/README.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename paddle/{ => memory}/README.md (100%) diff --git a/paddle/README.md b/paddle/memory/README.md similarity index 100% rename from paddle/README.md rename to paddle/memory/README.md From c617520776c58791d77d1382eba67ac4264916f0 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Thu, 22 Jun 2017 10:35:52 -0700 Subject: [PATCH 043/542] In response to comments from Liao Gang and Yu Yang --- paddle/memory/README.md | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/paddle/memory/README.md b/paddle/memory/README.md index 24af37987e..b71ca29696 100644 --- a/paddle/memory/README.md +++ b/paddle/memory/README.md @@ -25,14 +25,16 @@ cout << 
memory::Used(pl);
 memory::Free(pl, p);
 ```

-### The API
+### API

 In `paddle/memory/memory.h` we have:

 ```cpp
-template <typename Place> void* Alloc(Place, size_t);
-template <typename Place> void Free(Place, void*);
-}
+namespace memory {
+template <typename Place> void* Alloc(Place, size_t);
+template <typename Place> void Free(Place, void*);
+template <typename Place> void Used(Place);
+} // namespace memory
 ```

 These function templates have specializations on either `platform::CPUPlace` or `platform::GPUPlace`:

 ```cpp
 template<>
-void Alloc(CPUPlace p, size_t size) {
+void* Alloc(CPUPlace p, size_t size) {
   return GetCPUBuddyAllocator()->Alloc(size);
 }
 ```

 and

 ```cpp
 template<>
-void Alloc(GPUPlace)(GPUPlace p, size_t size) {
+void Alloc(GPUPlace p, size_t size) {
   return GetGPUBuddyAllocator(p.id)->Alloc(size);
 }
 ```

-### The Implementation
+Similar specializations exist for `Free` and `Used`.
+
+### Implementation

 `GetCPUBuddyAllocator` and `GetGPUBuddyAllocator` are singletions.
@@ -94,7 +98,7 @@ class BuddyAllocator {
  private:
   struct Block {
     size_t size;
-    Blobk* left, right;
+    Block* left, right;
   };
   ...
 };
@@ -102,15 +106,15 @@ class BuddyAllocator {

 #### System Allocators

-The `GPUAllocator` and `CPUAllocator` are calls *system allocators*. They hold information about the device, including the amount of memory has been allocated. So that we can call
+The `GPUAllocator` and `CPUAllocator` are calls *system allocators*. They work as the fallback allocators of `BuddyAllocator`. A system allocator holds information about a device, including the amount of memory has been allocated, so we can call

-- `GPUAllocator::Used` and
-- `CPUAllocator::Used`
+- `GPUAllocator::Used()` and
+- `CPUAllocator::Used()`

 to get the amount of memory that has been allocated so far.

-## Why Such a Design
+## Justification

 I got inspiration from Majel and Caffe2, though above design look different from both.

From 85e42cbeaa105b34fd3dba864ba75b95adcfe73f Mon Sep 17 00:00:00 2001
From: liaogang
Date: Fri, 23 Jun 2017 15:16:37 +0800
Subject: [PATCH 044/542] ENH: supoort commnad `make target_name`

---
 cmake/generic.cmake | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/cmake/generic.cmake b/cmake/generic.cmake
index 69e8164a00..19f0db5273 100644
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -97,15 +97,12 @@ function(merge_static_libs TARGET_NAME)
   endforeach()

   if(APPLE) # Use OSX's libtool to merge archives
-    add_custom_target(${TARGET_NAME}_archive
-      COMMAND libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles}
-      WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
-      DEPENDS ${libs}
-      )
-    add_library(${TARGET_NAME} STATIC IMPORTED GLOBAL)
-    set_property(TARGET ${TARGET_NAME} PROPERTY
-      IMPORTED_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a")
-    add_dependencies(${TARGET_NAME} ${TARGET_NAME}_archive)
+    set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c)
+    file(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";")
+    add_library(${TARGET_NAME} STATIC ${dummyfile})
+    add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
+      COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a"
+      COMMAND /usr/bin/libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles})
   else() # general UNIX: use "ar" to extract objects and re-add to a common lib
     foreach(lib ${libs})
       set(objlistfile ${lib}.objlist) # list of objects in the input library

From 869f2b3861990c430941d1992f9b22a711e89670 Mon Sep 17 00:00:00 2001
From: liaogang
Date: Fri, 23 Jun 2017 17:26:49 +0800
Subject: [PATCH 045/542] FIX: Polish go library

---
 cmake/generic.cmake                     | 34 +++++++++---------
 cmake/system.cmake                      |  1 +
 go/cmake/CMakeDetermineGoCompiler.cmake | 44
----------------------- go/cmake/CMakeGoCompiler.cmake.in | 8 ----- go/cmake/CMakeGoInformation.cmake | 7 ---- go/cmake/CMakeTestGoCompiler.cmake | 1 - go/cmake/flags.cmake | 45 ----------------------- go/cmake/golang.cmake | 48 ------------------------- go/pserver/cclient/CMakeLists.txt | 15 +++----- go/pserver/cclient/test/CMakeLists.txt | 23 ++---------- 10 files changed, 24 insertions(+), 202 deletions(-) delete mode 100644 go/cmake/CMakeDetermineGoCompiler.cmake delete mode 100644 go/cmake/CMakeGoCompiler.cmake.in delete mode 100644 go/cmake/CMakeGoInformation.cmake delete mode 100644 go/cmake/CMakeTestGoCompiler.cmake delete mode 100644 go/cmake/flags.cmake delete mode 100644 go/cmake/golang.cmake diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 69e8164a00..76810432e0 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -257,31 +257,31 @@ file(MAKE_DIRECTORY ${GOPATH}) # tensor # Because ops depend on tensor, this line is optional. # ops) function(go_library TARGET_NAME) - set(options OPTIONAL) + set(options STATIC static SHARED shared) set(oneValueArgs "") - set(multiValueArgs SRCS DEPS) + set(multiValueArgs DEPS) cmake_parse_arguments(go_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - if (${go_library_OPTIONAL} STREQUAL "SHARED") + + if (go_library_SHARED OR go_library_shared) set(BUILD_MODE "-buildmode=c-shared") - if(APPLE) - set(LIB_NAME "lib${TARGET_NAME}.dylib") - else() - set(LIB_NAME "lib${TARGET_NAME}.so") - endif() + set(LIB_NAME "${LIBRARY_PREFIX}${TARGET_NAME}${SHARED_LIBRARY_SUFFIX}") else() set(BUILD_MODE "-buildmode=c-archive") - set(LIB_NAME "lib${TARGET_NAME}.a") + set(LIB_NAME "${LIBRARY_PREFIX}${TARGET_NAME}${STATIC_LIBRARY_SUFFIX}") endif() - add_custom_command(OUTPUT ${TARGET_NAME}_timestamp + + set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c) + file(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";") + add_library(${TARGET_NAME} STATIC ${dummyfile}) + add_dependencies(${TARGET_NAME} ${go_library_DEPS}) + + file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go") + add_custom_command(TARGET ${TARGET_NAME} POST_BUILD + COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}" COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE} -o "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}" - ${go_library_SRCS} + ${GO_SOURCE} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) - add_custom_target(${TARGET_NAME}_lib ALL DEPENDS ${TARGET_NAME}_timestamp ${go_library_DEPS}) - add_library(${TARGET_NAME} STATIC IMPORTED) - set_property(TARGET ${TARGET_NAME} PROPERTY - IMPORTED_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}") - add_dependencies(${TARGET_NAME} ${TARGET_NAME}_lib) endfunction(go_library) function(go_binary TARGET_NAME) @@ -316,5 +316,5 @@ endfunction(go_test) # go_extern(target_name extern_source) # go_extern(go_redis github.com/hoisie/redis) function(go_extern TARGET_NAME) - add_custom_target(${TARGET_NAME} env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get ${ARGN}) + add_custom_target(${TARGET_NAME} env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get -d ${ARGN}) endfunction(go_extern) diff --git a/cmake/system.cmake b/cmake/system.cmake index 904652413e..40a1b2f67f 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -33,6 +33,7 @@ ELSE(WIN32) SET(CMAKE_OSX_DEPLOYMENT_TARGET ${MACOS_VERSION} CACHE STRING "Minimum OS X version to target for deployment (at runtime); newer APIs weak linked. 
Set to empty string for default value.") ENDIF() + set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security") ELSE(APPLE) IF(EXISTS "/etc/issue") diff --git a/go/cmake/CMakeDetermineGoCompiler.cmake b/go/cmake/CMakeDetermineGoCompiler.cmake deleted file mode 100644 index a9bb6906c7..0000000000 --- a/go/cmake/CMakeDetermineGoCompiler.cmake +++ /dev/null @@ -1,44 +0,0 @@ -if(NOT CMAKE_Go_COMPILER) - if(NOT $ENV{GO_COMPILER} STREQUAL "") - get_filename_component(CMAKE_Go_COMPILER_INIT $ENV{GO_COMPILER} PROGRAM PROGRAM_ARGS CMAKE_Go_FLAGS_ENV_INIT) - - if(CMAKE_Go_FLAGS_ENV_INIT) - set(CMAKE_Go_COMPILER_ARG1 "${CMAKE_Go_FLAGS_ENV_INIT}" CACHE STRING "First argument to Go compiler") - endif() - - if(NOT EXISTS ${CMAKE_Go_COMPILER_INIT}) - message(SEND_ERROR "Could not find compiler set in environment variable GO_COMPILER:\n$ENV{GO_COMPILER}.") - endif() - - endif() - - set(Go_BIN_PATH - $ENV{GOPATH} - $ENV{GOROOT} - $ENV{GOROOT}/../bin - $ENV{GO_COMPILER} - /usr/bin - /usr/local/bin - ) - - if(CMAKE_Go_COMPILER_INIT) - set(CMAKE_Go_COMPILER ${CMAKE_Go_COMPILER_INIT} CACHE PATH "Go Compiler") - else() - find_program(CMAKE_Go_COMPILER - NAMES go - PATHS ${Go_BIN_PATH} - ) - EXEC_PROGRAM(${CMAKE_Go_COMPILER} ARGS version OUTPUT_VARIABLE GOLANG_VERSION) - STRING(REGEX MATCH "go[0-9]+.[0-9]+.[0-9]+[ /A-Za-z0-9]*" VERSION "${GOLANG_VERSION}") - message("-- The Golang compiler identification is ${VERSION}") - message("-- Check for working Golang compiler: ${CMAKE_Go_COMPILER}") - endif() - -endif() - -mark_as_advanced(CMAKE_Go_COMPILER) - -configure_file(${CMAKE_MODULE_PATH}/CMakeGoCompiler.cmake.in - ${CMAKE_PLATFORM_INFO_DIR}/CMakeGoCompiler.cmake @ONLY) - -set(CMAKE_Go_COMPILER_ENV_VAR "GO_COMPILER") diff --git a/go/cmake/CMakeGoCompiler.cmake.in b/go/cmake/CMakeGoCompiler.cmake.in deleted file mode 100644 index a71f08e064..0000000000 --- a/go/cmake/CMakeGoCompiler.cmake.in +++ /dev/null @@ -1,8 +0,0 @@ -set(CMAKE_Go_COMPILER "@CMAKE_Go_COMPILER@") -set(CMAKE_Go_COMPILER_LOADED 1) - -set(CMAKE_Go_SOURCE_FILE_EXTENSIONS go) -set(CMAKE_Go_LINKER_PREFERENCE 40) -set(CMAKE_Go_OUTPUT_EXTENSION .o) -set(CMAKE_Go_OUTPUT_EXTENSION_REPLACE 1) -set(CMAKE_Go_COMPILER_ENV_VAR "GO_COMPILER") diff --git a/go/cmake/CMakeGoInformation.cmake b/go/cmake/CMakeGoInformation.cmake deleted file mode 100644 index ba51ac93fc..0000000000 --- a/go/cmake/CMakeGoInformation.cmake +++ /dev/null @@ -1,7 +0,0 @@ -if(NOT CMAKE_Go_COMPILE_OBJECT) - set(CMAKE_Go_COMPILE_OBJECT "go tool compile -l -N -o ") -endif() - -if(NOT CMAKE_Go_LINK_EXECUTABLE) - set(CMAKE_Go_LINK_EXECUTABLE "go tool link -o ") -endif() diff --git a/go/cmake/CMakeTestGoCompiler.cmake b/go/cmake/CMakeTestGoCompiler.cmake deleted file mode 100644 index b9891b015b..0000000000 --- a/go/cmake/CMakeTestGoCompiler.cmake +++ /dev/null @@ -1 +0,0 @@ -set(CMAKE_Go_COMPILER_WORKS 1 CACHE INTERNAL "") diff --git a/go/cmake/flags.cmake b/go/cmake/flags.cmake deleted file mode 100644 index a167c432a9..0000000000 --- a/go/cmake/flags.cmake +++ /dev/null @@ -1,45 +0,0 @@ -# Setting Paddle Compile Flags -include(CheckCXXCompilerFlag) -include(CheckCCompilerFlag) -include(CheckCXXSymbolExists) -include(CheckTypeSize) - -function(CheckCompilerCXX11Flag) - if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.8) - message(FATAL_ERROR "Unsupported GCC version. 
GCC >= 4.8 required.") - endif() - elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - # cmake >= 3.0 compiler id "AppleClang" on Mac OS X, otherwise "Clang" - # Apple Clang is a different compiler than upstream Clang which havs different version numbers. - # https://gist.github.com/yamaya/2924292 - if(APPLE) # cmake < 3.0 compiler id "Clang" on Mac OS X - if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 5.1) - message(FATAL_ERROR "Unsupported AppleClang version. AppleClang >= 5.1 required.") - endif() - else() - if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.3) - message(FATAL_ERROR "Unsupported Clang version. Clang >= 3.3 required.") - endif() - endif() - endif() -endfunction() - -CheckCompilerCXX11Flag() -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") - -# Common gpu architectures: Kepler, Maxwell -foreach(capability 30 35 50) - list(APPEND __arch_flags " -gencode arch=compute_${capability},code=sm_${capability}") -endforeach() - -if (CUDA_VERSION VERSION_GREATER "7.0" OR CUDA_VERSION VERSION_EQUAL "7.0") - list(APPEND __arch_flags " -gencode arch=compute_52,code=sm_52") -endif() - -# Modern gpu architectures: Pascal -if (CUDA_VERSION VERSION_GREATER "8.0" OR CUDA_VERSION VERSION_EQUAL "8.0") - list(APPEND __arch_flags " -gencode arch=compute_60,code=sm_60") -endif() - -set(CUDA_NVCC_FLAGS ${__arch_flags} ${CUDA_NVCC_FLAGS}) diff --git a/go/cmake/golang.cmake b/go/cmake/golang.cmake deleted file mode 100644 index a5a43886f8..0000000000 --- a/go/cmake/golang.cmake +++ /dev/null @@ -1,48 +0,0 @@ -set(GOPATH "${CMAKE_CURRENT_BINARY_DIR}/go") -file(MAKE_DIRECTORY ${GOPATH}) -set(PADDLE_IN_GOPATH "${GOPATH}/src/github.com/PaddlePaddle") -file(MAKE_DIRECTORY ${PADDLE_IN_GOPATH}) - -function(GO_LIBRARY NAME BUILD_TYPE) - if(BUILD_TYPE STREQUAL "STATIC") - set(BUILD_MODE -buildmode=c-archive) - set(LIB_NAME "lib${NAME}.a") - else() - set(BUILD_MODE -buildmode=c-shared) - if(APPLE) - set(LIB_NAME "lib${NAME}.dylib") - else() - set(LIB_NAME "lib${NAME}.so") - endif() - endif() - - file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go") - file(RELATIVE_PATH rel ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) - - # find Paddle directory. - get_filename_component(PARENT_DIR ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY) - get_filename_component(PARENT_DIR ${PARENT_DIR} DIRECTORY) - get_filename_component(PADDLE_DIR ${PARENT_DIR} DIRECTORY) - - # automatically get all dependencies specified in the source code - # for given target. - add_custom_target(${NAME}_goGet env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get -d ${rel}/...) - - # make a symlink that references Paddle inside $GOPATH, so go get - # will use the local changes in Paddle rather than checkout Paddle - # in github. 
- add_custom_target(${NAME}_copyPaddle - COMMAND rm -rf ${PADDLE_IN_GOPATH}/Paddle - COMMAND ln -sf ${PADDLE_DIR} ${PADDLE_IN_GOPATH}/Paddle) - add_dependencies(${NAME}_goGet ${NAME}_copyPaddle) - - add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp - COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE} - -o "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}" - ${CMAKE_GO_FLAGS} ${GO_SOURCE} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) - - add_custom_target(${NAME} ALL DEPENDS ${OUTPUT_DIR}/.timestamp ${ARGN}) - add_dependencies(${NAME} ${NAME}_goGet) - -endfunction(GO_LIBRARY) diff --git a/go/pserver/cclient/CMakeLists.txt b/go/pserver/cclient/CMakeLists.txt index fff7ae7858..8af6bc2e50 100644 --- a/go/pserver/cclient/CMakeLists.txt +++ b/go/pserver/cclient/CMakeLists.txt @@ -1,14 +1,7 @@ -cmake_minimum_required(VERSION 3.0) +file(RELATIVE_PATH rel ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) -get_filename_component(PARENT_DIR ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY) -get_filename_component(PARENT_DIR ${PARENT_DIR} DIRECTORY) -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PARENT_DIR}/cmake") - -project(cxx_go C Go) - -include(golang) -include(flags) - -go_library(paddle_pserver_cclient STATIC) +go_extern(go_pserver ${rel}/...) +go_extern(go_logrus github.com/sirupsen/logrus) +go_library(paddle_pserver_cclient STATIC DEPS go_logrus go_pserver) add_subdirectory(test) diff --git a/go/pserver/cclient/test/CMakeLists.txt b/go/pserver/cclient/test/CMakeLists.txt index 1a3dd7e5e9..916e4e99a2 100644 --- a/go/pserver/cclient/test/CMakeLists.txt +++ b/go/pserver/cclient/test/CMakeLists.txt @@ -1,22 +1,3 @@ -cmake_minimum_required(VERSION 3.0) -add_executable(main main.c) -add_dependencies(main paddle_pserver_cclient) -add_executable(test_cclient test_cclient.c) -add_dependencies(test_cclient paddle_pserver_cclient) - -if(APPLE) - set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security") -else() - set(CMAKE_EXE_LINKER_FLAGS "-pthread") -endif() - -if(PROJ_ROOT) - include_directories(${CMAKE_CURRENT_BINARY_DIR}/..) 
- target_link_libraries(main ${CMAKE_CURRENT_BINARY_DIR}/../libpaddle_pserver_cclient.a pthread) - target_link_libraries(test_cclient ${CMAKE_CURRENT_BINARY_DIR}/../libpaddle_pserver_cclient.a pthread) -else(PROJ_ROOT) - include_directories(${CMAKE_BINARY_DIR}) - target_link_libraries(main ${CMAKE_BINARY_DIR}/libpaddle_pserver_cclient.a pthread) - target_link_libraries(test_cclient ${CMAKE_BINARY_DIR}/libpaddle_pserver_cclient.a pthread) -endif(PROJ_ROOT) +cc_library(main SRCS main.c DEPS paddle_pserver_cclient) +cc_test(test_cclient SRCS test_cclient.c DEPS paddle_pserver_cclient) From 09a50cb9b3eda517400ddda1ec6abfc5ab13204e Mon Sep 17 00:00:00 2001 From: liaogang Date: Fri, 23 Jun 2017 17:52:15 +0800 Subject: [PATCH 046/542] ENH: Change to CMAKE variable --- cmake/generic.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 76810432e0..dd927b82d3 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -264,10 +264,10 @@ function(go_library TARGET_NAME) if (go_library_SHARED OR go_library_shared) set(BUILD_MODE "-buildmode=c-shared") - set(LIB_NAME "${LIBRARY_PREFIX}${TARGET_NAME}${SHARED_LIBRARY_SUFFIX}") + set(LIB_NAME "${CMAKE_SHARED_LIBRARY_PREFIX}${TARGET_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}") else() set(BUILD_MODE "-buildmode=c-archive") - set(LIB_NAME "${LIBRARY_PREFIX}${TARGET_NAME}${STATIC_LIBRARY_SUFFIX}") + set(LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${TARGET_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}") endif() set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c) From 5e87e27c757efc1b6f0cea06a39a5ebc6dea5ec7 Mon Sep 17 00:00:00 2001 From: lianxiaochen Date: Fri, 23 Jun 2017 10:53:26 -0700 Subject: [PATCH 047/542] fix error clipping --- paddle/gserver/layers/Layer.cpp | 9 ++++----- python/paddle/trainer/config_parser.py | 10 +++++++++- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/paddle/gserver/layers/Layer.cpp b/paddle/gserver/layers/Layer.cpp index 125aaf947f..b8a1c8d0fc 100644 --- a/paddle/gserver/layers/Layer.cpp +++ b/paddle/gserver/layers/Layer.cpp @@ -354,12 +354,11 @@ void Layer::backwardActivation() { /* Do error clipping */ if (config_.error_clipping_threshold() > 0.0f) { if (FLAGS_log_error_clipping) { - CpuVector outGradVec(0, nullptr); - outGradVec.subVecFrom( - output_.grad->getData(), 0, output_.grad->getElementCnt()); - real maxAbsGrad = outGradVec.getAbsMax(); + VectorPtr outGradVec = Vector::create( + output_.grad->getData(), output_.grad->getElementCnt(), useGpu_); + real maxAbsGrad = outGradVec->getAbsMax(); if (maxAbsGrad > config_.error_clipping_threshold()) { - real avgAbsGrad = outGradVec.getAbsSum() / outGradVec.getSize(); + real avgAbsGrad = outGradVec->getAbsSum() / outGradVec->getSize(); LOG(INFO) << " layer=" << config_.name() << " need clipping," << " max error=" << maxAbsGrad << " avg error=" << avgAbsGrad; } diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 58e4902f57..8dec50221f 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1571,7 +1571,13 @@ class MultiClassCrossEntropySelfNormCostLayer(LayerBase): @config_layer('fc') class FCLayer(LayerBase): - def __init__(self, name, size, inputs, bias=True, **xargs): + def __init__(self, + name, + size, + inputs, + bias=True, + error_clipping_threshold=None, + **xargs): super(FCLayer, self).__init__(name, 'fc', size, inputs=inputs, **xargs) for input_index in xrange(len(self.inputs)): input_layer = 
self.get_input_layer(input_index) @@ -1588,6 +1594,8 @@ class FCLayer(LayerBase): self.create_input_parameter(input_index, psize, dims, sparse, format) self.create_bias_parameter(bias, self.config.size) + if error_clipping_threshold is not None: + self.config.error_clipping_threshold = error_clipping_threshold @config_layer('selective_fc') From b55df90dfdf6b9720548613885d291ae8769705b Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Fri, 23 Jun 2017 11:42:48 -0700 Subject: [PATCH 048/542] Remove unnecessary preamble --- paddle/memory/README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/paddle/memory/README.md b/paddle/memory/README.md index b71ca29696..fd32d07ef4 100644 --- a/paddle/memory/README.md +++ b/paddle/memory/README.md @@ -1,5 +1,3 @@ -In my mind, the memory package works like the following: - ## Design ### Usage From 59b40ecb78781c8ba05973e0f9befc7ed34471e2 Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Sat, 24 Jun 2017 16:20:16 +0800 Subject: [PATCH 049/542] revert parameter with []byte --- go/pserver/optimizer.go | 1 + go/pserver/service.go | 14 +++----------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go index df2219aa84..40748d03c1 100644 --- a/go/pserver/optimizer.go +++ b/go/pserver/optimizer.go @@ -5,6 +5,7 @@ package pserver #cgo pkg-config: protobuf #cgo CFLAGS: -I ../../ #cgo LDFLAGS: ../../build/paddle/optimizer/libpaddle_optimizer.a ../../build/proto/libpaddle_proto.a ../../third_party/install/glog/lib/libglog.a ../../third_party/install/gtest/lib/libgtest.a ../../third_party/install/gflags/lib/libgflags.a ../../third_party/install/openblas/lib/libopenblas.a -I/usr/local/lib/ -lprotobuf +#cgo LDFLAGS: /Users/dzh/.go/src/github.com/PaddlePaddle/Paddle/build/lib/libdep.a #include "paddle/optimizer/optimizer.h" */ import "C" diff --git a/go/pserver/service.go b/go/pserver/service.go index c721388b6a..32449f66b7 100644 --- a/go/pserver/service.go +++ b/go/pserver/service.go @@ -28,8 +28,7 @@ const ( type Parameter struct { Name string ElementType ElementType - Content *byte - Length int + Content []byte } // ParameterWithConfig contains the parameter and the configuration. @@ -47,15 +46,13 @@ type Service struct { mu sync.Mutex // injection from parameter to optimizer - optMap map[string]*optimizer - paramMap map[string]Parameter + optMap map[string]*optimizer } // NewService creates a new service. func NewService() *Service { s := &Service{} s.optMap = make(map[string]*optimizer) - s.paramMap = make(map[string]Parameter) s.initialized = make(chan struct{}) return s } @@ -76,7 +73,6 @@ func (s *Service) InitParam(paramWithConfigs ParameterWithConfig, dummy *int) er // TODO(helin): check if paramWithConfigs.Param.Content is // properly memory aligned, if not, make copy to a memory // aligned region. 
- s.paramMap[paramWithConfigs.Param.Name] = paramWithConfigs.Param s.optMap[paramWithConfigs.Param.Name] = newOptimizer(paramWithConfigs) return nil } @@ -106,13 +102,9 @@ func (s *Service) SendGrad(g Gradient, dummy *int) error { s.mu.Lock() defer s.mu.Unlock() - p, ok := s.paramMap[g.Name] - if !ok { - return fmt.Errorf("parameter: %s does not exist", g.Name) - } o, ok := s.optMap[g.Name] if !ok { - return fmt.Errorf("optimizer: %s does not exist", g.Name) + return fmt.Errorf("parameter: %s does not exist", g.Name) } return o.UpdateParameter(p, g) From b34a05d1141c4758803c084cda7e7d4c976567d8 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Sat, 24 Jun 2017 16:18:14 +0800 Subject: [PATCH 050/542] Fix travis-ci in new image Fix issue #2562 --- .travis.yml | 1 - python/paddle/trainer_config_helpers/networks.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 2c46da71e7..ff41551ba7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,3 @@ -group: deprecated-2017Q2 language: cpp cache: directories: diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py index 1bf59ed484..67154a8d7d 100755 --- a/python/paddle/trainer_config_helpers/networks.py +++ b/python/paddle/trainer_config_helpers/networks.py @@ -1381,7 +1381,7 @@ def inputs(layers, *args): if len(args) != 0: layers.extend(args) - Inputs(*[l.name for l in layers]) + Inputs(* [l.name for l in layers]) def outputs(layers, *args): @@ -1424,7 +1424,7 @@ def outputs(layers, *args): assert len(layers) > 0 if HasInputsSet(): # input already set - Outputs(*[l.name for l in layers]) + Outputs(* [l.name for l in layers]) return # just return outputs. if len(layers) != 1: From 8c735c8b092c9f21161bf7b8f8deb8b2f2047184 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Sat, 24 Jun 2017 18:37:48 +0800 Subject: [PATCH 051/542] add dependy --- python/setup.py.in | 1 + 1 file changed, 1 insertion(+) diff --git a/python/setup.py.in b/python/setup.py.in index 2e22f640cb..86fc0fc5c0 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -13,6 +13,7 @@ packages=['paddle', setup_requires=["requests", "numpy", "protobuf==3.1", + "recordio", "matplotlib", "rarfile"] From b359d5c5cdffb05679245886dbb3193981a4d442 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Sat, 24 Jun 2017 18:48:02 +0800 Subject: [PATCH 052/542] restore creator.py --- python/paddle/v2/reader/creator.py | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/python/paddle/v2/reader/creator.py b/python/paddle/v2/reader/creator.py index 994062096f..07142056f8 100644 --- a/python/paddle/v2/reader/creator.py +++ b/python/paddle/v2/reader/creator.py @@ -16,7 +16,7 @@ Creator package contains some simple reader creator, which could be used in user program. 
""" -__all__ = ['np_array', 'text_file', "RecordIO"] +__all__ = ['np_array', 'text_file'] def np_array(x): @@ -55,22 +55,3 @@ def text_file(path): f.close() return reader - - -def RecordIO(path): - """ - Creates a data reader that outputs record one one by one from given recordio file - :path: path of recordio file - :returns: data reader of recordio file - """ - - def reader(): - f = recordio.reader(path) - while True: - r = f.read() - if r is None: - break - yield r - f.close() - - return reader From 90c909ac7c0ba7155151b3af6aea655e0cd8ce98 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Sat, 24 Jun 2017 18:51:03 +0800 Subject: [PATCH 053/542] restore creator_test.py --- python/paddle/v2/reader/tests/creator_test.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/python/paddle/v2/reader/tests/creator_test.py b/python/paddle/v2/reader/tests/creator_test.py index dd84fbb002..9f8d7133b8 100644 --- a/python/paddle/v2/reader/tests/creator_test.py +++ b/python/paddle/v2/reader/tests/creator_test.py @@ -36,14 +36,5 @@ class TestTextFile(unittest.TestCase): self.assertEqual(e, str(idx * 2) + " " + str(idx * 2 + 1)) -class TestRecordIO(unittest.TestCase): - def test_RecordIO(self): - path = os.path.join( - os.path.dirname(__file__), "test_recordio_creator.dat") - reader = paddle.v2.reader.creator.RecordIO(path) - for idx, r in enumerate(reader()): - self.assertSequenceEqual(r, str(idx)) - - if __name__ == '__main__': unittest.main() From ae79b9ac1ccdf99713241c2e2b9f5c6bddcc0193 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Sat, 24 Jun 2017 18:52:09 +0800 Subject: [PATCH 054/542] restore --- .../v2/reader/tests/test_recordio_creator.dat | Bin 88 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 python/paddle/v2/reader/tests/test_recordio_creator.dat diff --git a/python/paddle/v2/reader/tests/test_recordio_creator.dat b/python/paddle/v2/reader/tests/test_recordio_creator.dat deleted file mode 100644 index 17aa89b6796184407e83246d3f342a55a66b4a69..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 88 zcmZQ!W@2QOHw Date: Sat, 24 Jun 2017 19:53:28 +0800 Subject: [PATCH 055/542] set ps_desired when pserver init --- go/cmd/pserver/pserver.go | 3 ++- go/pserver/service.go | 28 +++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/go/cmd/pserver/pserver.go b/go/cmd/pserver/pserver.go index fe1fe5f6f0..6c85b1804b 100644 --- a/go/cmd/pserver/pserver.go +++ b/go/cmd/pserver/pserver.go @@ -18,6 +18,7 @@ func main() { etcdEndpoint := flag.String("etcd-endpoint", "http://127.0.0.1:2379", "comma separated endpoint string for pserver to connect to etcd") etcdTimeout := flag.Int("etcd-timeout", 5, "timeout for etcd calls") + numPservers := flag.Int("num-pservers", 1, "total pserver count in a training job") logLevel := flag.String("log-level", "info", "log level, possible values: debug, info, warning, error, fatal, panic") flag.Parse() @@ -29,7 +30,7 @@ func main() { log.SetLevel(level) timeout := time.Second * time.Duration((*etcdTimeout)) - s, err := pserver.NewService(*etcdEndpoint, timeout) + s, err := pserver.NewService(*etcdEndpoint, *numPservers, timeout) if err != nil { panic(err) } diff --git a/go/pserver/service.go b/go/pserver/service.go index 7e2b841dd8..f966595fdc 100644 --- a/go/pserver/service.go +++ b/go/pserver/service.go @@ -73,7 +73,7 @@ type Service struct { // NewService creates a new service, will bypass etcd registration if no // endpoints specified. 
-func NewService(endpoints string, timeout time.Duration) (*Service, error) { +func NewService(endpoints string, numPservers int, timeout time.Duration) (*Service, error) { s := &Service{opt: newOptimizer(sgd, 0.005)} s.paramMap = make(map[string]Parameter) s.initialized = make(chan struct{}) @@ -103,6 +103,22 @@ func NewService(endpoints string, timeout time.Duration) (*Service, error) { log.Debugf("inited client to %s", s.etcdEndpoints) break } + // init /ps_desired using transaction, for multiple pservers may want to write + // it at the same time. + for { + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + _, err := s.initDesiredPsercers(ctx, numPservers) + cancel() + if err != nil { + log.Warn(err) + time.Sleep(s.etcdTimeout) + continue + } + break + } + // TODO: when implementing extending or reducing pservers, /ps_desired is + // changed, then we need to watch /ps_desired node for events. For now, just + // write once when init and read from it. // wait and set s.desired init value for { ctx, cancel := context.WithTimeout(context.Background(), time.Second) @@ -141,6 +157,16 @@ func NewService(endpoints string, timeout time.Duration) (*Service, error) { return s, nil } +func (s *Service) initDesiredPsercers(ctx context.Context, numPservers int) (*clientv3.TxnResponse, error) { + return concurrency.NewSTM(s.etcdClient, func(c concurrency.STM) error { + dsStr := c.Get(PsDesired) + if dsStr == "" { + c.Put(PsDesired, strconv.Itoa(numPservers)) + } + return nil + }, concurrency.WithAbortContext(ctx), concurrency.WithIsolation(concurrency.RepeatableReads)) +} + // registerPserverEtcd registers pserver node on etcd using transaction. func (s *Service) registerPserverEtcd(ctx context.Context) (*clientv3.TxnResponse, error) { return concurrency.NewSTM(s.etcdClient, func(c concurrency.STM) error { From 55684af208071bd788381946ac76c9da2b5b7329 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Sun, 25 Jun 2017 13:13:10 +0800 Subject: [PATCH 056/542] fix MultiGradientMachine train and infer --- .../gradientmachines/MultiGradientMachine.cpp | 12 ++++++------ .../gserver/gradientmachines/MultiGradientMachine.h | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp index 3159026e6b..9abda18d54 100644 --- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp +++ b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp @@ -171,6 +171,12 @@ MultiGradientMachine::MultiGradientMachine(const ModelConfig& config, } } +MultiGradientMachine::~MultiGradientMachine() { + for (auto& thread : threads_) { + thread->stop(); + } +} + std::vector*> MultiGradientMachine::getSlaveParameters() { std::vector*> vec; @@ -326,12 +332,6 @@ void MultiGradientMachine::onPassEnd() { } } -void MultiGradientMachine::finish() { - for (auto& thread : threads_) { - thread->stop(); - } -} - Evaluator* MultiGradientMachine::makeEvaluator() const { return threads_[0]->getGradientMachine()->makeEvaluator(); } diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.h b/paddle/gserver/gradientmachines/MultiGradientMachine.h index 70203bbb97..c005c0ed67 100644 --- a/paddle/gserver/gradientmachines/MultiGradientMachine.h +++ b/paddle/gserver/gradientmachines/MultiGradientMachine.h @@ -176,6 +176,8 @@ public: explicit MultiGradientMachine(const ModelConfig& config, bool useGpu); + virtual ~MultiGradientMachine(); + virtual void prefetch(const std::vector& inArgs); virtual 
void forward(const std::vector<Argument>& inArgs,
@@ -193,8 +195,6 @@ public:

   virtual void onPassEnd();

-  virtual void finish();
-
   virtual Evaluator* makeEvaluator() const;

   virtual void eval(Evaluator* evaluator) const;

From 84d1c734ca2fe7a17e000467823d49891507cf0b Mon Sep 17 00:00:00 2001
From: Yi Wang
Date: Sun, 25 Jun 2017 15:40:45 -0700
Subject: [PATCH 057/542] add paddle/memory/detail/cpu_allocator*

---
 paddle/CMakeLists.txt                      |  1 +
 paddle/memory/CMakeLists.txt               |  1 +
 paddle/memory/README.md                    | 14 ++---
 paddle/memory/detail/CMakeLists.txt        |  1 +
 paddle/memory/detail/cpu_allocator.h       | 63 ++++++++++++++++++++++
 paddle/memory/detail/cpu_allocator_test.cc | 32 +++++++++++
 paddle/memory/memory.cc                    | 51 ++++++++++++++++++
 paddle/memory/memory.h                     | 27 ++++++++++
 paddle/platform/place.cc                   | 12 ++---
 paddle/platform/place.h                    | 45 ++++++++++------
 paddle/platform/place_test.cc              | 14 ++---
 11 files changed, 224 insertions(+), 37 deletions(-)
 create mode 100644 paddle/memory/CMakeLists.txt
 create mode 100644 paddle/memory/detail/CMakeLists.txt
 create mode 100644 paddle/memory/detail/cpu_allocator.h
 create mode 100644 paddle/memory/detail/cpu_allocator_test.cc
 create mode 100644 paddle/memory/memory.cc
 create mode 100644 paddle/memory/memory.h

diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt
index 573bd937a3..0cddb95244 100644
--- a/paddle/CMakeLists.txt
+++ b/paddle/CMakeLists.txt
@@ -10,6 +10,7 @@ add_subdirectory(trainer)
 add_subdirectory(scripts)
 add_subdirectory(optimizer)
 add_subdirectory(strings)
+add_subdirectory(memory)

 # Do not build go directory until go cmake is working smoothly.
 # if(CMAKE_Go_COMPILER)
diff --git a/paddle/memory/CMakeLists.txt b/paddle/memory/CMakeLists.txt
new file mode 100644
index 0000000000..3943c3cfad
--- /dev/null
+++ b/paddle/memory/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectory(detail)
diff --git a/paddle/memory/README.md b/paddle/memory/README.md
index b71ca29696..e5f7880e4c 100644
--- a/paddle/memory/README.md
+++ b/paddle/memory/README.md
@@ -31,7 +31,7 @@ In `paddle/memory/memory.h` we have:
 namespace memory {
 template <typename Place> void* Alloc(Place, size_t);
 template <typename Place> void Free(Place, void*);
-template <typename Place> void Used(Place);
+template <typename Place> size_t Used(Place);
 } // namespace memory
 ```
@@ -39,7 +39,7 @@ These function templates have specializations on either `platform::CPUPlace` or

 ```cpp
 template<>
-void Alloc(CPUPlace p, size_t size) {
+void* Alloc(CPUPlace p, size_t size) {
   return GetCPUBuddyAllocator()->Alloc(size);
 }
 ```
@@ -102,15 +102,11 @@ class BuddyAllocator {
 };
 ```

-#### System Allocators
-
-The `GPUAllocator` and `CPUAllocator` are calls *system allocators*. They work as the fallback allocators of `BuddyAllocator`. A system allocator holds information about a device, including the amount of memory has been allocated, so we can call
+Because BuddyAllocator has the meta-data of each block, it can trace the used memory -- record the amount returned by `Alloc` freed in `Free`. Instead, `CPUAllocator` and `GPUAllocator` doesn't know the size of freed memory block and cannot do the trace.

-- `GPUAllocator::Used()` and
-- `CPUAllocator::Used()`
-
-to get the amount of memory that has been allocated so far.
+#### System Allocators

+The `GPUAllocator` and `CPUAllocator` are calls *system allocators*. They work as the fallback allocators of `BuddyAllocator`.
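As an aside, to make the tracing argument concrete: the sketch below is an illustration only (`TracingAllocator` and its members are hypothetical names, not the `BuddyAllocator` of this patch). It shows how keeping per-allocation meta-data lets a wrapper answer `Used()`, even though the underlying system allocator, `malloc`/`free` here, cannot report the size of a freed block:

```cpp
#include <cstddef>
#include <cstdlib>
#include <unordered_map>

// A wrapper that traces used memory by remembering each block's size,
// the same accounting idea as BuddyAllocator's per-block meta-data.
class TracingAllocator {
 public:
  void* Alloc(size_t size) {
    void* p = std::malloc(size);
    if (p != nullptr) {
      sizes_[p] = size;     // per-block meta-data
      total_used_ += size;  // traced on allocation
    }
    return p;
  }

  void Free(void* p) {
    auto it = sizes_.find(p);
    if (it != sizes_.end()) {
      total_used_ -= it->second;  // size comes from our meta-data, not free()
      sizes_.erase(it);
    }
    std::free(p);
  }

  size_t Used() const { return total_used_; }

 private:
  std::unordered_map<void*, size_t> sizes_;
  size_t total_used_ = 0;
};
```

A real buddy allocator stores this meta-data in the block headers themselves rather than in a side map, but the reason it can implement `Used()` while the raw system allocator cannot is the same.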
 ## Justification

 I got inspiration from Majel and Caffe2, though above design look different from both.
diff --git a/paddle/memory/detail/CMakeLists.txt b/paddle/memory/detail/CMakeLists.txt
new file mode 100644
index 0000000000..fb8a11062d
--- /dev/null
+++ b/paddle/memory/detail/CMakeLists.txt
@@ -0,0 +1 @@
+cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc)
diff --git a/paddle/memory/detail/cpu_allocator.h b/paddle/memory/detail/cpu_allocator.h
new file mode 100644
index 0000000000..8a872d3800
--- /dev/null
+++ b/paddle/memory/detail/cpu_allocator.h
@@ -0,0 +1,63 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <stdlib.h>  // for malloc and free
+#include <stddef.h>  // for size_t
+
+namespace paddle {
+namespace memory {
+namespace detail {
+
+// CPUAllocator<true> calls cudaMallocHost, which returns
+// pinned and mlocked memory as staging areas for data exchange
+// between host and device. Allocates too much would reduce the
+// amount of memory available to the system for paging. So, by
+// default, we should use CPUAllocator<false>.
+template <bool staging>
+class CPUAllocator {
+public:
+  void* Alloc(size_t size);
+  void Free(void* p);
+};
+
+template <>
+class CPUAllocator<false> {
+public:
+  void* Alloc(size_t size) { return malloc(size); }
+  void Free(void* p) { free(p); }
+};
+
+// If CMake macro WITH_GPU is OFF, C++ compiler won't generate the
+// following specialization that depends on the CUDA library.
+#ifdef WITH_GPU
+template <>
+class CPUAllocator<true> {
+public:
+  void* Alloc(size_t size) {
+    void* p;
+    if (cudaMallocHost(&p, size) != cudaSuccess) {
+      return NULL;
+    }
+    return p;
+  }
+
+  void Free(void* p) { cudaFreeHost(p); }
+};
+#endif  // WITH_GPU
+
+}  // namespace detail
+}  // namespace memory
+}  // namespace paddle
diff --git a/paddle/memory/detail/cpu_allocator_test.cc b/paddle/memory/detail/cpu_allocator_test.cc
new file mode 100644
index 0000000000..0aa33a22fd
--- /dev/null
+++ b/paddle/memory/detail/cpu_allocator_test.cc
@@ -0,0 +1,32 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/memory/detail/cpu_allocator.h"
+#include "gtest/gtest.h"
+
+TEST(CPUAllocator, NonStaging) {
+  paddle::memory::detail::CPUAllocator<false> a;
+  void* p = a.Alloc(4096);
+  EXPECT_NE(p, nullptr);
+  a.Free(p);
+}
+
+#ifdef WITH_GPU
+TEST(CPUAllocator, Staging) {
+  paddle::memory::detail::CPUAllocator<true> a;
+  void* p = a.Alloc(4096);
+  EXPECT_NE(p, nullptr);
+  a.Free(p);
+}
+#endif  // WITH_GPU
diff --git a/paddle/memory/memory.cc b/paddle/memory/memory.cc
new file mode 100644
index 0000000000..5f1253ede6
--- /dev/null
+++ b/paddle/memory/memory.cc
@@ -0,0 +1,51 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/memory/memory.h"
+
+namespace paddle {
+namespace memory {
+
+template <>
+void* Alloc<CPUPlace>(CPUPlace, size_t size) {
+  return GetCPUBuddyAllocator()->Alloc(size);
+}
+
+template <>
+void* Alloc<GPUPlace>(GPUPlace pl, size_t size) {
+  return GetGPUBuddyAllocator(pl.device)->Alloc(size);
+}
+
+template <>
+void Free<CPUPlace>(CPUPlace, void* p) {
+  return GetCPUBuddyAllocator()->Free(p);
+}
+
+template <>
+void Free<GPUPlace>(GPUPlace pl, void* p) {
+  return GetGPUBuddyAllocator(pl.device)->Free(p);
+}
+
+template <>
+size_t Used<CPUPlace>(CPUPlace) {
+  return GetCPUBuddyAllocator()->Used();
+}
+
+template <>
+size_t Used<GPUPlace>(GPUPlace pl) {
+  return GetGPUBuddyAllocator(pl.device)->Used();
+}
+
+}  // namespace memory
+}  // namespace paddle
diff --git a/paddle/memory/memory.h b/paddle/memory/memory.h
new file mode 100644
index 0000000000..ae8ac6ca52
--- /dev/null
+++ b/paddle/memory/memory.h
@@ -0,0 +1,27 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/frameowork/place.h"
+
+namespace paddle {
+namespace memory {
+
+template <typename Place> void* Alloc(Place, size_t);
+template <typename Place> void Free(Place, void*);
+template <typename Place> size_t Used(Place);
+
+}  // namespace memory
+}  // namespace paddle
diff --git a/paddle/platform/place.cc b/paddle/platform/place.cc
index 1afd03c011..0704820aa0 100644
--- a/paddle/platform/place.cc
+++ b/paddle/platform/place.cc
@@ -8,8 +8,8 @@ namespace detail {
 class PlacePrinter : public boost::static_visitor<> {
  public:
   PlacePrinter(std::ostream &os) : os_(os) {}
-  void operator()(const CpuPlace &) { os_ << "CpuPlace"; }
-  void operator()(const GpuPlace &p) { os_ << "GpuPlace(" << p.device << ")"; }
+  void operator()(const CPUPlace &) { os_ << "CPUPlace"; }
+  void operator()(const GPUPlace &p) { os_ << "GPUPlace(" << p.device << ")"; }

  private:
   std::ostream &os_;
@@ -22,14 +22,14 @@ static Place the_default_place;
 void set_place(const Place &place) { the_default_place = place; }
 const Place &get_place() { return the_default_place; }

-const GpuPlace default_gpu() { return GpuPlace(0); }
-const CpuPlace default_cpu() { return CpuPlace(); }
+const GPUPlace default_gpu() { return GPUPlace(0); }
+const CPUPlace default_cpu() { return CPUPlace(); }

 bool is_gpu_place(const Place &p) {
-  return boost::apply_visitor(IsGpuPlace(), p);
+  return boost::apply_visitor(IsGPUPlace(), p);
 }
 bool is_cpu_place(const Place &p) {
-  return !boost::apply_visitor(IsGpuPlace(), p);
+  return !boost::apply_visitor(IsGPUPlace(), p);
 }

 bool places_are_same_class(const Place &p1, const Place &p2) {
diff --git a/paddle/platform/place.h b/paddle/platform/place.h
index 489572c526..7cead18388 100644
--- a/paddle/platform/place.h
+++ b/paddle/platform/place.h
@@ -1,43 +1,58 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
*/ + #pragma once + #include #include namespace paddle { namespace platform { -struct CpuPlace { +struct CPUPlace { // WORKAROUND: for some reason, omitting this constructor // causes errors with boost 1.59 and OSX - CpuPlace() {} + CPUPlace() {} // needed for variant equality comparison - inline bool operator==(const CpuPlace &) const { return true; } - inline bool operator!=(const CpuPlace &) const { return false; } + inline bool operator==(const CPUPlace &) const { return true; } + inline bool operator!=(const CPUPlace &) const { return false; } }; -struct GpuPlace { - GpuPlace() : GpuPlace(0) {} - GpuPlace(int d) : device(d) {} +struct GPUPlace { + GPUPlace() : GPUPlace(0) {} + GPUPlace(int d) : device(d) {} // needed for variant equality comparison - inline bool operator==(const GpuPlace &o) const { return device == o.device; } - inline bool operator!=(const GpuPlace &o) const { return !(*this == o); } + inline bool operator==(const GPUPlace &o) const { return device == o.device; } + inline bool operator!=(const GPUPlace &o) const { return !(*this == o); } int device; }; -struct IsGpuPlace : public boost::static_visitor { - bool operator()(const CpuPlace &) const { return false; } - bool operator()(const GpuPlace &gpu) const { return true; } +struct IsGPUPlace : public boost::static_visitor { + bool operator()(const CPUPlace &) const { return false; } + bool operator()(const GPUPlace &gpu) const { return true; } }; -typedef boost::variant Place; +typedef boost::variant Place; void set_place(const Place &); const Place &get_place(); -const GpuPlace default_gpu(); -const CpuPlace default_cpu(); +const GPUPlace default_gpu(); +const CPUPlace default_cpu(); bool is_gpu_place(const Place &); bool is_cpu_place(const Place &); diff --git a/paddle/platform/place_test.cc b/paddle/platform/place_test.cc index 73fccceedf..33e2e5a439 100644 --- a/paddle/platform/place_test.cc +++ b/paddle/platform/place_test.cc @@ -3,8 +3,8 @@ #include "gtest/gtest.h" TEST(Place, Equality) { - paddle::platform::CpuPlace cpu; - paddle::platform::GpuPlace g0(0), g1(1), gg0(0); + paddle::platform::CPUPlace cpu; + paddle::platform::GPUPlace g0(0), g1(1), gg0(0); EXPECT_EQ(cpu, cpu); EXPECT_EQ(g0, g0); @@ -22,19 +22,19 @@ TEST(Place, Default) { EXPECT_TRUE(paddle::platform::is_gpu_place(paddle::platform::default_gpu())); EXPECT_TRUE(paddle::platform::is_cpu_place(paddle::platform::default_cpu())); - paddle::platform::set_place(paddle::platform::CpuPlace()); + paddle::platform::set_place(paddle::platform::CPUPlace()); EXPECT_TRUE(paddle::platform::is_cpu_place(paddle::platform::get_place())); } TEST(Place, Print) { { std::stringstream ss; - ss << paddle::platform::GpuPlace(1); - EXPECT_EQ("GpuPlace(1)", ss.str()); + ss << paddle::platform::GPUPlace(1); + EXPECT_EQ("GPUPlace(1)", ss.str()); } { std::stringstream ss; - ss << paddle::platform::CpuPlace(); - EXPECT_EQ("CpuPlace", ss.str()); + ss << paddle::platform::CPUPlace(); + EXPECT_EQ("CPUPlace", ss.str()); } } From ab2550c6400bce5d2596f5bff8629ef67ed195b8 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Sun, 25 Jun 2017 15:44:55 -0700 Subject: [PATCH 058/542] Update design --- paddle/memory/README.md | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/paddle/memory/README.md b/paddle/memory/README.md index fd32d07ef4..e5f7880e4c 100644 --- a/paddle/memory/README.md +++ b/paddle/memory/README.md @@ -31,7 +31,7 @@ In `paddle/memory/memory.h` we have: namespace memory { template void* Alloc(Place, size_t); template void Free(Place, void*); 
-template void Used(Place); +template size_t Used(Place); } // namespace memory ``` @@ -39,7 +39,7 @@ These function templates have specializations on either `platform::CPUPlace` or ```cpp template<> -void Alloc(CPUPlace p, size_t size) { +void* Alloc(CPUPlace p, size_t size) { return GetCPUBuddyAllocator()->Alloc(size); } ``` @@ -102,15 +102,11 @@ class BuddyAllocator { }; ``` -#### System Allocators - -The `GPUAllocator` and `CPUAllocator` are calls *system allocators*. They work as the fallback allocators of `BuddyAllocator`. A system allocator holds information about a device, including the amount of memory has been allocated, so we can call +Because BuddyAllocator has the meta-data of each block, it can trace the used memory -- record the amount returned by `Alloc` freed in `Free`. Instead, `CPUAllocator` and `GPUAllocator` doesn't know the size of freed memory block and cannot do the trace. -- `GPUAllocator::Used()` and -- `CPUAllocator::Used()` - -to get the amount of memory that has been allocated so far. +#### System Allocators +The `GPUAllocator` and `CPUAllocator` are calls *system allocators*. They work as the fallback allocators of `BuddyAllocator`. ## Justification From db128c4586c3c925a6c53a9ae770cb07cdbea1bf Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Sun, 25 Jun 2017 17:54:06 -0700 Subject: [PATCH 059/542] Pass cpu_allocator_test --- CMakeLists.txt | 2 +- cmake/generic.cmake | 4 ++++ paddle/memory/detail/CMakeLists.txt | 6 +++++- paddle/memory/detail/cpu_allocator.h | 13 +++++++++---- paddle/memory/detail/cpu_allocator_test.cc | 16 +++++++++++----- paddle/memory/memory.cc | 14 ++++++++++++-- paddle/memory/memory.h | 16 +++++++++++++--- 7 files changed, 55 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c5d7f2c7ec..3c719d35ec 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,7 +71,7 @@ if(ANDROID) "Disable RDMA when cross-compiling for Android" FORCE) endif(ANDROID) -set(THIRD_PARTY_PATH "${PROJ_ROOT}/third_party" CACHE STRING +set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING "A path setting third party libraries download & build directories.") if (WITH_C_API AND WITH_PYTHON) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 69e8164a00..840155750e 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -78,6 +78,10 @@ # # cc_test(example_test SRCS example_test.cc DEPS example glog gflags) +if(WITH_GPU) + add_definitions(-DPADDLE_WITH_GPU) +endif() + if(NOT APPLE) find_package(Threads REQUIRED) link_libraries(${CMAKE_THREAD_LIBS_INIT}) diff --git a/paddle/memory/detail/CMakeLists.txt b/paddle/memory/detail/CMakeLists.txt index fb8a11062d..c425e9f947 100644 --- a/paddle/memory/detail/CMakeLists.txt +++ b/paddle/memory/detail/CMakeLists.txt @@ -1 +1,5 @@ -cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc) +if(${WITH_GPU}) + nv_test(cpu_allocator_test SRCS cpu_allocator_test.cc) # nv_test links CUDA, but +else(${WITH_GPU}) + cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc) # cc_test doesn't. +endif(${WITH_GPU}) diff --git a/paddle/memory/detail/cpu_allocator.h b/paddle/memory/detail/cpu_allocator.h index 8a872d3800..0d8ea3f52b 100644 --- a/paddle/memory/detail/cpu_allocator.h +++ b/paddle/memory/detail/cpu_allocator.h @@ -17,6 +17,11 @@ limitations under the License. 
*/ #include // for malloc and free #include // for size_t +#ifdef PADDLE_WITH_GPU +#include +#include +#endif // PADDLE_WITH_GPU + namespace paddle { namespace memory { namespace detail { @@ -40,9 +45,9 @@ public: void Free(void* p) { free(p); } }; -// If CMake macro WITH_GPU is OFF, C++ compiler won't generate the +// If CMake macro PADDLE_WITH_GPU is OFF, C++ compiler won't generate the // following specialization that depends on the CUDA library. -#ifdef WITH_GPU +#ifdef PADDLE_WITH_GPU template <> class CPUAllocator { public: @@ -51,12 +56,12 @@ public: if (cudaMallocHost(&p, size) != cudaSuccess) { return NULL; } - return *p; + return p; } void Free(void* p) { cudaFreeHost(p); } }; -#endif // WITH_GPU +#endif // PADDLE_WITH_GPU } // namespace detail } // namespace memory diff --git a/paddle/memory/detail/cpu_allocator_test.cc b/paddle/memory/detail/cpu_allocator_test.cc index 0aa33a22fd..464bc84e5c 100644 --- a/paddle/memory/detail/cpu_allocator_test.cc +++ b/paddle/memory/detail/cpu_allocator_test.cc @@ -22,11 +22,17 @@ TEST(CPUAllocator, NonStaging) { a.Free(p); } -#ifdef WITH_GPU +#ifdef PADDLE_WITH_GPU TEST(CPUAllocator, Staging) { paddle::memory::detail::CPUAllocator a; - void* p = a.Alloc(4096); - EXPECT_NE(p, nullptr); - a.Free(p); + + int devices; + if (cudaGetDeviceCount(&devices) == cudaSuccess && devices > 0) { + void* p = a.Alloc(4096); + EXPECT_NE(p, nullptr); + a.Free(p); + } else { + EXPECT_EQ(a.Alloc(4096), nullptr); + } } -#endif // WITH_GPU +#endif // PADDLE_WITH_GPU diff --git a/paddle/memory/memory.cc b/paddle/memory/memory.cc index 5f1253ede6..b617923731 100644 --- a/paddle/memory/memory.cc +++ b/paddle/memory/memory.cc @@ -19,7 +19,11 @@ namespace memory { template <> void* Alloc(CPUPlace, size_t size) { - return GetCPUBuddyAllocator()->Alloc(size); + return GetCPUBuddyAllocator(false /*non-staging*/)->Alloc(size); +} + +void* AllocStaging(CPUPlace, size_t size) { + return GetCPUBuddyAllocator(true /*staging*/)->Alloc(size); } template <> @@ -29,9 +33,14 @@ void* Alloc(GPUPlace pl, size_t size) { template <> void Free(CPUPlace, void* p) { - return GetCPUBuddyAllocator()->Free(p); + return GetCPUBuddyAllocator(false /*non-staging*/)->Free(p); +} + +void FreeStaging(CPUPlace, void* p) { + return GetCPUBuddyAllocator(false /*non-staging*/)->Free(p); } +#ifdef PADDLE_WITH_GPU template <> void* Alloc(GPUPlace pl, void* p) { return GetGPUBuddyAllocator(pl.device)->Free(p); @@ -46,6 +55,7 @@ template <> size_t Alloc(GPUPlace pl) { return GetGPUBuddyAllocator(pl.device)->Used(); } +#endif // PADDLE_WITH_GPU } // namespace memory } // namespace paddle diff --git a/paddle/memory/memory.h b/paddle/memory/memory.h index ae8ac6ca52..8c15a133bb 100644 --- a/paddle/memory/memory.h +++ b/paddle/memory/memory.h @@ -19,9 +19,19 @@ limitations under the License. */ namespace paddle { namespace memory { -typename void* Alloc(Place, size_t); -typename void Free(Place, void*); -typename size_t Used(Place); +template +void* Alloc(Place, size_t); +template +void Free(Place, void*); +template +size_t Used(Place); + +// Staging memory means "pinned" host memory that can be mapped into +// the CUDA memory space and accessed by the device rapidly. Don't +// allocate too much staging memory; otherwise system performance will +// degrade because the OS cannot find enough swap memory space. 
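+//
+// A hypothetical call sequence (sketch only, using the declarations
+// just below):
+//
+//   void* staging = AllocStaging(CPUPlace(), 1 << 20);  // 1 MB pinned buffer
+//   ...stage host/device copies through the buffer...
+//   FreeStaging(CPUPlace(), staging);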
+void* AllocStaging(CPUPlace, size_t); +void* FreeStaging(CPUPlace, size_t); } // namespace memory } // namespace paddle From f403096aa4e03475f2201f6c444ce86f2e13a1a8 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Sun, 25 Jun 2017 17:59:23 -0700 Subject: [PATCH 060/542] Move directory third_party into /build --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c5d7f2c7ec..3c719d35ec 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,7 +71,7 @@ if(ANDROID) "Disable RDMA when cross-compiling for Android" FORCE) endif(ANDROID) -set(THIRD_PARTY_PATH "${PROJ_ROOT}/third_party" CACHE STRING +set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING "A path setting third party libraries download & build directories.") if (WITH_C_API AND WITH_PYTHON) From 9dd211f6c69066b93d7e81dd30e98bb12091a014 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 26 Jun 2017 11:38:21 +0800 Subject: [PATCH 061/542] Add Third Party Path back to TravisCI cache. --- .travis.yml | 5 ++++- paddle/scripts/travis/build_doc.sh | 5 +++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 2c46da71e7..a57f1cd84b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,7 @@ cache: directories: - $HOME/.ccache - $HOME/.cache/pip + - $HOME/third_party sudo: required dist: trusty os: @@ -41,7 +42,9 @@ before_install: - | function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; } script: - - paddle/scripts/travis/$JOB.sh + - | + timeout 2580 paddle/scripts/travis/${JOB}.sh # 43min timeout + RESULT=$?; if [ $RESULT -eq 0 ] || [ $RESULT -eq 142 ]; then true; else false; fi; notifications: email: on_success: change diff --git a/paddle/scripts/travis/build_doc.sh b/paddle/scripts/travis/build_doc.sh index 88264d8c26..193c291d43 100755 --- a/paddle/scripts/travis/build_doc.sh +++ b/paddle/scripts/travis/build_doc.sh @@ -6,12 +6,13 @@ mkdir -p $TRAVIS_BUILD_DIR/build cd $TRAVIS_BUILD_DIR/build # Compile Documentation only. -cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_STYLE_CHECK=OFF +cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_STYLE_CHECK=OFF -DTHIRD_PARTY_PATH=$HOME/third_party + mkdir output make -j `nproc` find .. -name '*whl' | xargs pip install # install all wheels. rm -rf * -cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=ON +cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=ON -DTHIRD_PARTY_PATH=$HOME/third_party make -j `nproc` paddle_docs paddle_docs_cn # check websites for broken links From be54d38a1f2e1bcf8a6fb40576a4712fbf05ca77 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 26 Jun 2017 11:42:03 +0800 Subject: [PATCH 062/542] Cache Paddle Default ThirdParty Dir --- .travis.yml | 2 +- paddle/scripts/travis/build_doc.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index a57f1cd84b..64961adcf2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,7 @@ cache: directories: - $HOME/.ccache - $HOME/.cache/pip - - $HOME/third_party + - $TRAVIS_BUILD_DIR/build/third_party sudo: required dist: trusty os: diff --git a/paddle/scripts/travis/build_doc.sh b/paddle/scripts/travis/build_doc.sh index 193c291d43..a44bd35357 100755 --- a/paddle/scripts/travis/build_doc.sh +++ b/paddle/scripts/travis/build_doc.sh @@ -6,13 +6,13 @@ mkdir -p $TRAVIS_BUILD_DIR/build cd $TRAVIS_BUILD_DIR/build # Compile Documentation only. -cmake .. 
-DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_STYLE_CHECK=OFF -DTHIRD_PARTY_PATH=$HOME/third_party +cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_STYLE_CHECK=OFF mkdir output make -j `nproc` find .. -name '*whl' | xargs pip install # install all wheels. rm -rf * -cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=ON -DTHIRD_PARTY_PATH=$HOME/third_party +cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=ON make -j `nproc` paddle_docs paddle_docs_cn # check websites for broken links From d76d2febbfd55243f471ea3521337d81e10f5971 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 26 Jun 2017 11:52:50 +0800 Subject: [PATCH 063/542] Adding platform/must_check.h __must_check is a macro mark of function return value. It let developer must check the return value is legal or not. --- paddle/platform/CMakeLists.txt | 1 + .../{utils/Compiler.h => platform/must_check.h} | 17 +++++------------ paddle/platform/must_check_test.cc | 10 ++++++++++ paddle/utils/Error.h | 2 +- 4 files changed, 17 insertions(+), 13 deletions(-) rename paddle/{utils/Compiler.h => platform/must_check.h} (78%) create mode 100644 paddle/platform/must_check_test.cc diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index c7d7b14518..7abe2ab89e 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -2,3 +2,4 @@ nv_test(cuda_test SRCS cuda_test.cu) cc_library(place SRCS place.cc) cc_test(place_test SRCS place_test.cc DEPS place glog gflags) +cc_test(must_check_test SRCS must_check_test.cc) diff --git a/paddle/utils/Compiler.h b/paddle/platform/must_check.h similarity index 78% rename from paddle/utils/Compiler.h rename to paddle/platform/must_check.h index cebca5a2a3..4fcc62afc0 100644 --- a/paddle/utils/Compiler.h +++ b/paddle/platform/must_check.h @@ -10,24 +10,17 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -/** - * This header defines some useful attribute by each compiler. It is the - * abstract layer of compilers. - */ -#ifdef __GNUC__ -#define GCC_VERSION \ - (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) -#else -#define GCC_VERSION -#endif - /** * __must_check macro. It make the function's return value must be used, * otherwise it will raise a compile warning. And also Paddle treat all compile * warnings as errors. */ -#if GCC_VERSION >= 30400 +#ifdef __GNUC__ +#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) >= 30400 #define __must_check __attribute__((warn_unused_result)) #else #define __must_check #endif +#else +#define __must_check +#endif diff --git a/paddle/platform/must_check_test.cc b/paddle/platform/must_check_test.cc new file mode 100644 index 0000000000..6ee3ea49ac --- /dev/null +++ b/paddle/platform/must_check_test.cc @@ -0,0 +1,10 @@ +#include +#include + +int __must_check SomeFunctionMustCheck() { return 0; } + +TEST(MustCheck, all) { + // This line should not be compiled, because the + // return value of SomeFunctionMustCheck marked as __must_check + // SomeFunctionMustCheck(); +} \ No newline at end of file diff --git a/paddle/utils/Error.h b/paddle/utils/Error.h index cda1b5c37d..f3d535c69c 100644 --- a/paddle/utils/Error.h +++ b/paddle/utils/Error.h @@ -19,7 +19,7 @@ limitations under the License. 
*/ #include #include #include -#include "Compiler.h" +#include "paddle/platform/must_check.h" namespace paddle { From 9f05a0f80225bf4f630817c413b82b23d7579091 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 26 Jun 2017 14:22:18 +0800 Subject: [PATCH 064/542] use GradientMachine::start and finish --- .../gradientmachines/MultiGradientMachine.cpp | 12 ++++++++++-- .../gserver/gradientmachines/MultiGradientMachine.h | 4 +++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp index 9abda18d54..8ef5e9d0c1 100644 --- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp +++ b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp @@ -166,12 +166,16 @@ MultiGradientMachine::MultiGradientMachine(const ModelConfig& config, outArgStream_ = HPPL_STREAM_1; + start(); +} + +void MultiGradientMachine::start() { for (auto& thread : threads_) { thread->start(); } } -MultiGradientMachine::~MultiGradientMachine() { +void MultiGradientMachine::finish() { for (auto& thread : threads_) { thread->stop(); } @@ -445,7 +449,7 @@ TrainerThread::TrainerThread(const ModelConfig& config, gradStream_ = HPPL_STREAM_2; valueStream_ = HPPL_STREAM_3; - stopping_ = false; + stopping_ = true; updateCounter_ = 0; parameterUpdated_ = false; } @@ -453,6 +457,10 @@ TrainerThread::TrainerThread(const ModelConfig& config, TrainerThread::~TrainerThread() { stop(); } void TrainerThread::start() { + if (!stopping_) return; + + stopping_ = false; + gradientMachine_->start(); computeThread_.reset(new std::thread([this]() { computeThread(); })); diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.h b/paddle/gserver/gradientmachines/MultiGradientMachine.h index c005c0ed67..5e7622f929 100644 --- a/paddle/gserver/gradientmachines/MultiGradientMachine.h +++ b/paddle/gserver/gradientmachines/MultiGradientMachine.h @@ -176,7 +176,9 @@ public: explicit MultiGradientMachine(const ModelConfig& config, bool useGpu); - virtual ~MultiGradientMachine(); + virtual void start(); + + virtual void finish(); virtual void prefetch(const std::vector& inArgs); From 736af1f1a58ca8bdf392d6707f76c83876b73796 Mon Sep 17 00:00:00 2001 From: liaogang Date: Mon, 26 Jun 2017 16:58:06 +0800 Subject: [PATCH 065/542] FIX: go get dependencies automatically --- cmake/generic.cmake | 25 +++++++++++++++++-------- go/pserver/cclient/CMakeLists.txt | 6 +----- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index dd927b82d3..cc294c8c00 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -246,6 +246,7 @@ endfunction(nv_test) set(GOPATH "${CMAKE_CURRENT_BINARY_DIR}/go") file(MAKE_DIRECTORY ${GOPATH}) +set(PADDLE_IN_GOPATH "${GOPATH}/src/github.com/PaddlePaddle/Paddle") # Because api.go defines a GO wrapper to ops and tensor, it depends on # both. 
This implies that if any of tensor.{h,cc}, ops.{h,cu}, or @@ -270,14 +271,29 @@ function(go_library TARGET_NAME) set(LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${TARGET_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}") endif() + # Add dummy code to support `make target_name` under Terminal Command set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c) file(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";") add_library(${TARGET_NAME} STATIC ${dummyfile}) - add_dependencies(${TARGET_NAME} ${go_library_DEPS}) + if(go_library_DEPS) + add_dependencies(${TARGET_NAME} ${go_library_DEPS}) + endif(go_library_DEPS) + # we need to symlink Paddle directory into GOPATH. If we + # don't do it and we have code that depends on Paddle, go + # get ./... will download a new Paddle repo from Github, + # without the changes in our current Paddle repo that we + # want to build. file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go") add_custom_command(TARGET ${TARGET_NAME} POST_BUILD COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}" + # Symlink Paddle directory into GOPATH + COMMAND mkdir -p ${PADDLE_IN_GOPATH} + COMMAND rm -rf ${PADDLE_IN_GOPATH} + COMMAND ln -sf ${CMAKE_SOURCE_DIR} ${PADDLE_IN_GOPATH} + # Automatically get all dependencies specified in the source code + COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get -d . + # Golang build source code COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE} -o "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}" ${GO_SOURCE} @@ -311,10 +327,3 @@ function(go_test TARGET_NAME) add_custom_target(${TARGET_NAME} ALL DEPENDS ${TARGET_NAME}_timestamp ${go_test_DEPS}) add_test(${TARGET_NAME} ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}) endfunction(go_test) - -# go_extern will download extern go project. -# go_extern(target_name extern_source) -# go_extern(go_redis github.com/hoisie/redis) -function(go_extern TARGET_NAME) - add_custom_target(${TARGET_NAME} env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get -d ${ARGN}) -endfunction(go_extern) diff --git a/go/pserver/cclient/CMakeLists.txt b/go/pserver/cclient/CMakeLists.txt index 8af6bc2e50..d2c339d688 100644 --- a/go/pserver/cclient/CMakeLists.txt +++ b/go/pserver/cclient/CMakeLists.txt @@ -1,7 +1,3 @@ -file(RELATIVE_PATH rel ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) - -go_extern(go_pserver ${rel}/...) 
-go_extern(go_logrus github.com/sirupsen/logrus) -go_library(paddle_pserver_cclient STATIC DEPS go_logrus go_pserver) +go_library(paddle_pserver_cclient STATIC) add_subdirectory(test) From 12749ad5526e88d2cc11d62a94065dfcd89d4207 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Mon, 26 Jun 2017 18:33:26 +0800 Subject: [PATCH 066/542] "fix cmake flags in optimizer" --- go/pserver/cclient/CMakeLists.txt | 2 + .../test/{ => testdata}/optimizer.pb.txt | Bin go/pserver/optimizer.go | 41 ++++++++++++++---- go/pserver/optimizer_test.go | 24 ++++++---- go/pserver/service.go | 11 +++-- go/pserver/service_test.go | 8 +--- paddle/optimizer/CMakeLists.txt | 1 + 7 files changed, 60 insertions(+), 27 deletions(-) rename go/pserver/cclient/test/{ => testdata}/optimizer.pb.txt (100%) diff --git a/go/pserver/cclient/CMakeLists.txt b/go/pserver/cclient/CMakeLists.txt index 65a38ba1ad..b3e79ca661 100644 --- a/go/pserver/cclient/CMakeLists.txt +++ b/go/pserver/cclient/CMakeLists.txt @@ -9,6 +9,8 @@ project(cxx_go C Go) include(golang) include(flags) +cc_library(paddle_go_optimizer DEPS paddle_optimizer paddle_proto glog gflags) + go_library(paddle_pserver_cclient STATIC) if(WITH_TESTING) add_subdirectory(test) diff --git a/go/pserver/cclient/test/optimizer.pb.txt b/go/pserver/cclient/test/testdata/optimizer.pb.txt similarity index 100% rename from go/pserver/cclient/test/optimizer.pb.txt rename to go/pserver/cclient/test/testdata/optimizer.pb.txt diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go index 40748d03c1..12bf055b4d 100644 --- a/go/pserver/optimizer.go +++ b/go/pserver/optimizer.go @@ -4,8 +4,7 @@ package pserver // TODO(zhihong): move compile flags to cmake go_library #cgo pkg-config: protobuf #cgo CFLAGS: -I ../../ -#cgo LDFLAGS: ../../build/paddle/optimizer/libpaddle_optimizer.a ../../build/proto/libpaddle_proto.a ../../third_party/install/glog/lib/libglog.a ../../third_party/install/gtest/lib/libgtest.a ../../third_party/install/gflags/lib/libgflags.a ../../third_party/install/openblas/lib/libopenblas.a -I/usr/local/lib/ -lprotobuf -#cgo LDFLAGS: /Users/dzh/.go/src/github.com/PaddlePaddle/Paddle/build/lib/libdep.a +#cgo LDFLAGS: /Users/dzh/.go/src/github.com/PaddlePaddle/Paddle/build/go/pserver/cclient/libpaddle_go_optimizer.a #include "paddle/optimizer/optimizer.h" */ import "C" @@ -18,26 +17,50 @@ var nullPtr = unsafe.Pointer(uintptr(0)) type optimizer struct { opt *C.struct_paddle_optimizer + // used in GetParam, reconstruct Parameter from optimizer + ElementType ElementType +} + +func cArrayToSlice(p unsafe.Pointer, len int) []byte { + if p == nullPtr { + return nil + } + + // create a Go clice backed by a C array, reference: + // https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices + // + // Go garbage collector will not interact with this data, need + // to be freed properly. 
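+	//
+	// The three-index slice expression [:len:len] below sets both the
+	// length and the capacity to len, so a later append() reallocates
+	// instead of writing past the end of the C buffer.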
+ return (*[1 << 30]byte)(p)[:len:len] } func newOptimizer(paramWithConfigs ParameterWithConfig) *optimizer { o := &optimizer{} p := paramWithConfigs.Param c := paramWithConfigs.Config - o.opt = C.paddle_create_optimizer(C.uchar(c), C.int(len(c)), unsafe.Pointer(p.Content), c.int(p.Length), nullPtr, 0) + buffer := &p.Content[0] + o.opt = C.paddle_create_optimizer(C.uchar(c), C.int(len(c)), unsafe.Pointer(buffer), C.int(len(p.Content)), nullPtr, 0) return o } -func (o *optimizer) UpdateParameter(p Parameter, g Gradient) error { - if p.Length != g.Length { - return fmt.Errorf("Name: %s, parameter and gradient length not match, parameter: %d, gradient: %d", p.Name, p.Length, g.Length) +func (o *optimizer) GetWeights(p *Parameter) error { + + var buffer unsafe.Pointer + buffer_len := C.paddle_optimizer_get_weights(unsafe.Pointer(o), &buffer) + if buffer_len == 0 || buffer == nullPtr { + return fmt.Errorf("parameter optimizer error : %s get failed", p.name) } + p.Content = cArrayToSlice(buffer, int(buffer_len)) + return nil +} - if p.ElementType != g.ElementType { - return fmt.Errorf("Name: %s, parameter and gradient element type not match, parameter: %v, gradient: %v", p.Name, p.ElementType, g.ElementType) +func (o *optimizer) UpdateParameter(g Gradient) error { + if o.ElementType != g.ElementType { + return fmt.Errorf("Name: %s, parameter and gradient element type not match, parameter: %v, gradient: %v", g.Name, g.ElementType, g.ElementType) } - r := C.paddle_update_parameter(o.opt, C.paddle_element_type(p.ElementType), unsafe.Pointer(g.Content), C.int(g.Length)) + // FIXME: do we need a copy? discard g.Content by GC ok + r := C.paddle_update_parameter(o.opt, C.paddle_element_type(g.ElementType), unsafe.Pointer(g.Content), C.int(len(g.Content))) if r != 0 { return fmt.Errorf("optimizer update returned error code: %d", r) } diff --git a/go/pserver/optimizer_test.go b/go/pserver/optimizer_test.go index 4930f0d95f..eac744b5cd 100644 --- a/go/pserver/optimizer_test.go +++ b/go/pserver/optimizer_test.go @@ -1,14 +1,22 @@ package pserver -import "testing" +import ( + "io/ioutil" + "testing" +) -func TestSGDCreateRelease(t *testing.T) { - param := pserver.ParameterWithConfig{ - Param : pserver.Parameter{Name : "a", - ElementType: , - Content: , - Length : } +func TestOptimizerCreateRelease(t *testing.T) { + p := Parameter{ + Name: "a", + ElementType: Float32, } - o := newOptimizer(sgd, 1) + p.Content = []byte{0.1, 0.3} + config, err := ioutil.ReadFile("./cclient/test/testdata/optimizer.pb.txt") + + param := ParameterWithConfig{ + Param: p, + Config: config, + } + o := newOptimizer(param) o.Cleanup() } diff --git a/go/pserver/service.go b/go/pserver/service.go index 32449f66b7..d0d57136b5 100644 --- a/go/pserver/service.go +++ b/go/pserver/service.go @@ -107,7 +107,7 @@ func (s *Service) SendGrad(g Gradient, dummy *int) error { return fmt.Errorf("parameter: %s does not exist", g.Name) } - return o.UpdateParameter(p, g) + return o.UpdateParameter(g) } // GetParam gets parameters from the parameter server. @@ -116,7 +116,7 @@ func (s *Service) GetParam(name string, parameter *Parameter) error { s.mu.Lock() defer s.mu.Unlock() - p, ok := s.paramMap[name] + opt, ok := s.optMap[name] if !ok { return fmt.Errorf("parameter: %s does not exist", name) } @@ -128,8 +128,11 @@ func (s *Service) GetParam(name string, parameter *Parameter) error { // nature. This race condition is allowed deliberately // to save the program from making a copy of the // paramter content. 
-	*parameter = p
-	return nil
+	p.Name = name
+	p.ElementType = opt.ElementType
+
+	ok := opt.GetWeights(&parameter)
+	return ok
 }
 
 // Save tells the parameter server to save parameters.
diff --git a/go/pserver/service_test.go b/go/pserver/service_test.go
index 1b2626f7db..b746d13e1c 100644
--- a/go/pserver/service_test.go
+++ b/go/pserver/service_test.go
@@ -13,9 +13,7 @@ func TestFull(t *testing.T) {
 	s := pserver.NewService()
 	var p pserver.Parameter
 	p.Name = "param_a"
-	ElementValue := []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0}
-	p.Content = &ElementValue[0]
-	p.Length = len(ElementValue)
+	p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0}
 	p.ElementType = pserver.Int32
 	err := s.InitParam(pserver.ParameterWithConfig{Param: p, Config: nil}, nil)
 	if err != nil {
@@ -24,9 +22,7 @@
 	var p1 pserver.Parameter
 	p1.Name = "param_b"
-	ElementValue = []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
-	p1.Content = &ElementValue[0]
-	p1.Length = len(ElementValue)
+	p1.Content = []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
 	p1.ElementType = pserver.Float32
 	err = s.InitParam(pserver.ParameterWithConfig{Param: p1, Config: nil}, nil)
 	if err != nil {
diff --git a/paddle/optimizer/CMakeLists.txt b/paddle/optimizer/CMakeLists.txt
index 4536f62ec7..35f04789cf 100644
--- a/paddle/optimizer/CMakeLists.txt
+++ b/paddle/optimizer/CMakeLists.txt
@@ -12,6 +12,7 @@ set(OPITMIZER_SRCS
 
 add_library(paddle_optimizer STATIC ${OPITMIZER_SRCS})
 add_dependencies(paddle_optimizer gen_proto_cpp)
+
 if(WITH_TESTING)
   add_simple_unittest(serialization_test)
   add_simple_unittest(parameter_optimizer_test)

From 97270b9f270fc7711f08b3ad80a4e17612d4606c Mon Sep 17 00:00:00 2001
From: root
Date: Mon, 26 Jun 2017 19:46:20 +0800
Subject: [PATCH 067/542] add convert function
---
 python/paddle/v2/dataset/cifar.py       | 29 +++++++++++++++-----
 python/paddle/v2/dataset/common.py      |  5 +++-
 python/paddle/v2/dataset/conll05.py     | 36 +++++++++++++++++--------
 python/paddle/v2/dataset/imdb.py        | 11 ++++++++
 python/paddle/v2/dataset/imikolov.py    | 14 ++++++++--
 python/paddle/v2/dataset/mnist.py       |  8 ++++++
 python/paddle/v2/dataset/movielens.py   | 14 +++++++---
 python/paddle/v2/dataset/sentiment.py   | 20 ++++++++++----
 python/paddle/v2/dataset/uci_housing.py | 18 +++++++++----
 python/paddle/v2/dataset/wmt14.py       | 28 +++++++++++++------
 10 files changed, 141 insertions(+), 42 deletions(-)

diff --git a/python/paddle/v2/dataset/cifar.py b/python/paddle/v2/dataset/cifar.py
index 81af0a8e66..95984d980d 100644
--- a/python/paddle/v2/dataset/cifar.py
+++ b/python/paddle/v2/dataset/cifar.py
@@ -31,7 +31,7 @@ images per class.
 import cPickle
 import itertools
 import numpy
-from common import download
+import paddle.v2.dataset.common
 import tarfile
 
 __all__ = ['train100', 'test100', 'train10', 'test10']
@@ -75,7 +75,8 @@ def train100():
     :rtype: callable
     """
     return reader_creator(
-        download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'train')
+        paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5),
+        'train')
 
 
 def test100():
@@ -88,7 +89,9 @@ def test100():
     :return: Test reader creator.
:rtype: callable """ - return reader_creator(download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'test') + return reader_creator( + paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5), + 'test') def train10(): @@ -102,7 +105,8 @@ def train10(): :rtype: callable """ return reader_creator( - download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'data_batch') + paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), + 'data_batch') def test10(): @@ -116,9 +120,20 @@ def test10(): :rtype: callable """ return reader_creator( - download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'test_batch') + paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), + 'test_batch') def fetch(): - download(CIFAR10_URL, 'cifar', CIFAR10_MD5) - download(CIFAR100_URL, 'cifar', CIFAR100_MD5) + paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5) + paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5) + + +def convert(path): + """ + Converts dataset to recordio format + """ + paddle.v2.dataset.common.convert(path, train100(), 10, "cifar_train100") + paddle.v2.dataset.common.convert(path, test100(), 10, "cifar_test100") + paddle.v2.dataset.common.convert(path, train10(), 10, "cifar_train10") + paddle.v2.dataset.common.convert(path, test10(), 10, "cifar_test10") diff --git a/python/paddle/v2/dataset/common.py b/python/paddle/v2/dataset/common.py index 72894c24b1..4a2eb59c34 100644 --- a/python/paddle/v2/dataset/common.py +++ b/python/paddle/v2/dataset/common.py @@ -23,7 +23,10 @@ import paddle.v2.dataset import cPickle import glob -__all__ = ['DATA_HOME', 'download', 'md5file', 'split', 'cluster_files_reader'] +__all__ = [ + 'DATA_HOME', 'download', 'md5file', 'split', 'cluster_files_reader', + 'convert' +] DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset') diff --git a/python/paddle/v2/dataset/conll05.py b/python/paddle/v2/dataset/conll05.py index 12d648bf65..d4c2276b1b 100644 --- a/python/paddle/v2/dataset/conll05.py +++ b/python/paddle/v2/dataset/conll05.py @@ -23,7 +23,7 @@ to initialize SRL model. import tarfile import gzip import itertools -from common import download +import paddle.v2.dataset.common __all__ = ['test, get_dict', 'get_embedding'] @@ -182,9 +182,15 @@ def get_dict(): """ Get the word, verb and label dictionary of Wikipedia corpus. """ - word_dict = load_dict(download(WORDDICT_URL, 'conll05st', WORDDICT_MD5)) - verb_dict = load_dict(download(VERBDICT_URL, 'conll05st', VERBDICT_MD5)) - label_dict = load_dict(download(TRGDICT_URL, 'conll05st', TRGDICT_MD5)) + word_dict = load_dict( + paddle.v2.dataset.common.download(WORDDICT_URL, 'conll05st', + WORDDICT_MD5)) + verb_dict = load_dict( + paddle.v2.dataset.common.download(VERBDICT_URL, 'conll05st', + VERBDICT_MD5)) + label_dict = load_dict( + paddle.v2.dataset.common.download(TRGDICT_URL, 'conll05st', + TRGDICT_MD5)) return word_dict, verb_dict, label_dict @@ -192,7 +198,7 @@ def get_embedding(): """ Get the trained word vector based on Wikipedia corpus. 
""" - return download(EMB_URL, 'conll05st', EMB_MD5) + return paddle.v2.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5) def test(): @@ -209,15 +215,23 @@ def test(): """ word_dict, verb_dict, label_dict = get_dict() reader = corpus_reader( - download(DATA_URL, 'conll05st', DATA_MD5), + paddle.v2.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5), words_name='conll05st-release/test.wsj/words/test.wsj.words.gz', props_name='conll05st-release/test.wsj/props/test.wsj.props.gz') return reader_creator(reader, word_dict, verb_dict, label_dict) def fetch(): - download(WORDDICT_URL, 'conll05st', WORDDICT_MD5) - download(VERBDICT_URL, 'conll05st', VERBDICT_MD5) - download(TRGDICT_URL, 'conll05st', TRGDICT_MD5) - download(EMB_URL, 'conll05st', EMB_MD5) - download(DATA_URL, 'conll05st', DATA_MD5) + paddle.v2.dataset.common.download(WORDDICT_URL, 'conll05st', WORDDICT_MD5) + paddle.v2.dataset.common.download(VERBDICT_URL, 'conll05st', VERBDICT_MD5) + paddle.v2.dataset.common.download(TRGDICT_URL, 'conll05st', TRGDICT_MD5) + paddle.v2.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5) + paddle.v2.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5) + + +def convert(): + """ + Converts dataset to recordio format + """ + paddle.v2.dataset.common.convert(path, test(), 10, "conl105_train") + paddle.v2.dataset.common.convert(path, test(), 10, "conl105_test") diff --git a/python/paddle/v2/dataset/imdb.py b/python/paddle/v2/dataset/imdb.py index 5dc5abfe53..d939bc3065 100644 --- a/python/paddle/v2/dataset/imdb.py +++ b/python/paddle/v2/dataset/imdb.py @@ -166,3 +166,14 @@ def word_dict(): def fetch(): paddle.v2.dataset.common.download(URL, 'imdb', MD5) + + +def convert(): + """ + Converts dataset to recordio format + """ + word_dict = ds.imdb.word_dict() + paddle.v2.dataset.common.convert(path, lambda: train(word_dict), 10, + "imdb_train") + paddle.v2.dataset.common.convert(path, lambda: test(word_dict), 10, + "imdb_test") diff --git a/python/paddle/v2/dataset/imikolov.py b/python/paddle/v2/dataset/imikolov.py index dd3a4552d2..034f58c2c8 100644 --- a/python/paddle/v2/dataset/imikolov.py +++ b/python/paddle/v2/dataset/imikolov.py @@ -18,7 +18,7 @@ This module will download dataset from http://www.fit.vutbr.cz/~imikolov/rnnlm/ and parse training set and test set into paddle reader creators. 
""" -import paddle.v2.dataset.common +import paddle.v2.dataset.common as common import collections import tarfile @@ -145,4 +145,14 @@ def test(word_idx, n, data_type=DataType.NGRAM): def fetch(): - paddle.v2.dataset.common.download(URL, "imikolov", MD5) + common.download(URL, "imikolov", MD5) + + +def convert(path): + """ + Converts dataset to recordio format + """ + N = 5 + word_dict = build_dict() + common.convert(path, train(word_dict, N), 10, "imikolov_train") + common.convert(path, test(word_dict, N), 10, "imikolov_test") diff --git a/python/paddle/v2/dataset/mnist.py b/python/paddle/v2/dataset/mnist.py index 435556b292..92d7f69b8d 100644 --- a/python/paddle/v2/dataset/mnist.py +++ b/python/paddle/v2/dataset/mnist.py @@ -113,3 +113,11 @@ def fetch(): paddle.v2.dataset.common.download(TRAIN_LABEL_URL, 'mnist', TRAIN_LABEL_MD5) paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist', TEST_IMAGE_MD5) paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist', TRAIN_LABEL_MD5) + + +def convert(path): + """ + Converts dataset to recordio format + """ + paddle.v2.dataset.common.convert(path, train(), 10, "minist_train") + paddle.v2.dataset.common.convert(path, test(), 10, "minist_test") diff --git a/python/paddle/v2/dataset/movielens.py b/python/paddle/v2/dataset/movielens.py index 837a859126..fb906cd4b6 100644 --- a/python/paddle/v2/dataset/movielens.py +++ b/python/paddle/v2/dataset/movielens.py @@ -23,7 +23,7 @@ set and test set into paddle reader creators. """ import zipfile -from common import download +import paddle.v2.dataset.common import re import random import functools @@ -99,7 +99,7 @@ USER_INFO = None def __initialize_meta_info__(): - fn = download(URL, "movielens", MD5) + fn = paddle.v2.dataset.common.download(URL, "movielens", MD5) global MOVIE_INFO if MOVIE_INFO is None: pattern = re.compile(r'^(.*)\((\d+)\)$') @@ -246,7 +246,15 @@ def unittest(): def fetch(): - download(URL, "movielens", MD5) + paddle.v2.dataset.common.download(URL, "movielens", MD5) + + +def convert(path): + """ + Converts dataset to recordio format + """ + paddle.v2.dataset.common.convert(path, train(), 10, "movielens_train") + paddle.v2.dataset.common.convert(path, test(), 10, "movielens_test") if __name__ == '__main__': diff --git a/python/paddle/v2/dataset/sentiment.py b/python/paddle/v2/dataset/sentiment.py index 4dd34e7383..89683c2063 100644 --- a/python/paddle/v2/dataset/sentiment.py +++ b/python/paddle/v2/dataset/sentiment.py @@ -26,7 +26,7 @@ from itertools import chain import nltk from nltk.corpus import movie_reviews -import common +import paddle.v2.dataset.common __all__ = ['train', 'test', 'get_word_dict'] NUM_TRAINING_INSTANCES = 1600 @@ -39,12 +39,13 @@ def download_data_if_not_yet(): """ try: # make sure that nltk can find the data - if common.DATA_HOME not in nltk.data.path: - nltk.data.path.append(common.DATA_HOME) + if paddle.v2.dataset.common.DATA_HOME not in nltk.data.path: + nltk.data.path.append(paddle.v2.dataset.common.DATA_HOME) movie_reviews.categories() except LookupError: print "Downloading movie_reviews data set, please wait....." - nltk.download('movie_reviews', download_dir=common.DATA_HOME) + nltk.download( + 'movie_reviews', download_dir=paddle.v2.dataset.common.DATA_HOME) print "Download data set success....." 
print "Path is " + nltk.data.find('corpora/movie_reviews').path @@ -128,4 +129,13 @@ def test(): def fetch(): - nltk.download('movie_reviews', download_dir=common.DATA_HOME) + nltk.download( + 'movie_reviews', download_dir=paddle.v2.dataset.common.DATA_HOME) + + +def convert(path): + """ + Converts dataset to recordio format + """ + paddle.v2.dataset.common.convert(path, train, 10, "sentiment_train") + paddle.v2.dataset.common.convert(path, test, 10, "sentiment_test") diff --git a/python/paddle/v2/dataset/uci_housing.py b/python/paddle/v2/dataset/uci_housing.py index 3469fd9ce1..9e15000c02 100644 --- a/python/paddle/v2/dataset/uci_housing.py +++ b/python/paddle/v2/dataset/uci_housing.py @@ -14,14 +14,14 @@ """ UCI Housing dataset. -This module will download dataset from +This module will paddle.v2.dataset.common.download dataset from https://archive.ics.uci.edu/ml/machine-learning-databases/housing/ and parse training set and test set into paddle reader creators. """ import numpy as np import os -from common import download +import paddle.v2.dataset.common __all__ = ['train', 'test'] @@ -82,7 +82,7 @@ def train(): :rtype: callable """ global UCI_TRAIN_DATA - load_data(download(URL, 'uci_housing', MD5)) + load_data(paddle.v2.dataset.common.download(URL, 'uci_housing', MD5)) def reader(): for d in UCI_TRAIN_DATA: @@ -102,7 +102,7 @@ def test(): :rtype: callable """ global UCI_TEST_DATA - load_data(download(URL, 'uci_housing', MD5)) + load_data(paddle.v2.dataset.common.download(URL, 'uci_housing', MD5)) def reader(): for d in UCI_TEST_DATA: @@ -112,4 +112,12 @@ def test(): def fetch(): - download(URL, 'uci_housing', MD5) + paddle.v2.dataset.common.download(URL, 'uci_housing', MD5) + + +def convert(path): + """ + Converts dataset to recordio format + """ + paddle.v2.dataset.common.convert(path, train(), 10, "uci_housing_train") + paddle.v2.dataset.common.convert(path, test(), 10, "uci_houseing_test") diff --git a/python/paddle/v2/dataset/wmt14.py b/python/paddle/v2/dataset/wmt14.py index 0902f87741..f29c9275f0 100644 --- a/python/paddle/v2/dataset/wmt14.py +++ b/python/paddle/v2/dataset/wmt14.py @@ -22,7 +22,7 @@ parse training set and test set into paddle reader creators. 
import tarfile import gzip -from paddle.v2.dataset.common import download +import paddle.v2.dataset.common from paddle.v2.parameters import Parameters __all__ = ['train', 'test', 'build_dict'] @@ -115,7 +115,8 @@ def train(dict_size): :rtype: callable """ return reader_creator( - download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'train/train', dict_size) + paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), + 'train/train', dict_size) def test(dict_size): @@ -130,16 +131,18 @@ def test(dict_size): :rtype: callable """ return reader_creator( - download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'test/test', dict_size) + paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), + 'test/test', dict_size) def gen(dict_size): return reader_creator( - download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'gen/gen', dict_size) + paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), + 'gen/gen', dict_size) def model(): - tar_file = download(URL_MODEL, 'wmt14', MD5_MODEL) + tar_file = paddle.v2.dataset.common.download(URL_MODEL, 'wmt14', MD5_MODEL) with gzip.open(tar_file, 'r') as f: parameters = Parameters.from_tar(f) return parameters @@ -148,7 +151,7 @@ def model(): def get_dict(dict_size, reverse=True): # if reverse = False, return dict = {'a':'001', 'b':'002', ...} # else reverse = true, return dict = {'001':'a', '002':'b', ...} - tar_file = download(URL_TRAIN, 'wmt14', MD5_TRAIN) + tar_file = paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN) src_dict, trg_dict = __read_to_dict__(tar_file, dict_size) if reverse: src_dict = {v: k for k, v in src_dict.items()} @@ -157,5 +160,14 @@ def get_dict(dict_size, reverse=True): def fetch(): - download(URL_TRAIN, 'wmt14', MD5_TRAIN) - download(URL_MODEL, 'wmt14', MD5_MODEL) + paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN) + paddle.v2.dataset.common.download(URL_MODEL, 'wmt14', MD5_MODEL) + + +def convert(path): + """ + Converts dataset to recordio format + """ + dict_size = 30000 + paddle.v2.dataset.common.convert(path, train(dict_size), 10, "wmt14_train") + paddle.v2.dataset.common.convert(path, test(dict_size), 10, "wmt14_test") From 5c67669d105024c5ec84948207f80a01d3309887 Mon Sep 17 00:00:00 2001 From: liaogang Date: Mon, 26 Jun 2017 20:19:53 +0800 Subject: [PATCH 068/542] FIX: support shared type --- cmake/generic.cmake | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index cc294c8c00..850c1868e6 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -274,7 +274,11 @@ function(go_library TARGET_NAME) # Add dummy code to support `make target_name` under Terminal Command set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c) file(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";") - add_library(${TARGET_NAME} STATIC ${dummyfile}) + if (go_library_SHARED OR go_library_shared) + add_library(${TARGET_NAME} SHARED ${dummyfile}) + else() + add_library(${TARGET_NAME} STATIC ${dummyfile}) + endif() if(go_library_DEPS) add_dependencies(${TARGET_NAME} ${go_library_DEPS}) endif(go_library_DEPS) From e915aa9cf1784a82dce2b8cd0b77486c1219f6c3 Mon Sep 17 00:00:00 2001 From: Your Name Date: Mon, 26 Jun 2017 20:27:07 +0800 Subject: [PATCH 069/542] fix bugs --- python/paddle/v2/dataset/cifar.py | 2 +- python/paddle/v2/dataset/conll05.py | 4 ++-- python/paddle/v2/dataset/imdb.py | 12 +++++------- python/paddle/v2/dataset/imikolov.py | 12 +++++++----- python/paddle/v2/dataset/mnist.py | 2 +- python/paddle/v2/dataset/movielens.py | 3 ++- 
python/paddle/v2/dataset/sentiment.py | 2 +- python/paddle/v2/dataset/uci_housing.py | 2 +- python/paddle/v2/dataset/wmt14.py | 2 +- 9 files changed, 21 insertions(+), 20 deletions(-) diff --git a/python/paddle/v2/dataset/cifar.py b/python/paddle/v2/dataset/cifar.py index 95984d980d..f885b2834e 100644 --- a/python/paddle/v2/dataset/cifar.py +++ b/python/paddle/v2/dataset/cifar.py @@ -34,7 +34,7 @@ import numpy import paddle.v2.dataset.common import tarfile -__all__ = ['train100', 'test100', 'train10', 'test10'] +__all__ = ['train100', 'test100', 'train10', 'test10', 'convert'] URL_PREFIX = 'https://www.cs.toronto.edu/~kriz/' CIFAR10_URL = URL_PREFIX + 'cifar-10-python.tar.gz' diff --git a/python/paddle/v2/dataset/conll05.py b/python/paddle/v2/dataset/conll05.py index d4c2276b1b..f8aae52e7c 100644 --- a/python/paddle/v2/dataset/conll05.py +++ b/python/paddle/v2/dataset/conll05.py @@ -25,7 +25,7 @@ import gzip import itertools import paddle.v2.dataset.common -__all__ = ['test, get_dict', 'get_embedding'] +__all__ = ['test, get_dict', 'get_embedding', 'convert'] DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz' DATA_MD5 = '387719152ae52d60422c016e92a742fc' @@ -229,7 +229,7 @@ def fetch(): paddle.v2.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5) -def convert(): +def convert(path): """ Converts dataset to recordio format """ diff --git a/python/paddle/v2/dataset/imdb.py b/python/paddle/v2/dataset/imdb.py index d939bc3065..c0ec5992e0 100644 --- a/python/paddle/v2/dataset/imdb.py +++ b/python/paddle/v2/dataset/imdb.py @@ -28,7 +28,7 @@ import re import string import threading -__all__ = ['build_dict', 'train', 'test'] +__all__ = ['build_dict', 'train', 'test', 'convert'] URL = 'http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz' MD5 = '7c2ac02c03563afcf9b574c7e56c153a' @@ -168,12 +168,10 @@ def fetch(): paddle.v2.dataset.common.download(URL, 'imdb', MD5) -def convert(): +def convert(path): """ Converts dataset to recordio format """ - word_dict = ds.imdb.word_dict() - paddle.v2.dataset.common.convert(path, lambda: train(word_dict), 10, - "imdb_train") - paddle.v2.dataset.common.convert(path, lambda: test(word_dict), 10, - "imdb_test") + w = word_dict() + paddle.v2.dataset.common.convert(path, lambda: train(w), 10, "imdb_train") + paddle.v2.dataset.common.convert(path, lambda: test(w), 10, "imdb_test") diff --git a/python/paddle/v2/dataset/imikolov.py b/python/paddle/v2/dataset/imikolov.py index 034f58c2c8..b18ee8e9ba 100644 --- a/python/paddle/v2/dataset/imikolov.py +++ b/python/paddle/v2/dataset/imikolov.py @@ -18,11 +18,11 @@ This module will download dataset from http://www.fit.vutbr.cz/~imikolov/rnnlm/ and parse training set and test set into paddle reader creators. 
""" -import paddle.v2.dataset.common as common +import paddle.v2.dataset.common import collections import tarfile -__all__ = ['train', 'test', 'build_dict'] +__all__ = ['train', 'test', 'build_dict', 'convert'] URL = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz' MD5 = '30177ea32e27c525793142b6bf2c8e2d' @@ -145,7 +145,7 @@ def test(word_idx, n, data_type=DataType.NGRAM): def fetch(): - common.download(URL, "imikolov", MD5) + paddle.v2.dataset.common.download(URL, "imikolov", MD5) def convert(path): @@ -154,5 +154,7 @@ def convert(path): """ N = 5 word_dict = build_dict() - common.convert(path, train(word_dict, N), 10, "imikolov_train") - common.convert(path, test(word_dict, N), 10, "imikolov_test") + paddle.v2.dataset.common.convert(path, + train(word_dict, N), 10, "imikolov_train") + paddle.v2.dataset.common.convert(path, + test(word_dict, N), 10, "imikolov_test") diff --git a/python/paddle/v2/dataset/mnist.py b/python/paddle/v2/dataset/mnist.py index 92d7f69b8d..ea5891f4f3 100644 --- a/python/paddle/v2/dataset/mnist.py +++ b/python/paddle/v2/dataset/mnist.py @@ -21,7 +21,7 @@ import paddle.v2.dataset.common import subprocess import numpy import platform -__all__ = ['train', 'test'] +__all__ = ['train', 'test', 'convert'] URL_PREFIX = 'http://yann.lecun.com/exdb/mnist/' TEST_IMAGE_URL = URL_PREFIX + 't10k-images-idx3-ubyte.gz' diff --git a/python/paddle/v2/dataset/movielens.py b/python/paddle/v2/dataset/movielens.py index fb906cd4b6..d9372d422a 100644 --- a/python/paddle/v2/dataset/movielens.py +++ b/python/paddle/v2/dataset/movielens.py @@ -30,7 +30,8 @@ import functools __all__ = [ 'train', 'test', 'get_movie_title_dict', 'max_movie_id', 'max_user_id', - 'age_table', 'movie_categories', 'max_job_id', 'user_info', 'movie_info' + 'age_table', 'movie_categories', 'max_job_id', 'user_info', 'movie_info', + 'convert' ] age_table = [1, 18, 25, 35, 45, 50, 56] diff --git a/python/paddle/v2/dataset/sentiment.py b/python/paddle/v2/dataset/sentiment.py index 89683c2063..e33f120c87 100644 --- a/python/paddle/v2/dataset/sentiment.py +++ b/python/paddle/v2/dataset/sentiment.py @@ -28,7 +28,7 @@ from nltk.corpus import movie_reviews import paddle.v2.dataset.common -__all__ = ['train', 'test', 'get_word_dict'] +__all__ = ['train', 'test', 'get_word_dict', 'convert'] NUM_TRAINING_INSTANCES = 1600 NUM_TOTAL_INSTANCES = 2000 diff --git a/python/paddle/v2/dataset/uci_housing.py b/python/paddle/v2/dataset/uci_housing.py index 9e15000c02..c715ea9681 100644 --- a/python/paddle/v2/dataset/uci_housing.py +++ b/python/paddle/v2/dataset/uci_housing.py @@ -29,7 +29,7 @@ URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing MD5 = 'd4accdce7a25600298819f8e28e8d593' feature_names = [ 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', - 'PTRATIO', 'B', 'LSTAT' + 'PTRATIO', 'B', 'LSTAT', 'convert' ] UCI_TRAIN_DATA = None diff --git a/python/paddle/v2/dataset/wmt14.py b/python/paddle/v2/dataset/wmt14.py index f29c9275f0..e1dc4f4c30 100644 --- a/python/paddle/v2/dataset/wmt14.py +++ b/python/paddle/v2/dataset/wmt14.py @@ -25,7 +25,7 @@ import gzip import paddle.v2.dataset.common from paddle.v2.parameters import Parameters -__all__ = ['train', 'test', 'build_dict'] +__all__ = ['train', 'test', 'build_dict', 'convert'] URL_DEV_TEST = 'http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz' MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5' From a243bdfbcf2e2ad718d2140b66964187b4deab9e Mon Sep 17 00:00:00 2001 From: Your Name Date: Mon, 26 
Jun 2017 20:38:18 +0800 Subject: [PATCH 070/542] rm not need --- python/paddle/v2/reader/creator.py | 21 +------------------ python/paddle/v2/reader/tests/creator_test.py | 11 ---------- 2 files changed, 1 insertion(+), 31 deletions(-) diff --git a/python/paddle/v2/reader/creator.py b/python/paddle/v2/reader/creator.py index 994062096f..07142056f8 100644 --- a/python/paddle/v2/reader/creator.py +++ b/python/paddle/v2/reader/creator.py @@ -16,7 +16,7 @@ Creator package contains some simple reader creator, which could be used in user program. """ -__all__ = ['np_array', 'text_file', "RecordIO"] +__all__ = ['np_array', 'text_file'] def np_array(x): @@ -55,22 +55,3 @@ def text_file(path): f.close() return reader - - -def RecordIO(path): - """ - Creates a data reader that outputs record one one by one from given recordio file - :path: path of recordio file - :returns: data reader of recordio file - """ - - def reader(): - f = recordio.reader(path) - while True: - r = f.read() - if r is None: - break - yield r - f.close() - - return reader diff --git a/python/paddle/v2/reader/tests/creator_test.py b/python/paddle/v2/reader/tests/creator_test.py index dd84fbb002..359f3eeefb 100644 --- a/python/paddle/v2/reader/tests/creator_test.py +++ b/python/paddle/v2/reader/tests/creator_test.py @@ -13,9 +13,7 @@ # limitations under the License. import os import unittest - import numpy as np - import paddle.v2.reader.creator @@ -36,14 +34,5 @@ class TestTextFile(unittest.TestCase): self.assertEqual(e, str(idx * 2) + " " + str(idx * 2 + 1)) -class TestRecordIO(unittest.TestCase): - def test_RecordIO(self): - path = os.path.join( - os.path.dirname(__file__), "test_recordio_creator.dat") - reader = paddle.v2.reader.creator.RecordIO(path) - for idx, r in enumerate(reader()): - self.assertSequenceEqual(r, str(idx)) - - if __name__ == '__main__': unittest.main() From b9d015cbc4975f9513f106356e8f7848737cf0f9 Mon Sep 17 00:00:00 2001 From: Your Name Date: Mon, 26 Jun 2017 20:40:12 +0800 Subject: [PATCH 071/542] rm not need --- .../v2/reader/tests/test_recordio_creator.dat | Bin 88 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 python/paddle/v2/reader/tests/test_recordio_creator.dat diff --git a/python/paddle/v2/reader/tests/test_recordio_creator.dat b/python/paddle/v2/reader/tests/test_recordio_creator.dat deleted file mode 100644 index 17aa89b6796184407e83246d3f342a55a66b4a69..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 88 zcmZQ!W@2QOHw Date: Mon, 26 Jun 2017 23:32:46 +0800 Subject: [PATCH 072/542] FIX: Pinned memory --- paddle/memory/README.md | 1 + paddle/memory/detail/CMakeLists.txt | 6 +--- paddle/memory/detail/cpu_allocator.h | 39 ++++++++++++---------- paddle/memory/detail/cpu_allocator_test.cc | 16 +++------ 4 files changed, 27 insertions(+), 35 deletions(-) diff --git a/paddle/memory/README.md b/paddle/memory/README.md index e5f7880e4c..96a331a486 100644 --- a/paddle/memory/README.md +++ b/paddle/memory/README.md @@ -97,6 +97,7 @@ class BuddyAllocator { struct Block { size_t size; Block* left, right; + size_t index; // allocator id }; ... 
};

diff --git a/paddle/memory/detail/CMakeLists.txt b/paddle/memory/detail/CMakeLists.txt
index c425e9f947..fb8a11062d 100644
--- a/paddle/memory/detail/CMakeLists.txt
+++ b/paddle/memory/detail/CMakeLists.txt
@@ -1,5 +1 @@
-if(${WITH_GPU})
-  nv_test(cpu_allocator_test SRCS cpu_allocator_test.cc)  # nv_test links CUDA, but
-else(${WITH_GPU})
-  cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc)  # cc_test doesn't.
-endif(${WITH_GPU})
+cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc)
diff --git a/paddle/memory/detail/cpu_allocator.h b/paddle/memory/detail/cpu_allocator.h
index 0d8ea3f52b..a487fecef4 100644
--- a/paddle/memory/detail/cpu_allocator.h
+++ b/paddle/memory/detail/cpu_allocator.h
@@ -14,20 +14,19 @@ limitations under the License. */

 #pragma once

-#include <stdlib.h>  // for malloc and free
 #include <stddef.h>  // for size_t
+#include <stdlib.h>  // for malloc and free

-#ifdef PADDLE_WITH_GPU
-#include
-#include
-#endif  // PADDLE_WITH_GPU
+#ifndef _WIN32
+#include <sys/mman.h>  // for mlock and munlock
+#endif

 namespace paddle {
 namespace memory {
 namespace detail {

-// CPUAllocator calls cudaMallocHost, which returns
-// pinned and mlocked memory as staging areas for data exchange
+// CPUAllocator calls mlock, which returns
+// pinned and locked memory as staging areas for data exchange
 // between host and device. Allocates too much would reduce the
 // amount of memory available to the system for paging. So, by
 // default, we should use CPUAllocator.
@@ -35,33 +34,37 @@ template <bool staging>
 class CPUAllocator {
 public:
   void* Alloc(size_t size);
-  void Free(void* p);
+  void Free(void* p, size_t size);
 };

 template <>
 class CPUAllocator<false> {
 public:
-  void* Alloc(size_t size) { return malloc(size); }
-  void Free(void* p) { free(p); }
+  void* Alloc(size_t size) { return std::malloc(size); }
+  void Free(void* p, size_t size) { std::free(p); }
 };

-// If CMake macro PADDLE_WITH_GPU is OFF, C++ compiler won't generate the
-// following specialization that depends on the CUDA library.
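An aside before the removed block that follows: the point of this hunk is that pinned staging memory no longer requires CUDA at all. A minimal standalone sketch of the malloc-plus-mlock pattern the new specialization adopts (a sketch assuming POSIX <sys/mman.h>; the function names are illustrative and not part of the patch):

    #include <sys/mman.h>  // mlock, munlock (POSIX)
    #include <cstddef>
    #include <cstdlib>

    // Allocate page-locked ("pinned") host memory with no CUDA dependency.
    void* AllocPinned(std::size_t size) {
      void* p = std::malloc(size);
      if (p != nullptr) {
        mlock(p, size);  // keep the pages resident so device DMA never page-faults
      }
      return p;
    }

    void FreePinned(void* p, std::size_t size) {
      if (p != nullptr) {
        munlock(p, size);
        std::free(p);
      }
    }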
-#ifdef PADDLE_WITH_GPU template <> class CPUAllocator { public: void* Alloc(size_t size) { - void* p; - if (cudaMallocHost(&p, size) != cudaSuccess) { - return NULL; + void* p = std::malloc(size); + if (p == nullptr) { + return p; } +#ifndef _WIN32 + mlock(p, size); +#endif return p; } - void Free(void* p) { cudaFreeHost(p); } + void Free(void* p, size_t size) { +#ifndef _WIN32 + munlock(p, size); +#endif + std::free(p); + } }; -#endif // PADDLE_WITH_GPU } // namespace detail } // namespace memory diff --git a/paddle/memory/detail/cpu_allocator_test.cc b/paddle/memory/detail/cpu_allocator_test.cc index 464bc84e5c..4e45266cd8 100644 --- a/paddle/memory/detail/cpu_allocator_test.cc +++ b/paddle/memory/detail/cpu_allocator_test.cc @@ -19,20 +19,12 @@ TEST(CPUAllocator, NonStaging) { paddle::memory::detail::CPUAllocator a; void* p = a.Alloc(4096); EXPECT_NE(p, nullptr); - a.Free(p); + a.Free(p, 4096); } -#ifdef PADDLE_WITH_GPU TEST(CPUAllocator, Staging) { paddle::memory::detail::CPUAllocator a; - - int devices; - if (cudaGetDeviceCount(&devices) == cudaSuccess && devices > 0) { - void* p = a.Alloc(4096); - EXPECT_NE(p, nullptr); - a.Free(p); - } else { - EXPECT_EQ(a.Alloc(4096), nullptr); - } + void* p = a.Alloc(4096); + EXPECT_NE(p, nullptr); + a.Free(p, 4096); } -#endif // PADDLE_WITH_GPU From c44a94b4dc82db7b4493fe27a60270bee8cf9273 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Mon, 26 Jun 2017 23:55:30 +0800 Subject: [PATCH 073/542] "fix cmake build flags" --- go/pserver/cclient/cclient.go | 12 +++++++----- go/pserver/cclient/test/test_cclient.c | 8 ++++---- go/pserver/client_test.go | 8 ++------ go/pserver/optimizer.go | 17 +++++++++++------ go/pserver/optimizer_test.go | 8 +++++--- go/pserver/service.go | 8 ++++---- 6 files changed, 33 insertions(+), 28 deletions(-) diff --git a/go/pserver/cclient/cclient.go b/go/pserver/cclient/cclient.go index 6aaaff7409..92a41b7f54 100644 --- a/go/pserver/cclient/cclient.go +++ b/go/pserver/cclient/cclient.go @@ -123,8 +123,9 @@ func paddle_begin_init_params(client C.paddle_pserver_client) C.int { func paddle_init_param(client C.paddle_pserver_client, param C.paddle_parameter, param_config unsafe.Pointer, config_len C.int) C.int { et := pserver.ElementType(param.element_type) name := C.GoString(param.name) + content := cArrayToSlice(unsafe.Pointer(param.content), int(param.content_len)) pc := pserver.ParameterWithConfig{ - Param: pserver.Parameter{Name: name, ElementType: et, Content: param.content, Length: para.content_len}, + Param: pserver.Parameter{Name: name, ElementType: et, Content: content}, Config: cArrayToSlice(param_config, int(config_len)), } c := get(client) @@ -166,7 +167,8 @@ func paddle_send_grads(client C.paddle_pserver_client, grads **C.paddle_gradient grad := *(**C.paddle_gradient)(unsafe.Pointer((uintptr(unsafe.Pointer(grads)) + uintptr(i)*unsafe.Sizeof(*grads)))) et := pserver.ElementType(grad.element_type) name := C.GoString(grad.name) - gs = append(gs, pserver.Gradient{Name: name, ElementType: et, Content: grad.content, Length: grad.content_len}) + content := cArrayToSlice(unsafe.Pointer(grad.content), int(grad.content_len)) + gs = append(gs, pserver.Gradient{Name: name, ElementType: et, Content: content}) } c := get(client) @@ -223,14 +225,14 @@ func paddle_get_params(client C.paddle_pserver_client, dst **C.paddle_parameter, } if unsafe.Pointer(param.content) != nullPtr { - if int(param.content_len) != p.Length { + if int(param.content_len) != len(p.Content) { log.Errorf("the pre-allocated content len does not match 
parameter content len. Pre-allocated len: %d, returned len: %d", param.content_len, len(p.Content)) return C.PSERVER_ERROR } } - C.memcpy(unsafe.Pointer(param.content), unsafe.Pointer(p.Content), C.size_t(p.Length)) - param.content_len = C.int(p.Length) + C.memcpy(unsafe.Pointer(param.content), unsafe.Pointer(&p.Content[0]), C.size_t(len(p.Content))) + param.content_len = C.int(len(p.Content)) param.element_type = C.paddle_element_type(p.ElementType) } diff --git a/go/pserver/cclient/test/test_cclient.c b/go/pserver/cclient/test/test_cclient.c index 7d26127b60..5bd4913ba3 100644 --- a/go/pserver/cclient/test/test_cclient.c +++ b/go/pserver/cclient/test/test_cclient.c @@ -50,10 +50,10 @@ void getParams(paddle_pserver_client c) { int main() { char addr[] = "localhost:3000"; paddle_pserver_client c = paddle_new_pserver_client(addr, 1); - char config_proto[1024]; + char *config_proto; size_t config_proto_len = 0; ssize_t nread; - FILE *fp = fopen("optimizer.pb.txt", "r"); + FILE *fp = fopen("testdata/optimizer.pb.txt", "r"); if(!fp) { fail(); } while((nread = getline(&config_proto, &config_proto_len, fp)) != -1) { printf("%s", config_proto); @@ -70,7 +70,7 @@ retry: param.name = name_a; param.content = content_a; param.content_len = 2000; - int error = paddle_init_param(c, param, config_proto, config_proto_len); + int error = paddle_init_param(c, param, (void *)config_proto, config_proto_len); if (error != 0) { goto retry; } @@ -79,7 +79,7 @@ retry: param.name = name_b; param.content = content_b; param.content_len = 3000; - error = paddle_init_param(c, param, NULL, 0); + error = paddle_init_param(c, param, (void *)config_proto, config_proto_len); if (error != 0) { goto retry; } diff --git a/go/pserver/client_test.go b/go/pserver/client_test.go index c5d38e4112..d0371a26a1 100644 --- a/go/pserver/client_test.go +++ b/go/pserver/client_test.go @@ -75,9 +75,7 @@ func TestClientFull(t *testing.T) { var p pserver.Parameter p.Name = "p_" + strconv.Itoa(i) p.ElementType = pserver.Float32 - ElementValue := make([]byte, (i+1)*100) - p.Content = &ElementValue[0] - p.Length = len(ElementValue) + p.Content = make([]byte, (i+1)*100) err := c.InitParam(pserver.ParameterWithConfig{Param: p}) if err != nil { t.Fatal(err) @@ -94,9 +92,7 @@ func TestClientFull(t *testing.T) { var g pserver.Gradient g.Name = "p_" + strconv.Itoa(i) g.ElementType = pserver.Float32 - ElementValue := make([]byte, (i+1)*100) - g.Content = &ElementValue[0] - g.Length = len(ElementValue) + g.Content = make([]byte, (i+1)*100) grads = append(grads, g) } diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go index 12bf055b4d..4ecae0911c 100644 --- a/go/pserver/optimizer.go +++ b/go/pserver/optimizer.go @@ -4,7 +4,7 @@ package pserver // TODO(zhihong): move compile flags to cmake go_library #cgo pkg-config: protobuf #cgo CFLAGS: -I ../../ -#cgo LDFLAGS: /Users/dzh/.go/src/github.com/PaddlePaddle/Paddle/build/go/pserver/cclient/libpaddle_go_optimizer.a +#cgo LDFLAGS: /Users/dzh/.go/src/github.com/PaddlePaddle/Paddle/build/go/pserver/cclient/libpaddle_go_optimizer.a -lstdc++ #include "paddle/optimizer/optimizer.h" */ import "C" @@ -38,17 +38,20 @@ func newOptimizer(paramWithConfigs ParameterWithConfig) *optimizer { o := &optimizer{} p := paramWithConfigs.Param c := paramWithConfigs.Config - buffer := &p.Content[0] - o.opt = C.paddle_create_optimizer(C.uchar(c), C.int(len(c)), unsafe.Pointer(buffer), C.int(len(p.Content)), nullPtr, 0) + var cbuffer unsafe.Pointer + cbuffer = unsafe.Pointer(&p.Content[0]) + o.opt = 
C.paddle_create_optimizer((*C.uchar)(&c[0]), C.int(len(c)), + C.paddle_element_type(p.ElementType), cbuffer, C.int(len(p.Content)), + (*C.char)(nullPtr), 0) return o } func (o *optimizer) GetWeights(p *Parameter) error { var buffer unsafe.Pointer - buffer_len := C.paddle_optimizer_get_weights(unsafe.Pointer(o), &buffer) + buffer_len := C.paddle_optimizer_get_weights(o.opt, &buffer) if buffer_len == 0 || buffer == nullPtr { - return fmt.Errorf("parameter optimizer error : %s get failed", p.name) + return fmt.Errorf("parameter optimizer error : %s get failed", p.Name) } p.Content = cArrayToSlice(buffer, int(buffer_len)) return nil @@ -60,7 +63,9 @@ func (o *optimizer) UpdateParameter(g Gradient) error { } // FIXME: do we need a copy? discard g.Content by GC ok - r := C.paddle_update_parameter(o.opt, C.paddle_element_type(g.ElementType), unsafe.Pointer(g.Content), C.int(len(g.Content))) + var cbuffer unsafe.Pointer + cbuffer = unsafe.Pointer(&g.Content[0]) + r := C.paddle_update_parameter(o.opt, C.paddle_element_type(g.ElementType), cbuffer, C.int(len(g.Content))) if r != 0 { return fmt.Errorf("optimizer update returned error code: %d", r) } diff --git a/go/pserver/optimizer_test.go b/go/pserver/optimizer_test.go index eac744b5cd..368047d6f8 100644 --- a/go/pserver/optimizer_test.go +++ b/go/pserver/optimizer_test.go @@ -8,11 +8,13 @@ import ( func TestOptimizerCreateRelease(t *testing.T) { p := Parameter{ Name: "a", - ElementType: Float32, + ElementType: Int32, } - p.Content = []byte{0.1, 0.3} + p.Content = []byte{1, 3} config, err := ioutil.ReadFile("./cclient/test/testdata/optimizer.pb.txt") - + if err != nil { + t.Fatalf("read optimizer proto failed") + } param := ParameterWithConfig{ Param: p, Config: config, diff --git a/go/pserver/service.go b/go/pserver/service.go index d0d57136b5..cdd433260a 100644 --- a/go/pserver/service.go +++ b/go/pserver/service.go @@ -128,11 +128,11 @@ func (s *Service) GetParam(name string, parameter *Parameter) error { // nature. This race condition is allowed deliberately // to save the program from making a copy of the // paramter content. - p.Name = name - p.ElementType = opt.ElementType + parameter.Name = name + parameter.ElementType = opt.ElementType - ok := opt.GetWeights(¶meter) - return ok + err := opt.GetWeights(parameter) + return err } // Save tells the parameter server to save parameters. From ce70df86b1e8c892cdde5312caa0c2699f368f7d Mon Sep 17 00:00:00 2001 From: liaogang Date: Tue, 27 Jun 2017 00:15:36 +0800 Subject: [PATCH 074/542] Add gpu_allocator --- paddle/memory/.clang-format | 5 ++ paddle/memory/detail/CMakeLists.txt | 1 + paddle/memory/detail/cpu_allocator.h | 6 +- paddle/memory/detail/gpu_allocator.h | 92 ++++++++++++++++++++++ paddle/memory/detail/gpu_allocator_test.cc | 30 +++++++ 5 files changed, 131 insertions(+), 3 deletions(-) create mode 100644 paddle/memory/.clang-format create mode 100644 paddle/memory/detail/gpu_allocator.h create mode 100644 paddle/memory/detail/gpu_allocator_test.cc diff --git a/paddle/memory/.clang-format b/paddle/memory/.clang-format new file mode 100644 index 0000000000..29282dc87e --- /dev/null +++ b/paddle/memory/.clang-format @@ -0,0 +1,5 @@ +--- +Language: Cpp +BasedOnStyle: Google +Standard: Cpp11 +... 
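For orientation before the gpu_allocator.h diffs below: the new header separates ordinary device memory from pinned host staging memory. A reduced sketch of the two CUDA allocation paths it wraps, assuming the standard CUDA runtime API and collapsing error handling to a null return:

    #include <cuda_runtime.h>
    #include <cstddef>

    // staging == false: device memory; staging == true: pinned host memory.
    void* CudaAlloc(std::size_t size, bool staging) {
      void* p = nullptr;
      cudaError_t result =
          staging ? cudaMallocHost(&p, size) : cudaMalloc(&p, size);
      if (result != cudaSuccess) {
        cudaGetLastError();  // clear the sticky error before returning
        return nullptr;
      }
      return p;
    }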
diff --git a/paddle/memory/detail/CMakeLists.txt b/paddle/memory/detail/CMakeLists.txt index fb8a11062d..81ca8a0bbf 100644 --- a/paddle/memory/detail/CMakeLists.txt +++ b/paddle/memory/detail/CMakeLists.txt @@ -1 +1,2 @@ cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc) +nv_test(gpu_allocator_test SRCS gpu_allocator_test.cc) diff --git a/paddle/memory/detail/cpu_allocator.h b/paddle/memory/detail/cpu_allocator.h index a487fecef4..17753ccef7 100644 --- a/paddle/memory/detail/cpu_allocator.h +++ b/paddle/memory/detail/cpu_allocator.h @@ -32,21 +32,21 @@ namespace detail { // default, we should use CPUAllocator. template class CPUAllocator { -public: + public: void* Alloc(size_t size); void Free(void* p, size_t size); }; template <> class CPUAllocator { -public: + public: void* Alloc(size_t size) { return std::malloc(size); } void Free(void* p, size_t size) { std::free(p); } }; template <> class CPUAllocator { -public: + public: void* Alloc(size_t size) { void* p = std::malloc(size); if (p == nullptr) { diff --git a/paddle/memory/detail/gpu_allocator.h b/paddle/memory/detail/gpu_allocator.h new file mode 100644 index 0000000000..9452c41fb8 --- /dev/null +++ b/paddle/memory/detail/gpu_allocator.h @@ -0,0 +1,92 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include // for size_t + +#include +#include + +namespace paddle { +namespace memory { +namespace detail { + +inline void throw_on_error(cudaError_t e, const char* message) { + if (e) { + throw thrust::system_error(e, thrust::cuda_category(), message); + } +} + +// GPUAllocator calls cudaHostMalloc, which returns +// pinned and locked memory as staging areas for data exchange +// between host and device. Allocates too much would reduce the +// amount of memory available to the system for paging. So, by +// default, we should use GPUAllocator. +template +class GPUAllocator { +public: + void* Alloc(size_t size); + void Free(void* p, size_t size); +}; + +template <> +class GPUAllocator { +public: + void* Alloc(size_t size) { + void* p = 0; + cudaError_t result = cudaMalloc(&p, size); + if (result == cudaSuccess) { + return p; + } + // clear last error + cudaGetLastError(); + return nullptr; + } + + void Free(void* p, size_t size) { + // Purposefully allow cudaErrorCudartUnloading, because + // that is returned if you ever call cudaFree after the + // driver has already shutdown. This happens only if the + // process is terminating, in which case we don't care if + // cudaFree succeeds. 
+ auto err = cudaFree(p); + if (err != cudaErrorCudartUnloading) { + throw_on_error(err, "cudaFree failed"); + } + } +}; + +template <> +class GPUAllocator { +public: + void* Alloc(size_t size) { + void* p = 0; + cudaError_t result = cudaMallocHost(&p, size); + if (result == cudaSuccess) { + return p; + } + // clear last error + cudaGetLastError(); + return nullptr; + } + + void Free(void* p, size_t size) { + throw_on_error(cudaFreeHost(p), "cudaFreeHost failed"); + } +}; + +} // namespace detail +} // namespace memory +} // namespace paddle diff --git a/paddle/memory/detail/gpu_allocator_test.cc b/paddle/memory/detail/gpu_allocator_test.cc new file mode 100644 index 0000000000..18c1c9ab43 --- /dev/null +++ b/paddle/memory/detail/gpu_allocator_test.cc @@ -0,0 +1,30 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/memory/detail/gpu_allocator.h" +#include "gtest/gtest.h" + +TEST(GPUAllocator, NonStaging) { + paddle::memory::detail::GPUAllocator a; + void* p = a.Alloc(4096); + EXPECT_NE(p, nullptr); + a.Free(p, 4096); +} + +TEST(GPUAllocator, Staging) { + paddle::memory::detail::GPUAllocator a; + void* p = a.Alloc(4096); + EXPECT_NE(p, nullptr); + a.Free(p, 4096); +} From cebfae94678b86bbe890077e01bd1a21364b5e2e Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Tue, 27 Jun 2017 01:24:38 +0800 Subject: [PATCH 075/542] "move proto.txt to testdata folder" --- .../cclient/test/dump_optimizer_proto.py | 13 -- .../cclient/test/testdata/optimizer.pb.txt | Bin 51 -> 50 bytes go/pserver/service_test.go | 134 ++++++++++-------- 3 files changed, 72 insertions(+), 75 deletions(-) delete mode 100644 go/pserver/cclient/test/dump_optimizer_proto.py diff --git a/go/pserver/cclient/test/dump_optimizer_proto.py b/go/pserver/cclient/test/dump_optimizer_proto.py deleted file mode 100644 index 2ed4db97f9..0000000000 --- a/go/pserver/cclient/test/dump_optimizer_proto.py +++ /dev/null @@ -1,13 +0,0 @@ -import OptimizerConfig_pb2 as pb - -config = pb.OptimizerConfig() -config.clip_norm = 0.1 -config.lr_policy = pb.OptimizerConfig.Const -config.optimizer = pb.OptimizerConfig.SGD -config.sgd.momentum = 0.0 -config.sgd.decay = 0.0 -config.sgd.nesterov = False -config.const_lr.learning_rate = 0.1 -s = config.SerializeToString() -with open("optimizer.pb.txt", 'w') as f: - f.write(s) diff --git a/go/pserver/cclient/test/testdata/optimizer.pb.txt b/go/pserver/cclient/test/testdata/optimizer.pb.txt index 27c8a584df40ab714edfd730f0ff7b7bd3783964..27dd3bc5f19e2964b4b674cff8860233cbdb445a 100644 GIT binary patch delta 4 LcmXpunqUL~0=NMv delta 6 NcmXpqo?yht1poyT0W$yq diff --git a/go/pserver/service_test.go b/go/pserver/service_test.go index b746d13e1c..a88e2df73a 100644 --- a/go/pserver/service_test.go +++ b/go/pserver/service_test.go @@ -1,7 +1,7 @@ package pserver_test import ( - "reflect" + "io/ioutil" "sync" "testing" "time" @@ -15,73 +15,79 @@ func TestFull(t *testing.T) { p.Name = "param_a" p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0} p.ElementType 
= pserver.Int32 - err := s.InitParam(pserver.ParameterWithConfig{Param: p, Config: nil}, nil) + config, err := ioutil.ReadFile("./cclient/test/testdata/optimizer.pb.txt") if err != nil { - t.FailNow() - } - - var p1 pserver.Parameter - p1.Name = "param_b" - p1.Content = []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} - p1.ElementType = pserver.Float32 - err = s.InitParam(pserver.ParameterWithConfig{Param: p1, Config: nil}, nil) - if err != nil { - t.FailNow() - } - - err = s.FinishInitParams(0, nil) - if err != nil { - t.FailNow() - } - - var param pserver.Parameter - err = s.GetParam("param_b", ¶m) - if err != nil { - t.FailNow() - } - - if !reflect.DeepEqual(param, p1) { - t.FailNow() - } - - g1, g2 := pserver.Gradient(p1), pserver.Gradient(p) - err = s.SendGrad(g1, nil) - if err != nil { - t.FailNow() - } - err = s.SendGrad(g2, nil) - - if err != nil { - t.FailNow() - } - - var param1 pserver.Parameter - err = s.GetParam("param_a", ¶m1) - if err != nil { - t.FailNow() - } - - // don't compare content, since it's already changed by - // gradient update. - param1.Content = nil - p.Content = nil - - if !reflect.DeepEqual(param1, p) { - t.FailNow() + t.Fatalf("read optimizer proto failed") } -} -func TestMultipleInit(t *testing.T) { - s := pserver.NewService() - err := s.FinishInitParams(0, nil) + err = s.InitParam(pserver.ParameterWithConfig{Param: p, Config: config}, nil) if err != nil { t.FailNow() } - err = s.FinishInitParams(0, nil) - if err.Error() != pserver.AlreadyInitialized { - t.FailNow() - } + // var p1 pserver.Parameter + // p1.Name = "param_b" + // p1.Content = []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + // p1.ElementType = pserver.Float32 + // fmt.Println("paddle passed") + // err = s.InitParam(pserver.ParameterWithConfig{Param: p1, Config: config}, nil) + // if err != nil { + // t.FailNow() + // } + + // err = s.FinishInitParams(0, nil) + // if err != nil { + // t.FailNow() + // } + + // var param pserver.Parameter + // err = s.GetParam("param_b", ¶m) + // if err != nil { + // t.FailNow() + // } + + // if !reflect.DeepEqual(param, p1) { + // t.FailNow() + // } + + // g1, g2 := pserver.Gradient(p1), pserver.Gradient(p) + // err = s.SendGrad(g1, nil) + // if err != nil { + // t.FailNow() + // } + // err = s.SendGrad(g2, nil) + + // if err != nil { + // t.FailNow() + // } + + // var param1 pserver.Parameter + // err = s.GetParam("param_a", ¶m1) + // if err != nil { + // t.FailNow() + // } + + // // don't compare content, since it's already changed by + // // gradient update. 
+ // param1.Content = nil + // p.Content = nil + + // if !reflect.DeepEqual(param1, p) { + // t.FailNow() + // } + // } + + // func TestMultipleInit(t *testing.T) { + // s := pserver.NewService() + // err := s.FinishInitParams(0, nil) + // if err != nil { + // t.FailNow() + // } + + // err = s.FinishInitParams(0, nil) + // if err.Error() != pserver.AlreadyInitialized { + // t.FailNow() + // } } func TestUninitialized(t *testing.T) { @@ -133,7 +139,11 @@ func TestBlockUntilInitialized(t *testing.T) { p.Name = "param_a" p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0} p.ElementType = pserver.Int32 - err := s.InitParam(pserver.ParameterWithConfig{Param: p, Config: nil}, nil) + config, err := ioutil.ReadFile("./cclient/test/testdata/optimizer.pb.txt") + if err != nil { + t.Fatalf("read optimizer proto failed") + } + err = s.InitParam(pserver.ParameterWithConfig{Param: p, Config: config}, nil) if err != nil { t.FailNow() } From e2e0fbd4188fcbcc6bf69d1ef22b3f6f0a927f84 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Mon, 26 Jun 2017 10:36:49 -0700 Subject: [PATCH 076/542] Add tesnor.h --- paddle/framework/tensor.h | 91 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 paddle/framework/tensor.h diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h new file mode 100644 index 0000000000..a658537430 --- /dev/null +++ b/paddle/framework/tensor.h @@ -0,0 +1,91 @@ +/* + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ +#pragma once + +namespace paddle { +namespace framework { + +class Tensor { + using paddle::platform::Place; + using paddle::platform::get_place; + + public: + explicit Tensor(DDim dims) : dims_(dims), place_(get_place()) {} + explicit Tensor(DDim dims, Place place) : dims_(dims), place_(place) {} + + template + const T* data() const { + PADDLE_ASSERT(holder_ != nullptr); + PADDLE_ASSERT(holder_->Place() == place_); + PADDLE_ASSERT(holder_->Size() >= dims_.product() * sizeof(T)); + return static_cast(holder->Ptr()); + } + + template ::value>::type> + T* mutable_data() { + if (holder_ == nullptr || holder_->Place() != place_ || + holder_->Size() < dims_.product() * sizeof(T)) { + holder_.reset(new PlaceholderImpl(place_, dims.product() * sizeof(T))); + } + return static_cast(holder_->Ptr()); + } + + template ::value>::type> + T* mutable_data(DDim dims) { + dims_ = dims; + return mutable_data(); + } + + template ::value>::type> + T* mutable_data(DDim dims, Place place) { + dims_ = dims; + place_ = place; + return mutable_data(); + } + + private: + // Placeholder hides type T, so it doesn't appear as a template + // parameter of Variable. 
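An aside on the idiom named in the comment above: the Placeholder/PlaceholderImpl pair is classic type erasure, where a typed wrapper remembers T while the owner sees only an untyped base. In miniature, with illustrative names rather than tensor.h's own:

    #include <cstddef>
    #include <memory>

    struct Holder {
      virtual ~Holder() {}
      virtual void* Ptr() const = 0;  // untyped view of the storage
    };

    template <typename T>
    struct TypedHolder : Holder {
      explicit TypedHolder(std::size_t n) : data_(new T[n]) {}
      void* Ptr() const override { return data_.get(); }
      std::unique_ptr<T[]> data_;  // the concrete element type lives only here
    };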
+ struct Placeholder { + virtual ~Placeholder() {} + virtual void* Ptr() const = 0; + virtual Place Place() const = 0; + virtual size_t Size() const = 0; + }; + + template + struct PlaceholderImpl : public Placeholder { + PlaceholderImpl(Place pl, size_t size) + : ptr_(memory::Alloc(pl, size), paddle::memory::Deleter(pl)), + place_(pl), + size_(size) {} + + virtual void* Ptr() const { return static_cast(ptr_.get()); } + virtual size_t Size() const { return size_; } + virtual Place Place() const { return place_; } + + std::unique_ptr ptr_; + Place place_; // record the place of ptr_. + size_t size_; // size of the memory block. + }; + + std::unique_ptr holder_; // holds the memory block if allocated. + DDim dims_; // could be smallers than the holder_->Size(). + paddle::platform::Place place_; +}; + +} // namespace framework +} // namespace paddle From 864386d59682307ba9e033cfce8355029beda9b5 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Tue, 27 Jun 2017 01:51:10 +0800 Subject: [PATCH 077/542] "change log in optimizer" --- go/pserver/optimizer.go | 4 +- go/pserver/service_test.go | 129 +++++++++++++++++++------------------ 2 files changed, 67 insertions(+), 66 deletions(-) diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go index 4ecae0911c..af7faad254 100644 --- a/go/pserver/optimizer.go +++ b/go/pserver/optimizer.go @@ -47,7 +47,7 @@ func newOptimizer(paramWithConfigs ParameterWithConfig) *optimizer { } func (o *optimizer) GetWeights(p *Parameter) error { - + // FIXME: get weigths from optimizer has bug var buffer unsafe.Pointer buffer_len := C.paddle_optimizer_get_weights(o.opt, &buffer) if buffer_len == 0 || buffer == nullPtr { @@ -59,7 +59,7 @@ func (o *optimizer) GetWeights(p *Parameter) error { func (o *optimizer) UpdateParameter(g Gradient) error { if o.ElementType != g.ElementType { - return fmt.Errorf("Name: %s, parameter and gradient element type not match, parameter: %v, gradient: %v", g.Name, g.ElementType, g.ElementType) + return fmt.Errorf("Name: %s, parameter and gradient element type not match, parameter: %v, gradient: %v", g.Name, o.ElementType, g.ElementType) } // FIXME: do we need a copy? 
discard g.Content by GC ok diff --git a/go/pserver/service_test.go b/go/pserver/service_test.go index a88e2df73a..a09b25dec0 100644 --- a/go/pserver/service_test.go +++ b/go/pserver/service_test.go @@ -2,6 +2,7 @@ package pserver_test import ( "io/ioutil" + "reflect" "sync" "testing" "time" @@ -9,7 +10,7 @@ import ( "github.com/PaddlePaddle/Paddle/go/pserver" ) -func TestFull(t *testing.T) { +func TestNewName(t *testing.T) { s := pserver.NewService() var p pserver.Parameter p.Name = "param_a" @@ -25,69 +26,69 @@ func TestFull(t *testing.T) { t.FailNow() } - // var p1 pserver.Parameter - // p1.Name = "param_b" - // p1.Content = []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} - // p1.ElementType = pserver.Float32 - // fmt.Println("paddle passed") - // err = s.InitParam(pserver.ParameterWithConfig{Param: p1, Config: config}, nil) - // if err != nil { - // t.FailNow() - // } - - // err = s.FinishInitParams(0, nil) - // if err != nil { - // t.FailNow() - // } - - // var param pserver.Parameter - // err = s.GetParam("param_b", ¶m) - // if err != nil { - // t.FailNow() - // } - - // if !reflect.DeepEqual(param, p1) { - // t.FailNow() - // } - - // g1, g2 := pserver.Gradient(p1), pserver.Gradient(p) - // err = s.SendGrad(g1, nil) - // if err != nil { - // t.FailNow() - // } - // err = s.SendGrad(g2, nil) - - // if err != nil { - // t.FailNow() - // } - - // var param1 pserver.Parameter - // err = s.GetParam("param_a", ¶m1) - // if err != nil { - // t.FailNow() - // } - - // // don't compare content, since it's already changed by - // // gradient update. - // param1.Content = nil - // p.Content = nil - - // if !reflect.DeepEqual(param1, p) { - // t.FailNow() - // } - // } - - // func TestMultipleInit(t *testing.T) { - // s := pserver.NewService() - // err := s.FinishInitParams(0, nil) - // if err != nil { - // t.FailNow() - // } - - // err = s.FinishInitParams(0, nil) - // if err.Error() != pserver.AlreadyInitialized { - // t.FailNow() - // } + var p1 pserver.Parameter + p1.Name = "param_b" + p1.Content = []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + p1.ElementType = pserver.Float32 + err = s.InitParam(pserver.ParameterWithConfig{Param: p1, Config: config}, nil) + if err != nil { + t.FailNow() + } + + err = s.FinishInitParams(0, nil) + if err != nil { + t.FailNow() + } + + var param pserver.Parameter + err = s.GetParam("param_b", ¶m) + if err != nil { + t.FailNow() + } + + if !reflect.DeepEqual(param, p1) { + t.FailNow() + } + + g1, g2 := pserver.Gradient(p1), pserver.Gradient(p) + + err = s.SendGrad(g1, nil) + if err != nil { + t.FailNow() + } + err = s.SendGrad(g2, nil) + + if err != nil { + t.FailNow() + } + + var param1 pserver.Parameter + err = s.GetParam("param_a", ¶m1) + if err != nil { + t.FailNow() + } + + // don't compare content, since it's already changed by + // gradient update. 
+ param1.Content = nil + p.Content = nil + + if !reflect.DeepEqual(param1, p) { + t.FailNow() + } +} + +func TestMultipleInit(t *testing.T) { + s := pserver.NewService() + err := s.FinishInitParams(0, nil) + if err != nil { + t.FailNow() + } + + err = s.FinishInitParams(0, nil) + if err.Error() != pserver.AlreadyInitialized { + t.FailNow() + } } func TestUninitialized(t *testing.T) { From e02859c0f53dfe4616976b015d4fefd8aaa6eb39 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Mon, 26 Jun 2017 15:27:01 -0700 Subject: [PATCH 078/542] Replace {cpu,gpu}_allocator.h and {cpu,gpu}_allocator_test.cc by system_allocator{.h,_test.cc} --- paddle/memory/CMakeLists.txt | 6 ++ paddle/memory/detail/CMakeLists.txt | 3 +- paddle/memory/detail/cpu_allocator.h | 71 ----------------- paddle/memory/detail/cpu_allocator_test.cc | 30 ------- .../{gpu_allocator.h => system_allocator.h} | 79 +++++++++++-------- ...cator_test.cc => system_allocator_test.cc} | 20 ++++- paddle/memory/memory.cc | 67 +++++++--------- paddle/memory/memory.h | 16 +--- 8 files changed, 106 insertions(+), 186 deletions(-) delete mode 100644 paddle/memory/detail/cpu_allocator.h delete mode 100644 paddle/memory/detail/cpu_allocator_test.cc rename paddle/memory/detail/{gpu_allocator.h => system_allocator.h} (58%) rename paddle/memory/detail/{gpu_allocator_test.cc => system_allocator_test.cc} (69%) diff --git a/paddle/memory/CMakeLists.txt b/paddle/memory/CMakeLists.txt index 3943c3cfad..8662512496 100644 --- a/paddle/memory/CMakeLists.txt +++ b/paddle/memory/CMakeLists.txt @@ -1 +1,7 @@ add_subdirectory(detail) + +if(${WITH_GPU}) + nv_library(memory SRCS memory.cc) +else(${WITH_GPU}) + cc_library(memory SRCS memroy.cc) +endif(${WITH_GPU}) diff --git a/paddle/memory/detail/CMakeLists.txt b/paddle/memory/detail/CMakeLists.txt index 81ca8a0bbf..3b5bbd7a12 100644 --- a/paddle/memory/detail/CMakeLists.txt +++ b/paddle/memory/detail/CMakeLists.txt @@ -1,2 +1 @@ -cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc) -nv_test(gpu_allocator_test SRCS gpu_allocator_test.cc) +cc_test(system_allocator_test SRCS system_allocator_test.cc) diff --git a/paddle/memory/detail/cpu_allocator.h b/paddle/memory/detail/cpu_allocator.h deleted file mode 100644 index 17753ccef7..0000000000 --- a/paddle/memory/detail/cpu_allocator.h +++ /dev/null @@ -1,71 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include // for size_t -#include // for malloc and free - -#ifndef _WIN32 -#include // for mlock and munlock -#endif - -namespace paddle { -namespace memory { -namespace detail { - -// CPUAllocator calls mlock, which returns -// pinned and locked memory as staging areas for data exchange -// between host and device. Allocates too much would reduce the -// amount of memory available to the system for paging. So, by -// default, we should use CPUAllocator. 
-template -class CPUAllocator { - public: - void* Alloc(size_t size); - void Free(void* p, size_t size); -}; - -template <> -class CPUAllocator { - public: - void* Alloc(size_t size) { return std::malloc(size); } - void Free(void* p, size_t size) { std::free(p); } -}; - -template <> -class CPUAllocator { - public: - void* Alloc(size_t size) { - void* p = std::malloc(size); - if (p == nullptr) { - return p; - } -#ifndef _WIN32 - mlock(p, size); -#endif - return p; - } - - void Free(void* p, size_t size) { -#ifndef _WIN32 - munlock(p, size); -#endif - std::free(p); - } -}; - -} // namespace detail -} // namespace memory -} // namespace paddle diff --git a/paddle/memory/detail/cpu_allocator_test.cc b/paddle/memory/detail/cpu_allocator_test.cc deleted file mode 100644 index 4e45266cd8..0000000000 --- a/paddle/memory/detail/cpu_allocator_test.cc +++ /dev/null @@ -1,30 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/memory/detail/cpu_allocator.h" -#include "gtest/gtest.h" - -TEST(CPUAllocator, NonStaging) { - paddle::memory::detail::CPUAllocator a; - void* p = a.Alloc(4096); - EXPECT_NE(p, nullptr); - a.Free(p, 4096); -} - -TEST(CPUAllocator, Staging) { - paddle::memory::detail::CPUAllocator a; - void* p = a.Alloc(4096); - EXPECT_NE(p, nullptr); - a.Free(p, 4096); -} diff --git a/paddle/memory/detail/gpu_allocator.h b/paddle/memory/detail/system_allocator.h similarity index 58% rename from paddle/memory/detail/gpu_allocator.h rename to paddle/memory/detail/system_allocator.h index 9452c41fb8..0a64553188 100644 --- a/paddle/memory/detail/gpu_allocator.h +++ b/paddle/memory/detail/system_allocator.h @@ -14,20 +14,58 @@ limitations under the License. */ #pragma once -#include // for size_t +#include // for size_t +#include // for mlock and munlock +#include // for malloc and free -#include +#ifndef PADDLE_ONLY_CPU #include +#include +#endif // PADDLE_ONLY_CPU namespace paddle { namespace memory { namespace detail { +class SystemAllocator { + public: + virtual void* Alloc(size_t size) = 0; + virtual void* Free(void* p) = 0; +}; + +// CPUAllocator calls mlock, which returns pinned +// and locked memory as staging areas for data exchange between host +// and device. Allocates too much would reduce the amount of memory +// available to the system for paging. So, by default, we should use +// CPUAllocator. +template +class CPUAllocator : public SystemAllocator { + public: + virtual void* Alloc(size_t size) { + void* p = std::malloc(size); + if (p != nullptr && lock_memory) { + mlock(p, size); + } + return p; + } + + virtual void Free(void* p, size_t size) { + if (p != nullptr && lock_memory) { + munlock(p, size); + } + std::free(p); + } +}; + +#ifndef PADDLE_ONLY_CPU // The following code are for CUDA. 
+ +namespace { inline void throw_on_error(cudaError_t e, const char* message) { if (e) { throw thrust::system_error(e, thrust::cuda_category(), message); } } +} // namespace // GPUAllocator calls cudaHostMalloc, which returns // pinned and locked memory as staging areas for data exchange @@ -36,17 +74,11 @@ inline void throw_on_error(cudaError_t e, const char* message) { // default, we should use GPUAllocator. template class GPUAllocator { -public: - void* Alloc(size_t size); - void Free(void* p, size_t size); -}; - -template <> -class GPUAllocator { -public: + public: void* Alloc(size_t size) { void* p = 0; - cudaError_t result = cudaMalloc(&p, size); + cudaError_t result = + staging ? cudaMallocHost(&p, size) : cudaMalloc(&p, size); if (result == cudaSuccess) { return p; } @@ -60,32 +92,15 @@ public: // that is returned if you ever call cudaFree after the // driver has already shutdown. This happens only if the // process is terminating, in which case we don't care if - // cudaFree succeeds. - auto err = cudaFree(p); + // cudaFree succeeds. + auto err = staging ? cudaFreeHost(p) : cudaFree(p); if (err != cudaErrorCudartUnloading) { - throw_on_error(err, "cudaFree failed"); + throw_on_error(err, "cudaFree failed"); } } }; -template <> -class GPUAllocator { -public: - void* Alloc(size_t size) { - void* p = 0; - cudaError_t result = cudaMallocHost(&p, size); - if (result == cudaSuccess) { - return p; - } - // clear last error - cudaGetLastError(); - return nullptr; - } - - void Free(void* p, size_t size) { - throw_on_error(cudaFreeHost(p), "cudaFreeHost failed"); - } -}; +#endif // PADDLE_ONLY_CPU } // namespace detail } // namespace memory diff --git a/paddle/memory/detail/gpu_allocator_test.cc b/paddle/memory/detail/system_allocator_test.cc similarity index 69% rename from paddle/memory/detail/gpu_allocator_test.cc rename to paddle/memory/detail/system_allocator_test.cc index 18c1c9ab43..4e7b8018b6 100644 --- a/paddle/memory/detail/gpu_allocator_test.cc +++ b/paddle/memory/detail/system_allocator_test.cc @@ -12,9 +12,25 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/memory/detail/gpu_allocator.h" +#include "paddle/memory/detail/system_allocator.h" #include "gtest/gtest.h" +TEST(CPUAllocator, NoLockMem) { + paddle::memory::detail::CPUAllocator a; + void* p = a.Alloc(4096); + EXPECT_NE(p, nullptr); + a.Free(p, 4096); +} + +TEST(CPUAllocator, LockMem) { + paddle::memory::detail::CPUAllocator a; + void* p = a.Alloc(4096); + EXPECT_NE(p, nullptr); + a.Free(p, 4096); +} + +#ifndef PADDLE_ONLY_CPU + TEST(GPUAllocator, NonStaging) { paddle::memory::detail::GPUAllocator a; void* p = a.Alloc(4096); @@ -28,3 +44,5 @@ TEST(GPUAllocator, Staging) { EXPECT_NE(p, nullptr); a.Free(p, 4096); } + +#endif // PADDLE_ONLY_CPU diff --git a/paddle/memory/memory.cc b/paddle/memory/memory.cc index b617923731..ca3c01ebdb 100644 --- a/paddle/memory/memory.cc +++ b/paddle/memory/memory.cc @@ -14,48 +14,41 @@ limitations under the License. 
*/ #include "paddle/memory/memory.h" +#include "paddle/memory/detail/cpu_allocator.h" +#include "paddle/memory/detail/gpu_allocator.h" + namespace paddle { namespace memory { -template <> -void* Alloc(CPUPlace, size_t size) { - return GetCPUBuddyAllocator(false /*non-staging*/)->Alloc(size); -} - -void* AllocStaging(CPUPlace, size_t size) { - return GetCPUBuddyAllocator(true /*staging*/)->Alloc(size); -} - -template <> -void* Alloc(GPUPlace pl, size_t size) { - return GetGPUBuddyAllocator(pl.device)->Alloc(size); -} - -template <> -void Free(CPUPlace, void* p) { - return GetCPUBuddyAllocator(false /*non-staging*/)->Free(p); -} - -void FreeStaging(CPUPlace, void* p) { - return GetCPUBuddyAllocator(false /*non-staging*/)->Free(p); -} - -#ifdef PADDLE_WITH_GPU -template <> -void* Alloc(GPUPlace pl, void* p) { - return GetGPUBuddyAllocator(pl.device)->Free(p); -} - -template <> -size_t Used(CPUPlace) { +void Alloc(paddle::platform::Place pl, size_t size) { +#ifndef PADDLE_ONLY_CPU + if (paddle::platform::is_gpu_place(pl)) { + return GetGPUBuddyAllocator(pl.device)->Alloc(size); + } +#endif // PADDLE_ONLY_CPU + PADDLE_ASSERT(paddle::platform::is_cpu_place(pl)); + return GetCPUBuddyAllocator()->Alloc(size); +} + +void Free(paddle::platform::Place pl, void* p) { +#ifndef PADDLE_ONLY_CPU + if (paddle::platform::is_gpu_place(pl)) { + GetGPUBuddyAllocator(pl.device)->Free(p); + } +#endif // PADDLE_ONLY_CPU + PADDLE_ASSERT(paddle::platform::is_cpu_place(pl)); + GetCPUBuddyAllocator()->Free(p); +} + +size_t Used(paddle::platform::Place pl) { +#ifndef PADDLE_ONLY_CPU + if (paddle::platform::is_gpu_place(pl)) { + return GetGPUBuddyAllocator(pl.device)->Used(); + } +#endif // PADDLE_ONLY_CPU + PADDLE_ASSERT(paddle::platform::is_cpu_place(pl)); return GetCPUBuddyAllocator()->Used(); } -template <> -size_t Alloc(GPUPlace pl) { - return GetGPUBuddyAllocator(pl.device)->Used(); -} -#endif // PADDLE_WITH_GPU - } // namespace memory } // namespace paddle diff --git a/paddle/memory/memory.h b/paddle/memory/memory.h index 8c15a133bb..0bc609205e 100644 --- a/paddle/memory/memory.h +++ b/paddle/memory/memory.h @@ -19,19 +19,9 @@ limitations under the License. */ namespace paddle { namespace memory { -template -void* Alloc(Place, size_t); -template -void Free(Place, void*); -template -size_t Used(Place); - -// Staging memory means "pinned" host memory that can be mapped into -// the CUDA memory space and accessed by the device rapidly. Don't -// allocate too much staging memory; otherwise system performance will -// degrade because the OS cannot find enough swap memory space. 
-void* AllocStaging(CPUPlace, size_t); -void* FreeStaging(CPUPlace, size_t); +void* Alloc(paddle::framework::Place, size_t); +void Free(paddle::framework::Place, void*); +size_t Used(paddle::framework::Place); } // namespace memory } // namespace paddle From 9cee78c380fd497fe2011868219eed89b7bc6808 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Tue, 27 Jun 2017 09:29:22 +0800 Subject: [PATCH 079/542] fix precommit --- python/paddle/trainer_config_helpers/networks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py index 1bf59ed484..67154a8d7d 100755 --- a/python/paddle/trainer_config_helpers/networks.py +++ b/python/paddle/trainer_config_helpers/networks.py @@ -1381,7 +1381,7 @@ def inputs(layers, *args): if len(args) != 0: layers.extend(args) - Inputs(*[l.name for l in layers]) + Inputs(* [l.name for l in layers]) def outputs(layers, *args): @@ -1424,7 +1424,7 @@ def outputs(layers, *args): assert len(layers) > 0 if HasInputsSet(): # input already set - Outputs(*[l.name for l in layers]) + Outputs(* [l.name for l in layers]) return # just return outputs. if len(layers) != 1: From 6250d108bfd39afb3b2beba438ecb22eca8991bc Mon Sep 17 00:00:00 2001 From: liaogang Date: Tue, 27 Jun 2017 09:51:55 +0800 Subject: [PATCH 080/542] FIX: clang-format --- paddle/memory/detail/gpu_allocator.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/paddle/memory/detail/gpu_allocator.h b/paddle/memory/detail/gpu_allocator.h index 9452c41fb8..682afdf7d3 100644 --- a/paddle/memory/detail/gpu_allocator.h +++ b/paddle/memory/detail/gpu_allocator.h @@ -16,8 +16,8 @@ limitations under the License. */ #include // for size_t -#include #include +#include namespace paddle { namespace memory { @@ -36,14 +36,14 @@ inline void throw_on_error(cudaError_t e, const char* message) { // default, we should use GPUAllocator. template class GPUAllocator { -public: + public: void* Alloc(size_t size); void Free(void* p, size_t size); }; template <> class GPUAllocator { -public: + public: void* Alloc(size_t size) { void* p = 0; cudaError_t result = cudaMalloc(&p, size); @@ -60,22 +60,22 @@ public: // that is returned if you ever call cudaFree after the // driver has already shutdown. This happens only if the // process is terminating, in which case we don't care if - // cudaFree succeeds. + // cudaFree succeeds. 
auto err = cudaFree(p); if (err != cudaErrorCudartUnloading) { - throw_on_error(err, "cudaFree failed"); + throw_on_error(err, "cudaFree failed"); } } }; template <> class GPUAllocator { -public: + public: void* Alloc(size_t size) { void* p = 0; cudaError_t result = cudaMallocHost(&p, size); if (result == cudaSuccess) { - return p; + return p; } // clear last error cudaGetLastError(); From 0990b0a5d41de2f6a82795f808b3ab36e9634a72 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Tue, 27 Jun 2017 10:20:30 +0800 Subject: [PATCH 081/542] add testdata --- .../v2/reader/tests/test_recordio_creator.dat | Bin 0 -> 88 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 python/paddle/v2/reader/tests/test_recordio_creator.dat diff --git a/python/paddle/v2/reader/tests/test_recordio_creator.dat b/python/paddle/v2/reader/tests/test_recordio_creator.dat new file mode 100644 index 0000000000000000000000000000000000000000..17aa89b6796184407e83246d3f342a55a66b4a69 GIT binary patch literal 88 zcmZQ!W@2QOHw Date: Tue, 27 Jun 2017 10:50:13 +0800 Subject: [PATCH 082/542] "copy parameter content with memcpy" --- go/pserver/optimizer.go | 17 ++++++----------- go/pserver/service.go | 8 +++----- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go index af7faad254..df0ea373bb 100644 --- a/go/pserver/optimizer.go +++ b/go/pserver/optimizer.go @@ -39,22 +39,20 @@ func newOptimizer(paramWithConfigs ParameterWithConfig) *optimizer { p := paramWithConfigs.Param c := paramWithConfigs.Config var cbuffer unsafe.Pointer - cbuffer = unsafe.Pointer(&p.Content[0]) + cbuffer_len := int(unsafe.Sizeof(p.Content[0])) * len(p.Content) + cbuffer = C.malloc(C.size_t(cbuffer_len)) + C.memcpy(cbuffer, unsafe.Pointer(&p.Content[0]), C.size_t(cbuffer_len)) o.opt = C.paddle_create_optimizer((*C.uchar)(&c[0]), C.int(len(c)), C.paddle_element_type(p.ElementType), cbuffer, C.int(len(p.Content)), (*C.char)(nullPtr), 0) return o } -func (o *optimizer) GetWeights(p *Parameter) error { +func (o *optimizer) GetWeights() []byte { // FIXME: get weigths from optimizer has bug var buffer unsafe.Pointer buffer_len := C.paddle_optimizer_get_weights(o.opt, &buffer) - if buffer_len == 0 || buffer == nullPtr { - return fmt.Errorf("parameter optimizer error : %s get failed", p.Name) - } - p.Content = cArrayToSlice(buffer, int(buffer_len)) - return nil + return cArrayToSlice(buffer, int(buffer_len)) } func (o *optimizer) UpdateParameter(g Gradient) error { @@ -62,10 +60,7 @@ func (o *optimizer) UpdateParameter(g Gradient) error { return fmt.Errorf("Name: %s, parameter and gradient element type not match, parameter: %v, gradient: %v", g.Name, o.ElementType, g.ElementType) } - // FIXME: do we need a copy? 
discard g.Content by GC ok - var cbuffer unsafe.Pointer - cbuffer = unsafe.Pointer(&g.Content[0]) - r := C.paddle_update_parameter(o.opt, C.paddle_element_type(g.ElementType), cbuffer, C.int(len(g.Content))) + r := C.paddle_update_parameter(o.opt, C.paddle_element_type(g.ElementType), unsafe.Pointer(&g.Content[0]), C.int(len(g.Content))) if r != 0 { return fmt.Errorf("optimizer update returned error code: %d", r) } diff --git a/go/pserver/service.go b/go/pserver/service.go index cdd433260a..2ab622d790 100644 --- a/go/pserver/service.go +++ b/go/pserver/service.go @@ -44,8 +44,7 @@ type Gradient Parameter type Service struct { initialized chan struct{} - mu sync.Mutex - // injection from parameter to optimizer + mu sync.Mutex optMap map[string]*optimizer } @@ -130,9 +129,8 @@ func (s *Service) GetParam(name string, parameter *Parameter) error { // paramter content. parameter.Name = name parameter.ElementType = opt.ElementType - - err := opt.GetWeights(parameter) - return err + parameter.Content = opt.GetWeights() + return nil } // Save tells the parameter server to save parameters. From 5b31bd7ae3345e555493c26b1b941f6dfcfd2387 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 27 Jun 2017 11:29:30 +0800 Subject: [PATCH 083/542] Add `Variable::IsType` Make user can check what type is in variable --- paddle/framework/variable.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/paddle/framework/variable.h b/paddle/framework/variable.h index b33e10e682..72c4a7a2a1 100644 --- a/paddle/framework/variable.h +++ b/paddle/framework/variable.h @@ -25,21 +25,24 @@ class Variable { public: template const T& Get() const { - PADDLE_ASSERT(holder_ != nullptr); - PADDLE_ASSERT(std::type_index(typeid(T)) == - std::type_index(holder_->Type())); + PADDLE_ASSERT(IsType()); return *static_cast(holder_->Ptr()); } template T* GetMutable() { - if (holder_ == nullptr || - std::type_index(typeid(T)) != std::type_index(holder_->Type())) { + if (!IsType()) { holder_.reset(new PlaceholderImpl(new T())); } return static_cast(holder_->Ptr()); } + template + bool IsType() const { + return holder_ != nullptr && + std::type_index(typeid(T)) == std::type_index(holder_->Type()); + } + private: struct Placeholder { virtual ~Placeholder() {} From 4bfd757e7851dca20419fc09e726f6aac004864c Mon Sep 17 00:00:00 2001 From: liaogang Date: Tue, 27 Jun 2017 11:31:14 +0800 Subject: [PATCH 084/542] ENH: Add go doc --- cmake/generic.cmake | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 850c1868e6..6839abc1a7 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -77,6 +77,15 @@ # /cmake/external/*.cmake: # # cc_test(example_test SRCS example_test.cc DEPS example glog gflags) +# +# To build a go static library using Golang, use the go_ prefixed version: +# +# go_library(example STATIC) +# +# To build a go shared library using Golang, use the go_ prefixed version: +# +# go_library(example SHARED) +# if(NOT APPLE) find_package(Threads REQUIRED) @@ -248,15 +257,6 @@ set(GOPATH "${CMAKE_CURRENT_BINARY_DIR}/go") file(MAKE_DIRECTORY ${GOPATH}) set(PADDLE_IN_GOPATH "${GOPATH}/src/github.com/PaddlePaddle/Paddle") -# Because api.go defines a GO wrapper to ops and tensor, it depends on -# both. This implies that if any of tensor.{h,cc}, ops.{h,cu}, or -# api.go is changed, api need to be re-built. -# go_library(api -# SRCS -# api.go -# DEPS -# tensor # Because ops depend on tensor, this line is optional. 
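Stepping back to the Variable::IsType change in patch 083 above: the held-type probe is a std::type_index comparison against the erased holder's typeid. A standalone sketch of just that comparison, illustrative rather than the patch's class:

    #include <typeindex>
    #include <typeinfo>

    struct AnyHolder {
      const std::type_info* type = nullptr;  // recorded when a value is stored

      template <typename T>
      bool IsType() const {
        return type != nullptr &&
               std::type_index(typeid(T)) == std::type_index(*type);
      }
    };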
-# ops) function(go_library TARGET_NAME) set(options STATIC static SHARED shared) set(oneValueArgs "") @@ -296,7 +296,7 @@ function(go_library TARGET_NAME) COMMAND rm -rf ${PADDLE_IN_GOPATH} COMMAND ln -sf ${CMAKE_SOURCE_DIR} ${PADDLE_IN_GOPATH} # Automatically get all dependencies specified in the source code - COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get -d . + COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get -d ./.. # Golang build source code COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE} -o "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}" From a489a54de0f730586c22f40881dc2b510fbe6890 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Tue, 27 Jun 2017 11:32:43 +0800 Subject: [PATCH 085/542] fix style --- python/paddle/v2/reader/creator.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/paddle/v2/reader/creator.py b/python/paddle/v2/reader/creator.py index 5e052026f6..9f888b16d6 100644 --- a/python/paddle/v2/reader/creator.py +++ b/python/paddle/v2/reader/creator.py @@ -16,7 +16,6 @@ Creator package contains some simple reader creator, which could be used in user program. """ - __all__ = ['np_array', 'text_file', "recordio"] @@ -76,4 +75,4 @@ def recordio(path): yield r f.close() - return reader \ No newline at end of file + return reader From f149d183f7d78fdaa171f2afabaf8a138596c8ff Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Mon, 26 Jun 2017 20:41:33 -0700 Subject: [PATCH 086/542] Add system_allocator --- paddle/memory/detail/CMakeLists.txt | 6 +- paddle/memory/detail/system_allocator.h | 84 ++++++++++++------- paddle/memory/detail/system_allocator_test.cc | 44 +++++----- 3 files changed, 81 insertions(+), 53 deletions(-) diff --git a/paddle/memory/detail/CMakeLists.txt b/paddle/memory/detail/CMakeLists.txt index 3b5bbd7a12..c16dfadeb2 100644 --- a/paddle/memory/detail/CMakeLists.txt +++ b/paddle/memory/detail/CMakeLists.txt @@ -1 +1,5 @@ -cc_test(system_allocator_test SRCS system_allocator_test.cc) +if(${WITH_GPU}) + nv_test(system_allocator_test SRCS system_allocator_test.cc) +else(${WITH_GPU}) + cc_test(system_allocator_test SRCS system_allocator_test.cc) +endif(${WITH_GPU}) diff --git a/paddle/memory/detail/system_allocator.h b/paddle/memory/detail/system_allocator.h index 0a64553188..1768f9a0da 100644 --- a/paddle/memory/detail/system_allocator.h +++ b/paddle/memory/detail/system_allocator.h @@ -23,14 +23,31 @@ limitations under the License. */ #include #endif // PADDLE_ONLY_CPU +#include "paddle/platform/assert.h" + namespace paddle { namespace memory { namespace detail { -class SystemAllocator { +class CPUDeleter { public: - virtual void* Alloc(size_t size) = 0; - virtual void* Free(void* p) = 0; + CPUDeleter(void* ptr, size_t size, bool locked) + : ptr_(ptr), size_(size), locked_(locked) {} + + void* Ptr() { return ptr_; } + + void operator()(void* ptr) { + PADDLE_ASSERT(ptr == ptr_); + if (ptr_ != nullptr && locked_) { + munlock(ptr_, size_); + } + std::free(ptr_); + } + + private: + void* ptr_; + size_t size_; + bool locked_; }; // CPUAllocator calls mlock, which returns pinned @@ -39,21 +56,14 @@ class SystemAllocator { // available to the system for paging. So, by default, we should use // CPUAllocator. 
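A note on the CPUDeleter shape introduced above: Alloc now returns an object carrying both the pointer and the knowledge of how to release it, which is exactly what std::unique_ptr's custom-deleter slot expects (the test diff further below pairs them that way). A reduced sketch of the pairing, with illustrative names:

    #include <cstdlib>
    #include <memory>

    struct FreeDeleter {
      void operator()(void* p) const { std::free(p); }
    };

    // The release policy travels with the pointer itself.
    std::unique_ptr<int, FreeDeleter> MakeInt() {
      return std::unique_ptr<int, FreeDeleter>(
          static_cast<int*>(std::malloc(sizeof(int))), FreeDeleter{});
    }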
template -class CPUAllocator : public SystemAllocator { +class CPUAllocator { public: - virtual void* Alloc(size_t size) { + static CPUDeleter Alloc(size_t size) { void* p = std::malloc(size); if (p != nullptr && lock_memory) { mlock(p, size); } - return p; - } - - virtual void Free(void* p, size_t size) { - if (p != nullptr && lock_memory) { - munlock(p, size); - } - std::free(p); + return CPUDeleter(p, size, lock_memory); } }; @@ -67,6 +77,32 @@ inline void throw_on_error(cudaError_t e, const char* message) { } } // namespace +class GPUDeleter { + public: + GPUDeleter(void* ptr, size_t size, bool staging) + : ptr_(ptr), size_(size), staging_(staging) {} + + void* Ptr() { return ptr_; } + + void operator()(void* ptr) { + PADDLE_ASSERT(ptr == ptr_); + // Purposefully allow cudaErrorCudartUnloading, because + // that is returned if you ever call cudaFree after the + // driver has already shutdown. This happens only if the + // process is terminating, in which case we don't care if + // cudaFree succeeds. + cudaError_t err = staging_ ? cudaFreeHost(ptr) : cudaFree(ptr); + if (err != cudaErrorCudartUnloading) { + throw_on_error(err, "cudaFree{Host} failed"); + } + } + + private: + void* ptr_; + size_t size_; + bool staging_; +}; + // GPUAllocator calls cudaHostMalloc, which returns // pinned and locked memory as staging areas for data exchange // between host and device. Allocates too much would reduce the @@ -75,28 +111,14 @@ inline void throw_on_error(cudaError_t e, const char* message) { template class GPUAllocator { public: - void* Alloc(size_t size) { + static GPUDeleter Alloc(size_t size) { void* p = 0; cudaError_t result = staging ? cudaMallocHost(&p, size) : cudaMalloc(&p, size); - if (result == cudaSuccess) { - return p; - } - // clear last error - cudaGetLastError(); - return nullptr; - } - - void Free(void* p, size_t size) { - // Purposefully allow cudaErrorCudartUnloading, because - // that is returned if you ever call cudaFree after the - // driver has already shutdown. This happens only if the - // process is terminating, in which case we don't care if - // cudaFree succeeds. - auto err = staging ? cudaFreeHost(p) : cudaFree(p); - if (err != cudaErrorCudartUnloading) { - throw_on_error(err, "cudaFree failed"); + if (result != cudaSuccess) { + cudaGetLastError(); // clear error if there is any. } + return GPUDeleter(result == cudaSuccess ? p : nullptr, size, staging); } }; diff --git a/paddle/memory/detail/system_allocator_test.cc b/paddle/memory/detail/system_allocator_test.cc index 4e7b8018b6..fec70a65b7 100644 --- a/paddle/memory/detail/system_allocator_test.cc +++ b/paddle/memory/detail/system_allocator_test.cc @@ -13,36 +13,38 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/memory/detail/system_allocator.h" + +#include +#include + #include "gtest/gtest.h" -TEST(CPUAllocator, NoLockMem) { - paddle::memory::detail::CPUAllocator a; - void* p = a.Alloc(4096); - EXPECT_NE(p, nullptr); - a.Free(p, 4096); +template +void TestAllocator() { + { + auto d = Allocator::Alloc(sizeof(int)); + EXPECT_NE(d.Ptr(), nullptr); + std::unique_ptr p(static_cast(d.Ptr()), d); + } + { + auto d = Allocator::Alloc(0); + EXPECT_EQ(d.Ptr(), nullptr); + std::unique_ptr p(static_cast(d.Ptr()), d); + } } +TEST(CPUAllocator, NoLockMem) { + TestAllocator>(); +} TEST(CPUAllocator, LockMem) { - paddle::memory::detail::CPUAllocator a; - void* p = a.Alloc(4096); - EXPECT_NE(p, nullptr); - a.Free(p, 4096); + TestAllocator>(); } #ifndef PADDLE_ONLY_CPU - -TEST(GPUAllocator, NonStaging) { - paddle::memory::detail::GPUAllocator a; - void* p = a.Alloc(4096); - EXPECT_NE(p, nullptr); - a.Free(p, 4096); +TEST(GPUAllocator, NoStaging) { + TestAllocator>(); } - TEST(GPUAllocator, Staging) { - paddle::memory::detail::GPUAllocator a; - void* p = a.Alloc(4096); - EXPECT_NE(p, nullptr); - a.Free(p, 4096); + TestAllocator>(); } - #endif // PADDLE_ONLY_CPU From 49e87ee327a7115cca08384e24e043fbe73413c5 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Tue, 27 Jun 2017 14:19:34 +0800 Subject: [PATCH 087/542] Change the CpuMatrix::copyFrom and CpuVector::copyFrom with the stream parameter to the synchronous interface. --- paddle/gserver/layers/Layer.cpp | 5 +++++ paddle/math/Matrix.cpp | 2 ++ paddle/math/Matrix.h | 3 ++- paddle/math/Vector.cpp | 2 ++ paddle/math/Vector.h | 8 ++++---- paddle/math/tests/test_matrixCompare.cpp | 14 ++++++++++++++ 6 files changed, 29 insertions(+), 5 deletions(-) diff --git a/paddle/gserver/layers/Layer.cpp b/paddle/gserver/layers/Layer.cpp index 125aaf947f..4b92b5d163 100644 --- a/paddle/gserver/layers/Layer.cpp +++ b/paddle/gserver/layers/Layer.cpp @@ -191,6 +191,11 @@ void Layer::addOutputArgument(int deviceId) { void Layer::copyOutputToOtherDevice() { for (size_t i = 0; i != outputOtherDevice_.size(); i++) { SetDevice device(outputOtherDevice_[i].deviceId); + // If outputOtherDevice_[i].value is a CpuMatrix, + // the copyFrom is a synchronous interface. + // If outputOtherDevice_[i].value is a GpuMatrix, since subsequent + // calculations are all on HPPL_STREAM_DEFAULT, + // copyFrom can be an asynchronous interface. outputOtherDevice_[i].value->copyFrom(*getOutputValue(), HPPL_STREAM_DEFAULT); outputOtherDevice_[i].sequenceStartPositions = diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index c910146164..4431d613f6 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -1565,6 +1565,8 @@ void CpuMatrix::copyFrom(const Matrix& src, hl_stream_t stream) { const_cast(src.getData()), sizeof(real) * elementCnt_, stream); + // There is a need to add synchronization to ensure that the data is copied. 
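// Annotation (not part of the original patch): hl_memcpy_async is presumably
// free to return before the transfer has finished, so a CpuMatrix has to
// block on the stream before callers may read data_ -- the same reasoning
// the commit applies to CpuVectorT::copyFrom in Vector.cpp below.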
+ hl_stream_synchronize(stream); } else if (typeid(src) == typeid(CpuMatrix)) { memcpy(data_, src.getData(), sizeof(real) * elementCnt_); } else { diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index 748be850b4..7dfd593225 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -239,7 +239,8 @@ public: LOG(FATAL) << "Not implemented"; } - // asynchronous copy + // For GpuMatrix this is an asynchronous copy interface + // For CpuMatrix this is an synchronous copy interface virtual void copyFrom(const Matrix& src, hl_stream_t stream) { LOG(FATAL) << "Not implemented"; } diff --git a/paddle/math/Vector.cpp b/paddle/math/Vector.cpp index c519ca500a..eb87ee9bb7 100644 --- a/paddle/math/Vector.cpp +++ b/paddle/math/Vector.cpp @@ -657,6 +657,8 @@ void CpuVectorT::copyFrom(const VectorT& src, hl_stream_t stream) { (void*)src.getData(), sizeof(T) * this->getSize(), stream); + // There is a need to add synchronization to ensure that the data is copied. + hl_stream_synchronize(stream); } else { src.copyTo(this); } diff --git a/paddle/math/Vector.h b/paddle/math/Vector.h index 9af6e30c9e..80b9775fcc 100644 --- a/paddle/math/Vector.h +++ b/paddle/math/Vector.h @@ -168,11 +168,11 @@ public: virtual void copyFrom(const VectorT& src) = 0; /** - * If use_gpu, this function will push the copy-task to the specifed-stream - * and return immediately. + * If GpuVector, this function is an asynchronous interface, + * will push the copy-task to the specifed-stream and return immediately. * - * If not use GPU, this function is same as - * the copyFrom(const VectorT& src), which use stream HPPL_STREAM_DEFAULT. + * If CpuVector, this function is an synchronous interface, + * same as the copyFrom(const VectorT& src). */ virtual void copyFrom(const VectorT& src, hl_stream_t stream) = 0; diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp index 5a0dffe086..354f58df39 100644 --- a/paddle/math/tests/test_matrixCompare.cpp +++ b/paddle/math/tests/test_matrixCompare.cpp @@ -1127,4 +1127,18 @@ TEST(Matrix, MaxOutFwdBwd) { } } +TEST(CpuMatrix, copyFrom) { + const size_t height = 1000; + const size_t width = 1000; + CpuMatrix cpu(height, width); + GpuMatrix gpu(height, width); + CpuMatrix copy(height, width); + + cpu.randomizeUniform(); + gpu.copyFrom(cpu); + copy.copyFrom(gpu, HPPL_STREAM_DEFAULT); + + TensorCheckEqual(cpu, copy); +} + #endif From 714c2c2d4253fdcf27df781efda1dfe13fdffa68 Mon Sep 17 00:00:00 2001 From: wuyi05 Date: Tue, 27 Jun 2017 15:06:03 +0800 Subject: [PATCH 088/542] fix go get -d ./.. --- cmake/generic.cmake | 2 +- paddle/scripts/docker/build.sh | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 6839abc1a7..11c1f677ae 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -296,7 +296,7 @@ function(go_library TARGET_NAME) COMMAND rm -rf ${PADDLE_IN_GOPATH} COMMAND ln -sf ${CMAKE_SOURCE_DIR} ${PADDLE_IN_GOPATH} # Automatically get all dependencies specified in the source code - COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get -d ./.. + COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get -d ./... 
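# Annotation (not part of the patch): `./...` is Go's recursive package
# wildcard, so `go get -d ./...` fetches dependencies for every package
# under the tree, whereas the earlier `./..` named only the parent
# directory's package -- hence this one-character fix.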
# Golang build source code COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE} -o "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}" diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 2b48e4dc0f..a182e5f4ae 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -31,6 +31,7 @@ Configuring cmake in /paddle/build ... -DWITH_DOC=OFF -DWITH_GPU=${WITH_GPU:-OFF} -DWITH_AVX=${WITH_AVX:-OFF} + -DWITH_GOLANG=${WITH_GOLANG:-OFF} -DWITH_SWIG_PY=ON -DCUDNN_ROOT=/usr/ -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} @@ -43,6 +44,7 @@ cmake .. \ -DWITH_DOC=OFF \ -DWITH_GPU=${WITH_GPU:-OFF} \ -DWITH_AVX=${WITH_AVX:-OFF} \ + -DWITH_GOLANG=${WITH_GOLANG:-OFF} \ -DWITH_SWIG_PY=ON \ -DCUDNN_ROOT=/usr/ \ -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} \ From ab91232cf6bad3c9ff5595c6d655eb538a651f24 Mon Sep 17 00:00:00 2001 From: qijun Date: Tue, 27 Jun 2017 20:16:52 +0800 Subject: [PATCH 089/542] add cmake external project for eigen --- CMakeLists.txt | 1 + cmake/external/eigen.cmake | 20 ++++++++++++++++ paddle/framework/ddim_test.cc | 44 +++++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+) create mode 100644 cmake/external/eigen.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 3c719d35ec..9be75f4a7d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -92,6 +92,7 @@ include(external/openblas) # download, build, install openblas include(external/swig) # download, build, install swig include(external/warpctc) # download, build, install warpctc include(external/any) # download libn::any +include(external/eigen) # download eigen3 include(generic) # simplify cmake module include(package) # set paddle packages diff --git a/cmake/external/eigen.cmake b/cmake/external/eigen.cmake new file mode 100644 index 0000000000..543504a274 --- /dev/null +++ b/cmake/external/eigen.cmake @@ -0,0 +1,20 @@ +INCLUDE(ExternalProject) + +SET(EIGEN_SOURCE_DIR ${THIRD_PARTY_PATH}/eigen3) + +INCLUDE_DIRECTORIES(${EIGEN_SOURCE_DIR}/src/) + +ExternalProject_Add( + eigen3 + ${EXTERNAL_PROJECT_LOG_ARGS} + URL "https://bitbucket.org/eigen/eigen/get/f3a22f35b044.tar.gz" + URL_MD5 "4645c66075982da6fa0bcf6b20f3e8f7" + PREFIX ${EIGEN_SOURCE_DIR} + UPDATE_COMMAND "" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" +) + +LIST(APPEND external_project_dependencies eigen3) \ No newline at end of file diff --git a/paddle/framework/ddim_test.cc b/paddle/framework/ddim_test.cc index 36eef02370..3e4ad840f3 100644 --- a/paddle/framework/ddim_test.cc +++ b/paddle/framework/ddim_test.cc @@ -1,6 +1,9 @@ #include #include +#include "eigen3/Eigen/Core" +#include "eigen3/Eigen/Dense" +#include "eigen3/unsupported/Eigen/CXX11/Tensor" #include "gtest/gtest.h" #include "paddle/framework/ddim.h" @@ -61,3 +64,44 @@ TEST(DDim, Print) { ss << ddim; EXPECT_EQ("2, 3, 4", ss.str()); } + +template +using Vec = + Eigen::TensorMap, + Eigen::Aligned>; + +template +using Matrix = + Eigen::TensorMap, + Eigen::Aligned>; + +template +void print(T* input, int size) { + for (int i = 0; i < size; i++) { + std::cout << input[i] << " "; + } + std::cout << std::endl; +} + +TEST(Eigen, start) { + int size = 4; + + float* t_a = (float*)malloc(size * sizeof(float)); + float* t_b = (float*)malloc(size * sizeof(float)); + float* t_c = (float*)malloc(size * sizeof(float)); + for (int i = 0; i < size; i++) { + t_a[i] = i; + t_b[i] = i; + } + Vec a(t_a, size); + Vec b(t_b, size); + Vec c(t_c, size); + + Eigen::DefaultDevice dd; + c.device(dd) = a + b; + print(t_c, size); + + 
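// Annotation (not part of the test above): Eigen::TensorMap adapts the raw
// malloc'ed buffers in place instead of copying them, and the statement
// `c.device(dd) = a + b` evaluates the elementwise sum on the supplied
// device -- here a single-threaded Eigen::DefaultDevice.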
free(t_a); + free(t_b); + free(t_c); +} From 155e40ef26e337deb82914a7702b669c8d5a5c44 Mon Sep 17 00:00:00 2001 From: wuyi05 Date: Tue, 27 Jun 2017 20:16:53 +0800 Subject: [PATCH 090/542] using glide for go package vendor --- CMakeLists.txt | 4 +- cmake/generic.cmake | 72 +++++++++++++++++++++-------------- go/.gitignore | 1 + go/CMakeLists.txt | 30 +++++++++++++++ go/cmd/master/CMakeLists.txt | 15 ++++++++ go/cmd/pserver/CMakeLists.txt | 15 ++++++++ go/glide.lock | 61 +++++++++++++++++++++++++++++ go/glide.yaml | 12 ++++++ 8 files changed, 179 insertions(+), 31 deletions(-) create mode 100644 go/.gitignore create mode 100644 go/CMakeLists.txt create mode 100644 go/cmd/master/CMakeLists.txt create mode 100644 go/cmd/pserver/CMakeLists.txt create mode 100644 go/glide.lock create mode 100644 go/glide.yaml diff --git a/CMakeLists.txt b/CMakeLists.txt index 3c719d35ec..18e5ebeac2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -48,6 +48,7 @@ option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF) option(ON_TRAVIS "Exclude special unit test on Travis CI" OFF) option(WITH_C_API "Compile PaddlePaddle with C-API(Prediction)" OFF) option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF) +option(GLIDE_INSTALL "Download and install go dependencies " ON) # CMAKE_BUILD_TYPE if(NOT CMAKE_BUILD_TYPE) @@ -131,8 +132,7 @@ add_subdirectory(paddle) add_subdirectory(python) if(WITH_GOLANG) - #TODO (add go/master/c back when fixed) - add_subdirectory(go/pserver/cclient) + add_subdirectory(go) endif(WITH_GOLANG) if(WITH_DOC) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 11c1f677ae..0d8bfa17d3 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -17,7 +17,7 @@ # generic.cmake defines CMakes functions that look like Bazel's # building rules (https://bazel.build/). 
# -# +# # ------------------------------------------- # C++ CUDA C++ Go # ------------------------------------------- @@ -25,51 +25,51 @@ # cc_binary nv_binary go_binary # cc_test nv_test go_test # ------------------------------------------- -# +# # To build a static library example.a from example.cc using the system # compiler (like GCC): -# +# # cc_library(example SRCS example.cc) -# +# # To build a static library example.a from multiple source files # example{1,2,3}.cc: -# +# # cc_library(example SRCS example1.cc example2.cc example3.cc) -# +# # To build a shared library example.so from example.cc: -# +# # cc_library(example SHARED SRCS example.cc) -# +# # To build a library using Nvidia's NVCC from .cu file(s), use the nv_ # prefixed version: -# +# # nv_library(example SRCS example.cu) -# +# # To specify that a library new_example.a depends on other libraies: -# +# # cc_library(new_example SRCS new_example.cc DEPS example) -# +# # Static libraries can be composed of other static libraries: -# +# # cc_library(composed DEPS dependent1 dependent2 dependent3) -# +# # To build an executable binary file from some source files and # dependent libraries: -# +# # cc_binary(example SRCS main.cc something.cc DEPS example1 example2) -# +# # To build an executable binary file using NVCC, use the nv_ prefixed # version: -# +# # nv_binary(example SRCS main.cc something.cu DEPS example1 example2) -# +# # To build a unit test binary, which is an executable binary with # GoogleTest linked: -# +# # cc_test(example_test SRCS example_test.cc DEPS example) -# +# # To build a unit test binary using NVCC, use the nv_ prefixed version: -# +# # nv_test(example_test SRCS example_test.cu DEPS example) # # It is pretty often that executable and test binaries depend on @@ -256,6 +256,8 @@ endfunction(nv_test) set(GOPATH "${CMAKE_CURRENT_BINARY_DIR}/go") file(MAKE_DIRECTORY ${GOPATH}) set(PADDLE_IN_GOPATH "${GOPATH}/src/github.com/PaddlePaddle/Paddle") +file(MAKE_DIRECTORY "${PADDLE_IN_GOPATH}") +set(PADDLE_GO_SRC "${CMAKE_SOURCE_DIR}/go") function(go_library TARGET_NAME) set(options STATIC static SHARED shared) @@ -280,7 +282,7 @@ function(go_library TARGET_NAME) add_library(${TARGET_NAME} STATIC ${dummyfile}) endif() if(go_library_DEPS) - add_dependencies(${TARGET_NAME} ${go_library_DEPS}) + add_dependencies(${TARGET_NAME} ${go_library_DEPS} paddle_go_path_link) endif(go_library_DEPS) # we need to symlink Paddle directory into GOPATH. If we @@ -289,19 +291,23 @@ function(go_library TARGET_NAME) # without the changes in our current Paddle repo that we # want to build. file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go") + string(REPLACE "${PADDLE_GO_SRC}/" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR}) add_custom_command(TARGET ${TARGET_NAME} POST_BUILD COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}" # Symlink Paddle directory into GOPATH COMMAND mkdir -p ${PADDLE_IN_GOPATH} - COMMAND rm -rf ${PADDLE_IN_GOPATH} + COMMAND rm -rf ${PADDLE_IN_GOPATH} COMMAND ln -sf ${CMAKE_SOURCE_DIR} ${PADDLE_IN_GOPATH} - # Automatically get all dependencies specified in the source code - COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get -d ./... + WORKING_DIRECTORY ${PADDLE_GO_SRC}) + add_custom_command(TARGET ${TARGET_NAME} POST_BUILD + # Automatically get all dependencies specified in the source code + #COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get -d ./... 
# Golang build source code COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE} -o "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}" - ${GO_SOURCE} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + "./${CMAKE_CURRENT_SOURCE_REL_DIR}/${GO_SOURCE}" + # must run under GOPATH + WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go") endfunction(go_library) function(go_binary TARGET_NAME) @@ -309,12 +315,20 @@ function(go_binary TARGET_NAME) set(oneValueArgs "") set(multiValueArgs SRCS DEPS) cmake_parse_arguments(go_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + string(REPLACE "${PADDLE_GO_SRC}/" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + add_custom_command(OUTPUT ${TARGET_NAME}_link + # Symlink Paddle directory into GOPATH + COMMAND mkdir -p ${PADDLE_IN_GOPATH} + COMMAND rm -rf ${PADDLE_IN_GOPATH} + COMMAND ln -sf ${CMAKE_SOURCE_DIR} ${PADDLE_IN_GOPATH} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + add_custom_command(OUTPUT ${TARGET_NAME}_timestamp COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build -o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}" ${go_library_SRCS} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) - add_custom_target(${TARGET_NAME} ALL DEPENDS ${TARGET_NAME}_timestamp ${go_binary_DEPS}) + WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go/${CMAKE_CURRENT_SOURCE_REL_DIR}") + add_custom_target(${TARGET_NAME} ALL DEPENDS ${TARGET_NAME}_link ${TARGET_NAME}_timestamp ${go_binary_DEPS}) install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME} DESTINATION bin) endfunction(go_binary) diff --git a/go/.gitignore b/go/.gitignore new file mode 100644 index 0000000000..48b8bf9072 --- /dev/null +++ b/go/.gitignore @@ -0,0 +1 @@ +vendor/ diff --git a/go/CMakeLists.txt b/go/CMakeLists.txt new file mode 100644 index 0000000000..fb7bd14b89 --- /dev/null +++ b/go/CMakeLists.txt @@ -0,0 +1,30 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# FIXME(typhoonzero): Download glide into cmake build temprary GOPATH +if(EXISTS $ENV{GOPATH}/bin/glide) + set(GLIDE "$ENV{GOPATH}/bin/glide") +else() + message(FATAL_ERROR "no glide executeble found: $ENV{GOPATH}/bin/glide") +endif() + +set(PADDLE_GO_PATH "${CMAKE_SOURCE_DIR}/go") + +if (GLIDE_INSTALL) + message(STATUS ${PADDLE_GO_PATH}) + execute_process(COMMAND ${GLIDE} install WORKING_DIRECTORY ${PADDLE_GO_PATH}) +endif() + +add_subdirectory(go/pserver/cclient) +#TODO (add go/master/c back when fixed) diff --git a/go/cmd/master/CMakeLists.txt b/go/cmd/master/CMakeLists.txt new file mode 100644 index 0000000000..a604272a08 --- /dev/null +++ b/go/cmd/master/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +go_binary(master) diff --git a/go/cmd/pserver/CMakeLists.txt b/go/cmd/pserver/CMakeLists.txt new file mode 100644 index 0000000000..ad7da915e7 --- /dev/null +++ b/go/cmd/pserver/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +go_binary(pserver) diff --git a/go/glide.lock b/go/glide.lock new file mode 100644 index 0000000000..190a222338 --- /dev/null +++ b/go/glide.lock @@ -0,0 +1,61 @@ +hash: b8f18ce6784bd3fadd9fed0b8443e7b658234ea785ae1f220723ae2c1f652aa7 +updated: 2017-06-27T14:05:48.925262819+08:00 +imports: +- name: github.com/coreos/etcd + version: 61fc123e7a8b14a0a258aa3f5c4159861b1ec2e7 + subpackages: + - auth/authpb + - clientv3 + - clientv3/concurrency + - etcdserver/api/v3rpc/rpctypes + - etcdserver/etcdserverpb + - mvcc/mvccpb +- name: github.com/golang/protobuf + version: 4bd1920723d7b7c925de087aa32e2187708897f7 + subpackages: + - jsonpb + - proto +- name: github.com/golang/snappy + version: 553a641470496b2327abcac10b36396bd98e45c9 +- name: github.com/namsral/flag + version: 71ceffbeb0ba60fccc853971bb3ed4d7d90bfd04 +- name: github.com/PaddlePaddle/recordio + version: edfb82af0739c84f241c87390ec5649c7b28c129 +- name: github.com/sirupsen/logrus + version: 202f25545ea4cf9b191ff7f846df5d87c9382c2b +- name: golang.org/x/net + version: c8c74377599bd978aee1cf3b9b63a8634051cec2 + subpackages: + - context + - http2 + - http2/hpack + - idna + - internal/timeseries + - lex/httplex + - trace +- name: golang.org/x/sys + version: f7928cfef4d09d1b080aa2b6fd3ca9ba1567c733 + subpackages: + - unix +- name: golang.org/x/text + version: 4e9ab9ee170f2a39bd66c92b3e0a47ff47a4bc77 + subpackages: + - secure/bidirule + - transform + - unicode/bidi + - unicode/norm +- name: google.golang.org/grpc + version: 8050b9cbc271307e5a716a9d782803d09b0d6f2d + subpackages: + - codes + - credentials + - grpclog + - internal + - keepalive + - metadata + - naming + - peer + - stats + - tap + - transport +testImports: [] diff --git a/go/glide.yaml b/go/glide.yaml new file mode 100644 index 0000000000..05c5d15ca2 --- /dev/null +++ b/go/glide.yaml @@ -0,0 +1,12 @@ +package: github.com/PaddlePaddle/Paddle/go +import: +- package: github.com/PaddlePaddle/recordio +- package: github.com/coreos/etcd + version: ^3.2.1 + subpackages: + - clientv3 + - clientv3/concurrency +- package: github.com/namsral/flag + version: ^1.7.4-pre +- package: github.com/sirupsen/logrus + version: ^1.0.0 From c5d9ca8617869e4d8bc12b9302c5594279ab93ad Mon Sep 17 00:00:00 2001 From: qijun Date: Tue, 27 Jun 2017 15:06:00 +0000 Subject: [PATCH 
091/542] fix eigen include path --- cmake/external/eigen.cmake | 8 +++---- paddle/framework/ddim_test.cc | 44 ----------------------------------- 2 files changed, 4 insertions(+), 48 deletions(-) diff --git a/cmake/external/eigen.cmake b/cmake/external/eigen.cmake index 543504a274..253d436bcc 100644 --- a/cmake/external/eigen.cmake +++ b/cmake/external/eigen.cmake @@ -2,13 +2,13 @@ INCLUDE(ExternalProject) SET(EIGEN_SOURCE_DIR ${THIRD_PARTY_PATH}/eigen3) -INCLUDE_DIRECTORIES(${EIGEN_SOURCE_DIR}/src/) +INCLUDE_DIRECTORIES(${EIGEN_SOURCE_DIR}/src/eigen3) ExternalProject_Add( eigen3 ${EXTERNAL_PROJECT_LOG_ARGS} - URL "https://bitbucket.org/eigen/eigen/get/f3a22f35b044.tar.gz" - URL_MD5 "4645c66075982da6fa0bcf6b20f3e8f7" + URL "https://bitbucket.org/eigen/eigen/get/3.3.4.tar.gz" + URL_MD5 "1a47e78efe365a97de0c022d127607c3" PREFIX ${EIGEN_SOURCE_DIR} UPDATE_COMMAND "" CONFIGURE_COMMAND "" @@ -17,4 +17,4 @@ ExternalProject_Add( TEST_COMMAND "" ) -LIST(APPEND external_project_dependencies eigen3) \ No newline at end of file +LIST(APPEND external_project_dependencies eigen3) diff --git a/paddle/framework/ddim_test.cc b/paddle/framework/ddim_test.cc index 3e4ad840f3..36eef02370 100644 --- a/paddle/framework/ddim_test.cc +++ b/paddle/framework/ddim_test.cc @@ -1,9 +1,6 @@ #include #include -#include "eigen3/Eigen/Core" -#include "eigen3/Eigen/Dense" -#include "eigen3/unsupported/Eigen/CXX11/Tensor" #include "gtest/gtest.h" #include "paddle/framework/ddim.h" @@ -64,44 +61,3 @@ TEST(DDim, Print) { ss << ddim; EXPECT_EQ("2, 3, 4", ss.str()); } - -template -using Vec = - Eigen::TensorMap, - Eigen::Aligned>; - -template -using Matrix = - Eigen::TensorMap, - Eigen::Aligned>; - -template -void print(T* input, int size) { - for (int i = 0; i < size; i++) { - std::cout << input[i] << " "; - } - std::cout << std::endl; -} - -TEST(Eigen, start) { - int size = 4; - - float* t_a = (float*)malloc(size * sizeof(float)); - float* t_b = (float*)malloc(size * sizeof(float)); - float* t_c = (float*)malloc(size * sizeof(float)); - for (int i = 0; i < size; i++) { - t_a[i] = i; - t_b[i] = i; - } - Vec a(t_a, size); - Vec b(t_b, size); - Vec c(t_c, size); - - Eigen::DefaultDevice dd; - c.device(dd) = a + b; - print(t_c, size); - - free(t_a); - free(t_b); - free(t_c); -} From 2f47562df8e9cbd81e96bba642646b0036e7bab6 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Tue, 27 Jun 2017 23:39:59 +0800 Subject: [PATCH 092/542] scope-impl --- doc/design/scope.md | 2 +- paddle/framework/CMakeLists.txt | 5 +++ paddle/framework/scope.cc | 54 +++++++++++++++++++++++++++++++++ paddle/framework/scope.h | 51 +++++++++++++++++++++++++++++++ paddle/framework/scope_test.cc | 47 ++++++++++++++++++++++++++++ 5 files changed, 158 insertions(+), 1 deletion(-) create mode 100644 paddle/framework/scope.cc create mode 100644 paddle/framework/scope.h create mode 100644 paddle/framework/scope_test.cc diff --git a/doc/design/scope.md b/doc/design/scope.md index 2ff416f06e..4d14a64977 100644 --- a/doc/design/scope.md +++ b/doc/design/scope.md @@ -41,7 +41,7 @@ class Scope { const Variable* GetVariable(const std::string& name) const; private: - std::unordered_map> vars_; + std::unordered_map> vars_; }; ``` diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index e3c3155aa9..7ea17f7114 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -1,6 +1,11 @@ +# ddim lib cc_library(ddim SRCS ddim.cc) cc_test(ddim_test SRCS ddim_test.cc DEPS ddim) nv_test(dim_test SRCS dim_test.cu DEPS ddim) 
cc_test(variable_test SRCS variable_test.cc)
+
+# scope lib
+cc_library(scope SRCS scope.cc)
+cc_test(scope_test SRCS scope_test.cc DEPS scope)
diff --git a/paddle/framework/scope.cc b/paddle/framework/scope.cc
new file mode 100644
index 0000000000..ed75aece01
--- /dev/null
+++ b/paddle/framework/scope.cc
@@ -0,0 +1,54 @@
+#include "paddle/framework/scope.h"
+
+namespace paddle {
+namespace framework {
+
+Error Scope::CreateVariable(const std::string &name) {
+  if (name == "") {
+    return Error("Variable name should not be empty");
+  }
+
+  if (HaveVariable(name)) {
+    return AlreadyCreated;
+  }
+  vars_[name] = std::unique_ptr<Variable>(new Variable());
+  return Error();
+}
+
+Variable* Scope::GetVarLocally(const std::string& name) const {
+  if (vars_.count(name)) {
+    return vars_.at(name).get();
+  }
+  return nullptr;
+}
+
+Variable* Scope::GetVariable(const std::string &name) const {
+  Variable* var = GetVarLocally(name);
+  if (var != nullptr) {
+    return var;
+  } else if (parent_ != nullptr) {
+    return parent_->GetVariable(name);
+  } else {
+    return nullptr;
+  }
+}
+
+Variable* Scope::GetOrCreateVariable(const std::string &name) {
+  Variable* var;
+  var = GetVariable(name);
+  if (var == nullptr) {
+    auto err = CreateVariable(name);
+    if (!err.isOK()) {
+      return nullptr;
+    }
+  }
+  return GetVariable(name);
+}
+
+bool Scope::HaveVariable(const std::string &name) {
+  return vars_.count(name) != 0;
+}
+
+} // namespace framework
+} // namespace paddle
+
diff --git a/paddle/framework/scope.h b/paddle/framework/scope.h
new file mode 100644
index 0000000000..ad1ed2ddab
--- /dev/null
+++ b/paddle/framework/scope.h
@@ -0,0 +1,51 @@
+#pragma once
+
+#include
+#include
+#include "paddle/framework/variable.h"
+#include "paddle/utils/Error.h"
+
+namespace paddle {
+namespace framework {
+
+const static Error AlreadyCreated("Variable has already been created");
+
+/**
+ * Scope is an association of a name to Variable. All variables belong to `Scope`.
+ * You need to specify a scope to run a Net, i.e., `net.Run(&scope)`. One net can
+ * run in different scopes and update different variable in the scope.
+ */
+class Scope {
+ public:
+  Scope() {}
+
+  explicit Scope(const std::shared_ptr<Scope> &scope):
+      parent_(scope) {}
+
+  ~Scope() {}
+
+  // Create Variable in this Scope. Return error if Variable already been
+  // created.
+  Error __must_check CreateVariable(const std::string& name);
+
+  // Get Variable from this Scope, this function will recursive find Variable
+  // from it's parent scope.
+  // Return nullptr if not found.
+  Variable* GetVariable(const std::string& name) const;
+
+  // find and return Variables in the scope it self.
+  Variable* GetVarLocally(const std::string& name) const;
+
+  // Get a Variable from Scope, if the Variable is not exist then create it.
+  // User should call this function most of time.
+  Variable* GetOrCreateVariable(const std::string& name);
+
+  bool HaveVariable(const std::string& name);
+
+ private:
+  std::unordered_map<std::string, std::unique_ptr<Variable>> vars_;
+  std::shared_ptr<Scope> parent_ {nullptr};
+};
+
+} // namespace framework
+} // namespace paddle
diff --git a/paddle/framework/scope_test.cc b/paddle/framework/scope_test.cc
new file mode 100644
index 0000000000..09fbb78d69
--- /dev/null
+++ b/paddle/framework/scope_test.cc
@@ -0,0 +1,47 @@
+#include "paddle/framework/scope.h"
+#include "gtest/gtest.h"
+
+TEST(Scope, Create) {
+  using paddle::framework::Scope;
+  using paddle::Error;
+  using paddle::framework::Variable;
+  using paddle::framework::AlreadyCreated;
+
+  Scope* scope = new Scope();
+
+  Error err = scope->CreateVariable("");
+  EXPECT_FALSE(err.isOK());
+
+  Variable* var1 = scope->GetVariable("a");
+  EXPECT_EQ(var1, nullptr);
+
+  Error err1 = scope->CreateVariable("a");
+  EXPECT_TRUE(err1.isOK());
+
+  Error err2 = scope->CreateVariable("a");
+  EXPECT_EQ(err2, AlreadyCreated);
+
+  Variable* var2 = scope->GetVariable("a");
+  EXPECT_NE(var2, nullptr);
+
+  Variable* var3 = scope->GetOrCreateVariable("b");
+  EXPECT_NE(var3, nullptr);
+}
+
+TEST(Scope, Parent) {
+  using paddle::framework::Scope;
+  using paddle::framework::Variable;
+  using paddle::Error;
+
+  const auto parent_scope_ptr = std::shared_ptr<Scope>(new Scope());
+  Scope* scope = new Scope(parent_scope_ptr);
+
+  Error err = parent_scope_ptr->CreateVariable("a");
+  EXPECT_TRUE(err.isOK());
+
+  Variable* var1 = scope->GetVarLocally("a");
+  EXPECT_EQ(var1, nullptr);
+
+  Variable* var2 = scope->GetVariable("a");
+  EXPECT_NE(var2, nullptr);
+}
\ No newline at end of file
From 5e8d8e073bf0e208927c20103d3fc383f9e90316 Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Tue, 27 Jun 2017 23:53:37 +0800
Subject: [PATCH 093/542] refine GetOrCreateVariable

---
 paddle/framework/scope.cc | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/paddle/framework/scope.cc b/paddle/framework/scope.cc
index ed75aece01..31956aecb9 100644
--- a/paddle/framework/scope.cc
+++ b/paddle/framework/scope.cc
@@ -34,15 +34,17 @@ Variable* Scope::GetVariable(const std::string &name) const {
 }
 
 Variable* Scope::GetOrCreateVariable(const std::string &name) {
-  Variable* var;
-  var = GetVariable(name);
-  if (var == nullptr) {
-    auto err = CreateVariable(name);
-    if (!err.isOK()) {
-      return nullptr;
-    }
+  Variable* var = GetVariable(name);
+  if (var != nullptr) {
+    return var;
+  }
+
+  Error err = CreateVariable(name);
+  if (!err.isOK()) {
+    return nullptr;
+  } else {
+    return GetVariable(name);
   }
-  return GetVariable(name);
 }
 
 bool Scope::HaveVariable(const std::string &name) {
From 7f03817806591ba508652263b2a83788d6baf397 Mon Sep 17 00:00:00 2001
From: dongzhihong
Date: Wed, 28 Jun 2017 00:11:31 +0800
Subject: [PATCH 094/542] "cgo ldflags config"

---
 go/pserver/optimizer.go | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go
index df0ea373bb..5824a529fc 100644
--- a/go/pserver/optimizer.go
+++ b/go/pserver/optimizer.go
@@ -1,13 +1,14 @@
 package pserver
 
-/*
-// TODO(zhihong): move compile flags to cmake go_library
-#cgo pkg-config: protobuf
-#cgo CFLAGS: -I ../../
-#cgo LDFLAGS: /Users/dzh/.go/src/github.com/PaddlePaddle/Paddle/build/go/pserver/cclient/libpaddle_go_optimizer.a -lstdc++
-#include "paddle/optimizer/optimizer.h"
-*/
+// #cgo pkg-config: protobuf
+// #cgo CFLAGS: -I ../../
+// FIXME: ldflags contain "build" path
+// #cgo LDFLAGS:
../../build/go/pserver/cclient/libpaddle_go_optimizer.a -lstdc++ +// #include "paddle/optimizer/optimizer.h" +// #include +// #include import "C" + import ( "fmt" "unsafe" From 7d138593d206f8bc16ddacb2a9aa072c08cb829d Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 28 Jun 2017 00:15:00 +0800 Subject: [PATCH 095/542] refine code of scope with style check --- paddle/framework/scope.cc | 9 ++++----- paddle/framework/scope.h | 17 ++++++++--------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/paddle/framework/scope.cc b/paddle/framework/scope.cc index 31956aecb9..e985598849 100644 --- a/paddle/framework/scope.cc +++ b/paddle/framework/scope.cc @@ -3,7 +3,7 @@ namespace paddle { namespace framework { -Error Scope::CreateVariable(const std::string &name) { +Error Scope::CreateVariable(const std::string& name) { if (name == "") { return Error("Variable name should not be empty"); } @@ -22,7 +22,7 @@ Variable* Scope::GetVarLocally(const std::string& name) const { return nullptr; } -Variable* Scope::GetVariable(const std::string &name) const { +Variable* Scope::GetVariable(const std::string& name) const { Variable* var = GetVarLocally(name); if (var != nullptr) { return var; @@ -33,7 +33,7 @@ Variable* Scope::GetVariable(const std::string &name) const { } } -Variable* Scope::GetOrCreateVariable(const std::string &name) { +Variable* Scope::GetOrCreateVariable(const std::string& name) { Variable* var = GetVariable(name); if (var != nullptr) { return var; @@ -47,10 +47,9 @@ Variable* Scope::GetOrCreateVariable(const std::string &name) { } } -bool Scope::HaveVariable(const std::string &name) { +bool Scope::HaveVariable(const std::string& name) { return vars_.count(name) != 0; } } // namespace framework } // namespace paddle - diff --git a/paddle/framework/scope.h b/paddle/framework/scope.h index ad1ed2ddab..90c8141e4f 100644 --- a/paddle/framework/scope.h +++ b/paddle/framework/scope.h @@ -1,7 +1,7 @@ #pragma once -#include #include +#include #include "paddle/framework/variable.h" #include "paddle/utils/Error.h" @@ -11,16 +11,16 @@ namespace framework { const static Error AlreadyCreated("Variable has already been created"); /** - * Scope is an association of a name to Variable. All variables belong to `Scope`. - * You need to specify a scope to run a Net, i.e., `net.Run(&scope)`. One net can - * run in different scopes and update different variable in the scope. + * Scope is an association of a name to Variable. All variables belong to + * `Scope`. You need to specify a scope to run a Net, i.e., `net.Run(&scope)`. + * One net can run in different scopes and update different variable in the + * scope. */ class Scope { public: Scope() {} - explicit Scope(const std::shared_ptr &scope): - parent_(scope) {} + explicit Scope(const std::shared_ptr& scope) : parent_(scope) {} ~Scope() {} @@ -29,8 +29,7 @@ class Scope { Error __must_check CreateVariable(const std::string& name); // Get Variable from this Scope, this function will recursive find Variable - // from it's parent scope. - // Return nullptr if not found. + // from it's parent scope. Return nullptr if not found. Variable* GetVariable(const std::string& name) const; // find and return Variables in the scope it self. 
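As a quick illustration of the Scope API being polished above, here is a
minimal editorial sketch (names are taken from the diffs; it is not part of
any patch, and error handling is abbreviated):

    #include "paddle/framework/scope.h"

    using paddle::framework::Scope;
    using paddle::framework::Variable;

    Scope scope;
    paddle::Error err = scope.CreateVariable("w");  // first creation succeeds
    if (err.isOK()) {
      Variable* found = scope.GetVariable("w");         // non-null: found locally
      Variable* made = scope.GetOrCreateVariable("b");  // created on demand
    }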
@@ -44,7 +43,7 @@ class Scope { private: std::unordered_map> vars_; - std::shared_ptr parent_ {nullptr}; + std::shared_ptr parent_{nullptr}; }; } // namespace framework From 80d915049f62630bb902c0e62ffc4a3dffef49de Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Wed, 28 Jun 2017 00:29:05 +0800 Subject: [PATCH 096/542] " add elementtype in optimizer" --- go/pserver/optimizer.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go index 5824a529fc..4872139c56 100644 --- a/go/pserver/optimizer.go +++ b/go/pserver/optimizer.go @@ -2,7 +2,7 @@ package pserver // #cgo pkg-config: protobuf // #cgo CFLAGS: -I ../../ -// FIXME: ldflags contain "build" path +// //FIXME: ldflags contain "build" path // #cgo LDFLAGS: ../../build/go/pserver/cclient/libpaddle_go_optimizer.a -lstdc++ // #include "paddle/optimizer/optimizer.h" // #include @@ -37,6 +37,7 @@ func cArrayToSlice(p unsafe.Pointer, len int) []byte { func newOptimizer(paramWithConfigs ParameterWithConfig) *optimizer { o := &optimizer{} + o.ElementType = paramWithConfigs.Param.ElementType p := paramWithConfigs.Param c := paramWithConfigs.Config var cbuffer unsafe.Pointer From b49c9baa3501210b5fe859723174570381a6dd60 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Wed, 28 Jun 2017 00:32:34 +0800 Subject: [PATCH 097/542] "remove unuse comment" --- go/pserver/optimizer.go | 1 - 1 file changed, 1 deletion(-) diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go index 4872139c56..5575fab3c0 100644 --- a/go/pserver/optimizer.go +++ b/go/pserver/optimizer.go @@ -51,7 +51,6 @@ func newOptimizer(paramWithConfigs ParameterWithConfig) *optimizer { } func (o *optimizer) GetWeights() []byte { - // FIXME: get weigths from optimizer has bug var buffer unsafe.Pointer buffer_len := C.paddle_optimizer_get_weights(o.opt, &buffer) return cArrayToSlice(buffer, int(buffer_len)) From a2fabcc1144c480d0f972026194693b80ca6c397 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Wed, 28 Jun 2017 00:57:52 +0800 Subject: [PATCH 098/542] "fix client test" --- go/pserver/client_test.go | 9 +++++++-- go/pserver/optimizer.go | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/go/pserver/client_test.go b/go/pserver/client_test.go index d0371a26a1..d746bf3f26 100644 --- a/go/pserver/client_test.go +++ b/go/pserver/client_test.go @@ -1,6 +1,7 @@ package pserver_test import ( + "io/ioutil" "net" "net/http" "net/rpc" @@ -71,18 +72,22 @@ func TestClientFull(t *testing.T) { } const numParameter = 100 + config, err := ioutil.ReadFile("./cclient/test/testdata/optimizer.pb.txt") + if err != nil { + t.Fatalf("read optimizer proto failed") + } for i := 0; i < numParameter; i++ { var p pserver.Parameter p.Name = "p_" + strconv.Itoa(i) p.ElementType = pserver.Float32 p.Content = make([]byte, (i+1)*100) - err := c.InitParam(pserver.ParameterWithConfig{Param: p}) + err := c.InitParam(pserver.ParameterWithConfig{Param: p, Config: config}) if err != nil { t.Fatal(err) } } - err := c.FinishInitParams() + err = c.FinishInitParams() if err != nil { t.Fatal(err) } diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go index 5575fab3c0..9cb2801f30 100644 --- a/go/pserver/optimizer.go +++ b/go/pserver/optimizer.go @@ -61,6 +61,7 @@ func (o *optimizer) UpdateParameter(g Gradient) error { return fmt.Errorf("Name: %s, parameter and gradient element type not match, parameter: %v, gradient: %v", g.Name, o.ElementType, g.ElementType) } + fmt.Println(g) r := C.paddle_update_parameter(o.opt, 
C.paddle_element_type(g.ElementType), unsafe.Pointer(&g.Content[0]), C.int(len(g.Content))) if r != 0 { return fmt.Errorf("optimizer update returned error code: %d", r) From dd08d337c0138c9def5f7ce95f88bae5599e5f92 Mon Sep 17 00:00:00 2001 From: liaogang Date: Wed, 28 Jun 2017 01:30:57 +0800 Subject: [PATCH 099/542] FIX: fix cmake type error --- CMakeLists.txt | 2 ++ paddle/CMakeLists.txt | 10 +--------- paddle/memory/CMakeLists.txt | 2 +- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3c719d35ec..b779caefb9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,6 +27,7 @@ if(NOT CMAKE_CROSSCOMPILING) endif(NOT CMAKE_CROSSCOMPILING) find_package(Git REQUIRED) find_package(Threads REQUIRED) +find_package(Boost QUIET) include(simd) @@ -109,6 +110,7 @@ include_directories("${PROJ_ROOT}") include_directories("${PROJ_ROOT}/paddle/cuda/include") include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto") include_directories("${CMAKE_CURRENT_BINARY_DIR}/go/pserver/cclient") +include_directories(${Boost_INCLUDE_DIRS}) set(EXTERNAL_LIBS ${GFLAGS_LIBRARIES} diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt index 0cddb95244..979b68e827 100644 --- a/paddle/CMakeLists.txt +++ b/paddle/CMakeLists.txt @@ -10,17 +10,9 @@ add_subdirectory(trainer) add_subdirectory(scripts) add_subdirectory(optimizer) add_subdirectory(strings) -add_subdirectory(memory) - -# Do not build go directory until go cmake is working smoothly. -# if(CMAKE_Go_COMPILER) -# add_subdirectory(go) -# endif() - -find_package(Boost QUIET) if(Boost_FOUND) - include_directories(${Boost_INCLUDE_DIRS}) + add_subdirectory(memory) add_subdirectory(platform) add_subdirectory(framework) endif() diff --git a/paddle/memory/CMakeLists.txt b/paddle/memory/CMakeLists.txt index 8662512496..e74ce75c93 100644 --- a/paddle/memory/CMakeLists.txt +++ b/paddle/memory/CMakeLists.txt @@ -3,5 +3,5 @@ add_subdirectory(detail) if(${WITH_GPU}) nv_library(memory SRCS memory.cc) else(${WITH_GPU}) - cc_library(memory SRCS memroy.cc) + cc_library(memory SRCS memory.cc) endif(${WITH_GPU}) From dde0da9e0ffee7a49510061a139ab2abc7ab55b9 Mon Sep 17 00:00:00 2001 From: liaogang Date: Wed, 28 Jun 2017 01:31:24 +0800 Subject: [PATCH 100/542] ENH: Add cuda.h in platform --- paddle/platform/cuda.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 paddle/platform/cuda.h diff --git a/paddle/platform/cuda.h b/paddle/platform/cuda.h new file mode 100644 index 0000000000..864a5d3340 --- /dev/null +++ b/paddle/platform/cuda.h @@ -0,0 +1,41 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#ifndef PADDLE_ONLY_CPU + +#include +#include + +namespace paddle { +namespace platform { + +inline void throw_on_error(cudaError_t e, const char* message) { + if (e) { + throw thrust::system_error(e, thrust::cuda_category(), message); + } +} + +int GetDeviceCount(void) { + int count; + throw_on_error(cudaGetDeviceCount(&count), + "cudaGetDeviceCount failed"); + return count; +} + +} // namespace platform +} // namespace paddle + +#endif // PADDLE_ONLY_CPU From 29c7512b3ce13ca7b89d3ff3f4aea2c7d7f27478 Mon Sep 17 00:00:00 2001 From: liaogang Date: Wed, 28 Jun 2017 01:31:46 +0800 Subject: [PATCH 101/542] FIX: fix memory.h/cc --- paddle/memory/memory.cc | 23 ++++++++++++++--------- paddle/memory/memory.h | 8 ++++---- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/paddle/memory/memory.cc b/paddle/memory/memory.cc index ca3c01ebdb..0d123d99e2 100644 --- a/paddle/memory/memory.cc +++ b/paddle/memory/memory.cc @@ -13,41 +13,46 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/memory/memory.h" +#include "paddle/memory/detail/buddy_allocator.h" +#include "paddle/memory/detail/system_allocator.h" +#include "paddle/platform/assert.h" -#include "paddle/memory/detail/cpu_allocator.h" -#include "paddle/memory/detail/gpu_allocator.h" +#include namespace paddle { namespace memory { -void Alloc(paddle::platform::Place pl, size_t size) { +void* Alloc(platform::Place pl, size_t size) { #ifndef PADDLE_ONLY_CPU if (paddle::platform::is_gpu_place(pl)) { - return GetGPUBuddyAllocator(pl.device)->Alloc(size); + size_t gpu_id = boost::get(pl).device; + return detail::GetGPUBuddyAllocator(gpu_id)->Alloc(size); } #endif // PADDLE_ONLY_CPU PADDLE_ASSERT(paddle::platform::is_cpu_place(pl)); - return GetCPUBuddyAllocator()->Alloc(size); + return detail::GetCPUBuddyAllocator()->Alloc(size); } void Free(paddle::platform::Place pl, void* p) { #ifndef PADDLE_ONLY_CPU if (paddle::platform::is_gpu_place(pl)) { - GetGPUBuddyAllocator(pl.device)->Free(p); + size_t gpu_id = boost::get(pl).device; + detail::GetGPUBuddyAllocator(gpu_id)->Free(p); } #endif // PADDLE_ONLY_CPU PADDLE_ASSERT(paddle::platform::is_cpu_place(pl)); - GetCPUBuddyAllocator()->Free(p); + detail::GetCPUBuddyAllocator()->Free(p); } size_t Used(paddle::platform::Place pl) { #ifndef PADDLE_ONLY_CPU if (paddle::platform::is_gpu_place(pl)) { - return GetGPUBuddyAllocator(pl.device)->Used(); + size_t gpu_id = boost::get(pl).device; + return detail::GetGPUBuddyAllocator(gpu_id)->Used(); } #endif // PADDLE_ONLY_CPU PADDLE_ASSERT(paddle::platform::is_cpu_place(pl)); - return GetCPUBuddyAllocator()->Used(); + return detail::GetCPUBuddyAllocator()->Used(); } } // namespace memory diff --git a/paddle/memory/memory.h b/paddle/memory/memory.h index 0bc609205e..a33092bade 100644 --- a/paddle/memory/memory.h +++ b/paddle/memory/memory.h @@ -14,14 +14,14 @@ limitations under the License. 
 */
 
 #pragma once
 
-#include "paddle/frameowork/place.h"
+#include "paddle/platform/place.h"
 
 namespace paddle {
 namespace memory {
 
-void* Alloc(paddle::framework::Place, size_t);
-void Free(paddle::framework::Place, void*);
-size_t Used(paddle::framework::Place);
+void* Alloc(paddle::platform::Place, size_t);
+void Free(paddle::platform::Place, void*);
+size_t Used(paddle::platform::Place);
 
 } // namespace memory
 } // namespace paddle
From b22dd12854150c31b9cb9e3e550bdee4b5df5977 Mon Sep 17 00:00:00 2001
From: liaogang
Date: Wed, 28 Jun 2017 01:32:06 +0800
Subject: [PATCH 102/542] ENH: Add buddy allocator draft

---
 paddle/memory/detail/CMakeLists.txt    |  4 +-
 paddle/memory/detail/buddy_allocator.h | 79 ++++++++++++++++++++++++++
 2 files changed, 81 insertions(+), 2 deletions(-)
 create mode 100644 paddle/memory/detail/buddy_allocator.h

diff --git a/paddle/memory/detail/CMakeLists.txt b/paddle/memory/detail/CMakeLists.txt
index c16dfadeb2..cd5622203f 100644
--- a/paddle/memory/detail/CMakeLists.txt
+++ b/paddle/memory/detail/CMakeLists.txt
@@ -1,5 +1,5 @@
 if(${WITH_GPU})
-  nv_test(system_allocator_test SRCS system_allocator_test.cc)
+  nv_test(system_allocator_test SRCS system_allocator_test.cc DEPS gflags glog)
 else(${WITH_GPU})
-  cc_test(system_allocator_test SRCS system_allocator_test.cc)
+  cc_test(system_allocator_test SRCS system_allocator_test.cc DEPS gflags glog)
 endif(${WITH_GPU})
diff --git a/paddle/memory/detail/buddy_allocator.h b/paddle/memory/detail/buddy_allocator.h
new file mode 100644
index 0000000000..35e96fd507
--- /dev/null
+++ b/paddle/memory/detail/buddy_allocator.h
@@ -0,0 +1,79 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/memory/detail/system_allocator.h"
+
+namespace paddle {
+namespace memory {
+namespace detail {
+
+template <typename Allocator>
+class BuddyAllocator {
+ public:
+  // TODO(gangliao): This is a draft, add Buddy Allocator Algorithm soon
+  BuddyAllocator() {}
+  ~BuddyAllocator() {}
+
+ public:
+  void* Alloc(size_t size) {
+    return Allocator::Alloc(size);
+  }
+  void Free(void*) {
+    // Because all info like size are stored in meta data,
+    // thus it's duplicate if add the parameter `size` in
+    // `Free(void*)` interface.
+  }
+  size_t Used();
+
+ public:
+  BuddyAllocator(const BuddyAllocator&) = delete;
+  BuddyAllocator& operator=(const BuddyAllocator&) = delete;
+
+ private:
+  size_t min_alloc_size_;
+  size_t max_alloc_size_;
+
+ private:
+  std::mutex mutex_;
+};
+
+BuddyAllocator<CPUAllocator>* GetCPUBuddyAllocator() {
+  static BuddyAllocator<CPUAllocator>* a = nullptr;
+  if (a == nullptr) {
+    a = new BuddyAllocator<CPUAllocator>();
+  }
+  return a;
+}
+
+#ifndef PADDLE_ONLY_CPU  // The following code are for CUDA.
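// Annotation (not part of the patch): mirroring GetCPUBuddyAllocator above,
// the function below keeps one lazily constructed buddy allocator per GPU;
// the array is sized from platform::GetDeviceCount() on first use and is
// never freed, the usual pattern for process-lifetime singletons.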
+ +BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { + static BuddyAllocator** as = NULL; + if (as == NULL) { + int gpu_num = platform::GetDeviceCount(); + as = new BuddyAllocator*[gpu_num]; + for (int gpu = 0; gpu < gpu_num; gpu++) { + as[gpu] = new BuddyAllocator(); + } + } + return as[gpu_id]; +} + +#endif // PADDLE_ONLY_CPU + +} // namespace detail +} // namespace memory +} // namespace paddle From 79373dabc8d2e4edc87fbef40efdfa1f54b35a9f Mon Sep 17 00:00:00 2001 From: liaogang Date: Wed, 28 Jun 2017 01:33:06 +0800 Subject: [PATCH 103/542] TEST: Add test for system allocator and deleter --- paddle/memory/detail/system_allocator.h | 108 ++++++------------ paddle/memory/detail/system_allocator_test.cc | 40 ++++--- 2 files changed, 60 insertions(+), 88 deletions(-) diff --git a/paddle/memory/detail/system_allocator.h b/paddle/memory/detail/system_allocator.h index 1768f9a0da..f411019854 100644 --- a/paddle/memory/detail/system_allocator.h +++ b/paddle/memory/detail/system_allocator.h @@ -18,107 +18,69 @@ limitations under the License. */ #include // for mlock and munlock #include // for malloc and free -#ifndef PADDLE_ONLY_CPU -#include -#include -#endif // PADDLE_ONLY_CPU - +#include #include "paddle/platform/assert.h" +#include "paddle/platform/cuda.h" + +DEFINE_bool(uses_pinned_memory, false, + "If set, allocate cpu/gpu pinned memory."); namespace paddle { namespace memory { namespace detail { -class CPUDeleter { - public: - CPUDeleter(void* ptr, size_t size, bool locked) - : ptr_(ptr), size_(size), locked_(locked) {} - - void* Ptr() { return ptr_; } - - void operator()(void* ptr) { - PADDLE_ASSERT(ptr == ptr_); - if (ptr_ != nullptr && locked_) { - munlock(ptr_, size_); - } - std::free(ptr_); - } - - private: - void* ptr_; - size_t size_; - bool locked_; -}; - -// CPUAllocator calls mlock, which returns pinned -// and locked memory as staging areas for data exchange between host -// and device. Allocates too much would reduce the amount of memory -// available to the system for paging. So, by default, we should use -// CPUAllocator. -template +// If uses_pinned_memory is true, CPUAllocator calls mlock, which +// returns pinned and locked memory as staging areas for data exchange +// between host and device. Allocates too much would reduce the amount +// of memory available to the system for paging. So, by default, we +// should set false to uses_pinned_memory. class CPUAllocator { public: - static CPUDeleter Alloc(size_t size) { + static void* Alloc(size_t size) { void* p = std::malloc(size); - if (p != nullptr && lock_memory) { + if (p != nullptr && FLAGS_uses_pinned_memory) { mlock(p, size); } - return CPUDeleter(p, size, lock_memory); + return p; } -}; - -#ifndef PADDLE_ONLY_CPU // The following code are for CUDA. - -namespace { -inline void throw_on_error(cudaError_t e, const char* message) { - if (e) { - throw thrust::system_error(e, thrust::cuda_category(), message); - } -} -} // namespace - -class GPUDeleter { - public: - GPUDeleter(void* ptr, size_t size, bool staging) - : ptr_(ptr), size_(size), staging_(staging) {} - - void* Ptr() { return ptr_; } - void operator()(void* ptr) { - PADDLE_ASSERT(ptr == ptr_); - // Purposefully allow cudaErrorCudartUnloading, because - // that is returned if you ever call cudaFree after the - // driver has already shutdown. This happens only if the - // process is terminating, in which case we don't care if - // cudaFree succeeds. - cudaError_t err = staging_ ? 
cudaFreeHost(ptr) : cudaFree(ptr); - if (err != cudaErrorCudartUnloading) { - throw_on_error(err, "cudaFree{Host} failed"); + static void Free(void* p, size_t size) { + if (p != nullptr && FLAGS_uses_pinned_memory) { + munlock(p, size); } + std::free(p); } - - private: - void* ptr_; - size_t size_; - bool staging_; }; +#ifndef PADDLE_ONLY_CPU // The following code are for CUDA. + // GPUAllocator calls cudaHostMalloc, which returns // pinned and locked memory as staging areas for data exchange // between host and device. Allocates too much would reduce the // amount of memory available to the system for paging. So, by // default, we should use GPUAllocator. -template class GPUAllocator { public: - static GPUDeleter Alloc(size_t size) { + static void* Alloc(size_t size) { void* p = 0; - cudaError_t result = - staging ? cudaMallocHost(&p, size) : cudaMalloc(&p, size); + cudaError_t result = FLAGS_uses_pinned_memory ? cudaMallocHost(&p, size) + : cudaMalloc(&p, size); if (result != cudaSuccess) { cudaGetLastError(); // clear error if there is any. } - return GPUDeleter(result == cudaSuccess ? p : nullptr, size, staging); + return result == cudaSuccess ? p : nullptr; + } + + static void Free(void* p, size_t size) { + // Purposefully allow cudaErrorCudartUnloading, because + // that is returned if you ever call cudaFree after the + // driver has already shutdown. This happens only if the + // process is terminating, in which case we don't care if + // cudaFree succeeds. + cudaError_t err = FLAGS_uses_pinned_memory ? cudaFreeHost(p) : cudaFree(p); + if (err != cudaErrorCudartUnloading) { + platform::throw_on_error(err, "cudaFree{Host} failed"); + } } }; diff --git a/paddle/memory/detail/system_allocator_test.cc b/paddle/memory/detail/system_allocator_test.cc index fec70a65b7..829d3558ba 100644 --- a/paddle/memory/detail/system_allocator_test.cc +++ b/paddle/memory/detail/system_allocator_test.cc @@ -17,34 +17,44 @@ limitations under the License. 
*/ #include #include +#include "glog/logging.h" #include "gtest/gtest.h" template -void TestAllocator() { - { - auto d = Allocator::Alloc(sizeof(int)); - EXPECT_NE(d.Ptr(), nullptr); - std::unique_ptr p(static_cast(d.Ptr()), d); - } - { - auto d = Allocator::Alloc(0); - EXPECT_EQ(d.Ptr(), nullptr); - std::unique_ptr p(static_cast(d.Ptr()), d); - } +void TestAllocator(void* p) { + p = Allocator::Alloc(1024); + + int* i = static_cast(p); + std::shared_ptr ptr(i, [](int* p) { Allocator::Free(p, 1024); }); + + EXPECT_NE(p, nullptr); } TEST(CPUAllocator, NoLockMem) { - TestAllocator>(); + void* p = nullptr; + FLAGS_uses_pinned_memory = false; + TestAllocator(p); + EXPECT_EQ(p, nullptr); } + TEST(CPUAllocator, LockMem) { - TestAllocator>(); + void* p = nullptr; + FLAGS_uses_pinned_memory = true; + TestAllocator(p); + EXPECT_EQ(p, nullptr); } #ifndef PADDLE_ONLY_CPU TEST(GPUAllocator, NoStaging) { - TestAllocator>(); + void* p = nullptr; + FLAGS_uses_pinned_memory = false; + TestAllocator(p); + EXPECT_EQ(p, nullptr); } TEST(GPUAllocator, Staging) { - TestAllocator>(); + void* p = nullptr; + FLAGS_uses_pinned_memory = true; + TestAllocator(p); + EXPECT_EQ(p, nullptr); } #endif // PADDLE_ONLY_CPU From 2c188a20de53741e6f965738636eb7d6f797a821 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Tue, 27 Jun 2017 12:00:41 -0700 Subject: [PATCH 104/542] Follow QingQing's suggestion --- paddle/framework/tensor.h | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index a658537430..8962b76a12 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -20,23 +20,19 @@ class Tensor { using paddle::platform::get_place; public: - explicit Tensor(DDim dims) : dims_(dims), place_(get_place()) {} - explicit Tensor(DDim dims, Place place) : dims_(dims), place_(place) {} - template const T* data() const { - PADDLE_ASSERT(holder_ != nullptr); - PADDLE_ASSERT(holder_->Place() == place_); - PADDLE_ASSERT(holder_->Size() >= dims_.product() * sizeof(T)); + PADDLE_ASSERT(holder_ != nullptr, + "Tensor::data must be called after Tensor::mutable_data"); return static_cast(holder->Ptr()); } template ::value>::type> - T* mutable_data() { - if (holder_ == nullptr || holder_->Place() != place_ || - holder_->Size() < dims_.product() * sizeof(T)) { - holder_.reset(new PlaceholderImpl(place_, dims.product() * sizeof(T))); + T* mutable_data(DDim dims, Place place) { + if (holder_ == nullptr || holder_->Place() != place || + holder_->Size() < dims.product() * sizeof(T)) { + holder_.reset(new PlaceholderImpl(place, dims.product() * sizeof(T))); } return static_cast(holder_->Ptr()); } @@ -44,16 +40,7 @@ class Tensor { template ::value>::type> T* mutable_data(DDim dims) { - dims_ = dims; - return mutable_data(); - } - - template ::value>::type> - T* mutable_data(DDim dims, Place place) { - dims_ = dims; - place_ = place; - return mutable_data(); + return mutable_data(dims, paddle::platform::get_place()); } private: @@ -69,7 +56,7 @@ class Tensor { template struct PlaceholderImpl : public Placeholder { PlaceholderImpl(Place pl, size_t size) - : ptr_(memory::Alloc(pl, size), paddle::memory::Deleter(pl)), + : ptr_(paddle::memory::Alloc(pl, size), paddle::memory::Deleter(pl)), place_(pl), size_(size) {} @@ -83,8 +70,6 @@ class Tensor { }; std::unique_ptr holder_; // holds the memory block if allocated. - DDim dims_; // could be smallers than the holder_->Size(). 
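// Annotation (not part of the patch): with dims_ and place_ removed, a
// Tensor no longer caches its shape or device; callers now pass both to
// mutable_data<T>(dims, place), which reallocates only when the holder is
// absent, lives on a different place, or is too small -- the three checks
// visible in mutable_data above.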
-  paddle::platform::Place place_;
};

}  // namespace framework

From b8f5922d88e5f7949eb9a469f761ad49981d677a Mon Sep 17 00:00:00 2001
From: Yi Wang
Date: Tue, 27 Jun 2017 16:32:24 -0700
Subject: [PATCH 105/542] Make CPUAllocator and GPUAllocator subclasses of
 SystemAllocator

---
 paddle/memory/detail/CMakeLists.txt           |  6 +-
 paddle/memory/detail/system_allocator.h       | 80 +++++--------------
 paddle/memory/detail/system_allocator_test.cc | 57 +++++++------
 3 files changed, 59 insertions(+), 84 deletions(-)

diff --git a/paddle/memory/detail/CMakeLists.txt b/paddle/memory/detail/CMakeLists.txt
index cd5622203f..72d3749ad7 100644
--- a/paddle/memory/detail/CMakeLists.txt
+++ b/paddle/memory/detail/CMakeLists.txt
@@ -1,5 +1,7 @@
 if(${WITH_GPU})
-  nv_test(system_allocator_test SRCS system_allocator_test.cc DEPS gflags glog)
+  nv_library(system_allocator SRCS system_allocator.cc DEPS gflags)
+  nv_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator gflags)
 else(${WITH_GPU})
-  cc_test(system_allocator_test SRCS system_allocator_test.cc DEPS gflags glog)
+  cc_library(system_allocator SRCS system_allocator.cc DEPS gflags)
+  cc_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator gflags)
 endif(${WITH_GPU})

diff --git a/paddle/memory/detail/system_allocator.h b/paddle/memory/detail/system_allocator.h
index f411019854..184b383f7f 100644
--- a/paddle/memory/detail/system_allocator.h
+++ b/paddle/memory/detail/system_allocator.h
@@ -14,76 +14,38 @@ limitations under the License. */

 #pragma once

-#include <stddef.h>    // for size_t
-#include <sys/mman.h>  // for mlock and munlock
-#include <cstdlib>     // for malloc and free
-
-#include <gflags/gflags.h>
-#include "paddle/platform/assert.h"
-#include "paddle/platform/cuda.h"
-
-DEFINE_bool(uses_pinned_memory, false,
-            "If set, allocate cpu/gpu pinned memory.");
+#include <stddef.h>  // for size_t

 namespace paddle {
 namespace memory {
 namespace detail {

-// If uses_pinned_memory is true, CPUAllocator calls mlock, which
-// returns pinned and locked memory as staging areas for data exchange
-// between host and device. Allocates too much would reduce the amount
-// of memory available to the system for paging. So, by default, we
-// should set false to uses_pinned_memory.
-class CPUAllocator {
+// SystemAllocator is the parent class of CPUAllocator and
+// GPUAllocator. A BuddyAllocator object uses a SystemAllocator*
+// pointing to the underlying system allocator. An alternative to
+// this class hierarchy is to pass a system allocator class to
+// BuddyAllocator as a template parameter. This approach makes
+// BuddyAllocator a class template, and its very complicated
+// algorithm would make buddy_allocator.h messy.
+class SystemAllocator {
 public:
-  static void* Alloc(size_t size) {
-    void* p = std::malloc(size);
-    if (p != nullptr && FLAGS_uses_pinned_memory) {
-      mlock(p, size);
-    }
-    return p;
-  }
-
-  static void Free(void* p, size_t size) {
-    if (p != nullptr && FLAGS_uses_pinned_memory) {
-      munlock(p, size);
-    }
-    std::free(p);
-  }
+  virtual ~SystemAllocator() {}
+  virtual void* Alloc(size_t size) = 0;
+  virtual void Free(void* p, size_t size) = 0;
 };

-#ifndef PADDLE_ONLY_CPU  // The following code are for CUDA.
-
-// GPUAllocator calls cudaHostMalloc, which returns
-// pinned and locked memory as staging areas for data exchange
-// between host and device. Allocates too much would reduce the
-// amount of memory available to the system for paging. So, by
-// default, we should use GPUAllocator.
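The comment above motivates runtime polymorphism over a template parameter. A self-contained sketch of that design choice, with a hypothetical Pool class standing in for the buddy allocator (none of the names below beyond SystemAllocator are Paddle's):

```cpp
#include <cstddef>
#include <cstdlib>
#include <iostream>

// The consumer sees only the abstract interface, so it does not have to
// become a class template itself.
class SystemAllocator {
 public:
  virtual ~SystemAllocator() {}
  virtual void* Alloc(size_t size) = 0;
  virtual void Free(void* p, size_t size) = 0;
};

class CPUAllocator : public SystemAllocator {
 public:
  void* Alloc(size_t size) override { return std::malloc(size); }
  void Free(void* p, size_t size) override { std::free(p); }
};

class Pool {
 public:
  explicit Pool(SystemAllocator* a) : allocator_(a) {}
  void* Grow(size_t size) { return allocator_->Alloc(size); }

 private:
  SystemAllocator* allocator_;  // runtime polymorphism, not a template
};

int main() {
  CPUAllocator cpu;
  Pool pool(&cpu);
  void* p = pool.Grow(1024);
  std::cout << (p != nullptr) << "\n";
  cpu.Free(p, 1024);
  return 0;
}
```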
-class GPUAllocator { +class CPUAllocator : public SystemAllocator { public: - static void* Alloc(size_t size) { - void* p = 0; - cudaError_t result = FLAGS_uses_pinned_memory ? cudaMallocHost(&p, size) - : cudaMalloc(&p, size); - if (result != cudaSuccess) { - cudaGetLastError(); // clear error if there is any. - } - return result == cudaSuccess ? p : nullptr; - } - - static void Free(void* p, size_t size) { - // Purposefully allow cudaErrorCudartUnloading, because - // that is returned if you ever call cudaFree after the - // driver has already shutdown. This happens only if the - // process is terminating, in which case we don't care if - // cudaFree succeeds. - cudaError_t err = FLAGS_uses_pinned_memory ? cudaFreeHost(p) : cudaFree(p); - if (err != cudaErrorCudartUnloading) { - platform::throw_on_error(err, "cudaFree{Host} failed"); - } - } + virtual void* Alloc(size_t size); + virtual void Free(void* p, size_t size); }; +#ifndef PADDLE_ONLY_CPU +class GPUAllocator : public SystemAllocator { + public: + virtual void* Alloc(size_t size); + virtual void Free(void* p, size_t size); +}; #endif // PADDLE_ONLY_CPU } // namespace detail diff --git a/paddle/memory/detail/system_allocator_test.cc b/paddle/memory/detail/system_allocator_test.cc index 829d3558ba..c461d8ac62 100644 --- a/paddle/memory/detail/system_allocator_test.cc +++ b/paddle/memory/detail/system_allocator_test.cc @@ -17,44 +17,55 @@ limitations under the License. */ #include #include -#include "glog/logging.h" +#include "gflags/gflags.h" #include "gtest/gtest.h" -template -void TestAllocator(void* p) { - p = Allocator::Alloc(1024); +DECLARE_bool(use_pinned_memory); - int* i = static_cast(p); - std::shared_ptr ptr(i, [](int* p) { Allocator::Free(p, 1024); }); +void TestAllocator(paddle::memory::detail::SystemAllocator* a, size_t size) { + bool freed = false; + { + void* p = a->Alloc(size); + if (size > 0) { + EXPECT_NE(p, nullptr); + } else { + EXPECT_EQ(p, nullptr); + } - EXPECT_NE(p, nullptr); + int* i = static_cast(p); + std::shared_ptr ptr(i, [&freed, a, size](void* p) { + freed = true; + a->Free(p, size); + }); + } + EXPECT_TRUE(freed); } TEST(CPUAllocator, NoLockMem) { - void* p = nullptr; - FLAGS_uses_pinned_memory = false; - TestAllocator(p); - EXPECT_EQ(p, nullptr); + FLAGS_use_pinned_memory = false; + paddle::memory::detail::CPUAllocator a; + TestAllocator(&a, 2048); + TestAllocator(&a, 0); } TEST(CPUAllocator, LockMem) { - void* p = nullptr; - FLAGS_uses_pinned_memory = true; - TestAllocator(p); - EXPECT_EQ(p, nullptr); + FLAGS_use_pinned_memory = true; + paddle::memory::detail::CPUAllocator a; + TestAllocator(&a, 2048); + TestAllocator(&a, 0); } #ifndef PADDLE_ONLY_CPU TEST(GPUAllocator, NoStaging) { - void* p = nullptr; - FLAGS_uses_pinned_memory = false; - TestAllocator(p); - EXPECT_EQ(p, nullptr); + FLAGS_use_pinned_memory = false; + paddle::memory::detail::GPUAllocator a; + TestAllocator(&a, 2048); + TestAllocator(&a, 0); } TEST(GPUAllocator, Staging) { - void* p = nullptr; - FLAGS_uses_pinned_memory = true; - TestAllocator(p); - EXPECT_EQ(p, nullptr); + FLAGS_use_pinned_memory = true; + paddle::memory::detail::GPUAllocator a; + TestAllocator(&a, 2048); + TestAllocator(&a, 0); } #endif // PADDLE_ONLY_CPU From 2f52cb7909c5e8f372015454e3af33166713bfa7 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Tue, 27 Jun 2017 23:46:54 +0000 Subject: [PATCH 106/542] fix pserver test --- go/pserver/client_test.go | 2 +- go/pserver/service_test.go | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git 
a/go/pserver/client_test.go b/go/pserver/client_test.go index 6ecf1fa08a..4a62ae88a4 100644 --- a/go/pserver/client_test.go +++ b/go/pserver/client_test.go @@ -31,7 +31,7 @@ func init() { port[i] = p go func(l net.Listener) { - s, err := pserver.NewService("", time.Second*5) + s, err := pserver.NewService("", 1, time.Second*5) if err != nil { panic(err) } diff --git a/go/pserver/service_test.go b/go/pserver/service_test.go index f317535592..1d84f15d78 100644 --- a/go/pserver/service_test.go +++ b/go/pserver/service_test.go @@ -10,7 +10,7 @@ import ( ) func TestFull(t *testing.T) { - s, err := pserver.NewService("", time.Second*5) + s, err := pserver.NewService("", 1, time.Second*5) if err != nil { t.Error(err) } @@ -75,7 +75,7 @@ func TestFull(t *testing.T) { } func TestMultipleInit(t *testing.T) { - s, err := pserver.NewService("", time.Second*5) + s, err := pserver.NewService("", 1, time.Second*5) if err != nil { t.Error(err) } @@ -91,7 +91,7 @@ func TestMultipleInit(t *testing.T) { } func TestUninitialized(t *testing.T) { - s, err := pserver.NewService("", time.Second*5) + s, err := pserver.NewService("", 1, time.Second*5) err = s.SendGrad(pserver.Gradient{}, nil) if err.Error() != pserver.Uninitialized { t.FailNow() @@ -99,7 +99,7 @@ func TestUninitialized(t *testing.T) { } func TestBlockUntilInitialized(t *testing.T) { - s, err := pserver.NewService("", time.Second*5) + s, err := pserver.NewService("", 1, time.Second*5) if err != nil { t.Error(err) } From 3e087f763e9c6c15a4f1d542fb3bdc327f7441c7 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Tue, 27 Jun 2017 16:48:25 -0700 Subject: [PATCH 107/542] Add buddy_allocator.cc and system_allocator.cc --- paddle/memory/detail/buddy_allocator.cc | 35 ++++++++ paddle/memory/detail/buddy_allocator.h | 76 ++++++++-------- paddle/memory/detail/system_allocator.cc | 90 +++++++++++++++++++ paddle/memory/detail/system_allocator_test.cc | 24 ++--- 4 files changed, 177 insertions(+), 48 deletions(-) create mode 100644 paddle/memory/detail/buddy_allocator.cc create mode 100644 paddle/memory/detail/system_allocator.cc diff --git a/paddle/memory/detail/buddy_allocator.cc b/paddle/memory/detail/buddy_allocator.cc new file mode 100644 index 0000000000..895bf319d7 --- /dev/null +++ b/paddle/memory/detail/buddy_allocator.cc @@ -0,0 +1,35 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
 */

+#pragma once
+
+#include "paddle/memory/detail/system_allocator.h"
+
+namespace paddle {
+namespace memory {
+namespace detail {
+
+BuddyAllocator::BuddyAllocator(size_t pool_size, size_t max_pools,
+                               SystemAllocator* system_allocator)
+    : pool_size_(pool_size),
+      max_pools_(max_pools),
+      system_allocator_(system_allocator) {
+  PADDLE_ASSERT(pool_size > 0);
+  PADDLE_ASSERT(max_pools > 0);
+  PADDLE_ASSERT(system_allocator != nullptr);
+}
+
+}  // namespace detail
+}  // namespace memory
+}  // namespace paddle

diff --git a/paddle/memory/detail/buddy_allocator.h b/paddle/memory/detail/buddy_allocator.h
index 35e96fd507..129b137ed7 100644
--- a/paddle/memory/detail/buddy_allocator.h
+++ b/paddle/memory/detail/buddy_allocator.h
@@ -1,16 +1,16 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at

-    http://www.apache.org/licenses/LICENSE-2.0
+   http://www.apache.org/licenses/LICENSE-2.0

-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */

 #pragma once

@@ -20,34 +20,38 @@
 namespace paddle {
 namespace memory {
 namespace detail {

-template <typename Allocator>
 class BuddyAllocator {
- public:
-  // TODO(gangliao): This is a draft, add Buddy Allocator Algorithm soon
-  BuddyAllocator() {}
-  ~BuddyAllocator() {}
-
- public:
-  void* Alloc(size_t size) {
-    return Allocator::Alloc(size);
-  }
-  void Free(void*) {
-    // Because all info like size are stored in meta data,
-    // thus it's duplicate if add the parameter `size` in
-    // `Free(void*)` interface.
-  }
-  size_t Used();
+ public:
+  BuddyAllocator(size_t pool_size, size_t max_pools,
+                 SystemAllocator* system_allocator);
+  ~BuddyAllocator();
+
+  void* Alloc(size_t size);
+  void Free(void*);
+  size_t Used();
+
+  private:
+  struct Block {
+    size_t size_;
+    Block* left_;   // left buddy
+    Block* right_;  // right buddy
+  };
+
+  // Initially, there is only one pool. If an Alloc cannot find enough
+  // memory in that pool, and fewer than max_num_pools_ pools have been
+  // created, create a new pool by calling system_allocator_->Alloc(pool_size_).
+  std::vector<void*> pools_;
+
+  size_t pool_size_;      // the size of each pool;
+  size_t max_num_pools_;  // the maximum number of pools;

- public:
-  BuddyAllocator(const BuddyAllocator&) = delete;
-  BuddyAllocator& operator=(const BuddyAllocator&) = delete;
+  SystemAllocator* system_allocator_;

- private:
-  size_t min_alloc_size_;
-  size_t max_alloc_size_;
+  std::mutex mutex_;

- private:
-  std::mutex mutex_;
+  // Disable copy and assignment.
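The Block struct with left_/right_ buddies anticipates the classic split/merge rule, which this patch deliberately leaves unimplemented. For power-of-two block sizes carved from a pool, a block's buddy can be found by XOR-ing its offset with its size. A tiny sketch of just that rule; all names here are illustrative, not Paddle's:

```cpp
#include <cstddef>
#include <iostream>

// For buddy systems with power-of-two, size-aligned blocks, the buddy of
// the block at `offset` is at `offset ^ size`: splitting and merging both
// reduce to flipping the size bit.
size_t BuddyOffset(size_t offset, size_t size) { return offset ^ size; }

int main() {
  // Splitting a 64-byte pool into two 32-byte buddies:
  std::cout << BuddyOffset(0, 32) << "\n";   // 32: right buddy of block 0
  std::cout << BuddyOffset(32, 32) << "\n";  // 0: left buddy of block 32
  // Splitting the left 32-byte block again:
  std::cout << BuddyOffset(0, 16) << "\n";   // 16
  return 0;
}
```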
+ BuddyAllocator(const BuddyAllocator&) = delete; + BuddyAllocator& operator=(const BuddyAllocator&) = delete; }; BuddyAllocator* GetCPUBuddyAllocator() { @@ -63,16 +67,16 @@ BuddyAllocator* GetCPUBuddyAllocator() { BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { static BuddyAllocator** as = NULL; if (as == NULL) { - int gpu_num = platform::GetDeviceCount(); + int gpu_num = platform::GetDeviceCount(); as = new BuddyAllocator*[gpu_num]; for (int gpu = 0; gpu < gpu_num; gpu++) { - as[gpu] = new BuddyAllocator(); + as[gpu] = new BuddyAllocator(); } } return as[gpu_id]; } -#endif // PADDLE_ONLY_CPU +#endif // PADDLE_ONLY_CPU } // namespace detail } // namespace memory diff --git a/paddle/memory/detail/system_allocator.cc b/paddle/memory/detail/system_allocator.cc new file mode 100644 index 0000000000..50bec926f8 --- /dev/null +++ b/paddle/memory/detail/system_allocator.cc @@ -0,0 +1,90 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/memory/detail/system_allocator.h" + +#include // for malloc and free +#include // for mlock and munlock + +#include "gflags/gflags.h" +#include "paddle/platform/assert.h" +#include "paddle/platform/cuda.h" + +// If use_pinned_memory is true, CPUAllocator calls mlock, which +// returns pinned and locked memory as staging areas for data exchange +// between host and device. Allocates too much would reduce the amount +// of memory available to the system for paging. So, by default, we +// should set false to use_pinned_memory. +DEFINE_bool(use_pinned_memory, false, + "If set, allocate cpu/gpu pinned memory."); + +namespace paddle { +namespace memory { +namespace detail { + +void* CPUAllocator::Alloc(size_t size) { + // According to http://www.cplusplus.com/reference/cstdlib/malloc/, + // malloc might not return nullptr if size is zero, but the returned + // pointer shall not be dereferenced -- so we make it nullptr. + if (size <= 0) return nullptr; + + void* p = malloc(size); + if (p != nullptr && FLAGS_use_pinned_memory) { + mlock(p, size); + } + return p; +} + +void CPUAllocator::Free(void* p, size_t size) { + if (p != nullptr && FLAGS_use_pinned_memory) { + munlock(p, size); + } + free(p); +} + +#ifndef PADDLE_ONLY_CPU + +void* GPUAllocator::Alloc(size_t size) { + // CUDA documentation doesn't explain if cudaMalloc returns nullptr + // if size is 0. We just make sure it does. + if (size <= 0) { + return nullptr; + } + + void* p = 0; + cudaError_t result = + FLAGS_use_pinned_memory ? cudaMallocHost(&p, size) : cudaMalloc(&p, size); + if (result != cudaSuccess) { + cudaGetLastError(); // clear error if there is any. + } + return result == cudaSuccess ? p : nullptr; +} + +void GPUAllocator::Free(void* p, size_t size) { + // Purposefully allow cudaErrorCudartUnloading, because + // that is returned if you ever call cudaFree after the + // driver has already shutdown. This happens only if the + // process is terminating, in which case we don't care if + // cudaFree succeeds. 
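GetCPUBuddyAllocator and GetGPUBuddyAllocator above lazily build singletons behind a bare NULL check, which is racy if two threads request an allocator concurrently. A hedged C++11 alternative using std::call_once; the Allocator stand-in type and the function name are assumptions of this sketch, not a proposal the patch itself makes:

```cpp
#include <mutex>
#include <vector>

struct Allocator {};  // stand-in for a BuddyAllocator

Allocator* GetDeviceAllocator(int device_id, int device_count) {
  static std::vector<Allocator*> table;
  static std::once_flag flag;
  // std::call_once guarantees the table is populated exactly once, even
  // when several threads arrive here at the same time.
  std::call_once(flag, [&] {
    for (int i = 0; i < device_count; ++i) table.push_back(new Allocator);
  });
  return table[device_id];
}

int main() {
  Allocator* a = GetDeviceAllocator(0, 2);
  return a != nullptr ? 0 : 1;
}
```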
+ cudaError_t err = FLAGS_use_pinned_memory ? cudaFreeHost(p) : cudaFree(p); + if (err != cudaErrorCudartUnloading) { + platform::throw_on_error(err, "cudaFree{Host} failed"); + } +} + +#endif // PADDLE_ONLY_CPU + +} // namespace detail +} // namespace memory +} // namespace paddle diff --git a/paddle/memory/detail/system_allocator_test.cc b/paddle/memory/detail/system_allocator_test.cc index c461d8ac62..9bd5706a4e 100644 --- a/paddle/memory/detail/system_allocator_test.cc +++ b/paddle/memory/detail/system_allocator_test.cc @@ -22,10 +22,10 @@ limitations under the License. */ DECLARE_bool(use_pinned_memory); -void TestAllocator(paddle::memory::detail::SystemAllocator* a, size_t size) { +void TestAllocator(paddle::memory::detail::SystemAllocator& a, size_t size) { bool freed = false; { - void* p = a->Alloc(size); + void* p = a.Alloc(size); if (size > 0) { EXPECT_NE(p, nullptr); } else { @@ -33,9 +33,9 @@ void TestAllocator(paddle::memory::detail::SystemAllocator* a, size_t size) { } int* i = static_cast(p); - std::shared_ptr ptr(i, [&freed, a, size](void* p) { + std::shared_ptr ptr(i, [&](void* p) { freed = true; - a->Free(p, size); + a.Free(p, size); }); } EXPECT_TRUE(freed); @@ -44,28 +44,28 @@ void TestAllocator(paddle::memory::detail::SystemAllocator* a, size_t size) { TEST(CPUAllocator, NoLockMem) { FLAGS_use_pinned_memory = false; paddle::memory::detail::CPUAllocator a; - TestAllocator(&a, 2048); - TestAllocator(&a, 0); + TestAllocator(a, 2048); + TestAllocator(a, 0); } TEST(CPUAllocator, LockMem) { FLAGS_use_pinned_memory = true; paddle::memory::detail::CPUAllocator a; - TestAllocator(&a, 2048); - TestAllocator(&a, 0); + TestAllocator(a, 2048); + TestAllocator(a, 0); } #ifndef PADDLE_ONLY_CPU TEST(GPUAllocator, NoStaging) { FLAGS_use_pinned_memory = false; paddle::memory::detail::GPUAllocator a; - TestAllocator(&a, 2048); - TestAllocator(&a, 0); + TestAllocator(a, 2048); + TestAllocator(a, 0); } TEST(GPUAllocator, Staging) { FLAGS_use_pinned_memory = true; paddle::memory::detail::GPUAllocator a; - TestAllocator(&a, 2048); - TestAllocator(&a, 0); + TestAllocator(a, 2048); + TestAllocator(a, 0); } #endif // PADDLE_ONLY_CPU From 80642bee00c3f723d213b0475749aeee60d89795 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Wed, 28 Jun 2017 08:22:58 +0800 Subject: [PATCH 108/542] fix_xmap and refine flowers dataset --- python/paddle/v2/dataset/__init__.py | 3 +- python/paddle/v2/dataset/flowers.py | 67 ++++++++++--------- .../paddle/v2/dataset/tests/flowers_test.py | 4 +- python/paddle/v2/reader/decorator.py | 47 +++++++------ .../paddle/v2/reader/tests/decorator_test.py | 18 ++--- 5 files changed, 72 insertions(+), 67 deletions(-) diff --git a/python/paddle/v2/dataset/__init__.py b/python/paddle/v2/dataset/__init__.py index 26252d5bbd..2e4beb6882 100644 --- a/python/paddle/v2/dataset/__init__.py +++ b/python/paddle/v2/dataset/__init__.py @@ -25,8 +25,9 @@ import uci_housing import sentiment import wmt14 import mq2007 +import flowers __all__ = [ 'mnist', 'imikolov', 'imdb', 'cifar', 'movielens', 'conll05', 'sentiment' - 'uci_housing', 'wmt14', 'mq2007' + 'uci_housing', 'wmt14', 'mq2007', 'flowers' ] diff --git a/python/paddle/v2/dataset/flowers.py b/python/paddle/v2/dataset/flowers.py index 07c13cf719..a181f3881a 100644 --- a/python/paddle/v2/dataset/flowers.py +++ b/python/paddle/v2/dataset/flowers.py @@ -13,18 +13,18 @@ # limitations under the License. 
""" This module will download dataset from -http://www.robots.ox.ac.uk/~vgg/data/flowers/102/index.html +http://www.robots.ox.ac.uk/~vgg/data/flowers/102/index.html and parse train/test set intopaddle reader creators. -This set contains images of flowers belonging to 102 different categories. +This set contains images of flowers belonging to 102 different categories. The images were acquired by searching the web and taking pictures. There are a minimum of 40 images for each category. The database was used in: Nilsback, M-E. and Zisserman, A. Automated flower classification over a large - number of classes.Proceedings of the Indian Conference on Computer Vision, -Graphics and Image Processing (2008) + number of classes.Proceedings of the Indian Conference on Computer Vision, +Graphics and Image Processing (2008) http://www.robots.ox.ac.uk/~vgg/publications/papers/nilsback08.{pdf,ps.gz}. """ @@ -34,9 +34,9 @@ from common import download import tarfile import scipy.io as scio from paddle.v2.image import * +from paddle.v2.reader import * import os import numpy as np -import paddle.v2 as paddle from multiprocessing import cpu_count __all__ = ['train', 'test', 'valid'] @@ -53,8 +53,8 @@ def default_mapper(sample): map image bytes data to type needed by model input layer ''' img, label = sample - img = paddle.image.load_image_bytes(img) - img = paddle.image.simple_transform(img, 256, 224, True) + img = load_image_bytes(img) + img = simple_transform(img, 256, 224, True) return img.flatten().astype('float32'), label @@ -63,22 +63,23 @@ def reader_creator(data_file, setid_file, dataset_name, mapper=default_mapper, - buffered_size=1024): + buffered_size=1024, + useXmap=True): ''' - 1. read images from tar file and + 1. read images from tar file and merge images into batch files in 102flowers.tgz_batch/ 2. get a reader to read sample from batch file - - :param data_file: downloaded data file + + :param data_file: downloaded data file :type data_file: string - :param label_file: downloaded label file + :param label_file: downloaded label file :type label_file: string :param setid_file: downloaded setid file containing information about how to split dataset :type setid_file: string :param dataset_name: data set name (tstid|trnid|valid) :type dataset_name: string - :param mapper: a function to map image bytes data to type + :param mapper: a function to map image bytes data to type needed by model input layer :type mapper: callable :param buffered_size: the size of buffer used to process images @@ -105,15 +106,17 @@ def reader_creator(data_file, for sample, label in itertools.izip(data, batch['label']): yield sample, int(label) - return paddle.reader.xmap_readers(mapper, reader, - cpu_count(), buffered_size) + if useXmap: + return xmap_readers(mapper, reader, cpu_count(), buffered_size) + else: + return map_readers(mapper, reader) -def train(mapper=default_mapper, buffered_size=1024): +def train(mapper=default_mapper, buffered_size=1024, useXmap=True): ''' - Create flowers training set reader. - It returns a reader, each sample in the reader is - image pixels in [0, 1] and label in [1, 102] + Create flowers training set reader. + It returns a reader, each sample in the reader is + image pixels in [0, 1] and label in [1, 102] translated from original color image by steps: 1. resize to 256*256 2. 
random crop to 224*224 @@ -128,15 +131,15 @@ def train(mapper=default_mapper, buffered_size=1024): return reader_creator( download(DATA_URL, 'flowers', DATA_MD5), download(LABEL_URL, 'flowers', LABEL_MD5), - download(SETID_URL, 'flowers', SETID_MD5), 'trnid', mapper, - buffered_size) + download(SETID_URL, 'flowers', SETID_MD5), 'tstid', mapper, + buffered_size, useXmap) -def test(mapper=default_mapper, buffered_size=1024): +def test(mapper=default_mapper, buffered_size=1024, useXmap=True): ''' - Create flowers test set reader. - It returns a reader, each sample in the reader is - image pixels in [0, 1] and label in [1, 102] + Create flowers test set reader. + It returns a reader, each sample in the reader is + image pixels in [0, 1] and label in [1, 102] translated from original color image by steps: 1. resize to 256*256 2. random crop to 224*224 @@ -151,15 +154,15 @@ def test(mapper=default_mapper, buffered_size=1024): return reader_creator( download(DATA_URL, 'flowers', DATA_MD5), download(LABEL_URL, 'flowers', LABEL_MD5), - download(SETID_URL, 'flowers', SETID_MD5), 'tstid', mapper, - buffered_size) + download(SETID_URL, 'flowers', SETID_MD5), 'trnid', mapper, + buffered_size, useXmap) -def valid(mapper=default_mapper, buffered_size=1024): +def valid(mapper=default_mapper, buffered_size=1024, useXmap=True): ''' - Create flowers validation set reader. - It returns a reader, each sample in the reader is - image pixels in [0, 1] and label in [1, 102] + Create flowers validation set reader. + It returns a reader, each sample in the reader is + image pixels in [0, 1] and label in [1, 102] translated from original color image by steps: 1. resize to 256*256 2. random crop to 224*224 @@ -175,7 +178,7 @@ def valid(mapper=default_mapper, buffered_size=1024): download(DATA_URL, 'flowers', DATA_MD5), download(LABEL_URL, 'flowers', LABEL_MD5), download(SETID_URL, 'flowers', SETID_MD5), 'valid', mapper, - buffered_size) + buffered_size, useXmap) def fetch(): diff --git a/python/paddle/v2/dataset/tests/flowers_test.py b/python/paddle/v2/dataset/tests/flowers_test.py index cc0626f4fe..a8ae9a07ac 100644 --- a/python/paddle/v2/dataset/tests/flowers_test.py +++ b/python/paddle/v2/dataset/tests/flowers_test.py @@ -31,13 +31,13 @@ class TestFlowers(unittest.TestCase): def test_train(self): instances, max_label_value = self.check_reader( paddle.v2.dataset.flowers.train()) - self.assertEqual(instances, 1020) + self.assertEqual(instances, 6149) self.assertEqual(max_label_value, 102) def test_test(self): instances, max_label_value = self.check_reader( paddle.v2.dataset.flowers.test()) - self.assertEqual(instances, 6149) + self.assertEqual(instances, 1020) self.assertEqual(max_label_value, 102) def test_valid(self): diff --git a/python/paddle/v2/reader/decorator.py b/python/paddle/v2/reader/decorator.py index e432003129..45a4288751 100644 --- a/python/paddle/v2/reader/decorator.py +++ b/python/paddle/v2/reader/decorator.py @@ -166,12 +166,12 @@ def buffered(reader, size): The buffered data reader will read and save data entries into a buffer. Reading from the buffered data reader will proceed as long as the buffer is not empty. - + :param reader: the data reader to read from. :type reader: callable :param size: max buffer size. :type size: int - + :returns: the buffered data reader. 
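Both buffered() and xmap_readers() in this file are bounded producer/consumer queues at heart: a worker fills a fixed-size buffer while the consumer drains it. The same idea in a self-contained C++ sketch; the BoundedBuffer name and the fixed element counts are assumptions of this sketch, not part of Paddle:

```cpp
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <queue>
#include <thread>

template <typename T>
class BoundedBuffer {
 public:
  explicit BoundedBuffer(size_t cap) : cap_(cap) {}

  void Put(T v) {
    std::unique_lock<std::mutex> l(mu_);
    not_full_.wait(l, [&] { return q_.size() < cap_; });  // block when full
    q_.push(std::move(v));
    not_empty_.notify_one();
  }

  T Get() {
    std::unique_lock<std::mutex> l(mu_);
    not_empty_.wait(l, [&] { return !q_.empty(); });  // block when empty
    T v = std::move(q_.front());
    q_.pop();
    not_full_.notify_one();
    return v;
  }

 private:
  size_t cap_;
  std::queue<T> q_;
  std::mutex mu_;
  std::condition_variable not_empty_, not_full_;
};

int main() {
  BoundedBuffer<int> buf(4);
  std::thread producer([&] {
    for (int i = 0; i < 8; ++i) buf.Put(i);
  });
  for (int i = 0; i < 8; ++i) std::cout << buf.Get() << " ";
  std::cout << "\n";
  producer.join();
  return 0;
}
```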
""" @@ -238,7 +238,7 @@ def xmap_readers(mapper, reader, process_num, buffer_size, order=False): :type mapper: callable :param reader: the data reader to read from :type reader: callable - :param process_num: process number to handle original sample + :param process_num: process number to handle original sample :type process_num: int :param buffer_size: max buffer size :type buffer_size: int @@ -248,9 +248,6 @@ def xmap_readers(mapper, reader, process_num, buffer_size, order=False): :rtype: callable """ end = XmapEndSignal() - in_queue = Queue(buffer_size) - out_queue = Queue(buffer_size) - out_order = [0] # define a worker to read samples from reader to in_queue def read_worker(reader, in_queue): @@ -266,12 +263,6 @@ def xmap_readers(mapper, reader, process_num, buffer_size, order=False): in_order += 1 in_queue.put(end) - # start a read worker in a thread - target = order_read_worker if order else read_worker - t = Thread(target=target, args=(reader, in_queue)) - t.daemon = True - t.start() - # define a worker to handle samples from in_queue by mapper # and put mapped samples into out_queue def handle_worker(in_queue, out_queue, mapper): @@ -298,19 +289,27 @@ def xmap_readers(mapper, reader, process_num, buffer_size, order=False): in_queue.put(end) out_queue.put(end) - # start several handle_workers - target = order_handle_worker if order else handle_worker - args = (in_queue, out_queue, mapper, out_order) if order else ( - in_queue, out_queue, mapper) - workers = [] - for i in xrange(process_num): - worker = Thread(target=target, args=args) - worker.daemon = True - workers.append(worker) - for w in workers: - w.start() - def xreader(): + in_queue = Queue(buffer_size) + out_queue = Queue(buffer_size) + out_order = [0] + # start a read worker in a thread + target = order_read_worker if order else read_worker + t = Thread(target=target, args=(reader, in_queue)) + t.daemon = True + t.start() + # start several handle_workers + target = order_handle_worker if order else handle_worker + args = (in_queue, out_queue, mapper, out_order) if order else ( + in_queue, out_queue, mapper) + workers = [] + for i in xrange(process_num): + worker = Thread(target=target, args=args) + worker.daemon = True + workers.append(worker) + for w in workers: + w.start() + sample = out_queue.get() while not isinstance(sample, XmapEndSignal): yield sample diff --git a/python/paddle/v2/reader/tests/decorator_test.py b/python/paddle/v2/reader/tests/decorator_test.py index bb3c5d220b..5a92951b10 100644 --- a/python/paddle/v2/reader/tests/decorator_test.py +++ b/python/paddle/v2/reader/tests/decorator_test.py @@ -132,15 +132,17 @@ class TestXmap(unittest.TestCase): for order in orders: for tNum in thread_nums: for size in buffered_size: - result = [] - for i in paddle.v2.reader.xmap_readers(mapper, + reader = paddle.v2.reader.xmap_readers(mapper, reader_creator_10(0), - tNum, size, order)(): - result.append(i) - if not order: - result.sort() - for idx, e in enumerate(result): - self.assertEqual(e, mapper(idx)) + tNum, size, order) + for n in xrange(3): + result = [] + for i in reader(): + result.append(i) + if not order: + result.sort() + for idx, e in enumerate(result): + self.assertEqual(e, mapper(idx)) if __name__ == '__main__': From c263c21f7e0feebca20ab33cd606330de81e9aee Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Tue, 27 Jun 2017 17:35:27 -0700 Subject: [PATCH 109/542] Update copyright informaiton --- paddle/framework/tensor.h | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff 
--git a/paddle/framework/tensor.h b/paddle/framework/tensor.h
index 8962b76a12..067f2a8526 100644
--- a/paddle/framework/tensor.h
+++ b/paddle/framework/tensor.h
@@ -1,15 +1,17 @@
-/*
-  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-  Licensed under the Apache License, Version 2.0 (the "License");
-  you may not use this file except in compliance with the License.
-  You may obtain a copy of the License at
-  http://www.apache.org/licenses/LICENSE-2.0
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
-*/
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
 #pragma once

 namespace paddle {

From 4cc9680cc60296f6071fa34893fda4f3d6806b97 Mon Sep 17 00:00:00 2001
From: Helin Wang
Date: Wed, 28 Jun 2017 01:16:28 +0000
Subject: [PATCH 110/542] Make pserver able to get server index without etcd
 (decouple the pserver from etcd)

The pserver needs to have a server index for saving the model on the
distributed file system. The server index comes from etcd if etcd is
used, or the user can specify it manually. So we need
pserver.NewService() to take the index as an argument. Since the index
could come from etcd, it would be strange if the pserver took an index
as an argument and at the same time got the index from etcd, so we need
to decouple the pserver from etcd.
---
 go/cmd/pserver/pserver.go  |   8 +-
 go/master/etcd_client.go   |   4 +-
 go/pserver/client_test.go  |   3 +-
 go/pserver/etcd_client.go  | 181 +++++++++++++++++++++++++++++++++++++
 go/pserver/service.go      | 156 ++------------------------------
 go/pserver/service_test.go |   8 +-
 6 files changed, 201 insertions(+), 159 deletions(-)
 create mode 100644 go/pserver/etcd_client.go

diff --git a/go/cmd/pserver/pserver.go b/go/cmd/pserver/pserver.go
index 6c85b1804b..8a42d4f8af 100644
--- a/go/cmd/pserver/pserver.go
+++ b/go/cmd/pserver/pserver.go
@@ -30,7 +30,13 @@ func main() {
 	log.SetLevel(level)

 	timeout := time.Second * time.Duration((*etcdTimeout))
-	s, err := pserver.NewService(*etcdEndpoint, *numPservers, timeout)
+	e := pserver.NewEtcdClient(*etcdEndpoint, *numPservers, timeout)
+	idx, err := e.Register()
+	if err != nil {
+		panic(err)
+	}
+
+	s, err := pserver.NewService(idx)
 	if err != nil {
 		panic(err)
 	}

diff --git a/go/master/etcd_client.go b/go/master/etcd_client.go
index b7293a7598..f7b4638577 100644
--- a/go/master/etcd_client.go
+++ b/go/master/etcd_client.go
@@ -18,8 +18,8 @@ const (
 	DefaultAddrPath = "/master/addr"
 )

-// EtcdClient is the etcd client that master uses for fault tolerance
-// and service registry.
+// EtcdClient is the etcd client that the master uses for fault
+// tolerance and service registry.
type EtcdClient struct { lockPath string statePath string diff --git a/go/pserver/client_test.go b/go/pserver/client_test.go index 4a62ae88a4..5bd16118a7 100644 --- a/go/pserver/client_test.go +++ b/go/pserver/client_test.go @@ -7,7 +7,6 @@ import ( "strconv" "strings" "testing" - "time" "github.com/PaddlePaddle/Paddle/go/pserver" ) @@ -31,7 +30,7 @@ func init() { port[i] = p go func(l net.Listener) { - s, err := pserver.NewService("", 1, time.Second*5) + s, err := pserver.NewService(0) if err != nil { panic(err) } diff --git a/go/pserver/etcd_client.go b/go/pserver/etcd_client.go new file mode 100644 index 0000000000..4d88243edd --- /dev/null +++ b/go/pserver/etcd_client.go @@ -0,0 +1,181 @@ +package pserver + +import ( + "context" + "errors" + "strconv" + "strings" + "time" + + "github.com/PaddlePaddle/Paddle/go/utils/networkhelper" + "github.com/coreos/etcd/clientv3" + "github.com/coreos/etcd/clientv3/concurrency" + log "github.com/sirupsen/logrus" +) + +// EtcdClient is the etcd client that the pserver uses for fault +// tolerance, service registry and coordination. +type EtcdClient struct { + numPservers int + etcdEndpoints string + etcdClient *clientv3.Client + // etcdTimeout is also used as retry intervals. + etcdTimeout time.Duration + // FIXME: ensure GetExternalIP gets the correct ip for trainers to connect. + externalIP string + // desired number of pservers in the job. + // assume desired will not change during one training job. + desired int +} + +// NewEtcdClient creates an EtcdClient +func NewEtcdClient(endpoints string, numPservers int, timeout time.Duration) *EtcdClient { + return &EtcdClient{ + etcdTimeout: timeout, + numPservers: numPservers, + etcdEndpoints: endpoints, + } +} + +// Register registers the pserver on etcd +// +// Register returns the index of the current pserver. +func (e *EtcdClient) Register() (int, error) { + + var err error + e.externalIP, err = networkhelper.GetExternalIP() + if err != nil { + return 0, err + } + + // initialize connection to etcd. + ep := strings.Split(e.etcdEndpoints, ",") + for { + cli, err := clientv3.New(clientv3.Config{ + Endpoints: ep, + DialTimeout: e.etcdTimeout, + }) + if err != nil { + log.Errorf("connect to etcd error: %v", err) + time.Sleep(e.etcdTimeout) + continue + } + e.etcdClient = cli + log.Debugf("inited client to %s", e.etcdEndpoints) + break + } + // init /ps_desired using transaction, for multiple pservers may want to write + // it at the same time. + for { + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + _, err := e.initDesiredPsercers(ctx, e.numPservers) + cancel() + if err != nil { + log.Warn(err) + time.Sleep(e.etcdTimeout) + continue + } + break + } + // TODO: when implementing extending or reducing pservers, /ps_desired is + // changed, then we need to watch /ps_desired node for events. For now, just + // write once when init and read from it. 
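Every step of Register() above follows one pattern: try, log, sleep etcdTimeout, retry. That pattern in isolation, as a C++ sketch; the helper name and the bounded attempt count are assumptions of this sketch (the Go code retries indefinitely):

```cpp
#include <chrono>
#include <iostream>
#include <thread>

// Retries `step` until it succeeds or attempts run out, sleeping a fixed
// interval between tries, the same shape as each loop in Register().
template <typename F>
bool RetryWithInterval(F step, int max_attempts,
                       std::chrono::milliseconds interval) {
  for (int i = 0; i < max_attempts; ++i) {
    if (step()) return true;
    std::this_thread::sleep_for(interval);
  }
  return false;
}

int main() {
  int calls = 0;
  bool ok = RetryWithInterval(
      [&] {
        ++calls;
        return calls >= 3;  // pretend the third attempt succeeds
      },
      5, std::chrono::milliseconds(10));
  std::cout << ok << " after " << calls << " calls\n";
  return ok ? 0 : 1;
}
```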
+ // wait and set s.desired init value + for { + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + resp, err := e.etcdClient.Get(ctx, PsDesired) + cancel() + if err != nil { + log.Errorf("getting %s error: %v", PsDesired, err) + time.Sleep(e.etcdTimeout) + continue + } + if len(resp.Kvs) != 0 { + e.desired, err = strconv.Atoi(string(resp.Kvs[0].Value)) + if err != nil { + log.Errorf("value of %s invalid %v\n", PsDesired, err) + time.Sleep(e.etcdTimeout) + // NOTE: wait util ps_desired value change + continue + } + break + } + } + + var pserverIdx int + // try register pserver node on etcd + for { + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + var err error + pserverIdx, err = e.registerPserverEtcd(ctx) + cancel() + if err != nil { + log.Warn(err) + time.Sleep(e.etcdTimeout) + continue + } + break + } + + return pserverIdx, nil +} + +func (e *EtcdClient) initDesiredPsercers(ctx context.Context, numPservers int) (*clientv3.TxnResponse, error) { + return concurrency.NewSTM(e.etcdClient, func(c concurrency.STM) error { + dsStr := c.Get(PsDesired) + if dsStr == "" { + c.Put(PsDesired, strconv.Itoa(numPservers)) + } + return nil + }, concurrency.WithAbortContext(ctx), concurrency.WithIsolation(concurrency.RepeatableReads)) +} + +// registerPserverEtcd registers pserver node on etcd using transaction. +func (e *EtcdClient) registerPserverEtcd(ctx context.Context) (int, error) { + var idx int + _, err := concurrency.NewSTM(e.etcdClient, func(c concurrency.STM) error { + registered := false + for i := 0; i < e.desired; i++ { + psKey := "/ps/" + strconv.Itoa(i) + log.Debugf("checking %s", psKey) + ps := c.Get(psKey) + log.Debugf("got value (%s) for key: %s", ps, psKey) + + if ps == "" { + resp, err := e.etcdClient.Grant(context.TODO(), 5) + if err != nil { + log.Fatal(err) + } + // find the first id and write info + c.Put(psKey, e.externalIP, clientv3.WithLease(resp.ID)) + log.Debugf("set pserver node %s with value %s", psKey, e.externalIP) + ch, kaerr := e.etcdClient.KeepAlive(context.TODO(), resp.ID) + if kaerr != nil { + log.Errorf("keepalive etcd node error: %v", kaerr) + return kaerr + } + + // Eat the keep alive message so etcd + // will not expire the lease. + go func(ch <-chan *clientv3.LeaseKeepAliveResponse) { + ka := <-ch + log.Debugf("keepalive: %d\n", ka.TTL) + }(ch) + log.Debug("register finished") + idx = i + registered = true + break + } + } + if registered == true { + return nil + } + return errors.New("not registerd, may due to already have enough pservers") + }, concurrency.WithAbortContext(ctx), concurrency.WithIsolation(concurrency.RepeatableReads)) + + if err != nil { + return 0, err + } + + return idx, nil +} diff --git a/go/pserver/service.go b/go/pserver/service.go index f966595fdc..f386ebea1e 100644 --- a/go/pserver/service.go +++ b/go/pserver/service.go @@ -1,18 +1,9 @@ package pserver import ( - "context" "errors" "fmt" - "strconv" - "strings" "sync" - "time" - - "github.com/PaddlePaddle/Paddle/go/utils/networkhelper" - "github.com/coreos/etcd/clientv3" - "github.com/coreos/etcd/clientv3/concurrency" - log "github.com/sirupsen/logrus" ) // ElementType is the type of elements of a Parameter. @@ -55,160 +46,25 @@ type Gradient Parameter // Service is the RPC service for pserver. type Service struct { initialized chan struct{} + idx int mu sync.Mutex opt *optimizer paramMap map[string]Parameter - - etcdEndpoints string - etcdClient *clientv3.Client - // etcdTimeout is also used as retry intervals. 
- etcdTimeout time.Duration - // desired number of pservers in the job. - // assume desired will not change during one training job. - desired int - // FIXME: ensure GetExternalIP gets the correct ip for trainers to connect. - externalIP string } // NewService creates a new service, will bypass etcd registration if no // endpoints specified. -func NewService(endpoints string, numPservers int, timeout time.Duration) (*Service, error) { - s := &Service{opt: newOptimizer(sgd, 0.005)} +func NewService(idx int) (*Service, error) { + s := &Service{ + idx: idx, + opt: newOptimizer(sgd, 0.005), + } s.paramMap = make(map[string]Parameter) s.initialized = make(chan struct{}) - s.etcdEndpoints = endpoints - s.etcdTimeout = timeout - - var err error - s.externalIP, err = networkhelper.GetExternalIP() - if err != nil { - return nil, err - } - - if endpoints != "" { - // initialize connection to etcd, try - ep := strings.Split(s.etcdEndpoints, ",") - for { - cli, err := clientv3.New(clientv3.Config{ - Endpoints: ep, - DialTimeout: s.etcdTimeout, - }) - if err != nil { - log.Errorf("connect to etcd error: %v", err) - time.Sleep(s.etcdTimeout) - continue - } - s.etcdClient = cli - log.Debugf("inited client to %s", s.etcdEndpoints) - break - } - // init /ps_desired using transaction, for multiple pservers may want to write - // it at the same time. - for { - ctx, cancel := context.WithTimeout(context.Background(), time.Second) - _, err := s.initDesiredPsercers(ctx, numPservers) - cancel() - if err != nil { - log.Warn(err) - time.Sleep(s.etcdTimeout) - continue - } - break - } - // TODO: when implementing extending or reducing pservers, /ps_desired is - // changed, then we need to watch /ps_desired node for events. For now, just - // write once when init and read from it. - // wait and set s.desired init value - for { - ctx, cancel := context.WithTimeout(context.Background(), time.Second) - resp, err := s.etcdClient.Get(ctx, PsDesired) - cancel() - if err != nil { - log.Errorf("getting %s error: %v", PsDesired, err) - time.Sleep(s.etcdTimeout) - continue - } - if len(resp.Kvs) != 0 { - s.desired, err = strconv.Atoi(string(resp.Kvs[0].Value)) - if err != nil { - log.Errorf("value of %s invalid %v\n", PsDesired, err) - time.Sleep(s.etcdTimeout) - // NOTE: wait util ps_desired value change - continue - } - break - } - } - // try register pserver node on etcd - for { - ctx, cancel := context.WithTimeout(context.Background(), time.Second) - _, err := s.registerPserverEtcd(ctx) - cancel() - if err != nil { - log.Warn(err) - time.Sleep(s.etcdTimeout) - continue - } - break - } - } // if endpoints != "" - // Bypass etcd registration if no endpoints specified return s, nil } -func (s *Service) initDesiredPsercers(ctx context.Context, numPservers int) (*clientv3.TxnResponse, error) { - return concurrency.NewSTM(s.etcdClient, func(c concurrency.STM) error { - dsStr := c.Get(PsDesired) - if dsStr == "" { - c.Put(PsDesired, strconv.Itoa(numPservers)) - } - return nil - }, concurrency.WithAbortContext(ctx), concurrency.WithIsolation(concurrency.RepeatableReads)) -} - -// registerPserverEtcd registers pserver node on etcd using transaction. 
-func (s *Service) registerPserverEtcd(ctx context.Context) (*clientv3.TxnResponse, error) { - return concurrency.NewSTM(s.etcdClient, func(c concurrency.STM) error { - registered := false - for i := 0; i < s.desired; i++ { - psKey := "/ps/" + strconv.Itoa(i) - log.Debugf("checking %s", psKey) - ps := c.Get(psKey) - log.Debugf("got value (%s) for key: %s", ps, psKey) - - if ps == "" { - resp, err := s.etcdClient.Grant(context.TODO(), 5) - if err != nil { - log.Fatal(err) - } - // find the first id and write info - c.Put(psKey, s.externalIP, clientv3.WithLease(resp.ID)) - log.Debugf("set pserver node %s with value %s", psKey, s.externalIP) - ch, kaerr := s.etcdClient.KeepAlive(context.TODO(), resp.ID) - if kaerr != nil { - log.Errorf("keepalive etcd node error: %v", kaerr) - return kaerr - } - - // Eat the keep alive message so etcd - // will not expire the lease. - go func(ch <-chan *clientv3.LeaseKeepAliveResponse) { - ka := <-ch - log.Debugf("keepalive: %d\n", ka.TTL) - }(ch) - log.Debug("register finished") - registered = true - break - } - } - if registered == true { - return nil - } - return errors.New("not registerd, may due to already have enough pservers") - }, concurrency.WithAbortContext(ctx), concurrency.WithIsolation(concurrency.RepeatableReads)) -} - // InitParam initializes a parameter. func (s *Service) InitParam(paramWithConfigs ParameterWithConfig, dummy *int) error { select { diff --git a/go/pserver/service_test.go b/go/pserver/service_test.go index 1d84f15d78..d9d887cffd 100644 --- a/go/pserver/service_test.go +++ b/go/pserver/service_test.go @@ -10,7 +10,7 @@ import ( ) func TestFull(t *testing.T) { - s, err := pserver.NewService("", 1, time.Second*5) + s, err := pserver.NewService(0) if err != nil { t.Error(err) } @@ -75,7 +75,7 @@ func TestFull(t *testing.T) { } func TestMultipleInit(t *testing.T) { - s, err := pserver.NewService("", 1, time.Second*5) + s, err := pserver.NewService(0) if err != nil { t.Error(err) } @@ -91,7 +91,7 @@ func TestMultipleInit(t *testing.T) { } func TestUninitialized(t *testing.T) { - s, err := pserver.NewService("", 1, time.Second*5) + s, err := pserver.NewService(0) err = s.SendGrad(pserver.Gradient{}, nil) if err.Error() != pserver.Uninitialized { t.FailNow() @@ -99,7 +99,7 @@ func TestUninitialized(t *testing.T) { } func TestBlockUntilInitialized(t *testing.T) { - s, err := pserver.NewService("", 1, time.Second*5) + s, err := pserver.NewService(0) if err != nil { t.Error(err) } From 98bb8ee295c6bbbb573f0ea33df1f7175a8b6b41 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 28 Jun 2017 10:01:23 +0800 Subject: [PATCH 111/542] fix dim problem --- paddle/py_paddle/dataprovider_converter.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/paddle/py_paddle/dataprovider_converter.py b/paddle/py_paddle/dataprovider_converter.py index edc2e02923..f8242d9f7b 100644 --- a/paddle/py_paddle/dataprovider_converter.py +++ b/paddle/py_paddle/dataprovider_converter.py @@ -109,7 +109,10 @@ class DenseScanner(IScanner): if len(self.__shape__) > 3: raise ValueError( "The dimension of input cannot be greater than 3.") - self.__dim__ = reduce(lambda x, y: x * y, self.__shape__) + if len(self.__shape__) == 0: + self.__dim__ = 1 + else: + self.__dim__ = reduce(lambda x, y: x * y, self.__shape__) if len(self.__shape__) == 1 and self.__dim__ != self.input_type.dim: raise ValueError( "The data size must be equal to it in data layer.") From 8b69c1348c17cf7aca83aacc2c63ef9eaad97467 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 28 Jun 
2017 10:34:22 +0800 Subject: [PATCH 112/542] check shape of vector input, should not be a scalar --- paddle/py_paddle/dataprovider_converter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/paddle/py_paddle/dataprovider_converter.py b/paddle/py_paddle/dataprovider_converter.py index f8242d9f7b..7df4a6967f 100644 --- a/paddle/py_paddle/dataprovider_converter.py +++ b/paddle/py_paddle/dataprovider_converter.py @@ -110,9 +110,9 @@ class DenseScanner(IScanner): raise ValueError( "The dimension of input cannot be greater than 3.") if len(self.__shape__) == 0: - self.__dim__ = 1 - else: - self.__dim__ = reduce(lambda x, y: x * y, self.__shape__) + raise ValueError( + "The input should be a vector, please check your input data.") + self.__dim__ = reduce(lambda x, y: x * y, self.__shape__) if len(self.__shape__) == 1 and self.__dim__ != self.input_type.dim: raise ValueError( "The data size must be equal to it in data layer.") From 3e9aa7fd8bfac7434057afcdd6ae62ea7a92bff1 Mon Sep 17 00:00:00 2001 From: liaogang Date: Wed, 28 Jun 2017 10:42:54 +0800 Subject: [PATCH 113/542] FIX: Pass CI --- cmake/generic.cmake | 4 ---- paddle/memory/CMakeLists.txt | 6 ------ paddle/memory/detail/buddy_allocator.cc | 2 +- paddle/memory/detail/buddy_allocator.h | 3 +++ 4 files changed, 4 insertions(+), 11 deletions(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 840155750e..69e8164a00 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -78,10 +78,6 @@ # # cc_test(example_test SRCS example_test.cc DEPS example glog gflags) -if(WITH_GPU) - add_definitions(-DPADDLE_WITH_GPU) -endif() - if(NOT APPLE) find_package(Threads REQUIRED) link_libraries(${CMAKE_THREAD_LIBS_INIT}) diff --git a/paddle/memory/CMakeLists.txt b/paddle/memory/CMakeLists.txt index e74ce75c93..3943c3cfad 100644 --- a/paddle/memory/CMakeLists.txt +++ b/paddle/memory/CMakeLists.txt @@ -1,7 +1 @@ add_subdirectory(detail) - -if(${WITH_GPU}) - nv_library(memory SRCS memory.cc) -else(${WITH_GPU}) - cc_library(memory SRCS memory.cc) -endif(${WITH_GPU}) diff --git a/paddle/memory/detail/buddy_allocator.cc b/paddle/memory/detail/buddy_allocator.cc index 895bf319d7..ebe680f5ee 100644 --- a/paddle/memory/detail/buddy_allocator.cc +++ b/paddle/memory/detail/buddy_allocator.cc @@ -14,7 +14,7 @@ #pragma once -#include "paddle/memory/detail/system_allocator.h" +#include "paddle/memory/detail/buddy_allocator.h" namespace paddle { namespace memory { diff --git a/paddle/memory/detail/buddy_allocator.h b/paddle/memory/detail/buddy_allocator.h index 129b137ed7..702c7d28ee 100644 --- a/paddle/memory/detail/buddy_allocator.h +++ b/paddle/memory/detail/buddy_allocator.h @@ -16,6 +16,9 @@ #include "paddle/memory/detail/system_allocator.h" +#include +#include + namespace paddle { namespace memory { namespace detail { From 9490d243dd0255021b288f9c2e43c57b30264b9b Mon Sep 17 00:00:00 2001 From: liaogang Date: Wed, 28 Jun 2017 10:46:14 +0800 Subject: [PATCH 114/542] ENH: clang-format --- paddle/memory/detail/buddy_allocator.h | 2 +- paddle/platform/cuda.h | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/paddle/memory/detail/buddy_allocator.h b/paddle/memory/detail/buddy_allocator.h index 702c7d28ee..82e6aaedc7 100644 --- a/paddle/memory/detail/buddy_allocator.h +++ b/paddle/memory/detail/buddy_allocator.h @@ -16,8 +16,8 @@ #include "paddle/memory/detail/system_allocator.h" -#include #include +#include namespace paddle { namespace memory { diff --git a/paddle/platform/cuda.h b/paddle/platform/cuda.h index 
864a5d3340..8fe891f9ce 100644 --- a/paddle/platform/cuda.h +++ b/paddle/platform/cuda.h @@ -29,13 +29,12 @@ inline void throw_on_error(cudaError_t e, const char* message) { } int GetDeviceCount(void) { - int count; - throw_on_error(cudaGetDeviceCount(&count), - "cudaGetDeviceCount failed"); - return count; + int count; + throw_on_error(cudaGetDeviceCount(&count), "cudaGetDeviceCount failed"); + return count; } } // namespace platform } // namespace paddle -#endif // PADDLE_ONLY_CPU +#endif // PADDLE_ONLY_CPU From a402cf908160c5fb8a4f0d1aa9efdca7109c0375 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 28 Jun 2017 10:46:23 +0800 Subject: [PATCH 115/542] correct the demo code for dense_vector label input --- doc/getstarted/concepts/use_concepts_cn.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/getstarted/concepts/use_concepts_cn.rst b/doc/getstarted/concepts/use_concepts_cn.rst index e63ca11102..f15b11bd78 100644 --- a/doc/getstarted/concepts/use_concepts_cn.rst +++ b/doc/getstarted/concepts/use_concepts_cn.rst @@ -111,7 +111,7 @@ PaddlePaddle支持不同类型的输入数据,主要包括四种类型,和 # define training dataset reader def train_reader(): train_x = np.array([[1, 1], [1, 2], [3, 4], [5, 2]]) - train_y = np.array([-2, -3, -7, -7]) + train_y = np.array([[-2], [-3], [-7], [-7]]) def reader(): for i in xrange(train_y.shape[0]): yield train_x[i], train_y[i] From e93c3e4070c37dd6bdf31ec4d3fa4033f3208e2e Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 28 Jun 2017 11:07:52 +0800 Subject: [PATCH 116/542] fix format --- paddle/py_paddle/dataprovider_converter.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/paddle/py_paddle/dataprovider_converter.py b/paddle/py_paddle/dataprovider_converter.py index 7df4a6967f..218cb5ec56 100644 --- a/paddle/py_paddle/dataprovider_converter.py +++ b/paddle/py_paddle/dataprovider_converter.py @@ -111,7 +111,8 @@ class DenseScanner(IScanner): "The dimension of input cannot be greater than 3.") if len(self.__shape__) == 0: raise ValueError( - "The input should be a vector, please check your input data.") + "The input should be a vector, please check your input data." 
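Patches 111 and 112 above converge on one invariant for dense input: the dimension is the product of the shape entries, the rank must not exceed 3, and a rank-0 scalar is rejected outright. A compact C++ sketch of that check; the function name and the exception choice are assumptions of this sketch:

```cpp
#include <functional>
#include <numeric>
#include <stdexcept>
#include <vector>

size_t FlattenedDim(const std::vector<size_t>& shape) {
  if (shape.empty()) {
    // Mirrors the "input should be a vector" error for scalar input.
    throw std::invalid_argument("input must be a vector, not a scalar");
  }
  if (shape.size() > 3) {
    throw std::invalid_argument("input rank cannot be greater than 3");
  }
  return std::accumulate(shape.begin(), shape.end(), size_t{1},
                         std::multiplies<size_t>());
}

int main() {
  return FlattenedDim({2, 3, 4}) == 24 ? 0 : 1;
}
```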
+ ) self.__dim__ = reduce(lambda x, y: x * y, self.__shape__) if len(self.__shape__) == 1 and self.__dim__ != self.input_type.dim: raise ValueError( From 05ddf23e1dd89dee9c1eeca188f782f38992ce60 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Wed, 28 Jun 2017 11:43:57 +0800 Subject: [PATCH 117/542] "add log of create optimizer" --- go/pserver/optimizer.go | 24 ++++++++++++++---------- go/pserver/service.go | 2 +- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go index 9cb2801f30..46e614d3a1 100644 --- a/go/pserver/optimizer.go +++ b/go/pserver/optimizer.go @@ -12,14 +12,15 @@ import "C" import ( "fmt" "unsafe" + + log "github.com/sirupsen/logrus" ) var nullPtr = unsafe.Pointer(uintptr(0)) type optimizer struct { - opt *C.struct_paddle_optimizer - // used in GetParam, reconstruct Parameter from optimizer - ElementType ElementType + opt *C.struct_paddle_optimizer + elementType ElementType } func cArrayToSlice(p unsafe.Pointer, len int) []byte { @@ -37,13 +38,17 @@ func cArrayToSlice(p unsafe.Pointer, len int) []byte { func newOptimizer(paramWithConfigs ParameterWithConfig) *optimizer { o := &optimizer{} - o.ElementType = paramWithConfigs.Param.ElementType + o.elementType = paramWithConfigs.Param.ElementType p := paramWithConfigs.Param c := paramWithConfigs.Config + log.WithFields(log.Fields{ + "ElementType": p.ElementType, + "ParamSize": len(p.Content), + "ConfigSize": len(c), + }).Info("New Optimizer Created with config:") var cbuffer unsafe.Pointer - cbuffer_len := int(unsafe.Sizeof(p.Content[0])) * len(p.Content) - cbuffer = C.malloc(C.size_t(cbuffer_len)) - C.memcpy(cbuffer, unsafe.Pointer(&p.Content[0]), C.size_t(cbuffer_len)) + cbuffer = C.malloc(C.size_t(len(p.Content))) + C.memcpy(cbuffer, unsafe.Pointer(&p.Content[0]), C.size_t(len(p.Content))) o.opt = C.paddle_create_optimizer((*C.uchar)(&c[0]), C.int(len(c)), C.paddle_element_type(p.ElementType), cbuffer, C.int(len(p.Content)), (*C.char)(nullPtr), 0) @@ -57,11 +62,10 @@ func (o *optimizer) GetWeights() []byte { } func (o *optimizer) UpdateParameter(g Gradient) error { - if o.ElementType != g.ElementType { - return fmt.Errorf("Name: %s, parameter and gradient element type not match, parameter: %v, gradient: %v", g.Name, o.ElementType, g.ElementType) + if o.elementType != g.ElementType { + return fmt.Errorf("Name: %s, parameter and gradient element type not match, parameter: %v, gradient: %v", g.Name, o.elementType, g.ElementType) } - fmt.Println(g) r := C.paddle_update_parameter(o.opt, C.paddle_element_type(g.ElementType), unsafe.Pointer(&g.Content[0]), C.int(len(g.Content))) if r != 0 { return fmt.Errorf("optimizer update returned error code: %d", r) diff --git a/go/pserver/service.go b/go/pserver/service.go index 2ab622d790..555d379bcb 100644 --- a/go/pserver/service.go +++ b/go/pserver/service.go @@ -128,7 +128,7 @@ func (s *Service) GetParam(name string, parameter *Parameter) error { // to save the program from making a copy of the // paramter content. 
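Note how newOptimizer above mallocs a C buffer and memcpys the parameter bytes into it before calling paddle_create_optimizer, presumably so the C side never holds memory the Go garbage collector may move or reclaim. The same ownership rule sketched in C++; CopyForCApi is a hypothetical name:

```cpp
#include <cstdlib>
#include <cstring>
#include <vector>

// Returns a heap buffer that the receiving C library becomes responsible
// for; the caller's vector may be freed or resized afterwards without harm.
void* CopyForCApi(const std::vector<unsigned char>& content) {
  void* buf = std::malloc(content.size());
  std::memcpy(buf, content.data(), content.size());
  return buf;
}

int main() {
  std::vector<unsigned char> param = {1, 2, 3, 4};
  void* c_owned = CopyForCApi(param);
  std::free(c_owned);  // here the test frees it; in cgo the C library would
  return 0;
}
```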
 	parameter.Name = name
-	parameter.ElementType = opt.ElementType
+	parameter.ElementType = opt.elementType
 	parameter.Content = opt.GetWeights()
 	return nil
 }

From 60a65b5d90052d27fbd5928b71304a8f91fce181 Mon Sep 17 00:00:00 2001
From: wuyi05
Date: Wed, 28 Jun 2017 13:33:00 +0800
Subject: [PATCH 118/542] design doc for go package management

---
 doc/design/build_system/README.md | 34 +++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/doc/design/build_system/README.md b/doc/design/build_system/README.md
index 310739f37a..96af6566d0 100644
--- a/doc/design/build_system/README.md
+++ b/doc/design/build_system/README.md
@@ -105,3 +105,37 @@ shared_library(api
 ### Implementation

 As above example CMakeLists.txt executes, each function invocation adds "nodes" to a dependency graph. It also use this graph to generate CMake commands including `add_executable`, `add_dependencies`, `target_link_libraries`, and `add_test`.
+
+### Using Package Manager For Go
+
+Building Go binaries and libraries needs to satisfy their dependencies; generally
+we can run `go get ./...` to download and compile all external dependencies. The
+problems are:
+
+1. `go get` will always get the latest code from the master branch, so when an external
+   project updates and deprecates something or changes its APIs, builds
+   may not pass. This is very different from what we already have in `cmake/external`,
+   which downloads a specific version or commit id of the dependency.
+1. Some locations cannot access external dependencies through the internet, as mentioned
+   in https://github.com/PaddlePaddle/Paddle/issues/2605. Using package management
+   tools can package the dependencies as a "vendor" package, which can be mirrored
+   at many cloud file hosting services, so users who want to compile Paddle by themselves
+   can download this "vendor" package from a mirror site.
+
+#### Godep vs. Glide
+
+Here's a brief comparison of the current Go ecosystem: https://github.com/Masterminds/glide/wiki/Go-Package-Manager-Comparison. There are
+also many complaints about `Godep`. A new "official" package management tool has been
+started: https://github.com/golang/dep to resolve such problems, but it is currently
+at the Alpha stage. So the best choice now is obviously Glide.
+
+#### Manage Go Packages
+
+- Dependencies: `go/glide.yaml` will store the dependencies and their versions which
+  are directly imported by Paddle. `go/glide.lock` will store all dependencies recursively
+  with their commit ids. Builds will "lock" to these packages if we don't `glide up`
+  them.
+- Vendor package: the `go/vendor` directory will be generated when running the `cmake`
+  command. `cmake` will download the code corresponding to `go/glide.lock`. If we put a
+  vendor folder under `go/`, cmake will just check the commit ids of the packages under
+  the folder; if the commit ids match, nothing will be downloaded at all.

From 6ad1d21c4b22adcb6fb970875256a08622d1af6e Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Wed, 28 Jun 2017 14:12:13 +0800
Subject: [PATCH 119/542] refine code of operator

---
 paddle/framework/scope.cc      | 45 +++++++++++++++-------------------
 paddle/framework/scope.h       | 30 +++++++++++++++--------
 paddle/framework/scope_test.cc | 42 ++++++++++++++++++-------------
 3 files changed, 65 insertions(+), 52 deletions(-)

diff --git a/paddle/framework/scope.cc b/paddle/framework/scope.cc
index e985598849..5c197cec2a 100644
--- a/paddle/framework/scope.cc
+++ b/paddle/framework/scope.cc
@@ -1,18 +1,27 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + #include "paddle/framework/scope.h" namespace paddle { namespace framework { -Error Scope::CreateVariable(const std::string& name) { - if (name == "") { - return Error("Variable name should not be empty"); +Variable* Scope::CreateVariable(const std::string& name) { + if (!HasVariable(name)) { + vars_[name] = std::unique_ptr(new Variable()); } - - if (HaveVariable(name)) { - return AlreadyCreated; - } - vars_[name] = std::unique_ptr(new Variable()); - return Error(); + return GetVariable(name); } Variable* Scope::GetVarLocally(const std::string& name) const { @@ -33,22 +42,8 @@ Variable* Scope::GetVariable(const std::string& name) const { } } -Variable* Scope::GetOrCreateVariable(const std::string& name) { - Variable* var = GetVariable(name); - if (var != nullptr) { - return var; - } - - Error err = CreateVariable(name); - if (!err.isOK()) { - return nullptr; - } else { - return GetVariable(name); - } -} - -bool Scope::HaveVariable(const std::string& name) { - return vars_.count(name) != 0; +bool Scope::HasVariable(const std::string &name) { + return (vars_.count(name) > 0 || (parent_ && parent_->HasVariable(name))); } } // namespace framework diff --git a/paddle/framework/scope.h b/paddle/framework/scope.h index 90c8141e4f..81491f34d8 100644 --- a/paddle/framework/scope.h +++ b/paddle/framework/scope.h @@ -1,15 +1,28 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + #pragma once #include #include +#include + #include "paddle/framework/variable.h" -#include "paddle/utils/Error.h" namespace paddle { namespace framework { -const static Error AlreadyCreated("Variable has already been created"); - /** * Scope is an association of a name to Variable. All variables belong to * `Scope`. You need to specify a scope to run a Net, i.e., `net.Run(&scope)`. @@ -26,20 +39,17 @@ class Scope { // Create Variable in this Scope. Return error if Variable already been // created. - Error __must_check CreateVariable(const std::string& name); + Variable* CreateVariable(const std::string& name); // Get Variable from this Scope, this function will recursive find Variable // from it's parent scope. Return nullptr if not found. Variable* GetVariable(const std::string& name) const; - // find and return Variables in the scope it self. + // Find and return Variables in the scope it self. Variable* GetVarLocally(const std::string& name) const; - // Get a Variable from Scope, if the Variable is not exist then create it. 
- // User should call this function most of time. - Variable* GetOrCreateVariable(const std::string& name); - - bool HaveVariable(const std::string& name); + // Find if there is a Variable in this scope and it's parent scope + bool HasVariable(const std::string &name); private: std::unordered_map> vars_; diff --git a/paddle/framework/scope_test.cc b/paddle/framework/scope_test.cc index 09fbb78d69..25c144868b 100644 --- a/paddle/framework/scope_test.cc +++ b/paddle/framework/scope_test.cc @@ -1,47 +1,55 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + #include "paddle/framework/scope.h" #include "gtest/gtest.h" TEST(Scope, Create) { using paddle::framework::Scope; - using paddle::Error; using paddle::framework::Variable; - using paddle::framework::AlreadyCreated; Scope* scope = new Scope(); - Error err = scope->CreateVariable(""); - EXPECT_FALSE(err.isOK()); + Variable* var0 = scope->CreateVariable(""); + EXPECT_NE(var0, nullptr); Variable* var1 = scope->GetVariable("a"); EXPECT_EQ(var1, nullptr); - Error err1 = scope->CreateVariable("a"); - EXPECT_TRUE(err1.isOK()); - - Error err2 = scope->CreateVariable("a"); - EXPECT_EQ(err2, AlreadyCreated); - - Variable* var2 = scope->GetVariable("a"); + Variable* var2 = scope->CreateVariable("a"); EXPECT_NE(var2, nullptr); - Variable* var3 = scope->GetOrCreateVariable("b"); - EXPECT_NE(var3, nullptr); + Variable* var3 = scope->CreateVariable("a"); + EXPECT_EQ(var2, var3); + + Variable* var4 = scope->GetVariable("a"); + EXPECT_EQ(var2, var4); } TEST(Scope, Parent) { using paddle::framework::Scope; using paddle::framework::Variable; - using paddle::Error; const auto parent_scope_ptr = std::shared_ptr(new Scope()); Scope* scope = new Scope(parent_scope_ptr); - Error err = parent_scope_ptr->CreateVariable("a"); - EXPECT_TRUE(err.isOK()); + Variable* var0 = parent_scope_ptr->CreateVariable("a"); + EXPECT_NE(var0, nullptr); Variable* var1 = scope->GetVarLocally("a"); EXPECT_EQ(var1, nullptr); Variable* var2 = scope->GetVariable("a"); - EXPECT_NE(var2, nullptr); + EXPECT_EQ(var2, var0); } \ No newline at end of file From 3e16f5fdfbb52d4381d2eea2d4a60d414e72cf0d Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 28 Jun 2017 14:15:54 +0800 Subject: [PATCH 120/542] clang format --- paddle/framework/scope.cc | 2 +- paddle/framework/scope.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/paddle/framework/scope.cc b/paddle/framework/scope.cc index 5c197cec2a..72cb744707 100644 --- a/paddle/framework/scope.cc +++ b/paddle/framework/scope.cc @@ -42,7 +42,7 @@ Variable* Scope::GetVariable(const std::string& name) const { } } -bool Scope::HasVariable(const std::string &name) { +bool Scope::HasVariable(const std::string& name) { return (vars_.count(name) > 0 || (parent_ && parent_->HasVariable(name))); } diff --git a/paddle/framework/scope.h b/paddle/framework/scope.h index 81491f34d8..a624fe3bbe 100644 --- a/paddle/framework/scope.h +++ b/paddle/framework/scope.h @@ -14,9 +14,9 @@ limitations 
under the License. */ #pragma once +#include #include #include -#include #include "paddle/framework/variable.h" @@ -49,7 +49,7 @@ class Scope { Variable* GetVarLocally(const std::string& name) const; // Find if there is a Variable in this scope and it's parent scope - bool HasVariable(const std::string &name); + bool HasVariable(const std::string& name); private: std::unordered_map> vars_; From 7062be0fbdbe2a916d7dd81e81c013bc7ee4a914 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Wed, 28 Jun 2017 15:12:30 +0800 Subject: [PATCH 121/542] Add cmake for compile NNPACKConvOp.cpp. --- CMakeLists.txt | 5 +++++ paddle/function/CMakeLists.txt | 5 +++++ paddle/function/nnpack/NNPACKConvOp.cpp | 29 +++++++++++++++++-------- paddle/function/nnpack/nnpack.cmake | 16 ++++++++++++++ 4 files changed, 46 insertions(+), 9 deletions(-) create mode 100644 paddle/function/nnpack/nnpack.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 3c719d35ec..f645ed04a1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -48,6 +48,7 @@ option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF) option(ON_TRAVIS "Exclude special unit test on Travis CI" OFF) option(WITH_C_API "Compile PaddlePaddle with C-API(Prediction)" OFF) option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF) +option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF) # CMAKE_BUILD_TYPE if(NOT CMAKE_BUILD_TYPE) @@ -126,6 +127,10 @@ if(WITH_GPU) endif(NOT WITH_DSO) endif(WITH_GPU) +if(USE_NNPACK) + list(APPEND EXTERNAL_LIBS ${NNPACK_LIB} ${PTHREADPOOL_LIB} "rt") +endif(USE_NNPACK) + add_subdirectory(proto) add_subdirectory(paddle) add_subdirectory(python) diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index 5e170714cf..daa2aa150e 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -10,6 +10,11 @@ if(WITH_GPU) cuda_compile(cu_objs ${cu_files}) endif() +if(USE_NNPACK) + include(nnpack/nnpack.cmake) + list(APPEND cpp_files nnpack/NNPACKConvOp.cpp) +endif() + add_library(paddle_function STATIC ${cpp_files} ${cu_objs}) add_dependencies(paddle_function ${external_project_dependencies}) add_dependencies(paddle_function gen_proto_cpp) diff --git a/paddle/function/nnpack/NNPACKConvOp.cpp b/paddle/function/nnpack/NNPACKConvOp.cpp index 57a6681f29..5e4de55469 100644 --- a/paddle/function/nnpack/NNPACKConvOp.cpp +++ b/paddle/function/nnpack/NNPACKConvOp.cpp @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "ConvOp.h" #include "nnpack.h" +#include "paddle/function/ConvOp.h" DEFINE_bool(nnpack_allocate_outside, false, @@ -72,14 +72,22 @@ public: } } + virtual void check(const BufferArgs& inputs, + const BufferArgs& outputs) override { + const TensorShape& output = inputs[0].shape(); + const TensorShape& filter = inputs[1].shape(); + const TensorShape& input = outputs[0].shape(); + checkShape(input, filter, output); + } + void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { CHECK_EQ(numInputs_, inputs.size()); CHECK_EQ(numOutputs_, outputs.size()); CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); + check(inputs, outputs); const TensorShape& input = inputs[0].shape(); const TensorShape& filter = inputs[1].shape(); const TensorShape& output = outputs[0].shape(); - check(input, filter, output); size_t batchSize = input[0]; size_t inputChannels = input[1]; @@ -92,12 +100,13 @@ public: // size_t outputWidth = output[3]; nnp_size inputSize = {.width = inputWidth, .height = inputHeight}; - nnp_padding padding = {.top = paddingH(), - .right = paddingW(), - .bottom = paddingH(), - .left = paddingW()}; + nnp_padding padding = {.top = (size_t)paddingH(), + .right = (size_t)paddingW(), + .bottom = (size_t)paddingH(), + .left = (size_t)paddingW()}; nnp_size kernelSize = {.width = filterWidth, .height = filterHeight}; - nnp_size outputSubsampling = {.width = strideW(), .height = strideH()}; + nnp_size outputSubsampling = {.width = (size_t)strideW(), + .height = (size_t)strideH()}; float* inputData = inputs[0].data(); float* filterData = inputs[1].data(); @@ -129,7 +138,8 @@ public: CHECK_EQ(status, nnp_status_success); } else { // only supports stride = 1 - CHECK_EQ(stride_, 1); + CHECK_EQ(strideH(), 1); + CHECK_EQ(strideW(), 1); nnp_status status = nnp_convolution_output(algorithm_, batchSize, inputChannels, @@ -189,7 +199,8 @@ public: CHECK_EQ(status, nnp_status_success); } else { // only supports stride = 1 - CHECK_EQ(stride_, 1); + CHECK_EQ(strideH(), 1); + CHECK_EQ(strideW(), 1); nnp_status status = nnp_convolution_output(algorithm_, batchSize, inputChannels, diff --git a/paddle/function/nnpack/nnpack.cmake b/paddle/function/nnpack/nnpack.cmake new file mode 100644 index 0000000000..7182730ae8 --- /dev/null +++ b/paddle/function/nnpack/nnpack.cmake @@ -0,0 +1,16 @@ +# Find the NNPACK library +# NNPACK_ROOT - where to find NNPACK include and library. +# + +set(NNPACK_FOUND OFF) +set(NNPACK_ROOT $ENV{NNPACK_ROOT} CACHE PATH "Folder contains NNPACK") +find_path(NNPACK_INC_DIR nnpack.h PATHS ${NNPACK_ROOT}/include) +find_library(NNPACK_LIB NAMES nnpack PATHS ${NNPACK_ROOT}/lib) +find_library(PTHREADPOOL_LIB NAMES pthreadpool PATHS ${NNPACK_ROOT}/lib) + +if(NNPACK_INC_DIR AND NNPACK_LIB AND PTHREADPOOL_LIB) + set(NNPACK_FOUND ON) + INCLUDE_DIRECTORIES(${NNPACK_INC_DIR}) +else() + message(FATAL_ERROR "Cannot find NNPACK in (${NNPACK_ROOT})") +endif() From 2d9113dac13000851d0d95818299f3e7c0d532c4 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Wed, 28 Jun 2017 15:47:23 +0800 Subject: [PATCH 122/542] Add test for NNPACKConvFunc. 
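
The test sweeps batch sizes, channel counts, filter sizes, strides, and paddings,
deriving the expected output extent from the usual convolution size relation. For
reference, a minimal sketch of that relation (illustrative only; the helper name
is hypothetical and not part of this patch):

```cpp
#include <cstddef>

// Output extent along one axis, written in the same integer-arithmetic form as
// the test code: (in - filter + 2 * padding + stride) / stride, which equals
// floor((in + 2 * padding - filter) / stride) + 1.
size_t ConvOutputSize(size_t in, size_t filter, size_t padding, size_t stride) {
  return (in - filter + 2 * padding + stride) / stride;
}
// e.g. in = 7, filter = 3, padding = 1, stride = 2 -> (7 - 3 + 2 + 2) / 2 = 4
```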
--- paddle/function/CMakeLists.txt | 3 + paddle/function/nnpack/NNPACKConvOpTest.cpp | 96 +++++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 paddle/function/nnpack/NNPACKConvOpTest.cpp diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index daa2aa150e..4ef8d80ff1 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -13,6 +13,9 @@ endif() if(USE_NNPACK) include(nnpack/nnpack.cmake) list(APPEND cpp_files nnpack/NNPACKConvOp.cpp) + if(WITH_TESTING) + add_unittest(NNPACKConvOpTest nnpack/NNPACKConvOpTest.cpp) + endif() endif() add_library(paddle_function STATIC ${cpp_files} ${cu_objs}) diff --git a/paddle/function/nnpack/NNPACKConvOpTest.cpp b/paddle/function/nnpack/NNPACKConvOpTest.cpp new file mode 100644 index 0000000000..e7ce61cc6c --- /dev/null +++ b/paddle/function/nnpack/NNPACKConvOpTest.cpp @@ -0,0 +1,96 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/function/Function.h" +#include "paddle/function/FunctionTest.h" + +DEFINE_string(algo, + "auto", + "The algorithm (auto, ft8x8, ft16x16, wt8x8, " + "implicit-gemm, or direct) for computing convolution of NNPACK."); + +namespace paddle { + +#define IS_NNPACK_SUPPORT(algo, filterSize, stride) \ + if (algo == "direct" && filterSize != 1) continue; \ + if (algo == "direct" && batchSize != 1) continue; \ + if (algo == "wt8x8" && filterSize != 3) continue; \ + if (algo == "implicit-gemm" && batchSize != 1) continue; \ + if (algo != "auto" && algo != "implicit-gemm" && stride > 1) continue; + +class ConvolutionTest { +public: + ConvolutionTest(const std::string& conv1, + const std::string& conv2, + std::string algo = "auto") { + for (size_t batchSize : {1, 32}) { + for (size_t inputSize : {7, 14, 54}) { + for (size_t filterSize : {1, 3, 5}) { + for (size_t inputChannels : {3, 64}) { + for (size_t outputChannels : {3, 64, 128}) { + if (inputChannels < outputChannels) break; + for (size_t stride : {1, 2}) { + // if batchSize > 1 NNPACKConv only supports stride = 1 + if (batchSize > 1 && stride > 1) break; + for (size_t padding : {0, 1}) { + if (padding >= filterSize) break; + size_t outputSize = + (inputSize - filterSize + 2 * padding + stride) / stride; + IS_NNPACK_SUPPORT(algo, filterSize, stride); + LOG(INFO) << " batchSize=" << batchSize + << " inputChannels=" << inputChannels + << " inputHeight=" << inputSize + << " inputWidth=" << inputSize + << " outputChannels=" << outputChannels + << " filterHeight=" << filterSize + << " filterWidth=" << filterSize + << " outputHeight=" << outputSize + << " outputWidth=" << outputSize + << " stride=" << stride << " padding=" << padding; + + Compare2Function test( + conv1, + conv2, + FuncConfig() + .set("padding", padding) + .set("stride", stride) + .set("algo", algo)); + + TensorShape shape0{ + batchSize, inputChannels, inputSize, inputSize}; + TensorShape shape1{ + outputChannels, inputChannels, filterSize, filterSize}; + TensorShape shape2{ + 
batchSize, outputChannels, outputSize, outputSize}; + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape0)); + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape1)); + test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape2)); + test.run(); + } + } + } + } + } + } + } + } +}; + +TEST(Convolution, NNPACK) { + // NNPACK only supports stride = 1 + ConvolutionTest test("GemmConv-CPU", "NNPACKConv-CPU", FLAGS_algo); +} + +} // namespace paddle From b8ffa8b9e9f468f79fea7f0bd452be2f8c64d17a Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 28 Jun 2017 15:57:00 +0800 Subject: [PATCH 123/542] move code to scope.h, remove scope.cc --- paddle/framework/CMakeLists.txt | 4 +-- paddle/framework/scope.cc | 50 --------------------------- paddle/framework/scope.h | 61 ++++++++++++++++++++++----------- paddle/framework/scope_test.cc | 16 ++++----- 4 files changed, 49 insertions(+), 82 deletions(-) delete mode 100644 paddle/framework/scope.cc diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 7ea17f7114..6caeb1be3a 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -6,6 +6,4 @@ nv_test(dim_test SRCS dim_test.cu DEPS ddim) cc_test(variable_test SRCS variable_test.cc) -# scope lib -cc_library(scope SRCS scope.cc) -cc_test(scope_test SRCS scope_test.cc DEPS scope) +cc_test(scope_test SRCS scope_test.cc) diff --git a/paddle/framework/scope.cc b/paddle/framework/scope.cc deleted file mode 100644 index 72cb744707..0000000000 --- a/paddle/framework/scope.cc +++ /dev/null @@ -1,50 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/framework/scope.h" - -namespace paddle { -namespace framework { - -Variable* Scope::CreateVariable(const std::string& name) { - if (!HasVariable(name)) { - vars_[name] = std::unique_ptr(new Variable()); - } - return GetVariable(name); -} - -Variable* Scope::GetVarLocally(const std::string& name) const { - if (vars_.count(name)) { - return vars_.at(name).get(); - } - return nullptr; -} - -Variable* Scope::GetVariable(const std::string& name) const { - Variable* var = GetVarLocally(name); - if (var != nullptr) { - return var; - } else if (parent_ != nullptr) { - return parent_->GetVariable(name); - } else { - return nullptr; - } -} - -bool Scope::HasVariable(const std::string& name) { - return (vars_.count(name) > 0 || (parent_ && parent_->HasVariable(name))); -} - -} // namespace framework -} // namespace paddle diff --git a/paddle/framework/scope.h b/paddle/framework/scope.h index a624fe3bbe..2f8d6dbd97 100644 --- a/paddle/framework/scope.h +++ b/paddle/framework/scope.h @@ -19,37 +19,58 @@ limitations under the License. */ #include #include "paddle/framework/variable.h" +#include "paddle/platform/assert.h" namespace paddle { namespace framework { /** * Scope is an association of a name to Variable. All variables belong to - * `Scope`. You need to specify a scope to run a Net, i.e., `net.Run(&scope)`. + * Scope. 
You need to specify a scope to run a Net, i.e., `net.Run(&scope)`.
  * One net can run in different scopes and update different variable in the
  * scope.
  */
 class Scope {
  public:
-  Scope() {}
-
-  explicit Scope(const std::shared_ptr<Scope>& scope) : parent_(scope) {}
-
-  ~Scope() {}
-
-  // Create Variable in this Scope. Return error if Variable already been
-  // created.
-  Variable* CreateVariable(const std::string& name);
-
-  // Get Variable from this Scope, this function will recursive find Variable
-  // from it's parent scope. Return nullptr if not found.
-  Variable* GetVariable(const std::string& name) const;
-
-  // Find and return Variables in the scope it self.
-  Variable* GetVarLocally(const std::string& name) const;
-
-  // Find if there is a Variable in this scope and it's parent scope
-  bool HasVariable(const std::string& name);
+  explicit Scope(const std::shared_ptr<Scope>& parent = nullptr)
+      : parent_(parent) {}
+
+  /// Create a Variable in this Scope. Fails if the Variable has already
+  /// been created.
+  Variable* CreateVariable(const std::string& name) {
+    PADDLE_ASSERT(!HasVariable(name));
+    vars_[name] = std::unique_ptr<Variable>(new Variable());
+    return GetVariable(name);
+  }
+
+  /// Get a Variable from this Scope; this function recursively searches
+  /// the parent scopes. Returns nullptr if not found.
+  Variable* GetVariable(const std::string& name) const {
+    auto it = vars_.find(name);
+    if (it != vars_.end()) {
+      return it->second.get();
+    } else if (parent_ != nullptr) {
+      return parent_->GetVariable(name);
+    } else {
+      return nullptr;
+    }
+  }
+
+  /// Get a Variable from the Scope; if the Variable does not exist, create
+  /// it and return it.
+  Variable* GetOrCreateVariable(const std::string& name) {
+    auto var = GetVariable(name);
+    if (var) {
+      return var;
+    } else {
+      return CreateVariable(name);
+    }
+  }
+
+  /// Check whether a Variable exists in this scope or its parent scopes.
+  bool HasVariable(const std::string& name) const {
+    return (vars_.find(name) != vars_.end() ||
+            (parent_ && parent_->HasVariable(name)));
+  }

  private:
   std::unordered_map<std::string, std::unique_ptr<Variable>> vars_;
diff --git a/paddle/framework/scope_test.cc b/paddle/framework/scope_test.cc
index 25c144868b..34ee21e1aa 100644
--- a/paddle/framework/scope_test.cc
+++ b/paddle/framework/scope_test.cc
@@ -28,12 +28,13 @@ TEST(Scope, Create) {
   EXPECT_EQ(var1, nullptr);

   Variable* var2 = scope->CreateVariable("a");
-  EXPECT_NE(var2, nullptr);

-  Variable* var3 = scope->CreateVariable("a");
+  ASSERT_DEATH({ scope->CreateVariable("a"); }, "");
+
+  Variable* var3 = scope->GetVariable("a");
   EXPECT_EQ(var2, var3);

-  Variable* var4 = scope->GetVariable("a");
+  Variable* var4 = scope->GetOrCreateVariable("a");
   EXPECT_EQ(var2, var4);
 }

@@ -47,9 +48,6 @@ TEST(Scope, Parent) {
   Variable* var0 = parent_scope_ptr->CreateVariable("a");
   EXPECT_NE(var0, nullptr);

-  Variable* var1 = scope->GetVarLocally("a");
-  EXPECT_EQ(var1, nullptr);
-
-  Variable* var2 = scope->GetVariable("a");
-  EXPECT_EQ(var2, var0);
-}
\ No newline at end of file
+  Variable* var1 = scope->GetVariable("a");
+  EXPECT_EQ(var0, var1);
+}

From 3a119efedad1a15f587c9415c70f661853a8d579 Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Wed, 28 Jun 2017 16:18:22 +0800
Subject: [PATCH 124/542] Adding Enforce to platform

Basically from caffe2::logging.h, but only exposes the `PADDLE_ENFORCE`
interface.
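
As a usage illustration, a minimal sketch (the `CheckRows` helper is
hypothetical and not part of this patch):

```cpp
#include "paddle/platform/enforce.h"

// PADDLE_ENFORCE concatenates all trailing arguments into the message and
// appends the throw site, so the caught text reads like
// "row mismatch, got 3 expect 4 at [file:line];".
void CheckRows(size_t rows, size_t expect) {
  PADDLE_ENFORCE(rows == expect, "row mismatch, got ", rows, " expect ", expect);
}

int main() {
  try {
    CheckRows(3, 4);
  } catch (const paddle::platform::EnforceNotMet& e) {
    return 1;  // e.what() carries the formatted message
  }
  return 0;
}
```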
---
 paddle/platform/CMakeLists.txt  |   1 +
 paddle/platform/enforce.h       | 116 ++++++++++++++++++++++++
 paddle/platform/enforce_test.cc |  25 +++++++
 3 files changed, 142 insertions(+)
 create mode 100644 paddle/platform/enforce.h
 create mode 100644 paddle/platform/enforce_test.cc

diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt
index 7abe2ab89e..8435410564 100644
--- a/paddle/platform/CMakeLists.txt
+++ b/paddle/platform/CMakeLists.txt
@@ -3,3 +3,4 @@ nv_test(cuda_test SRCS cuda_test.cu)
 cc_library(place SRCS place.cc)
 cc_test(place_test SRCS place_test.cc DEPS place glog gflags)
 cc_test(must_check_test SRCS must_check_test.cc)
+cc_test(enforce_test SRCS enforce_test.cc)
diff --git a/paddle/platform/enforce.h b/paddle/platform/enforce.h
new file mode 100644
index 0000000000..e501e80c55
--- /dev/null
+++ b/paddle/platform/enforce.h
@@ -0,0 +1,116 @@
+/*
+  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+  http://www.apache.org/licenses/LICENSE-2.0
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+*/
+
+#pragma once
+#include <exception>
+#include <sstream>
+
+namespace paddle {
+namespace platform {
+
+/**
+ * @brief Enforce exception. Inherits std::exception.
+ *
+ * Whenever an enforced condition is not met, an EnforceNotMet exception
+ * is thrown.
+ */
+class EnforceNotMet : public std::exception {
+ public:
+  EnforceNotMet(const std::string& msg, const char* file, int fileline)
+      : file_(file), fileline_(fileline) {
+    std::ostringstream sout;
+    sout << msg << " at [" << file_ << ":" << fileline_ << "];";
+    all_msg_ = sout.str();
+  }
+
+  const char* what() const noexcept override { return all_msg_.c_str(); }
+
+ private:
+  std::string all_msg_;
+  const char* file_;
+  int fileline_;
+};
+
+namespace details {
+
+inline void MakeStringInternal(std::ostringstream& stream) {}
+
+template <typename T>
+inline void MakeStringInternal(std::ostringstream& stream, T v) {
+  stream << v;
+}
+
+template <typename T, typename... ARGS>
+inline void MakeStringInternal(std::ostringstream& stream, T v, ARGS... args) {
+  MakeStringInternal(stream, v);
+  MakeStringInternal(stream, args...);
+};
+
+/**
+ * @brief MakeString concatenates all of its arguments into one string.
+ */
+template <typename... ARGS>
+inline std::string MakeString(ARGS... args) {
+  std::ostringstream sout;
+  details::MakeStringInternal(sout, args...);
+  return sout.str();
+}
+
+/**
+ * @brief Specialization for std::string.
+ */
+template <>
+inline std::string MakeString(std::string str) {
+  return str;
+}
+
+/**
+ * @brief Specialization for const char*.
+ */
+template <>
+inline std::string MakeString(const char* str) {
+  return std::string(str);
+}
+}  // namespace details
+
+// From https://stackoverflow.com/questions/30130930/
+// __builtin_expect is a compiler builtin. Since the enforced condition should
+// be true in most situations, the `UNLIKELY` macro lets the compiler generate
+// faster code for the common path.
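+//
+// Illustrative use (not part of this patch): wrapping a rarely-true error
+// check such as `if (UNLIKELY(!initialized)) { /* cold path */ }` marks the
+// branch as unlikely so the hot path stays fast.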
+#define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0)
+
+/**
+ * @brief Throw an EnforceNotMet exception, automatically filling in __FILE__
+ * and __LINE__.
+ *
+ * This macro takes __VA_ARGS__; users can pass values of any type that can
+ * be serialized to a std::ostream.
+ */
+#define PADDLE_THROW(...)                                               \
+  do {                                                                  \
+    throw ::paddle::platform::EnforceNotMet(                            \
+        ::paddle::platform::details::MakeString(__VA_ARGS__), __FILE__, \
+        __LINE__);                                                      \
+  } while (0)
+
+/**
+ * @brief Enforce a condition, otherwise throw an EnforceNotMet
+ */
+#define PADDLE_ENFORCE(condition, ...) \
+  do {                                 \
+    if (UNLIKELY(!(condition))) {      \
+      PADDLE_THROW(__VA_ARGS__);       \
+    }                                  \
+  } while (0)
+
+}  // namespace platform
+}  // namespace paddle
diff --git a/paddle/platform/enforce_test.cc b/paddle/platform/enforce_test.cc
new file mode 100644
index 0000000000..d3e945e972
--- /dev/null
+++ b/paddle/platform/enforce_test.cc
@@ -0,0 +1,25 @@
+#include <gtest/gtest.h>
+#include <paddle/platform/enforce.h>
+
+TEST(ENFORCE, OK) {
+  PADDLE_ENFORCE(true, "Enforce is ok", 123, "now", 0.345);
+  size_t val = 1;
+  const size_t limit = 10;
+  PADDLE_ENFORCE(val < limit, "Enforce is OK too");
+}
+
+TEST(ENFORCE, FAILED) {
+  bool in_catch = false;
+  try {
+    PADDLE_ENFORCE(false, "Enforce is not ok ", 123, " at all");
+  } catch (paddle::platform::EnforceNotMet err) {
+    in_catch = true;
+    std::string msg = "Enforce is not ok 123 at all";
+    const char* what = err.what();
+    for (size_t i = 0; i < msg.length(); ++i) {
+      ASSERT_EQ(what[i], msg[i]);
+    }
+  }
+
+  ASSERT_TRUE(in_catch);
+}
\ No newline at end of file

From cdf8d99080c1c36c505cd5dbe7572fe2f71bac6d Mon Sep 17 00:00:00 2001
From: hedaoyuan
Date: Wed, 28 Jun 2017 16:26:16 +0800
Subject: [PATCH 125/542] Bug fix.

---
 paddle/function/nnpack/NNPACKConvOp.cpp     | 4 ++--
 paddle/function/nnpack/NNPACKConvOpTest.cpp | 7 +++++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/paddle/function/nnpack/NNPACKConvOp.cpp b/paddle/function/nnpack/NNPACKConvOp.cpp
index 5e4de55469..d75fab0403 100644
--- a/paddle/function/nnpack/NNPACKConvOp.cpp
+++ b/paddle/function/nnpack/NNPACKConvOp.cpp
@@ -74,9 +74,9 @@ public:

   virtual void check(const BufferArgs& inputs,
                      const BufferArgs& outputs) override {
-    const TensorShape& output = inputs[0].shape();
+    const TensorShape& input = inputs[0].shape();
     const TensorShape& filter = inputs[1].shape();
-    const TensorShape& input = outputs[0].shape();
+    const TensorShape& output = outputs[0].shape();
     checkShape(input, filter, output);
   }

diff --git a/paddle/function/nnpack/NNPACKConvOpTest.cpp b/paddle/function/nnpack/NNPACKConvOpTest.cpp
index e7ce61cc6c..4818011211 100644
--- a/paddle/function/nnpack/NNPACKConvOpTest.cpp
+++ b/paddle/function/nnpack/NNPACKConvOpTest.cpp
@@ -60,12 +60,15 @@ public:
                             << " outputWidth=" << outputSize
                             << " stride=" << stride << " padding=" << padding;

+                  std::vector<size_t> paddings = {padding, padding};
+                  std::vector<size_t> strides = {stride, stride};
                   Compare2Function<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test(
                       conv1,
                       conv2,
                       FuncConfig()
-                          .set("padding", padding)
-                          .set("stride", stride)
+                          .set("paddings", paddings)
+                          .set("strides", strides)
+                          .set("groups", (size_t)1)
                           .set("algo", algo));

From 1678ad7b3067a8c72ac504fd8cb00e83766cbba2 Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Wed, 28 Jun 2017 16:33:43 +0800
Subject: [PATCH 126/542] add Create for scope

---
 paddle/framework/scope.h       | 8 +++++++-
 paddle/framework/scope_test.cc | 8 ++++----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/paddle/framework/scope.h b/paddle/framework/scope.h
index 2f8d6dbd97..bb22c4b834 100644
---
a/paddle/framework/scope.h +++ b/paddle/framework/scope.h @@ -31,10 +31,16 @@ namespace framework { * scope. */ class Scope { - public: + private: explicit Scope(const std::shared_ptr& parent = nullptr) : parent_(parent) {} + public: + static std::shared_ptr Create( + const std::shared_ptr& parent = nullptr) { + return std::make_shared(Scope(parent)); + } + /// Create Variable in this Scope. Failed if Variable already been /// created. Variable* CreateVariable(const std::string& name) { diff --git a/paddle/framework/scope_test.cc b/paddle/framework/scope_test.cc index 34ee21e1aa..d73391d977 100644 --- a/paddle/framework/scope_test.cc +++ b/paddle/framework/scope_test.cc @@ -19,7 +19,7 @@ TEST(Scope, Create) { using paddle::framework::Scope; using paddle::framework::Variable; - Scope* scope = new Scope(); + auto scope = Scope::Create(); Variable* var0 = scope->CreateVariable(""); EXPECT_NE(var0, nullptr); @@ -42,10 +42,10 @@ TEST(Scope, Parent) { using paddle::framework::Scope; using paddle::framework::Variable; - const auto parent_scope_ptr = std::shared_ptr(new Scope()); - Scope* scope = new Scope(parent_scope_ptr); + auto parent_scope = Scope::Create(); + auto scope = Scope::Create(parent_scope); - Variable* var0 = parent_scope_ptr->CreateVariable("a"); + Variable* var0 = parent_scope->CreateVariable("a"); EXPECT_NE(var0, nullptr); Variable* var1 = scope->GetVariable("a"); From 42dcffc29c9a6d0cbd27cc5d32d10c53400d287d Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Wed, 28 Jun 2017 17:19:31 +0800 Subject: [PATCH 127/542] "add optimizer full test" --- go/pserver/service_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/pserver/service_test.go b/go/pserver/service_test.go index a09b25dec0..57397fe586 100644 --- a/go/pserver/service_test.go +++ b/go/pserver/service_test.go @@ -10,7 +10,7 @@ import ( "github.com/PaddlePaddle/Paddle/go/pserver" ) -func TestNewName(t *testing.T) { +func TestServiceFull(t *testing.T) { s := pserver.NewService() var p pserver.Parameter p.Name = "param_a" From 6bb84b963d421394c04c43c202ccea874bdb53f6 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Wed, 28 Jun 2017 17:20:55 +0800 Subject: [PATCH 128/542] "remove unused debug info" --- go/pserver/optimizer_test.go | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/go/pserver/optimizer_test.go b/go/pserver/optimizer_test.go index 368047d6f8..49d9df5898 100644 --- a/go/pserver/optimizer_test.go +++ b/go/pserver/optimizer_test.go @@ -2,6 +2,7 @@ package pserver import ( "io/ioutil" + "reflect" "testing" ) @@ -22,3 +23,26 @@ func TestOptimizerCreateRelease(t *testing.T) { o := newOptimizer(param) o.Cleanup() } + +func TestOptimizerFull(t *testing.T) { + p := Parameter{ + Name: "a", + ElementType: Float32, + } + p.Content = []byte{1, 3} + config, err := ioutil.ReadFile("./cclient/test/testdata/optimizer.pb.txt") + if err != nil { + t.Fatalf("read optimizer proto failed") + } + param := ParameterWithConfig{ + Param: p, + Config: config, + } + o := newOptimizer(param) + g := Gradient(p) + if !reflect.DeepEqual(p.Content, o.GetWeights()) { + t.FailNow() + } + o.UpdateParameter(g) + o.Cleanup() +} From d2581f34e8179bdd7e0b9ce8a9d3e847758ff52d Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 28 Jun 2017 17:48:20 +0800 Subject: [PATCH 129/542] change copy right format --- paddle/platform/enforce.h | 22 ++++++++++------------ paddle/platform/enforce_test.cc | 11 +++++++++++ 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/paddle/platform/enforce.h 
b/paddle/platform/enforce.h index e501e80c55..fbd3405a24 100644 --- a/paddle/platform/enforce.h +++ b/paddle/platform/enforce.h @@ -1,15 +1,13 @@ -/* - Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include diff --git a/paddle/platform/enforce_test.cc b/paddle/platform/enforce_test.cc index d3e945e972..23b32444ad 100644 --- a/paddle/platform/enforce_test.cc +++ b/paddle/platform/enforce_test.cc @@ -1,3 +1,14 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + #include #include From fc5972ba2c0c2565d4255fda19f1b68f02c18e62 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Wed, 28 Jun 2017 19:54:25 +0800 Subject: [PATCH 130/542] fix requirement config for flowers dataset --- python/setup.py.in | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/setup.py.in b/python/setup.py.in index 86fc0fc5c0..aa6771709c 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -15,7 +15,8 @@ setup_requires=["requests", "protobuf==3.1", "recordio", "matplotlib", - "rarfile"] + "rarfile", + "scipy>=0.19.0"] if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']: setup_requires+=["opencv-python"] From 3919b75884749684e0bd8b502e426fa4949f2c1f Mon Sep 17 00:00:00 2001 From: gongweibao Date: Wed, 28 Jun 2017 12:01:32 +0000 Subject: [PATCH 131/542] modify cmake --- go/master/c/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/master/c/CMakeLists.txt b/go/master/c/CMakeLists.txt index acce698051..3eb598a877 100644 --- a/go/master/c/CMakeLists.txt +++ b/go/master/c/CMakeLists.txt @@ -6,7 +6,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PARENT_DIR}/cmake") project(cxx_go C Go) -include(golang) +#include(golang) include(flags) set(MASTER_LIB_NAME "paddle_master") From b93e863a1c5f31e9404dee8a2a6684119b876a2a Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Wed, 28 Jun 2017 20:02:52 +0800 Subject: [PATCH 132/542] Fix bug in MultiGradientMachine. --- paddle/gserver/gradientmachines/MultiGradientMachine.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp index 8ef5e9d0c1..018da6c76d 100644 --- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp +++ b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp @@ -601,7 +601,7 @@ void TrainerThread::backward() { void TrainerThread::backwardCallback(Parameter* para) { // CPU parameters are merged in the end - if (!para->useGpu()) return; + if (!para->useGpu() || para->isStatic()) return; int paramId = para->getID(); if (multiMachine_->getNumThreads() == 1) { From 9ad846ecee27ff1860debc4658090f1cfa75140f Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 28 Jun 2017 21:20:33 +0800 Subject: [PATCH 133/542] Remove must_check in paddle::platform --- paddle/platform/CMakeLists.txt | 1 - paddle/platform/must_check.h | 26 -------------------------- paddle/platform/must_check_test.cc | 10 ---------- paddle/utils/Error.h | 16 +++++++++++++++- 4 files changed, 15 insertions(+), 38 deletions(-) delete mode 100644 paddle/platform/must_check.h delete mode 100644 paddle/platform/must_check_test.cc diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index 7abe2ab89e..c7d7b14518 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -2,4 +2,3 @@ nv_test(cuda_test SRCS cuda_test.cu) cc_library(place SRCS place.cc) cc_test(place_test SRCS place_test.cc DEPS place glog gflags) -cc_test(must_check_test SRCS must_check_test.cc) diff --git a/paddle/platform/must_check.h b/paddle/platform/must_check.h deleted file mode 100644 index 4fcc62afc0..0000000000 --- a/paddle/platform/must_check.h +++ /dev/null @@ -1,26 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-/**
- * __must_check macro. It make the function's return value must be used,
- * otherwise it will raise a compile warning. And also Paddle treat all compile
- * warnings as errors.
- */
-#ifdef __GNUC__
-#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) >= 30400
-#define __must_check __attribute__((warn_unused_result))
-#else
-#define __must_check
-#endif
-#else
-#define __must_check
-#endif
diff --git a/paddle/platform/must_check_test.cc b/paddle/platform/must_check_test.cc
deleted file mode 100644
index 6ee3ea49ac..0000000000
--- a/paddle/platform/must_check_test.cc
+++ /dev/null
@@ -1,10 +0,0 @@
-#include <gtest/gtest.h>
-#include <paddle/platform/must_check.h>
-
-int __must_check SomeFunctionMustCheck() { return 0; }
-
-TEST(MustCheck, all) {
-  // This line should not be compiled, because the
-  // return value of SomeFunctionMustCheck marked as __must_check
-  // SomeFunctionMustCheck();
-}
\ No newline at end of file
diff --git a/paddle/utils/Error.h b/paddle/utils/Error.h
index f3d535c69c..27ddaab3f0 100644
--- a/paddle/utils/Error.h
+++ b/paddle/utils/Error.h
@@ -19,7 +19,21 @@ limitations under the License. */
 #include
 #include
 #include
-#include "paddle/platform/must_check.h"
+
+/**
+ * __must_check macro. It forces the function's return value to be used;
+ * otherwise the compiler raises a warning. And Paddle treats all compile
+ * warnings as errors.
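+ *
+ * Illustrative sketch (not from this patch):
+ *   Error __must_check foo();
+ *   foo();  // result ignored: raises -Wunused-result, which is an error here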
+ */ +#ifdef __GNUC__ +#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) >= 30400 +#define __must_check __attribute__((warn_unused_result)) +#else +#define __must_check +#endif +#else +#define __must_check +#endif namespace paddle { From b1a311c44d9554a1710d26c78f487f9786dd1934 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 28 Jun 2017 21:48:40 +0800 Subject: [PATCH 134/542] Add pb_cc_library in generic.cmake Fix #2567 --- cmake/external/protobuf.cmake | 8 ++++++-- cmake/generic.cmake | 33 +++++++++++++++++++++++++++++++++ paddle/api/CMakeLists.txt | 2 +- paddle/capi/CMakeLists.txt | 2 +- paddle/function/CMakeLists.txt | 2 +- paddle/gserver/CMakeLists.txt | 2 +- paddle/math/CMakeLists.txt | 2 +- paddle/optimizer/CMakeLists.txt | 2 +- paddle/parameter/CMakeLists.txt | 2 +- paddle/pserver/CMakeLists.txt | 4 ++-- paddle/testing/CMakeLists.txt | 4 ++-- paddle/trainer/CMakeLists.txt | 2 +- paddle/utils/CMakeLists.txt | 2 +- proto/CMakeLists.txt | 16 +--------------- 14 files changed, 53 insertions(+), 30 deletions(-) diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index d43badc1da..891fb29118 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -39,12 +39,16 @@ macro(PROMPT_PROTOBUF_LIB) ADD_LIBRARY(protobuf_lite ${protobuf_LIBTYPE} IMPORTED GLOBAL) SET_PROPERTY(TARGET protobuf_lite PROPERTY IMPORTED_LOCATION ${PROTOBUF_LITE_LIBRARY}) - ADD_LIBRARY(protoc ${protobuf_LIBTYPE} IMPORTED GLOBAL) - SET_PROPERTY(TARGET protoc PROPERTY IMPORTED_LOCATION ${PROTOC_LIBRARY}) + ADD_LIBRARY(libprotoc ${protobuf_LIBTYPE} IMPORTED GLOBAL) + SET_PROPERTY(TARGET libprotoc PROPERTY IMPORTED_LOCATION ${PROTOC_LIBRARY}) + + ADD_EXECUTABLE(protoc IMPORTED GLOBAL) + SET_PROPERTY(TARGET protoc PROPERTY IMPORTED_LOCATION ${PROTOBUF_PROTOC_EXECUTABLE}) FOREACH(dep ${protobuf_DEPS}) ADD_DEPENDENCIES(protobuf ${dep}) ADD_DEPENDENCIES(protobuf_lite ${dep}) + ADD_DEPENDENCIES(libprotoc ${dep}) ADD_DEPENDENCIES(protoc ${dep}) ENDFOREACH() diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 11c1f677ae..0370ab31f3 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -331,3 +331,36 @@ function(go_test TARGET_NAME) add_custom_target(${TARGET_NAME} ALL DEPENDS ${TARGET_NAME}_timestamp ${go_test_DEPS}) add_test(${TARGET_NAME} ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}) endfunction(go_test) + +function(pb_cc_library TARGET_NAME) + set(oneValueArgs "") + set(multiValueArgs SRCS) + cmake_parse_arguments(pb_cc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + set(proto_srcs) + set(proto_hdrs) + foreach(FIL ${pb_cc_library_SRCS}) + get_filename_component(ABS_FIL ${FIL} ABSOLUTE) + get_filename_component(FIL_WE ${FIL} NAME_WE) + if(NOT PROTOBUF_GENERATE_CPP_APPEND_PATH) + get_filename_component(FIL_DIR ${FIL} DIRECTORY) + if(FIL_DIR) + set(FIL_WE "${FIL_DIR}/${FIL_WE}") + endif() + endif() + + list(APPEND proto_srcs "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc") + list(APPEND proto_hdrs "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h") + + add_custom_command( + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc" + "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h" + COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} + ARGS "--cpp_out=${DLL_EXPORT_DECL}${CMAKE_CURRENT_BINARY_DIR}" "-I" ${CMAKE_CURRENT_SOURCE_DIR} ${ABS_FIL} + DEPENDS ${ABS_FIL} protoc + COMMENT "Running C++ protocol buffer compiler on ${FIL}" + VERBATIM ) + endforeach() + set_source_files_properties(${proto_srcs} ${proto_hdrs} PROPERTIES GENERATED TRUE) + 
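+  # GENERATED TRUE tells CMake these .pb.cc/.pb.h files need not exist at
+  # configure time; protoc produces them at build time.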
include_directories(${CMAKE_CURRENT_BINARY_DIR}) + cc_library(${TARGET_NAME} SRCS ${proto_srcs}) +endfunction() \ No newline at end of file diff --git a/paddle/api/CMakeLists.txt b/paddle/api/CMakeLists.txt index f2315e31cc..39d8aa075b 100644 --- a/paddle/api/CMakeLists.txt +++ b/paddle/api/CMakeLists.txt @@ -16,7 +16,7 @@ set(API_HEADER Internal.h) add_library(paddle_api STATIC ${API_SOURCES}) -add_dependencies(paddle_api gen_proto_cpp paddle_trainer_lib) +add_dependencies(paddle_api paddle_proto paddle_trainer_lib) INCLUDE(${SWIG_USE_FILE}) INCLUDE_DIRECTORIES(${PROJ_ROOT}/paddle) diff --git a/paddle/capi/CMakeLists.txt b/paddle/capi/CMakeLists.txt index 206f512563..11022d1754 100644 --- a/paddle/capi/CMakeLists.txt +++ b/paddle/capi/CMakeLists.txt @@ -26,7 +26,7 @@ target_include_directories(paddle_capi PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) add_style_check_target(paddle_capi ${CAPI_SOURCES} ${CAPI_HEADER} ${CAPI_PRIVATE_HEADER}) -add_dependencies(paddle_capi gen_proto_cpp) +add_dependencies(paddle_capi paddle_proto) # combine all paddle static libraries together, into libpaddle_capi_whole.a diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index 5e170714cf..1c39ced3c9 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -12,7 +12,7 @@ endif() add_library(paddle_function STATIC ${cpp_files} ${cu_objs}) add_dependencies(paddle_function ${external_project_dependencies}) -add_dependencies(paddle_function gen_proto_cpp) +add_dependencies(paddle_function paddle_proto) if(WITH_TESTING) if(WITH_GPU) diff --git a/paddle/gserver/CMakeLists.txt b/paddle/gserver/CMakeLists.txt index 93a6a99848..3bd583773a 100644 --- a/paddle/gserver/CMakeLists.txt +++ b/paddle/gserver/CMakeLists.txt @@ -58,7 +58,7 @@ endif() add_style_check_target(paddle_gserver ${GSERVER_SOURCES}) add_style_check_target(paddle_gserver ${GSERVER_HEADER}) -add_dependencies(paddle_gserver gen_proto_cpp) +add_dependencies(paddle_gserver paddle_proto) if(WITH_TESTING) add_subdirectory(tests) endif() diff --git a/paddle/math/CMakeLists.txt b/paddle/math/CMakeLists.txt index f5657c4690..326cdb156c 100644 --- a/paddle/math/CMakeLists.txt +++ b/paddle/math/CMakeLists.txt @@ -33,7 +33,7 @@ endif() add_style_check_target(paddle_math ${MATH_SOURCES}) add_style_check_target(paddle_math ${MATH_HEADERS}) -add_dependencies(paddle_math gen_proto_cpp) # depends +add_dependencies(paddle_math paddle_proto) # depends if(WITH_TESTING) add_subdirectory(tests) endif() diff --git a/paddle/optimizer/CMakeLists.txt b/paddle/optimizer/CMakeLists.txt index 4536f62ec7..bf878baaf0 100644 --- a/paddle/optimizer/CMakeLists.txt +++ b/paddle/optimizer/CMakeLists.txt @@ -10,7 +10,7 @@ set(OPITMIZER_SRCS ) add_library(paddle_optimizer STATIC ${OPITMIZER_SRCS}) -add_dependencies(paddle_optimizer gen_proto_cpp) +add_dependencies(paddle_optimizer paddle_proto) if(WITH_TESTING) add_simple_unittest(serialization_test) diff --git a/paddle/parameter/CMakeLists.txt b/paddle/parameter/CMakeLists.txt index a35e46997f..a9e344afdc 100644 --- a/paddle/parameter/CMakeLists.txt +++ b/paddle/parameter/CMakeLists.txt @@ -7,7 +7,7 @@ add_library(paddle_parameter STATIC ${PARAMETERS_SOURCES}) add_style_check_target(paddle_parameter ${PARAMETERS_SOURCES}) add_style_check_target(paddle_parameter ${PARAMETERS_HEADERS}) -add_dependencies(paddle_parameter gen_proto_cpp) +add_dependencies(paddle_parameter paddle_proto) if(WITH_TESTING) add_subdirectory(tests) endif() diff --git a/paddle/pserver/CMakeLists.txt 
b/paddle/pserver/CMakeLists.txt index b7f85ea1a6..92dd286f04 100644 --- a/paddle/pserver/CMakeLists.txt +++ b/paddle/pserver/CMakeLists.txt @@ -17,7 +17,7 @@ add_library(paddle_network STATIC add_style_check_target(paddle_network ${NETWORK_SOURCES}) add_style_check_target(paddle_network ${NETWORK_HEADERS}) -add_dependencies(paddle_network gen_proto_cpp) +add_dependencies(paddle_network paddle_proto) ################### paddle_pserver ###################### set(PSERVER_SOURCES @@ -40,7 +40,7 @@ add_library(paddle_pserver STATIC add_style_check_target(paddle_pserver ${PSERVER_SOURCES}) add_style_check_target(paddle_pserver ${PSERVER_HEADERS}) -add_dependencies(paddle_pserver gen_proto_cpp) +add_dependencies(paddle_pserver paddle_proto) set(PSERVER_MAIN_SOURCES ParameterServer2Main.cpp) diff --git a/paddle/testing/CMakeLists.txt b/paddle/testing/CMakeLists.txt index c47add04b0..4aa6eae681 100644 --- a/paddle/testing/CMakeLists.txt +++ b/paddle/testing/CMakeLists.txt @@ -2,7 +2,7 @@ if(WITH_TESTING) add_library(paddle_test_main STATIC TestMain.cpp) - add_dependencies(paddle_test_main gen_proto_cpp) + add_dependencies(paddle_test_main paddle_proto) add_library(paddle_test_util STATIC TestUtil.cpp) - add_dependencies(paddle_test_util gen_proto_cpp) + add_dependencies(paddle_test_util paddle_proto) endif() diff --git a/paddle/trainer/CMakeLists.txt b/paddle/trainer/CMakeLists.txt index f34d53ae99..b8f03fa7e7 100644 --- a/paddle/trainer/CMakeLists.txt +++ b/paddle/trainer/CMakeLists.txt @@ -41,7 +41,7 @@ add_style_check_target(paddle_trainer_lib add_style_check_target(paddle_trainer_lib ${TRAINER_HEADERS}) add_dependencies(paddle_trainer_lib - gen_proto_cpp) + paddle_proto) macro(add_paddle_exe TARGET_NAME) add_executable(${TARGET_NAME} ${ARGN}) diff --git a/paddle/utils/CMakeLists.txt b/paddle/utils/CMakeLists.txt index af59951752..f5c399256a 100644 --- a/paddle/utils/CMakeLists.txt +++ b/paddle/utils/CMakeLists.txt @@ -17,7 +17,7 @@ add_library(paddle_utils STATIC add_style_check_target(paddle_utils ${UTIL_HEADERS}) add_style_check_target(paddle_utils ${UTIL_SOURCES} ${UTIL_ARCH_SOURCES}) -add_dependencies(paddle_utils gen_proto_cpp) +add_dependencies(paddle_utils paddle_proto) if(WITH_TESTING) add_subdirectory(tests) endif() diff --git a/proto/CMakeLists.txt b/proto/CMakeLists.txt index c942620990..948d7db6b2 100644 --- a/proto/CMakeLists.txt +++ b/proto/CMakeLists.txt @@ -13,18 +13,6 @@ set(PROTO_GEN_PY) foreach(filename ${proto_filenames}) get_filename_component(base_filename ${filename} NAME_WE) - set(CUR_PROTO_GEN - ${CMAKE_CURRENT_BINARY_DIR}/${base_filename}.pb.h - ${CMAKE_CURRENT_BINARY_DIR}/${base_filename}.pb.cc) - set(PROTO_GEN - ${PROTO_GEN} - ${CUR_PROTO_GEN}) - add_custom_command(OUTPUT ${CUR_PROTO_GEN} - COMMAND env ${py_env} ${PROTOBUF_PROTOC_EXECUTABLE} - --cpp_out ${CMAKE_CURRENT_BINARY_DIR} - --proto_path ${PROJ_ROOT}/proto ${PROJ_ROOT}/proto/${filename} - DEPENDS ${filename} ${external_project_dependencies}) - set(CUR_PROTO_GEN_PY ${PROJ_ROOT}/paddle/python/paddle/proto/${base_filename}_pb2.py) set(PROTO_GEN_PY @@ -36,8 +24,6 @@ foreach(filename ${proto_filenames}) DEPENDS ${filename} ${external_project_dependencies}) endforeach() -add_custom_target(gen_proto_cpp ALL DEPENDS ${PROTO_GEN}) add_custom_target(gen_proto_py ALL DEPENDS ${PROTO_GEN_PY}) -add_library(paddle_proto STATIC ${PROTO_GEN}) -target_include_directories(paddle_proto PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) +pb_cc_library(paddle_proto SRCS ${proto_filenames}) From 30b75a51035dee978225f5e5eff2c0d4b9c09aec Mon Sep 
17 00:00:00 2001 From: Yu Yang Date: Wed, 28 Jun 2017 22:05:04 +0800 Subject: [PATCH 135/542] Also add pb_py_library --- cmake/generic.cmake | 31 ++++++++++++++++++++++++------- proto/CMakeLists.txt | 30 ++---------------------------- 2 files changed, 26 insertions(+), 35 deletions(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 0370ab31f3..14b6909829 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -341,13 +341,6 @@ function(pb_cc_library TARGET_NAME) foreach(FIL ${pb_cc_library_SRCS}) get_filename_component(ABS_FIL ${FIL} ABSOLUTE) get_filename_component(FIL_WE ${FIL} NAME_WE) - if(NOT PROTOBUF_GENERATE_CPP_APPEND_PATH) - get_filename_component(FIL_DIR ${FIL} DIRECTORY) - if(FIL_DIR) - set(FIL_WE "${FIL_DIR}/${FIL_WE}") - endif() - endif() - list(APPEND proto_srcs "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc") list(APPEND proto_hdrs "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h") @@ -363,4 +356,28 @@ function(pb_cc_library TARGET_NAME) set_source_files_properties(${proto_srcs} ${proto_hdrs} PROPERTIES GENERATED TRUE) include_directories(${CMAKE_CURRENT_BINARY_DIR}) cc_library(${TARGET_NAME} SRCS ${proto_srcs}) +endfunction() + +function(pb_py_library TARGET_NAME) + set(oneValueArgs TARGET_DIR) + set(multiValueArgs SRCS) + cmake_parse_arguments(pb_py_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + if (NOT ${pb_py_library_TARGET_DIR}) + set(pb_py_library_TARGET_DIR ${CMAKE_CURRENT_BINARY_DIR}) + endif() + + set(py_srcs) + foreach(FIL ${pb_py_library_SRCS}) + get_filename_component(ABS_FIL ${FIL} ABSOLUTE) + get_filename_component(FIL_WE ${FIL} NAME_WE) + set(cur_py_src ${pb_py_library_TARGET_DIR}/${FIL_WE}_pb2.py) + list(APPEND py_srcs "${cur_py_src}") + add_custom_command(OUTPUT ${cur_py_src} + COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} + ARGS "--python_out=${pb_py_library_TARGET_DIR}" "-I" ${CMAKE_CURRENT_SOURCE_DIR} ${ABS_FIL} + DEPENDS ${ABS_FIL} protoc + COMMENT "Running Python protocol buffer compiler on ${FIL}") + endforeach() + + add_custom_target(${TARGET_NAME} ALL DEPENDS ${py_srcs}) endfunction() \ No newline at end of file diff --git a/proto/CMakeLists.txt b/proto/CMakeLists.txt index 948d7db6b2..9b29d43d73 100644 --- a/proto/CMakeLists.txt +++ b/proto/CMakeLists.txt @@ -1,29 +1,3 @@ -set(proto_filenames - DataConfig.proto - DataFormat.proto - ModelConfig.proto - ParameterConfig.proto - ParameterService.proto - TrainerConfig.proto - OptimizerConfig.proto - ParameterServerConfig.proto) - -set(PROTO_GEN) -set(PROTO_GEN_PY) - -foreach(filename ${proto_filenames}) - get_filename_component(base_filename ${filename} NAME_WE) - set(CUR_PROTO_GEN_PY - ${PROJ_ROOT}/paddle/python/paddle/proto/${base_filename}_pb2.py) - set(PROTO_GEN_PY - ${CUR_PROTO_GEN_PY} - ${PROTO_GEN_PY}) - add_custom_command(OUTPUT ${CUR_PROTO_GEN_PY} - COMMAND env ${py_env} ${PROTOBUF_PROTOC_EXECUTABLE} --python_out ${PROJ_ROOT}/python/paddle/proto - --proto_path ${PROJ_ROOT}/proto ${PROJ_ROOT}/proto/${filename} - DEPENDS ${filename} ${external_project_dependencies}) -endforeach() - -add_custom_target(gen_proto_py ALL DEPENDS ${PROTO_GEN_PY}) - +file(GLOB proto_filenames . 
*.proto) pb_cc_library(paddle_proto SRCS ${proto_filenames}) +pb_py_library(gen_proto_py SRCS ${proto_filenames} TARGET_DIR ${CMAKE_CURRENT_SOURCE_DIR}) From 64b78b1656bd023e916447e7ea6c08de3d5c1f88 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 28 Jun 2017 22:27:50 +0800 Subject: [PATCH 136/542] Fix TravisCI --- cmake/generic.cmake | 3 +-- doc/CMakeLists.txt | 7 ------- paddle/gserver/CMakeLists.txt | 2 +- paddle/math/CMakeLists.txt | 2 +- paddle/optimizer/CMakeLists.txt | 2 +- paddle/parameter/CMakeLists.txt | 2 +- paddle/pserver/CMakeLists.txt | 2 +- paddle/trainer/CMakeLists.txt | 3 ++- paddle/utils/CMakeLists.txt | 2 +- proto/CMakeLists.txt | 2 +- 10 files changed, 10 insertions(+), 17 deletions(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 14b6909829..24a07c0a24 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -362,10 +362,9 @@ function(pb_py_library TARGET_NAME) set(oneValueArgs TARGET_DIR) set(multiValueArgs SRCS) cmake_parse_arguments(pb_py_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - if (NOT ${pb_py_library_TARGET_DIR}) + if (NOT pb_py_library_TARGET_DIR) set(pb_py_library_TARGET_DIR ${CMAKE_CURRENT_BINARY_DIR}) endif() - set(py_srcs) foreach(FIL ${pb_py_library_SRCS}) get_filename_component(ABS_FIL ${FIL} ABSOLUTE) diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt index 6fa42fd0c7..94dd3457fb 100644 --- a/doc/CMakeLists.txt +++ b/doc/CMakeLists.txt @@ -27,10 +27,6 @@ sphinx_add_target(paddle_docs ${CMAKE_CURRENT_SOURCE_DIR} ${SPHINX_HTML_DIR_EN}) -add_dependencies(paddle_docs - gen_proto_py) - - # configured documentation tools and intermediate build results set(BINARY_BUILD_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_build") @@ -51,6 +47,3 @@ sphinx_add_target(paddle_docs_cn ${SPHINX_CACHE_DIR_CN} ${CMAKE_CURRENT_SOURCE_DIR} ${SPHINX_HTML_DIR_CN}) - -add_dependencies(paddle_docs_cn - gen_proto_py) diff --git a/paddle/gserver/CMakeLists.txt b/paddle/gserver/CMakeLists.txt index 3bd583773a..0012636b8f 100644 --- a/paddle/gserver/CMakeLists.txt +++ b/paddle/gserver/CMakeLists.txt @@ -58,7 +58,7 @@ endif() add_style_check_target(paddle_gserver ${GSERVER_SOURCES}) add_style_check_target(paddle_gserver ${GSERVER_HEADER}) -add_dependencies(paddle_gserver paddle_proto) +add_dependencies(paddle_gserver paddle_proto ${external_project_dependencies}) if(WITH_TESTING) add_subdirectory(tests) endif() diff --git a/paddle/math/CMakeLists.txt b/paddle/math/CMakeLists.txt index 326cdb156c..9981de6160 100644 --- a/paddle/math/CMakeLists.txt +++ b/paddle/math/CMakeLists.txt @@ -33,7 +33,7 @@ endif() add_style_check_target(paddle_math ${MATH_SOURCES}) add_style_check_target(paddle_math ${MATH_HEADERS}) -add_dependencies(paddle_math paddle_proto) # depends +add_dependencies(paddle_math paddle_proto ${external_project_dependencies}) # depends if(WITH_TESTING) add_subdirectory(tests) endif() diff --git a/paddle/optimizer/CMakeLists.txt b/paddle/optimizer/CMakeLists.txt index bf878baaf0..9996d01d18 100644 --- a/paddle/optimizer/CMakeLists.txt +++ b/paddle/optimizer/CMakeLists.txt @@ -10,7 +10,7 @@ set(OPITMIZER_SRCS ) add_library(paddle_optimizer STATIC ${OPITMIZER_SRCS}) -add_dependencies(paddle_optimizer paddle_proto) +add_dependencies(paddle_optimizer paddle_proto ${external_project_dependencies}) if(WITH_TESTING) add_simple_unittest(serialization_test) diff --git a/paddle/parameter/CMakeLists.txt b/paddle/parameter/CMakeLists.txt index a9e344afdc..d2ae1c16c6 100644 --- a/paddle/parameter/CMakeLists.txt +++ b/paddle/parameter/CMakeLists.txt 
@@ -7,7 +7,7 @@ add_library(paddle_parameter STATIC ${PARAMETERS_SOURCES}) add_style_check_target(paddle_parameter ${PARAMETERS_SOURCES}) add_style_check_target(paddle_parameter ${PARAMETERS_HEADERS}) -add_dependencies(paddle_parameter paddle_proto) +add_dependencies(paddle_parameter paddle_proto ${external_project_dependencies}) if(WITH_TESTING) add_subdirectory(tests) endif() diff --git a/paddle/pserver/CMakeLists.txt b/paddle/pserver/CMakeLists.txt index 92dd286f04..f2e0b4b76b 100644 --- a/paddle/pserver/CMakeLists.txt +++ b/paddle/pserver/CMakeLists.txt @@ -40,7 +40,7 @@ add_library(paddle_pserver STATIC add_style_check_target(paddle_pserver ${PSERVER_SOURCES}) add_style_check_target(paddle_pserver ${PSERVER_HEADERS}) -add_dependencies(paddle_pserver paddle_proto) +add_dependencies(paddle_pserver paddle_proto ${external_project_dependencies}) set(PSERVER_MAIN_SOURCES ParameterServer2Main.cpp) diff --git a/paddle/trainer/CMakeLists.txt b/paddle/trainer/CMakeLists.txt index b8f03fa7e7..6414c39956 100644 --- a/paddle/trainer/CMakeLists.txt +++ b/paddle/trainer/CMakeLists.txt @@ -41,7 +41,8 @@ add_style_check_target(paddle_trainer_lib add_style_check_target(paddle_trainer_lib ${TRAINER_HEADERS}) add_dependencies(paddle_trainer_lib - paddle_proto) + paddle_proto + ${external_project_dependencies}) macro(add_paddle_exe TARGET_NAME) add_executable(${TARGET_NAME} ${ARGN}) diff --git a/paddle/utils/CMakeLists.txt b/paddle/utils/CMakeLists.txt index f5c399256a..7a4977935e 100644 --- a/paddle/utils/CMakeLists.txt +++ b/paddle/utils/CMakeLists.txt @@ -17,7 +17,7 @@ add_library(paddle_utils STATIC add_style_check_target(paddle_utils ${UTIL_HEADERS}) add_style_check_target(paddle_utils ${UTIL_SOURCES} ${UTIL_ARCH_SOURCES}) -add_dependencies(paddle_utils paddle_proto) +add_dependencies(paddle_utils paddle_proto ${external_project_dependencies}) if(WITH_TESTING) add_subdirectory(tests) endif() diff --git a/proto/CMakeLists.txt b/proto/CMakeLists.txt index 9b29d43d73..4402f2c899 100644 --- a/proto/CMakeLists.txt +++ b/proto/CMakeLists.txt @@ -1,3 +1,3 @@ file(GLOB proto_filenames . *.proto) pb_cc_library(paddle_proto SRCS ${proto_filenames}) -pb_py_library(gen_proto_py SRCS ${proto_filenames} TARGET_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +pb_py_library(gen_proto_py SRCS ${proto_filenames} TARGET_DIR ${PROJ_ROOT}/python/paddle/proto) From 44e39246639fe5b3ba1dbf5158531f7eb4fc6175 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Wed, 28 Jun 2017 23:04:35 +0800 Subject: [PATCH 137/542] "fix client send empty gradients bug" --- go/pserver/client.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/go/pserver/client.go b/go/pserver/client.go index dda9159772..a8d2d710d8 100644 --- a/go/pserver/client.go +++ b/go/pserver/client.go @@ -123,6 +123,10 @@ func (c *Client) FinishInitParams() error { // SendGrads sends gradients to parameter servers for updating // parameters. func (c *Client) SendGrads(grads []Gradient) error { + if len(grads) == 0 { + log.Info("Send Empty Gradient") + return nil + } errCh := make(chan error, len(grads)) for _, g := range grads { go func(g Gradient) { From 01f44bff669442ffdb67a5baac14aa693cba08c6 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Wed, 28 Jun 2017 23:12:19 +0800 Subject: [PATCH 138/542] rename args and add comments 1. rename 'useXmap' to 'use_xmap' 2. 
add comments about exchanging train data and test data --- python/paddle/v2/dataset/flowers.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/python/paddle/v2/dataset/flowers.py b/python/paddle/v2/dataset/flowers.py index a181f3881a..158cfe158c 100644 --- a/python/paddle/v2/dataset/flowers.py +++ b/python/paddle/v2/dataset/flowers.py @@ -46,6 +46,12 @@ SETID_URL = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/setid.mat' DATA_MD5 = '52808999861908f626f3c1f4e79d11fa' LABEL_MD5 = 'e0620be6f572b9609742df49c70aed4d' SETID_MD5 = 'a5357ecc9cb78c4bef273ce3793fc85c' +# In official 'readme', tstid is the flag of test data +# and trnid is the flag of train data. But test data is more than train data. +# So we exchange the train data and test data. +TRAIN_FLAG = 'tstid' +TEST_FLAG = 'trnid' +VALID_FLAG = 'valid' def default_mapper(sample): @@ -64,7 +70,7 @@ def reader_creator(data_file, dataset_name, mapper=default_mapper, buffered_size=1024, - useXmap=True): + use_xmap=True): ''' 1. read images from tar file and merge images into batch files in 102flowers.tgz_batch/ @@ -106,13 +112,13 @@ def reader_creator(data_file, for sample, label in itertools.izip(data, batch['label']): yield sample, int(label) - if useXmap: + if use_xmap: return xmap_readers(mapper, reader, cpu_count(), buffered_size) else: return map_readers(mapper, reader) -def train(mapper=default_mapper, buffered_size=1024, useXmap=True): +def train(mapper=default_mapper, buffered_size=1024, use_xmap=True): ''' Create flowers training set reader. It returns a reader, each sample in the reader is @@ -131,11 +137,11 @@ def train(mapper=default_mapper, buffered_size=1024, useXmap=True): return reader_creator( download(DATA_URL, 'flowers', DATA_MD5), download(LABEL_URL, 'flowers', LABEL_MD5), - download(SETID_URL, 'flowers', SETID_MD5), 'tstid', mapper, - buffered_size, useXmap) + download(SETID_URL, 'flowers', SETID_MD5), TRAIN_FLAG, mapper, + buffered_size, use_xmap) -def test(mapper=default_mapper, buffered_size=1024, useXmap=True): +def test(mapper=default_mapper, buffered_size=1024, use_xmap=True): ''' Create flowers test set reader. It returns a reader, each sample in the reader is @@ -154,11 +160,11 @@ def test(mapper=default_mapper, buffered_size=1024, useXmap=True): return reader_creator( download(DATA_URL, 'flowers', DATA_MD5), download(LABEL_URL, 'flowers', LABEL_MD5), - download(SETID_URL, 'flowers', SETID_MD5), 'trnid', mapper, - buffered_size, useXmap) + download(SETID_URL, 'flowers', SETID_MD5), TEST_FLAG, mapper, + buffered_size, use_xmap) -def valid(mapper=default_mapper, buffered_size=1024, useXmap=True): +def valid(mapper=default_mapper, buffered_size=1024, use_xmap=True): ''' Create flowers validation set reader. It returns a reader, each sample in the reader is @@ -177,8 +183,8 @@ def valid(mapper=default_mapper, buffered_size=1024, useXmap=True): return reader_creator( download(DATA_URL, 'flowers', DATA_MD5), download(LABEL_URL, 'flowers', LABEL_MD5), - download(SETID_URL, 'flowers', SETID_MD5), 'valid', mapper, - buffered_size, useXmap) + download(SETID_URL, 'flowers', SETID_MD5), VALID_FLAG, mapper, + buffered_size, use_xmap) def fetch(): From c9865824a718e8361941f669e4ca879be6c24bcb Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Thu, 29 Jun 2017 01:10:30 +0800 Subject: [PATCH 139/542] Support to init partial network parameters from the tar file. 
--- python/paddle/v2/parameters.py | 23 +++++---- python/paddle/v2/tests/test_parameters.py | 57 +++++++++++++++++++++-- 2 files changed, 65 insertions(+), 15 deletions(-) diff --git a/python/paddle/v2/parameters.py b/python/paddle/v2/parameters.py index ad20241b98..f730ea10bb 100644 --- a/python/paddle/v2/parameters.py +++ b/python/paddle/v2/parameters.py @@ -51,7 +51,7 @@ class Parameters(object): def __init__(self): self.__param_conf__ = dict() self.__gradient_machines__ = [] - self.__tmp_params__ = [] + self.__tmp_params__ = dict() def __append_config__(self, param_conf): """ @@ -128,13 +128,10 @@ class Parameters(object): if len(self.__gradient_machines__) == 0: # create new parameter in python numpy. - if len(self.__tmp_params__) != 0: - ret_list = [ - mat for name, mat in self.__tmp_params__ if name == key - ] - if len(ret_list) == 1: - return ret_list[0] - return np.ndarray(shape=shape, dtype=np.float32) + if key in self.__tmp_params__: + return self.__tmp_params__[key] + else: + return np.ndarray(shape=shape, dtype=np.float32) else: for each_gradient_machine in self.__gradient_machines__: param = __get_parameter_in_gradient_machine__( @@ -187,7 +184,7 @@ class Parameters(object): (shape, value.shape)) if len(self.__gradient_machines__) == 0: - self.__tmp_params__.append((key, value)) + self.__tmp_params__[key] = value else: for each_gradient_machine in self.__gradient_machines__: __copy_parameter_to_gradient_machine__(each_gradient_machine, @@ -231,7 +228,7 @@ class Parameters(object): raise ValueError("gradient_machine should be api.GradientMachine") if len(self.__tmp_params__) != 0: - for name, val in self.__tmp_params__: + for name, val in self.__tmp_params__.iteritems(): try: __copy_parameter_to_gradient_machine__(gradient_machine, name, val) @@ -302,6 +299,12 @@ class Parameters(object): params.deserialize(param_name, f) return params + def init_from_tar(self, f): + tar_param = self.from_tar(f) + for pname in tar_param.names(): + if pname in self.names(): + self.set(pname, tar_param.get(pname)) + def __get_parameter_in_gradient_machine__(gradient_machine, name): """ diff --git a/python/paddle/v2/tests/test_parameters.py b/python/paddle/v2/tests/test_parameters.py index 45372e7dd0..7ba8a939fb 100644 --- a/python/paddle/v2/tests/test_parameters.py +++ b/python/paddle/v2/tests/test_parameters.py @@ -20,14 +20,17 @@ import cStringIO import numpy -def __rand_param_config__(name): +def __rand_param_config__(name, psize=None): conf = ParameterConfig() conf.name = name size = 1 - for i in xrange(2): - dim = random.randint(1, 1000) - conf.dims.append(dim) - size *= dim + if psize is None: + for i in xrange(2): + dim = random.randint(1, 1000) + conf.dims.append(dim) + size *= dim + else: + size = psize conf.size = size assert conf.IsInitialized() return conf @@ -77,6 +80,50 @@ class TestParameters(unittest.TestCase): expected = numpy.array([[1, 1], [1, 2], [1, 1]], numpy.float32) assert numpy.logical_and.reduce(numpy.reshape(val == expected, 6)) + def test_init_from_tar(self): + def get_param(names, size): + p = parameters.Parameters() + for k, v in zip(names, size): + p.__append_config__(__rand_param_config__(k, v)) + for name in p.names(): + param = p.get(name) + param[:] = numpy.random.uniform( + -1.0, 1.0, size=p.get_shape(name)) + p.set(name, param) + return p + + def get_parames(): + name1 = ['param_0', 'param_1'] + size1 = [128, 256] + p1 = get_param(name1, size1) + file1 = cStringIO.StringIO() + p1.to_tar(file1) + file1.seek(0) + + name2 = ['param_0', 'param_1', 'param_2'] + 
size2 = [128, 256, 288] + p2 = get_param(name2, size2) + file2 = cStringIO.StringIO() + p2.to_tar(file2) + file2.seek(0) + return p1, file1, p2, file2 + + p1, file1, p2, file2 = get_parames() + p2.init_from_tar(file1) + for name in p1.names(): + self.assertEqual(p1.get_shape(name), p2.get_shape(name)) + v1 = p1.get(name) + v2 = p2.get(name) + self.assertTrue(numpy.isclose(v1, v2).all()) + + p1, file1, p2, file2 = get_parames() + p1.init_from_tar(file2) + for name in p1.names(): + self.assertEqual(p1.get_shape(name), p2.get_shape(name)) + v1 = p1.get(name) + v2 = p2.get(name) + self.assertTrue(numpy.isclose(v1, v2).all()) + if __name__ == '__main__': unittest.main() From 555540fcc1b44323161c3dfd56a6f3fc7307433c Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Thu, 29 Jun 2017 01:11:58 +0800 Subject: [PATCH 140/542] fix typo --- paddle/py_paddle/dataprovider_converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/py_paddle/dataprovider_converter.py b/paddle/py_paddle/dataprovider_converter.py index 218cb5ec56..43614b9779 100644 --- a/paddle/py_paddle/dataprovider_converter.py +++ b/paddle/py_paddle/dataprovider_converter.py @@ -144,7 +144,7 @@ class DenseScanner(IScanner): if len(self.__shape__) > 1: # The last-two dimenstions are the frame height and width. # For example, the layout is CHW for 3-D feature of image. - # The H and W are the fram height and width. + # The H and W are the frame height and width. h, w = self.__shape__[-2:] argument.setSlotFrameHeight(self.pos, h) argument.setSlotFrameWidth(self.pos, w) From 6215f47c7c572edd94900a9ef4b90fce6726ee70 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Wed, 28 Jun 2017 14:44:40 -0700 Subject: [PATCH 141/542] Rename paddle/strings/ to paddle/string/ --- paddle/{strings => string}/CMakeLists.txt | 0 paddle/{strings => string}/stringpiece.cc | 0 paddle/{strings => string}/stringpiece.h | 0 paddle/{strings => string}/stringpiece_test.cc | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename paddle/{strings => string}/CMakeLists.txt (100%) rename paddle/{strings => string}/stringpiece.cc (100%) rename paddle/{strings => string}/stringpiece.h (100%) rename paddle/{strings => string}/stringpiece_test.cc (100%) diff --git a/paddle/strings/CMakeLists.txt b/paddle/string/CMakeLists.txt similarity index 100% rename from paddle/strings/CMakeLists.txt rename to paddle/string/CMakeLists.txt diff --git a/paddle/strings/stringpiece.cc b/paddle/string/stringpiece.cc similarity index 100% rename from paddle/strings/stringpiece.cc rename to paddle/string/stringpiece.cc diff --git a/paddle/strings/stringpiece.h b/paddle/string/stringpiece.h similarity index 100% rename from paddle/strings/stringpiece.h rename to paddle/string/stringpiece.h diff --git a/paddle/strings/stringpiece_test.cc b/paddle/string/stringpiece_test.cc similarity index 100% rename from paddle/strings/stringpiece_test.cc rename to paddle/string/stringpiece_test.cc From ea1d3acfb4012f491703266fa4caaf8e7e99e8c3 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Wed, 28 Jun 2017 14:52:54 -0700 Subject: [PATCH 142/542] Rename string/stringpiece* into string/piece --- paddle/CMakeLists.txt | 2 +- paddle/string/CMakeLists.txt | 4 +- paddle/string/piece.cc | 138 +++++++++++++++++ paddle/string/{stringpiece.h => piece.h} | 64 ++++---- .../{stringpiece_test.cc => piece_test.cc} | 100 +++++++------ paddle/string/stringpiece.cc | 141 ------------------ 6 files changed, 225 insertions(+), 224 deletions(-) create mode 100644 paddle/string/piece.cc rename 
paddle/string/{stringpiece.h => piece.h} (57%)
 rename paddle/string/{stringpiece_test.cc => piece_test.cc} (77%)
 delete mode 100644 paddle/string/stringpiece.cc

diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt
index 979b68e827..307e99bbe3 100644
--- a/paddle/CMakeLists.txt
+++ b/paddle/CMakeLists.txt
@@ -9,7 +9,7 @@ add_subdirectory(pserver)
 add_subdirectory(trainer)
 add_subdirectory(scripts)
 add_subdirectory(optimizer)
-add_subdirectory(strings)
+add_subdirectory(string)
 
 if(Boost_FOUND)
   add_subdirectory(memory)
diff --git a/paddle/string/CMakeLists.txt b/paddle/string/CMakeLists.txt
index 4e55eecd48..0f39660a90 100644
--- a/paddle/string/CMakeLists.txt
+++ b/paddle/string/CMakeLists.txt
@@ -1,2 +1,2 @@
-cc_library(stringpiece SRCS stringpiece.cc)
-cc_test(stringpiece_test SRCS stringpiece_test.cc DEPS stringpiece glog gflags)
+cc_library(stringpiece SRCS piece.cc)
+cc_test(stringpiece_test SRCS piece_test.cc DEPS stringpiece glog gflags)
diff --git a/paddle/string/piece.cc b/paddle/string/piece.cc
new file mode 100644
index 0000000000..b80afdec82
--- /dev/null
+++ b/paddle/string/piece.cc
@@ -0,0 +1,138 @@
+/*
+  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+*/
+
+#include "paddle/string/piece.h"
+
+#include <string.h>
+
+#include <algorithm>
+#include <iosfwd>
+#include <stdexcept>
+
+namespace paddle {
+namespace string {
+
+Piece::Piece() : data_(NULL), size_(0) {}
+
+Piece::Piece(const char* d, size_t n) : data_(d), size_(n) {
+  if (d == NULL && n != 0)
+    throw std::invalid_argument("Piece requires len to be 0 for NULL data");
+}
+
+Piece::Piece(const char* s) : data_(s) { size_ = (s == NULL) ? 0 : strlen(s); }
+
+Piece::Piece(const std::string& s) : data_(s.data()), size_(s.size()) {}
+
+char Piece::operator[](size_t n) const {
+  if (n >= len()) throw std::invalid_argument("index out of Piece length");
+  return data_[n];
+}
+
+int Compare(Piece a, Piece b) {
+  const size_t min_len = (a.len() < b.len()) ? a.len() : b.len();
+  int r = memcmp(a.data(), b.data(), min_len);
+  if (r == 0) {
+    if (a.len() < b.len())
+      return -1;
+    else if (a.len() > b.len())
+      return 1;
+  }
+  return r;
+}
+
+bool operator==(Piece x, Piece y) {
+  return ((x.len() == y.len()) &&
+          (x.data() == y.data() || memcmp(x.data(), y.data(), x.len()) == 0));
+}
+
+bool operator!=(Piece x, Piece y) { return !(x == y); }
+
+bool operator<(Piece x, Piece y) { return Compare(x, y) < 0; }
+bool operator>(Piece x, Piece y) { return Compare(x, y) > 0; }
+
+bool operator<=(Piece x, Piece y) { return Compare(x, y) <= 0; }
+bool operator>=(Piece x, Piece y) { return Compare(x, y) >= 0; }
+
+bool HasPrefix(Piece s, Piece x) {
+  return ((s.len() >= x.len()) && (memcmp(s.data(), x.data(), x.len()) == 0));
+}
+
+bool HasSuffix(Piece s, Piece x) {
+  return ((s.len() >= x.len()) &&
+          (memcmp(s.data() + (s.len() - x.len()), x.data(), x.len()) == 0));
+}
+
+Piece SkipPrefix(Piece s, size_t n) {
+  if (n > s.len())
+    throw std::invalid_argument("Skip distance larger than Piece length");
+  return Piece(s.data() + n, s.len() - n);
+}
+
+Piece SkipSuffix(Piece s, size_t n) {
+  if (n > s.len())
+    throw std::invalid_argument("Skip distance larger than Piece length");
+  return Piece(s.data(), s.len() - n);
+}
+
+Piece TrimPrefix(Piece s, Piece x) {
+  return HasPrefix(s, x) ? SkipPrefix(s, x.len()) : s;
+}
+
+Piece TrimSuffix(Piece s, Piece x) {
+  return HasSuffix(s, x) ? SkipSuffix(s, x.len()) : s;
+}
+
+bool Contains(Piece s, Piece sub) {
+  return std::search(s.begin(), s.end(), sub.begin(), sub.end()) != s.end();
+}
+
+size_t Index(Piece s, Piece sub) {
+  auto e = std::search(s.begin(), s.end(), sub.begin(), sub.end());
+  return e != s.end() ? e - s.data() : Piece::npos;
+}
+
+size_t Find(Piece s, char c, size_t pos) {
+  if (pos >= s.len()) {
+    return Piece::npos;
+  }
+  const char* result =
+      reinterpret_cast<const char*>(memchr(s.data() + pos, c, s.len() - pos));
+  return result != nullptr ? result - s.data() : Piece::npos;
+}
+
+size_t RFind(Piece s, char c, size_t pos) {
+  if (s.len() == 0) return Piece::npos;
+  for (const char* p = s.data() + std::min(pos, s.len() - 1); p >= s.data();
+       p--) {
+    if (*p == c) {
+      return p - s.data();
+    }
+  }
+  return Piece::npos;
+}
+
+Piece SubStr(Piece s, size_t pos, size_t n) {
+  if (pos > s.len()) pos = s.len();
+  if (n > s.len() - pos) n = s.len() - pos;
+  return Piece(s.data() + pos, n);
+}
+
+std::ostream& operator<<(std::ostream& o, Piece piece) {
+  return o << piece.ToString();
+}
+
+}  // namespace string
+}  // namespace paddle
diff --git a/paddle/string/stringpiece.h b/paddle/string/piece.h
similarity index 57%
rename from paddle/string/stringpiece.h
rename to paddle/string/piece.h
index adff713e86..db7c3e6980 100644
--- a/paddle/string/stringpiece.h
+++ b/paddle/string/piece.h
@@ -20,33 +20,34 @@
 #include <ostream>
 
 namespace paddle {
+namespace string {
 
-// StringPiece points into a std::string object but doesn't own the
+// Piece points into a std::string object but doesn't own the
 // string.  It is for efficient access to strings.  Like Go's string
-// type.  Not that StringPiece doesn't mutate the underlying string,
+// type.  Note that Piece doesn't mutate the underlying string,
 // so it is thread-safe given that the underlying string doesn't
-// change.  Because StringPiece contains a little data members, and
+// change.  Because Piece contains only a few data members, and
 // its syntax is simple as it doesn't own/manage the string, it is
-// cheap to construct StringPieces and pass them around.
+// cheap to construct Pieces and pass them around.
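A minimal usage sketch of the Piece API this patch introduces (an editor's illustration, not part of the commit). Because Piece is a non-owning view, the backing std::string must outlive every Piece that points into it:

#include <iostream>
#include <string>
#include "paddle/string/piece.h"

int main() {
  std::string storage = "hello world";   // owns the bytes
  paddle::string::Piece view(storage);   // pointer + length, no copy
  paddle::string::Piece word = paddle::string::SubStr(view, 0, 5);
  std::cout << word << "\n";             // prints "hello" via operator<<
  return 0;
}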
-class StringPiece {
+class Piece {
 public:
   static const size_t npos = static_cast<size_t>(-1);
 
   // We provide non-explicit singleton constructors so users can
-  // pass in a "const char*" or a "string" wherever a "StringPiece"
+  // pass in a "const char*" or a "string" wherever a "Piece"
   // is expected.  These constructors ensure that if data_ is NULL,
   // size_ is 0.
-  StringPiece();
-  StringPiece(const char* d, size_t n);
-  StringPiece(const char* d);
-  StringPiece(const std::string& s);
+  Piece();
+  Piece(const char* d, size_t n);
+  Piece(const char* d);
+  Piece(const std::string& s);
 
   const char* data() const { return data_; }
   size_t len() const { return size_; }
 
   char operator[](size_t n) const;
 
-  // StringPiece doesn't own the string, so both iterator and const
+  // Piece doesn't own the string, so both iterator and const
   // iterator are const char* indeed.
   typedef const char* const_iterator;
   typedef const char* iterator;
@@ -63,43 +64,44 @@ private:
   // Intentionally copyable
 };
 
-int Compare(StringPiece a, StringPiece b);
+int Compare(Piece a, Piece b);
 
-bool operator==(StringPiece x, StringPiece y);
-bool operator!=(StringPiece x, StringPiece y);
-bool operator<(StringPiece x, StringPiece y);
-bool operator>(StringPiece x, StringPiece y);
-bool operator<=(StringPiece x, StringPiece y);
-bool operator>=(StringPiece x, StringPiece y);
+bool operator==(Piece x, Piece y);
+bool operator!=(Piece x, Piece y);
+bool operator<(Piece x, Piece y);
+bool operator>(Piece x, Piece y);
+bool operator<=(Piece x, Piece y);
+bool operator>=(Piece x, Piece y);
 
-bool HasPrefix(StringPiece s, StringPiece prefix);
-bool HasSuffix(StringPiece s, StringPiece suffix);
+bool HasPrefix(Piece s, Piece prefix);
+bool HasSuffix(Piece s, Piece suffix);
 
-StringPiece SkipPrefix(StringPiece s, size_t n);
-StringPiece SkipSuffix(StringPiece s, size_t n);
+Piece SkipPrefix(Piece s, size_t n);
+Piece SkipSuffix(Piece s, size_t n);
 
 // Skip the prefix (or suffix) if it matches with the string.
-StringPiece TrimPrefix(StringPiece s, StringPiece prefix);
-StringPiece TrimSuffix(StringPiece s, StringPiece suffix);
+Piece TrimPrefix(Piece s, Piece prefix);
+Piece TrimSuffix(Piece s, Piece suffix);
 
 // Returns if s contains sub.  Any s except for empty s contains an
 // empty sub.
-bool Contains(StringPiece s, StringPiece sub);
+bool Contains(Piece s, Piece sub);
 
 // Return the first occurrence of sub in s, or npos.  If both s and
 // sub are empty, it returns npos; otherwise, if only sub is empty, it
 // returns 0.
-size_t Index(StringPiece s, StringPiece sub);
+size_t Index(Piece s, Piece sub);
 
 // Return the first occurrence of c in s[pos:end], or npos.
-size_t Find(StringPiece s, char c, size_t pos);
+size_t Find(Piece s, char c, size_t pos);
 
 // Search range is [0..pos] inclusive.  If pos == npos, search everything.
-size_t RFind(StringPiece s, char c, size_t pos); +size_t RFind(Piece s, char c, size_t pos); -StringPiece SubStr(StringPiece s, size_t pos, size_t n); +Piece SubStr(Piece s, size_t pos, size_t n); -// allow StringPiece to be logged -std::ostream& operator<<(std::ostream& o, StringPiece piece); +// allow Piece to be logged +std::ostream& operator<<(std::ostream& o, Piece piece); +} // namespace string } // namespace paddle diff --git a/paddle/string/stringpiece_test.cc b/paddle/string/piece_test.cc similarity index 77% rename from paddle/string/stringpiece_test.cc rename to paddle/string/piece_test.cc index 2ba66a04f6..cf5152ff5a 100644 --- a/paddle/string/stringpiece_test.cc +++ b/paddle/string/piece_test.cc @@ -14,7 +14,7 @@ limitations under the License. */ -#include "paddle/strings/stringpiece.h" +#include "paddle/string/piece.h" #include @@ -22,42 +22,44 @@ TEST(StringPiece, Construct) { { - paddle::StringPiece s; + paddle::string::Piece s; EXPECT_EQ(NULL, s.data()); EXPECT_EQ(0U, s.len()); } - { EXPECT_THROW(paddle::StringPiece s(NULL, 10000U), std::invalid_argument); } { - paddle::StringPiece s(NULL); + EXPECT_THROW(paddle::string::Piece s(NULL, 10000U), std::invalid_argument); + } + { + paddle::string::Piece s(NULL); EXPECT_EQ(0U, s.len()); } { std::string a; EXPECT_EQ(0U, a.size()); - paddle::StringPiece s(a); + paddle::string::Piece s(a); EXPECT_EQ(0U, s.len()); } } TEST(StringPiece, CopyAndAssign) { - paddle::StringPiece empty; + paddle::string::Piece empty; EXPECT_EQ(0U, empty.len()); - paddle::StringPiece a("hello"); - paddle::StringPiece b = a; + paddle::string::Piece a("hello"); + paddle::string::Piece b = a; EXPECT_EQ(b.len(), strlen("hello")); EXPECT_EQ(a, b); std::string storage("hello"); - paddle::StringPiece c(storage); + paddle::string::Piece c(storage); EXPECT_EQ(a, c); EXPECT_NE(a.data(), c.data()); } TEST(StringPiece, Compare) { { - paddle::StringPiece a("hello"); - paddle::StringPiece b("world"); + paddle::string::Piece a("hello"); + paddle::string::Piece b("world"); EXPECT_TRUE(a != b); EXPECT_FALSE(a == b); EXPECT_TRUE(a < b); @@ -68,7 +70,7 @@ TEST(StringPiece, Compare) { EXPECT_GT(Compare(b, a), 0); } { - paddle::StringPiece a, b; + paddle::string::Piece a, b; EXPECT_TRUE(a == b); EXPECT_FALSE(a != b); EXPECT_FALSE(a < b); @@ -82,31 +84,31 @@ TEST(StringPiece, Compare) { TEST(StringPiece, ToString) { { - paddle::StringPiece s; + paddle::string::Piece s; EXPECT_EQ(std::string(""), s.ToString()); } { - paddle::StringPiece s(NULL); + paddle::string::Piece s(NULL); EXPECT_EQ(std::string(""), s.ToString()); } { - paddle::StringPiece s("hello"); + paddle::string::Piece s("hello"); EXPECT_EQ(std::string("hello"), s.ToString()); } } TEST(StringPiece, HasPrefixSuffix) { - using paddle::HasPrefix; - using paddle::HasSuffix; + using paddle::string::HasPrefix; + using paddle::string::HasSuffix; { - paddle::StringPiece s; + paddle::string::Piece s; EXPECT_FALSE(HasPrefix(s, "something")); EXPECT_TRUE(HasPrefix(s, "")); EXPECT_FALSE(HasSuffix(s, "something")); EXPECT_TRUE(HasSuffix(s, "")); } { - paddle::StringPiece s("app"); + paddle::string::Piece s("app"); EXPECT_TRUE(HasPrefix(s, "")); EXPECT_TRUE(HasPrefix(s, "a")); EXPECT_TRUE(HasPrefix(s, "ap")); @@ -120,10 +122,10 @@ TEST(StringPiece, HasPrefixSuffix) { } TEST(StringPiece, SkipPrefixSuffix) { - using paddle::SkipPrefix; - using paddle::SkipSuffix; + using paddle::string::SkipPrefix; + using paddle::string::SkipSuffix; { - paddle::StringPiece s; + paddle::string::Piece s; EXPECT_EQ("", SkipPrefix(s, 0)); 
EXPECT_THROW(SkipPrefix(s, 1), std::invalid_argument); @@ -131,7 +133,7 @@ TEST(StringPiece, SkipPrefixSuffix) { EXPECT_THROW(SkipSuffix(s, 1), std::invalid_argument); } { - paddle::StringPiece s("app"); + paddle::string::Piece s("app"); EXPECT_EQ("app", SkipPrefix(s, 0)); EXPECT_EQ("pp", SkipPrefix(s, 1)); EXPECT_EQ("p", SkipPrefix(s, 2)); @@ -147,10 +149,10 @@ TEST(StringPiece, SkipPrefixSuffix) { } TEST(StringPiece, TrimPrefixSuffix) { - using paddle::TrimPrefix; - using paddle::TrimSuffix; + using paddle::string::TrimPrefix; + using paddle::string::TrimSuffix; { - paddle::StringPiece s; + paddle::string::Piece s; EXPECT_EQ("", TrimPrefix(s, "")); EXPECT_EQ("", TrimPrefix(s, "something")); @@ -158,7 +160,7 @@ TEST(StringPiece, TrimPrefixSuffix) { EXPECT_EQ("", TrimSuffix(s, "something")); } { - paddle::StringPiece s("app"); + paddle::string::Piece s("app"); EXPECT_EQ("app", TrimPrefix(s, "")); EXPECT_EQ("pp", TrimPrefix(s, "a")); EXPECT_EQ("p", TrimPrefix(s, "ap")); @@ -174,14 +176,14 @@ TEST(StringPiece, TrimPrefixSuffix) { } TEST(StringPiece, Contains) { - using paddle::Contains; + using paddle::string::Contains; { - paddle::StringPiece s; + paddle::string::Piece s; EXPECT_FALSE(Contains(s, "")); EXPECT_FALSE(Contains(s, "something")); } { - paddle::StringPiece s("app"); + paddle::string::Piece s("app"); EXPECT_TRUE(Contains(s, "")); EXPECT_TRUE(Contains(s, "a")); EXPECT_TRUE(Contains(s, "p")); @@ -193,15 +195,15 @@ TEST(StringPiece, Contains) { } TEST(StringPiece, Index) { - using paddle::Index; - auto npos = paddle::StringPiece::npos; + using paddle::string::Index; + auto npos = paddle::string::Piece::npos; { - paddle::StringPiece s; + paddle::string::Piece s; EXPECT_EQ(npos, Index(s, "")); EXPECT_EQ(npos, Index(s, "something")); } { - paddle::StringPiece s("app"); + paddle::string::Piece s("app"); EXPECT_EQ(0U, Index(s, "")); EXPECT_EQ(0U, Index(s, "a")); EXPECT_EQ(1U, Index(s, "p")); @@ -213,14 +215,14 @@ TEST(StringPiece, Index) { } TEST(StringPiece, Find) { - using paddle::Find; - auto npos = paddle::StringPiece::npos; + using paddle::string::Find; + auto npos = paddle::string::Piece::npos; { - paddle::StringPiece s; + paddle::string::Piece s; EXPECT_EQ(npos, Find(s, 'a', 0U)); } { - paddle::StringPiece s("app"); + paddle::string::Piece s("app"); EXPECT_EQ(0U, Find(s, 'a', 0U)); EXPECT_EQ(1U, Find(s, 'p', 0U)); EXPECT_EQ(1U, Find(s, 'p', 1U)); @@ -230,14 +232,14 @@ TEST(StringPiece, Find) { } TEST(StringPiece, RFind) { - using paddle::RFind; - auto npos = paddle::StringPiece::npos; + using paddle::string::RFind; + auto npos = paddle::string::Piece::npos; { - paddle::StringPiece s; + paddle::string::Piece s; EXPECT_EQ(npos, RFind(s, 'a', 0U)); } { - paddle::StringPiece s("app"); + paddle::string::Piece s("app"); EXPECT_EQ(2U, RFind(s, 'p', 2U)); EXPECT_EQ(0U, RFind(s, 'a', 2U)); EXPECT_EQ(1U, RFind(s, 'p', 1U)); @@ -247,15 +249,15 @@ TEST(StringPiece, RFind) { } TEST(StringPiece, SubStr) { - using paddle::SubStr; + using paddle::string::SubStr; { - paddle::StringPiece s; + paddle::string::Piece s; EXPECT_EQ("", SubStr(s, 0, 0)); EXPECT_EQ("", SubStr(s, 0, 1)); EXPECT_EQ("", SubStr(s, 1, 0)); } { - paddle::StringPiece s("app"); + paddle::string::Piece s("app"); EXPECT_EQ("", SubStr(s, 0, 0)); EXPECT_EQ("", SubStr(s, 1, 0)); EXPECT_EQ("", SubStr(s, 2, 0)); @@ -279,15 +281,15 @@ TEST(StringPiece, SubStr) { } TEST(StringPiece, StreamOutput) { - using paddle::StringPiece; + using paddle::string::Piece; std::stringstream o; - o << StringPiece(); + o << paddle::string::Piece(); 
EXPECT_EQ("", o.str()); - o << StringPiece("hello"); + o << paddle::string::Piece("hello"); EXPECT_EQ("hello", o.str()); - o << StringPiece(); + o << paddle::string::Piece(); EXPECT_EQ("hello", o.str()); } diff --git a/paddle/string/stringpiece.cc b/paddle/string/stringpiece.cc deleted file mode 100644 index 415b3558d5..0000000000 --- a/paddle/string/stringpiece.cc +++ /dev/null @@ -1,141 +0,0 @@ -/* - Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "paddle/strings/stringpiece.h" - -#include - -#include -#include -#include - -namespace paddle { - -StringPiece::StringPiece() : data_(NULL), size_(0) {} - -StringPiece::StringPiece(const char* d, size_t n) : data_(d), size_(n) { - if (d == NULL && n != 0) - throw std::invalid_argument( - "StringPiece requires len to be 0 for NULL data"); -} - -StringPiece::StringPiece(const char* s) : data_(s) { - size_ = (s == NULL) ? 0 : strlen(s); -} - -StringPiece::StringPiece(const std::string& s) - : data_(s.data()), size_(s.size()) {} - -char StringPiece::operator[](size_t n) const { - if (n >= len()) - throw std::invalid_argument("index out of StringPiece length"); - return data_[n]; -} - -int Compare(StringPiece a, StringPiece b) { - const size_t min_len = (a.len() < b.len()) ? a.len() : b.len(); - int r = memcmp(a.data(), b.data(), min_len); - if (r == 0) { - if (a.len() < b.len()) - return -1; - else if (a.len() > b.len()) - return 1; - } - return r; -} - -bool operator==(StringPiece x, StringPiece y) { - return ((x.len() == y.len()) && - (x.data() == y.data() || memcmp(x.data(), y.data(), x.len()) == 0)); -} - -bool operator!=(StringPiece x, StringPiece y) { return !(x == y); } - -bool operator<(StringPiece x, StringPiece y) { return Compare(x, y) < 0; } -bool operator>(StringPiece x, StringPiece y) { return Compare(x, y) > 0; } - -bool operator<=(StringPiece x, StringPiece y) { return Compare(x, y) <= 0; } -bool operator>=(StringPiece x, StringPiece y) { return Compare(x, y) >= 0; } - -bool HasPrefix(StringPiece s, StringPiece x) { - return ((s.len() >= x.len()) && (memcmp(s.data(), x.data(), x.len()) == 0)); -} - -bool HasSuffix(StringPiece s, StringPiece x) { - return ((s.len() >= x.len()) && - (memcmp(s.data() + (s.len() - x.len()), x.data(), x.len()) == 0)); -} - -StringPiece SkipPrefix(StringPiece s, size_t n) { - if (n > s.len()) - throw std::invalid_argument("Skip distance larger than StringPiece length"); - return StringPiece(s.data() + n, s.len() - n); -} - -StringPiece SkipSuffix(StringPiece s, size_t n) { - if (n > s.len()) - throw std::invalid_argument("Skip distance larger than StringPiece length"); - return StringPiece(s.data(), s.len() - n); -} - -StringPiece TrimPrefix(StringPiece s, StringPiece x) { - return HasPrefix(s, x) ? SkipPrefix(s, x.len()) : s; -} - -StringPiece TrimSuffix(StringPiece s, StringPiece x) { - return HasSuffix(s, x) ? 
SkipSuffix(s, x.len()) : s; -} - -bool Contains(StringPiece s, StringPiece sub) { - return std::search(s.begin(), s.end(), sub.begin(), sub.end()) != s.end(); -} - -size_t Index(StringPiece s, StringPiece sub) { - auto e = std::search(s.begin(), s.end(), sub.begin(), sub.end()); - return e != s.end() ? e - s.data() : StringPiece::npos; -} - -size_t Find(StringPiece s, char c, size_t pos) { - if (pos >= s.len()) { - return StringPiece::npos; - } - const char* result = - reinterpret_cast(memchr(s.data() + pos, c, s.len() - pos)); - return result != nullptr ? result - s.data() : StringPiece::npos; -} - -size_t RFind(StringPiece s, char c, size_t pos) { - if (s.len() == 0) return StringPiece::npos; - for (const char* p = s.data() + std::min(pos, s.len() - 1); p >= s.data(); - p--) { - if (*p == c) { - return p - s.data(); - } - } - return StringPiece::npos; -} - -StringPiece SubStr(StringPiece s, size_t pos, size_t n) { - if (pos > s.len()) pos = s.len(); - if (n > s.len() - pos) n = s.len() - pos; - return StringPiece(s.data() + pos, n); -} - -std::ostream& operator<<(std::ostream& o, StringPiece piece) { - return o << piece.ToString(); -} - -} // namespace paddle From 6cb7cb36911ec36be344a5800c142284983ae2f6 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Wed, 28 Jun 2017 17:23:17 -0700 Subject: [PATCH 143/542] Add paddle/string/printf and tests --- paddle/string/CMakeLists.txt | 2 + paddle/string/printf.h | 105 +++ paddle/string/printf_test.cc | 16 + paddle/string/tinyformat/tinyformat.h | 902 ++++++++++++++++++++++++++ 4 files changed, 1025 insertions(+) create mode 100644 paddle/string/printf.h create mode 100644 paddle/string/printf_test.cc create mode 100644 paddle/string/tinyformat/tinyformat.h diff --git a/paddle/string/CMakeLists.txt b/paddle/string/CMakeLists.txt index 0f39660a90..5becf62672 100644 --- a/paddle/string/CMakeLists.txt +++ b/paddle/string/CMakeLists.txt @@ -1,2 +1,4 @@ cc_library(stringpiece SRCS piece.cc) cc_test(stringpiece_test SRCS piece_test.cc DEPS stringpiece glog gflags) + +cc_test(stringprintf_test SRCS printf_test.cc DEPS glog gflags) diff --git a/paddle/string/printf.h b/paddle/string/printf.h new file mode 100644 index 0000000000..0767f8f5b5 --- /dev/null +++ b/paddle/string/printf.h @@ -0,0 +1,105 @@ +/* + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +// Compared with std::stringstream, there are primary purpose of +// string::Printf: +// +// 1. Type-safe printing, with why and how explained in +// http://www.drdobbs.com/stringprintf-a-typesafe-printf-family-fo/184401999. +// Implementation includes +// +// https://github.com/c42f/tinyformat +// boost::format +// std::stringstream +// +// std::stringstream is not convenient enough in many cases. For example: +// +// std::cout << std::setprecision(2) << std::fixed << 1.23456 << "\n"; +// +// boost::format is the most convenient one. 
We can have
+//
+//   std::cout << format("%2% %1%") % 36 % 77;
+//
+// or
+//
+//   format fmter("%2% %1%");
+//   fmter % 36; fmter % 77;
+//   std::cout << fmter.c_str();
+//
+// But the overloading of % might be overkill, and it would be
+// more efficient if it could write to std::cout directly.
+//
+// tinyformat has an interface compatible with the C-printf style,
+// and it can write to a stream or return a std::string:
+//
+//   std::cout << tfm::printf(
+//       "%s, %s %d, %.2d:%.2d\n",
+//       weekday, month, day, hour, min);
+//
+// or
+//
+//   tfm::format(std::cout,
+//               "%s, %s %d, %.2d:%.2d\n",
+//               weekday, month, day, hour, min);
+//
+// 2. High-performance -- most printed strings are not too long and
+//    don't need dynamic memory allocation.  Many StringPrintf
+//    implementations don't enforce type safety, but are
+//    high-performance, including
+//
+//     https://developers.google.com/optimization/reference/base/stringprintf/
+//     https://github.com/adobe/chromium/blob/master/base/stringprintf.h
+//     https://github.com/google/protobuf/blob/master/src/google/protobuf/stubs/stringprintf.h
+//
+// According to
+// https://github.com/c42f/tinyformat#compile-time-and-code-bloat,
+// boost::format runs too slowly and results in large executable binary
+// files.  So here we port tinyformat.
+
+#pragma once
+
+#include <iostream>
+#include <sstream>
+#include "paddle/string/tinyformat/tinyformat.h"  // https://github.com/c42f/tinyformat
+
+namespace paddle {
+namespace string {
+
+template <typename... Args>
+void Fprintf(std::ostream& out, const char* fmt, const Args&... args) {
+  tinyformat::vformat(out, fmt, tinyformat::makeFormatList(args...));
+}
+
+template <typename... Args>
+std::string Sprintf(const char* fmt, const Args&... args) {
+  std::ostringstream oss;
+  tinyformat::format(oss, fmt, args...);
+  return oss.str();
+}
+
+template <typename... Args>
+void printf(const char* fmt, const Args&... args) {
+  tinyformat::format(std::cout, fmt, args...);
+}
+
+template <typename... Args>
+void printfln(const char* fmt, const Args&...
args) { + tinyformat::format(std::cout, fmt, args...); + std::cout << '\n'; +} + +} // namespace string +} // namespace paddle diff --git a/paddle/string/printf_test.cc b/paddle/string/printf_test.cc new file mode 100644 index 0000000000..d8f2454165 --- /dev/null +++ b/paddle/string/printf_test.cc @@ -0,0 +1,16 @@ +#include "paddle/string/printf.h" + +#include + +#include "gtest/gtest.h" + +TEST(StringPrintf, StringPrintf) { + std::string weekday = "Wednesday"; + const char* month = "July"; + size_t day = 27; + long hour = 14; + int min = 44; + EXPECT_EQ(std::string("Wednesday, July 27, 14:44"), + paddle::string::Sprintf( + "%s, %s %d, %.2d:%.2d", weekday, month, day, hour, min)); +} diff --git a/paddle/string/tinyformat/tinyformat.h b/paddle/string/tinyformat/tinyformat.h new file mode 100644 index 0000000000..f0e5e0160f --- /dev/null +++ b/paddle/string/tinyformat/tinyformat.h @@ -0,0 +1,902 @@ +// tinyformat.h +// Copyright (C) 2011, Chris Foster [chris42f (at) gmail (d0t) com] +// +// Boost Software License - Version 1.0 +// +// Permission is hereby granted, free of charge, to any person or organization +// obtaining a copy of the software and accompanying documentation covered by +// this license (the "Software") to use, reproduce, display, distribute, +// execute, and transmit the Software, and to prepare derivative works of the +// Software, and to permit third-parties to whom the Software is furnished to +// do so, all subject to the following: +// +// The copyright notices in the Software and this entire statement, including +// the above license grant, this restriction and the following disclaimer, +// must be included in all copies of the Software, in whole or in part, and +// all derivative works of the Software, unless such copies or derivative +// works are solely in the form of machine-executable object code generated by +// a source language processor. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + +//------------------------------------------------------------------------------ +// Tinyformat: A minimal type safe printf replacement +// +// tinyformat.h is a type safe printf replacement library in a single C++ +// header file. Design goals include: +// +// * Type safety and extensibility for user defined types. +// * C99 printf() compatibility, to the extent possible using std::ostream +// * Simplicity and minimalism. A single header file to include and distribute +// with your projects. +// * Augment rather than replace the standard stream formatting mechanism +// * C++98 support, with optional C++11 niceties +// +// +// Main interface example usage +// ---------------------------- +// +// To print a date to std::cout: +// +// std::string weekday = "Wednesday"; +// const char* month = "July"; +// size_t day = 27; +// long hour = 14; +// int min = 44; +// +// tfm::printf("%s, %s %d, %.2d:%.2d\n", weekday, month, day, hour, min); +// +// The strange types here emphasize the type safety of the interface; it is +// possible to print a std::string using the "%s" conversion, and a +// size_t using the "%d" conversion. 
A similar result could be achieved +// using either of the tfm::format() functions. One prints on a user provided +// stream: +// +// tfm::format(std::cerr, "%s, %s %d, %.2d:%.2d\n", +// weekday, month, day, hour, min); +// +// The other returns a std::string: +// +// std::string date = tfm::format("%s, %s %d, %.2d:%.2d\n", +// weekday, month, day, hour, min); +// std::cout << date; +// +// These are the three primary interface functions. There is also a +// convenience function printfln() which appends a newline to the usual result +// of printf() for super simple logging. +// +// +// User defined format functions +// ----------------------------- +// +// Simulating variadic templates in C++98 is pretty painful since it requires +// writing out the same function for each desired number of arguments. To make +// this bearable tinyformat comes with a set of macros which are used +// internally to generate the API, but which may also be used in user code. +// +// The three macros TINYFORMAT_ARGTYPES(n), TINYFORMAT_VARARGS(n) and +// TINYFORMAT_PASSARGS(n) will generate a list of n argument types, +// type/name pairs and argument names respectively when called with an integer +// n between 1 and 16. We can use these to define a macro which generates the +// desired user defined function with n arguments. To generate all 16 user +// defined function bodies, use the macro TINYFORMAT_FOREACH_ARGNUM. For an +// example, see the implementation of printf() at the end of the source file. +// +// Sometimes it's useful to be able to pass a list of format arguments through +// to a non-template function. The FormatList class is provided as a way to do +// this by storing the argument list in a type-opaque way. Continuing the +// example from above, we construct a FormatList using makeFormatList(): +// +// FormatListRef formatList = tfm::makeFormatList(weekday, month, day, hour, +// min); +// +// The format list can now be passed into any non-template function and used +// via a call to the vformat() function: +// +// tfm::vformat(std::cout, "%s, %s %d, %.2d:%.2d\n", formatList); +// +// +// Additional API information +// -------------------------- +// +// Error handling: Define TINYFORMAT_ERROR to customize the error handling for +// format strings which are unsupported or have the wrong number of format +// specifiers (calls assert() by default). +// +// User defined types: Uses operator<< for user defined types by default. +// Overload formatValue() for more control. + +#pragma once + +#include +#include +#include +#include + +namespace paddle { +namespace string { +namespace tinyformat { + +#ifndef TINYFORMAT_ERROR +#define TINYFORMAT_ERROR(reason) assert(0 && reason) +#endif + +//------------------------------------------------------------------------------ +namespace detail { + +// Test whether type T1 is convertible to type T2 +template +struct is_convertible { +private: + // two types of different size + struct fail { + char dummy[2]; + }; + struct succeed { + char dummy; + }; + // Try to convert a T1 to a T2 by plugging into tryConvert + static fail tryConvert(...); + static succeed tryConvert(const T2 &); + static const T1 &makeT1(); + +public: + // Standard trick: the (...) version of tryConvert will be chosen from + // the overload set only if the version taking a T2 doesn't match. + // Then we compare the sizes of the return types to check which + // function matched. 
Very neat, in a disgusting kind of way :) + static const bool value = sizeof(tryConvert(makeT1())) == sizeof(succeed); +}; + +// Format the value by casting to type fmtT. This default implementation +// should never be called. +template ::value> +struct formatValueAsType { + static void invoke(std::ostream & /*out*/, const T & /*value*/) { assert(0); } +}; +// Specialized version for types that can actually be converted to fmtT, as +// indicated by the "convertible" template parameter. +template +struct formatValueAsType { + static void invoke(std::ostream &out, const T &value) { + out << static_cast(value); + } +}; + +// Convert an arbitrary type to integer. The version with convertible=false +// throws an error. +template ::value> +struct convertToInt { + static int invoke(const T & /*value*/) { + TINYFORMAT_ERROR( + "tinyformat: Cannot convert from argument type to " + "integer for use as variable width or precision"); + return 0; + } +}; +// Specialization for convertToInt when conversion is possible +template +struct convertToInt { + static int invoke(const T &value) { return static_cast(value); } +}; + +// Format at most ntrunc characters to the given stream. +template +inline void formatTruncated(std::ostream &out, const T &value, int ntrunc) { + std::ostringstream tmp; + tmp << value; + std::string result = tmp.str(); + out.write(result.c_str(), + (std::min)(ntrunc, static_cast(result.size()))); +} +#define TINYFORMAT_DEFINE_FORMAT_TRUNCATED_CSTR(type) \ + inline void formatTruncated(std::ostream &out, type *value, int ntrunc) { \ + std::streamsize len = 0; \ + while (len < ntrunc && value[len] != 0) ++len; \ + out.write(value, len); \ + } +// Overload for const char* and char*. Could overload for signed & unsigned +// char too, but these are technically unneeded for printf compatibility. +TINYFORMAT_DEFINE_FORMAT_TRUNCATED_CSTR(const char) +TINYFORMAT_DEFINE_FORMAT_TRUNCATED_CSTR(char) +#undef TINYFORMAT_DEFINE_FORMAT_TRUNCATED_CSTR + +} // namespace detail + +//------------------------------------------------------------------------------ +// Variable formatting functions. May be overridden for user-defined types if +// desired. + +/// Format a value into a stream, delegating to operator<< by default. +/// +/// Users may override this for their own types. When this function is called, +/// the stream flags will have been modified according to the format string. +/// The format specification is provided in the range [fmtBegin, fmtEnd). For +/// truncating conversions, ntrunc is set to the desired maximum number of +/// characters, for example "%.7s" calls formatValue with ntrunc = 7. +/// +/// By default, formatValue() uses the usual stream insertion operator +/// operator<< to format the type T, with special cases for the %c and %p +/// conversions. +template +inline void formatValue(std::ostream &out, + const char * /*fmtBegin*/, + const char *fmtEnd, + int ntrunc, + const T &value) { + // The mess here is to support the %c and %p conversions: if these + // conversions are active we try to convert the type to a char or const + // void* respectively and format that instead of the value itself. For the + // %p conversion it's important to avoid dereferencing the pointer, which + // could otherwise lead to a crash when printing a dangling (const char*). 
+ const bool canConvertToChar = detail::is_convertible::value; + const bool canConvertToVoidPtr = + detail::is_convertible::value; + if (canConvertToChar && *(fmtEnd - 1) == 'c') + detail::formatValueAsType::invoke(out, value); + else if (canConvertToVoidPtr && *(fmtEnd - 1) == 'p') + detail::formatValueAsType::invoke(out, value); + else if (ntrunc >= 0) { + // Take care not to overread C strings in truncating conversions like + // "%.4s" where at most 4 characters may be read. + detail::formatTruncated(out, value, ntrunc); + } else + out << value; +} + +// Overloaded version for char types to support printing as an integer +#define TINYFORMAT_DEFINE_FORMATVALUE_CHAR(charType) \ + inline void formatValue(std::ostream &out, \ + const char * /*fmtBegin*/, \ + const char *fmtEnd, \ + int /**/, \ + charType value) { \ + switch (*(fmtEnd - 1)) { \ + case 'u': \ + case 'd': \ + case 'i': \ + case 'o': \ + case 'X': \ + case 'x': \ + out << static_cast(value); \ + break; \ + default: \ + out << value; \ + break; \ + } \ + } +// per 3.9.1: char, signed char and unsigned char are all distinct types +TINYFORMAT_DEFINE_FORMATVALUE_CHAR(char) +TINYFORMAT_DEFINE_FORMATVALUE_CHAR(signed char) +TINYFORMAT_DEFINE_FORMATVALUE_CHAR(unsigned char) +#undef TINYFORMAT_DEFINE_FORMATVALUE_CHAR + +//------------------------------------------------------------------------------ +// Tools for emulating variadic templates in C++98. The basic idea here is +// stolen from the boost preprocessor metaprogramming library and cut down to +// be just general enough for what we need. + +#define TINYFORMAT_ARGTYPES(n) TINYFORMAT_ARGTYPES_##n +#define TINYFORMAT_VARARGS(n) TINYFORMAT_VARARGS_##n +#define TINYFORMAT_PASSARGS(n) TINYFORMAT_PASSARGS_##n +#define TINYFORMAT_PASSARGS_TAIL(n) TINYFORMAT_PASSARGS_TAIL_##n + +// To keep it as transparent as possible, the macros below have been generated +// using python via the excellent cog.py code generation script. This avoids +// the need for a bunch of complex (but more general) preprocessor tricks as +// used in boost.preprocessor. +// +// To rerun the code generation in place, use `cog.py -r tinyformat.h` +// (see http://nedbatchelder.com/code/cog). Alternatively you can just create +// extra versions by hand. 
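To make the macro machinery concrete, here is a small sketch (an editor's illustration, not part of the file; the my_log name is hypothetical). It assumes the variadic tinyformat::format(std::ostream&, fmt, args...) entry point used by paddle/string/printf.h above. With n = 2, TINYFORMAT_ARGTYPES(2) expands to "class T1, class T2", TINYFORMAT_VARARGS(2) to "const T1 &v1, const T2 &v2", and TINYFORMAT_PASSARGS(2) to "v1, v2", so one macro stamps out a wrapper for every arity from 1 to 16:

#include <iostream>
#include "paddle/string/tinyformat/tinyformat.h"

// Generate my_log() overloads taking 1..16 format arguments; each one
// simply forwards to tinyformat::format with std::cerr as the destination.
#define MAKE_MY_LOG(n)                                    \
  template <TINYFORMAT_ARGTYPES(n)>                       \
  void my_log(const char *fmt, TINYFORMAT_VARARGS(n)) {   \
    paddle::string::tinyformat::format(                   \
        std::cerr, fmt, TINYFORMAT_PASSARGS(n));          \
  }
TINYFORMAT_FOREACH_ARGNUM(MAKE_MY_LOG)
#undef MAKE_MY_LOG

A call such as my_log("x = %d\n", 42) then type-checks its arguments at compile time, which is the point of the whole exercise.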
+ +/*[[[cog +maxParams = 16 + +def makeCommaSepLists(lineTemplate, elemTemplate, startInd=1): + for j in range(startInd,maxParams+1): + list = ', '.join([elemTemplate % {'i':i} for i in range(startInd,j+1)]) + cog.outl(lineTemplate % {'j':j, 'list':list}) + +makeCommaSepLists('#define TINYFORMAT_ARGTYPES_%(j)d %(list)s', + 'class T%(i)d') + +cog.outl() +makeCommaSepLists('#define TINYFORMAT_VARARGS_%(j)d %(list)s', + 'const T%(i)d& v%(i)d') + +cog.outl() +makeCommaSepLists('#define TINYFORMAT_PASSARGS_%(j)d %(list)s', 'v%(i)d') + +cog.outl() +cog.outl('#define TINYFORMAT_PASSARGS_TAIL_1') +makeCommaSepLists('#define TINYFORMAT_PASSARGS_TAIL_%(j)d , %(list)s', + 'v%(i)d', startInd = 2) + +cog.outl() +cog.outl('#define TINYFORMAT_FOREACH_ARGNUM(m) \\\n ' + + ' '.join(['m(%d)' % (j,) for j in range(1,maxParams+1)])) +]]]*/ +#define TINYFORMAT_ARGTYPES_1 class T1 +#define TINYFORMAT_ARGTYPES_2 class T1, class T2 +#define TINYFORMAT_ARGTYPES_3 class T1, class T2, class T3 +#define TINYFORMAT_ARGTYPES_4 class T1, class T2, class T3, class T4 +#define TINYFORMAT_ARGTYPES_5 class T1, class T2, class T3, class T4, class T5 +#define TINYFORMAT_ARGTYPES_6 \ + class T1, class T2, class T3, class T4, class T5, class T6 +#define TINYFORMAT_ARGTYPES_7 \ + class T1, class T2, class T3, class T4, class T5, class T6, class T7 +#define TINYFORMAT_ARGTYPES_8 \ + class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8 +#define TINYFORMAT_ARGTYPES_9 \ + class T1, class T2, class T3, class T4, class T5, class T6, class T7, \ + class T8, class T9 +#define TINYFORMAT_ARGTYPES_10 \ + class T1, class T2, class T3, class T4, class T5, class T6, class T7, \ + class T8, class T9, class T10 +#define TINYFORMAT_ARGTYPES_11 \ + class T1, class T2, class T3, class T4, class T5, class T6, class T7, \ + class T8, class T9, class T10, class T11 +#define TINYFORMAT_ARGTYPES_12 \ + class T1, class T2, class T3, class T4, class T5, class T6, class T7, \ + class T8, class T9, class T10, class T11, class T12 +#define TINYFORMAT_ARGTYPES_13 \ + class T1, class T2, class T3, class T4, class T5, class T6, class T7, \ + class T8, class T9, class T10, class T11, class T12, class T13 +#define TINYFORMAT_ARGTYPES_14 \ + class T1, class T2, class T3, class T4, class T5, class T6, class T7, \ + class T8, class T9, class T10, class T11, class T12, class T13, \ + class T14 +#define TINYFORMAT_ARGTYPES_15 \ + class T1, class T2, class T3, class T4, class T5, class T6, class T7, \ + class T8, class T9, class T10, class T11, class T12, class T13, \ + class T14, class T15 +#define TINYFORMAT_ARGTYPES_16 \ + class T1, class T2, class T3, class T4, class T5, class T6, class T7, \ + class T8, class T9, class T10, class T11, class T12, class T13, \ + class T14, class T15, class T16 + +#define TINYFORMAT_VARARGS_1 const T1 &v1 +#define TINYFORMAT_VARARGS_2 const T1 &v1, const T2 &v2 +#define TINYFORMAT_VARARGS_3 const T1 &v1, const T2 &v2, const T3 &v3 +#define TINYFORMAT_VARARGS_4 \ + const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4 +#define TINYFORMAT_VARARGS_5 \ + const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5 +#define TINYFORMAT_VARARGS_6 \ + const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, \ + const T6 &v6 +#define TINYFORMAT_VARARGS_7 \ + const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, \ + const T6 &v6, const T7 &v7 +#define TINYFORMAT_VARARGS_8 \ + const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, \ + const T6 &v6, const T7 &v7, 
const T8 &v8 +#define TINYFORMAT_VARARGS_9 \ + const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, \ + const T6 &v6, const T7 &v7, const T8 &v8, const T9 &v9 +#define TINYFORMAT_VARARGS_10 \ + const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, \ + const T6 &v6, const T7 &v7, const T8 &v8, const T9 &v9, const T10 &v10 +#define TINYFORMAT_VARARGS_11 \ + const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, \ + const T6 &v6, const T7 &v7, const T8 &v8, const T9 &v9, const T10 &v10, \ + const T11 &v11 +#define TINYFORMAT_VARARGS_12 \ + const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, \ + const T6 &v6, const T7 &v7, const T8 &v8, const T9 &v9, const T10 &v10, \ + const T11 &v11, const T12 &v12 +#define TINYFORMAT_VARARGS_13 \ + const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, \ + const T6 &v6, const T7 &v7, const T8 &v8, const T9 &v9, const T10 &v10, \ + const T11 &v11, const T12 &v12, const T13 &v13 +#define TINYFORMAT_VARARGS_14 \ + const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, \ + const T6 &v6, const T7 &v7, const T8 &v8, const T9 &v9, const T10 &v10, \ + const T11 &v11, const T12 &v12, const T13 &v13, const T14 &v14 +#define TINYFORMAT_VARARGS_15 \ + const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, \ + const T6 &v6, const T7 &v7, const T8 &v8, const T9 &v9, const T10 &v10, \ + const T11 &v11, const T12 &v12, const T13 &v13, const T14 &v14, \ + const T15 &v15 +#define TINYFORMAT_VARARGS_16 \ + const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, \ + const T6 &v6, const T7 &v7, const T8 &v8, const T9 &v9, const T10 &v10, \ + const T11 &v11, const T12 &v12, const T13 &v13, const T14 &v14, \ + const T15 &v15, const T16 &v16 + +#define TINYFORMAT_PASSARGS_1 v1 +#define TINYFORMAT_PASSARGS_2 v1, v2 +#define TINYFORMAT_PASSARGS_3 v1, v2, v3 +#define TINYFORMAT_PASSARGS_4 v1, v2, v3, v4 +#define TINYFORMAT_PASSARGS_5 v1, v2, v3, v4, v5 +#define TINYFORMAT_PASSARGS_6 v1, v2, v3, v4, v5, v6 +#define TINYFORMAT_PASSARGS_7 v1, v2, v3, v4, v5, v6, v7 +#define TINYFORMAT_PASSARGS_8 v1, v2, v3, v4, v5, v6, v7, v8 +#define TINYFORMAT_PASSARGS_9 v1, v2, v3, v4, v5, v6, v7, v8, v9 +#define TINYFORMAT_PASSARGS_10 v1, v2, v3, v4, v5, v6, v7, v8, v9, v10 +#define TINYFORMAT_PASSARGS_11 v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11 +#define TINYFORMAT_PASSARGS_12 v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12 +#define TINYFORMAT_PASSARGS_13 \ + v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13 +#define TINYFORMAT_PASSARGS_14 \ + v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14 +#define TINYFORMAT_PASSARGS_15 \ + v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15 +#define TINYFORMAT_PASSARGS_16 \ + v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16 + +#define TINYFORMAT_PASSARGS_TAIL_1 +#define TINYFORMAT_PASSARGS_TAIL_2 , v2 +#define TINYFORMAT_PASSARGS_TAIL_3 , v2, v3 +#define TINYFORMAT_PASSARGS_TAIL_4 , v2, v3, v4 +#define TINYFORMAT_PASSARGS_TAIL_5 , v2, v3, v4, v5 +#define TINYFORMAT_PASSARGS_TAIL_6 , v2, v3, v4, v5, v6 +#define TINYFORMAT_PASSARGS_TAIL_7 , v2, v3, v4, v5, v6, v7 +#define TINYFORMAT_PASSARGS_TAIL_8 , v2, v3, v4, v5, v6, v7, v8 +#define TINYFORMAT_PASSARGS_TAIL_9 , v2, v3, v4, v5, v6, v7, v8, v9 +#define TINYFORMAT_PASSARGS_TAIL_10 , v2, v3, v4, v5, v6, v7, v8, v9, v10 +#define TINYFORMAT_PASSARGS_TAIL_11 , v2, v3, v4, v5, v6, v7, v8, v9, v10, v11 +#define TINYFORMAT_PASSARGS_TAIL_12 \ + 
, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12 +#define TINYFORMAT_PASSARGS_TAIL_13 \ + , v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13 +#define TINYFORMAT_PASSARGS_TAIL_14 \ + , v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14 +#define TINYFORMAT_PASSARGS_TAIL_15 \ + , v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15 +#define TINYFORMAT_PASSARGS_TAIL_16 \ + , v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16 + +#define TINYFORMAT_FOREACH_ARGNUM(m) \ + m(1) m(2) m(3) m(4) m(5) m(6) m(7) m(8) m(9) m(10) m(11) m(12) m(13) m(14) \ + m(15) m(16) +//[[[end]]] + +namespace detail { + +// Type-opaque holder for an argument to format(), with associated actions on +// the type held as explicit function pointers. This allows FormatArg's for +// each argument to be allocated as a homogenous array inside FormatList +// whereas a naive implementation based on inheritance does not. +class FormatArg { +public: + FormatArg() {} + + template + FormatArg(const T &value) + : m_value(static_cast(&value)), + m_formatImpl(&formatImpl), + m_toIntImpl(&toIntImpl) {} + + void format(std::ostream &out, + const char *fmtBegin, + const char *fmtEnd, + int ntrunc) const { + m_formatImpl(out, fmtBegin, fmtEnd, ntrunc, m_value); + } + + int toInt() const { return m_toIntImpl(m_value); } + +private: + template + static void formatImpl(std::ostream &out, + const char *fmtBegin, + const char *fmtEnd, + int ntrunc, + const void *value) { + formatValue(out, fmtBegin, fmtEnd, ntrunc, *static_cast(value)); + } + + template + static int toIntImpl(const void *value) { + return convertToInt::invoke(*static_cast(value)); + } + + const void *m_value; + void (*m_formatImpl)(std::ostream &out, + const char *fmtBegin, + const char *fmtEnd, + int ntrunc, + const void *value); + int (*m_toIntImpl)(const void *value); +}; + +// Parse and return an integer from the string c, as atoi() +// On return, c is set to one past the end of the integer. +inline int parseIntAndAdvance(const char *&c) { + int i = 0; + for (; *c >= '0' && *c <= '9'; ++c) i = 10 * i + (*c - '0'); + return i; +} + +// Print literal part of format string and return next format spec +// position. +// +// Skips over any occurrences of '%%', printing a literal '%' to the +// output. The position of the first % character of the next +// nontrivial format spec is returned, or the end of string. +inline const char *printFormatStringLiteral(std::ostream &out, + const char *fmt) { + const char *c = fmt; + for (;; ++c) { + switch (*c) { + case '\0': + out.write(fmt, c - fmt); + return c; + case '%': + out.write(fmt, c - fmt); + if (*(c + 1) != '%') return c; + // for "%%", tack trailing % onto next literal section. + fmt = ++c; + break; + default: + break; + } + } +} + +// Parse a format string and set the stream state accordingly. +// +// The format mini-language recognized here is meant to be the one from C99, +// with the form "%[flags][width][.precision][length]type". +// +// Formatting options which can't be natively represented using the ostream +// state are returned in spacePadPositive (for space padded positive numbers) +// and ntrunc (for truncating conversions). argIndex is incremented if +// necessary to pull out variable width and precision . The function returns a +// pointer to the character after the end of the current format spec. 
+inline const char *streamStateFromFormat(std::ostream &out, + bool &spacePadPositive, + int &ntrunc, + const char *fmtStart, + const detail::FormatArg *formatters, + int &argIndex, + int numFormatters) { + if (*fmtStart != '%') { + TINYFORMAT_ERROR( + "tinyformat: Not enough conversion specifiers in format string"); + return fmtStart; + } + // Reset stream state to defaults. + out.width(0); + out.precision(6); + out.fill(' '); + // Reset most flags; ignore irrelevant unitbuf & skipws. + out.unsetf(std::ios::adjustfield | std::ios::basefield | + std::ios::floatfield | std::ios::showbase | std::ios::boolalpha | + std::ios::showpoint | std::ios::showpos | std::ios::uppercase); + bool precisionSet = false; + bool widthSet = false; + int widthExtra = 0; + const char *c = fmtStart + 1; + // 1) Parse flags + for (;; ++c) { + switch (*c) { + case '#': + out.setf(std::ios::showpoint | std::ios::showbase); + continue; + case '0': + // overridden by left alignment ('-' flag) + if (!(out.flags() & std::ios::left)) { + // Use internal padding so that numeric values are + // formatted correctly, eg -00010 rather than 000-10 + out.fill('0'); + out.setf(std::ios::internal, std::ios::adjustfield); + } + continue; + case '-': + out.fill(' '); + out.setf(std::ios::left, std::ios::adjustfield); + continue; + case ' ': + // overridden by show positive sign, '+' flag. + if (!(out.flags() & std::ios::showpos)) spacePadPositive = true; + continue; + case '+': + out.setf(std::ios::showpos); + spacePadPositive = false; + widthExtra = 1; + continue; + default: + break; + } + break; + } + // 2) Parse width + if (*c >= '0' && *c <= '9') { + widthSet = true; + out.width(parseIntAndAdvance(c)); + } + if (*c == '*') { + widthSet = true; + int width = 0; + if (argIndex < numFormatters) + width = formatters[argIndex++].toInt(); + else + TINYFORMAT_ERROR( + "tinyformat: Not enough arguments to read variable width"); + if (width < 0) { + // negative widths correspond to '-' flag set + out.fill(' '); + out.setf(std::ios::left, std::ios::adjustfield); + width = -width; + } + out.width(width); + ++c; + } + // 3) Parse precision + if (*c == '.') { + ++c; + int precision = 0; + if (*c == '*') { + ++c; + if (argIndex < numFormatters) + precision = formatters[argIndex++].toInt(); + else + TINYFORMAT_ERROR( + "tinyformat: Not enough arguments to read variable precision"); + } else { + if (*c >= '0' && *c <= '9') + precision = parseIntAndAdvance(c); + else if (*c == '-') // negative precisions ignored, treated as zero. + parseIntAndAdvance(++c); + } + out.precision(precision); + precisionSet = true; + } + // 4) Ignore any C99 length modifier + while (*c == 'l' || *c == 'h' || *c == 'L' || *c == 'j' || *c == 'z' || + *c == 't') + ++c; + // 5) We're up to the conversion specifier character. + // Set stream flags based on conversion specifier (thanks to the + // boost::format class for forging the way here). 
+ bool intConversion = false; + switch (*c) { + case 'u': + case 'd': + case 'i': + out.setf(std::ios::dec, std::ios::basefield); + intConversion = true; + break; + case 'o': + out.setf(std::ios::oct, std::ios::basefield); + intConversion = true; + break; + case 'X': + out.setf(std::ios::uppercase); + case 'x': + case 'p': + out.setf(std::ios::hex, std::ios::basefield); + intConversion = true; + break; + case 'E': + out.setf(std::ios::uppercase); + case 'e': + out.setf(std::ios::scientific, std::ios::floatfield); + out.setf(std::ios::dec, std::ios::basefield); + break; + case 'F': + out.setf(std::ios::uppercase); + case 'f': + out.setf(std::ios::fixed, std::ios::floatfield); + break; + case 'G': + out.setf(std::ios::uppercase); + case 'g': + out.setf(std::ios::dec, std::ios::basefield); + // As in boost::format, let stream decide float format. + out.flags(out.flags() & ~std::ios::floatfield); + break; + case 'a': + case 'A': + TINYFORMAT_ERROR( + "tinyformat: the %a and %A conversion specs " + "are not supported"); + break; + case 'c': + // Handled as special case inside formatValue() + break; + case 's': + if (precisionSet) ntrunc = static_cast(out.precision()); + // Make %s print booleans as "true" and "false" + out.setf(std::ios::boolalpha); + break; + case 'n': + // Not supported - will cause problems! + TINYFORMAT_ERROR("tinyformat: %n conversion spec not supported"); + break; + case '\0': + TINYFORMAT_ERROR( + "tinyformat: Conversion spec incorrectly " + "terminated by end of string"); + return c; + default: + break; + } + if (intConversion && precisionSet && !widthSet) { + // "precision" for integers gives the minimum number of digits (to be + // padded with zeros on the left). This isn't really supported by the + // iostreams, but we can approximately simulate it with the width if + // the width isn't otherwise used. + out.width(out.precision() + widthExtra); + out.setf(std::ios::internal, std::ios::adjustfield); + out.fill('0'); + } + return c + 1; +} + +//------------------------------------------------------------------------------ +inline void formatImpl(std::ostream &out, + const char *fmt, + const detail::FormatArg *formatters, + int numFormatters) { + // Saved stream state + std::streamsize origWidth = out.width(); + std::streamsize origPrecision = out.precision(); + std::ios::fmtflags origFlags = out.flags(); + char origFill = out.fill(); + + for (int argIndex = 0; argIndex < numFormatters; ++argIndex) { + // Parse the format string + fmt = printFormatStringLiteral(out, fmt); + bool spacePadPositive = false; + int ntrunc = -1; + const char *fmtEnd = streamStateFromFormat(out, + spacePadPositive, + ntrunc, + fmt, + formatters, + argIndex, + numFormatters); + if (argIndex >= numFormatters) { + // Check args remain after reading any variable width/precision + TINYFORMAT_ERROR("tinyformat: Not enough format arguments"); + return; + } + const FormatArg &arg = formatters[argIndex]; + // Format the arg into the stream. + if (!spacePadPositive) + arg.format(out, fmt, fmtEnd, ntrunc); + else { + // The following is a special case with no direct correspondence + // between stream formatting and the printf() behaviour. Simulate + // it crudely by formatting into a temporary string stream and + // munging the resulting string. + std::ostringstream tmpStream; + tmpStream.copyfmt(out); + tmpStream.setf(std::ios::showpos); + arg.format(tmpStream, fmt, fmtEnd, ntrunc); + std::string result = tmpStream.str(); // allocates... yuck. 
+ for (size_t i = 0, iend = result.size(); i < iend; ++i) + if (result[i] == '+') result[i] = ' '; + out << result; + } + fmt = fmtEnd; + } + + // Print remaining part of format string. + fmt = printFormatStringLiteral(out, fmt); + if (*fmt != '\0') + TINYFORMAT_ERROR( + "tinyformat: Too many conversion specifiers in format string"); + + // Restore stream state + out.width(origWidth); + out.precision(origPrecision); + out.flags(origFlags); + out.fill(origFill); +} + +} // namespace detail + +/// List of template arguments format(), held in a type-opaque way. +/// +/// A const reference to FormatList (typedef'd as FormatListRef) may be +/// conveniently used to pass arguments to non-template functions: All type +/// information has been stripped from the arguments, leaving just enough of a +/// common interface to perform formatting as required. +class FormatList { +public: + FormatList(detail::FormatArg *formatters, int N) + : m_formatters(formatters), m_N(N) {} + + friend void vformat(std::ostream &out, + const char *fmt, + const FormatList &list); + +private: + const detail::FormatArg *m_formatters; + int m_N; +}; + +/// Reference to type-opaque format list for passing to vformat() +typedef const FormatList &FormatListRef; + +namespace detail { + +// Format list subclass with fixed storage to avoid dynamic allocation +template +class FormatListN : public FormatList { +public: + template + FormatListN(const Args &... args) + : FormatList(&m_formatterStore[0], N), + m_formatterStore{FormatArg(args)...} { + static_assert(sizeof...(args) == N, "Number of args must be N"); + } + +private: + FormatArg m_formatterStore[N]; +}; + +// Special 0-arg version - MSVC says zero-sized C array in struct is nonstandard +template <> +class FormatListN<0> : public FormatList { +public: + FormatListN() : FormatList(0, 0) {} +}; + +} // namespace detail + +//------------------------------------------------------------------------------ +// Primary API functions + +/// Make type-agnostic format list from list of template arguments. +/// +/// The exact return type of this function is an implementation detail and +/// shouldn't be relied upon. Instead it should be stored as a FormatListRef: +/// +/// FormatListRef formatList = makeFormatList( /*...*/ ); +template +detail::FormatListN makeFormatList(const Args &... args) { + return detail::FormatListN(args...); +} + +/// Format list of arguments to the stream according to the given format string. +/// +/// The name vformat() is chosen for the semantic similarity to vprintf(): the +/// list of format arguments is held in a single function argument. +inline void vformat(std::ostream &out, const char *fmt, FormatListRef list) { + detail::formatImpl(out, fmt, list.m_formatters, list.m_N); +} + +/// Format list of arguments to the stream according to given format string. +template +void format(std::ostream &out, const char *fmt, const Args &... args) { + vformat(out, fmt, makeFormatList(args...)); +} + +/// Format list of arguments according to the given format string and return +/// the result as a string. +template +std::string format(const char *fmt, const Args &... args) { + std::ostringstream oss; + format(oss, fmt, args...); + return oss.str(); +} + +/// Format list of arguments to std::cout, according to the given format string +template +void printf(const char *fmt, const Args &... args) { + format(std::cout, fmt, args...); +} + +template +void printfln(const char *fmt, const Args &... 
args) {
+  format(std::cout, fmt, args...);
+  std::cout << '\n';
+}
+
+}  // namespace tinyformat
+}  // namespace string
+}  // namespace paddle

From 098f956c098f5980a231575ed663b4704b6df5c5 Mon Sep 17 00:00:00 2001
From: Yi Wang
Date: Wed, 28 Jun 2017 17:36:32 -0700
Subject: [PATCH 144/542] Correct function implementation dependencies

---
 paddle/string/printf.h | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/paddle/string/printf.h b/paddle/string/printf.h
index 0767f8f5b5..8b5ce63a8e 100644
--- a/paddle/string/printf.h
+++ b/paddle/string/printf.h
@@ -80,25 +80,19 @@ namespace string {

 template <typename... Args>
 void Fprintf(std::ostream& out, const char* fmt, const Args&... args) {
-  tinyformat::vformat(out, fmt, makeFormatList(args...));
+  tinyformat::vformat(out, fmt, tinyformat::makeFormatList(args...));
 }

 template <typename... Args>
 std::string Sprintf(const char* fmt, const Args&... args) {
   std::ostringstream oss;
-  tinyformat::format(oss, fmt, args...);
+  Fprintf(oss, fmt, args...);
   return oss.str();
 }

 template <typename... Args>
-void printf(const char* fmt, const Args&... args) {
-  tinyformat::format(std::cout, fmt, args...);
-}
-
-template <typename... Args>
-void printfln(const char* fmt, const Args&... args) {
-  tinyformat::format(std::cout, fmt, args...);
-  std::cout << '\n';
+void Printf(const char* fmt, const Args&... args) {
+  Fprintf(std::cout, fmt, args...);
 }

 }  // namespace string

From 5157ba692d53657c96f41c0a380219fe7a7a6b5a Mon Sep 17 00:00:00 2001
From: Helin Wang
Date: Wed, 28 Jun 2017 20:25:56 +0000
Subject: [PATCH 145/542] create save model design doc

---
 doc/design/cluster_train/save_model.md | 100 +++++++++++++++++++++++++
 1 file changed, 100 insertions(+)
 create mode 100644 doc/design/cluster_train/save_model.md

diff --git a/doc/design/cluster_train/save_model.md b/doc/design/cluster_train/save_model.md
new file mode 100644
index 0000000000..3a9a24fb9c
--- /dev/null
+++ b/doc/design/cluster_train/save_model.md
@@ -0,0 +1,100 @@
+# Design Doc: Save Model
+
+## Overview
+
+The model is the output of the training process. There are two
+ways a user can obtain a model:
+
+- Save model triggered by user code: user code asks PaddlePaddle to
+  save a model.
+- Convert model from the snapshot: the model is converted from the
+  pservers' periodic snapshots. In this way, the user can cancel a
+  job at any time and still have a relatively fresh model (we
+  snapshot around every 5 minutes).
+
+### Save Model Triggered by User Code
+
+Both trainers and pservers have access to the model, so the model
+can be saved from a trainer or from the pservers. We need to decide
+where the model is saved from.
+
+#### Dense Model vs. Sparse Model
+
+There are two types of model: dense and sparse (a model is sparse
+when its parameter is configured to be sparse). The pservers always
+jointly hold the entire model at any given time. A trainer holds
+the entire dense model, but only a fraction of the sparse model at
+any given time.
+
+#### Pservers Saving Model
+
+The benefit of letting the pservers save the model is that they
+hold the entire model all the time. However, since pservers are on
+different nodes, this requires a merging process to merge the model
+shards into a single model, which in turn requires the pservers to
+write the model to a distributed filesystem, making the snapshot
+shards visible to the merge program.
+
+#### Trainer Saving Model
+
+The benefit of letting one trainer save the model is that it does
+not require a distributed filesystem.
+It also reuses the same
+save-model logic used when the trainer is training locally - except
+that, when training a sparse model, the trainer needs to download
+the entire sparse model during the saving process.
+
+#### Conclusion
+
+Given that trainer-side saving does not require a distributed
+filesystem, and is an intuitive extension of training locally, we
+decide to let the trainer save the model.
+
+
+### Convert Model from Snapshot
+
+TODO
+
+
+## Timeline
+
+We will first implement saving the model from the trainer.
+Converting the latest snapshot to a model is a TODO for the future.
+
+
+## Trainer Save Model
+
+### Trainer Election
+
+One trainer will be elected as the one to save the model. When using
+etcd, each trainer ID is a randomly generated UUID, and we will use
+etcd to elect one trainer. When not using etcd, unique trainer IDs
+will be given by the administrator; the trainer whose ID is "0" is
+elected to save the model.
+
+### Model Save Path
+
+Each trainer will be given the directory to save the model in. The
+elected trainer will save the model to
+`given-directory/trainerID`. Since the trainerID is unique, this
+prevents concurrent saves to the same file when multiple trainers
+are elected because of a split-brain problem.
+
+### What Happens When Model Is Saving
+
+Saving the model takes some time, so we need to define what happens
+while the save is taking place.
+
+When saving a dense model, the trainer uses the local model. The
+pservers do not need to pause model updates.
+
+When saving a sparse model, the trainer needs to download the
+entire sparse model while saving. To get the most accurate model,
+the model update needs to be paused before the download starts and
+resumed after the download finishes. Otherwise, the trainer gets a
+model that is "polluted": some part of the model is old, some part
+of the model is new.
+
+It is unclear whether the "polluted" model will be inferior, due to
+the stochastic nature of deep learning, and pausing the model
+update would add more complexity to the system. Since supporting
+sparse models is a TODO item, we defer the evaluation of whether to
+pause the model update while saving to the future.

From fc3d03142582dcd673cc97fb3b0239bac59815f4 Mon Sep 17 00:00:00 2001
From: gongweibao
Date: Thu, 29 Jun 2017 09:38:25 +0800
Subject: [PATCH 146/542] first add

---
 go/master/c/client.go                         |  5 ++
 go/master/client.go                           |  3 +-
 python/paddle/v2/master/client.py             |  3 ++
 python/paddle/v2/reader/creator.py            | 49 ++++++++++++++-----
 python/paddle/v2/reader/tests/creator_test.py |  2 +-
 5 files changed, 49 insertions(+), 13 deletions(-)

diff --git a/go/master/c/client.go b/go/master/c/client.go
index b186474dc3..b88911b858 100644
--- a/go/master/c/client.go
+++ b/go/master/c/client.go
@@ -88,7 +88,12 @@ func paddle_set_dataset(client C.paddle_master_client, path **C.char, size C.int
 func paddle_next_record(client C.paddle_master_client, record **C.uchar) C.int {
 	c := get(client)
 	r := c.NextRecord()
+	if r == nil {
+		// EOF
+		return -1
+	}
 	if len(r) == 0 {
+		// Empty record
 		*record = (*C.uchar)(nullPtr)
 		return 0
 	}
diff --git a/go/master/client.go b/go/master/client.go
index 8451820c19..4f8df5ba66 100644
--- a/go/master/client.go
+++ b/go/master/client.go
@@ -60,6 +60,7 @@ func (c *Client) getRecords() {
 		}

 		err = f.Close()
+		c.ch <- nil
 		if err != nil {
 			log.Errorln(err)
 		}
@@ -112,7 +113,7 @@ func (c *Client) monitorMaster(addr Addresser) {
 //
 // SetDataset can be call multiple times from different nodes. But
 // only the first call will be honored.
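
The election and save-path rules in the save-model design above can be made concrete with a short sketch. This is a minimal illustration under stated assumptions, not part of any patch: `maybe_save_model`, `trainer_id`, and `save_dir` are hypothetical names, and the `to_tar` method on the parameters object is assumed.

```python
import os


def maybe_save_model(trainer_id, save_dir, params):
    """Sketch of the non-etcd election rule: the trainer whose
    administrator-assigned ID is "0" is the one that saves."""
    if trainer_id != "0":
        return  # not elected, do nothing

    # Save under given-directory/trainerID so that concurrent saves
    # during a split-brain election cannot clobber each other.
    path = os.path.join(save_dir, trainer_id)
    if not os.path.isdir(path):
        os.makedirs(path)
    with open(os.path.join(path, "model.tar"), "wb") as f:
        params.to_tar(f)  # assumed Parameters-style serialization
```
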
-func (c *Client) SetDataset(globPaths []string) error {
+func (c *Client) SetDataset(globPaths ...string) error {
 	return c.conn.Call("Service.SetDataset", globPaths, nil)
 }

diff --git a/python/paddle/v2/master/client.py b/python/paddle/v2/master/client.py
index de8e9bb88e..9fd3ef0860 100644
--- a/python/paddle/v2/master/client.py
+++ b/python/paddle/v2/master/client.py
@@ -30,6 +30,9 @@ class client(object):
         p = ctypes.c_char_p()
         ret = ctypes.pointer(p)
         size = lib.paddle_next_record(self.c, ret)
+        if size < 0:
+            # EOF
+            return None
         if size == 0:
             # Empty record
             return ""
diff --git a/python/paddle/v2/reader/creator.py b/python/paddle/v2/reader/creator.py
index 9f888b16d6..669867fd10 100644
--- a/python/paddle/v2/reader/creator.py
+++ b/python/paddle/v2/reader/creator.py
@@ -57,22 +57,49 @@ def text_file(path):
     return reader


-def recordio(path):
+def recordio_local(paths):
     """
-    Creates a data reader that outputs record one one by one from given recordio file
-    :path: path of recordio file
-    :returns: data reader of recordio file
+    Creates a data reader that outputs records one by one
+    from the given local recordio file paths.
+    :paths: paths of the recordio files.
+    :returns: data reader of the recordio files.
     """
     import recordio as rec

     def reader():
-        f = rec.reader(path)
-        while True:
-            r = f.read()
-            if r is None:
-                break
-            yield r
-        f.close()
+        for i, path in enumerate(paths):
+            f = rec.reader(path)
+            while True:
+                r = f.read()
+                if r is None:
+                    break
+                yield r
+            f.close()

     return reader
+
+
+def recordio(paths, addr="", buf_size=100):
+    """
+    Creates a data reader that outputs records one by one
+    from the given local or cloud recordio paths.
+    :paths: paths of the recordio files.
+    :returns: data reader of the recordio files.
+    """
+    import os
+    import paddle.v2.master.client as cloud
+
+    if len(os.environ["KUBERNETES_SERVICE_HOST"]) == 0:
+        return recordio_local(paths)
+
+    c = cloud(addr, buf_size)
+    c.set_dataset(paths)
+
+    while True:
+        r = c.next_record()
+        if r is None:
+            break
+        yield r
+
+    c.close()
diff --git a/python/paddle/v2/reader/tests/creator_test.py b/python/paddle/v2/reader/tests/creator_test.py
index ba4f558874..b42d273ecf 100644
--- a/python/paddle/v2/reader/tests/creator_test.py
+++ b/python/paddle/v2/reader/tests/creator_test.py
@@ -38,7 +38,7 @@ class TestRecordIO(unittest.TestCase):
     def test_recordio(self):
         path = os.path.join(
             os.path.dirname(__file__), "test_recordio_creator.dat")
-        reader = paddle.v2.reader.creator.recordio(path)
+        reader = paddle.v2.reader.creator.recordio([path])
         for idx, r in enumerate(reader()):
             self.assertSequenceEqual(r, str(idx))

From 48fdfd5a0350fea69d070716bd9c74ff56ebbc24 Mon Sep 17 00:00:00 2001
From: gongweibao
Date: Thu, 29 Jun 2017 09:40:29 +0800
Subject: [PATCH 147/542] fix bugs

---
 python/paddle/v2/dataset/uci_housing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/paddle/v2/dataset/uci_housing.py b/python/paddle/v2/dataset/uci_housing.py
index c715ea9681..ec10ce646e 100644
--- a/python/paddle/v2/dataset/uci_housing.py
+++ b/python/paddle/v2/dataset/uci_housing.py
@@ -14,7 +14,7 @@
 """
 UCI Housing dataset.

-This module will paddle.v2.dataset.common.download dataset from
+This module will download dataset from
 https://archive.ics.uci.edu/ml/machine-learning-databases/housing/
 and parse training set and test set into paddle reader creators.
""" From 0e6ddcc7bc63eb6ddfe5f12f4d9060625befe41a Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 29 Jun 2017 10:01:10 +0800 Subject: [PATCH 148/542] ENH: Add GPU throw error --- paddle/platform/error.h | 87 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 paddle/platform/error.h diff --git a/paddle/platform/error.h b/paddle/platform/error.h new file mode 100644 index 0000000000..93424bb610 --- /dev/null +++ b/paddle/platform/error.h @@ -0,0 +1,87 @@ +#pragma once + +#include +#include +#include + +#ifndef PADDLE_ONLY_CPU + +#include +#include +#include +#include +#include + +#endif // PADDLE_ONLY_CPU + +namespace paddle { +namespace platform { + +#ifndef PADDLE_ONLY_CPU + +inline void throw_on_error(cudaError_t e, const char* message) { + if (e) { + throw thrust::system_error(e, thrust::cuda_category(), message); + } +} + +inline void throw_on_error(curandStatus_t stat, const char* message) { + if (stat != CURAND_STATUS_SUCCESS) { + throw thrust::system_error(cudaErrorLaunchFailure, thrust::cuda_category(), + message); + } +} + +inline void throw_on_error(cudnnStatus_t stat, const char* message) { + std::stringstream ss; + if (stat == CUDNN_STATUS_SUCCESS) { + return; + } else { + ss << cudnnGetErrorString(stat); + ss << ", " << message; + throw std::runtime_error(ss.str()); + } +} + +inline void throw_on_error(cublasStatus_t stat, const char* message) { + std::stringstream ss; + if (stat == CUBLAS_STATUS_SUCCESS) { + return; + } else if (stat == CUBLAS_STATUS_NOT_INITIALIZED) { + ss << "CUBLAS: not initialized"; + } else if (stat == CUBLAS_STATUS_ALLOC_FAILED) { + ss << "CUBLAS: alloc failed"; + } else if (stat == CUBLAS_STATUS_INVALID_VALUE) { + ss << "CUBLAS: invalid value"; + } else if (stat == CUBLAS_STATUS_ARCH_MISMATCH) { + ss << "CUBLAS: arch mismatch"; + } else if (stat == CUBLAS_STATUS_MAPPING_ERROR) { + ss << "CUBLAS: mapping error"; + } else if (stat == CUBLAS_STATUS_EXECUTION_FAILED) { + ss << "CUBLAS: execution failed"; + } else if (stat == CUBLAS_STATUS_INTERNAL_ERROR) { + ss << "CUBLAS: internal error"; + } else if (stat == CUBLAS_STATUS_NOT_SUPPORTED) { + ss << "CUBLAS: not supported"; + } else if (stat == CUBLAS_STATUS_LICENSE_ERROR) { + ss << "CUBLAS: license error"; + } + ss << ", " << message; + throw std::runtime_error(ss.str()); +} + +inline void throw_on_error(cublasStatus_t stat) { + const char* message = ""; + throw_on_error(stat, message); +} + +#endif // PADDLE_ONLY_CPU + +inline void throw_on_error(int stat, const char* message) { + if (stat) { + throw std::runtime_error(message + (", stat = " + std::to_string(stat))); + } +} + +} // namespace platform +} // namespace paddle From c5e8e27ba6783a947965900931ed41c9ef2123fb Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Thu, 29 Jun 2017 10:02:20 +0800 Subject: [PATCH 149/542] "change empty gradient to error" --- go/pserver/client.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/go/pserver/client.go b/go/pserver/client.go index a8d2d710d8..6938b9d5ce 100644 --- a/go/pserver/client.go +++ b/go/pserver/client.go @@ -1,6 +1,7 @@ package pserver import ( + "errors" "hash/fnv" "sort" "time" @@ -124,8 +125,7 @@ func (c *Client) FinishInitParams() error { // parameters. 
func (c *Client) SendGrads(grads []Gradient) error { if len(grads) == 0 { - log.Info("Send Empty Gradient") - return nil + return errors.New("no gradient received") } errCh := make(chan error, len(grads)) for _, g := range grads { From 11a8dfe78e1626a6535a1d0ba8220c2dd3fa050c Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Thu, 29 Jun 2017 10:57:40 +0800 Subject: [PATCH 150/542] Use Parameters.from_tar for static method. --- python/paddle/v2/parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/v2/parameters.py b/python/paddle/v2/parameters.py index f730ea10bb..4c4ff4c7c2 100644 --- a/python/paddle/v2/parameters.py +++ b/python/paddle/v2/parameters.py @@ -300,7 +300,7 @@ class Parameters(object): return params def init_from_tar(self, f): - tar_param = self.from_tar(f) + tar_param = Parameters.from_tar(f) for pname in tar_param.names(): if pname in self.names(): self.set(pname, tar_param.get(pname)) From f0a3fb6e36e06512d537068ecd7c5f553a88da83 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 29 Jun 2017 11:10:59 +0800 Subject: [PATCH 151/542] Using paddle::string in enforce --- paddle/framework/CMakeLists.txt | 3 +- paddle/{platform => framework}/enforce.h | 63 +++---------------- .../{platform => framework}/enforce_test.cc | 9 ++- paddle/platform/CMakeLists.txt | 1 - 4 files changed, 14 insertions(+), 62 deletions(-) rename paddle/{platform => framework}/enforce.h (60%) rename paddle/{platform => framework}/enforce_test.cc (82%) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index e3c3155aa9..b06ecc2628 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -1,6 +1,5 @@ cc_library(ddim SRCS ddim.cc) cc_test(ddim_test SRCS ddim_test.cc DEPS ddim) - nv_test(dim_test SRCS dim_test.cu DEPS ddim) - cc_test(variable_test SRCS variable_test.cc) +cc_test(enforce_test SRCS enforce_test.cc) diff --git a/paddle/platform/enforce.h b/paddle/framework/enforce.h similarity index 60% rename from paddle/platform/enforce.h rename to paddle/framework/enforce.h index fbd3405a24..56cb7f9564 100644 --- a/paddle/platform/enforce.h +++ b/paddle/framework/enforce.h @@ -10,11 +10,12 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include #include #include namespace paddle { -namespace platform { +namespace framework { /** * @brief Enforce exception. Inherits std::exception @@ -23,10 +24,9 @@ namespace platform { */ class EnforceNotMet : public std::exception { public: - EnforceNotMet(const std::string& msg, const char* file, int fileline) - : file_(file), fileline_(fileline) { + EnforceNotMet(const std::string& msg, const char* file, int fileline) { std::ostringstream sout; - sout << msg << " at [" << file_ << ":" << fileline_ << "];"; + sout << msg << " at [" << file << ":" << fileline << "];"; all_msg_ = sout.str(); } @@ -34,52 +34,8 @@ class EnforceNotMet : public std::exception { private: std::string all_msg_; - const char* file_; - int fileline_; }; -namespace details { - -inline void MakeStringInternal(std::ostringstream& stream) {} - -template -inline void MakeStringInternal(std::ostringstream& stream, T v) { - stream << v; -} - -template -inline void MakeStringInternal(std::ostringstream& stream, T v, ARGS... args) { - MakeStringInternal(stream, v); - MakeStringInternal(stream, args...); -}; - -/** - * @brief Make string will concat all args into a string. - */ -template -inline std::string MakeString(ARGS... 
args) { - std::ostringstream sout; - details::MakeStringInternal(sout, args...); - return sout.str(); -} - -/** - * @brief special handle string - */ -template <> -inline std::string MakeString(std::string str) { - return str; -} - -/** - * @brief special handle const char* - */ -template <> -inline std::string MakeString(const char* str) { - return std::string(str); -} -} // namespace details - // From https://stackoverflow.com/questions/30130930/ // __buildin_expect is in C++ 11 standard. Since the condition which enforced // should be true in most situation, it will make the compiler generate faster @@ -93,11 +49,10 @@ inline std::string MakeString(const char* str) { * This macro take __VA_ARGS__, user can pass any type if that type can * serialize to std::ostream */ -#define PADDLE_THROW(...) \ - do { \ - throw ::paddle::platform::EnforceNotMet( \ - ::paddle::platform::details::MakeString(__VA_ARGS__), __FILE__, \ - __LINE__); \ +#define PADDLE_THROW(...) \ + do { \ + throw ::paddle::framework::EnforceNotMet( \ + ::paddle::string::Sprintf(__VA_ARGS__), __FILE__, __LINE__); \ } while (0) /** @@ -110,5 +65,5 @@ inline std::string MakeString(const char* str) { } \ } while (0) -} // namespace platform +} // namespace framework } // namespace paddle diff --git a/paddle/platform/enforce_test.cc b/paddle/framework/enforce_test.cc similarity index 82% rename from paddle/platform/enforce_test.cc rename to paddle/framework/enforce_test.cc index 23b32444ad..f8da1a192f 100644 --- a/paddle/platform/enforce_test.cc +++ b/paddle/framework/enforce_test.cc @@ -10,10 +10,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include +#include TEST(ENFORCE, OK) { - PADDLE_ENFORCE(true, "Enforce is ok", 123, "now", 0.345); + PADDLE_ENFORCE(true, "Enforce is ok %d now %f", 123, 0.345); size_t val = 1; const size_t limit = 10; PADDLE_ENFORCE(val < limit, "Enforce is OK too"); @@ -22,8 +22,8 @@ TEST(ENFORCE, OK) { TEST(ENFORCE, FAILED) { bool in_catch = false; try { - PADDLE_ENFORCE(false, "Enforce is not ok ", 123, " at all"); - } catch (paddle::platform::EnforceNotMet err) { + PADDLE_ENFORCE(false, "Enforce is not ok %d at all", 123); + } catch (paddle::framework::EnforceNotMet err) { in_catch = true; std::string msg = "Enforce is not ok 123 at all"; const char* what = err.what(); @@ -31,6 +31,5 @@ TEST(ENFORCE, FAILED) { ASSERT_EQ(what[i], msg[i]); } } - ASSERT_TRUE(in_catch); } \ No newline at end of file diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index bc72e62be4..c7d7b14518 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -2,4 +2,3 @@ nv_test(cuda_test SRCS cuda_test.cu) cc_library(place SRCS place.cc) cc_test(place_test SRCS place_test.cc DEPS place glog gflags) -cc_test(enforce_test SRCS enforce_test.cc) From aabe171566ca455925dd5a597497106e240656cb Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 29 Jun 2017 11:32:25 +0800 Subject: [PATCH 152/542] merge CreateVar and GetOrCreateVar --- paddle/framework/scope.h | 21 +++++++-------------- paddle/framework/scope_test.cc | 13 +++++++++---- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/paddle/framework/scope.h b/paddle/framework/scope.h index bb22c4b834..88a13145ca 100644 --- a/paddle/framework/scope.h +++ b/paddle/framework/scope.h @@ -19,7 +19,6 @@ limitations under the License. 
*/

 #include
 #include "paddle/framework/variable.h"
-#include "paddle/platform/assert.h"

 namespace paddle {
 namespace framework {
@@ -44,9 +43,13 @@ class Scope {
-  /// Create Variable in this Scope. Failed if Variable already been
-  /// created.
+  /// Create Variable in this Scope. Return the existing Variable if a
+  /// Variable with the same name has already been created.
   Variable* CreateVariable(const std::string& name) {
-    PADDLE_ASSERT(!HasVariable(name));
-    vars_[name] = std::unique_ptr<Variable>(new Variable());
-    return GetVariable(name);
+    auto var = GetVariable(name);
+    if (var) {
+      return var;
+    } else {
+      vars_[name] = std::unique_ptr<Variable>(new Variable());
+      return GetVariable(name);
+    }
   }

   /// Get Variable from this Scope, this function will recursive find Variable
@@ -62,16 +65,6 @@ class Scope {
     }
   }

-  /// Get Variable from scope, if Variable is not exist, creat one and return.
-  Variable* GetOrCreateVariable(const std::string& name) {
-    auto var = GetVariable(name);
-    if (var) {
-      return var;
-    } else {
-      return CreateVariable(name);
-    }
-  }
-
   /// Find if there is a Variable in this scope and it's parent scope
   bool HasVariable(const std::string& name) const {
     return (vars_.find(name) != vars_.end() ||
diff --git a/paddle/framework/scope_test.cc b/paddle/framework/scope_test.cc
index d73391d977..ec6236ec62 100644
--- a/paddle/framework/scope_test.cc
+++ b/paddle/framework/scope_test.cc
@@ -24,18 +24,22 @@ TEST(Scope, Create) {
   Variable* var0 = scope->CreateVariable("");
   EXPECT_NE(var0, nullptr);

+  /// GetVariable will return nullptr if the variable does not exist.
   Variable* var1 = scope->GetVariable("a");
   EXPECT_EQ(var1, nullptr);

+  /// CreateVariable will return a new variable.
   Variable* var2 = scope->CreateVariable("a");
+  EXPECT_NE(var2, nullptr);

-  ASSERT_DEATH({ scope->CreateVariable("a"); }, "");
-
+  /// Get the created variable.
   Variable* var3 = scope->GetVariable("a");
   EXPECT_EQ(var2, var3);

+  /// CreateVariable will just return the variable if it
+  /// already exists.
+  Variable* var4 = scope->CreateVariable("a");
+  EXPECT_EQ(var4, var2);
 }

 TEST(Scope, Parent) {
@@ -48,6 +52,7 @@ TEST(Scope, Parent) {
   Variable* var0 = parent_scope->CreateVariable("a");
   EXPECT_NE(var0, nullptr);

+  /// GetVariable will get the Variable from the parent scope if it exists there.
Variable* var1 = scope->GetVariable("a"); EXPECT_EQ(var0, var1); } From 456f9cc89f6ec5a80e08522ff1fafb8f20a21fa6 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 29 Jun 2017 12:11:03 +0800 Subject: [PATCH 153/542] Remove Python protobuf function --- cmake/generic.cmake | 23 ----------------------- proto/CMakeLists.txt | 19 ++++++++++++++++++- 2 files changed, 18 insertions(+), 24 deletions(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 24a07c0a24..8736d30059 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -357,26 +357,3 @@ function(pb_cc_library TARGET_NAME) include_directories(${CMAKE_CURRENT_BINARY_DIR}) cc_library(${TARGET_NAME} SRCS ${proto_srcs}) endfunction() - -function(pb_py_library TARGET_NAME) - set(oneValueArgs TARGET_DIR) - set(multiValueArgs SRCS) - cmake_parse_arguments(pb_py_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - if (NOT pb_py_library_TARGET_DIR) - set(pb_py_library_TARGET_DIR ${CMAKE_CURRENT_BINARY_DIR}) - endif() - set(py_srcs) - foreach(FIL ${pb_py_library_SRCS}) - get_filename_component(ABS_FIL ${FIL} ABSOLUTE) - get_filename_component(FIL_WE ${FIL} NAME_WE) - set(cur_py_src ${pb_py_library_TARGET_DIR}/${FIL_WE}_pb2.py) - list(APPEND py_srcs "${cur_py_src}") - add_custom_command(OUTPUT ${cur_py_src} - COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} - ARGS "--python_out=${pb_py_library_TARGET_DIR}" "-I" ${CMAKE_CURRENT_SOURCE_DIR} ${ABS_FIL} - DEPENDS ${ABS_FIL} protoc - COMMENT "Running Python protocol buffer compiler on ${FIL}") - endforeach() - - add_custom_target(${TARGET_NAME} ALL DEPENDS ${py_srcs}) -endfunction() \ No newline at end of file diff --git a/proto/CMakeLists.txt b/proto/CMakeLists.txt index 4402f2c899..1cf39d6944 100644 --- a/proto/CMakeLists.txt +++ b/proto/CMakeLists.txt @@ -1,3 +1,20 @@ file(GLOB proto_filenames . *.proto) pb_cc_library(paddle_proto SRCS ${proto_filenames}) -pb_py_library(gen_proto_py SRCS ${proto_filenames} TARGET_DIR ${PROJ_ROOT}/python/paddle/proto) + +set(PROTO_GEN) +set(PROTO_GEN_PY) + +foreach(filename ${proto_filenames}) + get_filename_component(base_filename ${filename} NAME_WE) + set(CUR_PROTO_GEN_PY + ${PROJ_ROOT}/paddle/python/paddle/proto/${base_filename}_pb2.py) + set(PROTO_GEN_PY + ${CUR_PROTO_GEN_PY} + ${PROTO_GEN_PY}) + add_custom_command(OUTPUT ${CUR_PROTO_GEN_PY} + COMMAND env ${py_env} ${PROTOBUF_PROTOC_EXECUTABLE} --python_out ${PROJ_ROOT}/python/paddle/proto + --proto_path ${PROJ_ROOT}/proto ${PROJ_ROOT}/proto/${filename} + DEPENDS ${filename} ${external_project_dependencies}) +endforeach() + +add_custom_target(gen_proto_py ALL DEPENDS ${PROTO_GEN_PY}) From 4874810ba5a1e6f8f6b4a9530e6854f65077a59e Mon Sep 17 00:00:00 2001 From: gongweibao Date: Thu, 29 Jun 2017 04:28:44 +0000 Subject: [PATCH 154/542] fix bugs --- go/master/client.go | 2 +- python/paddle/v2/reader/creator.py | 20 ++++++++++++-------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/go/master/client.go b/go/master/client.go index 4f8df5ba66..fa479338c5 100644 --- a/go/master/client.go +++ b/go/master/client.go @@ -113,7 +113,7 @@ func (c *Client) monitorMaster(addr Addresser) { // // SetDataset can be call multiple times from different nodes. But // only the first call will be honored. 
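
For reference, the creator contract exercised by the tests above works like this: `recordio` takes a list of paths and returns a reader, and each call to the reader starts a fresh pass over the records. A hedged usage sketch; the file path is a placeholder:

```python
import paddle.v2.reader.creator

# Build a reader creator over one (placeholder) recordio shard.
reader = paddle.v2.reader.creator.recordio(["/data/part-00000.recordio"])

# Calling reader() yields raw records one by one.
for record in reader():
    print(len(record))  # records are raw byte strings
```
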
-func (c *Client) SetDataset(globPaths ...string) error {
+func (c *Client) SetDataset(globPaths []string) error {
 	return c.conn.Call("Service.SetDataset", globPaths, nil)
 }

diff --git a/python/paddle/v2/reader/creator.py b/python/paddle/v2/reader/creator.py
index 669867fd10..3376d7accb 100644
--- a/python/paddle/v2/reader/creator.py
+++ b/python/paddle/v2/reader/creator.py
@@ -93,13 +93,17 @@ def recordio(paths, addr="", buf_size=100):
     if len(os.environ["KUBERNETES_SERVICE_HOST"]) == 0:
         return recordio_local(paths)

-    c = cloud(addr, buf_size)
-    c.set_dataset(paths)
+    def reader():
+        c = cloud(addr, buf_size)
+        c.set_dataset(paths)
+
+        while True:
+            r = c.next_record()
+            if r is None:
+                break
+            yield r

-    while True:
-        r = c.next_record()
-        if r is None:
-            break
-        yield r
+        c.close()
+
+    return reader

-    c.close()

From 32d6587242e5a2e97fe5b9e675273fa96fd99c5a Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Thu, 29 Jun 2017 12:28:48 +0800
Subject: [PATCH 155/542] Use protobuf_generate_cpp

---
 cmake/external/protobuf.cmake |  7 +++++++
 cmake/generic.cmake           | 17 +----------------
 2 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake
index 891fb29118..2f267adc20 100644
--- a/cmake/external/protobuf.cmake
+++ b/cmake/external/protobuf.cmake
@@ -13,6 +13,10 @@
 # limitations under the License.

 INCLUDE(ExternalProject)
+# Always invoke `FIND_PACKAGE(Protobuf)` for importing function protobuf_generate_cpp
+FIND_PACKAGE(Protobuf QUIET)
+SET(PROTOBUF_FOUND "OFF")

 # Print and set the protobuf library information,
 # finish this cmake process and exit from this file.
@@ -44,6 +48,9 @@ macro(PROMPT_PROTOBUF_LIB)

         ADD_EXECUTABLE(protoc IMPORTED GLOBAL)
         SET_PROPERTY(TARGET protoc PROPERTY IMPORTED_LOCATION ${PROTOBUF_PROTOC_EXECUTABLE})
+        # FIND_Protobuf.cmake uses `Protobuf_PROTOC_EXECUTABLE`,
+        # so set it to make `protobuf_generate_cpp` happy.
+        SET(Protobuf_PROTOC_EXECUTABLE ${PROTOBUF_PROTOC_EXECUTABLE})

     FOREACH(dep ${protobuf_DEPS})
         ADD_DEPENDENCIES(protobuf ${dep})
diff --git a/cmake/generic.cmake b/cmake/generic.cmake
index 8736d30059..cdf917a1e9 100644
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -338,22 +338,7 @@ function(pb_cc_library TARGET_NAME)
   cmake_parse_arguments(pb_cc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
   set(proto_srcs)
   set(proto_hdrs)
-  foreach(FIL ${pb_cc_library_SRCS})
-    get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
-    get_filename_component(FIL_WE ${FIL} NAME_WE)
-    list(APPEND proto_srcs "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc")
-    list(APPEND proto_hdrs "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h")
-
-    add_custom_command(
-      OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc"
-             "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h"
-      COMMAND ${PROTOBUF_PROTOC_EXECUTABLE}
-      ARGS "--cpp_out=${DLL_EXPORT_DECL}${CMAKE_CURRENT_BINARY_DIR}" "-I" ${CMAKE_CURRENT_SOURCE_DIR} ${ABS_FIL}
-      DEPENDS ${ABS_FIL} protoc
-      COMMENT "Running C++ protocol buffer compiler on ${FIL}"
-      VERBATIM )
-  endforeach()
-  set_source_files_properties(${proto_srcs} ${proto_hdrs} PROPERTIES GENERATED TRUE)
+  protobuf_generate_cpp(proto_srcs proto_hdrs ${pb_cc_library_SRCS})
   include_directories(${CMAKE_CURRENT_BINARY_DIR})
   cc_library(${TARGET_NAME} SRCS ${proto_srcs})
 endfunction()

From 23d6c594eca369820b5f4dfcd0a38a9f4cd6122e Mon Sep 17 00:00:00 2001
From: dangqingqing
Date: Thu, 29 Jun 2017 12:33:07 +0800
Subject: [PATCH 156/542] add comments

---
 python/paddle/v2/parameters.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/python/paddle/v2/parameters.py b/python/paddle/v2/parameters.py
index 4c4ff4c7c2..bbaf8bfa97 100644
--- a/python/paddle/v2/parameters.py
+++ b/python/paddle/v2/parameters.py
@@ -284,6 +284,18 @@ class Parameters(object):

     @staticmethod
     def from_tar(f):
+        """
+        Create a `Parameters` object from the given file, containing
+        only the parameters stored in that file. It assumes the
+        parameters in the file are the same as the ones defined in
+        the network; for example, it can be used for inference.
+
+        :param f: the initialized model file.
+        :type f: tar file
+        :return: A Parameters object.
+        :rtype: Parameters.
+        """
         params = Parameters()
         tar = tarfile.TarFile(fileobj=f, mode='r')
         for finfo in tar:
@@ -300,6 +312,15 @@ class Parameters(object):
         return params

     def init_from_tar(self, f):
+        """
+        Different from `from_tar`, this interface can be used to
+        initialize a subset of the network's parameters from another
+        saved model.
+
+        :param f: the initialized model file.
+        :type f: tar file
+        :return: Nothing.
+ """ + tar_param = Parameters.from_tar(f) for pname in tar_param.names(): if pname in self.names(): From b88ca542baa4bb5e5822912dceeb4ceb2080c660 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 29 Jun 2017 12:43:51 +0800 Subject: [PATCH 157/542] Rename pb_cc_library -> proto_library --- cmake/generic.cmake | 6 +++--- proto/CMakeLists.txt | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index cdf917a1e9..779f627115 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -332,13 +332,13 @@ function(go_test TARGET_NAME) add_test(${TARGET_NAME} ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}) endfunction(go_test) -function(pb_cc_library TARGET_NAME) +function(proto_library TARGET_NAME) set(oneValueArgs "") set(multiValueArgs SRCS) - cmake_parse_arguments(pb_cc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + cmake_parse_arguments(proto_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) set(proto_srcs) set(proto_hdrs) - protobuf_generate_cpp(proto_srcs proto_hdrs ${pb_cc_library_SRCS}) + protobuf_generate_cpp(proto_srcs proto_hdrs ${proto_library_SRCS}) include_directories(${CMAKE_CURRENT_BINARY_DIR}) cc_library(${TARGET_NAME} SRCS ${proto_srcs}) endfunction() diff --git a/proto/CMakeLists.txt b/proto/CMakeLists.txt index 1cf39d6944..436bea53e5 100644 --- a/proto/CMakeLists.txt +++ b/proto/CMakeLists.txt @@ -1,5 +1,5 @@ file(GLOB proto_filenames . *.proto) -pb_cc_library(paddle_proto SRCS ${proto_filenames}) +proto_library(paddle_proto SRCS ${proto_filenames}) set(PROTO_GEN) set(PROTO_GEN_PY) From 4a4ec31e0d2f07b5e29acfd1b5b0b62d40f7ab91 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 29 Jun 2017 12:53:10 +0800 Subject: [PATCH 158/542] Fix TravisCI --- paddle/cuda/CMakeLists.txt | 2 +- proto/CMakeLists.txt | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/paddle/cuda/CMakeLists.txt b/paddle/cuda/CMakeLists.txt index f9061e96de..73ffa690d9 100755 --- a/paddle/cuda/CMakeLists.txt +++ b/paddle/cuda/CMakeLists.txt @@ -83,7 +83,7 @@ else() ${CUDA_CXX_SOURCES}) endif() -add_dependencies(paddle_cuda ${external_project_dependencies}) +add_dependencies(paddle_cuda paddle_proto ${external_project_dependencies}) add_style_check_target(paddle_cuda ${CUDA_SOURCES} diff --git a/proto/CMakeLists.txt b/proto/CMakeLists.txt index 436bea53e5..70dd4d674c 100644 --- a/proto/CMakeLists.txt +++ b/proto/CMakeLists.txt @@ -5,16 +5,18 @@ set(PROTO_GEN) set(PROTO_GEN_PY) foreach(filename ${proto_filenames}) - get_filename_component(base_filename ${filename} NAME_WE) + get_filename_component(ABS_FIL ${filename} ABSOLUTE) + get_filename_component(FIL_WE ${filename} NAME_WE) set(CUR_PROTO_GEN_PY - ${PROJ_ROOT}/paddle/python/paddle/proto/${base_filename}_pb2.py) + ${PROJ_ROOT}/paddle/python/paddle/proto/${FIL_WE}_pb2.py) set(PROTO_GEN_PY ${CUR_PROTO_GEN_PY} ${PROTO_GEN_PY}) add_custom_command(OUTPUT ${CUR_PROTO_GEN_PY} - COMMAND env ${py_env} ${PROTOBUF_PROTOC_EXECUTABLE} --python_out ${PROJ_ROOT}/python/paddle/proto - --proto_path ${PROJ_ROOT}/proto ${PROJ_ROOT}/proto/${filename} - DEPENDS ${filename} ${external_project_dependencies}) + COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} + ARGS "--python_out=${PROJ_ROOT}/python/paddle/proto" + "-I" ${CMAKE_CURRENT_SOURCE_DIR} ${ABS_FIL} + DEPENDS ${ABS_FIL} ${external_project_dependencies}) endforeach() add_custom_target(gen_proto_py ALL DEPENDS ${PROTO_GEN_PY}) From 9af8d86b7ceedbc244873ee5207392231bab540a Mon Sep 17 00:00:00 2001 From: Yancey Date: 
Thu, 29 Jun 2017 13:20:13 +0800 Subject: [PATCH 159/542] Trainer library discover master by etcd (#2551) * add trainer library * modifty file name * move trainer to master client * update * update * modify monitor master to receive a chan * update * use etcd client from etcd_client.go * update * update * remove etcd client without lock * update * update the comment * update commonts --- go/master/c/client.go | 30 +++++++++++++++++++++++++----- go/master/client.go | 24 ++++-------------------- go/master/client_internal_test.go | 13 ++++--------- go/master/client_test.go | 8 +++----- go/master/etcd_client.go | 28 ++++++++++++++++++++++++++++ 5 files changed, 64 insertions(+), 39 deletions(-) diff --git a/go/master/c/client.go b/go/master/c/client.go index b186474dc3..9e35e98600 100644 --- a/go/master/c/client.go +++ b/go/master/c/client.go @@ -13,10 +13,13 @@ typedef int paddle_master_client; import "C" import ( + "strings" "sync" + "time" "unsafe" "github.com/PaddlePaddle/Paddle/go/master" + "github.com/coreos/etcd/clientv3" log "github.com/sirupsen/logrus" ) @@ -48,16 +51,33 @@ func remove(client C.paddle_master_client) *master.Client { return h } -type addresser string - -func (a addresser) Address() string { - return string(a) +//export paddle_new_etcd_master_client +func paddle_new_etcd_master_client(etcdEndpoints *C.char, timeout int, bufSize int) C.paddle_master_client { + p := C.GoString(etcdEndpoints) + cli, err := clientv3.New(clientv3.Config{ + Endpoints: strings.Split(p, ","), + DialTimeout: time.Second * time.Duration(timeout), + }) + if err != nil { + panic(err) + } + ch := make(chan string, 1) + a, err := master.GetKey(cli, master.DefaultAddrPath, timeout) + if err != nil { + panic(err) + } + ch <- a + go master.WatchKey(cli, master.DefaultAddrPath, ch) + c := master.NewClient(ch, bufSize) + return add(c) } //export paddle_new_master_client func paddle_new_master_client(addr *C.char, bufSize int) C.paddle_master_client { a := C.GoString(addr) - c := master.NewClient(addresser(a), bufSize) + ch := make(chan string, 1) + ch <- a + c := master.NewClient(ch, bufSize) return add(c) } diff --git a/go/master/client.go b/go/master/client.go index 8451820c19..d3bea49d0a 100644 --- a/go/master/client.go +++ b/go/master/client.go @@ -2,18 +2,12 @@ package master import ( "os" - "time" "github.com/PaddlePaddle/Paddle/go/connection" "github.com/PaddlePaddle/recordio" log "github.com/sirupsen/logrus" ) -// Addresser provide the address of the master server. -type Addresser interface { - Address() string -} - // Client is the client of the master server. type Client struct { conn *connection.Conn @@ -24,11 +18,11 @@ type Client struct { // // bufSize is the record buffer size. NextRecord will read from this // buffer. -func NewClient(addr Addresser, bufSize int) *Client { +func NewClient(addrCh <-chan string, bufSize int) *Client { c := &Client{} c.conn = connection.New() c.ch = make(chan []byte, bufSize) - go c.monitorMaster(addr) + go c.monitorMaster(addrCh) go c.getRecords() return c } @@ -72,12 +66,10 @@ func (c *Client) getRecords() { } } -func (c *Client) monitorMaster(addr Addresser) { +func (c *Client) monitorMaster(addrCh <-chan string) { lastMaster := "" - monitor := func() { - // get the lastest address of the master server, + for curMaster := range addrCh { // connect to the new address once address changed. 
-		curMaster := addr.Address()
 		if curMaster != lastMaster {
 			if curMaster == "" {
 				err := c.conn.Close()
@@ -94,18 +86,10 @@ func (c *Client) monitorMaster(addr Addresser) {
 				// to retry next time.
 				curMaster = lastMaster
 			}
-		}
 		}
-
 		lastMaster = curMaster
 	}
-
-	monitor()
-	ticker := time.NewTicker(10 * time.Second)
-	for _ = range ticker.C {
-		monitor()
-	}
 }

 // SetDataset set dataset for the master server to dispatch.
diff --git a/go/master/client_internal_test.go b/go/master/client_internal_test.go
index 251225780a..364dce7b58 100644
--- a/go/master/client_internal_test.go
+++ b/go/master/client_internal_test.go
@@ -26,12 +26,6 @@ func init() {
 	log.SetLevel(log.ErrorLevel)
 }

-type TestAddresser string
-
-func (a TestAddresser) Address() string {
-	return string(a)
-}
-
 func TestGetFinishTask(t *testing.T) {
 	const path = "/tmp/master_client_test_0"

@@ -45,7 +39,6 @@ func TestGetFinishTask(t *testing.T) {
 	if err != nil {
 		panic(err)
 	}
-
 	go func(l net.Listener) {
 		s, err := NewService(&InMemStore{}, chunkPerTask, time.Second, 1)
 		if err != nil {
@@ -82,9 +75,11 @@ func TestGetFinishTask(t *testing.T) {
 	// Manually intialize client to avoid calling c.getRecords()
 	c := &Client{}
 	c.conn = connection.New()
-	go c.monitorMaster(TestAddresser(fmt.Sprintf(":%d", p)))
+	addr := fmt.Sprintf(":%d", p)
+	ch := make(chan string, 1)
+	ch <- addr
+	go c.monitorMaster(ch)
 	c.SetDataset([]string{path})
-
 	checkOnePass := func(i int) {
 		var tasks []Task
 		for idx := 0; idx < totalTask; idx++ {
diff --git a/go/master/client_test.go b/go/master/client_test.go
index 85a86761c2..c00aeebfd5 100644
--- a/go/master/client_test.go
+++ b/go/master/client_test.go
@@ -20,7 +20,6 @@ func TestNextRecord(t *testing.T) {
 		path  = "/tmp/master_client_TestFull"
 		total = 50
 	)
-
 	l, err := net.Listen("tcp", ":0")
 	if err != nil {
 		panic(err)
 	}
@@ -31,7 +30,6 @@ func TestNextRecord(t *testing.T) {
 	if err != nil {
 		panic(err)
 	}
-
 	go func(l net.Listener) {
 		s, err := master.NewService(&master.InMemStore{}, 10, time.Second, 1)
 		if err != nil {
@@ -63,10 +61,10 @@ func TestNextRecord(t *testing.T) {
 	}
 	w.Close()
 	f.Close()
-
-	c := master.NewClient(master.TestAddresser(fmt.Sprintf(":%d", p)), 10)
+	curAddr := make(chan string, 1)
+	curAddr <- fmt.Sprintf(":%d", p)
+	c := master.NewClient(curAddr, 10)
 	c.SetDataset([]string{path})
-
 	for pass := 0; pass < 50; pass++ {
 		received := make(map[byte]bool)
 		for i := 0; i < total; i++ {
diff --git a/go/master/etcd_client.go b/go/master/etcd_client.go
index f7b4638577..e27c014792 100644
--- a/go/master/etcd_client.go
+++ b/go/master/etcd_client.go
@@ -142,3 +142,31 @@ func (e *EtcdClient) Load() ([]byte, error) {
 	state := kvs[0].Value
 	return state, nil
}
+
+// GetKey gets the value for the specified key.
+func GetKey(c *clientv3.Client, key string, timeout int) (string, error) {
+	ctx, cancel := context.WithTimeout(context.Background(), time.Second*time.Duration(timeout))
+	resp, err := c.Get(ctx, key)
+	cancel()
+	if err != nil {
+		return "", err
+	}
+	kvs := resp.Kvs
+	if len(kvs) == 0 {
+		return "", nil
+	}
+	v := kvs[0].Value
+	return string(v), nil
+}
+
+// WatchKey watches the specified key and sends each event's value to valChan.
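
GetKey above and WatchKey (whose body follows) implement a discover-then-watch pattern for the master address. For illustration only, a rough Python analog using the third-party `etcd3` package (an assumption; the Go code talks to `clientv3` directly), with a hypothetical key path standing in for the real address key:

```python
import queue

import etcd3  # assumed stand-in for Go's clientv3

addr_q = queue.Queue()
cli = etcd3.client(host="127.0.0.1", port=2379)  # placeholder endpoint

# Discover: read the current master address once (like GetKey).
value, _ = cli.get("/master/addr")  # hypothetical key path
addr_q.put(value.decode() if value else "")

# Watch: forward every change (like WatchKey). A DELETE event
# carries an empty value, mirroring the Go comment above.
events, cancel = cli.watch("/master/addr")
for ev in events:
    addr_q.put(ev.value.decode() if ev.value else "")
```
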
+func WatchKey(c *clientv3.Client, key string, valChan chan<- string) { + rch := c.Watch(context.Background(), key) + for wresp := range rch { + for _, ev := range wresp.Events { + // if received event is DELETE, the value will be an empty string + log.Infof("received event %s, %q : %q\n", ev.Type, ev.Kv.Key, ev.Kv.Value) + valChan <- string(ev.Kv.Value) + } + } +} From d3b77a5bc053b77309ecc094450e755604217674 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 29 Jun 2017 13:56:38 +0800 Subject: [PATCH 160/542] ENH: Add Gpu info --- paddle/platform/gpu_info.cc | 49 +++++++++++++++++++++++++++++++++++++ paddle/platform/gpu_info.h | 36 +++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 paddle/platform/gpu_info.cc create mode 100644 paddle/platform/gpu_info.h diff --git a/paddle/platform/gpu_info.cc b/paddle/platform/gpu_info.cc new file mode 100644 index 0000000000..4208d83078 --- /dev/null +++ b/paddle/platform/gpu_info.cc @@ -0,0 +1,49 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/platform/gpu_info.h" +#include "gflags/gflags.h" +#include "paddle/platform/error.h" + +DEFINE_double(fraction_of_gpu_memory_to_use, 0.95, + "Default use 95% of GPU memory for PaddlePaddle," + "reserve the rest for page tables, etc"); + +namespace paddle { +namespace platform { + +int GpuDeviceCount() { + int count; + throw_on_error( + cudaGetDeviceCount(&count), + "cudaGetDeviceCount failed in paddle::platform::GpuDeviceCount"); + return count; +} + +void GpuMemoryUsage(size_t& available, size_t& total) { + throw_on_error(cudaMemGetInfo(&available, &total), + "cudaMemGetInfo failed in paddle::platform::GetMemoryUsage"); +} + +size_t GpuMaxAllocSize() { + size_t total = 0; + size_t available = 0; + + GpuMemoryUsage(available, total); + + return total * FLAGS_fraction_of_gpu_memory_to_use; +} + +} // namespace platform +} // namespace paddle diff --git a/paddle/platform/gpu_info.h b/paddle/platform/gpu_info.h new file mode 100644 index 0000000000..174f093b43 --- /dev/null +++ b/paddle/platform/gpu_info.h @@ -0,0 +1,36 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#ifndef PADDLE_ONLY_CPU + +#include + +namespace paddle { +namespace platform { + +//! Get the total number of GPU devices in system. +int GpuDeviceCount(); + +//!Get the memory usage of current GPU device. +void GpuMemoryUsage(size_t& available, size_t& total); + +//! 
Get the maximum allocation size of current GPU device. +size_t GpuMaxAllocSize(); + +} // namespace platform +} // namespace paddle + +#endif // PADDLE_ONLY_CPU From b29923f902dc6da1416a94bc153448f1546e62b2 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 29 Jun 2017 13:56:57 +0800 Subject: [PATCH 161/542] ENH: Add CPU info --- paddle/platform/cpu_info.cc | 55 +++++++++++++++++++++++++++++++++++++ paddle/platform/cpu_info.h | 26 ++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 paddle/platform/cpu_info.cc create mode 100644 paddle/platform/cpu_info.h diff --git a/paddle/platform/cpu_info.cc b/paddle/platform/cpu_info.cc new file mode 100644 index 0000000000..deff76502e --- /dev/null +++ b/paddle/platform/cpu_info.cc @@ -0,0 +1,55 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/platform/cpu_info.h" + +#ifdef __APPLE__ +#include <sys/types.h> +#include <sys/sysctl.h> +#else +#include <unistd.h> +#endif + +#include "gflags/gflags.h" +#include "paddle/platform/error.h" + +DEFINE_double(fraction_of_cpu_memory_to_use, 1, + "Default use 100% of CPU memory for PaddlePaddle," + "reserve the rest for page tables, etc"); + +namespace paddle { +namespace platform { + +inline size_t CpuTotalPhysicalMemory() { +#ifdef __APPLE__ + int mib[2]; + mib[0] = CTL_HW; + mib[1] = HW_MEMSIZE; + int64_t size = 0; + size_t len = sizeof(size); + if (sysctl(mib, 2, &size, &len, NULL, 0) == 0) return (size_t)size; + return 0L; +#else + long pages = sysconf(_SC_PHYS_PAGES); + long page_size = sysconf(_SC_PAGE_SIZE); + return pages * page_size; +#endif +} + +size_t CpuTotalMemory() { + return FLAGS_fraction_of_cpu_memory_to_use * CpuTotalPhysicalMemory(); +} + +} // namespace platform +} // namespace paddle diff --git a/paddle/platform/cpu_info.h b/paddle/platform/cpu_info.h new file mode 100644 index 0000000000..3b768589e1 --- /dev/null +++ b/paddle/platform/cpu_info.h @@ -0,0 +1,26 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include <stddef.h> + +namespace paddle { +namespace platform { + +//! Get the total memory on host machine.
+size_t CpuTotalMemory(); + +} // namespace platform +} // namespace paddle From 169022d0148a77cd10f16a82e841a75750e7e173 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 29 Jun 2017 14:04:47 +0800 Subject: [PATCH 162/542] FIX: Improve fallback gpu allocator --- paddle/memory/detail/CMakeLists.txt | 4 +- paddle/memory/detail/system_allocator.cc | 64 ++++++++++++++----- paddle/memory/detail/system_allocator.h | 15 +++-- paddle/memory/detail/system_allocator_test.cc | 14 ++-- paddle/platform/CMakeLists.txt | 4 ++ paddle/platform/cpu_info_test.cc | 18 ++++++ paddle/platform/cuda.h | 40 ------------ 7 files changed, 85 insertions(+), 74 deletions(-) create mode 100644 paddle/platform/cpu_info_test.cc delete mode 100644 paddle/platform/cuda.h diff --git a/paddle/memory/detail/CMakeLists.txt b/paddle/memory/detail/CMakeLists.txt index 72d3749ad7..6caa97a76b 100644 --- a/paddle/memory/detail/CMakeLists.txt +++ b/paddle/memory/detail/CMakeLists.txt @@ -1,6 +1,8 @@ if(${WITH_GPU}) nv_library(system_allocator SRCS system_allocator.cc DEPS gflags) - nv_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator gflags) + nv_test(system_allocator_test + SRCS system_allocator_test.cc + DEPS system_allocator gpu_info gflags) else(${WITH_GPU}) cc_library(system_allocator SRCS system_allocator.cc DEPS gflags) cc_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator gflags) diff --git a/paddle/memory/detail/system_allocator.cc b/paddle/memory/detail/system_allocator.cc index 50bec926f8..332ff062d4 100644 --- a/paddle/memory/detail/system_allocator.cc +++ b/paddle/memory/detail/system_allocator.cc @@ -13,32 +13,39 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/memory/detail/system_allocator.h" +#include "paddle/platform/assert.h" +#include "paddle/platform/error.h" +#include "paddle/platform/gpu_info.h" #include <stdlib.h> // for malloc and free #include <sys/mman.h> // for mlock and munlock #include "gflags/gflags.h" -#include "paddle/platform/assert.h" -#include "paddle/platform/cuda.h" // If use_pinned_memory is true, CPUAllocator calls mlock, which // returns pinned and locked memory as staging areas for data exchange // between host and device. Allocating too much would reduce the amount // of memory available to the system for paging. So, by default, we // should set false to use_pinned_memory. -DEFINE_bool(use_pinned_memory, false, - "If set, allocate cpu/gpu pinned memory."); +DEFINE_bool(use_pinned_memory, false, "If set, allocate cpu pinned memory."); namespace paddle { namespace memory { namespace detail { -void* CPUAllocator::Alloc(size_t size) { +void* CPUAllocator::Alloc(size_t& index, size_t size) { // According to http://www.cplusplus.com/reference/cstdlib/malloc/, // malloc might not return nullptr if size is zero, but the returned // pointer shall not be dereferenced -- so we make it nullptr.
if (size <= 0) return nullptr; void* p = malloc(size); if (p != nullptr && FLAGS_use_pinned_memory) { mlock(p, size); @@ -46,7 +53,7 @@ void* CPUAllocator::Alloc(size_t size) { return p; } -void CPUAllocator::Free(void* p, size_t size) { +void CPUAllocator::Free(void* p, size_t size, size_t index) { if (p != nullptr && FLAGS_use_pinned_memory) { munlock(p, size); } @@ -55,29 +62,52 @@ void CPUAllocator::Free(void* p, size_t size) { #ifndef PADDLE_ONLY_CPU -void* GPUAllocator::Alloc(size_t size) { +void* GPUAllocator::Alloc(size_t& index, size_t size) { // CUDA documentation doesn't explain if cudaMalloc returns nullptr // if size is 0. We just make sure it does. - if (size <= 0) { - return nullptr; - } + if (size <= 0) return nullptr; + size_t available = 0; + size_t capacity = 0; + paddle::platform::GpuMemoryUsage(available, capacity); + + // Reserve memory for page tables, etc. + size_t reserving = capacity - paddle::platform::GpuMaxAllocSize(); + size_t remaining = available > reserving ? available - reserving : 0; + + // If the remaining size is no less than the requested size, use plain + // cudaMalloc to allocate GPU memory. void* p = 0; - cudaError_t result = - FLAGS_use_pinned_memory ? cudaMallocHost(&p, size) : cudaMalloc(&p, size); - if (result != cudaSuccess) { - cudaGetLastError(); // clear error if there is any. + if (size <= remaining) { + cudaError_t result = cudaMalloc(&p, size); + if (result == cudaSuccess) { + index = 0; + total_alloc_size_ += size; + return p; + } } - return result == cudaSuccess ? p : nullptr; + + // If the remaining size is less than the requested size, or cudaMalloc + // failed, cudaMallocHost is used as a fallback allocator. + cudaError_t result = cudaMallocHost(&p, size); + if (result == cudaSuccess) { + index = 1; + total_alloc_size_ += size; + return p; + } + + return nullptr; } -void GPUAllocator::Free(void* p, size_t size) { +void GPUAllocator::Free(void* p, size_t size, size_t index) { // Purposefully allow cudaErrorCudartUnloading, because // that is returned if you ever call cudaFree after the // driver has already shutdown. This happens only if the // process is terminating, in which case we don't care if // cudaFree succeeds. - cudaError_t err = FLAGS_use_pinned_memory ? cudaFreeHost(p) : cudaFree(p); + PADDLE_ASSERT(total_alloc_size_ >= size); + total_alloc_size_ -= size; + cudaError_t err = index == 1 ? 
cudaFreeHost(p) : cudaFree(p); if (err != cudaErrorCudartUnloading) { platform::throw_on_error(err, "cudaFree{Host} failed"); } diff --git a/paddle/memory/detail/system_allocator.h b/paddle/memory/detail/system_allocator.h index 184b383f7f..e15302ce4f 100644 --- a/paddle/memory/detail/system_allocator.h +++ b/paddle/memory/detail/system_allocator.h @@ -30,21 +30,24 @@ namespace detail { class SystemAllocator { public: virtual ~SystemAllocator() {} - virtual void* Alloc(size_t size) = 0; - virtual void Free(void* p, size_t size) = 0; + virtual void* Alloc(size_t& index, size_t size) = 0; + virtual void Free(void* p, size_t size, size_t index) = 0; }; class CPUAllocator : public SystemAllocator { public: - virtual void* Alloc(size_t size); - virtual void Free(void* p, size_t size); + virtual void* Alloc(size_t& index, size_t size); + virtual void Free(void* p, size_t size, size_t index); }; #ifndef PADDLE_ONLY_CPU class GPUAllocator : public SystemAllocator { public: - virtual void* Alloc(size_t size); - virtual void Free(void* p, size_t size); + virtual void* Alloc(size_t& index, size_t size); + virtual void Free(void* p, size_t size, size_t index); + + private: + size_t total_alloc_size_ = 0; }; #endif // PADDLE_ONLY_CPU diff --git a/paddle/memory/detail/system_allocator_test.cc b/paddle/memory/detail/system_allocator_test.cc index 9bd5706a4e..ba44e06ddb 100644 --- a/paddle/memory/detail/system_allocator_test.cc +++ b/paddle/memory/detail/system_allocator_test.cc @@ -25,7 +25,8 @@ DECLARE_bool(use_pinned_memory); void TestAllocator(paddle::memory::detail::SystemAllocator& a, size_t size) { bool freed = false; { - void* p = a.Alloc(size); + size_t index; + void* p = a.Alloc(index, size); if (size > 0) { EXPECT_NE(p, nullptr); } else { @@ -35,7 +36,7 @@ void TestAllocator(paddle::memory::detail::SystemAllocator& a, size_t size) { int* i = static_cast<int*>(p); std::shared_ptr<int> ptr(i, [&](void* p) { freed = true; - a.Free(p, size); + a.Free(p, size, index); }); } EXPECT_TRUE(freed); @@ -56,14 +57,7 @@ TEST(CPUAllocator, LockMem) { } #ifndef PADDLE_ONLY_CPU -TEST(GPUAllocator, NoStaging) { - FLAGS_use_pinned_memory = false; - paddle::memory::detail::GPUAllocator a; - TestAllocator(a, 2048); - TestAllocator(a, 0); -} -TEST(GPUAllocator, Staging) { - FLAGS_use_pinned_memory = true; +TEST(GPUAllocator, Alloc) { paddle::memory::detail::GPUAllocator a; TestAllocator(a, 2048); TestAllocator(a, 0); diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index 7abe2ab89e..17342356d6 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -1,3 +1,7 @@ +cc_library(cpu_info SRCS cpu_info.cc) +cc_test(cpu_info_test SRCS cpu_info_test.cc DEPS cpu_info gflags) + +nv_library(gpu_info SRCS gpu_info.cc) nv_test(cuda_test SRCS cuda_test.cu) cc_library(place SRCS place.cc) diff --git a/paddle/platform/cpu_info_test.cc b/paddle/platform/cpu_info_test.cc new file mode 100644 index 0000000000..5b7ce7c763 --- /dev/null +++ b/paddle/platform/cpu_info_test.cc @@ -0,0 +1,18 @@ +#include "paddle/platform/cpu_info.h" + +#include <iostream> +#include <sstream> + +#include "gflags/gflags.h" +#include "gtest/gtest.h" + +DECLARE_double(fraction_of_cpu_memory_to_use); + +TEST(CpuMemoryUsage, Print) { + std::stringstream ss; + size_t mem_size = paddle::platform::CpuTotalMemory() / 1024 / 1024 / 1024; + ss << std::to_string( + static_cast<int>(FLAGS_fraction_of_cpu_memory_to_use * 100)) + << "% of CPU Memory Usage: " << mem_size << " GB"; + std::cout << ss.str(); +} diff --git a/paddle/platform/cuda.h 
b/paddle/platform/cuda.h deleted file mode 100644 index 8fe891f9ce..0000000000 --- a/paddle/platform/cuda.h +++ /dev/null @@ -1,40 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#ifndef PADDLE_ONLY_CPU - -#include <thrust/system/cuda/error.h> -#include <thrust/system_error.h> - -namespace paddle { -namespace platform { - -inline void throw_on_error(cudaError_t e, const char* message) { - if (e) { - throw thrust::system_error(e, thrust::cuda_category(), message); - } -} - -int GetDeviceCount(void) { - int count; - throw_on_error(cudaGetDeviceCount(&count), "cudaGetDeviceCount failed"); - return count; -} - -} // namespace platform -} // namespace paddle - -#endif // PADDLE_ONLY_CPU From 2d3c186a96178132784ecb9ba81403b67c3b7f67 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 29 Jun 2017 14:14:55 +0800 Subject: [PATCH 163/542] Follow comments --- cmake/generic.cmake | 6 ++++-- proto/CMakeLists.txt | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 779f627115..8117dbc53e 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -87,6 +87,9 @@ # go_library(example SHARED) # +# including binary directory for generated headers. +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + if(NOT APPLE) find_package(Threads REQUIRED) link_libraries(${CMAKE_THREAD_LIBS_INIT}) @@ -339,6 +342,5 @@ function(proto_library TARGET_NAME) set(proto_srcs) set(proto_hdrs) protobuf_generate_cpp(proto_srcs proto_hdrs ${proto_library_SRCS}) - include_directories(${CMAKE_CURRENT_BINARY_DIR}) - cc_library(${TARGET_NAME} SRCS ${proto_srcs}) + cc_library(${TARGET_NAME} SRCS ${proto_srcs} DEPS protobuf) endfunction() diff --git a/proto/CMakeLists.txt b/proto/CMakeLists.txt index 70dd4d674c..18584cafe7 100644 --- a/proto/CMakeLists.txt +++ b/proto/CMakeLists.txt @@ -1,4 +1,5 @@ file(GLOB proto_filenames . *.proto) +include_directories(${CMAKE_CURRENT_BINARY_DIR}) proto_library(paddle_proto SRCS ${proto_filenames}) set(PROTO_GEN) From 9af54c9ee98cabb9e70a10261a1c19cd8e8b3297 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 29 Jun 2017 14:39:37 +0800 Subject: [PATCH 164/542] do not use default argument in Scope --- paddle/framework/scope.h | 41 +++++++++++++++++++++++----------- paddle/framework/scope_test.cc | 6 ++--- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/paddle/framework/scope.h b/paddle/framework/scope.h index 88a13145ca..a4470f726f 100644 --- a/paddle/framework/scope.h +++ b/paddle/framework/scope.h @@ -24,24 +24,31 @@ namespace paddle { namespace framework { /** + * @brief Scope that manages all variables. + * * Scope is an association of a name to Variable. All variables belong to * Scope. You need to specify a scope to run a Net, i.e., `net.Run(&scope)`. * One net can run in different scopes and update different variables in the scope. 
*/ class Scope { - private: - explicit Scope(const std::shared_ptr<Scope>& parent = nullptr) - : parent_(parent) {} - public: - static std::shared_ptr<Scope> Create( - const std::shared_ptr<Scope>& parent = nullptr) { - return std::make_shared<Scope>(Scope(parent)); - } + public: + /** + * @brief Initialize a Scope without parent. + */ + Scope() {} + + /** + * @brief Initialize a Scope with parent. + */ + explicit Scope(const std::shared_ptr<Scope>& parent) : parent_(parent) {} - /// Create Variable in this Scope. Failed if Variable already been - /// created. + /** + * @brief Create Variable + * + * Create Variable in this Scope. Return the existing one if the Variable + * has already been created. + */ Variable* CreateVariable(const std::string& name) { auto var = GetVariable(name); if (var) { @@ -52,8 +59,12 @@ class Scope { } } - /// Get Variable from this Scope, this function will recursive find Variable - /// from it's parent scope. Return nullptr if not found. + /** + * @brief Get Variable. + * + * Get Variable from this Scope, this function will recursively find the + * Variable from its parent scope. Return nullptr if not found. + */ Variable* GetVariable(const std::string& name) const { auto it = vars_.find(name); if (it != vars_.end()) { @@ -65,7 +76,11 @@ class Scope { } } - /// Find if there is a Variable in this scope and it's parent scope + /** + * @brief Check whether this scope has a Variable named name. + * + * Find if there is a Variable in this scope and its parent scope + */ bool HasVariable(const std::string& name) const { return (vars_.find(name) != vars_.end() || (parent_ && parent_->HasVariable(name))); diff --git a/paddle/framework/scope_test.cc b/paddle/framework/scope_test.cc index ec6236ec62..df1afb200c 100644 --- a/paddle/framework/scope_test.cc +++ b/paddle/framework/scope_test.cc @@ -19,7 +19,7 @@ TEST(Scope, Create) { using paddle::framework::Scope; using paddle::framework::Variable; - auto scope = Scope::Create(); + auto scope = std::make_shared<Scope>(); Variable* var0 = scope->CreateVariable(""); EXPECT_NE(var0, nullptr); @@ -46,8 +46,8 @@ TEST(Scope, Parent) { using paddle::framework::Scope; using paddle::framework::Variable; - auto parent_scope = Scope::Create(); - auto scope = Scope::Create(parent_scope); + auto parent_scope = std::make_shared<Scope>(); + auto scope = std::make_shared<Scope>(parent_scope); Variable* var0 = parent_scope->CreateVariable("a"); EXPECT_NE(var0, nullptr); From 3c925feb71a9b8c40cad60cf4c453ba083ed69a9 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 29 Jun 2017 14:52:50 +0800 Subject: [PATCH 165/542] update design doc --- doc/design/scope.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/design/scope.md b/doc/design/scope.md index 4d14a64977..afe6bc028c 100644 --- a/doc/design/scope.md +++ b/doc/design/scope.md @@ -59,9 +59,9 @@ class Scope { Scope(const std::shared_ptr<Scope>& scope): parent_(scope) {} Variable* GetVariable(const std::string& name) const { - Variable* var = GetVarLocally(name); - if (var != nullptr) { - return var; + auto it = vars_.find(name); + if (it != vars_.end()) { + return it->second.get(); } else if (parent_ != nullptr) { return parent_->GetVariable(name); } else { @@ -97,8 +97,8 @@ class Scope { // return nullptr if not found. Variable* GetVariable(const std::string& name) const; - // return Error if already contains same name variable. - Error CreateVariable(const std::string& name); + // Return the existing Variable if a variable with the same name already exists. 
+ Variable* CreateVariable(const std::string& name); private: std::shared_ptr<Scope> parent_; From 3d44fd5bf38b8cd74fccc17081972b5a9a0eaa2e Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 29 Jun 2017 14:53:29 +0800 Subject: [PATCH 166/542] Follow yiqun's comments --- cmake/generic.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 8117dbc53e..61353a4a26 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -88,7 +88,7 @@ # # including binary directory for generated headers. -include_directories(${CMAKE_CURRENT_BINARY_DIR}) +include_directories(${CMAKE_BINARY_DIR}) if(NOT APPLE) find_package(Threads REQUIRED) link_libraries(${CMAKE_THREAD_LIBS_INIT}) From c18275ffb3d80047cb77eedcd88d1ffb11d72ea5 Mon Sep 17 00:00:00 2001 From: qijun Date: Thu, 29 Jun 2017 14:56:16 +0800 Subject: [PATCH 167/542] add more choice for eigen downloading --- cmake/external/eigen.cmake | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/cmake/external/eigen.cmake b/cmake/external/eigen.cmake index 253d436bcc..45f44f617d 100644 --- a/cmake/external/eigen.cmake +++ b/cmake/external/eigen.cmake @@ -7,8 +7,17 @@ INCLUDE_DIRECTORIES(${EIGEN_SOURCE_DIR}/src/eigen3) ExternalProject_Add( eigen3 ${EXTERNAL_PROJECT_LOG_ARGS} - URL "https://bitbucket.org/eigen/eigen/get/3.3.4.tar.gz" - URL_MD5 "1a47e78efe365a97de0c022d127607c3" + # for latest version, please get from official website + # URL "https://bitbucket.org/eigen/eigen/get/3.3.4.tar.gz" + # URL_MD5 "1a47e78efe365a97de0c022d127607c3" + + # for no-ssl http support, please get from bazel's mirror + # URL "http://mirror.bazel.build/bitbucket.org/eigen/eigen/get/f3a22f35b044.tar.gz" + # URL_MD5 "4645c66075982da6fa0bcf6b20f3e8f7" + + # get from github mirror + GIT_REPOSITORY "https://github.com/RLovelett/eigen.git" + GIT_TAG "a46d2e7337c4656f00abe54a8115f6d76153a048" PREFIX ${EIGEN_SOURCE_DIR} UPDATE_COMMAND "" CONFIGURE_COMMAND "" From 52efb243b82b7b68868cfe5391eafb0376ddb839 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 29 Jun 2017 15:06:46 +0800 Subject: [PATCH 168/542] Fix CI tests --- paddle/testing/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/testing/CMakeLists.txt b/paddle/testing/CMakeLists.txt index 4aa6eae681..4245df5ab7 100644 --- a/paddle/testing/CMakeLists.txt +++ b/paddle/testing/CMakeLists.txt @@ -2,7 +2,7 @@ if(WITH_TESTING) add_library(paddle_test_main STATIC TestMain.cpp) - add_dependencies(paddle_test_main paddle_proto) + add_dependencies(paddle_test_main paddle_proto ${external_project_dependencies}) add_library(paddle_test_util STATIC TestUtil.cpp) - add_dependencies(paddle_test_util paddle_proto) + add_dependencies(paddle_test_util paddle_proto ${external_project_dependencies}) endif() From b5ab4b69bcfa604a1ebbb964da1765ff2c586a6a Mon Sep 17 00:00:00 2001 From: yangyaming Date: Thu, 29 Jun 2017 15:11:40 +0800 Subject: [PATCH 169/542] Follow comments, mainly use std::copy to simplify logic. 
--- .../gserver/layers/DetectionOutputLayer.cpp | 20 ++-- paddle/gserver/layers/DetectionOutputLayer.h | 6 +- paddle/gserver/layers/MultiBoxLossLayer.cpp | 109 ++++++++++-------- python/paddle/trainer/config_parser.py | 4 +- .../paddle/trainer_config_helpers/layers.py | 7 +- 5 files changed, 74 insertions(+), 72 deletions(-) diff --git a/paddle/gserver/layers/DetectionOutputLayer.cpp b/paddle/gserver/layers/DetectionOutputLayer.cpp index 2a4d7f8b5b..8ab838e191 100644 --- a/paddle/gserver/layers/DetectionOutputLayer.cpp +++ b/paddle/gserver/layers/DetectionOutputLayer.cpp @@ -48,8 +48,6 @@ void DetectionOutputLayer::forward(PassType passType) { Matrix::resizeOrCreate(locTmpBuffer_, 1, locSizeSum_, false, useGpu_); Matrix::resizeOrCreate( confTmpBuffer_, confSizeSum_ / numClasses_, numClasses_, false, useGpu_); - locBuffer_ = locTmpBuffer_; - confBuffer_ = confTmpBuffer_; size_t locOffset = 0; size_t confOffset = 0; @@ -68,7 +66,7 @@ void DetectionOutputLayer::forward(PassType passType) { locSizeSum_, locOffset, batchSize, - *locBuffer_, + *locTmpBuffer_, kNCHWToNHWC); confOffset += appendWithPermute(*inConf, height, @@ -76,7 +74,7 @@ void DetectionOutputLayer::forward(PassType passType) { confSizeSum_, confOffset, batchSize, - *confBuffer_, + *confTmpBuffer_, kNCHWToNHWC); } CHECK_EQ(locOffset, locSizeSum_ / batchSize); @@ -100,23 +98,25 @@ void DetectionOutputLayer::forward(PassType passType) { priorValue = priorCpuValue_; } else { priorValue = getInputValue(*getPriorBoxLayer()); + locBuffer_ = locTmpBuffer_; + confBuffer_ = confTmpBuffer_; } confBuffer_->softmax(*confBuffer_); size_t numPriors = priorValue->getElementCnt() / 8; - vector<vector<NormalizedBBox>> allDecodedBBoxes; + std::vector<std::vector<NormalizedBBox>> allDecodedBBoxes; for (size_t n = 0; n < batchSize; ++n) { - vector<NormalizedBBox> decodedBBoxes; + std::vector<NormalizedBBox> decodedBBoxes; for (size_t i = 0; i < numPriors; ++i) { size_t priorOffset = i * 8; size_t locPredOffset = n * numPriors * 4 + i * 4; - vector<NormalizedBBox> priorBBoxVec; + std::vector<NormalizedBBox> priorBBoxVec; getBBoxFromPriorData( priorValue->getData() + priorOffset, 1, priorBBoxVec); - vector<vector<real>> priorBBoxVar; + std::vector<std::vector<real>> priorBBoxVar; getBBoxVarFromPriorData( priorValue->getData() + priorOffset, 1, priorBBoxVar); - vector<real> locPredData; + std::vector<real> locPredData; for (size_t j = 0; j < 4; ++j) locPredData.push_back(*(locBuffer_->getData() + locPredOffset + j)); NormalizedBBox bbox = @@ -126,7 +126,7 @@ void DetectionOutputLayer::forward(PassType passType) { allDecodedBBoxes.push_back(decodedBBoxes); } - vector<map<size_t, vector<size_t>>> allIndices; + std::vector<std::map<size_t, std::vector<size_t>>> allIndices; size_t numKept = getDetectionIndices(confBuffer_->getData(), numPriors, numClasses_, diff --git a/paddle/gserver/layers/DetectionOutputLayer.h b/paddle/gserver/layers/DetectionOutputLayer.h index 38271cb054..9cc568219c 100644 --- a/paddle/gserver/layers/DetectionOutputLayer.h +++ b/paddle/gserver/layers/DetectionOutputLayer.h @@ -19,17 +19,13 @@ limitations under the License. */ #include <vector> #include "DetectionUtil.h" #include "Layer.h" -using std::vector; -using std::map; -using std::pair; - namespace paddle { /** * The detection output layer for a SSD detection task. This layer applies * non-maximum suppression to all predicted bounding boxes and keeps the * top-K bounding boxes. - * - Input: This layer need three input layers: This first input layer + * - Input: This layer needs three input layers: The first input layer * is the priorbox layer. The other two input layers are convolution * layers for generating bbox location offset and the classification * confidence. 
diff --git a/paddle/gserver/layers/MultiBoxLossLayer.cpp b/paddle/gserver/layers/MultiBoxLossLayer.cpp index 27a2cc3fa4..f2d7b8eb1d 100644 --- a/paddle/gserver/layers/MultiBoxLossLayer.cpp +++ b/paddle/gserver/layers/MultiBoxLossLayer.cpp @@ -17,10 +17,6 @@ limitations under the License. */ #include <vector> #include "DataLayer.h" -using std::vector; -using std::map; -using std::pair; - namespace paddle { REGISTER_LAYER(multibox_loss, MultiBoxLossLayer); @@ -133,7 +129,7 @@ void MultiBoxLossLayer::forward(PassType passType) { } // Get max scores for each prior bbox. Used in negative mining - vector<vector<real>> allMaxConfScore; + std::vector<std::vector<real>> allMaxConfScore; numPriors_ = priorValue->getElementCnt() / 8; getMaxConfidenceScores(confBuffer_->getData(), batchSize, @@ -151,18 +147,18 @@ void MultiBoxLossLayer::forward(PassType passType) { allMatchIndices_.clear(); allNegIndices_.clear(); - pair<size_t, size_t> retPair = generateMatchIndices(*priorValue, - numPriors_, - *labelValue, - labelIndex, - seqNum, - allMaxConfScore, - batchSize, - overlapThreshold_, - negOverlap_, - negPosRatio_, - &allMatchIndices_, - &allNegIndices_); + std::pair<size_t, size_t> retPair = generateMatchIndices(*priorValue, + numPriors_, + *labelValue, + labelIndex, + seqNum, + allMaxConfScore, + batchSize, + overlapThreshold_, + negOverlap_, + negPosRatio_, + &allMatchIndices_, + &allNegIndices_); numMatches_ = retPair.first; numNegs_ = retPair.second; @@ -175,30 +171,31 @@ void MultiBoxLossLayer::forward(PassType passType) { Matrix::resizeOrCreate(locGTData_, numMatches_ * 4, 1, false, false); Matrix::resizeOrCreate(locDiff_, numMatches_ * 4, 1, false, false); locDiff_->zeroMem(); - vector<real> locGTData; + std::vector<real> locGTData; + real* locDiffData = locDiff_->getData(); + const real* locBufferData = locBuffer_->getData(); for (size_t n = 0; n < batchSize; ++n) { for (size_t i = 0; i < numPriors_; ++i) { if (allMatchIndices_[n][i] == -1) continue; // match none size_t locOffset = n * (locBuffer_->getElementCnt() / batchSize) + i * 4; - locDiff_->getData()[count++] = (locBuffer_->getData() + locOffset)[0]; - locDiff_->getData()[count++] = (locBuffer_->getData() + locOffset)[1]; - locDiff_->getData()[count++] = (locBuffer_->getData() + locOffset)[2]; - locDiff_->getData()[count++] = (locBuffer_->getData() + locOffset)[3]; - + std::copy(locBufferData + locOffset, + locBufferData + locOffset + 4, + locDiffData + count); + count += 4; const int gtIdx = allMatchIndices_[n][i]; size_t priorOffset = i * 8; - vector<NormalizedBBox> priorBBoxVec; + std::vector<NormalizedBBox> priorBBoxVec; getBBoxFromPriorData( priorValue->getData() + priorOffset, 1, priorBBoxVec); - vector<vector<real>> priorBBoxVar; + std::vector<std::vector<real>> priorBBoxVar; getBBoxVarFromPriorData( priorValue->getData() + priorOffset, 1, priorBBoxVar); size_t labelOffset = (labelIndex[n] + gtIdx) * 6; - vector<NormalizedBBox> gtBBoxVec; + std::vector<NormalizedBBox> gtBBoxVec; getBBoxFromLabelData(labelValue->getData() + labelOffset, 1, gtBBoxVec); - vector<real> gtEncode; + std::vector<real> gtEncode; encodeBBoxWithVar( priorBBoxVec[0], priorBBoxVar[0], gtBBoxVec[0], gtEncode); locGTData.insert(locGTData.end(), gtEncode.begin(), gtEncode.end()); @@ -218,7 +215,9 @@ void MultiBoxLossLayer::forward(PassType passType) { confProb_->zeroMem(); size_t count = 0; - vector<real> confPredData; + std::vector<real> confPredData; + real* confProbData = confProb_->getData(); + const real* confBufferData = confBuffer_->getData(); for (size_t n = 0; n < batchSize; ++n) { for (size_t i = 0; i < numPriors_; ++i) { if (allMatchIndices_[n][i] == -1) continue; const int 
gtLabel = (labelValue->getData() + labelOffset)[0]; confGTData_->getData()[count] = gtLabel; size_t confOffset = n * numPriors_ * numClasses_ + i * numClasses_; - for (size_t j = 0; j < numClasses_; ++j) { - confProb_->getData()[count * numClasses_ + j] = - (confBuffer_->getData() + confOffset)[j]; - confPredData.push_back((confBuffer_->getData() + confOffset)[j]); - } + std::copy(confBufferData + confOffset, + confBufferData + confOffset + numClasses_, + confProbData + count * numClasses_); + confPredData.reserve(confPredData.size() + numClasses_); + confPredData.insert(confPredData.end(), + confBufferData + confOffset, + confBufferData + confOffset + numClasses_); ++count; } // Negative mining samples for (size_t i = 0; i < allNegIndices_[n].size(); ++i) { confGTData_->getData()[count] = backgroundId_; size_t confOffset = n * numPriors_ * numClasses_ + allNegIndices_[n][i] * numClasses_; - for (size_t j = 0; j < numClasses_; ++j) { - confProb_->getData()[count * numClasses_ + j] = - (confBuffer_->getData() + confOffset)[j]; - confPredData.push_back((confBuffer_->getData() + confOffset)[j]); - } - count++; + std::copy(confBufferData + confOffset, + confBufferData + confOffset + numClasses_, + confProbData + count * numClasses_); + confPredData.reserve(confPredData.size() + numClasses_); + confPredData.insert(confPredData.end(), + confBufferData + confOffset, + confBufferData + confOffset + numClasses_); + ++count; } } + CHECK_EQ(numConf_, count); confProb_->softmax(*confProb_); MatrixPtr confLossOutput; Matrix::resizeOrCreate(confLossOutput, numConf_, 1, false, false); @@ -254,7 +258,7 @@ void MultiBoxLossLayer::forward(PassType passType) { } real loss = locLoss_ + confLoss_; MatrixPtr outV = getOutputValue(); - vector<real> tmp(batchSize, loss); + std::vector<real> tmp(batchSize, loss); outV->copyFrom(&tmp[0], batchSize); } @@ -274,16 +278,18 @@ void MultiBoxLossLayer::backward(const UpdateCallback& callback) { locDiff_->getData()[i] *= (1. / numMatches_); // Copy gradient back size_t count = 0; + const real* locDiffData = locDiff_->getData(); - for (size_t n = 0; n < batchSize; ++n) + for (size_t n = 0; n < batchSize; ++n) { for (size_t i = 0; i < numPriors_; ++i) { if (allMatchIndices_[n][i] == -1) continue; - real* locDiffData = locBuffer_->getData() + n * numPriors_ * 4 + i * 4; - locDiffData[0] = (locDiff_->getData() + count * 4)[0]; - locDiffData[1] = (locDiff_->getData() + count * 4)[1]; - locDiffData[2] = (locDiff_->getData() + count * 4)[2]; - locDiffData[3] = (locDiff_->getData() + count * 4)[3]; + real* locBufferData = + locBuffer_->getData() + n * numPriors_ * 4 + i * 4; + std::copy(locDiffData + count * 4, + locDiffData + (count + 1) * 4, + locBufferData); ++count; } + } CHECK_EQ(count, numMatches_); } @@ -293,21 +299,24 @@ void MultiBoxLossLayer::backward(const UpdateCallback& callback) { for (size_t i = 0; i < numConf_ * numClasses_; ++i) confProb_->getData()[i] *= (1. 
/ numMatches_); size_t count = 0; + const real* confProbData = confProb_->getData(); for (size_t n = 0; n < batchSize; ++n) { for (size_t i = 0; i < numPriors_; ++i) { if (allMatchIndices_[n][i] == -1) continue; real* confDiffData = confBuffer_->getData() + n * numPriors_ * numClasses_ + i * numClasses_; - for (size_t j = 0; j < numClasses_; ++j) - confDiffData[j] = (confProb_->getData() + count * numClasses_)[j]; + std::copy(confProbData + count * numClasses_, + confProbData + (count + 1) * numClasses_, + confDiffData); ++count; } for (size_t i = 0; i < allNegIndices_[n].size(); ++i) { int idx = allNegIndices_[n][i]; real* confDiffData = confBuffer_->getData() + n * numPriors_ * numClasses_ + idx * numClasses_; - for (size_t j = 0; j < numClasses_; ++j) - confDiffData[j] = (confProb_->getData() + count * numClasses_)[j]; + std::copy(confProbData + count * numClasses_, + confProbData + (count + 1) * numClasses_, + confDiffData); ++count; } } diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index c46b335d99..17f6704ea1 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1679,7 +1679,7 @@ class PriorBoxLayer(LayerBase): @config_layer('multibox_loss') class MultiBoxLossLayer(LayerBase): def __init__(self, name, inputs, input_num, num_classes, overlap_threshold, - neg_pos_ratio, neg_overlap, background_id): + neg_pos_ratio, neg_overlap, background_id, **xargs): super(MultiBoxLossLayer, self).__init__(name, 'multibox_loss', 0, inputs) config_assert( @@ -1701,7 +1701,7 @@ class MultiBoxLossLayer(LayerBase): class DetectionOutputLayer(LayerBase): def __init__(self, name, inputs, size, input_num, num_classes, nms_threshold, nms_top_k, keep_top_k, confidence_threshold, - background_id): + background_id, **xargs): super(DetectionOutputLayer, self).__init__(name, 'detection_output', 0, inputs) config_assert( diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 770559dc77..1286ed198e 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -1092,22 +1092,19 @@ def multibox_loss_layer(input_loc, :type background_id: int :return: LayerOutput """ - input_loc_num = 0 - input_conf_num = 0 - if isinstance(input_loc, LayerOutput): input_loc = [input_loc] assert isinstance(input_loc, collections.Sequence) # list or tuple for each in input_loc: assert isinstance(each, LayerOutput) - input_loc_num += 1 + input_loc_num = len(input_loc) if isinstance(input_conf, LayerOutput): input_conf = [input_conf] assert isinstance(input_conf, collections.Sequence) # list or tuple for each in input_conf: assert isinstance(each, LayerOutput) - input_conf_num += 1 + input_conf_num = len(input_conf) # Check the input layer number. 
assert input_loc_num == input_conf_num From a1e7284ad139e6578036b7d872563aa1a2642351 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 29 Jun 2017 15:24:12 +0800 Subject: [PATCH 170/542] Fix TravisCI --- paddle/pserver/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/pserver/CMakeLists.txt b/paddle/pserver/CMakeLists.txt index f2e0b4b76b..2245c7d88c 100644 --- a/paddle/pserver/CMakeLists.txt +++ b/paddle/pserver/CMakeLists.txt @@ -17,7 +17,7 @@ add_library(paddle_network STATIC add_style_check_target(paddle_network ${NETWORK_SOURCES}) add_style_check_target(paddle_network ${NETWORK_HEADERS}) -add_dependencies(paddle_network paddle_proto) +add_dependencies(paddle_network paddle_proto ${external_project_dependencies}) ################### paddle_pserver ###################### set(PSERVER_SOURCES From b0ad9c907422e1256bc5ae6881913f71cd9d4aed Mon Sep 17 00:00:00 2001 From: caoying03 Date: Wed, 28 Jun 2017 10:12:23 +0800 Subject: [PATCH 171/542] enable intializing memory state for lstmemory_group. --- .../paddle/trainer_config_helpers/layers.py | 51 ++++++++++--------- .../paddle/trainer_config_helpers/networks.py | 48 ++++++++++------- 2 files changed, 57 insertions(+), 42 deletions(-) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 84ed160773..a601d5c84a 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -1149,10 +1149,10 @@ def pooling_layer(input, @layer_support(DROPOUT) def lstmemory(input, name=None, + size=None, reverse=False, act=None, gate_act=None, - size=None, state_act=None, bias_attr=None, param_attr=None, @@ -1194,6 +1194,8 @@ def lstmemory(input, :param name: The lstmemory layer name. :type name: basestring + :param size: DEPRECATED. size of the lstm cell + :type size: int :param input: input layer name. :type input: LayerOutput :param reverse: is sequence process reversed or not. @@ -1220,15 +1222,15 @@ def lstmemory(input, assert state_act.support_hppl assert act.support_hppl assert input.size is not None and input.size % 4 == 0 + if size is not None: if input.size / 4 == size: plog = logger.warning else: plog = logger.fatal - - plog("NOTE: The lstmemory layer[%s]'s size is set by previous input " - "layer. The lstm size should be equal with input layer size/4. The" - " size which is set explicitly will be ignored." % name) + plog("size of lstmemory layer: %s is automatically set to " + "size of input layer / 4. The parameter size passing to " + "this layer is ignored." % (name)) Layer( name=name, @@ -1255,11 +1257,11 @@ def lstmemory(input, @wrap_name_default("gru") @layer_support(DROPOUT) def grumemory(input, + size=None, name=None, reverse=False, act=None, gate_act=None, - size=None, bias_attr=None, param_attr=None, layer_attr=None): @@ -1318,6 +1320,8 @@ def grumemory(input, :type name: None|basestring :param input: input layer. :type input: LayerOutput. + :param size: DEPRECATED. size of the gru cell + :type size: int :param reverse: Whether sequence process is reversed or not. :type reverse: bool :param act: activation type, TanhActivation by default. This activation @@ -1334,9 +1338,6 @@ def grumemory(input, :type param_attr: ParameterAttribute|None|False :param layer_attr: Extra Layer attribute :type layer_attr: ExtraLayerAttribute|None - :param size: Stub parameter of size, but actually not used. If set this size - will get a warning. - :type size: None :return: LayerOutput object. 
:rtype: LayerOutput """ @@ -1348,9 +1349,9 @@ def grumemory(input, plog = logger.warning else: plog = logger.fatal - plog("NOTE: the gru memory layer's size is set by previous input layer," - " and should be input size / 3. Set size explicitly will be " - "ignored.") + plog("size of grumemory layer: %s is automatically set to " + "size of input layer / 3. The parameter size passing to this " + "layer is ignored." % (name)) Layer( name=name, @@ -2524,8 +2525,8 @@ def img_cmrnorm_layer(input, @wrap_bias_attr_default() -@wrap_param_attr_default(default_factory=lambda _: ParamAttr(initial_mean=1.0, - initial_std=0.)) +@wrap_param_attr_default( + default_factory=lambda _: ParamAttr(initial_mean=1.0, initial_std=0.)) @wrap_act_default(act=ReluActivation()) @wrap_name_default("batch_norm") @layer_support(DROPOUT) @@ -3013,25 +3014,25 @@ def lstm_step_layer(input, bias_attr=None, layer_attr=None): """ - LSTM Step Layer. It used in recurrent_group. The lstm equations are shown - as follow. + LSTM Step Layer. This function is used only in recurrent_group. + The lstm equations are shown as follows. .. math:: - i_t & = \\sigma(W_{xi}x_{t} + W_{hi}h_{t-1} + W_{ci}c_{t-1} + b_i) + i_t & = \\sigma(W_{x_i}x_{t} + W_{h_i}h_{t-1} + W_{c_i}c_{t-1} + b_i) - f_t & = \\sigma(W_{xf}x_{t} + W_{hf}h_{t-1} + W_{cf}c_{t-1} + b_f) + f_t & = \\sigma(W_{x_f}x_{t} + W_{h_f}h_{t-1} + W_{c_f}c_{t-1} + b_f) - c_t & = f_tc_{t-1} + i_t tanh (W_{xc}x_t+W_{hc}h_{t-1} + b_c) + c_t & = f_tc_{t-1} + i_t tanh (W_{x_c}x_t+W_{h_c}h_{t-1} + b_c) - o_t & = \\sigma(W_{xo}x_{t} + W_{ho}h_{t-1} + W_{co}c_t + b_o) + o_t & = \\sigma(W_{x_o}x_{t} + W_{h_o}h_{t-1} + W_{c_o}c_t + b_o) h_t & = o_t tanh(c_t) The input of lstm step is :math:`Wx_t + Wh_{t-1}`, and user should use :code:`mixed_layer` and :code:`full_matrix_projection` to calculate these - input vector. + input vectors. The state of lstm step is :math:`c_{t-1}`. And lstm step layer will do @@ -3042,14 +3043,14 @@ def lstm_step_layer(input, ... - This layer contains two outputs. Default output is :math:`h_t`. The other - output is :math:`o_t`, which name is 'state' and can use + This layer has two outputs. Default output is :math:`h_t`. The other + output is :math:`o_t`, whose name is 'state' and can use :code:`get_output_layer` to extract this output. :param name: Layer's name. :type name: basestring - :param size: Layer's size. NOTE: lstm layer's size, should be equal as - :code:`input.size/4`, and should be equal as + :param size: Layer's size. NOTE: lstm layer's size, should be equal to + :code:`input.size/4`, and should be equal to :code:`state.size`. :type size: int :param input: input layer. :math:`Wx_t + Wh_{t-1}` diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py index 67154a8d7d..0d730e0995 100755 --- a/python/paddle/trainer_config_helpers/networks.py +++ b/python/paddle/trainer_config_helpers/networks.py @@ -614,6 +614,7 @@ def simple_lstm(input, @wrap_name_default('lstm_unit') def lstmemory_unit(input, + memory_boot=None, name=None, size=None, param_attr=None, @@ -626,9 +627,9 @@ def lstmemory_unit(input, lstm_layer_attr=None, get_output_layer_attr=None): """ - Define calculations that a LSTM unit performs in a single time step. - This function itself is not a recurrent layer, so that it can not be - directly applied to sequence input. This function is always used in + Define calculations that a LSTM unit performs during a single time step. 
+ This function itself is not a recurrent layer, so it can not be + directly used to process sequence inputs. This function is always used in recurrent_group (see layers.py for more details) to implement attention mechanism. @@ -638,13 +639,13 @@ def lstmemory_unit(input, .. math:: - i_t & = \\sigma(W_{xi}x_{t} + W_{hi}h_{t-1} + W_{ci}c_{t-1} + b_i) + i_t & = \\sigma(W_{x_i}x_{t} + W_{h_i}h_{t-1} + W_{c_i}c_{t-1} + b_i) - f_t & = \\sigma(W_{xf}x_{t} + W_{hf}h_{t-1} + W_{cf}c_{t-1} + b_f) + f_t & = \\sigma(W_{x_f}x_{t} + W_{h_f}h_{t-1} + W_{c_f}c_{t-1} + b_f) - c_t & = f_tc_{t-1} + i_t tanh (W_{xc}x_t+W_{hc}h_{t-1} + b_c) + c_t & = f_tc_{t-1} + i_t tanh (W_{x_c}x_t+W_{h_c}h_{t-1} + b_c) - o_t & = \\sigma(W_{xo}x_{t} + W_{ho}h_{t-1} + W_{co}c_t + b_o) + o_t & = \\sigma(W_{x_o}x_{t} + W_{h_o}h_{t-1} + W_{c_o}c_t + b_o) h_t & = o_t tanh(c_t) @@ -661,6 +662,8 @@ def lstmemory_unit(input, :param input: input layer name. :type input: LayerOutput + :param memory_boot: the initialization state of the LSTM cell. + :type memory_boot: LayerOutput | None :param name: lstmemory unit name. :type name: basestring :param size: lstmemory unit size. @@ -692,7 +695,8 @@ def lstmemory_unit(input, assert input.size % 4 == 0 size = input.size / 4 out_mem = memory(name=name, size=size) - state_mem = memory(name="%s_state" % name, size=size) + state_mem = memory( + name="%s_state" % name, size=size, boot_layer=memory_boot) with mixed_layer( name="%s_input_recurrent" % name, @@ -726,6 +730,7 @@ def lstmemory_unit(input, def lstmemory_group(input, size=None, name=None, + memory_boot=None, reverse=False, param_attr=None, act=None, @@ -737,7 +742,7 @@ def lstmemory_group(input, lstm_layer_attr=None, get_output_layer_attr=None): """ - lstm_group is a recurrent layer group version of Long Short Term Memory. It + lstm_group is a recurrent_group version of Long Short Term Memory. It does exactly the same calculation as the lstmemory layer (see lstmemory in layers.py for the maths) does. A promising benefit is that LSTM memory cell states, or hidden states in every time step are accessible to the @@ -748,8 +753,8 @@ def lstmemory_group(input, NOTE: In PaddlePaddle's implementation, the following input-to-hidden multiplications: - :math:`W_{xi}x_{t}` , :math:`W_{xf}x_{t}`, - :math:`W_{xc}x_t`, :math:`W_{xo}x_{t}` are not done in lstmemory_unit to + :math:`W_{x_i}x_{t}` , :math:`W_{x_f}x_{t}`, + :math:`W_{x_c}x_t`, :math:`W_{x_o}x_{t}` are not done in lstmemory_unit to speed up the calculations. Consequently, an additional mixed_layer with full_matrix_projection must be included before lstmemory_unit is called. @@ -765,8 +770,10 @@ def lstmemory_group(input, :param input: input layer name. :type input: LayerOutput - :param name: lstmemory group name. + :param name: name of the lstmemory group. :type name: basestring + :param memory_boot: the initialization state of LSTM cell. + :type memory_boot: LayerOutput | None :param size: lstmemory group size. :type size: int :param reverse: is lstm reversed @@ -798,6 +805,7 @@ def lstmemory_group(input, def __lstm_step__(ipt): return lstmemory_unit( input=ipt, + memory_boot=memory_boot, name=name, size=size, mixed_bias_attr=mixed_bias_attr, @@ -819,6 +827,7 @@ def lstmemory_group(input, @wrap_name_default('gru_unit') def gru_unit(input, + memory_boot=None, size=None, name=None, gru_bias_attr=None, @@ -829,8 +838,8 @@ def gru_unit(input, naive=False): """ Define calculations that a gated recurrent unit performs in a single time - step. 
This function itself is not a recurrent layer, so it can not be + directly used to process sequence inputs. This function is always used in the recurrent_group (see layers.py for more details) to implement attention mechanism. :param input: input layer name. :type input: LayerOutput + :param memory_boot: the initialization state of the GRU cell. + :type memory_boot: LayerOutput | None :param name: name of the gru group. :type name: basestring :param size: hidden size of the gru. @@ -856,7 +867,7 @@ if size is None: size = input.size / 3 - out_mem = memory(name=name, size=size) + out_mem = memory(name=name, size=size, boot_layer=memory_boot) if naive: __step__ = gru_step_naive_layer @@ -878,6 +889,7 @@ @wrap_name_default('gru_group') def gru_group(input, + memory_boot=None, size=None, name=None, reverse=False, @@ -888,7 +900,7 @@ def gru_group(input, gru_layer_attr=None, naive=False): """ - gru_group is a recurrent layer group version of Gated Recurrent Unit. It + gru_group is a recurrent_group version of Gated Recurrent Unit. It does exactly the same calculation as the grumemory layer does. A promising benefit is that gru hidden states are accessible to the user. This is especially useful in attention model. If you do not need to access @@ -908,6 +920,8 @@ :param input: input layer name. :type input: LayerOutput + :param memory_boot: the initialization state of the GRU cell. + :type memory_boot: LayerOutput | None :param name: name of the gru group. :type name: basestring :param size: hidden size of the gru. 
@@ -929,6 +943,7 @@ def gru_group(input, def __gru_step__(ipt): return gru_unit( input=ipt, + memory_boot=memory_boot, name=name, size=size, gru_bias_attr=gru_bias_attr, @@ -1083,7 +1098,6 @@ def simple_gru2(input, return grumemory( name=name, - size=size, input=m, reverse=reverse, bias_attr=gru_bias_attr, From e6c14f7e000d047cf3d3a1e18e2a13e3349b1ff9 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 29 Jun 2017 16:30:03 +0800 Subject: [PATCH 172/542] ENH: Polish cpu info interface --- paddle/platform/CMakeLists.txt | 3 +- paddle/platform/cpu_info.cc | 14 +++++++- paddle/platform/cpu_info.h | 10 ++++-- paddle/platform/cpu_info_test.cc | 13 ++++--- paddle/platform/cuda_test.cu | 59 -------------------------------- 5 files changed, 30 insertions(+), 69 deletions(-) delete mode 100644 paddle/platform/cuda_test.cu diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index d0bedf6ba9..969c91985d 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -1,8 +1,7 @@ cc_library(cpu_info SRCS cpu_info.cc) -cc_test(cpu_info_test SRCS cpu_info_test.cc DEPS cpu_info gflags) +cc_test(cpu_info_test SRCS cpu_info_test.cc DEPS cpu_info gflags glog) nv_library(gpu_info SRCS gpu_info.cc) -nv_test(cuda_test SRCS cuda_test.cu) cc_library(place SRCS place.cc) cc_test(place_test SRCS place_test.cc DEPS place glog gflags) diff --git a/paddle/platform/cpu_info.cc b/paddle/platform/cpu_info.cc index deff76502e..3da04420e5 100644 --- a/paddle/platform/cpu_info.cc +++ b/paddle/platform/cpu_info.cc @@ -47,9 +47,21 @@ inline size_t CpuTotalPhysicalMemory() { #endif } -size_t CpuTotalMemory() { +size_t CpuMaxAllocSize() { + // For distributed systems, it requires configuring and limiting + // the fraction of memory to use. return FLAGS_fraction_of_cpu_memory_to_use * CpuTotalPhysicalMemory(); } +size_t CpuMinChunkSize() { + // Allow to allocate the minimum chunk size is 256 bytes. + return 1 << 8; +} + +size_t CpuMaxChunkSize() { + // Allow to allocate the maximum chunk size is roughly 3% of CPU memory. + return CpuMaxAllocSize() / 32; +} + } // namespace platform } // namespace paddle diff --git a/paddle/platform/cpu_info.h b/paddle/platform/cpu_info.h index 3b768589e1..8df7c7b4bc 100644 --- a/paddle/platform/cpu_info.h +++ b/paddle/platform/cpu_info.h @@ -19,8 +19,14 @@ limitations under the License. */ namespace paddle { namespace platform { -//! Get the total memory on host machine. -size_t CpuTotalMemory(); +//! Get the maximum allocation size for a machine. +size_t CpuMaxAllocSize(); + +//! Get the minimum chunk size for buddy allocator. +size_t CpuMinChunkSize(); + +//! Get the maximum chunk size for buddy allocator. 
+size_t CpuMaxChunkSize(); } // namespace platform } // namespace paddle diff --git a/paddle/platform/cpu_info_test.cc b/paddle/platform/cpu_info_test.cc index 5b7ce7c763..8fb195aa7c 100644 --- a/paddle/platform/cpu_info_test.cc +++ b/paddle/platform/cpu_info_test.cc @@ -1,18 +1,21 @@ #include "paddle/platform/cpu_info.h" +#include "paddle/string/printf.h" #include <iostream> #include <sstream> +#include "gflags/gflags.h" +#include "glog/logging.h" #include "gtest/gtest.h" DECLARE_double(fraction_of_cpu_memory_to_use); TEST(CpuMemoryUsage, Print) { std::stringstream ss; - size_t mem_size = paddle::platform::CpuTotalMemory() / 1024 / 1024 / 1024; - ss << std::to_string( - static_cast<int>(FLAGS_fraction_of_cpu_memory_to_use * 100)) - << "% of CPU Memory Usage: " << mem_size << " GB"; - std::cout << ss.str(); + size_t memory_size = paddle::platform::CpuMaxAllocSize() / 1024 / 1024 / 1024; + float use_percent = FLAGS_fraction_of_cpu_memory_to_use * 100; + + std::cout << paddle::string::Sprintf("\n%.2f %% of CPU Memory Usage: %d GB\n", + use_percent, memory_size) + << std::endl; } diff --git a/paddle/platform/cuda_test.cu b/paddle/platform/cuda_test.cu deleted file mode 100644 index 4067dda2f1..0000000000 --- a/paddle/platform/cuda_test.cu +++ /dev/null @@ -1,59 +0,0 @@ -#include <cuda_runtime.h> -#include <stdio.h> -#include "gtest/gtest.h" - -#define CHECK_ERR(x) \ - if (x != cudaSuccess) { \ - fprintf(stderr, \ - "%s in %s at line %d\n", \ - cudaGetErrorString(err), \ - __FILE__, \ - __LINE__); \ - exit(-1); \ - } - -__global__ void vecAdd(float *d_A, float *d_B, float *d_C, int n) { - int i = blockDim.x * blockIdx.x + threadIdx.x; - if (i < n) { - d_C[i] = d_A[i] + d_B[i]; - } -} - -TEST(Cuda, Equality) { - int n = 10; - // Memory allocation for h_A, h_B and h_C (in the host) - float h_A[10] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 0.0}; - float h_B[10] = {0.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0}; - float h_C[10]; - float *d_A, *d_B, *d_C; - cudaError_t err; - // Memory allocation for d_A, d_B and d_C (in the device) - err = cudaMalloc((void **)&d_A, sizeof(float) * n); - CHECK_ERR(err); - - err = cudaMalloc((void **)&d_B, sizeof(float) * n); - CHECK_ERR(err); - - err = cudaMalloc((void **)&d_C, sizeof(float) * n); - CHECK_ERR(err); - - // Copying memory to device - err = cudaMemcpy(d_A, h_A, sizeof(float) * n, cudaMemcpyHostToDevice); - CHECK_ERR(err); - - err = cudaMemcpy(d_B, h_B, sizeof(float) * n, cudaMemcpyHostToDevice); - CHECK_ERR(err); - - // Calling the kernel - vecAdd<<<1, n>>>(d_A, d_B, d_C, n); - - // Copying results back to host - err = cudaMemcpy(h_C, d_C, sizeof(float) * n, cudaMemcpyDeviceToHost); - CHECK_ERR(err); - - EXPECT_EQ(h_C[0], 1.0); - for (int i = 1; i < n - 1; ++i) { - EXPECT_EQ(h_C[i], 11.0); - } - EXPECT_EQ(h_C[9], 1.0); -} From 6e7209f0584f73eb22313d98c676333379736d1e Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 29 Jun 2017 16:30:32 +0800 Subject: [PATCH 173/542] ENH: Add gpu info interface --- paddle/platform/gpu_info.cc | 25 +++++++++++++++++++++++++ paddle/platform/gpu_info.h | 6 ++++++ 2 files changed, 31 insertions(+) diff --git a/paddle/platform/gpu_info.cc b/paddle/platform/gpu_info.cc index 4208d83078..d6c6fe34ef 100644 --- a/paddle/platform/gpu_info.cc +++ b/paddle/platform/gpu_info.cc @@ -42,8 +42,33 @@ size_t GpuMaxAllocSize() { GpuMemoryUsage(available, total); + // Reserve the rest for page tables, etc. return total * FLAGS_fraction_of_gpu_memory_to_use; } +size_t GpuMinChunkSize() { + // The minimum chunk size that can be allocated is 256 bytes. 
+ return 1 << 8; +} + +size_t GpuMaxChunkSize() { + // The maximum chunk size that can be allocated is roughly 3% of GPU memory. + size_t total = 0; + size_t available = 0; + + GpuMemoryUsage(available, total); + + // Reserving the rest memory for page tables, etc. + size_t reserving = (1 - FLAGS_fraction_of_gpu_memory_to_use) * total; + + // If available is less than the minimum chunk size, no usable memory exists. + available = std::max(available, GpuMinChunkSize()) - GpuMinChunkSize(); + + // If available is less than reserving, no usable memory exists. + size_t usable = std::max(available, reserving) - reserving; + + return usable; +} + } // namespace platform } // namespace paddle diff --git a/paddle/platform/gpu_info.h b/paddle/platform/gpu_info.h index 174f093b43..d7bf0f4093 100644 --- a/paddle/platform/gpu_info.h +++ b/paddle/platform/gpu_info.h @@ -30,6 +30,12 @@ void GpuMemoryUsage(size_t& available, size_t& total); //! Get the maximum allocation size of current GPU device. size_t GpuMaxAllocSize(); +//! Get the minimum chunk size for GPU buddy allocator. +size_t GpuMinChunkSize(); + +//! Get the maximum chunk size for GPU buddy allocator. +size_t GpuMaxChunkSize(); } // namespace platform } // namespace paddle From 464886bf56cd91ffcd6617390d62dbd13c90a093 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 29 Jun 2017 16:31:05 +0800 Subject: [PATCH 174/542] FIX: fix typo in piece.h --- paddle/string/piece.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/string/piece.h b/paddle/string/piece.h index db7c3e6980..0272529d1c 100644 --- a/paddle/string/piece.h +++ b/paddle/string/piece.h @@ -35,7 +35,7 @@ public: // We provide non-explicit singleton constructors so users can // pass in a "const char*" or a "string" wherever a "Piece" - // is expected. These contructors ensure that if data_ is NULL, + // is expected. These constructors ensure that if data_ is NULL, // size_ is 0. Piece(); Piece(const char* d, size_t n); From 5c68aacad1abe9eefc4f1039aca4962b6c6d601f Mon Sep 17 00:00:00 2001 From: caoying03 Date: Thu, 29 Jun 2017 16:48:33 +0800 Subject: [PATCH 175/542] follow comments. --- python/paddle/trainer_config_helpers/networks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py index 0d730e0995..b77932ce5f 100755 --- a/python/paddle/trainer_config_helpers/networks.py +++ b/python/paddle/trainer_config_helpers/networks.py @@ -770,12 +770,12 @@ def lstmemory_group(input, :param input: input layer name. :type input: LayerOutput + :param size: lstmemory group size. + :type size: int :param name: name of the lstmemory group. :type name: basestring :param memory_boot: the initialization state of LSTM cell. :type memory_boot: LayerOutput | None - :param size: lstmemory group size. - :type size: int :param reverse: is lstm reversed :type reverse: bool :param param_attr: Parameter config, None if use default. 
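The memory_boot parameter introduced in the patches above lets the cell state of an lstmemory_group (and the hidden state of a gru_group) be booted from a computed layer instead of the default zero state. The following is a minimal sketch of how a caller might seed a decoder's LSTM state from an encoder summary; the layer names and the size of 512 are illustrative assumptions, not taken from these patches:

# Hedged sketch: boot an lstmemory_group's cell state from an encoder summary.
# `encoder`, `trg_embedding`, and size 512 are hypothetical.
encoder_last = last_seq(input=encoder)          # non-sequence summary of the source
boot_state = fc_layer(input=encoder_last, size=512,
                      act=TanhActivation())     # project to the cell-state size
decoder_input = mixed_layer(
    input=[full_matrix_projection(input=trg_embedding)],
    size=512 * 4)                               # four gate projections, per the docstring
decoder = lstmemory_group(
    input=decoder_input,
    size=512,
    memory_boot=boot_state)                     # state memory now boots from boot_state

Note that lstmemory_group expects its input to already carry the four gate projections (hence size 512 * 4), matching the mixed_layer plus full_matrix_projection convention described in the docstrings above; memory_boot must be a non-sequence layer whose size equals the cell size.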
From 0fa409246b98c636d4dd32553782ca962f70a6f7 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Thu, 29 Jun 2017 09:43:00 +0000 Subject: [PATCH 176/542] fix bugs --- go/master/c/client.go | 18 ++++++++++++++++-- go/master/client.go | 21 +++++++++++++++------ go/master/client_test.go | 18 ++++++++++++++---- python/paddle/v2/reader/creator.py | 6 ++---- 4 files changed, 47 insertions(+), 16 deletions(-) diff --git a/go/master/c/client.go b/go/master/c/client.go index b88911b858..79e13e4b63 100644 --- a/go/master/c/client.go +++ b/go/master/c/client.go @@ -13,6 +13,7 @@ typedef int paddle_master_client; import "C" import ( + "io" "sync" "unsafe" @@ -84,14 +85,27 @@ func paddle_set_dataset(client C.paddle_master_client, path **C.char, size C.int return C.PADDLE_MASTER_OK } +// return value: +// 0:ok +// -1:EOF +// -2:error //export paddle_next_record func paddle_next_record(client C.paddle_master_client, record **C.uchar) C.int { c := get(client) - r := c.NextRecord() - if r == nil { + r, err := c.NextRecord() + if err == io.EOF { // EOF + *record = (*C.uchar)(nullPtr) return -1 } + + if err != nil { + // Error + // TODO: return the type of error? + *record = (*C.uchar)(nullPtr) + return -2 + } + if len(r) == 0 { // Empty record *record = (*C.uchar)(nullPtr) diff --git a/go/master/client.go b/go/master/client.go index fa479338c5..c122d17c8f 100644 --- a/go/master/client.go +++ b/go/master/client.go @@ -1,6 +1,7 @@ package master import ( + "io" "os" "time" @@ -17,7 +18,12 @@ type Addresser interface { // Client is the client of the master server. type Client struct { conn *connection.Conn - ch chan []byte + ch chan record +} + +type record struct { + r []byte + err error } // NewClient creates a new Client. @@ -27,7 +33,7 @@ type Client struct { func NewClient(addr Addresser, bufSize int) *Client { c := &Client{} c.conn = connection.New() - c.ch = make(chan []byte, bufSize) + c.ch = make(chan record, bufSize) go c.monitorMaster(addr) go c.getRecords() return c @@ -52,18 +58,20 @@ func (c *Client) getRecords() { s := recordio.NewRangeScanner(f, &chunk.Index, -1, -1) for s.Scan() { - c.ch <- s.Record() + c.ch <- record{s.Record(), nil} } if s.Err() != nil { + c.ch <- record{nil, s.Err()} log.Errorln(err, chunk.Path) } err = f.Close() - c.ch <- nil if err != nil { log.Errorln(err) } + + c.ch <- record{nil, io.EOF} } // We treat a task as finished whenever the last data @@ -133,6 +141,7 @@ func (c *Client) taskFinished(taskID int) error { // // NextRecord will block until the next record is available. It is // thread-safe. 
-func (c *Client) NextRecord() []byte {
-	return <-c.ch
+func (c *Client) NextRecord() ([]byte, error) {
+	r := <-c.ch
+	return r.r, r.err
 }
diff --git a/go/master/client_test.go b/go/master/client_test.go
index 85a86761c2..05201941e3 100644
--- a/go/master/client_test.go
+++ b/go/master/client_test.go
@@ -2,6 +2,7 @@ package master_test
 
 import (
 	"fmt"
+	"io"
 	"net"
 	"net/http"
 	"net/rpc"
@@ -69,13 +70,22 @@ func TestNextRecord(t *testing.T) {
 
 	for pass := 0; pass < 50; pass++ {
 		received := make(map[byte]bool)
-		for i := 0; i < total; i++ {
-			r := c.NextRecord()
+		for i := 0; i <= total; i++ {
+			r, err := c.NextRecord()
+			if err == io.EOF {
+				break
+			}
+
+			if err != nil {
+				t.Fatal(pass, i, "Read error:", err)
+			}
+
 			if len(r) != 1 {
-				t.Fatal("Length should be 1.", r)
+				t.Fatal(pass, i, "Length should be 1.", r)
 			}
+
 			if received[r[0]] {
-				t.Fatal("Received duplicate.", received, r)
+				t.Fatal(pass, i, "Received duplicate.", received, r)
 			}
 			received[r[0]] = true
 		}
diff --git a/python/paddle/v2/reader/creator.py b/python/paddle/v2/reader/creator.py
index 3376d7accb..b575f57dc6 100644
--- a/python/paddle/v2/reader/creator.py
+++ b/python/paddle/v2/reader/creator.py
@@ -79,7 +79,6 @@ def recordio_local(paths):
     return reader
 
-
 def recordio(paths, addr="", buf_size=100):
     """
     Creates a data reader that outputs records one by one
@@ -90,8 +89,8 @@ def recordio(paths, addr="", buf_size=100):
     import os
     import paddle.v2.master.client as cloud
 
-    if len(os.environ["KUBERNETES_SERVICE_HOST"]) == 0:
-        return recordio_local(path)
+    if "KUBERNETES_SERVICE_HOST" not in os.environ.keys():
+        return recordio_local(paths)
 
     def reader():
         c = cloud(addr, buf_size)
@@ -106,4 +105,3 @@ def recordio(paths, addr="", buf_size=100):
         c.close()
 
     return reader
-

From b79784ee9e0fd67933d4793e8ab4564f7a30c780 Mon Sep 17 00:00:00 2001
From: gongweibao
Date: Thu, 29 Jun 2017 09:52:21 +0000
Subject: [PATCH 177/542] fix bugs

---
 python/paddle/v2/master/client.py  | 18 ++++++++++++++----
 python/paddle/v2/reader/creator.py |  2 +-
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/python/paddle/v2/master/client.py b/python/paddle/v2/master/client.py
index 9fd3ef0860..0cc01b7310 100644
--- a/python/paddle/v2/master/client.py
+++ b/python/paddle/v2/master/client.py
@@ -26,17 +26,27 @@ class client(object):
             holder[idx] = c_ptr
         lib.paddle_set_dataset(self.c, holder, len(paths))
 
+    # return format: (record, errno)
+    # errno = 0: ok
+    #       = -1: EOF
+    #       < -1: error
     def next_record(self):
         p = ctypes.c_char_p()
         ret = ctypes.pointer(p)
         size = lib.paddle_next_record(self.c, ret)
-        if size < 0:
+        if size == -1:
             # EOF
-            return None
+            return None, -1
+
+        if size < -1:
+            # Error
+            return None, size
+
         if size == 0:
             # Empty record
-            return ""
+            return "", 0
+
         record = ret.contents.value[:size]
         # Memory created from C should be freed.
        lib.mem_free(ret.contents)
-        return record
+        return record, 0
diff --git a/python/paddle/v2/reader/creator.py b/python/paddle/v2/reader/creator.py
index b575f57dc6..2e8626e565 100644
--- a/python/paddle/v2/reader/creator.py
+++ b/python/paddle/v2/reader/creator.py
@@ -97,7 +97,7 @@ def recordio(paths, addr="", buf_size=100):
         c.set_dataset(paths)
 
         while True:
-            r = client.next_record()
+            r, err = client.next_record()
             if r is None:
                 break
             yield r

From cc0704227024b58d3721bdd305b9814b57a9d139 Mon Sep 17 00:00:00 2001
From: wuyi05
Date: Thu, 29 Jun 2017 18:50:11 +0800
Subject: [PATCH 178/542] update design doc

---
 doc/design/build_system/README.md | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/doc/design/build_system/README.md b/doc/design/build_system/README.md
index 96af6566d0..3e540e072d 100644
--- a/doc/design/build_system/README.md
+++ b/doc/design/build_system/README.md
@@ -108,14 +108,14 @@ As above example CMakeLists.txt executes, each function invocation adds "nodes"
 
 ### Using Package Manager For Go
 
-Building go binaries and libraries need to satisfy their dependencies, generally
+Building Go binaries and libraries need to satisfy their dependencies, generally
 we can do `go get ./...` to download and compile all external dependencies. The
 problems are:
 
-1. `go get` will always get the latest code from master branch, so when an external
-   project updated and deprecates something or made changes to their APIs, builds
-   may not pass. This is very different with what we already have in `cmake/external`
-   which download a specific version or commit id of the dependency.
+1. `go get` will always get the latest code from the default branch of the
+   remote repo, so changes of dependents might break the build. This is very
+   different with what we already have in `cmake/external` which download a
+   specific version or commit id of the dependency.
1. Some locations can not access external dependencies through the internet, as mentioned
   in https://github.com/PaddlePaddle/Paddle/issues/2605. Using package management
   tools can package the dependencies as a "vendor" package, which can be mirrored
   at many cloud file hosting, so users who want to compile paddle by themselves can
   download this "vendor" package from a mirror site.

#### Godep vs. Glide

-Here's a brief comparison for current Go ecosystem: https://github.com/Masterminds/glide/wiki/Go-Package-Manager-Comparison. There are
-also many complaints about `Godep`. A new "official" pakcage management tool has been
-started: https://github.com/golang/dep to resolve such problems, but it's currently
-at Alpha stage. So the best choice now is glide obviously.
+As mentioned by @wangkuiyi, [this page](https://github.com/golang/go/wiki/PackageManagementTools)
+lists dozens of Go package managers. We choose the tool using the following principles:
+
+- Most "active" projects with more stars, more pull requests or commits
+- Commonly used project
+
+Then we shall choose between the most popular tools: Godep and Glide.
+
+Here's a brief comparison between Godep and Glide
+: https://github.com/Masterminds/glide/wiki/Go-Package-Manager-Comparison. There are
+also many complaints about using `Godep`. A new "official" package management
+tool has also been started at https://github.com/golang/dep to resolve
+such problems, but it's currently at Alpha stage. So the best choice now is
+glide obviously.
#### Manage Go Packages

From 7993ff794d23f07804cbe1308e6d8c143b9cdc1f Mon Sep 17 00:00:00 2001
From: wuyi05
Date: Thu, 29 Jun 2017 19:04:03 +0800
Subject: [PATCH 179/542] update design doc

---
 doc/design/build_system/README.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/doc/design/build_system/README.md b/doc/design/build_system/README.md
index 3e540e072d..bf0e4dddc1 100644
--- a/doc/design/build_system/README.md
+++ b/doc/design/build_system/README.md
@@ -122,15 +122,16 @@ problems are:
   at many cloud file hosting, so users who want to compile paddle by themselves can
   download this "vendor" package from a mirror site.
 
-#### Godep vs. Glide
+#### Choose A Suitable Tool
 
 As mentioned by @wangkuiyi, [this page](https://github.com/golang/go/wiki/PackageManagementTools)
 lists dozens of Go package managers. We choose the tool using the following principles:
 
 - Most "active" projects with more stars, more pull requests or commits
-- Commonly used project
+- Widely used project
 
-Then we shall choose between the most popular tools: Godep and Glide.
+After comparing all these projects, we shall choose between the most popular
+tools: Godep and Glide.
 
 Here's a brief comparison between Godep and Glide
 : https://github.com/Masterminds/glide/wiki/Go-Package-Manager-Comparison. There are

From 26cd0bb5a59d913f8c216ceee0c6abb46317e31e Mon Sep 17 00:00:00 2001
From: liaogang
Date: Thu, 29 Jun 2017 19:13:24 +0800
Subject: [PATCH 180/542] ENH: count allocated fallback size for performance

---
 paddle/memory/detail/system_allocator.cc      | 52 +++++++++++++------
 paddle/memory/detail/system_allocator.h       |  3 +-
 .../paddle/trainer_config_helpers/networks.py |  4 +-
 3 files changed, 39 insertions(+), 20 deletions(-)

diff --git a/paddle/memory/detail/system_allocator.cc b/paddle/memory/detail/system_allocator.cc
index 332ff062d4..2b0fbfa87e 100644
--- a/paddle/memory/detail/system_allocator.cc
+++ b/paddle/memory/detail/system_allocator.cc
@@ -39,22 +39,22 @@ void* CPUAllocator::Alloc(size_t& index, size_t size) {
   // pointer shall not be dereferenced -- so we make it nullptr.
   if (size <= 0) return nullptr;
 
-  if (FLAGS_use_pinned_memory) {
-    void* p = malloc(size);
-    if (p != nullptr) {
-      mlock(p, size);
-    }
-  }
+  index = 0;  // unlock memory
 
   void* p = malloc(size);
-  if (p != nullptr && FLAGS_use_pinned_memory) {
-    mlock(p, size);
+
+  if (p != nullptr) {
+    if (FLAGS_use_pinned_memory) {
+      index = 1;
+      mlock(p, size);  // lock memory
+    }
   }
+
   return p;
 }
 
 void CPUAllocator::Free(void* p, size_t size, size_t index) {
-  if (p != nullptr && FLAGS_use_pinned_memory) {
+  if (p != nullptr && index == 1) {
     munlock(p, size);
   }
   free(p);
@@ -73,26 +73,34 @@ void* GPUAllocator::Alloc(size_t& index, size_t size) {
   // Reserve memory for page tables, etc.
   size_t reserving = capacity - paddle::platform::GpuMaxAllocSize();
-  size_t remaining = available > reserving ? available - reserving : 0;
+  size_t usable = available > reserving ? available - reserving : 0;
 
   // If remaining size no less than expected size, using general
   // cudaMalloc to allocate GPU memory.
   void* p = 0;
-  if (size <= remaining) {
+  if (size <= usable) {
     cudaError_t result = cudaMalloc(&p, size);
     if (result == cudaSuccess) {
       index = 0;
-      total_alloc_size_ += size;
+      gpu_alloc_size_ += size;
       return p;
     }
   }
 
   // If remaining size less than expected size or cudaMalloc failed,
   // cudaMallocHost will be considered as a fallback allocator.
+  //
+  // NOTE: here, we use GpuMaxAllocSize() as the maximum memory size
+  // of host fallback allocation.
Allocating too much would reduce
+  // the amount of memory available to the underlying system for paging.
+  usable = paddle::platform::GpuMaxAllocSize() - fallback_alloc_size_;
+
+  if (size > usable) return nullptr;
+
   cudaError_t result = cudaMallocHost(&p, size);
   if (result == cudaSuccess) {
     index = 1;
-    total_alloc_size_ += size;
+    fallback_alloc_size_ += size;
     return p;
   }
 
@@ -100,16 +108,26 @@ void* GPUAllocator::Alloc(size_t& index, size_t size) {
 }
 
 void GPUAllocator::Free(void* p, size_t size, size_t index) {
+  cudaError_t err;
+
+  if (index == 0) {
+    PADDLE_ASSERT(gpu_alloc_size_ >= size);
+    gpu_alloc_size_ -= size;
+    err = cudaFree(p);
+  } else {
+    PADDLE_ASSERT(fallback_alloc_size_ >= size);
+    fallback_alloc_size_ -= size;
+    err = cudaFreeHost(p);
+  }
+
   // Purposefully allow cudaErrorCudartUnloading, because
   // that is returned if you ever call cudaFree after the
   // driver has already shutdown. This happens only if the
   // process is terminating, in which case we don't care if
   // cudaFree succeeds.
-  PADDLE_ASSERT(total_alloc_size_ >= size);
-  total_alloc_size_ -= size;
-  cudaError_t err = index == 1 ? cudaFreeHost(p) : cudaFree(p);
   if (err != cudaErrorCudartUnloading) {
-    platform::throw_on_error(err, "cudaFree{Host} failed");
+    platform::throw_on_error(err,
+                             "cudaFree{Host} failed in GPUAllocator::Free.");
   }
 }
 
diff --git a/paddle/memory/detail/system_allocator.h b/paddle/memory/detail/system_allocator.h
index e15302ce4f..7093c42967 100644
--- a/paddle/memory/detail/system_allocator.h
+++ b/paddle/memory/detail/system_allocator.h
@@ -47,7 +47,8 @@ class GPUAllocator : public SystemAllocator {
   virtual void Free(void* p, size_t size, size_t index);
 
  private:
-  size_t total_alloc_size_ = 0;
+  size_t gpu_alloc_size_ = 0;
+  size_t fallback_alloc_size_ = 0;
 };
 
 #endif  // PADDLE_ONLY_CPU
diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py
index 67154a8d7d..1bf59ed484 100755
--- a/python/paddle/trainer_config_helpers/networks.py
+++ b/python/paddle/trainer_config_helpers/networks.py
@@ -1381,7 +1381,7 @@ def inputs(layers, *args):
     if len(args) != 0:
         layers.extend(args)
 
-    Inputs(* [l.name for l in layers])
+    Inputs(*[l.name for l in layers])
 
 
 def outputs(layers, *args):
@@ -1424,7 +1424,7 @@ def outputs(layers, *args):
     assert len(layers) > 0
 
     if HasInputsSet():  # input already set
-        Outputs(* [l.name for l in layers])
+        Outputs(*[l.name for l in layers])
         return  # just return outputs.
 
     if len(layers) != 1:

From fb51c3dc895b78df966dd0d9713657289b1986b3 Mon Sep 17 00:00:00 2001
From: liaogang
Date: Thu, 29 Jun 2017 19:57:10 +0800
Subject: [PATCH 181/542] FIX: add compile dependency gflags

---
 paddle/platform/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt
index 969c91985d..5cbe491b2b 100644
--- a/paddle/platform/CMakeLists.txt
+++ b/paddle/platform/CMakeLists.txt
@@ -1,7 +1,7 @@
-cc_library(cpu_info SRCS cpu_info.cc)
+cc_library(cpu_info SRCS cpu_info.cc DEPS gflags)
 cc_test(cpu_info_test SRCS cpu_info_test.cc DEPS cpu_info gflags glog)
 
-nv_library(gpu_info SRCS gpu_info.cc)
+nv_library(gpu_info SRCS gpu_info.cc DEPS gflags)
 
 cc_library(place SRCS place.cc)
 cc_test(place_test SRCS place_test.cc DEPS place glog gflags)

From 3a0919bab31fd64ea6ae73a61755b92c619a411e Mon Sep 17 00:00:00 2001
From: yangyaming
Date: Thu, 29 Jun 2017 21:31:42 +0800
Subject: [PATCH 182/542] Add test for configuration and add doc.
---
 doc/api/v2/config/layer.rst                   | 13 ++++++++++
 .../tests/configs/file_list.sh                |  2 +-
 .../configs/test_detection_output_layer.py    | 23 +++++++++++++++++
 .../tests/configs/test_multibox_loss_layer.py | 25 +++++++++++++++++++
 4 files changed, 62 insertions(+), 1 deletion(-)
 create mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_detection_output_layer.py
 create mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_multibox_loss_layer.py

diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst
index c7b017bc07..0a8465919d 100644
--- a/doc/api/v2/config/layer.rst
+++ b/doc/api/v2/config/layer.rst
@@ -445,6 +445,11 @@ smooth_l1_cost
 .. autoclass:: paddle.v2.layer.smooth_l1_cost
     :noindex:
 
+multibox_loss
+--------------
+.. autoclass:: paddle.v2.layer.multibox_loss
+    :noindex:
+
 Check Layer
 ============
@@ -468,3 +473,11 @@ prelu
 --------
 .. autoclass:: paddle.v2.layer.prelu
     :noindex:
+
+Detection output Layer
+======================
+
+detection_output
+----------------
+.. autoclass:: paddle.v2.layer.detection_output
+    :noindex:
diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
index c24102255f..45fb848886 100755
--- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
+++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
@@ -6,6 +6,6 @@ img_layers img_trans_layers util_layers simple_rnn_layers unused_layers
 test_cos test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight
 test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
 test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
-test_prelu_layer test_row_conv)
+test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer)
 
 export whole_configs=(test_split_datasource)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_detection_output_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_detection_output_layer.py
new file mode 100644
index 0000000000..3572a2cb07
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_detection_output_layer.py
@@ -0,0 +1,23 @@
+from paddle.trainer_config_helpers import *
+
+settings(batch_size=1000, learning_rate=1e-5)
+
+input_loc = data_layer(name='input_loc', size=16, height=16, width=1)
+
+input_conf = data_layer(name='input_conf', size=8, height=1, width=8)
+
+priorbox = data_layer(name='priorbox', size=32, height=4, width=8)
+
+detout = detection_output_layer(
+    input_loc=input_loc,
+    input_conf=input_conf,
+    priorbox=priorbox,
+    num_classes=21,
+    nms_threshold=0.45,
+    nms_top_k=400,
+    keep_top_k=200,
+    confidence_threshold=0.01,
+    background_id=0,
+    name='test_detection_output')
+
+outputs(detout)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_multibox_loss_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_multibox_loss_layer.py
new file mode 100644
index 0000000000..c3376c47bd
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_multibox_loss_layer.py
@@ -0,0 +1,25 @@
+from paddle.trainer_config_helpers import *
+
+settings(batch_size=1000, learning_rate=1e-5)
+
+input_loc = data_layer(name='input_loc', size=16, height=16, width=1)
+
+input_conf = data_layer(name='input_conf', size=8, height=1, width=8)
+
+priorbox = data_layer(name='priorbox', size=32, height=4, width=8)
+
+label = data_layer(name='label', size=24, height=4, width=6)
+
+multibox_loss = 
multibox_loss_layer(
+    input_loc=input_loc,
+    input_conf=input_conf,
+    priorbox=priorbox,
+    label=label,
+    num_classes=21,
+    overlap_threshold=0.5,
+    neg_pos_ratio=3.0,
+    neg_overlap=0.5,
+    background_id=0,
+    name='test_multibox_loss')
+
+outputs(multibox_loss)

From 7c066f6e3e43cfc2b43d46f5e860a291b125b3d4 Mon Sep 17 00:00:00 2001
From: Helin Wang
Date: Fri, 30 Jun 2017 00:45:07 +0000
Subject: [PATCH 183/542] fix according to comments

---
 doc/design/cluster_train/save_model.md | 52 +++++++++++++++-----------
 1 file changed, 31 insertions(+), 21 deletions(-)

diff --git a/doc/design/cluster_train/save_model.md b/doc/design/cluster_train/save_model.md
index 3a9a24fb9c..76ac8d8387 100644
--- a/doc/design/cluster_train/save_model.md
+++ b/doc/design/cluster_train/save_model.md
@@ -7,24 +7,34 @@ ways from which user can obtain a model:
 
 - Save model triggered by user code: user code asks PaddlePaddle to
   save a model.
 
-- Convert model from the snapshot: model being converted from
-  pservers' periodic snapshot. In this way, the user can cancel a job
-  at any time, and still have a relatively fresh model (we snapshot
-  around every 5 minutes).
+- Convert model from the checkpoint: model being converted from
+  pservers' periodic checkpoint. In this way, the user can cancel a
+  job at any time, and still have a relatively fresh model (we
+  checkpoint around every 5 minutes).
 
-### Save Model Triggered by User Code
+### Trainer Saving Model vs. Pservers Saving Model
 
 Both trainers and pservers have access to the model. So the model can
 be saved from a trainer or pservers. We need to decide on where the
 model is saved from.
 
-#### Dense Model vs. Sparse Model
+#### Dense Update vs. Sparse Update
+
+There are two types of model update methods: dense update and sparse
+update (when the parameter is configured to be sparse).
+
+- Dense update
+
+  Every trainer has its own full copy of the model. Every model
+  update will update the entire model.
+
+- Sparse update
+
+  The training input is sparse, and the trainer does not have the
+  entire model. It will only download the sub-model related
+  to the input. When updating the model, only the sub-model related to
+  the training input is updated.
 
-There are two types of model: dense and sparse model (when the
-parameter is configured to be sparse). Pservers always jointly have
-the entire model at any given time. Trainers only have the entire
-dense model, but only have a fraction of the sparse model at any given
-time.
 
 #### Pservers Saving Model
 
 The benefit of letting pservers save the model is they have the entire
 model all the time. However, since pservers are on different nodes, it
 requires a merging process to merge model shards into the same
 model. This requires the pservers to write models to a distributed
-filesystem, making the snapshot shards visible to the merge program.
+filesystem, making the checkpoint shards visible to the merge program.
 
 #### Trainer Saving Model
 
 The benefit of letting one trainer save the model is it does not
 require a distributed filesystem. And it's reusing the same save model
-logic when the trainer is training locally - except when training
-sparse model, the trainer needs to download the entire sparse model
-during the saving process.
+logic when the trainer is training locally - except when doing sparse
+update, the trainer needs to download the entire model during the
+saving process.
#### Conclusion
 
 Given trainer saving model does not require a distributed filesystem,
 and is an intuitive extension to training locally, we decide to let
 the trainer save the model.
 
 
-### Convert Model from Snapshot
+### Convert Model from Checkpoint
 
 TODO
 
@@ -86,15 +96,15 @@ when save model is taking place.
 
 When saving a dense model, the trainer uses the local model. Pservers
 does not need to pause model update.
 
-When saving a sparse model. The trainer needs to download the entire
-sparse model while saving. To get the most accurate model, the model
-update needs to be paused before the download starts and resumed after
-the download finishes. Otherwise, the trainer gets a model that is
+When doing sparse update, the trainer needs to download the entire
+model while saving. To get the most accurate model, the model update
+needs to be paused before the download starts and resumed after the
+download finishes. Otherwise, the trainer gets a model that is
 "polluted": some part of the model is old, some part of the model is
 new. It's unclear that the "polluted" model will be inferiod due to the
 stochastic nature of deep learning, and pausing the model update will
-add more complexity to the system. Since supporting sparse model is a
+add more complexity to the system. Since supporting sparse update is a
 TODO item, we defer the evaluation of whether to pause the model update
 during model saving to the future.

From 59cf5e77962d743b7535e3ffd72e7ebe00c29502 Mon Sep 17 00:00:00 2001
From: Helin Wang
Date: Wed, 28 Jun 2017 22:32:33 +0000
Subject: [PATCH 184/542] Fix Go cmake

---
 CMakeLists.txt                         | 31 ++++++++++++++++++++++----
 cmake/generic.cmake                    | 26 ++++++---------------
 go/master/c/CMakeLists.txt             | 20 +----------------
 go/pserver/cclient/test/CMakeLists.txt |  2 +-
 paddle/trainer/CMakeLists.txt          |  4 ++--
 python/CMakeLists.txt                  | 18 ++++++++++-----
 python/paddle/v2/__init__.py           |  1 +
 python/setup.py.in                     |  2 ++
 8 files changed, 54 insertions(+), 50 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 24a7066adc..edea8279df 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -130,14 +130,37 @@ if(WITH_GPU)
 endif(WITH_GPU)
 
 add_subdirectory(proto)
-add_subdirectory(paddle)
-add_subdirectory(python)
 
+# "add_subdirectory(paddle)" and "add_subdirectory(python)" should be
+# placed after this block, because they depends on it.
 if(WITH_GOLANG)
-  #TODO (add go/master/c back when fixed)
-  add_subdirectory(go/pserver/cclient)
+  # we need to symlink Paddle directory into GOPATH. If we
+  # don't do it and we have code that depends on Paddle, go
+  # get ./... will download a new Paddle repo from Github,
+  # without the changes in our current Paddle repo that we
+  # want to build.
+  set(GOPATH "${CMAKE_CURRENT_BINARY_DIR}/go")
+  file(MAKE_DIRECTORY ${GOPATH})
+  set(PADDLE_IN_GOPATH "${GOPATH}/src/github.com/PaddlePaddle/Paddle")
+  add_custom_target(go_path)
+  add_custom_command(TARGET go_path
+    # Symlink Paddle directory into GOPATH
+    COMMAND mkdir -p ${PADDLE_IN_GOPATH}
+    COMMAND rm -rf ${PADDLE_IN_GOPATH}
+    COMMAND ln -sf ${CMAKE_SOURCE_DIR} ${PADDLE_IN_GOPATH}
+    # Automatically get all dependencies specified in the source code
+    # We can't run `go get -d ./...` for every target, because
+    # multiple `go get` can not run concurrently, but make need to be
+    # able to run with multiple jobs.
+    COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get -d ./go/...
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + ) + add_subdirectory(go/master/c) + add_subdirectory(go/pserver/cclient) endif(WITH_GOLANG) +add_subdirectory(paddle) +add_subdirectory(python) if(WITH_DOC) add_subdirectory(doc) endif() diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 11c1f677ae..8a9bf12ccc 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -253,10 +253,6 @@ function(nv_test TARGET_NAME) endif() endfunction(nv_test) -set(GOPATH "${CMAKE_CURRENT_BINARY_DIR}/go") -file(MAKE_DIRECTORY ${GOPATH}) -set(PADDLE_IN_GOPATH "${GOPATH}/src/github.com/PaddlePaddle/Paddle") - function(go_library TARGET_NAME) set(options STATIC static SHARED shared) set(oneValueArgs "") @@ -265,10 +261,10 @@ function(go_library TARGET_NAME) if (go_library_SHARED OR go_library_shared) set(BUILD_MODE "-buildmode=c-shared") - set(LIB_NAME "${CMAKE_SHARED_LIBRARY_PREFIX}${TARGET_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}") + set(${TARGET_NAME}_LIB_NAME "${CMAKE_SHARED_LIBRARY_PREFIX}${TARGET_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}" CACHE STRING "output library name for target ${TARGET_NAME}") else() set(BUILD_MODE "-buildmode=c-archive") - set(LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${TARGET_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(${TARGET_NAME}_LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${TARGET_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" CACHE STRING "output library name for target ${TARGET_NAME}") endif() # Add dummy code to support `make target_name` under Terminal Command @@ -283,25 +279,17 @@ function(go_library TARGET_NAME) add_dependencies(${TARGET_NAME} ${go_library_DEPS}) endif(go_library_DEPS) - # we need to symlink Paddle directory into GOPATH. If we - # don't do it and we have code that depends on Paddle, go - # get ./... will download a new Paddle repo from Github, - # without the changes in our current Paddle repo that we - # want to build. + set(${TARGET_NAME}_LIB_PATH "${CMAKE_CURRENT_BINARY_DIR}/${${TARGET_NAME}_LIB_NAME}" CACHE STRING "output library path for target ${TARGET_NAME}") + file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go") add_custom_command(TARGET ${TARGET_NAME} POST_BUILD - COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}" - # Symlink Paddle directory into GOPATH - COMMAND mkdir -p ${PADDLE_IN_GOPATH} - COMMAND rm -rf ${PADDLE_IN_GOPATH} - COMMAND ln -sf ${CMAKE_SOURCE_DIR} ${PADDLE_IN_GOPATH} - # Automatically get all dependencies specified in the source code - COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get -d ./... 
+ COMMAND rm "${${TARGET_NAME}_LIB_PATH}" # Golang build source code COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE} - -o "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}" + -o "${${TARGET_NAME}_LIB_PATH}" ${GO_SOURCE} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + add_dependencies(${TARGET_NAME} go_path) endfunction(go_library) function(go_binary TARGET_NAME) diff --git a/go/master/c/CMakeLists.txt b/go/master/c/CMakeLists.txt index acce698051..a4e92635ba 100644 --- a/go/master/c/CMakeLists.txt +++ b/go/master/c/CMakeLists.txt @@ -1,21 +1,3 @@ cmake_minimum_required(VERSION 3.0) -get_filename_component(PARENT_DIR ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY) -get_filename_component(PARENT_DIR ${PARENT_DIR} DIRECTORY) -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PARENT_DIR}/cmake") - -project(cxx_go C Go) - -include(golang) -include(flags) - -set(MASTER_LIB_NAME "paddle_master") -go_library(${MASTER_LIB_NAME} SHARED) - -if(PROJ_ROOT) - add_custom_command(OUTPUT ${PROJ_ROOT}/python/paddle/v2/master/lib${MASTER_LIB_NAME}.so - COMMAND rm ${CMAKE_CURRENT_BINARY_DIR}/lib${MASTER_LIB_NAME}.h - COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/lib${MASTER_LIB_NAME}.so ${PROJ_ROOT}/python/paddle/v2/master/ - DEPENDS ${MASTER_LIB_NAME}) - add_custom_target(paddle_master_shared ALL DEPENDS ${PROJ_ROOT}/python/paddle/v2/master/lib${MASTER_LIB_NAME}.so) -endif(PROJ_ROOT) +go_library(paddle_master SHARED) diff --git a/go/pserver/cclient/test/CMakeLists.txt b/go/pserver/cclient/test/CMakeLists.txt index 916e4e99a2..170730cceb 100644 --- a/go/pserver/cclient/test/CMakeLists.txt +++ b/go/pserver/cclient/test/CMakeLists.txt @@ -1,3 +1,3 @@ -cc_library(main SRCS main.c DEPS paddle_pserver_cclient) +cc_binary(main SRCS main.c DEPS paddle_pserver_cclient) cc_test(test_cclient SRCS test_cclient.c DEPS paddle_pserver_cclient) diff --git a/paddle/trainer/CMakeLists.txt b/paddle/trainer/CMakeLists.txt index f34d53ae99..54e74248e7 100644 --- a/paddle/trainer/CMakeLists.txt +++ b/paddle/trainer/CMakeLists.txt @@ -72,6 +72,6 @@ endif() if(WITH_GOLANG) add_dependencies(paddle_trainer_lib paddle_pserver_cclient) - target_link_libraries(paddle_trainer ${CMAKE_BINARY_DIR}/go/pserver/cclient/libpaddle_pserver_cclient.a) - target_link_libraries(paddle_trainer_lib ${CMAKE_BINARY_DIR}/go/pserver/cclient/libpaddle_pserver_cclient.a) + target_link_libraries(paddle_trainer paddle_pserver_cclient) + target_link_libraries(paddle_trainer_lib paddle_pserver_cclient) endif(WITH_GOLANG) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 3640dd3a75..a9842152c8 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -7,10 +7,18 @@ file(GLOB UTILS_PY_FILES . 
./paddle/utils/*.py) file(GLOB_RECURSE V2_PY_FILES ./paddle/v2/ *.py) set(PY_FILES paddle/__init__.py - ${TRAINER_PY_FILES} - ${HELPERS_PY_FILES} - ${UTILS_PY_FILES} - ${V2_PY_FILES}) + ${TRAINER_PY_FILES} + ${HELPERS_PY_FILES} + ${UTILS_PY_FILES} + ${V2_PY_FILES}) + +add_custom_target(copy_paddle_master) +if(WITH_GOLANG) + add_custom_command(TARGET copy_paddle_master + COMMAND cp ${paddle_master_LIB_PATH} ${PROJ_ROOT}/python/paddle/v2/master/ + ) + add_dependencies(copy_paddle_master paddle_master) +endif(WITH_GOLANG) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in ${CMAKE_CURRENT_BINARY_DIR}/setup.py) @@ -18,7 +26,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT_DIR}/.timestamp - DEPENDS gen_proto_py ${PY_FILES} ${external_project_dependencies}) + DEPENDS gen_proto_py ${PY_FILES} ${external_project_dependencies} copy_paddle_master) add_custom_target(paddle_python ALL DEPENDS ${OUTPUT_DIR}/.timestamp) diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py index 6a1e23a343..3ba5c31871 100644 --- a/python/paddle/v2/__init__.py +++ b/python/paddle/v2/__init__.py @@ -56,6 +56,7 @@ __all__ = [ 'plot', 'evaluator', 'image', + 'master', ] diff --git a/python/setup.py.in b/python/setup.py.in index 86fc0fc5c0..e507acaf21 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -8,6 +8,7 @@ packages=['paddle', 'paddle.v2', 'paddle.v2.dataset', 'paddle.v2.reader', + 'paddle.v2.master', 'paddle.v2.plot'] setup_requires=["requests", @@ -25,6 +26,7 @@ setup(name='paddle', description='Parallel Distributed Deep Learning', install_requires=setup_requires, packages=packages, + package_data={'paddle.v2.master': ['${paddle_master_LIB_NAME}'], }, package_dir={ '': '${CMAKE_CURRENT_SOURCE_DIR}' }, From b3c5808e13bc94fbc933c803c59fed979a11f515 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Fri, 30 Jun 2017 03:11:57 +0000 Subject: [PATCH 185/542] rm cloud EOF --- go/master/c/client.go | 7 ------- go/master/client.go | 3 --- go/master/client_test.go | 7 +------ python/paddle/v2/master/client.py | 5 ----- 4 files changed, 1 insertion(+), 21 deletions(-) diff --git a/go/master/c/client.go b/go/master/c/client.go index 79e13e4b63..a37894fefe 100644 --- a/go/master/c/client.go +++ b/go/master/c/client.go @@ -13,7 +13,6 @@ typedef int paddle_master_client; import "C" import ( - "io" "sync" "unsafe" @@ -93,12 +92,6 @@ func paddle_set_dataset(client C.paddle_master_client, path **C.char, size C.int func paddle_next_record(client C.paddle_master_client, record **C.uchar) C.int { c := get(client) r, err := c.NextRecord() - if err == io.EOF { - // EOF - *record = (*C.uchar)(nullPtr) - return -1 - } - if err != nil { // Error // TODO: return the type of error? 
diff --git a/go/master/client.go b/go/master/client.go index c122d17c8f..985b96b0af 100644 --- a/go/master/client.go +++ b/go/master/client.go @@ -1,7 +1,6 @@ package master import ( - "io" "os" "time" @@ -70,8 +69,6 @@ func (c *Client) getRecords() { if err != nil { log.Errorln(err) } - - c.ch <- record{nil, io.EOF} } // We treat a task as finished whenever the last data diff --git a/go/master/client_test.go b/go/master/client_test.go index 05201941e3..0a401d8a43 100644 --- a/go/master/client_test.go +++ b/go/master/client_test.go @@ -2,7 +2,6 @@ package master_test import ( "fmt" - "io" "net" "net/http" "net/rpc" @@ -70,12 +69,8 @@ func TestNextRecord(t *testing.T) { for pass := 0; pass < 50; pass++ { received := make(map[byte]bool) - for i := 0; i <= total; i++ { + for i := 0; i < total; i++ { r, err := c.NextRecord() - if err == io.EOF { - break - } - if err != nil { t.Fatal(pass, i, "Read error:", err) } diff --git a/python/paddle/v2/master/client.py b/python/paddle/v2/master/client.py index 0cc01b7310..6ddb09e4e8 100644 --- a/python/paddle/v2/master/client.py +++ b/python/paddle/v2/master/client.py @@ -28,16 +28,11 @@ class client(object): # return format: (record, errno) # errno = 0: ok - # = -1: EOF # < -1: error def next_record(self): p = ctypes.c_char_p() ret = ctypes.pointer(p) size = lib.paddle_next_record(self.c, ret) - if size == -1: - # EOF - return None, -1 - if size < -1: # Error return None, size From 97bbd179569f48bfcf1a3ff3225c331ad8e3fbf4 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Fri, 30 Jun 2017 03:14:29 +0000 Subject: [PATCH 186/542] rm cloud EOF --- go/master/c/client.go | 1 - 1 file changed, 1 deletion(-) diff --git a/go/master/c/client.go b/go/master/c/client.go index a37894fefe..13ed3b7680 100644 --- a/go/master/c/client.go +++ b/go/master/c/client.go @@ -86,7 +86,6 @@ func paddle_set_dataset(client C.paddle_master_client, path **C.char, size C.int // return value: // 0:ok -// -1:EOF // -2:error //export paddle_next_record func paddle_next_record(client C.paddle_master_client, record **C.uchar) C.int { From a53952165bbaebb34d2ce91ca361b470a2a3238a Mon Sep 17 00:00:00 2001 From: liaogang Date: Fri, 30 Jun 2017 11:22:49 +0800 Subject: [PATCH 187/542] FIX: merge objects to static lib --- cmake/generic.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 61353a4a26..f2e3934c27 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -146,9 +146,9 @@ function(merge_static_libs TARGET_NAME) set(outlibfile "$") foreach(lib ${libs}) - add_custom_command(TARGET ${TARGET_NAME} POST_BUILD - COMMAND ${CMAKE_AR} ru ${outlibfile} @"../${objlistfile}" - WORKING_DIRECTORY ${objdir}) + add_custom_command(TARGET ${TARGET_NAME} POST_BUILD + COMMAND ${CMAKE_AR} ru ${outlibfile} @"../${lib}.objlist" + WORKING_DIRECTORY ${lib}.objdir) endforeach() add_custom_command(TARGET ${TARGET_NAME} POST_BUILD From 38790c1c210c57b9cab5f1abe14203e053831ec5 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Thu, 29 Jun 2017 21:30:43 -0700 Subject: [PATCH 188/542] fix according to comment --- CMakeLists.txt | 21 --------------------- cmake/configure.cmake | 24 ++++++++++++++++++++++++ 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index edea8279df..b248191223 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -134,27 +134,6 @@ add_subdirectory(proto) # "add_subdirectory(paddle)" and "add_subdirectory(python)" should be # placed after this block, because they depends on 
it. if(WITH_GOLANG) - # we need to symlink Paddle directory into GOPATH. If we - # don't do it and we have code that depends on Paddle, go - # get ./... will download a new Paddle repo from Github, - # without the changes in our current Paddle repo that we - # want to build. - set(GOPATH "${CMAKE_CURRENT_BINARY_DIR}/go") - file(MAKE_DIRECTORY ${GOPATH}) - set(PADDLE_IN_GOPATH "${GOPATH}/src/github.com/PaddlePaddle/Paddle") - add_custom_target(go_path) - add_custom_command(TARGET go_path - # Symlink Paddle directory into GOPATH - COMMAND mkdir -p ${PADDLE_IN_GOPATH} - COMMAND rm -rf ${PADDLE_IN_GOPATH} - COMMAND ln -sf ${CMAKE_SOURCE_DIR} ${PADDLE_IN_GOPATH} - # Automatically get all dependencies specified in the source code - # We can't run `go get -d ./...` for every target, because - # multiple `go get` can not run concurrently, but make need to be - # able to run with multiple jobs. - COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get -d ./go/... - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - ) add_subdirectory(go/master/c) add_subdirectory(go/pserver/cclient) endif(WITH_GOLANG) diff --git a/cmake/configure.cmake b/cmake/configure.cmake index e8425aedbd..f6dca6d575 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -69,3 +69,27 @@ endif(NOT WITH_GPU) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SIMD_FLAG}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SIMD_FLAG}") + +if(WITH_GOLANG) + # we need to symlink Paddle directory into GOPATH. If we + # don't do it and we have code that depends on Paddle, go + # get ./... will download a new Paddle repo from Github, + # without the changes in our current Paddle repo that we + # want to build. + set(GOPATH "${CMAKE_CURRENT_BINARY_DIR}/go") + file(MAKE_DIRECTORY ${GOPATH}) + set(PADDLE_IN_GOPATH "${GOPATH}/src/github.com/PaddlePaddle/Paddle") + add_custom_target(go_path) + add_custom_command(TARGET go_path + # Symlink Paddle directory into GOPATH + COMMAND mkdir -p ${PADDLE_IN_GOPATH} + COMMAND rm -rf ${PADDLE_IN_GOPATH} + COMMAND ln -sf ${CMAKE_SOURCE_DIR} ${PADDLE_IN_GOPATH} + # Automatically get all dependencies specified in the source code + # We can't run `go get -d ./...` for every target, because + # multiple `go get` can not run concurrently, but make need to be + # able to run with multiple jobs. + COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get -d ./go/... 
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + ) +endif(WITH_GOLANG) From d09bbb559d67848b6a17ff87432580f2c32db9e8 Mon Sep 17 00:00:00 2001 From: liaogang Date: Fri, 30 Jun 2017 12:44:18 +0800 Subject: [PATCH 189/542] FIX: Always build protobuf from source --- cmake/external/protobuf.cmake | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index 2f267adc20..3c74944bc2 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -144,18 +144,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) ENDFUNCTION() SET(PROTOBUF_VERSION 3.1) -IF(NOT CMAKE_CROSSCOMPILING) - FIND_PACKAGE(Protobuf ${PROTOBUF_VERSION}) - - IF(PROTOBUF_FOUND) - SET_PROTOBUF_VERSION() - IF("${PROTOBUF_VERSION}" VERSION_LESS "3.1.0") - SET(PROTOBUF_FOUND OFF) - ELSE() - PROMPT_PROTOBUF_LIB() - ENDIF() - ENDIF(PROTOBUF_FOUND) -ELSE() +IF(CMAKE_CROSSCOMPILING) build_protobuf(protobuf_host TRUE) LIST(APPEND external_project_dependencies protobuf_host) From 5fc9b116d5d5c8346e2c36e5dc75046ea5456086 Mon Sep 17 00:00:00 2001 From: liaogang Date: Fri, 30 Jun 2017 12:58:02 +0800 Subject: [PATCH 190/542] ENH: Remove deprecated python build --- cmake/external/python.cmake | 187 ------------------------------------ 1 file changed, 187 deletions(-) diff --git a/cmake/external/python.cmake b/cmake/external/python.cmake index f4d0daab06..6546b2c83b 100644 --- a/cmake/external/python.cmake +++ b/cmake/external/python.cmake @@ -32,193 +32,6 @@ IF(PYTHONINTERP_FOUND) MESSAGE(FATAL_ERROR "Found Python Protobuf ${PY_GOOGLE.PROTOBUF_VERSION} < 3.0.0, " "please use pip to upgrade protobuf. pip install -U protobuf") ENDIF() -ELSE(PYTHONINTERP_FOUND) - MESSAGE(FATAL_ERROR "Please install python 2.7 before building PaddlePaddle.") - ##################################### PYTHON ######################################## - SET(PYTHON_SOURCES_DIR ${THIRD_PARTY_PATH}/python) - SET(PYTHON_INSTALL_DIR ${THIRD_PARTY_PATH}/install/python) - SET(_python_DIR ${PYTHON_INSTALL_DIR}) - - IF(UNIX) - SET(PYTHON_FOUND ON) - SET(PYTHON_INCLUDE_DIR "${PYTHON_INSTALL_DIR}/include/python2.7" CACHE PATH "Python include dir" FORCE) - SET(PYTHON_LIBRARIES "${PYTHON_INSTALL_DIR}/lib/libpython2.7.a" CACHE FILEPATH "Python library" FORCE) - SET(PYTHON_EXECUTABLE ${PYTHON_INSTALL_DIR}/bin/python CACHE FILEPATH "Python executable" FORCE) - SET(PY_SITE_PACKAGES_PATH "${PYTHON_INSTALL_DIR}/lib/python2.7/site-packages" CACHE PATH "Python site-packages path" FORCE) - ELSEIF(WIN32) - SET(PYTHON_FOUND ON) - SET(PYTHON_INCLUDE_DIR "${PYTHON_INSTALL_DIR}/include" CACHE PATH "Python include dir" FORCE) - SET(PYTHON_LIBRARIES "${PYTHON_INSTALL_DIR}/libs/python27.lib" CACHE FILEPATH "Python library" FORCE) - SET(PYTHON_EXECUTABLE "${PYTHON_INSTALL_DIR}/bin/python.exe" CACHE FILEPATH "Python executable" FORCE) - SET(PY_SITE_PACKAGES_PATH "${PYTHON_INSTALL_DIR}/Lib/site-packages" CACHE PATH "Python site-packages path" FORCE) - ELSE() - MESSAGE(FATAL_ERROR "Unknown system !") - ENDIF() - - IF(APPLE) - LIST(APPEND EXTERNAL_PROJECT_OPTIONAL_CMAKE_ARGS - -DCMAKE_BUILD_WITH_INSTALL_RPATH:BOOL=ON - ) - ENDIF() - - SET(EXTERNAL_PROJECT_OPTIONAL_CMAKE_CACHE_ARGS) - - # Force Python build to "Release". 
- IF(CMAKE_CONFIGURATION_TYPES) - SET(SAVED_CMAKE_CFG_INTDIR ${CMAKE_CFG_INTDIR}) - SET(CMAKE_CFG_INTDIR "Release") - ELSE() - LIST(APPEND EXTERNAL_PROJECT_OPTIONAL_CMAKE_CACHE_ARGS - -DCMAKE_BUILD_TYPE:STRING=Release - ) - ENDIF() - - ExternalProject_Add(python - ${EXTERNAL_PROJECT_LOG_ARGS} - GIT_REPOSITORY "https://github.com/python-cmake-buildsystem/python-cmake-buildsystem.git" - PREFIX ${PYTHON_SOURCES_DIR} - UPDATE_COMMAND "" - CMAKE_ARGS -DPYTHON_VERSION=2.7.12 - CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - CMAKE_CACHE_ARGS - -DCMAKE_INSTALL_PREFIX:PATH=${PYTHON_INSTALL_DIR} - -DBUILD_LIBPYTHON_SHARED:BOOL=OFF - -DUSE_SYSTEM_LIBRARIES:BOOL=OFF - -DZLIB_ROOT:FILEPATH=${ZLIB_ROOT} - -DZLIB_INCLUDE_DIR:PATH=${ZLIB_INCLUDE_DIR} - -DZLIB_LIBRARY:FILEPATH=${ZLIB_LIBRARIES} - -DDOWNLOAD_SOURCES:BOOL=ON - -DINSTALL_WINDOWS_TRADITIONAL:BOOL=OFF - ${EXTERNAL_PROJECT_OPTIONAL_CMAKE_CACHE_ARGS} - ${EXTERNAL_PROJECT_OPTIONAL_CMAKE_ARGS} - DEPENDS zlib - ) - - SET(py_env - PATH=${PYTHON_INSTALL_DIR}/bin - PYTHONHOME=${PYTHON_INSTALL_DIR} - PYTHONPATH=${PYTHON_INSTALL_DIR}/lib:${PYTHON_INSTALL_DIR}/lib/python2.7:${PY_SITE_PACKAGES_PATH}) - #################################################################################### - - ##################################### SETUPTOOLS ################################### - SET(SETUPTOOLS_SOURCES_DIR ${PYTHON_SOURCES_DIR}/setuptools) - ExternalProject_Add(setuptools - ${EXTERNAL_PROJECT_LOG_ARGS} - PREFIX ${SETUPTOOLS_SOURCES_DIR} - URL "https://pypi.python.org/packages/source/s/setuptools/setuptools-18.3.2.tar.gz" - BUILD_IN_SOURCE 1 - PATCH_COMMAND "" - UPDATE_COMMAND "" - CONFIGURE_COMMAND "" - INSTALL_COMMAND "" - BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install - DEPENDS python zlib - ) - ##################################################################################### - - ##################################### SIX ########################################### - SET(SIX_SOURCES_DIR ${PYTHON_SOURCES_DIR}/six) - ExternalProject_Add(six - ${EXTERNAL_PROJECT_LOG_ARGS} - PREFIX ${SIX_SOURCES_DIR} - URL https://pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz - BUILD_IN_SOURCE 1 - PATCH_COMMAND "" - UPDATE_COMMAND "" - CONFIGURE_COMMAND "" - INSTALL_COMMAND "" - BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install - DEPENDS python setuptools - ) - ##################################################################################### - - ##################################### CYTHON ######################################## - SET(CYTHON_SOURCES_DIR ${PYTHON_SOURCES_DIR}/cython) - ExternalProject_Add(cython - ${EXTERNAL_PROJECT_LOG_ARGS} - PREFIX ${CYTHON_SOURCES_DIR} - URL https://github.com/cython/cython/archive/0.25.2.tar.gz - GIT_TAG 0.25.2 - BUILD_IN_SOURCE 1 - CONFIGURE_COMMAND "" - PATCH_COMMAND "" - UPDATE_COMMAND "" - INSTALL_COMMAND "" - BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install - DEPENDS python - ) - #################################################################################### - - ##################################### NUMPY ######################################## - SET(NUMPY_SOURCES_DIR ${PYTHON_SOURCES_DIR}/numpy) - SET(NUMPY_TAG_VERSION "v1.11.3") - SET(NUMPY_VERSION "1.11.3") - - SET(EGG_NAME "") - SET(PYTHON_NUMPY_INCLUDE_DIR "") - IF(WIN32) - SET(EGG_NAME "numpy-${NUMPY_VERSION}-py2.7-${HOST_SYSTEM}.egg") - ELSE(WIN32) - IF(APPLE) - SET(EGG_NAME "numpy-${NUMPY_VERSION}-py2.7-${HOST_SYSTEM}-${MACOS_VERSION}") - 
ELSE(APPLE) - SET(EGG_NAME "numpy-${NUMPY_VERSION}-py2.7-linux") - SET(EGG_NAME "numpy-${NUMPY_VERSION}-py2.7-linux") - ENDIF(APPLE) - - FOREACH(suffix x86_64 intel fat64 fat32 universal) - LIST(APPEND PYTHON_NUMPY_INCLUDE_DIR ${PY_SITE_PACKAGES_PATH}/${EGG_NAME}-${suffix}.egg/numpy/core/include) - ENDFOREACH() - ENDIF(WIN32) - - ExternalProject_Add(numpy - ${EXTERNAL_PROJECT_LOG_ARGS} - GIT_REPOSITORY https://github.com/numpy/numpy.git - GIT_TAG ${NUMPY_TAG_VERSION} - CONFIGURE_COMMAND "" - UPDATE_COMMAND "" - PREFIX ${NUMPY_SOURCES_DIR} - BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py build - INSTALL_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install - BUILD_IN_SOURCE 1 - DEPENDS python setuptools cython - ) - #################################################################################### - - ##################################### WHEEL ######################################## - SET(WHEEL_SOURCES_DIR ${PYTHON_SOURCES_DIR}/wheel) - ExternalProject_Add(wheel - ${EXTERNAL_PROJECT_LOG_ARGS} - URL https://pypi.python.org/packages/source/w/wheel/wheel-0.29.0.tar.gz - PREFIX ${WHEEL_SOURCES_DIR} - CONFIGURE_COMMAND "" - UPDATE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install - BUILD_IN_SOURCE 1 - DEPENDS python setuptools - ) - #################################################################################### - - ################################### PROTOBUF ####################################### - SET(PY_PROTOBUF_SOURCES_DIR ${PYTHON_SOURCES_DIR}/protobuf) - ExternalProject_Add(python-protobuf - ${EXTERNAL_PROJECT_LOG_ARGS} - URL https://pypi.python.org/packages/e0/b0/0a1b364fe8a7d177b4b7d4dca5b798500dc57a7273b93cca73931b305a6a/protobuf-3.1.0.post1.tar.gz - URL_MD5 38b5fb160c768d2f8444d0c6d637ff91 - PREFIX ${PY_PROTOBUF_SOURCES_DIR} - BUILD_IN_SOURCE 1 - PATCH_COMMAND "" - CONFIGURE_COMMAND "" - BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py build - INSTALL_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install - DEPENDS python setuptools six - ) - #################################################################################### - - LIST(APPEND external_project_dependencies python setuptools six cython wheel python-protobuf numpy) - ENDIF(PYTHONINTERP_FOUND) IF(WITH_PYTHON) From 0c70f34c60845f08563f031ce815c1d565dfab6b Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Fri, 30 Jun 2017 16:59:52 +0800 Subject: [PATCH 191/542] Fix bug for flowers dataset and row_conv. 
--- python/paddle/trainer/config_parser.py | 4 ++-- .../configs/protostr/test_row_conv.protostr | 2 +- python/paddle/v2/dataset/flowers.py | 17 +++++++++++------ 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 58e4902f57..b7418101d8 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2082,10 +2082,10 @@ class MaxOutLayer(LayerBase): class RowConvLayer(LayerBase): def __init__(self, name, inputs, context_length, **xargs): super(RowConvLayer, self).__init__( - name, 'maxout', 0, inputs=inputs, **xargs) + name, 'row_conv', 0, inputs=inputs, **xargs) config_assert( len(self.inputs) == 1, - 'TransLayer must have one and only one input') + 'row convolution layer must have one and only one input.') input_layer = self.get_input_layer(0) row_conv_conf = self.config.inputs[0].row_conv_conf row_conv_conf.context_length = context_length diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_conv.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_conv.protostr index 9ec15d2a19..19c9f16574 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_conv.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_conv.protostr @@ -7,7 +7,7 @@ layers { } layers { name: "__row_conv_layer_0__" - type: "maxout" + type: "row_conv" size: 2560 active_type: "relu" inputs { diff --git a/python/paddle/v2/dataset/flowers.py b/python/paddle/v2/dataset/flowers.py index 158cfe158c..ef92fec75f 100644 --- a/python/paddle/v2/dataset/flowers.py +++ b/python/paddle/v2/dataset/flowers.py @@ -30,6 +30,7 @@ http://www.robots.ox.ac.uk/~vgg/publications/papers/nilsback08.{pdf,ps.gz}. """ import cPickle import itertools +import functools from common import download import tarfile import scipy.io as scio @@ -54,21 +55,25 @@ TEST_FLAG = 'trnid' VALID_FLAG = 'valid' -def default_mapper(sample): +def default_mapper(is_train, sample): ''' map image bytes data to type needed by model input layer ''' img, label = sample img = load_image_bytes(img) - img = simple_transform(img, 256, 224, True) + img = simple_transform(img, 256, 224, is_train) return img.flatten().astype('float32'), label +train_mapper = functools.partial(default_mapper, True) +test_mapper = functools.partial(default_mapper, False) + + def reader_creator(data_file, label_file, setid_file, dataset_name, - mapper=default_mapper, + mapper, buffered_size=1024, use_xmap=True): ''' @@ -118,7 +123,7 @@ def reader_creator(data_file, return map_readers(mapper, reader) -def train(mapper=default_mapper, buffered_size=1024, use_xmap=True): +def train(mapper=train_mapper, buffered_size=1024, use_xmap=True): ''' Create flowers training set reader. It returns a reader, each sample in the reader is @@ -141,7 +146,7 @@ def train(mapper=default_mapper, buffered_size=1024, use_xmap=True): buffered_size, use_xmap) -def test(mapper=default_mapper, buffered_size=1024, use_xmap=True): +def test(mapper=test_mapper, buffered_size=1024, use_xmap=True): ''' Create flowers test set reader. It returns a reader, each sample in the reader is @@ -164,7 +169,7 @@ def test(mapper=default_mapper, buffered_size=1024, use_xmap=True): buffered_size, use_xmap) -def valid(mapper=default_mapper, buffered_size=1024, use_xmap=True): +def valid(mapper=test_mapper, buffered_size=1024, use_xmap=True): ''' Create flowers validation set reader. 
It returns a reader, each sample in the reader is From 9bb33f27f8313c2515b6dbfcfe8352b4a2c3bef6 Mon Sep 17 00:00:00 2001 From: qijun Date: Fri, 30 Jun 2017 17:10:40 +0800 Subject: [PATCH 192/542] fix input shape of train_y --- doc/getstarted/concepts/src/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/getstarted/concepts/src/train.py b/doc/getstarted/concepts/src/train.py index 679d0a931a..7e604f23de 100644 --- a/doc/getstarted/concepts/src/train.py +++ b/doc/getstarted/concepts/src/train.py @@ -31,7 +31,7 @@ def event_handler(event): # define training dataset reader def train_reader(): train_x = np.array([[1, 1], [1, 2], [3, 4], [5, 2]]) - train_y = np.array([-2, -3, -7, -7]) + train_y = np.array([[-2], [-3], [-7], [-7]]) def reader(): for i in xrange(train_y.shape[0]): From 9e445eca89ae936ec82034c21b8311ccecdfc0ef Mon Sep 17 00:00:00 2001 From: liaogang Date: Fri, 30 Jun 2017 17:16:59 +0800 Subject: [PATCH 193/542] FIX: Replace static libs check via system warning --- cmake/generic.cmake | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 03dabe7283..88be13b2ac 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -101,23 +101,16 @@ function(merge_static_libs TARGET_NAME) # First get the file names of the libraries to be merged foreach(lib ${libs}) - get_target_property(libtype ${lib} TYPE) - if(NOT libtype STREQUAL "STATIC_LIBRARY") - message(FATAL_ERROR "merge_static_libs can only process static libraries") - endif() set(libfiles ${libfiles} $) endforeach() if(APPLE) # Use OSX's libtool to merge archives - add_custom_target(${TARGET_NAME}_archive - COMMAND libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - DEPENDS ${libs} - ) - add_library(${TARGET_NAME} STATIC IMPORTED GLOBAL) - set_property(TARGET ${TARGET_NAME} PROPERTY - IMPORTED_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a") - add_dependencies(${TARGET_NAME} ${TARGET_NAME}_archive) + set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c) + file(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";") + add_library(${TARGET_NAME} STATIC ${dummyfile}) + add_custom_command(TARGET ${TARGET_NAME} POST_BUILD + COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" + COMMAND /usr/bin/libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles}) else() # general UNIX: use "ar" to extract objects and re-add to a common lib foreach(lib ${libs}) set(objlistfile ${lib}.objlist) # list of objects in the input library From e287034d73109a652a47b4f5132b5366f251711f Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Fri, 30 Jun 2017 17:29:07 +0800 Subject: [PATCH 194/542] minus mean in flowers dataset. 
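The diff below subtracts a per-channel mean inside `simple_transform`; when one value per channel is given, numpy's `mean[:, np.newaxis, np.newaxis]` broadcasts it over the whole H x W plane of the CHW image before `im -= mean`. A rough equivalent of that broadcast, purely for illustration (the function name and layout handling here are assumptions, not Paddle code):

```cpp
#include <vector>

// Subtract one mean value per channel from a CHW image - the same effect
// numpy achieves with mean[:, np.newaxis, np.newaxis] broadcasting.
void SubtractChannelMean(std::vector<float>& chw, int channels, int height,
                         int width, const std::vector<float>& mean) {
  const int plane = height * width;
  for (int c = 0; c < channels; ++c) {
    for (int i = 0; i < plane; ++i) {
      chw[c * plane + i] -= mean[c];  // e.g. {103.94f, 116.78f, 123.68f}
    }
  }
}
```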
--- python/paddle/v2/dataset/flowers.py | 3 ++- python/paddle/v2/image.py | 26 ++++++++++++++++++++++---- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/python/paddle/v2/dataset/flowers.py b/python/paddle/v2/dataset/flowers.py index ef92fec75f..fb9062fbb4 100644 --- a/python/paddle/v2/dataset/flowers.py +++ b/python/paddle/v2/dataset/flowers.py @@ -61,7 +61,8 @@ def default_mapper(is_train, sample): ''' img, label = sample img = load_image_bytes(img) - img = simple_transform(img, 256, 224, is_train) + img = simple_transform( + img, 256, 224, is_train, mean=[103.94, 116.78, 123, 68]) return img.flatten().astype('float32'), label diff --git a/python/paddle/v2/image.py b/python/paddle/v2/image.py index 0d648e9ae6..965d965335 100644 --- a/python/paddle/v2/image.py +++ b/python/paddle/v2/image.py @@ -262,7 +262,12 @@ def left_right_flip(im): return im[:, ::-1, :] -def simple_transform(im, resize_size, crop_size, is_train, is_color=True): +def simple_transform(im, + resize_size, + crop_size, + is_train, + is_color=True, + mean=None): """ Simply data argumentation for training. These operations include resizing, croping and flipping. @@ -288,7 +293,19 @@ def simple_transform(im, resize_size, crop_size, is_train, is_color=True): im = left_right_flip(im) else: im = center_crop(im, crop_size) - im = to_chw(im) + if len(im.shape) == 3: + im = to_chw(im) + + im = im.astype('float32') + if mean is not None: + mean = np.array(mean, dtype=np.float32) + # mean value, may be one value per channel + if mean.ndim == 1: + mean = mean[:, np.newaxis, np.newaxis] + else: + # elementwise mean + assert len(mean.shape) == len(im) + im -= mean return im @@ -297,7 +314,8 @@ def load_and_transform(filename, resize_size, crop_size, is_train, - is_color=True): + is_color=True, + mean=None): """ Load image from the input file `filename` and transform image for data argumentation. Please refer to the `simple_transform` interface @@ -318,5 +336,5 @@ def load_and_transform(filename, :type is_train: bool """ im = load_image(filename) - im = simple_transform(im, resize_size, crop_size, is_train, is_color) + im = simple_transform(im, resize_size, crop_size, is_train, is_color, mean) return im From 0925681543ed8d2b50a67bd6695614a17fea9006 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Fri, 30 Jun 2017 17:45:28 +0800 Subject: [PATCH 195/542] fix typo. --- python/paddle/v2/dataset/flowers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/v2/dataset/flowers.py b/python/paddle/v2/dataset/flowers.py index fb9062fbb4..e2a21e6e3e 100644 --- a/python/paddle/v2/dataset/flowers.py +++ b/python/paddle/v2/dataset/flowers.py @@ -62,7 +62,7 @@ def default_mapper(is_train, sample): img, label = sample img = load_image_bytes(img) img = simple_transform( - img, 256, 224, is_train, mean=[103.94, 116.78, 123, 68]) + img, 256, 224, is_train, mean=[103.94, 116.78, 123.68]) return img.flatten().astype('float32'), label From b5514602b6019a4b30515079e4be17bf4276cb19 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Fri, 30 Jun 2017 17:46:48 +0800 Subject: [PATCH 196/542] Add the use_nnpack parameter in ExpandConvLayer, so that the convolution calculation can be switched to the NNPACK function. 
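Because the switch added below is a plain gflags boolean, it can be toggled per run from the command line rather than at compile time. A minimal, self-contained sketch of that behavior - only the flag definition is taken verbatim from the patch, the surrounding program is illustrative:

```cpp
#include <cstdio>
#include <gflags/gflags.h>

// The same definition the patch adds to ExpandConvLayer.cpp.
DEFINE_bool(use_nnpack,
            false,
            "Whether to use nnpack for convolution calculation.");

int main(int argc, char** argv) {
  // Running `./demo --use_nnpack=true` sets FLAGS_use_nnpack, which is the
  // value ExpandConvLayer::init() checks to pick NNPACKConv over GemmConv.
  google::ParseCommandLineFlags(&argc, &argv, true);
  std::printf("use_nnpack = %s\n", FLAGS_use_nnpack ? "true" : "false");
  return 0;
}
```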
---
 paddle/function/nnpack/NNPACKConvOp.cpp   |  5 +-
 paddle/gserver/layers/ExpandConvLayer.cpp | 56 +++++++++++++++--------
 2 files changed, 40 insertions(+), 21 deletions(-)

diff --git a/paddle/function/nnpack/NNPACKConvOp.cpp b/paddle/function/nnpack/NNPACKConvOp.cpp
index d75fab0403..e8080c3d71 100644
--- a/paddle/function/nnpack/NNPACKConvOp.cpp
+++ b/paddle/function/nnpack/NNPACKConvOp.cpp
@@ -70,6 +70,9 @@ public:
     if (threadpool_) {
       pthreadpool_destroy(threadpool_);
     }
+    if (workspaceBuffer_) {
+      free(workspaceBuffer_);
+    }
   }
 
   virtual void check(const BufferArgs& inputs,
@@ -160,7 +163,7 @@ public:
       CHECK_EQ(status, nnp_status_success);
     }
 
-    LOG(INFO) << "workspace size is " << needSize;
+    VLOG(3) << "workspace size is " << needSize;
     if (needSize > workspaceSize_) {
       workspaceSize_ = needSize;
       if (workspaceBuffer_) {
diff --git a/paddle/gserver/layers/ExpandConvLayer.cpp b/paddle/gserver/layers/ExpandConvLayer.cpp
index 914689e66c..29e2113aff 100644
--- a/paddle/gserver/layers/ExpandConvLayer.cpp
+++ b/paddle/gserver/layers/ExpandConvLayer.cpp
@@ -16,6 +16,10 @@ limitations under the License. */
 #include "paddle/utils/Logging.h"
 #include "paddle/utils/Stat.h"
 
+DEFINE_bool(use_nnpack,
+            false,
+            "Whether to use nnpack for convolution calculation.");
+
 namespace paddle {
 
 /*
@@ -37,26 +41,38 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
   for (int i = 0; i < config_.inputs_size(); i++) {
     std::vector<size_t> paddings = {(size_t)paddingY_[i], (size_t)padding_[i]};
     std::vector<size_t> strides = {(size_t)strideY_[i], (size_t)stride_[i]};
-    createFunction(forward_,
-                   !isDeconv_ ? "GemmConv" : "GemmConvGradInput",
-                   FuncConfig()
-                       .set("paddings", paddings)
-                       .set("strides", strides)
-                       .set("groups", (size_t)groups_[i]));
-
-    createFunction(backward_,
-                   !isDeconv_ ? "GemmConvGradInput" : "GemmConv",
-                   FuncConfig()
-                       .set("paddings", paddings)
-                       .set("strides", strides)
-                       .set("groups", (size_t)groups_[i]));
-
-    createFunction(backward_,
-                   "GemmConvGradFilter",
-                   FuncConfig()
-                       .set("paddings", paddings)
-                       .set("strides", strides)
-                       .set("groups", (size_t)groups_[i]));
+
+    if (FLAGS_use_nnpack) {
+      CHECK_EQ(isDeconv_, false);
+      createFunction(forward_,
+                     "NNPACKConv",
+                     FuncConfig()
+                         .set("paddings", paddings)
+                         .set("strides", strides)
+                         .set("groups", (size_t)groups_[i])
+                         .set("algo", "auto"));
+    } else {
+      createFunction(forward_,
+                     !isDeconv_ ? "GemmConv" : "GemmConvGradInput",
+                     FuncConfig()
+                         .set("paddings", paddings)
+                         .set("strides", strides)
+                         .set("groups", (size_t)groups_[i]));
+
+      createFunction(backward_,
+                     !isDeconv_ ? "GemmConvGradInput" : "GemmConv",
+                     FuncConfig()
+                         .set("paddings", paddings)
+                         .set("strides", strides)
+                         .set("groups", (size_t)groups_[i]));
+
+      createFunction(backward_,
+                     "GemmConvGradFilter",
+                     FuncConfig()
+                         .set("paddings", paddings)
+                         .set("strides", strides)
+                         .set("groups", (size_t)groups_[i]));
+    }
   }
   return true;
 }

From 47f1031fb7e0644ab2797343f818d32f1c45fa38 Mon Sep 17 00:00:00 2001
From: hedaoyuan
Date: Fri, 30 Jun 2017 18:06:49 +0800
Subject: [PATCH 197/542] Modify the type of the algo parameter.
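The one-line change below is easier to see with a stripped-down analogue. Assuming `FuncConfig::set` is a function template that deduces the stored value type (an assumption - the real signature is not shown in this series), the bare literal `"auto"` deduces to `const char*`, so a later lookup of the value as a `std::string` would miss it; wrapping the literal in `std::string` fixes the deduced type. A hypothetical sketch, not Paddle code:

```cpp
#include <iostream>
#include <string>
#include <typeinfo>

// Hypothetical stand-in for a templated FuncConfig::set.
template <typename T>
void set(const char* key, T value) {
  std::cout << key << " stored as type: " << typeid(T).name() << "\n";
}

int main() {
  set("algo", "auto");               // T deduced as const char*
  set("algo", std::string("auto"));  // T deduced as std::string
  return 0;
}
```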
--- paddle/gserver/layers/ExpandConvLayer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/gserver/layers/ExpandConvLayer.cpp b/paddle/gserver/layers/ExpandConvLayer.cpp index 29e2113aff..af79e65a7c 100644 --- a/paddle/gserver/layers/ExpandConvLayer.cpp +++ b/paddle/gserver/layers/ExpandConvLayer.cpp @@ -50,7 +50,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, .set("paddings", paddings) .set("strides", strides) .set("groups", (size_t)groups_[i]) - .set("algo", "auto")); + .set("algo", std::string("auto"))); } else { createFunction(forward_, !isDeconv_ ? "GemmConv" : "GemmConvGradInput", From 260427d2df5398ab3dac0ea3b8d6c54e2aa087fb Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Fri, 30 Jun 2017 18:20:21 +0800 Subject: [PATCH 198/542] "fix copy go master lib2python" --- python/CMakeLists.txt | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index a9842152c8..361e764e25 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -13,8 +13,11 @@ set(PY_FILES paddle/__init__.py ${V2_PY_FILES}) add_custom_target(copy_paddle_master) + +SET(COPY_PADDLE_MASTER "") if(WITH_GOLANG) - add_custom_command(TARGET copy_paddle_master + SET(COPY_PADDLE_MASTER "copy_paddle_master") + add_custom_command(TARGET ${COPY_PADDLE_MASTER} COMMAND cp ${paddle_master_LIB_PATH} ${PROJ_ROOT}/python/paddle/v2/master/ ) add_dependencies(copy_paddle_master paddle_master) @@ -26,7 +29,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT_DIR}/.timestamp - DEPENDS gen_proto_py ${PY_FILES} ${external_project_dependencies} copy_paddle_master) + DEPENDS gen_proto_py ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER}) add_custom_target(paddle_python ALL DEPENDS ${OUTPUT_DIR}/.timestamp) From f712b027a75fbfdf2456c42d66010dc3c5f100f7 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Fri, 30 Jun 2017 18:40:32 +0800 Subject: [PATCH 199/542] "fix protobuf depend" --- go/pserver/cclient/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/pserver/cclient/CMakeLists.txt b/go/pserver/cclient/CMakeLists.txt index e12cf88068..7fe74c62f1 100644 --- a/go/pserver/cclient/CMakeLists.txt +++ b/go/pserver/cclient/CMakeLists.txt @@ -1,4 +1,4 @@ -cc_library(paddle_go_optimizer DEPS paddle_optimizer paddle_proto glog gflags) +cc_library(paddle_go_optimizer DEPS paddle_optimizer paddle_proto glog gflags protobuf) go_library(paddle_pserver_cclient STATIC) if(WITH_TESTING) add_subdirectory(test) From 62e582e8109ff08089f72e88511162fe51ae031f Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Fri, 30 Jun 2017 18:23:46 +0000 Subject: [PATCH 200/542] polish wording and grammar. --- doc/design/cluster_train/save_model.md | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/doc/design/cluster_train/save_model.md b/doc/design/cluster_train/save_model.md index 76ac8d8387..b70f00176b 100644 --- a/doc/design/cluster_train/save_model.md +++ b/doc/design/cluster_train/save_model.md @@ -15,13 +15,13 @@ ways from which user can obtain a model: ### Trainer Saving Model vs. Pservers Saving Model Both trainers and pservers have access to the model. So the model can -be saved from a trainer or pservers. We need to decide on where the -model is saved from. +be saved from a trainer or pservers. 
We need to decide where the model +is saved from. #### Dense Update vs. Sparse Update There are two types of model update methods: dense update and sparse -update (when the parameter is configured to be sparse). +update (when the model parameter is configured to be sparse). - Dense update @@ -48,15 +48,15 @@ filesystem, making the checkpoint shards visible to the merge program. The benefit of letting one trainer to save the model is it does not require a distributed filesystem. And it's reusing the same save model -logic when the trainer is training locally - except when doing sparse -update, the trainer needs to download the entire model during the -saving process. +logic when training locally - except when doing sparse update, the +trainer needs to download the entire model during the saving process. #### Conclusion Given trainer saving model does not require a distributed filesystem, -and is an intuitive extension to training locally, we decide to let -the trainer save the model. +and is an intuitive extension to trainer saving model when training +locally, we decide to let the trainer save the model when doing +distributed training. ### Convert Model from Checkpoint @@ -84,16 +84,16 @@ save the model. Each trainer will be given the directory to save the model. The elected trainer will save the model to -`given-directory/trainerID`. Since the tainerID is unique, this would -prevent concurrent save to the same file when multiple trainers are -elected to save the model when split-brain problem happens. +`given-directory/trainerID`. Since the trainer ID is unique, this +would prevent concurrent save to the same file when multiple trainers +are elected to save the model when split-brain problem happens. ### What Happens When Model Is Saving It takes some time to save model, we need to define what will happen when save model is taking place. -When saving a dense model, the trainer uses the local model. Pservers +When doing dense update, the trainer uses the local model. Pservers does not need to pause model update. When doing sparse update. The trainer needs to download the entire @@ -103,7 +103,7 @@ download finishes. Otherwise, the trainer gets a model that is "polluted": some part of the model is old, some part of the model is new. -It's unclear that the "polluted" model will be inferiod due to the +It's unclear that the "polluted" model will be inferior due to the stochastic nature of deep learning, and pausing the model update will add more complexity to the system. Since supporting sparse update is a TODO item. We defer the evaluation of pause the model update or not From a7f9625391f95bbd44b13a946d595b88e1464d42 Mon Sep 17 00:00:00 2001 From: "yi.wu" Date: Sat, 1 Jul 2017 18:38:19 +0800 Subject: [PATCH 201/542] update --- paddle/scripts/docker/build.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 54e80fee34..a182e5f4ae 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -50,7 +50,6 @@ cmake .. 
\ -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} \ -DWITH_TESTING=${WITH_TESTING:-OFF} \ -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -exit 1 cat < Date: Sun, 2 Jul 2017 16:14:18 +0800 Subject: [PATCH 202/542] "polish cgo link option" --- go/pserver/optimizer.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go index 46e614d3a1..070896f7c7 100644 --- a/go/pserver/optimizer.go +++ b/go/pserver/optimizer.go @@ -1,6 +1,5 @@ package pserver -// #cgo pkg-config: protobuf // #cgo CFLAGS: -I ../../ // //FIXME: ldflags contain "build" path // #cgo LDFLAGS: ../../build/go/pserver/cclient/libpaddle_go_optimizer.a -lstdc++ @@ -50,7 +49,7 @@ func newOptimizer(paramWithConfigs ParameterWithConfig) *optimizer { cbuffer = C.malloc(C.size_t(len(p.Content))) C.memcpy(cbuffer, unsafe.Pointer(&p.Content[0]), C.size_t(len(p.Content))) o.opt = C.paddle_create_optimizer((*C.uchar)(&c[0]), C.int(len(c)), - C.paddle_element_type(p.ElementType), cbuffer, C.int(len(p.Content)), + C.paddle_element_type(p.ElementType), cbuffer, C.int(len(p.Content)*C.sizeof_float), (*C.char)(nullPtr), 0) return o } From e2c842353739ce3577b0f79267976928a44deb55 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Sun, 2 Jul 2017 17:06:28 +0800 Subject: [PATCH 203/542] add simple_op_design.md --- doc/design/simple_op_design.md | 273 +++++++++++++++++++++++++++++++++ 1 file changed, 273 insertions(+) create mode 100644 doc/design/simple_op_design.md diff --git a/doc/design/simple_op_design.md b/doc/design/simple_op_design.md new file mode 100644 index 0000000000..93c0f68ca9 --- /dev/null +++ b/doc/design/simple_op_design.md @@ -0,0 +1,273 @@ +## Interaction between C++ and Python + +Users employ API in Python to describe their own network, however, the network construction actually happens in C++. so Protobuf is introduced to send the message between Python and C++. + +The Interaction between Python and C++ can be simplified as two steps: + +1. C++ tells Python how many Ops there are, and what parameter do users need to offer to initialize a new Op. Python then builds API for each Op at compile time. + +2. Users invoke APIs built by Python and provide necessary parameters. These parameters will be sent to C++ fo finish Op construction task. + +### Message form C++ to Python + +We define a Protobuf message class `OpProto` to hold message needed in the first step. What should an `OpProto` contain? This question is equivalent to “What message do we need to offer, to build a Python API which is legal and user oriented and can use to describe a whole Op.” + +Following message are necessary: + +1. Op's name, and its simple comment. +2. Input and output variable number; each variable's name, type, and comment. +3. Op's attributes; each attribute includes name, type, comment, **default value** and **value range**. 
+ +So `OpProto` can be defined as follows: + +```proto +enum AttrType { + INT = 1; + FLOAT = 2; + STRING = 3; + INTS = 4; + FLOATS = 5; + STRINGS = 6; +}; + +message AttrValue { + AttrType type = 1; + optional int iv = 2; + optional float fv = 3; + optional string sv = 4; + repeated int ivs = 5; + repeated float fvs = 6; + repeated string svs = 7; +}; + +message AttrProto { + required string name = 1; + required string comment = 2; + optional AttrValue default = 3; + optional AttrValue max = 4; + optional AttrValue min = 5; + required AttrType type = 6; +}; + +message VarProto { + required string name = 1; + required string comment = 2; +}; + +message OpProto { + repeated VarProto inputs = 1; + repeated VarProto outputs = 2; + repeated AttrProto attrs = 3; + required string type = 4; + required string comment = 5; +}; +``` + +The default value and value range didn't appear in out previous design. By adding these two fields, we are able to check attribute validity in Python and find out possible error as soon as possible. What's more, by providing the message about default value and value range to Python docstring, it helps to automatically generate more comprehensive documents. + +### Message from Python to C++ + +To hold message needed in the above second step, we define Protobuf message class `OpDesc`. It is used to hold user-specified parameters in Op describing. + +```proto +message OpDesc { + required string type = 1; + repeated string inputs = 2; + repeated string outputs = 3; + map attrs = 4; +}; +``` + +## OpProto Register + +Every Op has its own `OpProto`. For using convenience, we need to register them and record all their messages. For each `Op` class, we define a corresponding `OpMaker` class, in whose constructor we implement the `OpProto`'s building process. `OpMaker`'s constructor will be invoked by another function `OpRegistry::RegisterOp()`. + +```cpp +class OpProtoMaker { +public: + OpProtoMaker(OpProto* proto): proto_(proto) {} +protected: + OpProto* proto_; + void AddInput(const std::string& name, const std::string& desc) {...} + void AddAttr(const std::string& name, const std::string& desc, TypeId type) {...} + void AddComment(const std::string& comment) { ... } +}; + +class OpRegistry { +public: + using OpCreator = std::function; + + template + static void RegisterOp(const std::string& name) { + gCreators_[name] = [](const OpDesc& desc) { + return new OpType(desc); + }; + OpProto& opProto = gProtos_[name]; + OpMaker()(&opProto); + } + + static map gCreators_; + static map gProtos_; +}; + +template +class OpRegister { + public: + OpRegister(std::string type) { + OpRegistry::RegisterOp(type); + } +}; + +#define REGISTER_OP(op_class, op_maker_class, type_name) \ + class op_class##Register { \ + private: \ + const static OpRegister<#op_class, #op_maker_class> reg; \ + }; \ + const Register op_class##Register::reg(#type_name); + +class CosineOp { +// ... +} + +struct CosineOpProtoMaker : public OpProtoMaker { + CosineOpProtoMaker(OpProto* proto) : OpProtoMaker(proto) { + AddInput("input", "input of cosine op"); + AddAttr("scale", "scale of cosine op", float).Default(1.0).LargerThan(0.0); + AddType("cos"); + AddComment("This is cos op"); + } +} + +REGISTER_OP(CosineOp, CosineOpProtoMaker, cos); +``` + +In `REGISTER_OP(CosineOp, CosineOpProtoMaker, cos)`, we register not only `CosineOp` but also `CosineOpProto`. As fields of `CosineOpProto`, the default value and value range of `scale` are also registered here. 
+ +## Python API + +Python APIs are divided into two types, high-level API and low-level API. + +### High-Level API + +High-level API is called by users directly, so it should keep its style consistent with existing V2 APIs. + +Here is a sample about how a define a fc layer: + +```python +hd = fc_layer(input=data, size=56, with_bias=True, activation="sigmoid"); +``` + +`hd` is the output of `fc_layer` and it's a `variable`. It can be further sent into other layers as input. + +The definition of `fc_layer()`: + +```python +def fc_layer(input, size, with_bias, activation): + attr_map = {"size":size} + check_attrs(attr_map) + w = make_variable('w') + if with_bias: + b = make_variable('b') + else: + b = None + fc_output = make_variable('fc_output'); + fc_op(input, w, b, fc_output, attr_map) + act_output = make_variable('sigmod_output'); + if activation == "sigmod": + sigmod_op(fc_output, act_output); + elif: + # ... + return act_output; +``` + +### Low Leval API + +In above sample, `fc_op` and `sigmod_op` are low-level API. They build `OpDesc` and invoke corresponding C++ code. + +*TODO* + +## Op and Kernal + +After completely defined, an Op will be run in a network. However, Op's computing method may differ on different devices. One solution is that write an `Op`'s member function `Op::run()`, which contains computing methods of all possible devices. That may be a bad idea because we have to change all `Op`'s code to add a new device. + +Another choice is adding a concept named `kernal`. A `Kernal` describes an op's computing process on a certain device. After stripping `Variable` and `kernal`, `Op` becomes a pure conceptual class, which holds neither data nor detailed computing process. + +```cpp +class KernalBase { +public: + virtual void RunOnDevice(std::vector input_vars, + std::vector input_vars, + const OpAttrs* attrs) = 0; +}; + +template +class CosineKernal : public KernalBase { +public: + virtual void RunOnDevice(std::vector input_vars, + std::vector input_vars, + const OpAttrs* attrs) { + // no implementation + } +}; + +template <> +class CosineKernal : public KernalBase { +public: + virtual void RunOnDevice(std::vector input_vars, + std::vector input_vars, + const OpAttrs* attrs) { + CosineOpAttrs* cosine_attrs = static_cast(attrs); + // computing code + // ... + } +}; + +struct OpAttrs {...}; + +class Op { + public: + std::string get_kernal_name() { + return kernel_name_; + } + const vector& get_input_names() { + return input_names_; + } + const vector& get_output_names() { + return output_names_; + } + // ... + private: + std::vector input_names_; + std::vector output_names_; + std::string kernal_name_; + +} + +struct CosineOpAttrs : public OpAttrs { + float scale_; +} + +class CosineOp : public Op { + public: + const CosineOpAtrrs* get_attrs() { + return &attrs; + } + + private: + CosineOpAttrs attrs; +} + +RunOp(const Op& op, Scope scope) { + Kernal* kernal = get_kernal(scope, op.get_kernal_name()); + std::vector input_vars = + get_variables(scope, op.get_input_name()); + std::vector output_vars = + get_variables(scope, op.get_output_name()); + + kernal->RunOnDevice(input_vars, output_vars, op.get_attrs()); +} +``` + +All `Kernal` need to be registered beforehand, just like `Op`. + +Now, `Op` is no longer has `Run()` function. It only contains names of variables and kernels. During network running, `RunOp()` is called to invoke `Op`'s corresponding `Kernal`. `get_kernal()` is supposed to return `kernal` for current device. 
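The registration scheme proposed in this design document is easier to see in a small, self-contained form. The sketch below illustrates the creator-map pattern it describes; the names (`Op`, `OpDesc`, `OpRegistry`) mirror the document's proposed API but are simplified stand-ins, not Paddle's real classes:

```cpp
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>

// Simplified stand-in for the OpDesc proto message described above.
struct OpDesc {
  std::string type;
};

struct Op {
  virtual ~Op() = default;
  virtual void Run() = 0;
};

struct CosineOp : Op {
  void Run() override { std::cout << "running cos op\n"; }
};

// The creator-map pattern: each registered type maps to a factory closure.
class OpRegistry {
 public:
  using Creator = std::function<std::unique_ptr<Op>(const OpDesc&)>;

  template <typename T>
  static void Register(const std::string& type) {
    Creators()[type] = [](const OpDesc&) { return std::make_unique<T>(); };
  }

  static std::unique_ptr<Op> Create(const OpDesc& desc) {
    return Creators().at(desc.type)(desc);
  }

 private:
  static std::map<std::string, Creator>& Creators() {
    static std::map<std::string, Creator> creators;
    return creators;
  }
};

int main() {
  OpRegistry::Register<CosineOp>("cos");
  OpDesc desc;
  desc.type = "cos";
  OpRegistry::Create(desc)->Run();
  return 0;
}
```

The doc's `REGISTER_OP` macro is essentially sugar over a `Register` call like the one in `main` here, run at static-initialization time.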
From cee264f3f9e389ed24bbf504d9ab82ebd6354512 Mon Sep 17 00:00:00 2001
From: wuyi05
Date: Mon, 3 Jul 2017 09:51:03 +0800
Subject: [PATCH 204/542] update for comments

---
 Dockerfile          | 7 ++++---
 cmake/generic.cmake | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index bf227737c5..d10fad6370 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -34,11 +34,12 @@ RUN apt-get update && \
     net-tools && \
     apt-get clean -y

-# Install Go
+# Install Go and glide
 RUN wget -O go.tgz https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz && \
     tar -C /usr/local -xzf go.tgz && \
     mkdir /root/gopath && \
-    rm go.tgz
+    rm go.tgz && \
+    curl https://glide.sh/get | sh
 ENV GOROOT=/usr/local/go GOPATH=/root/gopath
 # should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT.
 ENV PATH=${PATH}:${GOROOT}/bin
@@ -57,7 +58,7 @@ RUN pip install --upgrade pip && \
     pip install -U docopt PyYAML sphinx && \
     pip install -U sphinx-rtd-theme==0.1.9 recommonmark && \
     pip install pre-commit 'requests==2.9.2' 'ipython==5.3.0' && \
-    pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
+    pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
     pip install rarfile

 # To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use
diff --git a/cmake/generic.cmake b/cmake/generic.cmake
index 92e14f2581..ca358da8f1 100644
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -302,7 +302,7 @@ function(go_binary TARGET_NAME)
     -o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}"
     "./${CMAKE_CURRENT_SOURCE_REL_DIR}/${go_binary_SRCS}"
     WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go")
-  # add_custom_target(${TARGET_NAME} ALL DEPENDS go_vendor ${TARGET_NAME}_link ${TARGET_NAME}_timestamp ${go_binary_DEPS})
+  # TODO: don't know what ${TARGET_NAME}_link does
   add_custom_target(${TARGET_NAME} ALL DEPENDS go_vendor ${TARGET_NAME}_timestamp ${go_binary_DEPS})
   install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME} DESTINATION bin)
 endfunction(go_binary)

From 6f7a9dd5c0d6280c663909add5fe2ff4c0f28c71 Mon Sep 17 00:00:00 2001
From: wuyi05
Date: Mon, 3 Jul 2017 09:57:03 +0800
Subject: [PATCH 205/542] remove unnecessary comments

---
 go/CMakeLists.txt | 2 --
 1 file changed, 2 deletions(-)

diff --git a/go/CMakeLists.txt b/go/CMakeLists.txt
index 9774a89e42..014697d155 100644
--- a/go/CMakeLists.txt
+++ b/go/CMakeLists.txt
@@ -12,10 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
# -# FIXME(typhoonzero): Download glide into cmake build temprary GOPATH add_subdirectory(pserver/cclient) add_subdirectory(cmd/pserver) add_subdirectory(cmd/master) add_subdirectory(master/c) -#TODO (add go/master/c back when fixed) From 26e661bc51e2fac36c3692d748b7db8a950cb370 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Mon, 3 Jul 2017 03:05:36 +0000 Subject: [PATCH 206/542] fix by helin's comments --- go/master/c/client.go | 4 ++-- python/paddle/v2/master/client.py | 4 ++-- python/paddle/v2/reader/creator.py | 34 ++++++++++++++++++------------ 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/go/master/c/client.go b/go/master/c/client.go index 635688f196..31f4311974 100644 --- a/go/master/c/client.go +++ b/go/master/c/client.go @@ -106,7 +106,7 @@ func paddle_set_dataset(client C.paddle_master_client, path **C.char, size C.int // return value: // 0:ok -// -2:error +// -1:error //export paddle_next_record func paddle_next_record(client C.paddle_master_client, record **C.uchar) C.int { c := get(client) @@ -115,7 +115,7 @@ func paddle_next_record(client C.paddle_master_client, record **C.uchar) C.int { // Error // TODO: return the type of error? *record = (*C.uchar)(nullPtr) - return -2 + return -1 } if len(r) == 0 { diff --git a/python/paddle/v2/master/client.py b/python/paddle/v2/master/client.py index 6ddb09e4e8..70f9e43c96 100644 --- a/python/paddle/v2/master/client.py +++ b/python/paddle/v2/master/client.py @@ -28,12 +28,12 @@ class client(object): # return format: (record, errno) # errno = 0: ok - # < -1: error + # < 0: error def next_record(self): p = ctypes.c_char_p() ret = ctypes.pointer(p) size = lib.paddle_next_record(self.c, ret) - if size < -1: + if size < 0: # Error return None, size diff --git a/python/paddle/v2/reader/creator.py b/python/paddle/v2/reader/creator.py index 2e8626e565..20624d5286 100644 --- a/python/paddle/v2/reader/creator.py +++ b/python/paddle/v2/reader/creator.py @@ -57,29 +57,31 @@ def text_file(path): return reader -def recordio_local(paths): +def recordio_local(paths, buf_size=100): """ - Creates a data reader that outputs record one one by one - from given local recordio fils path. + Creates a data reader from given RecordIO file paths separated by ",", + glob pattern is supported. :path: path of recordio files. :returns: data reader of recordio files. """ import recordio as rec + import paddle.v2.reader.decorator as dec def reader(): - for i, path in enumerate(paths): - f = rec.reader(path) - while True: - r = f.read() - if r is None: - break - yield r - f.close() + a = ','.join(paths) + f = rec.reader(a) + while True: + r = f.read() + if r is None: + break + yield r + f.close() + + return dec.buffered(reader, buf_size) - return reader -def recordio(paths, addr="", buf_size=100): +def recordio(paths, buf_size=100): """ Creates a data reader that outputs record one one by one from given local or cloud recordio path. 
@@ -92,6 +94,12 @@ def recordio(paths, addr="", buf_size=100):
     if "KUBERNETES_SERVICE_HOST" not in os.environ.keys():
         return recordio_local(paths)

+    host_name = "MASTER_SERVICE_HOST"
+    if host_name not in os.environ.keys():
+        raise Exception('could not find ' + host_name + ' in environ.')
+
+    addr = os.environ[host_name]
+
     def reader():
         c = cloud(addr, buf_size)
         c.set_dataset(paths)

From 275e5b7d42903ea3c9bf4e4fed3f9eab45c727bf Mon Sep 17 00:00:00 2001
From: liaogang
Date: Mon, 3 Jul 2017 11:12:18 +0800
Subject: [PATCH 207/542] FIX: yapf format version

---
 python/paddle/trainer_config_helpers/networks.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py
index f0b6625dc3..b77932ce5f 100755
--- a/python/paddle/trainer_config_helpers/networks.py
+++ b/python/paddle/trainer_config_helpers/networks.py
@@ -1395,7 +1395,7 @@ def inputs(layers, *args):
     if len(args) != 0:
         layers.extend(args)

-    Inputs(*[l.name for l in layers])
+    Inputs(* [l.name for l in layers])


 def outputs(layers, *args):
@@ -1438,7 +1438,7 @@ def outputs(layers, *args):
     assert len(layers) > 0

     if HasInputsSet():  # input already set
-        Outputs(*[l.name for l in layers])
+        Outputs(* [l.name for l in layers])
         return  # just return outputs.

     if len(layers) != 1:

From b9c15b6fffa55b6c42e6f06cc3eba3ccceea073c Mon Sep 17 00:00:00 2001
From: dongzhihong
Date: Mon, 3 Jul 2017 12:23:26 +0800
Subject: [PATCH 208/542] "remove unused tests"

---
 go/pserver/optimizer.go      |  6 +++---
 go/pserver/optimizer_test.go | 24 ------------------------
 go/pserver/service_test.go   |  3 +--
 3 files changed, 4 insertions(+), 29 deletions(-)

diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go
index 070896f7c7..b4a040f46b 100644
--- a/go/pserver/optimizer.go
+++ b/go/pserver/optimizer.go
@@ -49,7 +49,7 @@ func newOptimizer(paramWithConfigs ParameterWithConfig) *optimizer {
 	cbuffer = C.malloc(C.size_t(len(p.Content)))
 	C.memcpy(cbuffer, unsafe.Pointer(&p.Content[0]), C.size_t(len(p.Content)))
 	o.opt = C.paddle_create_optimizer((*C.uchar)(&c[0]), C.int(len(c)),
-		C.paddle_element_type(p.ElementType), cbuffer, C.int(len(p.Content)),
+		C.paddle_element_type(p.ElementType), cbuffer, C.int(len(p.Content)/C.sizeof_float),
 		(*C.char)(nullPtr), 0)
 	return o
 }
@@ -57,7 +57,7 @@ func newOptimizer(paramWithConfigs ParameterWithConfig) *optimizer {
 func (o *optimizer) GetWeights() []byte {
 	var buffer unsafe.Pointer
 	buffer_len := C.paddle_optimizer_get_weights(o.opt, &buffer)
-	return cArrayToSlice(buffer, int(buffer_len))
+	return cArrayToSlice(buffer, int(buffer_len)*C.sizeof_float)
 }

 func (o *optimizer) UpdateParameter(g Gradient) error {
@@ -65,7 +65,7 @@ func (o *optimizer) UpdateParameter(g Gradient) error {
 		return fmt.Errorf("Name: %s, parameter and gradient element type not match, parameter: %v, gradient: %v", g.Name, o.elementType, g.ElementType)
 	}

-	r := C.paddle_update_parameter(o.opt, C.paddle_element_type(g.ElementType), unsafe.Pointer(&g.Content[0]), C.int(len(g.Content)))
+	r := C.paddle_update_parameter(o.opt, C.paddle_element_type(g.ElementType), unsafe.Pointer(&g.Content[0]), C.int(len(g.Content))/C.sizeof_float)
 	if r != 0 {
 		return fmt.Errorf("optimizer update returned error code: %d", r)
 	}
diff --git a/go/pserver/optimizer_test.go b/go/pserver/optimizer_test.go
index 49d9df5898..368047d6f8 100644
--- a/go/pserver/optimizer_test.go
+++ b/go/pserver/optimizer_test.go
@@ -2,7 +2,6 @@ package pserver

 import (
 	"io/ioutil"
-	"reflect"
"testing" ) @@ -23,26 +22,3 @@ func TestOptimizerCreateRelease(t *testing.T) { o := newOptimizer(param) o.Cleanup() } - -func TestOptimizerFull(t *testing.T) { - p := Parameter{ - Name: "a", - ElementType: Float32, - } - p.Content = []byte{1, 3} - config, err := ioutil.ReadFile("./cclient/test/testdata/optimizer.pb.txt") - if err != nil { - t.Fatalf("read optimizer proto failed") - } - param := ParameterWithConfig{ - Param: p, - Config: config, - } - o := newOptimizer(param) - g := Gradient(p) - if !reflect.DeepEqual(p.Content, o.GetWeights()) { - t.FailNow() - } - o.UpdateParameter(g) - o.Cleanup() -} diff --git a/go/pserver/service_test.go b/go/pserver/service_test.go index c62f92e09b..f86619447c 100644 --- a/go/pserver/service_test.go +++ b/go/pserver/service_test.go @@ -10,8 +10,7 @@ import ( "github.com/PaddlePaddle/Paddle/go/pserver" ) - -func TestFull(t *testing.T) { +func TestServiceFull(t *testing.T) { s, err := pserver.NewService(0) if err != nil { t.Error(err) From 722853312118c6ed1c0624baede84a2c8c2379a7 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Mon, 3 Jul 2017 12:49:08 +0800 Subject: [PATCH 209/542] test --- go/pserver/cclient/test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/pserver/cclient/test/CMakeLists.txt b/go/pserver/cclient/test/CMakeLists.txt index bd50f1db2a..f287f85071 100644 --- a/go/pserver/cclient/test/CMakeLists.txt +++ b/go/pserver/cclient/test/CMakeLists.txt @@ -1,2 +1,2 @@ cc_test(test_cclient SRCS test_cclient.c DEPS paddle_pserver_cclient) - +add_style_check_target(test_cclient test_cclient.c) From f4281ceee47fca4a1002efeea93f9ba39f99de76 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 3 Jul 2017 14:27:37 +0800 Subject: [PATCH 210/542] renew simple_op_design --- doc/design/simple_op_design.md | 110 ++++++--------------------------- 1 file changed, 19 insertions(+), 91 deletions(-) diff --git a/doc/design/simple_op_design.md b/doc/design/simple_op_design.md index 93c0f68ca9..49ca5db5da 100644 --- a/doc/design/simple_op_design.md +++ b/doc/design/simple_op_design.md @@ -43,10 +43,7 @@ message AttrValue { message AttrProto { required string name = 1; required string comment = 2; - optional AttrValue default = 3; - optional AttrValue max = 4; - optional AttrValue min = 5; - required AttrType type = 6; + required AttrType type = 3; }; message VarProto { @@ -63,7 +60,24 @@ message OpProto { }; ``` -The default value and value range didn't appear in out previous design. By adding these two fields, we are able to check attribute validity in Python and find out possible error as soon as possible. What's more, by providing the message about default value and value range to Python docstring, it helps to automatically generate more comprehensive documents. 
+To generate Python code automatically: + +```python +def create_python_ops_creatation_functions(): + op_protos = paddle.framework.OpRegistry.get_all_op_proto() + for type_name in op_protos: + op_proto = op_protos[type_name] + def __impl__(**kwargs): # User must use key word args in Paddle API + inputs = [kwargs.get(ipt.name, "") for ipt in op_proto.inputs] + outputs = [kwargs.get(opt.name, "") for opt in op_proto.outputs] + attrs = [cast_to_op_attr(attr, kwargs.get(attr.name, None)) for attr in op_proto.attrs] + opdesc = (input, outputs, type_name, attrs) + return paddle.framework.OpRegistry.CreateOp(opdesc) + __impl__.__doc__ = create_doc_string(op_proto) + globals()[type_name] = __impl__ + +create_python_ops_creatation_functions() +``` ### Message from Python to C++ @@ -185,89 +199,3 @@ def fc_layer(input, size, with_bias, activation): In above sample, `fc_op` and `sigmod_op` are low-level API. They build `OpDesc` and invoke corresponding C++ code. *TODO* - -## Op and Kernal - -After completely defined, an Op will be run in a network. However, Op's computing method may differ on different devices. One solution is that write an `Op`'s member function `Op::run()`, which contains computing methods of all possible devices. That may be a bad idea because we have to change all `Op`'s code to add a new device. - -Another choice is adding a concept named `kernal`. A `Kernal` describes an op's computing process on a certain device. After stripping `Variable` and `kernal`, `Op` becomes a pure conceptual class, which holds neither data nor detailed computing process. - -```cpp -class KernalBase { -public: - virtual void RunOnDevice(std::vector input_vars, - std::vector input_vars, - const OpAttrs* attrs) = 0; -}; - -template -class CosineKernal : public KernalBase { -public: - virtual void RunOnDevice(std::vector input_vars, - std::vector input_vars, - const OpAttrs* attrs) { - // no implementation - } -}; - -template <> -class CosineKernal : public KernalBase { -public: - virtual void RunOnDevice(std::vector input_vars, - std::vector input_vars, - const OpAttrs* attrs) { - CosineOpAttrs* cosine_attrs = static_cast(attrs); - // computing code - // ... - } -}; - -struct OpAttrs {...}; - -class Op { - public: - std::string get_kernal_name() { - return kernel_name_; - } - const vector& get_input_names() { - return input_names_; - } - const vector& get_output_names() { - return output_names_; - } - // ... - private: - std::vector input_names_; - std::vector output_names_; - std::string kernal_name_; - -} - -struct CosineOpAttrs : public OpAttrs { - float scale_; -} - -class CosineOp : public Op { - public: - const CosineOpAtrrs* get_attrs() { - return &attrs; - } - - private: - CosineOpAttrs attrs; -} - -RunOp(const Op& op, Scope scope) { - Kernal* kernal = get_kernal(scope, op.get_kernal_name()); - std::vector input_vars = - get_variables(scope, op.get_input_name()); - std::vector output_vars = - get_variables(scope, op.get_output_name()); - - kernal->RunOnDevice(input_vars, output_vars, op.get_attrs()); -} -``` - -All `Kernal` need to be registered beforehand, just like `Op`. - -Now, `Op` is no longer has `Run()` function. It only contains names of variables and kernels. During network running, `RunOp()` is called to invoke `Op`'s corresponding `Kernal`. `get_kernal()` is supposed to return `kernal` for current device. 
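The Python snippet in this revision assumes the C++ side can enumerate every registered `OpProto` (the doc calls it `OpRegistry.get_all_op_proto()`) so that one creation function per op type can be synthesized. A hedged sketch of that C++ half follows; plain structs stand in for the generated protobuf classes, and `get_all_op_protos()` is a hypothetical accessor name, not an existing Paddle function:

```cpp
#include <map>
#include <string>
#include <vector>

// Plain stand-ins for the generated VarProto / OpProto protobuf classes.
struct VarProto {
  std::string name;
  std::string comment;
};

struct OpProto {
  std::vector<VarProto> inputs;
  std::vector<VarProto> outputs;
  std::string type;
  std::string comment;
};

// Hypothetical registry accessor that the Python loop above would call into.
std::map<std::string, OpProto>& get_all_op_protos() {
  static std::map<std::string, OpProto> protos;
  return protos;
}

int main() {
  OpProto cos;
  cos.type = "cos";
  cos.comment = "cosine op, output = cos(a, b)";
  cos.inputs = {{"a", "the first input"}, {"b", "the second input"}};
  cos.outputs = {{"output", "the output"}};
  get_all_op_protos()[cos.type] = cos;

  // Python would iterate this map and emit one creation function per type.
  return 0;
}
```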
From 40573cd56f723ebde6328ccd5dabe4a363c9f3db Mon Sep 17 00:00:00 2001
From: Superjom
Date: Mon, 3 Jul 2017 14:41:43 +0800
Subject: [PATCH 211/542] add net headers

---
 paddle/framework/net.cc |  23 +++++
 paddle/framework/net.h  | 182 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 205 insertions(+)
 create mode 100644 paddle/framework/net.cc
 create mode 100644 paddle/framework/net.h

diff --git a/paddle/framework/net.cc b/paddle/framework/net.cc
new file mode 100644
index 0000000000..0ce9296820
--- /dev/null
+++ b/paddle/framework/net.cc
@@ -0,0 +1,23 @@
+#include "paddle/framework/net.h"
+
+namespace paddle {
+namespace framework {
+
+PlainNet::PlainNet(const NetDesc& def) {}
+
+Error PlainNet::InferShape(Scope* scope) {
+  for (auto& op : ops_) {
+    // wrong shape
+    auto err = op.InferShape();
+    if (!err) return err;
+  }
+  // ok
+  return Error();
+}
+
+Error PlainNet::Run(Scope* scope, OpContext* context, OpIndex begin,
+                    OpIndex end) const {
+  // TODO: run the operators in the given range; unimplemented for now.
+  return Error();
+}
+
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/framework/net.h b/paddle/framework/net.h
new file mode 100644
index 0000000000..88bdf0bb68
--- /dev/null
+++ b/paddle/framework/net.h
@@ -0,0 +1,182 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#pragma once
+
+#include "paddle/framework/scope.h"
+
+namespace paddle {
+namespace framework {
+
+// operator's index stored in a network.
+typedef int OpIndex;
+/**
+ * NOTE following codes are some definitions of unimplemented concepts.
+ * We write some basic implementation to make Net compilable. These APIs will
+ * keep updating as the related concepts are implemented.
+ */
+
+// Operator's runtime context.
+struct OpContext {
+  enum DevType { kCPU, kGPU };
+  int dev_id;
+  DevType dev_type{kCPU};
+};
+
+// Proto definitions, use `struct`s for simplicity.
+struct VarDesc {
+  std::string type;
+  std::vector<int> dims;
+};
+struct OpDesc {
+  std::string type;
+  std::vector<VarDesc> inputs;
+  std::vector<VarDesc> outputs;
+};
+struct NetDesc {
+  std::vector<OpDesc> ops;
+};
+class Operator {
+ public:
+  Operator(const OpDesc &def) {}
+  Error InferShape() { return Error(); }
+  Error Run() { return Error(); }
+};
+
+/**
+ * @brief Network that manages the operators it has.
+ *
+ * Network is the container and controller of a set of operators; a user can
+ * build a real network from a NetDesc, which is a protobuf message, and use
+ * Network.Run() to run all the operators in the network.
+ *
+ * A network object knows all Operators belonging to this network. Variables,
+ * which are inputs and outputs of these operators, are created and managed by
+ * a hierarchy of Scope objects.
+ *
+ * This is the base class of network; all networks should implement the APIs
+ * it defines.
+ */
+class Net {
+ public:
+  /**
+   * @brief Infer shapes of all inputs and outputs of operators.
+   */
+  virtual Error InferShape(Scope *scope) = 0;
+  /**
+   * @brief Run the network.
+   *
+   * Run all the operators and return success (true) or not, with all the
+   * variables located in `scope`. `context` describes the detail execution
+   * environment for ops. `begin` and `end` specify the range of `ops_` to
+   * run. If no positive indexes are provided, all operators in `ops_` will
+   * run.
+   */
+  virtual Error Run(Scope *scope, OpContext *context, OpIndex begin = -1,
+                    OpIndex end = -1) const = 0;
+
+  /**
+   * @brief Add an Operator according to `def`.
+   */
+  virtual OpIndex AddOp(const proto::OpDef &def) = 0;
+
+  /**
+   * @brief Add optimizer operators according to `attrs`.
+   */
+  virtual Error AddOptimizerOps(const OptAttrs &attrs) = 0;
+
+  /**
+   * @brief Add backward operators.
+   */
+  virtual Error AddBackwardOps() = 0;
+
+  /**
+   * @brief Create a network.
+   */
+  static std::unique_ptr<Net> Create(const NetDesc &def = NetDesc());
+};
+
+/**
+ * @brief A basic implementation of Net.
+ *
+ * PlainNet is a very simple Net: it creates a list of operators and runs them
+ * sequentially in the order they were added.
+ */
+class PlainNet : public Net {
+ public:
+  /**
+   * @brief Initialize a PlainNet.
+   *
+   * Initialize from a network described by `def`. NetDesc is the definition
+   * of a network.
+   */
+  PlainNet(const NetDesc &def);
+
+  /**
+   * Infer all the operators' input and output variables' shapes; will be
+   * called before every mini-batch.
+   */
+  virtual Error InferShape(Scope *scope) override;
+
+  /**
+   * @brief Run the network.
+   *
+   * Run all the operators with the `scope`; if no scope is provided, the
+   * default scope will be used instead. If no OpContext is provided, the
+   * default context will be used.
+   */
+  virtual Error Run(Scope *scope = nullptr, OpContext *context = nullptr,
+                    OpIndex begin = -1, OpIndex end = -1) const override;
+
+  /**
+   * @brief Add an operator to this network.
+   */
+  virtual OpIndex AddOp(const proto::OpDef &def) override;
+
+  /**
+   * @brief Add all optimizer operators related into the network.
+   */
+  virtual Error AddOptimizerOps(const OptAttrs &attrs) override;
+
+  /**
+   * @brief Add all backward operators related into the network.
+   */
+  virtual Error AddBackwardOps() override;
+
+ protected:
+  /**
+   * @brief Build the network.
+   *
+   * Create operators according to `def`; will be called by the constructor.
+   */
+  Error BuildNet(const NetDesc &def);
+
+  /**
+   * @brief Add an operator into this network.
+   *
+   * Add an operator which is identified as `type` and has attributes
+   * described in `attrs`; the `inputs` are the keys of read-only input
+   * variables, `outputs` are keys of mutable output variables. An `OpIndex`
+   * will be returned to indicate the offset of the new operator in `ops_`.
+   */
+  OpIndex AddOp(const std::string &type,
+                const std::vector<std::string> &inputs,
+                const std::vector<std::string> &outputs,
+                const OprAttr &attrs = OprAttr());
+
+ private:
+  // the operators owned by `Network`.
+ std::vector ops_; +}; + +} // namespace framework +} // namespace paddle From 99e22a825d0d86968d24fc6717b765055c8ac73b Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Mon, 3 Jul 2017 14:53:31 +0800 Subject: [PATCH 212/542] "fix pre-commit hook failed" --- go/pserver/cclient/test/test_cclient.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/go/pserver/cclient/test/test_cclient.c b/go/pserver/cclient/test/test_cclient.c index 5bd4913ba3..b16769b433 100644 --- a/go/pserver/cclient/test/test_cclient.c +++ b/go/pserver/cclient/test/test_cclient.c @@ -16,7 +16,7 @@ void sendGrads(paddle_pserver_client c) { "param_a", PADDLE_ELEMENT_TYPE_FLOAT32, grad_a, 2000}; paddle_gradient grad2 = { "param_b", PADDLE_ELEMENT_TYPE_FLOAT32, grad_b, 3000}; - paddle_gradient* grads[2] = {&grad1, &grad2}; + paddle_gradient *grads[2] = {&grad1, &grad2}; if (paddle_send_grads(c, grads, 2)) { fail(); } @@ -39,14 +39,12 @@ void getParams(paddle_pserver_client c) { param_b.content = content_b; param_b.content_len = 3000; - paddle_parameter* params[2] = {¶m_a, ¶m_b}; + paddle_parameter *params[2] = {¶m_a, ¶m_b}; if (paddle_get_params(c, params, 2)) { fail(); } } - - int main() { char addr[] = "localhost:3000"; paddle_pserver_client c = paddle_new_pserver_client(addr, 1); @@ -54,8 +52,10 @@ int main() { size_t config_proto_len = 0; ssize_t nread; FILE *fp = fopen("testdata/optimizer.pb.txt", "r"); - if(!fp) { fail(); } - while((nread = getline(&config_proto, &config_proto_len, fp)) != -1) { + if (!fp) { + fail(); + } + while ((nread = getline(&config_proto, &config_proto_len, fp)) != -1) { printf("%s", config_proto); } fclose(fp); @@ -70,7 +70,8 @@ retry: param.name = name_a; param.content = content_a; param.content_len = 2000; - int error = paddle_init_param(c, param, (void *)config_proto, config_proto_len); + int error = + paddle_init_param(c, param, (void *)config_proto, config_proto_len); if (error != 0) { goto retry; } From 16b8e59e1ab8cb33d175ce6d4bfe3f19419acb06 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 3 Jul 2017 15:32:51 +0800 Subject: [PATCH 213/542] Update new authors --- AUTHORS.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/AUTHORS.md b/AUTHORS.md index d5baee2161..08eaab10ea 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -1,5 +1,23 @@ | Github account | name | |---|---| +| beckett1124 | Bin Qi | +| Canpio | Jiayi Feng | +| chengxiaohua1105 | Xiaohua Cheng | +| xushaoyong | Shaoyong Xu | +| liuyuan | Yuan Liu | +| xujun05 | Jun Xu | +| dzhwinter | Zhihong Dong | +| Guo Sheng | Sheng Guo | +| kuke | Yibing Liu | +| llxxxll | YongFeng Liu | +| cxysteven | Xingyi Cheng | +| NHZlX | Zhaolong Xing | +| pakchoi | Chuanjiang Song | +| pkuyym | Yaming Yang | +| Superjom | Chunwei Yan | +| wanghaoshuang | Haoshuang Wang | +| wangzhen-nlp | Zhen Wang | +| wwhu | Weiwei Hu | | reyoung | Yang Yu | | gangliao | Gang Liao | | luotao01 | Tao Luo | From 85c4352ea8eeaa419d3542f378089fc9e1180565 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Mon, 3 Jul 2017 15:37:47 +0800 Subject: [PATCH 214/542] "pass style check" --- go/pserver/cclient/test/testdata/optimizer.pb.txt | Bin 50 -> 51 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/go/pserver/cclient/test/testdata/optimizer.pb.txt b/go/pserver/cclient/test/testdata/optimizer.pb.txt index 27dd3bc5f19e2964b4b674cff8860233cbdb445a..27c8a584df40ab714edfd730f0ff7b7bd3783964 100644 GIT binary patch delta 6 NcmXpqo?yht1poyT0W$yq delta 4 LcmXpunqUL~0=NMv From 
696ba1d2e1f3fdac763c4dd29b5353b512f9b7fa Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 3 Jul 2017 16:01:50 +0800 Subject: [PATCH 215/542] init tensor_test.cc --- paddle/framework/CMakeLists.txt | 1 + paddle/framework/tensor.h | 5 +-- paddle/framework/tensor_test.cc | 71 +++++++++++++++++++++++++++++++++ 3 files changed, 74 insertions(+), 3 deletions(-) create mode 100644 paddle/framework/tensor_test.cc diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 6aa6b9bc2d..41bf3837aa 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -2,6 +2,7 @@ cc_library(ddim SRCS ddim.cc) cc_test(ddim_test SRCS ddim_test.cc DEPS ddim) nv_test(dim_test SRCS dim_test.cu DEPS ddim) +cc_test(tensor_test SRCS tensor_test.cc DEPS ddim) cc_test(variable_test SRCS variable_test.cc) cc_test(scope_test SRCS scope_test.cc) cc_test(enforce_test SRCS enforce_test.cc) diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 067f2a8526..8d658d5097 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -19,13 +19,12 @@ namespace framework { class Tensor { using paddle::platform::Place; - using paddle::platform::get_place; public: template const T* data() const { - PADDLE_ASSERT(holder_ != nullptr, - "Tensor::data must be called after Tensor::mutable_data"); + PADDLE_ENFORCE(holder_ != nullptr, + "Tensor::data must be called after Tensor::mutable_data"); return static_cast(holder->Ptr()); } diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc new file mode 100644 index 0000000000..fa44b24b64 --- /dev/null +++ b/paddle/framework/tensor_test.cc @@ -0,0 +1,71 @@ +/* + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "paddle/framework/tensor.h" +#include + +TEST(Tensor, Data) { + using namespace paddle::framework; + using namespace paddle::platform; + + Tensor cpu_tensor; +} + +/* mutable_data() is not tested at present + because Memory::Alloc() and Memory::Free() have not been ready. 
+
+TEST(Tensor, MutableData) {
+  using namespace paddle::framework;
+  using namespace paddle::platform;
+
+  Tensor cpu_tensor;
+  float* p1 = nullptr;
+  float* p2 = nullptr;
+  // initialization
+  p1 = cpu_tensor.mutable_data<float>(make_ddim({1, 2, 3}), CPUPlace());
+  EXPECT_NE(p1, nullptr);
+  // set cpu_tensor a new dim with large size
+  // memory is supposed to be re-allocated
+  p2 = cpu_tensor.mutable_data<float>(make_ddim({3, 4}));
+  EXPECT_NE(p2, nullptr);
+  EXPECT_NE(p1, p2);
+  // set cpu_tensor a new dim with same size
+  // memory block is supposed to be unchanged
+  p1 = cpu_tensor.mutable_data<float>(make_ddim({2, 2, 3}));
+  EXPECT_EQ(p1, p2);
+  // set cpu_tensor a new dim with smaller size
+  // memory block is supposed to be unchanged
+  p2 = cpu_tensor.mutable_data<float>(make_ddim({2, 2}));
+  EXPECT_EQ(p1, p2);
+
+  Tensor gpu_tensor;
+  p1 = nullptr;
+  p2 = nullptr;
+  // initialization
+  p1 = gpu_tensor.mutable_data<float>(make_ddim({1, 2, 3}), GPUPlace());
+  EXPECT_NE(p1, nullptr);
+  // set gpu_tensor a new dim with large size
+  // memory is supposed to be re-allocated
+  p2 = gpu_tensor.mutable_data<float>(make_ddim({3, 4}));
+  EXPECT_NE(p2, nullptr);
+  EXPECT_NE(p1, p2);
+  // set gpu_tensor a new dim with same size
+  // memory block is supposed to be unchanged
+  p1 = gpu_tensor.mutable_data<float>(make_ddim({2, 2, 3}));
+  EXPECT_EQ(p1, p2);
+  // set gpu_tensor a new dim with smaller size
+  // memory block is supposed to be unchanged
+  p2 = gpu_tensor.mutable_data<float>(make_ddim({2, 2}));
+  EXPECT_EQ(p1, p2);
+}
+*/
\ No newline at end of file

From 9f408dfb1b81daee795d9c0d8ed177e6ab4e10a8 Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Mon, 3 Jul 2017 16:20:25 +0800
Subject: [PATCH 216/542] fix some compile error

---
 paddle/framework/tensor.h | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h
index 067f2a8526..7fa662fbb5 100644
--- a/paddle/framework/tensor.h
+++ b/paddle/framework/tensor.h
@@ -14,32 +14,39 @@ limitations under the License. */
 #pragma once

+#include <memory>
+#include <type_traits>
+#include <typeinfo>
+#include "paddle/framework/ddim.h"
+#include "paddle/framework/enforce.h"
+#include "paddle/memory/memory.h"
+#include "paddle/platform/assert.h"
+#include "paddle/platform/place.h"
+
 namespace paddle {
 namespace framework {

 class Tensor {
-  using paddle::platform::Place;
-
  public:
   template <typename T>
   const T* data() const {
     PADDLE_ENFORCE(holder_ != nullptr,
                    "Tensor::data must be called after Tensor::mutable_data");
-    return static_cast<const T*>(holder->Ptr());
+    return static_cast<const T*>(holder_->Ptr());
   }

   template <typename T,
-            typename = typename std::enable_if<std::is_pod<T>::value>::type>
-  T* mutable_data(DDim dims, Place place) {
+            typename std::enable_if<std::is_pod<T>::value>::type* = nullptr>
+  T* mutable_data(DDim dims, paddle::platform::Place place) {
     if (holder_ == nullptr || holder_->Place() != place ||
-        holder_->Size() < dims.product() * sizeof(T)) {
-      holder_.reset(new PlaceholderImpl<T>(place, dims.product() * sizeof(T)));
+        holder_->Size() < product(dims) * sizeof(T)) {
+      holder_.reset(new PlaceholderImpl<T>(place, product(dims) * sizeof(T)));
     }
     return static_cast<T*>(holder_->Ptr());
   }

   template <typename T,
-            typename = typename std::enable_if<std::is_pod<T>::value>::type>
+            typename std::enable_if<std::is_pod<T>::value>::type* = nullptr>
   T* mutable_data(DDim dims) {
     return mutable_data<T>(dims, paddle::platform::get_place());
   }
@@ -50,24 +57,24 @@ class Tensor {
   struct Placeholder {
     virtual ~Placeholder() {}
     virtual void* Ptr() const = 0;
-    virtual Place Place() const = 0;
+    virtual paddle::platform::Place Place() const = 0;
     virtual size_t Size() const = 0;
   };

   template <typename T>
   struct PlaceholderImpl : public Placeholder {
-    PlaceholderImpl(Place pl, size_t size)
+    PlaceholderImpl(paddle::platform::Place pl, size_t size)
         : ptr_(paddle::memory::Alloc(pl, size), paddle::memory::Deleter(pl)),
           place_(pl),
           size_(size) {}

     virtual void* Ptr() const { return static_cast<void*>(ptr_.get()); }
     virtual size_t Size() const { return size_; }
-    virtual Place Place() const { return place_; }
+    virtual paddle::platform::Place Place() const { return place_; }

     std::unique_ptr<void, paddle::memory::Deleter> ptr_;
-    Place place_;  // record the place of ptr_.
-    size_t size_;  // size of the memory block.
+    paddle::platform::Place place_;  // record the place of ptr_.
+    size_t size_;                    // size of the memory block.
   };

   std::unique_ptr<Placeholder> holder_;  // holds the memory block if allocated.

From 9bf98168281952efee1ed5fd1a61b743b0847834 Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Mon, 3 Jul 2017 16:26:33 +0800
Subject: [PATCH 217/542] Add OpProto implementation

OpProto is a proto message that helps 3rd-party language bindings, e.g.
`Python`, to generate operator creation methods.

The operator creation method is the low-level API for 3rd-party language
bindings. Op creation methods take the user's input in that language,
convert the user's inputs into an `OpDesc` message, then pass that `OpDesc`
message to Paddle's C++ core to create an operator.

* A separated `attr_type.proto` is added, because that file would be
  included by `op_desc.proto` in future.
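To make the role of the shared `AttrType` enum concrete before the proto files below: each attribute value carries one tag from the enum plus the matching payload field. The following is an editor's stand-in in plain C++, not generated protobuf code; only the enum values mirror `attr_type.proto`:

```cpp
#include <cassert>
#include <string>
#include <vector>

// Mirrors the AttrType enum declared in attr_type.proto below.
enum class AttrType { INT, FLOAT, STRING, INTS, FLOATS, STRINGS };

// A tagged attribute value: one tag plus a union-of-fields payload.
struct AttrValue {
  AttrType type = AttrType::INT;
  int i = 0;
  float f = 0.f;
  std::string s;
  std::vector<int> ints;
  std::vector<float> floats;
  std::vector<std::string> strings;
};

AttrValue FloatAttr(float v) {
  AttrValue a;
  a.type = AttrType::FLOAT;
  a.f = v;
  return a;
}

int main() {
  // The `scale` attribute of the cosine op from the design doc, tagged FLOAT.
  AttrValue scale = FloatAttr(1.0f);
  assert(scale.type == AttrType::FLOAT);
  return 0;
}
```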
--- paddle/framework/CMakeLists.txt | 1 + paddle/framework/attr_type.proto | 28 +++++++++++++ paddle/framework/op_proto.proto | 69 ++++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+) create mode 100644 paddle/framework/attr_type.proto create mode 100644 paddle/framework/op_proto.proto diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 6aa6b9bc2d..3284015908 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -5,3 +5,4 @@ nv_test(dim_test SRCS dim_test.cu DEPS ddim) cc_test(variable_test SRCS variable_test.cc) cc_test(scope_test SRCS scope_test.cc) cc_test(enforce_test SRCS enforce_test.cc) +proto_library(op_proto SRCS op_proto.proto attr_type.proto) diff --git a/paddle/framework/attr_type.proto b/paddle/framework/attr_type.proto new file mode 100644 index 0000000000..2d8e0476d7 --- /dev/null +++ b/paddle/framework/attr_type.proto @@ -0,0 +1,28 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +syntax="proto2"; +package paddle.framework; + +// Attribute Type for paddle's Op. +// Op contains many attributes. Each type of attributes could be different. +// The AttrType will be shared between AttrDesc and AttrProto. +enum AttrType { + INT = 0; + FLOAT = 1; + STRING = 2; + INTS = 3; + FLOATS = 4; + STRINGS = 5; +} \ No newline at end of file diff --git a/paddle/framework/op_proto.proto b/paddle/framework/op_proto.proto new file mode 100644 index 0000000000..22df6f9c6b --- /dev/null +++ b/paddle/framework/op_proto.proto @@ -0,0 +1,69 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +// Protocol Message for 3rd-party language binding. +// +// Paddle Python package will use `OpProto` to generate op creation methods. +// The op creation methods take user's input and generate `OpDesc` proto message, +// then pass `OpDesc` to C++ side and create Op pointer. +// +syntax="proto2"; +package paddle.framework; + +import "attr_type.proto"; + +// Attribute protocol message for 3rd-party language binding. +// It will store the Op support what attribute and what type. +message AttrProto { + // Supported attribute name. e.g. `scale` for cosine op. + required string name = 1; + + // Supported attribute type. + required AttrType type = 2; + + // Supported attribute comments. It helps 3rd-party language generate doc-string. + required string comment = 3; +} + +// Input or output message for 3rd-party language binding. 
+// It contains parameter name and its comments. +message VarProto { + // Input or output name in that op creation function. + // e.g. `cos(a, b, output, ...)`, "a", "b", "output" are names. + required string name = 1; + + // The comment for that input. It helps 3rd-party language generate doc-string. + required string comment = 2; +} + +// Op protocol message for 3rd-party language binding. +// It contains all information for generating op creation method. +message OpProto { + // The input information to generate op creation method. + repeated VarProto inputs = 1; + + // The output information to generate op creation method. + repeated VarProto outputs = 2; + + // The attribute information to generate op creation method. + repeated AttrProto attrs = 3; + + // The comments for that Op. It helps 3rd-party language generate + // doc-string. The whole documentation of that Op is generated by comment, + // inputs, outputs, attrs together. + required string comment = 4; + + // The type of that Op. + required string type = 5; +} From c9cd5b6e9dd9c92ae236709c61e3cde7a17ee2b9 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 3 Jul 2017 16:39:36 +0800 Subject: [PATCH 218/542] Update Authors.md --- AUTHORS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/AUTHORS.md b/AUTHORS.md index 08eaab10ea..09698ac140 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -18,6 +18,8 @@ | wanghaoshuang | Haoshuang Wang | | wangzhen-nlp | Zhen Wang | | wwhu | Weiwei Hu | +| xinghai-sun | XingHai Sun | +| zhaopu7 | Pu Zhao | | reyoung | Yang Yu | | gangliao | Gang Liao | | luotao01 | Tao Luo | From 9bf98168281952efee1ed5fd1a61b743b0847834 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 3 Jul 2017 16:47:11 +0800 Subject: [PATCH 219/542] Add OpProto unittest. --- paddle/framework/CMakeLists.txt | 4 +++- paddle/framework/op_proto_test.cc | 31 +++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 paddle/framework/op_proto_test.cc diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 3284015908..50107faaed 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -5,4 +5,6 @@ nv_test(dim_test SRCS dim_test.cu DEPS ddim) cc_test(variable_test SRCS variable_test.cc) cc_test(scope_test SRCS scope_test.cc) cc_test(enforce_test SRCS enforce_test.cc) -proto_library(op_proto SRCS op_proto.proto attr_type.proto) +proto_library(attr_type SRCS attr_type.proto) +proto_library(op_proto SRCS op_proto.proto) +cc_test(op_proto_test SRCS op_proto_test.cc DEPS attr_type op_proto protobuf) diff --git a/paddle/framework/op_proto_test.cc b/paddle/framework/op_proto_test.cc new file mode 100644 index 0000000000..9c054bde44 --- /dev/null +++ b/paddle/framework/op_proto_test.cc @@ -0,0 +1,31 @@ +#include +#include + +TEST(TestOpProto, ALL) { + paddle::framework::OpProto proto; + { + auto ipt = proto.mutable_inputs()->Add(); + *ipt->mutable_name() = "a"; + *ipt->mutable_comment() = "the one input of cosine op"; + } + { + auto ipt = proto.mutable_inputs()->Add(); + *ipt->mutable_name() = "b"; + *ipt->mutable_comment() = "the other input of cosine op"; + } + { + auto opt = proto.mutable_outputs()->Add(); + *opt->mutable_name() = "output"; + *opt->mutable_comment() = "the output of cosine op"; + } + { + auto attr = proto.mutable_attrs()->Add(); + *attr->mutable_name() = "scale"; + attr->set_type(paddle::framework::AttrType::FLOAT); + *attr->mutable_comment() = "the scale attribute of cosine op"; + } + proto.set_type("cos"); + 
*proto.mutable_comment() = "cosine op, output = scale * cos(a, b)"; + + ASSERT_TRUE(proto.IsInitialized()); +} \ No newline at end of file From 0e61730039b11861d5a90188987bad2241a08f95 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Mon, 3 Jul 2017 12:05:38 +0800 Subject: [PATCH 220/542] stride pooling for max and average layer --- paddle/gserver/layers/MaxLayer.h | 5 ++ .../layers/SequenceLastInstanceLayer.cpp | 3 +- paddle/gserver/layers/SequencePoolLayer.cpp | 5 +- paddle/gserver/layers/SequencePoolLayer.h | 2 - paddle/gserver/tests/test_LayerGrad.cpp | 12 ++++- paddle/parameter/Argument.cpp | 6 +-- paddle/parameter/Argument.h | 2 +- paddle/parameter/tests/test_argument.cpp | 4 +- python/paddle/trainer/config_parser.py | 8 +++ .../paddle/trainer_config_helpers/layers.py | 12 +++++ .../protostr/test_sequence_pooling.protostr | 51 +++++++++++++++++++ .../tests/configs/test_sequence_pooling.py | 8 +++ 12 files changed, 103 insertions(+), 15 deletions(-) diff --git a/paddle/gserver/layers/MaxLayer.h b/paddle/gserver/layers/MaxLayer.h index baa58ca2d7..adf7ab4ae4 100644 --- a/paddle/gserver/layers/MaxLayer.h +++ b/paddle/gserver/layers/MaxLayer.h @@ -26,6 +26,11 @@ namespace paddle { * If SequenceLevel = kNonSeq: * Output: output size is the number of input sequences (NOT input instances) * output[i] = max_{for each instance in this sequence}{input[i]} + * If stride_ > 0: + * Output: a shorten sequence. The operation of getting max instance of a + * sequence is independently performed on every slice of the input + * sequence, which is obtained by sliding a window with the window + * size set to stride_. * If SequenceLevel = kSeq: * Check input sequence must has sub-sequence * Output: output size is the number of input sub-sequences diff --git a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp index 944c705166..8127cbf09c 100644 --- a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp +++ b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp @@ -73,8 +73,7 @@ bool SequenceLastInstanceLayer::init(const LayerMap& layerMap, void SequenceLastInstanceLayer::forward(PassType passType) { SequencePoolLayer::forward(passType); - auto starts = (stride_ > 0) ? stridePositions_->getData() - : startPositions_->getData(false); + auto starts = startPositions_->getData(false); MatrixPtr inputValue = getInputValue(0); MatrixPtr outputValue = getOutputValue(); diff --git a/paddle/gserver/layers/SequencePoolLayer.cpp b/paddle/gserver/layers/SequencePoolLayer.cpp index 4179a9e7e0..2a693b110a 100644 --- a/paddle/gserver/layers/SequencePoolLayer.cpp +++ b/paddle/gserver/layers/SequencePoolLayer.cpp @@ -72,9 +72,8 @@ void SequencePoolLayer::forward(PassType passType) { if (stride_ > 0) { CHECK_EQ(input.hasSubseq(), 0UL) << "sequence stride pooling is invalid for hasSubseq now"; - output_.poolSequenceWithStride( - input, stride_, &stridePositions_, reversed_); - newBatchSize_ = stridePositions_->getSize() - 1; + output_.poolSequenceWithStride(input, stride_, &startPositions_, reversed_); + newBatchSize_ = startPositions_->getSize() - 1; } resetOutput(newBatchSize_, dim); diff --git a/paddle/gserver/layers/SequencePoolLayer.h b/paddle/gserver/layers/SequencePoolLayer.h index 293d1bf278..058627def8 100644 --- a/paddle/gserver/layers/SequencePoolLayer.h +++ b/paddle/gserver/layers/SequencePoolLayer.h @@ -47,8 +47,6 @@ protected: size_t newBatchSize_; ICpuGpuVectorPtr startPositions_; int stride_; - // Store the start position of each window. 
-  IVectorPtr stridePositions_;
   // Whether the input sequence is reversed or not.
   bool reversed_ = false;

diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index 297756025b..ed067e7c3a 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -845,8 +845,12 @@ void testDegradeLayer(bool hasSubseq,
 TEST(Layer, MaxLayer) {
   testDegradeLayer(false, "max", "non-seq", -1);  // seq max to non-seq
-  testDegradeLayer(true, "max", "non-seq", -1);   // hasSubseq max to non-seq
-  testDegradeLayer(true, "max", "seq", -1);       // hasSubseq max to seq
+  testDegradeLayer(false,
+                   "max",
+                   "non-seq",
+                   5);  // seq max to a shortened seq, stride window = 5
+  testDegradeLayer(true, "max", "non-seq", -1);  // hasSubseq max to non-seq
+  testDegradeLayer(true, "max", "seq", -1);      // hasSubseq max to seq
 }

 TEST(Layer, SequenceLastInstanceLayer) {
@@ -868,6 +872,10 @@ TEST(Layer, SequenceLastInstanceLayer) {
 TEST(Layer, AverageLayer) {
   testDegradeLayer(false, "average", "non-seq", -1);  // seq average to non-seq
+  testDegradeLayer(false,
+                   "average",
+                   "non-seq",
+                   5);  // seq average to a shortened seq, stride window = 5
   testDegradeLayer(
       true, "average", "non-seq", -1);  // hasSubseq average to non-seq
   testDegradeLayer(true, "average", "seq", -1);  // hasSubseq average to seq

diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp
index 5beced3bb5..ef72b973c1 100644
--- a/paddle/parameter/Argument.cpp
+++ b/paddle/parameter/Argument.cpp
@@ -561,7 +561,7 @@ void Argument::degradeSequence(const Argument& input) {
 void Argument::poolSequenceWithStride(const Argument& input,
                                       size_t stride,
-                                      IVectorPtr* stridePositions,
+                                      ICpuGpuVectorPtr* stridePositions,
                                       bool reversed) {
   // If input.sequenceStartPositions = [0, 9, 14, 17, 30] and stride = 5,
   // then sequenceStartPositions = [0, 2, 3, 4, 7].
@@ -598,8 +598,8 @@ void Argument::poolSequenceWithStride(const Argument& input,
   stridePos.emplace_back(starts[numSequences]);
   int size = stridePos.size();
   CHECK_EQ(size - 1, tgtBuf[numSequences]);
-  IVector::resizeOrCreate(*stridePositions, size, false);
-  (*stridePositions)->copyFrom(stridePos.data(), size);
+  ICpuGpuVector::resizeOrCreate(*stridePositions, size, false);
+  (*stridePositions)->getMutableVector(false)->copyFrom(stridePos.data(), size);
 }

 void Argument::getValueString(

diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h
index 09bd633616..0ccdef802e 100644
--- a/paddle/parameter/Argument.h
+++ b/paddle/parameter/Argument.h
@@ -299,7 +299,7 @@ struct Argument {
    */
   void poolSequenceWithStride(const Argument& input,
                               size_t stride,
-                              IVectorPtr* stridePositions,
+                              ICpuGpuVectorPtr* stridePositions,
                               bool reversed = false);
   /**
    * @brief getValueString will return the argument's output in string. There

diff --git a/paddle/parameter/tests/test_argument.cpp b/paddle/parameter/tests/test_argument.cpp
index 98ab013548..19df6ea957 100644
--- a/paddle/parameter/tests/test_argument.cpp
+++ b/paddle/parameter/tests/test_argument.cpp
@@ -31,7 +31,7 @@ TEST(Argument, poolSequenceWithStride) {
   int strideResultReversed[] = {0, 4, 9, 14, 17, 20, 25, 30};

   for (auto reversed : {false, true}) {
-    IVectorPtr stridePositions;
+    ICpuGpuVectorPtr stridePositions;
     output.poolSequenceWithStride(
         input, 5 /* stride */, &stridePositions, reversed);
@@ -45,7 +45,7 @@
     CHECK_EQ(stridePositions->getSize(), 8UL);
     auto result = reversed ?
strideResultReversed : strideResult; for (int i = 0; i < 8; i++) { - CHECK_EQ(stridePositions->getData()[i], result[i]); + CHECK_EQ(stridePositions->getData(false)[i], result[i]); } } } diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index b7418101d8..5ca7df7476 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2420,10 +2420,14 @@ class MaxLayer(LayerBase): trans_type='non-seq', bias=False, output_max_index=None, + stride=-1, **xargs): super(MaxLayer, self).__init__(name, 'max', 0, inputs=inputs, **xargs) config_assert(len(self.inputs) == 1, 'MaxLayer must have 1 input') + if trans_type == 'seq': + config_assert(stride == -1, 'subseq does not support stride window') self.config.trans_type = trans_type + self.config.seq_pool_stride = stride for input_index in xrange(len(self.inputs)): input_layer = self.get_input_layer(input_index) self.set_layer_size(input_layer.size) @@ -2685,11 +2689,15 @@ class AverageLayer(LayerBase): average_strategy='average', trans_type='non-seq', bias=False, + stride=-1, **xargs): super(AverageLayer, self).__init__( name, 'average', 0, inputs=inputs, **xargs) self.config.average_strategy = average_strategy + if trans_type == 'seq': + config_assert(stride == -1, 'subseq does not support stride window') self.config.trans_type = trans_type + self.config.seq_pool_stride = stride config_assert(len(inputs) == 1, 'AverageLayer must have 1 input') for input_index in xrange(len(self.inputs)): input_layer = self.get_input_layer(input_index) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index a601d5c84a..5e8bf4b203 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -1090,10 +1090,16 @@ def pooling_layer(input, name=None, bias_attr=None, agg_level=AggregateLevel.TO_NO_SEQUENCE, + stride=-1, layer_attr=None): """ Pooling layer for sequence inputs, not used for Image. + If stride > 0, this layer slides a window whose size is determined by stride, + and return the pooling value of the window as the output. Thus, a long sequence + will be shorten. Note that for sequence with sub-sequence, the default value + of stride is -1. + The example usage is: .. code-block:: python @@ -1112,6 +1118,8 @@ def pooling_layer(input, :param pooling_type: Type of pooling, MaxPooling(default), AvgPooling, SumPooling, SquareRootNPooling. :type pooling_type: BasePoolingType|None + :param stride: window size. + :type stride: Int :param bias_attr: Bias parameter attribute. False if no bias. :type bias_attr: ParameterAttribute|None|False :param layer_attr: The Extra Attributes for layer, such as dropout. 
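As a worked example (separate from the patch itself), the stride-pooling arithmetic described in the docstring above can be checked with a minimal standalone C++ sketch. StridedSeqStarts is an illustrative name, not a PaddlePaddle API; the real logic lives in Argument::poolSequenceWithStride.

#include <cstddef>
#include <iostream>
#include <vector>

// Given the start offsets of each input sequence and a stride, compute the
// start offsets of the shortened output sequence: every window of `stride`
// instances produces exactly one output instance.
std::vector<int> StridedSeqStarts(const std::vector<int>& starts, int stride) {
  std::vector<int> out{0};
  for (std::size_t i = 0; i + 1 < starts.size(); ++i) {
    int len = starts[i + 1] - starts[i];
    int windows = (len + stride - 1) / stride;  // ceil(len / stride)
    out.push_back(out.back() + windows);
  }
  return out;
}

int main() {
  // Matches the comment in Argument::poolSequenceWithStride: input starts
  // [0, 9, 14, 17, 30] with stride 5 give output starts [0, 2, 3, 4, 7].
  for (int s : StridedSeqStarts({0, 9, 14, 17, 30}, 5)) std::cout << s << ' ';
  std::cout << '\n';
}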
@@ -1129,12 +1137,16 @@ def pooling_layer(input, extra_dict['output_max_index'] = pooling_type.output_max_index extra_dict.update(ExtraLayerAttribute.to_kwargs(layer_attr)) + if agg_level == AggregateLevel.TO_SEQUENCE: + assert stride == -1 + Layer( name=name, type=pooling_type.name, inputs=[Input(input.name)], bias=ParamAttr.to_bias(bias_attr), trans_type=agg_level, + stride=stride, **extra_dict) return LayerOutput( diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sequence_pooling.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sequence_pooling.protostr index 5a217f5544..8989561df0 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sequence_pooling.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sequence_pooling.protostr @@ -14,6 +14,7 @@ layers { input_layer_name: "dat_in" } trans_type: "seq" + seq_pool_stride: -1 } layers { name: "__seq_pooling_1__" @@ -24,6 +25,7 @@ layers { input_layer_name: "dat_in" } trans_type: "non-seq" + seq_pool_stride: -1 } layers { name: "__seq_pooling_2__" @@ -35,6 +37,7 @@ layers { } average_strategy: "average" trans_type: "seq" + seq_pool_stride: -1 } layers { name: "__seq_pooling_3__" @@ -46,6 +49,7 @@ layers { } average_strategy: "average" trans_type: "non-seq" + seq_pool_stride: -1 } layers { name: "__seq_pooling_4__" @@ -57,6 +61,7 @@ layers { } average_strategy: "sum" trans_type: "seq" + seq_pool_stride: -1 } layers { name: "__seq_pooling_5__" @@ -68,6 +73,7 @@ layers { } average_strategy: "sum" trans_type: "non-seq" + seq_pool_stride: -1 } layers { name: "__seq_pooling_6__" @@ -77,8 +83,44 @@ layers { inputs { input_layer_name: "dat_in" } + trans_type: "non-seq" + seq_pool_stride: 5 +} +layers { + name: "__seq_pooling_7__" + type: "average" + size: 100 + active_type: "" + inputs { + input_layer_name: "dat_in" + } + average_strategy: "average" + trans_type: "non-seq" + seq_pool_stride: 5 +} +layers { + name: "__seq_pooling_8__" + type: "average" + size: 100 + active_type: "" + inputs { + input_layer_name: "dat_in" + } + average_strategy: "sum" + trans_type: "non-seq" + seq_pool_stride: 5 +} +layers { + name: "__seq_pooling_9__" + type: "max" + size: 100 + active_type: "" + inputs { + input_layer_name: "dat_in" + } output_max_index: true trans_type: "non-seq" + seq_pool_stride: -1 } input_layer_names: "dat_in" output_layer_names: "__seq_pooling_0__" @@ -88,6 +130,9 @@ output_layer_names: "__seq_pooling_3__" output_layer_names: "__seq_pooling_4__" output_layer_names: "__seq_pooling_5__" output_layer_names: "__seq_pooling_6__" +output_layer_names: "__seq_pooling_7__" +output_layer_names: "__seq_pooling_8__" +output_layer_names: "__seq_pooling_9__" sub_models { name: "root" layer_names: "dat_in" @@ -98,6 +143,9 @@ sub_models { layer_names: "__seq_pooling_4__" layer_names: "__seq_pooling_5__" layer_names: "__seq_pooling_6__" + layer_names: "__seq_pooling_7__" + layer_names: "__seq_pooling_8__" + layer_names: "__seq_pooling_9__" input_layer_names: "dat_in" output_layer_names: "__seq_pooling_0__" output_layer_names: "__seq_pooling_1__" @@ -106,6 +154,9 @@ sub_models { output_layer_names: "__seq_pooling_4__" output_layer_names: "__seq_pooling_5__" output_layer_names: "__seq_pooling_6__" + output_layer_names: "__seq_pooling_7__" + output_layer_names: "__seq_pooling_8__" + output_layer_names: "__seq_pooling_9__" is_recurrent_layer_group: false } diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py 
b/python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py
index 3c49eb56c1..3c205eabd8 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py
@@ -14,6 +14,14 @@ for pt in POOL_TYPE:
     for al in AGG_LEVEL:
         opts.append(pooling_layer(input=din, agg_level=al, pooling_type=pt()))

+for pt in POOL_TYPE:
+    opts.append(
+        pooling_layer(
+            input=din,
+            agg_level=AggregateLevel.TO_NO_SEQUENCE,
+            pooling_type=pt(),
+            stride=5))
+
 opts.append(
     pooling_layer(
         input=din, pooling_type=MaxPooling(output_max_index=True)))

From e146fe836bc5178b497329dacddc7a1dc5063bcd Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Mon, 3 Jul 2017 17:22:58 +0800
Subject: [PATCH 221/542] fix compile errors and add assert test

---
 paddle/framework/tensor.h       |  25 ++++++--
 paddle/framework/tensor_test.cc | 100 ++++++++++++++++++--------------
 2 files changed, 76 insertions(+), 49 deletions(-)

diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h
index 7fa662fbb5..73eedd7375 100644
--- a/paddle/framework/tensor.h
+++ b/paddle/framework/tensor.h
@@ -20,7 +20,6 @@ limitations under the License. */
 #include "paddle/framework/ddim.h"
 #include "paddle/framework/enforce.h"
 #include "paddle/memory/memory.h"
-#include "paddle/platform/assert.h"
 #include "paddle/platform/place.h"

 namespace paddle {
@@ -63,21 +62,35 @@ class Tensor {
   template <typename T>
   struct PlaceholderImpl : public Placeholder {
-    PlaceholderImpl(paddle::platform::Place pl, size_t size)
-        : ptr_(paddle::memory::Alloc(pl, size), paddle::memory::Deleter(pl)),
-          place_(pl),
+   private:
+    class Deleter {
+     public:
+      Deleter(platform::Place place) : place_(place) {}
+      void operator()(T* ptr) {
+        paddle::memory::Free(place_, static_cast<void*>(ptr));
+      }
+
+     private:
+      paddle::platform::Place place_;
+    };
+
+   public:
+    PlaceholderImpl(paddle::platform::Place place, size_t size)
+        : ptr_(static_cast<T*>(paddle::memory::Alloc(place, size)),
+               Deleter(place)),
+          place_(place),
+          size_(size) {}

     virtual void* Ptr() const { return static_cast<void*>(ptr_.get()); }
     virtual size_t Size() const { return size_; }
     virtual paddle::platform::Place Place() const { return place_; }

-    std::unique_ptr<void, paddle::memory::Deleter> ptr_;
+    std::unique_ptr<T, Deleter> ptr_;
     paddle::platform::Place place_;  // record the place of ptr_.
     size_t size_;                    // size of the memory block.
   };

-  std::unique_ptr<Placeholder> holder_;  // holds the memory block if allocated.
+  std::shared_ptr<Placeholder> holder_;  // holds the memory block if allocated.
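The Deleter pattern in the tensor.h hunk above — a std::unique_ptr whose deleter object carries the allocation's Place, so that releasing the buffer is routed back to the matching allocator — can be sketched in isolation roughly as follows. Place and the malloc/free pair are stand-ins for illustration, not the real paddle::memory interface.

#include <cstdlib>
#include <iostream>
#include <memory>

struct Place { int device; };  // stand-in for paddle::platform::Place

struct Deleter {
  explicit Deleter(Place p) : place(p) {}
  void operator()(float* ptr) const {
    // A real implementation would call something like memory::Free(place, ptr).
    std::cout << "freeing buffer on device " << place.device << "\n";
    std::free(ptr);
  }
  Place place;  // the deleter carries the allocation's place with it
};

int main() {
  Place cpu{-1};
  // The deleter type is part of the smart pointer's type, just like
  // std::unique_ptr<T, Deleter> ptr_ in PlaceholderImpl.
  std::unique_ptr<float, Deleter> buf(
      static_cast<float*>(std::malloc(64 * sizeof(float))), Deleter(cpu));
}  // leaving scope invokes Deleter::operator(), which frees via the place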
}; } // namespace framework diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc index fa44b24b64..f76a31e921 100644 --- a/paddle/framework/tensor_test.cc +++ b/paddle/framework/tensor_test.cc @@ -13,12 +13,23 @@ #include "paddle/framework/tensor.h" #include +#include -TEST(Tensor, Data) { - using namespace paddle::framework; - using namespace paddle::platform; +TEST(Tensor, ASSERT) { + paddle::framework::Tensor cpu_tensor; - Tensor cpu_tensor; + bool caught = false; + try { + const double* p __attribute__((unused)) = cpu_tensor.data(); + } catch (paddle::framework::EnforceNotMet err) { + caught = true; + std::string msg = "Tensor::data must be called after Tensor::mutable_data"; + const char* what = err.what(); + for (size_t i = 0; i < msg.length(); ++i) { + ASSERT_EQ(what[i], msg[i]); + } + } + ASSERT_TRUE(caught); } /* mutable_data() is not tested at present @@ -27,45 +38,48 @@ TEST(Tensor, Data) { TEST(Tensor, MutableData) { using namespace paddle::framework; using namespace paddle::platform; + { + Tensor cpu_tensor; + float* p1 = nullptr; + float* p2 = nullptr; + // initialization + p1 = cpu_tensor.mutable_data(make_ddim({1, 2, 3}), CPUPlace()); + EXPECT_NE(p1, nullptr); + // set cpu_tensor a new dim with large size + // momery is supposed to be re-allocated + p2 = cpu_tensor.mutable_data(make_ddim({3, 4})); + EXPECT_NE(p2, nullptr); + EXPECT_NE(p1, p2); + // set cpu_tensor a new dim with same size + // momery block is supposed to be unchanged + p1 = cpu_tensor.mutable_data(make_ddim({2, 2, 3})); + EXPECT_EQ(p1, p2); + // set cpu_tensor a new dim with smaller size + // momery block is supposed to be unchanged + p2 = cpu_tensor.mutable_data(make_ddim({2, 2})); + EXPECT_EQ(p1, p2); + } - Tensor cpu_tensor; - float* p1 = nullptr; - float* p2 = nullptr; - // initialization - p1 = cpu_tensor.mutable_data(make_ddim({1, 2, 3}), CPUPlace()); - EXPECT_NE(p1, nullptr); - // set cpu_tensor a new dim with large size - // momery is supposed to be re-allocated - p2 = cpu_tensor.mutable_data(make_ddim({3, 4})); - EXPECT_NE(p2, nullptr); - EXPECT_NE(p1, p2); - // set cpu_tensor a new dim with same size - // momery block is supposed to be unchanged - p1 = cpu_tensor.mutable_data(make_ddim({2, 2, 3})); - EXPECT_EQ(p1, p2); - // set cpu_tensor a new dim with smaller size - // momery block is supposed to be unchanged - p2 = cpu_tensor.mutable_data(make_ddim({2, 2})); - EXPECT_EQ(p1, p2); - - Tensor gpu_tensor; - float* p1 = nullptr; - float* p2 = nullptr; - // initialization - p1 = gpu_tensor.mutable_data(make_ddim({1, 2, 3}), GPUPlace()); - EXPECT_NE(p1, nullptr); - // set gpu_tensor a new dim with large size - // momery is supposed to be re-allocated - p2 = gpu_tensor.mutable_data(make_ddim({3, 4})); - EXPECT_NE(p2, nullptr); - EXPECT_NE(p1, p2); - // set gpu_tensor a new dim with same size - // momery block is supposed to be unchanged - p1 = gpu_tensor.mutable_data(make_ddim({2, 2, 3})); - EXPECT_EQ(p1, p2); - // set gpu_tensor a new dim with smaller size - // momery block is supposed to be unchanged - p2 = gpu_tensor.mutable_data(make_ddim({2, 2})); - EXPECT_EQ(p1, p2); + { + Tensor gpu_tensor; + float* p1 = nullptr; + float* p2 = nullptr; + // initialization + p1 = gpu_tensor.mutable_data(make_ddim({1, 2, 3}), GPUPlace()); + EXPECT_NE(p1, nullptr); + // set gpu_tensor a new dim with large size + // momery is supposed to be re-allocated + p2 = gpu_tensor.mutable_data(make_ddim({3, 4})); + EXPECT_NE(p2, nullptr); + EXPECT_NE(p1, p2); + // set gpu_tensor a new dim with same 
size + // momery block is supposed to be unchanged + p1 = gpu_tensor.mutable_data(make_ddim({2, 2, 3})); + EXPECT_EQ(p1, p2); + // set gpu_tensor a new dim with smaller size + // momery block is supposed to be unchanged + p2 = gpu_tensor.mutable_data(make_ddim({2, 2})); + EXPECT_EQ(p1, p2); + } } */ \ No newline at end of file From 9b1240456342670098a48884182879cef8789425 Mon Sep 17 00:00:00 2001 From: wuyi05 Date: Mon, 3 Jul 2017 18:48:06 +0800 Subject: [PATCH 222/542] update dockerfile --- Dockerfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index d10fad6370..91bda8c734 100644 --- a/Dockerfile +++ b/Dockerfile @@ -38,11 +38,12 @@ RUN apt-get update && \ RUN wget -O go.tgz https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz && \ tar -C /usr/local -xzf go.tgz && \ mkdir /root/gopath && \ - rm go.tgz \ - curl https://glide.sh/get | sh + rm go.tgz ENV GOROOT=/usr/local/go GOPATH=/root/gopath # should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT. ENV PATH=${PATH}:${GOROOT}/bin +# install glide +RUN curl https://glide.sh/get | sh # git credential to skip password typing RUN git config --global credential.helper store From d054a5eef806d76458f9155bf5a4ffb98ba474d3 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 3 Jul 2017 19:08:27 +0800 Subject: [PATCH 223/542] re-submit --- paddle/framework/tensor.h | 2 +- paddle/framework/tensor_test.cc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 73eedd7375..f777661a1c 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -30,7 +30,7 @@ class Tensor { template const T* data() const { PADDLE_ENFORCE(holder_ != nullptr, - "Tensor::data must be called after Tensor::mutable_data"); + "Tensor::data must be called after Tensor::mutable_data."); return static_cast(holder_->Ptr()); } diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc index f76a31e921..727d81f8d7 100644 --- a/paddle/framework/tensor_test.cc +++ b/paddle/framework/tensor_test.cc @@ -23,7 +23,7 @@ TEST(Tensor, ASSERT) { const double* p __attribute__((unused)) = cpu_tensor.data(); } catch (paddle::framework::EnforceNotMet err) { caught = true; - std::string msg = "Tensor::data must be called after Tensor::mutable_data"; + std::string msg = "Tensor::data must be called after Tensor::mutable_data."; const char* what = err.what(); for (size_t i = 0; i < msg.length(); ++i) { ASSERT_EQ(what[i], msg[i]); @@ -82,4 +82,4 @@ TEST(Tensor, MutableData) { EXPECT_EQ(p1, p2); } } -*/ \ No newline at end of file +*/ From 43df61505991fd2c9fa50d08e7cc0717b740ab45 Mon Sep 17 00:00:00 2001 From: wuyi05 Date: Mon, 3 Jul 2017 19:18:47 +0800 Subject: [PATCH 224/542] update dockerfile --- Dockerfile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 91bda8c734..ed5910d93b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -38,12 +38,14 @@ RUN apt-get update && \ RUN wget -O go.tgz https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz && \ tar -C /usr/local -xzf go.tgz && \ mkdir /root/gopath && \ + mkdir /root/gopath/bin && \ + mkdir /root/gopath/src && \ rm go.tgz ENV GOROOT=/usr/local/go GOPATH=/root/gopath # should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT. 
-ENV PATH=${PATH}:${GOROOT}/bin +ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin # install glide -RUN curl https://glide.sh/get | sh +RUN curl -q https://glide.sh/get | sh # git credential to skip password typing RUN git config --global credential.helper store From 2d1f95de873542ae591b4575e14539f26945b162 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 3 Jul 2017 19:33:33 +0800 Subject: [PATCH 225/542] fix a compile error --- paddle/framework/tensor.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index f777661a1c..6a152f6a6d 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -37,8 +37,10 @@ class Tensor { template ::value>::type* = nullptr> T* mutable_data(DDim dims, paddle::platform::Place place) { - if (holder_ == nullptr || holder_->Place() != place || - holder_->Size() < product(dims) * sizeof(T)) { + if (holder_ == nullptr || + !(holder_->Place() == + place) /* some versions of boost::variant don't have operator!= */ + || holder_->Size() < product(dims) * sizeof(T)) { holder_.reset(new PlaceholderImpl(place, product(dims) * sizeof(T))); } return static_cast(holder_->Ptr()); From 89110fd2660098bc949a1f13f7b53515e0c931a3 Mon Sep 17 00:00:00 2001 From: liaogang Date: Mon, 3 Jul 2017 19:51:32 +0800 Subject: [PATCH 226/542] ENH: Add useGpu in system allocator --- paddle/memory/detail/system_allocator.cc | 4 ++++ paddle/memory/detail/system_allocator.h | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/paddle/memory/detail/system_allocator.cc b/paddle/memory/detail/system_allocator.cc index 2b0fbfa87e..75a2c91ef9 100644 --- a/paddle/memory/detail/system_allocator.cc +++ b/paddle/memory/detail/system_allocator.cc @@ -60,6 +60,8 @@ void CPUAllocator::Free(void* p, size_t size, size_t index) { free(p); } +bool CPUAllocator::UseGpu() { return false; } + #ifndef PADDLE_ONLY_CPU void* GPUAllocator::Alloc(size_t& index, size_t size) { @@ -131,6 +133,8 @@ void GPUAllocator::Free(void* p, size_t size, size_t index) { } } +bool GPUAllocator::UseGpu() { return true; } + #endif // PADDLE_ONLY_CPU } // namespace detail diff --git a/paddle/memory/detail/system_allocator.h b/paddle/memory/detail/system_allocator.h index 7093c42967..f3bbfef843 100644 --- a/paddle/memory/detail/system_allocator.h +++ b/paddle/memory/detail/system_allocator.h @@ -32,12 +32,14 @@ class SystemAllocator { virtual ~SystemAllocator() {} virtual void* Alloc(size_t& index, size_t size) = 0; virtual void Free(void* p, size_t size, size_t index) = 0; + virtual bool UseGpu() = 0; }; class CPUAllocator : public SystemAllocator { public: virtual void* Alloc(size_t& index, size_t size); virtual void Free(void* p, size_t size, size_t index); + virtual bool UseGpu(); }; #ifndef PADDLE_ONLY_CPU @@ -45,7 +47,7 @@ class GPUAllocator : public SystemAllocator { public: virtual void* Alloc(size_t& index, size_t size); virtual void Free(void* p, size_t size, size_t index); - + virtual bool UseGpu(); private: size_t gpu_alloc_size_ = 0; size_t fallback_alloc_size_ = 0; From 929f9cbdff08090a222495db7db601f164cebb8c Mon Sep 17 00:00:00 2001 From: liaogang Date: Mon, 3 Jul 2017 19:52:04 +0800 Subject: [PATCH 227/542] ENH: Add Metadata for memory block --- paddle/memory/detail/metadata.cc | 62 ++++++++++++++++++++++++++++++++ paddle/memory/detail/metadata.h | 53 +++++++++++++++++++++++++++ 2 files changed, 115 insertions(+) create mode 100644 paddle/memory/detail/metadata.cc create mode 100644 paddle/memory/detail/metadata.h diff 
--git a/paddle/memory/detail/metadata.cc b/paddle/memory/detail/metadata.cc new file mode 100644 index 0000000000..4607cd8512 --- /dev/null +++ b/paddle/memory/detail/metadata.cc @@ -0,0 +1,62 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/memory/detail/metadata.h" + +#include + +namespace paddle { +namespace memory { +namespace detail { + +Metadata::Metadata(MemoryBlock::Type t, size_t i, size_t s, size_t ts, + MemoryBlock* l, MemoryBlock* r) + : type(t), + index(i), + size(s), + total_size(ts), + left_buddy(l), + right_buddy(r) {} + +template +inline void hash_combine(std::size_t& seed, const T& v) { + std::hash hasher; + seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); +} + +inline size_t hash(const Metadata* metadata, size_t initial_seed) { + size_t seed = initial_seed; + + hash_combine(seed, (size_t)metadata->type); + hash_combine(seed, metadata->index); + hash_combine(seed, metadata->size); + hash_combine(seed, metadata->total_size); + hash_combine(seed, metadata->left_buddy); + hash_combine(seed, metadata->right_buddy); + + return seed; +} + +void Metadata::update_guards() { + guard_begin = hash(this, 1); + guard_end = hash(this, 2); +} + +bool Metadata::check_guards() const { + return guard_begin == hash(this, 1) && guard_end == hash(this, 2); +} + +} // namespace detail +} // namespace memory +} // namespace paddle diff --git a/paddle/memory/detail/metadata.h b/paddle/memory/detail/metadata.h new file mode 100644 index 0000000000..ddb826571b --- /dev/null +++ b/paddle/memory/detail/metadata.h @@ -0,0 +1,53 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include "paddle/memory/detail/memory_block.h" + +#include + +namespace paddle { +namespace memory { +namespace detail { + +class Metadata { + public: + Metadata(MemoryBlock::Type t, size_t i, size_t s, size_t ts, MemoryBlock* l, + MemoryBlock* r); + + public: + /*! \brief Update the guards when metadata is changed */ + void update_guards(); + + /*! 
\brief Check consistency to previous modification */ + bool check_guards() const; + + public: + // TODO(gangliao): compress this + // clang-format off + size_t guard_begin = 0; + MemoryBlock::Type type = MemoryBlock::INVALID_CHUNK; + size_t index = 0; + size_t size = 0; + size_t total_size = 0; + MemoryBlock* left_buddy = nullptr; + MemoryBlock* right_buddy = nullptr; + size_t guard_end = 0; + // clang-format on +}; + +} // namespace detail +} // namespace memory +} // namespace paddle From bbd3eab7ee88f02131edb41738a966aa0f1a0e88 Mon Sep 17 00:00:00 2001 From: liaogang Date: Mon, 3 Jul 2017 19:54:32 +0800 Subject: [PATCH 228/542] ENH: Add Alloc for buddy Allocator * Free will be added soon --- paddle/memory/detail/buddy_allocator.cc | 157 ++++++++++++++++++++++-- paddle/memory/detail/buddy_allocator.h | 88 +++++++++---- 2 files changed, 209 insertions(+), 36 deletions(-) diff --git a/paddle/memory/detail/buddy_allocator.cc b/paddle/memory/detail/buddy_allocator.cc index ebe680f5ee..2462ba084b 100644 --- a/paddle/memory/detail/buddy_allocator.cc +++ b/paddle/memory/detail/buddy_allocator.cc @@ -12,22 +12,161 @@ See the License for the specific language governing permissions and limitations under the License. */ -#pragma once - #include "paddle/memory/detail/buddy_allocator.h" +#include "glog/logging.h" namespace paddle { namespace memory { namespace detail { -BuddyAllocator::BuddyAllocator(size_t pool_size, size_t max_pools, - SystemAllocator* system_allocator) - : pool_size_(pool_size), - max_pools_(max_pools), - system_allocator_(system_allocator) { - PADDLE_ASSERT(pool_size > 0); - PADDLE_ASSERT(max_pools > 0); +BuddyAllocator::BuddyAllocator(SystemAllocator* system_allocator, + size_t min_chunk_size, size_t max_chunk_size) { + PADDLE_ASSERT(min_chunk_size > 0); + PADDLE_ASSERT(max_chunk_size > 0); PADDLE_ASSERT(system_allocator != nullptr); + + system_allocator_ = std::move(system_allocator); + min_chunk_size_ = min_chunk_size; + max_chunk_size_ = max_chunk_size; +} + +inline size_t align(size_t size, size_t alignment) { + size_t remaining = size % alignment; + return remaining == 0 ? 
size : size + (alignment - remaining); +} + +void* BuddyAllocator::Alloc(size_t unaligned_size) { + // adjust allocation alignment + size_t size = align(unaligned_size + sizeof(Metadata), min_chunk_size_); + + // acquire the allocator lock + std::lock_guard lock(mutex_); + + DLOG(INFO) << "Allocate " << unaligned_size << " bytes from chunk size " + << size; + + // if the allocation is huge, send directly to the system allocator + if (size > max_chunk_size_) { + DLOG(INFO) << "Allocate from system allocator."; + + return SystemAlloc(size); + } + + // query and allocate from the existing chunk + auto it = FindExistChunk(size); + + // refill the pool if failure + if (it == pool_.end()) { + it = RefillPool(); + } else { + DLOG(INFO) << " Allocation from existing memory block " << std::get<2>(*it) + << " at address " + << reinterpret_cast(std::get<2>(*it))->data(); + } + + // if still failure, fail fatally + if (it == pool_.end()) { + return nullptr; + } + + total_used_ += size; + total_free_ -= size; + + // split the allocation and return data for use + return reinterpret_cast(SplitToAlloc(it, size))->data(); +} + +void* BuddyAllocator::SystemAlloc(size_t size) { + size_t index = 0; + void* p = system_allocator_->Alloc(index, size); + + DLOG(INFO) << "Allocated " << p << " from system allocator."; + + if (p == nullptr) return nullptr; + + static_cast(p)->init(cache_, MemoryBlock::HUGE_CHUNK, index, + size, nullptr, nullptr); + + return static_cast(p)->data(); +} + +BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() { +#ifndef PADDLE_ONLY_CPU + if (system_allocator_->UseGpu()) { + if ((total_used_ + total_free_) == 0) { + // Compute the maximum allocation size for the first allocation. + max_chunk_size_ = platform::GpuMaxChunkSize(); + } + } +#endif // PADDLE_ONLY_CPU + + // Allocate a new maximum sized block + size_t index = 0; + void* p = system_allocator_->Alloc(index, max_chunk_size_); + + if (p == nullptr) return pool_.end(); + + DLOG(INFO) << " Creating and inserting new block " << p + << " from system allocator"; + + static_cast(p)->init(cache_, MemoryBlock::FREE_CHUNK, index, + max_chunk_size_, nullptr, nullptr); + + total_free_ += max_chunk_size_; + + // dump the block into pool + return pool_.insert({index, max_chunk_size_, p}).first; +} + +BuddyAllocator::PoolSet::iterator BuddyAllocator::FindExistChunk(size_t size) { + size_t index = 0; + + while (1) { + auto it = pool_.lower_bound({index, size, nullptr}); + if (it == pool_.end()) return it; + + if (std::get<0>(*it) > index) { + if (std::get<1>(*it) >= size) { + return it; + } + + index = std::get<0>(*it); + continue; + } + return it; + } +} + +void* BuddyAllocator::SplitToAlloc(BuddyAllocator::PoolSet::iterator it, + size_t size) { + auto block = static_cast(std::get<2>(*it)); + + pool_.erase(it); + + DLOG(INFO) << " Split block (" << block << ", " << block->total_size(cache_) + << ") into"; + + block->split(cache_, size); + + DLOG(INFO) << " Left block (" << block << ", " << block->total_size(cache_) + << ")"; + + block->set_type(cache_, MemoryBlock::ARENA_CHUNK); + + // the rest of memory if exist + if (block->has_right_buddy(cache_)) { + if (block->right_buddy(cache_)->type(cache_) == MemoryBlock::FREE_CHUNK) { + DLOG(INFO) << " Insert right block (" << block->right_buddy(cache_) + << ", " << block->right_buddy(cache_)->total_size(cache_) + << ")"; + + pool_.insert({block->right_buddy(cache_)->index(cache_), + block->right_buddy(cache_)->total_size(cache_), + block->right_buddy(cache_)}); + } + } + + return block; } 
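A note on FindExistChunk above: std::set orders the (index, size, address) tuples lexicographically, so a lower_bound on {index, size, nullptr} lands on the smallest free chunk within that allocator index whose size is at least the request. A minimal sketch with made-up chunk sizes:

#include <cstddef>
#include <iostream>
#include <set>
#include <tuple>

using IndexSizeAddress = std::tuple<std::size_t, std::size_t, void*>;

int main() {
  // Three free chunks of 4 KB, 64 KB and 1 MB, all under allocator index 0.
  std::set<IndexSizeAddress> pool{{0, 1 << 12, nullptr},
                                  {0, 1 << 16, nullptr},
                                  {0, 1 << 20, nullptr}};
  // First fit for a 20 KB request: lower_bound picks the 64 KB chunk.
  auto it = pool.lower_bound(IndexSizeAddress{0, 20 << 10, nullptr});
  if (it != pool.end()) {
    std::cout << "picked chunk of " << std::get<1>(*it) << " bytes\n";
  }
}

SplitToAlloc then erases the chosen tuple from the pool and re-inserts whatever free right buddy remains after the split.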
} // namespace detail diff --git a/paddle/memory/detail/buddy_allocator.h b/paddle/memory/detail/buddy_allocator.h index 82e6aaedc7..38bedc9a18 100644 --- a/paddle/memory/detail/buddy_allocator.h +++ b/paddle/memory/detail/buddy_allocator.h @@ -15,9 +15,15 @@ #pragma once #include "paddle/memory/detail/system_allocator.h" +#include "paddle/memory/detail/metadata.h" +#include "paddle/platform/assert.h" +#include "paddle/platform/cpu_info.h" +#include "paddle/platform/gpu_info.h" +#include #include #include +#include namespace paddle { namespace memory { @@ -25,55 +31,83 @@ namespace detail { class BuddyAllocator { public: - BuddyAllocator(size_t pool_size, size_t max_pools, - SystemAllocator* system_allocator); + BuddyAllocator(SystemAllocator* system_allocator, size_t min_chunk_size, + size_t max_chunk_size); + ~BuddyAllocator(); - void* Alloc(size_t size); + public: + void* Alloc(size_t unaligned_size); void Free(void*); size_t Used(); + public: + // Disable copy and assignment. + BuddyAllocator(const BuddyAllocator&) = delete; + BuddyAllocator& operator=(const BuddyAllocator&) = delete; + private: - struct Block { - size_t size_; - Block* left_; // left buddy - Block* right_; // right buddy - }; + // Tuple type: allocator index, memory size, memory address + using IndexSizeAddress = std::tuple; + using PoolSet = std::set; - // Initially, there is only one pool. If a Alloc founds not enough - // memory from that pool, and there has not been max_num_pools_, - // create a new pool by calling system_allocator_.Alloc(pool_size_). - std::vector pools_; + /*! \brief Allocate fixed-size memory from system */ + void* SystemAlloc(size_t size); - size_t pool_size_; // the size of each pool; - size_t max_num_pools_; // the size of all pools; + /*! \brief If existing chunks are not suitable, refill pool */ + PoolSet::iterator RefillPool(); - SystemAllocator* system_allocator_; + /** + * \brief Find the suitable chunk from existing pool + * + * \param it pool iterator which contains suitable block. + * \param size the size of allocation. + */ + void* SplitToAlloc(PoolSet::iterator it, size_t size); - std::mutex mutex_; + /*! \brief Find the existing chunk which used to allocation */ + PoolSet::iterator FindExistChunk(size_t size); - // Disable copy and assignment. - BuddyAllocator(const BuddyAllocator&) = delete; - BuddyAllocator& operator=(const BuddyAllocator&) = delete; + private: + size_t total_used_ = 0; // the total size of used memory + size_t total_free_ = 0; // the total size of free memory + + size_t min_chunk_size_; // the minimum size of each chunk + size_t max_chunk_size_; // the maximum size of each chunk + + private: + PoolSet pool_; + + private: + // Unify the metadata format between GPU and CPU allocations + using MetadataCache = std::unordered_map; + MetadataCache cache_; + + private: + SystemAllocator* system_allocator_; + std::mutex mutex_; }; -BuddyAllocator* GetCPUBuddyAllocator() { - static BuddyAllocator* a = nullptr; +BuddyAllocator* GetCPUBuddyAllocator() { + static BuddyAllocator* a = nullptr; if (a == nullptr) { - a = new BuddyAllocator(); + a = new BuddyAllocator(new CPUAllocator, platform::CpuMinChunkSize(), + platform::CpuMaxChunkSize()); } return a; } #ifndef PADDLE_ONLY_CPU // The following code are for CUDA. 
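One design note on GetCPUBuddyAllocator above: the bare-pointer lazy initialization is not thread-safe if two threads hit their first allocation concurrently. A possible alternative, shown only as a sketch (this is not what the patch does), is a C++11 function-local static, which the language guarantees is constructed exactly once:

#include <iostream>

struct BuddyAllocatorStub {  // stand-in for the real BuddyAllocator
  BuddyAllocatorStub() { std::cout << "constructed once\n"; }
};

BuddyAllocatorStub* GetCPUAllocatorStub() {
  // "Magic static": initialization is thread-safe since C++11.
  static BuddyAllocatorStub instance;
  return &instance;
}

int main() {
  GetCPUAllocatorStub();
  GetCPUAllocatorStub();  // prints nothing the second time
}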
-BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { - static BuddyAllocator** as = NULL; +BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { + static BuddyAllocator** as = NULL; if (as == NULL) { - int gpu_num = platform::GetDeviceCount(); - as = new BuddyAllocator*[gpu_num]; + int gpu_num = platform::GpuDeviceCount(); + as = new BuddyAllocator*[gpu_num]; for (int gpu = 0; gpu < gpu_num; gpu++) { - as[gpu] = new BuddyAllocator(); + as[gpu] = + new BuddyAllocator(new GPUAllocator, platform::GpuMinChunkSize(), + platform::GpuMaxChunkSize()); } } return as[gpu_id]; From eefcfed337899b77e56daa12470c8a9a69c69502 Mon Sep 17 00:00:00 2001 From: wuyi05 Date: Mon, 3 Jul 2017 19:58:59 +0800 Subject: [PATCH 229/542] fix ci error --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7db8a97381..5349f59805 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -97,6 +97,7 @@ include(external/warpctc) # download, build, install warpctc include(external/any) # download libn::any include(external/eigen) # download eigen3 +include(cudnn) # set cudnn libraries, must before configure include(configure) # add paddle env configuration include(generic) # simplify cmake module include(package) # set paddle packages @@ -105,7 +106,6 @@ include(ccache) # set ccache for compilation include(util) # set unittest and link libs include(rdma) # set rdma libraries include(flags) # set paddle compile flags -include(cudnn) # set cudnn libraries include(version) # set PADDLE_VERSION include(coveralls) # set code coverage From e48e21da2b2522e4a9e1bca589d68eb02a419fb0 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 3 Jul 2017 20:14:30 +0800 Subject: [PATCH 230/542] remove unnecessary include --- paddle/framework/tensor.h | 1 - 1 file changed, 1 deletion(-) diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 6a152f6a6d..ce5d98b04e 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -16,7 +16,6 @@ limitations under the License. */ #include #include -#include #include "paddle/framework/ddim.h" #include "paddle/framework/enforce.h" #include "paddle/memory/memory.h" From 3ba7a738f3f3e77240d026db57692d66bc9481ed Mon Sep 17 00:00:00 2001 From: qijun Date: Mon, 3 Jul 2017 20:37:42 +0800 Subject: [PATCH 231/542] add dynamic_load --- paddle/platform/cublas.h | 87 +++++++++++++++++ paddle/platform/cudnn.h | 114 ++++++++++++++++++++++ paddle/platform/curand.h | 42 ++++++++ paddle/platform/dynamic_loader.cc | 157 ++++++++++++++++++++++++++++++ paddle/platform/dynamic_loader.h | 63 ++++++++++++ 5 files changed, 463 insertions(+) create mode 100644 paddle/platform/cublas.h create mode 100644 paddle/platform/cudnn.h create mode 100644 paddle/platform/curand.h create mode 100644 paddle/platform/dynamic_loader.cc create mode 100644 paddle/platform/dynamic_loader.h diff --git a/paddle/platform/cublas.h b/paddle/platform/cublas.h new file mode 100644 index 0000000000..70c9713325 --- /dev/null +++ b/paddle/platform/cublas.h @@ -0,0 +1,87 @@ +#include +#include "paddle/platform/dynamic_loader.h" + +namespace paddle { +namespace dyload { +namespace dynload { + +std::once_flag cublas_dso_flag; +void *cublas_dso_handle = nullptr; + +/** + * The following macro definition can generate structs + * (for each function) to dynamic load cublas routine + * via operator overloading. 
+ *
+ * note: default dynamic linked libs
+ */
+#ifdef PADDLE_USE_DSO
+#define DYNAMIC_LOAD_CUBLAS_WRAP(__name)                                       \
+  struct DynLoad__##__name {                                                   \
+    template <typename... Args>                                                \
+    cublasStatus_t operator()(Args... args) {                                  \
+      typedef cublasStatus_t (*cublasFunc)(Args...);                           \
+      std::call_once(cublas_dso_flag, GetCublasDsoHandle, &cublas_dso_handle); \
+      void *p_##__name = dlsym(cublas_dso_handle, #__name);                    \
+      return reinterpret_cast<cublasFunc>(p_##__name)(args...);                \
+    }                                                                          \
+  } __name;  // struct DynLoad__##__name
+#else
+#define DYNAMIC_LOAD_CUBLAS_WRAP(__name) \
+  struct DynLoad__##__name {             \
+    template <typename... Args>          \
+    cublasStatus_t operator()(Args... args) { \
+      return __name(args...);            \
+    }                                     \
+  } __name;  // struct DynLoad__##__name
+#endif
+
+#define DYNAMIC_LOAD_CUBLAS_V2_WRAP(__name) DYNAMIC_LOAD_CUBLAS_WRAP(__name)
+
+// include all needed cublas functions in HPPL
+// clang-format off
+#define CUBLAS_BLAS_ROUTINE_EACH(__macro) \
+  __macro(cublasSgemv) \
+  __macro(cublasDgemv) \
+  __macro(cublasSgemm) \
+  __macro(cublasDgemm) \
+  __macro(cublasSgeam) \
+  __macro(cublasDgeam) \
+
+DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasCreate)
+DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasDestroy)
+DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasSetStream)
+DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasSetPointerMode)
+DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasGetPointerMode)
+DYNAMIC_LOAD_CUBLAS_WRAP(cublasSgemmBatched)
+DYNAMIC_LOAD_CUBLAS_WRAP(cublasDgemmBatched)
+DYNAMIC_LOAD_CUBLAS_WRAP(cublasCgemmBatched)
+DYNAMIC_LOAD_CUBLAS_WRAP(cublasZgemmBatched)
+DYNAMIC_LOAD_CUBLAS_WRAP(cublasSgetrfBatched)
+DYNAMIC_LOAD_CUBLAS_WRAP(cublasSgetriBatched)
+DYNAMIC_LOAD_CUBLAS_WRAP(cublasDgetrfBatched)
+DYNAMIC_LOAD_CUBLAS_WRAP(cublasDgetriBatched)
+CUBLAS_BLAS_ROUTINE_EACH(DYNAMIC_LOAD_CUBLAS_V2_WRAP)
+
+#undef DYNAMIC_LOAD_CUBLAS_WRAP
+#undef DYNAMIC_LOAD_CUBLAS_V2_WRAP
+#undef CUBLAS_BLAS_ROUTINE_EACH
+
+} /* namespace dynload */
+
+// clang-format on
+#ifndef PADDLE_TYPE_DOUBLE
+#define CUBLAS_GEAM dynload::cublasSgeam
+#define CUBLAS_GEMV dynload::cublasSgemv
+#define CUBLAS_GEMM dynload::cublasSgemm
+#define CUBLAS_GETRF dynload::cublasSgetrfBatched
+#define CUBLAS_GETRI dynload::cublasSgetriBatched
+#else
+#define CUBLAS_GEAM dynload::cublasDgeam
+#define CUBLAS_GEMV dynload::cublasDgemv
+#define CUBLAS_GEMM dynload::cublasDgemm
+#define CUBLAS_GETRF dynload::cublasDgetrfBatched
+#define CUBLAS_GETRI dynload::cublasDgetriBatched
+#endif
+}  // namespace dyload
+}  // namespace paddle

diff --git a/paddle/platform/cudnn.h b/paddle/platform/cudnn.h
new file mode 100644
index 0000000000..ab878cd555
--- /dev/null
+++ b/paddle/platform/cudnn.h
@@ -0,0 +1,114 @@
+#include <cudnn.h>
+#include "paddle/platform/dynamic_loader.h"
+
+namespace paddle {
+namespace dyload {
+
+std::once_flag cudnn_dso_flag;
+void* cudnn_dso_handle = nullptr;
+
+#ifdef PADDLE_USE_DSO
+
+#define DYNAMIC_LOAD_CUDNN_WRAP(__name)                                    \
+  struct DynLoad__##__name {                                               \
+    template <typename... Args>                                            \
+    auto operator()(Args...
args) -> decltype(__name(args...)) { \ + return __name(args...); \ + } \ + } __name; /* struct DynLoad__##__name */ + +#endif + +/** + * include all needed cudnn functions in HPPL + * different cudnn version has different interfaces + **/ +// clang-format off +#define CUDNN_DNN_ROUTINE_EACH(__macro) \ + __macro(cudnnSetTensor4dDescriptor) \ + __macro(cudnnSetTensor4dDescriptorEx) \ + __macro(cudnnGetConvolutionNdForwardOutputDim) \ + __macro(cudnnGetConvolutionForwardAlgorithm) \ + __macro(cudnnCreateTensorDescriptor) \ + __macro(cudnnDestroyTensorDescriptor) \ + __macro(cudnnCreateFilterDescriptor) \ + __macro(cudnnSetFilter4dDescriptor) \ + __macro(cudnnSetPooling2dDescriptor) \ + __macro(cudnnDestroyFilterDescriptor) \ + __macro(cudnnCreateConvolutionDescriptor) \ + __macro(cudnnCreatePoolingDescriptor) \ + __macro(cudnnDestroyPoolingDescriptor) \ + __macro(cudnnSetConvolution2dDescriptor) \ + __macro(cudnnDestroyConvolutionDescriptor) \ + __macro(cudnnCreate) \ + __macro(cudnnDestroy) \ + __macro(cudnnSetStream) \ + __macro(cudnnActivationForward) \ + __macro(cudnnConvolutionForward) \ + __macro(cudnnConvolutionBackwardBias) \ + __macro(cudnnGetConvolutionForwardWorkspaceSize) \ + __macro(cudnnTransformTensor) \ + __macro(cudnnPoolingForward) \ + __macro(cudnnPoolingBackward) \ + __macro(cudnnSoftmaxBackward) \ + __macro(cudnnSoftmaxForward) \ + __macro(cudnnGetVersion) \ + __macro(cudnnGetErrorString) +CUDNN_DNN_ROUTINE_EACH(DYNAMIC_LOAD_CUDNN_WRAP) + +#define CUDNN_DNN_ROUTINE_EACH_R2(__macro) \ + __macro(cudnnAddTensor) \ + __macro(cudnnConvolutionBackwardData) \ + __macro(cudnnConvolutionBackwardFilter) +CUDNN_DNN_ROUTINE_EACH_R2(DYNAMIC_LOAD_CUDNN_WRAP) + +// APIs available after R3: +#if CUDNN_VERSION >= 3000 +#define CUDNN_DNN_ROUTINE_EACH_AFTER_R3(__macro) \ + __macro(cudnnGetConvolutionBackwardFilterWorkspaceSize) \ + __macro(cudnnGetConvolutionBackwardDataAlgorithm) \ + __macro(cudnnGetConvolutionBackwardFilterAlgorithm) \ + __macro(cudnnGetConvolutionBackwardDataWorkspaceSize) +CUDNN_DNN_ROUTINE_EACH_AFTER_R3(DYNAMIC_LOAD_CUDNN_WRAP) +#undef CUDNN_DNN_ROUTINE_EACH_AFTER_R3 +#endif + + +// APIs available after R4: +#if CUDNN_VERSION >= 4007 +#define CUDNN_DNN_ROUTINE_EACH_AFTER_R4(__macro) \ + __macro(cudnnBatchNormalizationForwardTraining) \ + __macro(cudnnBatchNormalizationForwardInference) \ + __macro(cudnnBatchNormalizationBackward) +CUDNN_DNN_ROUTINE_EACH_AFTER_R4(DYNAMIC_LOAD_CUDNN_WRAP) +#undef CUDNN_DNN_ROUTINE_EACH_AFTER_R4 +#endif + +// APIs in R5 +#if CUDNN_VERSION >= 5000 +#define CUDNN_DNN_ROUTINE_EACH_R5(__macro) \ + __macro(cudnnCreateActivationDescriptor) \ + __macro(cudnnSetActivationDescriptor) \ + __macro(cudnnGetActivationDescriptor) \ + __macro(cudnnDestroyActivationDescriptor) +CUDNN_DNN_ROUTINE_EACH_R5(DYNAMIC_LOAD_CUDNN_WRAP) +#undef CUDNN_DNN_ROUTINE_EACH_R5 +#endif + +#undef CUDNN_DNN_ROUTINE_EACH +// clang-format on +} // namespace dyload +} // namespace paddle diff --git a/paddle/platform/curand.h b/paddle/platform/curand.h new file mode 100644 index 0000000000..692c024e6e --- /dev/null +++ b/paddle/platform/curand.h @@ -0,0 +1,42 @@ +#include +#include "paddle/platform/dynamic_loader.h" + +namespace paddle { +namespace dyload { +#ifdef PADDLE_USE_DSO +#define DYNAMIC_LOAD_CURAND_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + curandStatus_t operator()(Args... 
args) { \ + typedef curandStatus_t (*curandFunc)(Args...); \ + std::call_once(curand_dso_flag, GetCurandDsoHandle, &curand_dso_handle); \ + void *p_##__name = dlsym(curand_dso_handle, #__name); \ + return reinterpret_cast(p_##__name)(args...); \ + } \ + } __name; /* struct DynLoad__##__name */ +#else +#define DYNAMIC_LOAD_CURAND_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + curandStatus_t operator()(Args... args) { \ + return __name(args...); \ + } \ + } __name; /* struct DynLoad__##__name */ +#endif + +/* include all needed curand functions in HPPL */ +// clang-format off +#define CURAND_RAND_ROUTINE_EACH(__macro) \ + __macro(curandCreateGenerator) \ + __macro(curandSetStream) \ + __macro(curandSetPseudoRandomGeneratorSeed)\ + __macro(curandGenerateUniform) \ + __macro(curandGenerateUniformDouble) +// clang-format on + +CURAND_RAND_ROUTINE_EACH(DYNAMIC_LOAD_CURAND_WRAP) + +#undef CURAND_RAND_ROUTINE_EACH +#undef DYNAMIC_LOAD_CURAND_WRAP +} +} // namespace paddle diff --git a/paddle/platform/dynamic_loader.cc b/paddle/platform/dynamic_loader.cc new file mode 100644 index 0000000000..9036eaf642 --- /dev/null +++ b/paddle/platform/dynamic_loader.cc @@ -0,0 +1,157 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "DynamicLoader.h" +#include "Logging.h" + +DEFINE_string(cudnn_dir, "", + "Specify path for loading libcudnn.so. For instance, " + "/usr/local/cudnn/lib. If empty [default], dlopen " + "will search cudnn from LD_LIBRARY_PATH"); + +DEFINE_string(cuda_dir, "", + "Specify path for loading cuda library, such as libcublas, " + "libcurand. For instance, /usr/local/cuda/lib64. If default, " + "dlopen will search cuda from LD_LIBRARY_PATH"); + +DEFINE_string(warpctc_dir, "", "Specify path for loading libwarpctc.so."); + +DEFINE_string(lapack_dir, "", "Specify path for loading liblapack.so."); + +static inline std::string join(const std::string& part1, + const std::string& part2) { + // directory separator + const char sep = '/'; + if (!part2.empty() && part2.front() == sep) { + return part2; + } + std::string ret; + ret.reserve(part1.size() + part2.size() + 1); + ret = part1; + if (!ret.empty() && ret.back() != sep) { + ret += sep; + } + ret += part2; + return ret; +} + +static inline void GetDsoHandleFromDefaultPath(std::string& dso_path, + void** dso_handle, + int dynload_flags) { + VLOG(3) << "Try to find library: " << dso_path + << " from default system path."; + // default search from LD_LIBRARY_PATH/DYLD_LIBRARY_PATH + *dso_handle = dlopen(dso_path.c_str(), dynload_flags); + +// DYLD_LIBRARY_PATH is disabled after Mac OS 10.11 to +// bring System Integrity Projection (SIP), if dso_handle +// is null, search from default package path in Mac OS. 
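The handle-loading helpers in this file pair with the DYNAMIC_LOAD_*_WRAP macros above: the DSO is opened once under std::call_once and each symbol is resolved lazily with dlsym. A stripped-down sketch of that pattern follows; libm/cosf are stand-ins for the CUDA libraries, and the snippet assumes a Linux glibc system (link with -ldl).

#include <dlfcn.h>
#include <iostream>
#include <mutex>

std::once_flag dso_flag;
void* dso_handle = nullptr;

// Resolve `name` from `lib`, opening the library only on the first call.
template <typename Func>
Func LoadSym(const char* lib, const char* name) {
  std::call_once(dso_flag,
                 [&] { dso_handle = dlopen(lib, RTLD_LAZY | RTLD_LOCAL); });
  if (dso_handle == nullptr) return nullptr;
  return reinterpret_cast<Func>(dlsym(dso_handle, name));
}

int main() {
  using CosF = float (*)(float);
  if (auto f = LoadSym<CosF>("libm.so.6", "cosf")) {
    std::cout << f(0.0f) << "\n";  // prints 1
  }
}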
+#if defined(__APPLE__) || defined(__OSX__) + if (nullptr == *dso_handle) { + dso_path = join("/usr/local/cuda/lib/", dso_path); + *dso_handle = dlopen(dso_path.c_str(), dynload_flags); + if (nullptr == *dso_handle) { + if (dso_path == "libcudnn.dylib") { + LOG(FATAL) + << "Note: [Recommend] copy cudnn into /usr/local/cuda/ \n" // NOLINT + << "For instance, sudo tar -xzf " + "cudnn-7.5-osx-x64-v5.0-ga.tgz -C " // NOLINT + << "/usr/local \n sudo chmod a+r " + "/usr/local/cuda/include/cudnn.h " // NOLINT + << "/usr/local/cuda/lib/libcudnn*"; + } + } + } +#endif +} + +static inline void GetDsoHandleFromSearchPath(const std::string& search_root, + const std::string& dso_name, + void** dso_handle) { + int dynload_flags = RTLD_LAZY | RTLD_LOCAL; + *dso_handle = nullptr; + + std::string dlPath = dso_name; + if (search_root.empty()) { + GetDsoHandleFromDefaultPath(dlPath, dso_handle, dynload_flags); + } else { + // search xxx.so from custom path + dlPath = join(search_root, dso_name); + *dso_handle = dlopen(dlPath.c_str(), dynload_flags); + // if not found, search from default path + if (nullptr == *dso_handle) { + LOG(WARNING) << "Failed to find dynamic library: " << dlPath << " (" + << dlerror() << ")"; + dlPath = dso_name; + GetDsoHandleFromDefaultPath(dlPath, dso_handle, dynload_flags); + } + } + + CHECK(nullptr != *dso_handle) << "Failed to find dynamic library: " << dlPath + << " (" << dlerror() << ") \n" + << "Please specify its path correctly using " + "following ways: \n" + + << "Method. set environment variable " + "LD_LIBRARY_PATH on Linux or " + << "DYLD_LIBRARY_PATH on Mac OS. \n" + << "For instance, issue command: export " + "LD_LIBRARY_PATH=... \n" + + << "Note: After Mac OS 10.11, using the " + "DYLD_LIBRARY_PATH is impossible " + << "unless System Integrity Protection (SIP) " + "is disabled."; +} + +void GetCublasDsoHandle(void** dso_handle) { +#if defined(__APPLE__) || defined(__OSX__) + GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcublas.dylib", dso_handle); +#else + GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcublas.so", dso_handle); +#endif +} + +void GetCudnnDsoHandle(void** dso_handle) { +#if defined(__APPLE__) || defined(__OSX__) + GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.dylib", dso_handle); +#else + GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.so", dso_handle); +#endif +} + +void GetCurandDsoHandle(void** dso_handle) { +#if defined(__APPLE__) || defined(__OSX__) + GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcurand.dylib", dso_handle); +#else + GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcurand.so", dso_handle); +#endif +} + +void GetWarpCTCDsoHandle(void** dso_handle) { +#if defined(__APPLE__) || defined(__OSX__) + GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "libwarpctc.dylib", dso_handle); +#else + GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "libwarpctc.so", dso_handle); +#endif +} + +void GetLapackDsoHandle(void** dso_handle) { +#if defined(__APPLE__) || defined(__OSX__) + GetDsoHandleFromSearchPath(FLAGS_lapack_dir, "liblapacke.dylib", dso_handle); +#else + GetDsoHandleFromSearchPath(FLAGS_lapack_dir, "liblapacke.so", dso_handle); +#endif +} diff --git a/paddle/platform/dynamic_loader.h b/paddle/platform/dynamic_loader.h new file mode 100644 index 0000000000..9b5ad21724 --- /dev/null +++ b/paddle/platform/dynamic_loader.h @@ -0,0 +1,63 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifndef DYNAMIC_LOAD_H_ +#define DYNAMIC_LOAD_H_ + +#include +#include +#include +#include + +/** + * @brief load the DSO of CUBLAS + * + * @param **dso_handle dso handler + * + */ +void GetCublasDsoHandle(void** dso_handle); + +/** + * @brief load the DSO of CUDNN + * + * @param **dso_handle dso handler + * + */ +void GetCudnnDsoHandle(void** dso_handle); + +/** + * @brief load the DSO of CURAND + * + * @param **dso_handle dso handler + * + */ +void GetCurandDsoHandle(void** dso_handle); + +/** + * @brief load the DSO of warp-ctc + * + * @param **dso_handle dso handler + * + */ +void GetWarpCTCDsoHandle(void** dso_handle); + +/** + * @brief load the DSO of lapack + * + * @param **dso_handle dso handler + * + */ +void GetLapackDsoHandle(void** dso_handle); + +#endif // DYNAMIC_LOAD_H_ From a30754b05e1ef58b5803c3d9996ed0cc69100ac5 Mon Sep 17 00:00:00 2001 From: qijun Date: Mon, 3 Jul 2017 20:41:31 +0800 Subject: [PATCH 232/542] test device_context --- paddle/platform/CMakeLists.txt | 3 + paddle/platform/device_context.h | 166 +++++++++++++++++++++++++ paddle/platform/device_context_test.cu | 29 +++++ 3 files changed, 198 insertions(+) create mode 100644 paddle/platform/device_context.h create mode 100644 paddle/platform/device_context_test.cu diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index c7d7b14518..c95b54a4df 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -2,3 +2,6 @@ nv_test(cuda_test SRCS cuda_test.cu) cc_library(place SRCS place.cc) cc_test(place_test SRCS place_test.cc DEPS place glog gflags) + +cc_library(dynamic_loader SRCS dynamic_loader.cc) +nv_test(device_context_test SRCS device_context_test.cu DEPS place dynamic_loader glog gflags) diff --git a/paddle/platform/device_context.h b/paddle/platform/device_context.h new file mode 100644 index 0000000000..f95aac4a36 --- /dev/null +++ b/paddle/platform/device_context.h @@ -0,0 +1,166 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#ifndef PADDLE_ONLY_CPU +#include "paddle/platform/cublas.h" +#include "paddle/platform/cuda.h" +#include "paddle/platform/cudnn.h" +#include "paddle/platform/curand.h" +#define EIGEN_USE_GPU +#endif + +#include "paddle/framework/enforce.h" +#include "paddle/platform/place.h" +#include "unsupported/Eigen/CXX11/Tensor" + +namespace paddle { +namespace platform { + +class DeviceContext { + public: + virtual ~DeviceContext() {} +}; + +class CpuDeviceContext : public DeviceContext { + Eigen::DefaultDevice eigen_device() { + if (!eigen_device_) { + eigen_device_ = new Eigen::DefaultDevice(); + } + return *eigen_device_; + } + + private: + Eigen::DefaultDevice* eigen_device_{nullptr}; +}; + +#ifndef PADDLE_ONLY_CPU +class DeviceGuard { + public: + explicit DeviceGuard(GPUPlace new_place) : previous_(GetCurrentDeviceId()) { + if (previous_ != new_place) { + paddle::platform::SetDeviceId(new_place.device); + } + } + + ~DeviceGuard() { paddle::platform::SetDeviceId(previous_.device); } + + private: + GPUPlace previous_; +}; + +class CudaDeviceContext : public DeviceContext { + public: + explicit CudaDeviceContext(const GPUPlace gpu_place) : gpu_place_(gpu_place) { + DeviceGuard guard(gpu_place_); + paddle::platform::throw_on_error(cudaStreamCreate(&stream_), + "cudaStreamCreate failed"); + eigen_stream_ = new Eigen::CudaStreamDevice(&stream_); + eigen_device_ = new Eigen::GpuDevice(eigen_stream_); + } + + void Wait() { + paddle::platform::throw_on_error(cudaStreamSynchronize(stream_), + "cudaStreamSynchronize failed"); + } + + cudaStream_t stream() { return stream_; } + + Eigen::GpuDevice eigen_device() { return *eigen_device_; } + + cublasHandle_t cublas_handle() { + if (!blas_handle_) { + DeviceGuard guard(gpu_place_); + PADDLE_ENFORCE(cublasCreate(&blas_handle_) == CUBLAS_STATUS_SUCCESS, + "cublasCreate failed"); + PADDLE_ENFORCE( + cublasSetStream(blas_handle_, stream_) == CUBLAS_STATUS_SUCCESS, + "cublasSetStream failed"); + } + return blas_handle_; + } + + cudnnHandle_t cudnn_handle() { + if (!dnn_handle_) { + DeviceGuard guard(gpu_place_); + PADDLE_ENFORCE(cudnnCreate(&dnn_handle_) == CUDNN_STATUS_SUCCESS, + "cudnnCreate failed"); + PADDLE_ENFORCE( + cudnnSetStream(dnn_handle_, stream_) == CUDNN_STATUS_SUCCESS, + "cudnnSetStream failed"); + } + return dnn_handle_; + } + + curandGenerator_t curand_generator() { + if (!rand_generator_) { + DeviceGuard guard(gpu_place_); + PADDLE_ENFORCE( + curandCreateGenerator(&rand_generator_, CURAND_RNG_PSEUDO_DEFAULT) == + CURAND_STATUS_SUCCESS, + "curandCreateGenerator failed"); + PADDLE_ENFORCE( + curandSetPseudoRandomGeneratorSeed(rand_generator_, random_seed_) == + CURAND_STATUS_SUCCESS, + "curandSetPseudoRandomGeneratorSeed failed"); + PADDLE_ENFORCE( + curandSetStream(rand_generator_, stream_) == CURAND_STATUS_SUCCESS, + "curandSetStream failed"); + } + return rand_generator_; + } + + ~CudaDeviceContext() { + Wait(); + if (blas_handle_) { + PADDLE_ENFORCE(cublasDestroy(blas_handle_) == CUBLAS_STATUS_SUCCESS, + "cublasDestroy failed"); + } + + if (dnn_handle_) { + PADDLE_ENFORCE(cudnnDestroy(dnn_handle_) == CUDNN_STATUS_SUCCESS, + "cudnnDestroy failed"); + } + + if (rand_generator_) { + PADDLE_ENFORCE( + curandDestroyGenerator(rand_generator_) == CURAND_STATUS_SUCCESS, + "curandDestroyGenerator failed"); + } + + delete eigen_stream_; + delete eigen_device_; + + paddle::platform::throw_on_error(cudaStreamDestroy(stream_), + "cudaStreamDestroy failed"); + } + + private: + GPUPlace gpu_place_; + cudaStream_t stream_; + + 
Eigen::CudaStreamDevice* eigen_stream_; + Eigen::GpuDevice* eigen_device_; + + cublasHandle_t blas_handle_{nullptr}; + + cudnnHandle_t dnn_handle_{nullptr}; + + int random_seed_; + curandGenerator_t rand_generator_{nullptr}; +}; +#endif +} // namespace platform +} // namespace paddle diff --git a/paddle/platform/device_context_test.cu b/paddle/platform/device_context_test.cu new file mode 100644 index 0000000000..a15fb53b71 --- /dev/null +++ b/paddle/platform/device_context_test.cu @@ -0,0 +1,29 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/platform/device_context.h" +#include "gtest/gtest.h" + + +TEST(DeviceContext, CudaDevice) { + int count = paddle::platform::GetDeviceCount(); + for (int i = 0; i < count; i++) { + paddle::platform::CudaDeviceContext* device_context = new paddle::platform::CudaDeviceContext(i); + __attribute__((unused)) Eigen::GpuDevice gpu_device = device_context->eigen_device(); + __attribute__((unused)) cudnnHandle_t cudnn_handle = device_context->cudnn_handle(); + __attribute__((unused)) cublasHandle_t cublas_handle = device_context->cublas_handle(); + __attribute__((unused)) curandGenerator_t curand_handle = device_context->curand_generator(); + delete device_context; + } +} From a77fcef3f99724e85e2239ad91683b7afe913cd8 Mon Sep 17 00:00:00 2001 From: qijun Date: Mon, 3 Jul 2017 12:55:39 +0000 Subject: [PATCH 233/542] fix cuda compile error --- paddle/platform/cublas.h | 3 -- paddle/platform/cuda.h | 9 ++++++ paddle/platform/curand.h | 5 ++- paddle/platform/device_context.h | 52 +++++++++++++++++-------------- paddle/platform/dynamic_loader.cc | 4 +-- 5 files changed, 43 insertions(+), 30 deletions(-) diff --git a/paddle/platform/cublas.h b/paddle/platform/cublas.h index 70c9713325..d60eb501e9 100644 --- a/paddle/platform/cublas.h +++ b/paddle/platform/cublas.h @@ -3,7 +3,6 @@ namespace paddle { namespace dyload { -namespace dynload { std::once_flag cublas_dso_flag; void *cublas_dso_handle = nullptr; @@ -67,8 +66,6 @@ CUBLAS_BLAS_ROUTINE_EACH(DYNAMIC_LOAD_CUBLAS_V2_WRAP) #undef DYNAMIC_LOAD_CUBLAS_V2_WRAP #undef CUBLAS_BLAS_ROUTINE_EACH -} /* namespace dynload */ - // clang-format on #ifndef PADDLE_TYPE_DOUBLE #define CUBLAS_GEAM dynload::cublasSgeam diff --git a/paddle/platform/cuda.h b/paddle/platform/cuda.h index 8fe891f9ce..05290b0e1e 100644 --- a/paddle/platform/cuda.h +++ b/paddle/platform/cuda.h @@ -33,6 +33,15 @@ int GetDeviceCount(void) { throw_on_error(cudaGetDeviceCount(&count), "cudaGetDeviceCount failed"); return count; } +int GetCurrentDeviceId(void) { + int device_id; + throw_on_error(cudaGetDevice(&device_id), "cudaGetDevice failed"); + return device_id; +} + +void SetDeviceId(int device_id) { + throw_on_error(cudaSetDevice(device_id), "cudaSetDevice failed"); +} } // namespace platform } // namespace paddle diff --git a/paddle/platform/curand.h b/paddle/platform/curand.h index 692c024e6e..edff6526bd 100644 --- a/paddle/platform/curand.h +++ b/paddle/platform/curand.h @@ -3,6 +3,8 @@ 
namespace paddle { namespace dyload { +std::once_flag curand_dso_flag; +void *curand_dso_handle = nullptr; #ifdef PADDLE_USE_DSO #define DYNAMIC_LOAD_CURAND_WRAP(__name) \ struct DynLoad__##__name { \ @@ -31,7 +33,8 @@ namespace dyload { __macro(curandSetStream) \ __macro(curandSetPseudoRandomGeneratorSeed)\ __macro(curandGenerateUniform) \ - __macro(curandGenerateUniformDouble) + __macro(curandGenerateUniformDouble) \ + __macro(curandDestroyGenerator) // clang-format on CURAND_RAND_ROUTINE_EACH(DYNAMIC_LOAD_CURAND_WRAP) diff --git a/paddle/platform/device_context.h b/paddle/platform/device_context.h index f95aac4a36..65e76666a7 100644 --- a/paddle/platform/device_context.h +++ b/paddle/platform/device_context.h @@ -83,11 +83,12 @@ class CudaDeviceContext : public DeviceContext { cublasHandle_t cublas_handle() { if (!blas_handle_) { DeviceGuard guard(gpu_place_); - PADDLE_ENFORCE(cublasCreate(&blas_handle_) == CUBLAS_STATUS_SUCCESS, - "cublasCreate failed"); PADDLE_ENFORCE( - cublasSetStream(blas_handle_, stream_) == CUBLAS_STATUS_SUCCESS, - "cublasSetStream failed"); + paddle::dyload::cublasCreate(&blas_handle_) == CUBLAS_STATUS_SUCCESS, + "cublasCreate failed"); + PADDLE_ENFORCE(paddle::dyload::cublasSetStream(blas_handle_, stream_) == + CUBLAS_STATUS_SUCCESS, + "cublasSetStream failed"); } return blas_handle_; } @@ -95,11 +96,12 @@ class CudaDeviceContext : public DeviceContext { cudnnHandle_t cudnn_handle() { if (!dnn_handle_) { DeviceGuard guard(gpu_place_); - PADDLE_ENFORCE(cudnnCreate(&dnn_handle_) == CUDNN_STATUS_SUCCESS, - "cudnnCreate failed"); PADDLE_ENFORCE( - cudnnSetStream(dnn_handle_, stream_) == CUDNN_STATUS_SUCCESS, - "cudnnSetStream failed"); + paddle::dyload::cudnnCreate(&dnn_handle_) == CUDNN_STATUS_SUCCESS, + "cudnnCreate failed"); + PADDLE_ENFORCE(paddle::dyload::cudnnSetStream(dnn_handle_, stream_) == + CUDNN_STATUS_SUCCESS, + "cudnnSetStream failed"); } return dnn_handle_; } @@ -107,17 +109,17 @@ class CudaDeviceContext : public DeviceContext { curandGenerator_t curand_generator() { if (!rand_generator_) { DeviceGuard guard(gpu_place_); + PADDLE_ENFORCE(paddle::dyload::curandCreateGenerator( + &rand_generator_, CURAND_RNG_PSEUDO_DEFAULT) == + CURAND_STATUS_SUCCESS, + "curandCreateGenerator failed"); PADDLE_ENFORCE( - curandCreateGenerator(&rand_generator_, CURAND_RNG_PSEUDO_DEFAULT) == - CURAND_STATUS_SUCCESS, - "curandCreateGenerator failed"); - PADDLE_ENFORCE( - curandSetPseudoRandomGeneratorSeed(rand_generator_, random_seed_) == - CURAND_STATUS_SUCCESS, + paddle::dyload::curandSetPseudoRandomGeneratorSeed( + rand_generator_, random_seed_) == CURAND_STATUS_SUCCESS, "curandSetPseudoRandomGeneratorSeed failed"); - PADDLE_ENFORCE( - curandSetStream(rand_generator_, stream_) == CURAND_STATUS_SUCCESS, - "curandSetStream failed"); + PADDLE_ENFORCE(paddle::dyload::curandSetStream( + rand_generator_, stream_) == CURAND_STATUS_SUCCESS, + "curandSetStream failed"); } return rand_generator_; } @@ -125,19 +127,21 @@ class CudaDeviceContext : public DeviceContext { ~CudaDeviceContext() { Wait(); if (blas_handle_) { - PADDLE_ENFORCE(cublasDestroy(blas_handle_) == CUBLAS_STATUS_SUCCESS, - "cublasDestroy failed"); + PADDLE_ENFORCE( + paddle::dyload::cublasDestroy(blas_handle_) == CUBLAS_STATUS_SUCCESS, + "cublasDestroy failed"); } if (dnn_handle_) { - PADDLE_ENFORCE(cudnnDestroy(dnn_handle_) == CUDNN_STATUS_SUCCESS, - "cudnnDestroy failed"); + PADDLE_ENFORCE( + paddle::dyload::cudnnDestroy(dnn_handle_) == CUDNN_STATUS_SUCCESS, + "cudnnDestroy failed"); } if (rand_generator_) { - 
PADDLE_ENFORCE( - curandDestroyGenerator(rand_generator_) == CURAND_STATUS_SUCCESS, - "curandDestroyGenerator failed"); + PADDLE_ENFORCE(paddle::dyload::curandDestroyGenerator(rand_generator_) == + CURAND_STATUS_SUCCESS, + "curandDestroyGenerator failed"); } delete eigen_stream_; diff --git a/paddle/platform/dynamic_loader.cc b/paddle/platform/dynamic_loader.cc index 9036eaf642..c34abc392c 100644 --- a/paddle/platform/dynamic_loader.cc +++ b/paddle/platform/dynamic_loader.cc @@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "dynamic_loader.h" #include -#include "DynamicLoader.h" -#include "Logging.h" +#include DEFINE_string(cudnn_dir, "", "Specify path for loading libcudnn.so. For instance, " From 5ef1425adb75eb1b0212518e0f12fefd8d9a8970 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Mon, 3 Jul 2017 21:13:20 +0800 Subject: [PATCH 234/542] "init saving model" --- go/pserver/optimizer.go | 16 ++++++++++++++-- go/pserver/service.go | 12 +++++++++--- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go index b4a040f46b..427251f900 100644 --- a/go/pserver/optimizer.go +++ b/go/pserver/optimizer.go @@ -40,17 +40,23 @@ func newOptimizer(paramWithConfigs ParameterWithConfig) *optimizer { o.elementType = paramWithConfigs.Param.ElementType p := paramWithConfigs.Param c := paramWithConfigs.Config + s := paramWithConfigs.State log.WithFields(log.Fields{ "ElementType": p.ElementType, "ParamSize": len(p.Content), "ConfigSize": len(c), + "StateSize": len(s), }).Info("New Optimizer Created with config:") var cbuffer unsafe.Pointer cbuffer = C.malloc(C.size_t(len(p.Content))) C.memcpy(cbuffer, unsafe.Pointer(&p.Content[0]), C.size_t(len(p.Content))) + var cstate unsafe.Pointer + if len(s) != 0 { + cstate = unsafe.Pointer(&s[0]) + } + o.opt = C.paddle_create_optimizer((*C.uchar)(&c[0]), C.int(len(c)), - C.paddle_element_type(p.ElementType), cbuffer, C.int(len(p.Content)/C.sizeof_float), - (*C.char)(nullPtr), 0) + C.paddle_element_type(p.ElementType), cbuffer, C.int(len(p.Content)/C.sizeof_float), (*C.char)(cstate), C.int(len(s))) return o } @@ -60,6 +66,12 @@ func (o *optimizer) GetWeights() []byte { return cArrayToSlice(buffer, int(buffer_len)*C.sizeof_float) } +func (o *optimizer) GetStates() []byte { + var cbuffer *C.char + cbuffer_len := C.paddle_optimizer_get_state(o.opt, &cbuffer) + return cArrayToSlice(unsafe.Pointer(cbuffer), int(cbuffer_len)) +} + func (o *optimizer) UpdateParameter(g Gradient) error { if o.elementType != g.ElementType { return fmt.Errorf("Name: %s, parameter and gradient element type not match, parameter: %v, gradient: %v", g.Name, o.elementType, g.ElementType) diff --git a/go/pserver/service.go b/go/pserver/service.go index e15a4e5a58..a5ff862903 100644 --- a/go/pserver/service.go +++ b/go/pserver/service.go @@ -38,6 +38,7 @@ type Parameter struct { type ParameterWithConfig struct { Param Parameter Config []byte // parameter configuration in Proto Buffer format + State []byte // parameter training state } // Gradient is the gradient of the parameter. 
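// (State carries the optimizer's serialized internal state, such as
// momentums and the learning-rate schedule, so a pserver restored from a
// checkpoint can resume training rather than start over.)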
@@ -58,7 +59,7 @@ func NewService(idx int) (*Service, error) { s := &Service{ idx: idx, } - s.optMap = make(map[string]*optimizer) + s.optMap = make(map[string]*optimizer) s.initialized = make(chan struct{}) return s, nil } @@ -143,7 +144,12 @@ func (s *Service) GetParam(name string, parameter *Parameter) error { // Save tells the parameter server to save parameters. func (s *Service) Save(path string, dummy *int) error { <-s.initialized - - // TODO + for opt, ok := range s.optMap { + if ok != nil { + return fmt.Errorf("parameter optimizerMap error: ", ok) + } + state := opt.GetStates() + weights := opt.GetWeights() + } return nil } From 1100f97e5707737d2dabe5a47bb111ff246e52e4 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Mon, 3 Jul 2017 21:26:49 +0800 Subject: [PATCH 235/542] "fix style check" --- cmake/cpplint.cmake | 1 + go/pserver/cclient/test/test_cclient.c | 2 +- go/pserver/cclient/test/testdata/optimizer.pb | Bin 0 -> 50 bytes go/pserver/cclient/test/testdata/optimizer.pb.txt | Bin 51 -> 0 bytes go/pserver/client_test.go | 2 +- go/pserver/optimizer_test.go | 2 +- go/pserver/service_test.go | 4 ++-- 7 files changed, 6 insertions(+), 5 deletions(-) create mode 100644 go/pserver/cclient/test/testdata/optimizer.pb delete mode 100644 go/pserver/cclient/test/testdata/optimizer.pb.txt diff --git a/cmake/cpplint.cmake b/cmake/cpplint.cmake index 48f705818b..6bbcd730e1 100644 --- a/cmake/cpplint.cmake +++ b/cmake/cpplint.cmake @@ -25,6 +25,7 @@ set(STYLE_FILTER "${STYLE_FILTER}-readability/casting") set(IGNORE_PATTERN .*ImportanceSampler.* .*cblas\\.h.* + .*\\.pb\\.txt .*LtrDataProvider.* .*MultiDataProvider.*) diff --git a/go/pserver/cclient/test/test_cclient.c b/go/pserver/cclient/test/test_cclient.c index b16769b433..8eababbe33 100644 --- a/go/pserver/cclient/test/test_cclient.c +++ b/go/pserver/cclient/test/test_cclient.c @@ -51,7 +51,7 @@ int main() { char *config_proto; size_t config_proto_len = 0; ssize_t nread; - FILE *fp = fopen("testdata/optimizer.pb.txt", "r"); + FILE *fp = fopen("testdata/optimizer.pb", "r"); if (!fp) { fail(); } diff --git a/go/pserver/cclient/test/testdata/optimizer.pb b/go/pserver/cclient/test/testdata/optimizer.pb new file mode 100644 index 0000000000000000000000000000000000000000..27dd3bc5f19e2964b4b674cff8860233cbdb445a GIT binary patch literal 50 kcmd;JloDUb$N&X9;j9CU3=s@ToSd^}g1}Dum25B;0LStS`2YX_ literal 0 HcmV?d00001 diff --git a/go/pserver/cclient/test/testdata/optimizer.pb.txt b/go/pserver/cclient/test/testdata/optimizer.pb.txt deleted file mode 100644 index 27c8a584df40ab714edfd730f0ff7b7bd3783964..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 51 lcmd;JloDUb$N&X9;j9CU3=s@ToSd^}g1}Dum25B;7XZ}t4FdoG diff --git a/go/pserver/client_test.go b/go/pserver/client_test.go index a248a3fb69..b805efa921 100644 --- a/go/pserver/client_test.go +++ b/go/pserver/client_test.go @@ -75,7 +75,7 @@ func TestClientFull(t *testing.T) { } const numParameter = 100 - config, err := ioutil.ReadFile("./cclient/test/testdata/optimizer.pb.txt") + config, err := ioutil.ReadFile("./cclient/test/testdata/optimizer.pb") if err != nil { t.Fatalf("read optimizer proto failed") } diff --git a/go/pserver/optimizer_test.go b/go/pserver/optimizer_test.go index 368047d6f8..b99b5a5f0b 100644 --- a/go/pserver/optimizer_test.go +++ b/go/pserver/optimizer_test.go @@ -11,7 +11,7 @@ func TestOptimizerCreateRelease(t *testing.T) { ElementType: Int32, } p.Content = []byte{1, 3} - config, err := ioutil.ReadFile("./cclient/test/testdata/optimizer.pb.txt") 
+ config, err := ioutil.ReadFile("./cclient/test/testdata/optimizer.pb") if err != nil { t.Fatalf("read optimizer proto failed") } diff --git a/go/pserver/service_test.go b/go/pserver/service_test.go index f86619447c..30e3ac8ae1 100644 --- a/go/pserver/service_test.go +++ b/go/pserver/service_test.go @@ -19,7 +19,7 @@ func TestServiceFull(t *testing.T) { p.Name = "param_a" p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0} p.ElementType = pserver.Int32 - config, err := ioutil.ReadFile("./cclient/test/testdata/optimizer.pb.txt") + config, err := ioutil.ReadFile("./cclient/test/testdata/optimizer.pb") if err != nil { t.Fatalf("read optimizer proto failed") } @@ -149,7 +149,7 @@ func TestBlockUntilInitialized(t *testing.T) { p.Name = "param_a" p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0} p.ElementType = pserver.Int32 - config, err := ioutil.ReadFile("./cclient/test/testdata/optimizer.pb.txt") + config, err := ioutil.ReadFile("./cclient/test/testdata/optimizer.pb") if err != nil { t.Fatalf("read optimizer proto failed") } From 3f63d96abec165426bcd464f7aff32e2e42ed021 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 3 Jul 2017 23:16:11 +0800 Subject: [PATCH 236/542] Fix link error in op_proto_test. --- paddle/framework/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 50107faaed..f7e5753ac2 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -7,4 +7,4 @@ cc_test(scope_test SRCS scope_test.cc) cc_test(enforce_test SRCS enforce_test.cc) proto_library(attr_type SRCS attr_type.proto) proto_library(op_proto SRCS op_proto.proto) -cc_test(op_proto_test SRCS op_proto_test.cc DEPS attr_type op_proto protobuf) +cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto attr_type protobuf) From f1330e216a1b8130bb578b69ff2d6a67357cdd1b Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Mon, 3 Jul 2017 23:20:39 +0800 Subject: [PATCH 237/542] "saving checkpoint" --- go/pserver/service.go | 79 +++++++++++++++++++++++++++++++++++--- go/pserver/service_test.go | 6 +++ 2 files changed, 80 insertions(+), 5 deletions(-) diff --git a/go/pserver/service.go b/go/pserver/service.go index a5ff862903..a4cf3e4750 100644 --- a/go/pserver/service.go +++ b/go/pserver/service.go @@ -1,9 +1,19 @@ package pserver import ( + "bufio" + "bytes" + "crypto/md5" + "encoding/gob" + "encoding/hex" "errors" "fmt" + "os" + "strconv" "sync" + "time" + + log "github.com/sirupsen/logrus" ) // ElementType is the type of elements of a Parameter. @@ -14,6 +24,10 @@ const ( Uninitialized = "pserver not fully initialized" ) +const ( + checkpoint_path = "/checkpoints/" +) + // Supported element types const ( Int32 ElementType = iota @@ -53,6 +67,24 @@ type Service struct { optMap map[string]*optimizer } +type Checkpoint struct { + uuid string + md5sum string + timestamp string +} + +//serialize ParameterWithConfig to byte stream +func GetBytes(content ...interface{}) ([]byte, error) { + + var buf bytes.Buffer + encoder := gob.NewEncoder(&buf) + err := encoder.Encode(content) + if err != nil { + return nil, err + } + return buf.Bytes(), nil +} + // NewService creates a new service, will bypass etcd registration if no // endpoints specified. func NewService(idx int) (*Service, error) { @@ -143,13 +175,50 @@ func (s *Service) GetParam(name string, parameter *Parameter) error { // Save tells the parameter server to save parameters. 
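// As implemented below, Save gob-encodes each optimizer's weights together
// with its internal state, stamps the blob with an MD5 digest and a
// timestamp, and writes it to a per-pserver file under the checkpoint
// directory.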
func (s *Service) Save(path string, dummy *int) error { + //FIXME: checkpoint is only used by pserver + // and has a constant path of */checkpoints/{pserver_idx}* <-s.initialized - for opt, ok := range s.optMap { - if ok != nil { - return fmt.Errorf("parameter optimizerMap error: ", ok) + s.mu.Lock() + defer s.mu.Unlock() + var paramWithConfig ParameterWithConfig + for name, opt := range s.optMap { + paramWithConfig.Param.Name = name + paramWithConfig.Param.ElementType = opt.elementType + paramWithConfig.Param.Content = opt.GetWeights() + paramWithConfig.State = opt.GetStates() + content, err := GetBytes(paramWithConfig) + if err != nil { + log.Errorln(err) + } + ck := Checkpoint{} + h := md5.New() + ck.md5sum = hex.EncodeToString(h.Sum(content)) + ck.timestamp = time.Now().String() + ck.uuid = checkpoint_path + strconv.Itoa(s.idx) + ckbytes, err := GetBytes(ck) + if err != nil { + log.Errorln(err) + } + // TODO: according design doc, need to save uuid to etcd in json format + // {\"uuid\": [UUID], \"md5\", \"MD5 sum\", \"timestamp\": xxxx} + log.Infof("parameter checkpoint %s", ckbytes) + + if _, err = os.Stat(ck.uuid); os.IsNotExist(err) { + log.Info("checkpoint not exists.") + } else { + err = os.Remove(ck.uuid) + log.Infof("remove %s", ck.uuid) + } + f, err := os.Create(ck.uuid) + defer f.Close() + if err != nil { + log.Errorln(err) + } + writer := bufio.NewWriter(f) + _, err = writer.Write(content) + if err != nil { + log.Errorln(err) } - state := opt.GetStates() - weights := opt.GetWeights() } return nil } diff --git a/go/pserver/service_test.go b/go/pserver/service_test.go index f86619447c..28956e4d85 100644 --- a/go/pserver/service_test.go +++ b/go/pserver/service_test.go @@ -79,6 +79,8 @@ func TestServiceFull(t *testing.T) { if !reflect.DeepEqual(param1, p) { t.FailNow() } + var dummy int + s.Save("", &dummy) } func TestMultipleInit(t *testing.T) { @@ -166,3 +168,7 @@ func TestBlockUntilInitialized(t *testing.T) { wg.Wait() } + +func TestCheckpointSpeed(t *testing.T) { + //TODO: test speed +} From 65afbe11853c2e32ca4196965e309e33ab843fd1 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Mon, 3 Jul 2017 23:38:21 +0800 Subject: [PATCH 238/542] "fix gob register error" --- go/pserver/service.go | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/go/pserver/service.go b/go/pserver/service.go index a4cf3e4750..decd3682ae 100644 --- a/go/pserver/service.go +++ b/go/pserver/service.go @@ -25,7 +25,7 @@ const ( ) const ( - checkpoint_path = "/checkpoints/" + checkpoint_path = "./checkpoints/" ) // Supported element types @@ -67,10 +67,10 @@ type Service struct { optMap map[string]*optimizer } -type Checkpoint struct { - uuid string - md5sum string - timestamp string +type checkpoint struct { + Uuid string + Md5sum string + Timestamp string } //serialize ParameterWithConfig to byte stream @@ -93,6 +93,8 @@ func NewService(idx int) (*Service, error) { } s.optMap = make(map[string]*optimizer) s.initialized = make(chan struct{}) + gob.Register(ParameterWithConfig{}) + gob.Register(checkpoint{}) return s, nil } @@ -190,32 +192,33 @@ func (s *Service) Save(path string, dummy *int) error { if err != nil { log.Errorln(err) } - ck := Checkpoint{} + ck := checkpoint{} h := md5.New() - ck.md5sum = hex.EncodeToString(h.Sum(content)) - ck.timestamp = time.Now().String() - ck.uuid = checkpoint_path + strconv.Itoa(s.idx) + ck.Md5sum = hex.EncodeToString(h.Sum(content)) + ck.Timestamp = time.Now().String() + ck.Uuid = checkpoint_path + strconv.Itoa(s.idx) 
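+  // The checkpoint record (path, MD5 digest, timestamp) is serialized below
+  // and, for now, only logged; registering it in etcd remains a TODO.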
ckbytes, err := GetBytes(ck) if err != nil { log.Errorln(err) } - // TODO: according design doc, need to save uuid to etcd in json format - // {\"uuid\": [UUID], \"md5\", \"MD5 sum\", \"timestamp\": xxxx} + // TODO: according design doc, need to save Uuid to etcd in json format + // {\"Uuid\": [UUID], \"md5\", \"MD5 sum\", \"Timestamp\": xxxx} log.Infof("parameter checkpoint %s", ckbytes) - if _, err = os.Stat(ck.uuid); os.IsNotExist(err) { + if _, err = os.Stat(ck.Uuid); os.IsNotExist(err) { log.Info("checkpoint not exists.") } else { - err = os.Remove(ck.uuid) - log.Infof("remove %s", ck.uuid) + err = os.Remove(ck.Uuid) + log.Infof("remove %s", ck.Uuid) } - f, err := os.Create(ck.uuid) + f, err := os.Create(ck.Uuid) defer f.Close() if err != nil { log.Errorln(err) } writer := bufio.NewWriter(f) _, err = writer.Write(content) + writer.Flush() if err != nil { log.Errorln(err) } From 6935dd7bc96e101ec65de39be1d2d8f4f79f1af3 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Tue, 4 Jul 2017 00:16:45 +0800 Subject: [PATCH 239/542] "lr state serialization" --- paddle/optimizer/lr_policy.h | 46 ++++++++++++++++++++++--------- paddle/optimizer/sgd_optimizer.cc | 4 +-- proto/OptimizerConfig.proto | 27 ++++++++---------- 3 files changed, 47 insertions(+), 30 deletions(-) diff --git a/paddle/optimizer/lr_policy.h b/paddle/optimizer/lr_policy.h index d8e33ad37a..ab5101e2e8 100644 --- a/paddle/optimizer/lr_policy.h +++ b/paddle/optimizer/lr_policy.h @@ -19,34 +19,54 @@ class ConstLr final : public LrPolicy { public: ConstLr(double lr) : learning_rate(lr){}; double LearningRate(const uint64_t num_sample_passed) { - return learning_rate; + return learning_rate_; + } + const char *SerializeState(int *state_len) { + LrPolicyState state; + state.set_learning_rate(learning_rate_); + auto str = state.SerializeAsString(); + *state_len = str.size(); + return str.c_str(); + } + void DeserializeState(const std::string &state) { + LrPolicyState state; + state.ParseFromString(str); + learning_rate_ = state.learning_rate(); } - const char *SerializeState(int *state_len) { return nullptr; } - void DeserializeState(const std::string &state) {} private: - double learning_rate; + double learning_rate_; }; class LinearLr final : public LrPolicy { public: LinearLr(double lr, double lr_decay_a, double lr_decay_b) - : learning_rate(lr), lr_decay_a(lr_decay_a), lr_decay_b(lr_decay_b) {} + : learning_rate_(lr), lr_decay_a_(lr_decay_a), lr_decay_b_(lr_decay_b) {} double LearningRate(const uint64_t num_sample_passed) { - return std::max(learning_rate - lr_decay_a * num_sample_passed, lr_decay_b); + return std::max(learning_rate_ - lr_decay_a_ * num_sample_passed, + lr_decay_b_); } const char *SerializeState(int *state_len) { - // TODO(zhihong) : add lr_policy serialization - return nullptr; + LrPolicyState state; + state.set_learning_rate(learning_rate_); + state.set_lr_decay_a(lr_decay_a_); + state.set_lr_decay_b(lr_decay_b_); + auto str = state.SerializeAsString(); + *state_len = str.size(); + return str.c_str(); } - void DeserializeState(const std::string &state) { - // TODO(zhihong) : add lr_policy serialization + void DeserializeState(const std::string &str) { + LrPolicyState state; + state.ParseFromString(str); + learning_rate_ = state.learning_rate(); + lr_decay_a_ = state.lr_decay_a(); + lr_decay_b_ = state.lr_decay_b(); } private: - double learning_rate; - double lr_decay_a; - double lr_decay_b; + double learning_rate_; + double lr_decay_a_; + double lr_decay_b_; }; } // namespace optimizer diff --git 
a/paddle/optimizer/sgd_optimizer.cc b/paddle/optimizer/sgd_optimizer.cc index 34e051003f..9e5477b2ff 100644 --- a/paddle/optimizer/sgd_optimizer.cc +++ b/paddle/optimizer/sgd_optimizer.cc @@ -30,10 +30,10 @@ void SGDOptimizer::Update(const Tensor *gradient) { const char *SGDOptimizer::SerializeState(int *state_len) { SGDOptimizerState state; state.set_num_sample_passed(num_sample_passed_); - TensorToProto(*parameter_, state.mutable_parameter()); + state.set_lr_ TensorToProto(*parameter_, state.mutable_parameter()); if (momentum_ != 0.0) TensorToProto(*momentums_, state.mutable_momentums()); auto str = state.SerializeAsString(); - *state_len = str.size(); + *state_len += str.size(); return str.c_str(); } diff --git a/proto/OptimizerConfig.proto b/proto/OptimizerConfig.proto index c698d3c2dd..19ce289ea3 100644 --- a/proto/OptimizerConfig.proto +++ b/proto/OptimizerConfig.proto @@ -78,11 +78,15 @@ enum DataType { repeated bytes content = 2; } +message LrPolicyState { + // learninRate Policy + optional double learning_rate = 1 [default = 1.0]; + optional double lr_decay_a = 2; + optional double lr_decay_b = 3; +} + message SGDOptimizerState { - // learning rate policy - optional double learning_rate = 101; - optional double lr_decay_a = 102; - optional double lr_decay_b = 103; + optional LrPolicyState lrstate = 101; optional double num_sample_passed = 104; // state optional TensorProto parameter = 1; @@ -91,9 +95,7 @@ message SGDOptimizerState { message AdadeltaOptimizerState { // learning rate policy - optional double learning_rate = 101; - optional double lr_decay_a = 102; - optional double lr_decay_b = 103; + optional LrPolicyState lrstate = 101; optional double num_sample_passed = 104; // state optional TensorProto parameter = 1; @@ -102,11 +104,9 @@ message AdadeltaOptimizerState { optional TensorProto update_delta = 4; } + message AdagradOptimizerState { - // learning rate policy - optional double learning_rate = 101; - optional double lr_decay_a = 102; - optional double lr_decay_b = 103; + optional LrPolicyState lrstate = 101; optional double num_sample_passed = 104; // state optional TensorProto parameter = 1; @@ -114,10 +114,7 @@ message AdagradOptimizerState { } message AdamOptimizerState { - // learning rate policy - optional double learning_rate = 101; - optional double lr_decay_a = 102; - optional double lr_decay_b = 103; + optional LrPolicyState lrstate = 101; optional double num_sample_passed = 104; // state optional TensorProto parameter = 1; From e1acd73fab4e17db5700feba09339f09d7152406 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Tue, 4 Jul 2017 01:13:30 +0800 Subject: [PATCH 240/542] "fix typo deleted part" --- paddle/optimizer/lr_policy.h | 4 ++-- paddle/optimizer/sgd_optimizer.cc | 6 +++++- proto/OptimizerConfig.proto | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/paddle/optimizer/lr_policy.h b/paddle/optimizer/lr_policy.h index ab5101e2e8..036c376e10 100644 --- a/paddle/optimizer/lr_policy.h +++ b/paddle/optimizer/lr_policy.h @@ -17,7 +17,7 @@ public: // constant learning rate policy class ConstLr final : public LrPolicy { public: - ConstLr(double lr) : learning_rate(lr){}; + ConstLr(double lr) : learning_rate_(lr){}; double LearningRate(const uint64_t num_sample_passed) { return learning_rate_; } @@ -28,7 +28,7 @@ public: *state_len = str.size(); return str.c_str(); } - void DeserializeState(const std::string &state) { + void DeserializeState(const std::string &str) { LrPolicyState state; state.ParseFromString(str); learning_rate_ = 
state.learning_rate(); diff --git a/paddle/optimizer/sgd_optimizer.cc b/paddle/optimizer/sgd_optimizer.cc index 9e5477b2ff..527e65144d 100644 --- a/paddle/optimizer/sgd_optimizer.cc +++ b/paddle/optimizer/sgd_optimizer.cc @@ -30,7 +30,11 @@ void SGDOptimizer::Update(const Tensor *gradient) { const char *SGDOptimizer::SerializeState(int *state_len) { SGDOptimizerState state; state.set_num_sample_passed(num_sample_passed_); - state.set_lr_ TensorToProto(*parameter_, state.mutable_parameter()); + std::string lr_str = this->lr_policy_->SerializeState(state_len); + LrPolicyState lr_state; + lr_state.ParseFromString(lr_str); + state.mutable_lr_state() = lr_state; + TensorToProto(*parameter_, state.mutable_parameter()); if (momentum_ != 0.0) TensorToProto(*momentums_, state.mutable_momentums()); auto str = state.SerializeAsString(); *state_len += str.size(); diff --git a/proto/OptimizerConfig.proto b/proto/OptimizerConfig.proto index 19ce289ea3..290932898e 100644 --- a/proto/OptimizerConfig.proto +++ b/proto/OptimizerConfig.proto @@ -95,7 +95,7 @@ message SGDOptimizerState { message AdadeltaOptimizerState { // learning rate policy - optional LrPolicyState lrstate = 101; + optional LrPolicyState lr_state = 101; optional double num_sample_passed = 104; // state optional TensorProto parameter = 1; From 7edabe74d45b9dd35603ac786e6a36e201bb1177 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Tue, 4 Jul 2017 01:21:13 +0800 Subject: [PATCH 241/542] "polish name convention" --- paddle/optimizer/sgd_optimizer.cc | 4 +++- proto/OptimizerConfig.proto | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/paddle/optimizer/sgd_optimizer.cc b/paddle/optimizer/sgd_optimizer.cc index 527e65144d..96570eab26 100644 --- a/paddle/optimizer/sgd_optimizer.cc +++ b/paddle/optimizer/sgd_optimizer.cc @@ -33,7 +33,7 @@ const char *SGDOptimizer::SerializeState(int *state_len) { std::string lr_str = this->lr_policy_->SerializeState(state_len); LrPolicyState lr_state; lr_state.ParseFromString(lr_str); - state.mutable_lr_state() = lr_state; + state.mutable_lr_state()->ParseFromString(lr_str); TensorToProto(*parameter_, state.mutable_parameter()); if (momentum_ != 0.0) TensorToProto(*momentums_, state.mutable_momentums()); auto str = state.SerializeAsString(); @@ -44,6 +44,8 @@ const char *SGDOptimizer::SerializeState(int *state_len) { void SGDOptimizer::DeserializeState(const std::string &str) { SGDOptimizerState state; state.ParseFromString(str); + auto lr_state = state.lr_state(); + this->lr_policy_->DeserializeState(lr_state.SerializeAsString()); num_sample_passed_ = state.num_sample_passed(); ProtoToTensor(state.parameter(), parameter_); if (momentum_ != 0.0) ProtoToTensor(state.parameter(), momentums_); diff --git a/proto/OptimizerConfig.proto b/proto/OptimizerConfig.proto index 290932898e..2a87e293f6 100644 --- a/proto/OptimizerConfig.proto +++ b/proto/OptimizerConfig.proto @@ -86,7 +86,7 @@ message LrPolicyState { } message SGDOptimizerState { - optional LrPolicyState lrstate = 101; + optional LrPolicyState lr_state = 101; optional double num_sample_passed = 104; // state optional TensorProto parameter = 1; @@ -106,7 +106,7 @@ message AdadeltaOptimizerState { message AdagradOptimizerState { - optional LrPolicyState lrstate = 101; + optional LrPolicyState lr_state = 101; optional double num_sample_passed = 104; // state optional TensorProto parameter = 1; @@ -114,7 +114,7 @@ message AdagradOptimizerState { } message AdamOptimizerState { - optional LrPolicyState lrstate = 101; + optional LrPolicyState 
lr_state = 101; optional double num_sample_passed = 104; // state optional TensorProto parameter = 1; From dec65aca7ddcbeac1ba54608bc487dc93d2d28f3 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Tue, 4 Jul 2017 01:24:27 +0800 Subject: [PATCH 242/542] "fix parameter accumulate size" --- paddle/optimizer/adadelta_optimizer.cc | 8 +++++--- paddle/optimizer/adagrad_optimizer.cc | 9 ++++++--- paddle/optimizer/adam_optimizer.cc | 9 ++++++--- paddle/optimizer/sgd_optimizer.cc | 2 -- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/paddle/optimizer/adadelta_optimizer.cc b/paddle/optimizer/adadelta_optimizer.cc index 465ad5e0d2..6eec5d846f 100644 --- a/paddle/optimizer/adadelta_optimizer.cc +++ b/paddle/optimizer/adadelta_optimizer.cc @@ -27,22 +27,24 @@ void AdadeltaOptimizer::Update(const Tensor* gradient) { const char* AdadeltaOptimizer::SerializeState(int* state_len) { AdadeltaOptimizerState state; - // TODO(zhihong) : add lr_policy serialization state.set_num_sample_passed(num_sample_passed_); + std::string lr_str = this->lr_policy_->SerializeState(state_len); + state.mutable_lr_state()->ParseFromString(lr_str); TensorToProto(*parameter_, state.mutable_parameter()); TensorToProto(*accum_gradient_, state.mutable_accum_gradient()); TensorToProto(*accum_delta_, state.mutable_accum_delta()); TensorToProto(*update_delta_, state.mutable_update_delta()); auto str = state.SerializeAsString(); - *state_len = str.size(); + *state_len += str.size(); return str.c_str(); } void AdadeltaOptimizer::DeserializeState(const std::string& str) { AdadeltaOptimizerState state; state.ParseFromString(str); - // TODO(zhihong) : add lr_policy DeserializeState + auto lr_state = state.lr_state(); + this->lr_policy_->DeserializeState(lr_state.SerializeAsString()); num_sample_passed_ = state.num_sample_passed(); ProtoToTensor(state.parameter(), parameter_); diff --git a/paddle/optimizer/adagrad_optimizer.cc b/paddle/optimizer/adagrad_optimizer.cc index bdaa7877d2..5b92610ac5 100644 --- a/paddle/optimizer/adagrad_optimizer.cc +++ b/paddle/optimizer/adagrad_optimizer.cc @@ -19,20 +19,23 @@ void AdagradOptimizer::Update(const Tensor* gradient) { } const char* AdagradOptimizer::SerializeState(int* state_len) { AdagradOptimizerState state; - // TODO(zhihong) : add lr_policy serialization state.set_num_sample_passed(num_sample_passed_); + std::string lr_str = this->lr_policy_->SerializeState(state_len); + state.mutable_lr_state()->ParseFromString(lr_str); TensorToProto(*parameter_, state.mutable_parameter()); TensorToProto(*accum_gradient_, state.mutable_accum_gradient()); auto str = state.SerializeAsString(); - *state_len = str.size(); + *state_len += str.size(); return str.c_str(); } void AdagradOptimizer::DeserializeState(const std::string& str) { AdagradOptimizerState state; state.ParseFromString(str); - // TODO(zhihong) : add lr_policy DeserializeState + auto lr_state = state.lr_state(); + this->lr_policy_->DeserializeState(lr_state.SerializeAsString()); + num_sample_passed_ = state.num_sample_passed(); ProtoToTensor(state.parameter(), parameter_); ProtoToTensor(state.accum_gradient(), accum_gradient_); diff --git a/paddle/optimizer/adam_optimizer.cc b/paddle/optimizer/adam_optimizer.cc index ceab7397d8..1ebb6b1e0f 100644 --- a/paddle/optimizer/adam_optimizer.cc +++ b/paddle/optimizer/adam_optimizer.cc @@ -24,20 +24,23 @@ void AdamOptimizer::Update(const Tensor *gradient) { const char *AdamOptimizer::SerializeState(int *state_len) { AdamOptimizerState state; - // TODO(zhihong) : add lr_policy serialization 
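+  // Serialize the learning-rate policy alongside the optimizer's own
+  // tensors, so a restored optimizer resumes with the same schedule.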
+ std::string lr_str = this->lr_policy_->SerializeState(state_len); + state.mutable_lr_state()->ParseFromString(lr_str); state.set_num_sample_passed(num_sample_passed_); + TensorToProto(*parameter_, state.mutable_parameter()); TensorToProto(*momentums_, state.mutable_momentums()); TensorToProto(*velocitys_, state.mutable_velocitys()); auto str = state.SerializeAsString(); - *state_len = str.size(); + *state_len += str.size(); return str.c_str(); } void AdamOptimizer::DeserializeState(const std::string &str) { AdamOptimizerState state; state.ParseFromString(str); - // TODO(zhihong) : add lr_policy DeserializeState + auto lr_state = state.lr_state(); + this->lr_policy_->DeserializeState(lr_state.SerializeAsString()); num_sample_passed_ = state.num_sample_passed(); ProtoToTensor(state.parameter(), parameter_); diff --git a/paddle/optimizer/sgd_optimizer.cc b/paddle/optimizer/sgd_optimizer.cc index 96570eab26..15418faa84 100644 --- a/paddle/optimizer/sgd_optimizer.cc +++ b/paddle/optimizer/sgd_optimizer.cc @@ -31,8 +31,6 @@ const char *SGDOptimizer::SerializeState(int *state_len) { SGDOptimizerState state; state.set_num_sample_passed(num_sample_passed_); std::string lr_str = this->lr_policy_->SerializeState(state_len); - LrPolicyState lr_state; - lr_state.ParseFromString(lr_str); state.mutable_lr_state()->ParseFromString(lr_str); TensorToProto(*parameter_, state.mutable_parameter()); if (momentum_ != 0.0) TensorToProto(*momentums_, state.mutable_momentums()); From e12d7269ff473db5cc87de1344630eb348017a4a Mon Sep 17 00:00:00 2001 From: gongweibao Date: Tue, 4 Jul 2017 01:22:01 +0000 Subject: [PATCH 243/542] fix by helin's comments --- python/paddle/v2/reader/creator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/v2/reader/creator.py b/python/paddle/v2/reader/creator.py index 20624d5286..61b5cc134f 100644 --- a/python/paddle/v2/reader/creator.py +++ b/python/paddle/v2/reader/creator.py @@ -106,7 +106,7 @@ def recordio(paths, buf_size=100): while True: r, err = client.next_record() - if r is None: + if err < 0: break yield r From e25c155f3954ee8cde673f39e8f82c5baebd99c6 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Tue, 4 Jul 2017 02:37:31 +0000 Subject: [PATCH 244/542] add taskfail interface --- go/master/client.go | 5 +++ go/master/service.go | 99 ++++++++++++++++++++++++++++---------------- 2 files changed, 68 insertions(+), 36 deletions(-) diff --git a/go/master/client.go b/go/master/client.go index d3bea49d0a..b6ca8cad15 100644 --- a/go/master/client.go +++ b/go/master/client.go @@ -112,6 +112,11 @@ func (c *Client) taskFinished(taskID int) error { return c.conn.Call("Service.TaskFinished", taskID, nil) } +// TaskFailed tell the master server as task is failed. +func (c *Client) taskFailed(taskID int, epoch int) error { + return c.conn.Call("Service.TaskFinished", taskID, epoch) +} + // NextRecord returns next record in the dataset. // // NextRecord will block until the next record is available. It is diff --git a/go/master/service.go b/go/master/service.go index 58e68e7448..b078f318f5 100644 --- a/go/master/service.go +++ b/go/master/service.go @@ -34,29 +34,30 @@ type Chunk struct { // Task is the basic unit of data instances assigned to trainers. 
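// A task's Epoch is bumped each time it is dispatched; a timeout check or
// failure report that carries a stale epoch is ignored (see checkTaskStatus).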
type Task struct { ID int + Epoch int Chunks []Chunk } type taskEntry struct { - Epoch int NumTimeout int Task Task + FailedNum int } type taskQueues struct { Todo []taskEntry Pending map[int]taskEntry // map from task ID to task entry Done []taskEntry - Failed []Task + Failed []taskEntry } // Service is the master server service. type Service struct { - chunksPerTask int - timeoutDur time.Duration - timeoutMax int - ready chan struct{} - store Store + chunksPerTask int + timeoutDur time.Duration + failortimeoutMax int + ready chan struct{} + store Store mu sync.Mutex initDone bool @@ -91,11 +92,11 @@ func partition(chunks []Chunk, chunksPerTask int) []taskEntry { } // NewService creates a new service. -func NewService(store Store, chunksPerTask int, timeoutDur time.Duration, timeoutMax int) (*Service, error) { +func NewService(store Store, chunksPerTask int, timeoutDur time.Duration, failortimeoutMax int) (*Service, error) { s := &Service{} s.chunksPerTask = chunksPerTask s.timeoutDur = timeoutDur - s.timeoutMax = timeoutMax + s.failortimeoutMax = failortimeoutMax s.taskQueues = taskQueues{} s.taskQueues.Pending = make(map[int]taskEntry) s.ready = make(chan struct{}) @@ -257,6 +258,34 @@ func (s *Service) SetDataset(globPaths []string, dummy *int) error { return nil } +func (s *Service) checkTaskStatus(t taskEntry, epoch int) { + if t.Task.Epoch != epoch { + // new epoch, task launched after the + // schedule of this timeout check or failed status report. + return + } + + defer func() { + err := s.snapshot() + if err != nil { + log.Errorln(err) + } + }() + + delete(s.taskQueues.Pending, t.Task.ID) + + t.NumTimeout++ + if t.NumTimeout+t.FailedNum > s.failortimeoutMax { + log.Warningf("Task %v timed out %d times and failed %d times, discard.", t.Task, t.NumTimeout, t.FailedNum) + s.taskQueues.Failed = append(s.taskQueues.Failed, t) + return + } + + log.Warningf("Task %v timed out %d times and failed %d times, discard.", t.Task, t.NumTimeout, t.FailedNum) + s.taskQueues.Todo = append(s.taskQueues.Todo, t) + return +} + func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() { return func() { s.mu.Lock() @@ -267,30 +296,7 @@ func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() { return } - if t.Epoch != epoch { - // new epoch, task launched after the - // schedule of this timeout check. 
- return - } - - defer func() { - err := s.snapshot() - if err != nil { - log.Errorln(err) - } - }() - - delete(s.taskQueues.Pending, t.Task.ID) - - t.NumTimeout++ - if t.NumTimeout > s.timeoutMax { - log.Warningf("Task %v timed out %d times, discard.", t.Task, t.NumTimeout) - s.taskQueues.Failed = append(s.taskQueues.Failed, t.Task) - return - } - - log.Warningf("Task %v timed out %d times, retry.", t.Task, t.NumTimeout) - s.taskQueues.Todo = append(s.taskQueues.Todo, t) + s.checkTaskStatus(t, epoch) } } @@ -339,7 +345,7 @@ func (s *Service) GetTask(dummy int, task *Task) error { } t := s.taskQueues.Todo[0] - t.Epoch++ + t.Task.Epoch++ s.taskQueues.Todo = s.taskQueues.Todo[1:] s.taskQueues.Pending[t.Task.ID] = t err := s.snapshot() @@ -348,9 +354,9 @@ func (s *Service) GetTask(dummy int, task *Task) error { } *task = t.Task - log.WithFields(s.logFields()).Infof("Task #%d dispatched.", task.ID) + log.WithFields(s.logFields()).Infof("Task #%v dispatched.", t) - time.AfterFunc(s.timeoutDur, s.checkTimeoutFunc(t.Task.ID, t.Epoch)) + time.AfterFunc(s.timeoutDur, s.checkTimeoutFunc(t.Task.ID, t.Task.Epoch)) return nil } @@ -372,6 +378,7 @@ func (s *Service) TaskFinished(taskID int, dummy *int) error { // task finished, reset timeout t.NumTimeout = 0 + t.FailedNum = 0 s.taskQueues.Done = append(s.taskQueues.Done, t) delete(s.taskQueues.Pending, taskID) @@ -389,3 +396,23 @@ func (s *Service) TaskFinished(taskID int, dummy *int) error { } return err } + +// TaskFailed tell the service that a task is failed. +func (s *Service) TaskFailed(taskID int, epoch int) error { + select { + case <-s.ready: + } + + s.mu.Lock() + defer s.mu.Unlock() + + t, ok := s.taskQueues.Pending[taskID] + if !ok { + err := errors.New("pending task not found") + log.WithFields(s.logFields()).Warningln("TaskFailed:Pending task #%d not found.", taskID) + return err + } + + s.checkTaskStatus(t, epoch) + return nil +} From 52cc601b48f6f5e179efa79bb2ba5442d42eac75 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Tue, 4 Jul 2017 02:41:47 +0000 Subject: [PATCH 245/542] fix bugs --- go/master/service.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/go/master/service.go b/go/master/service.go index b078f318f5..c47319317a 100644 --- a/go/master/service.go +++ b/go/master/service.go @@ -41,7 +41,7 @@ type Task struct { type taskEntry struct { NumTimeout int Task Task - FailedNum int + NumFailed int } type taskQueues struct { @@ -275,13 +275,13 @@ func (s *Service) checkTaskStatus(t taskEntry, epoch int) { delete(s.taskQueues.Pending, t.Task.ID) t.NumTimeout++ - if t.NumTimeout+t.FailedNum > s.failortimeoutMax { - log.Warningf("Task %v timed out %d times and failed %d times, discard.", t.Task, t.NumTimeout, t.FailedNum) + if t.NumTimeout+t.NumFailed > s.failortimeoutMax { + log.Warningf("Task %v timed out %d times and failed %d times, discard.", t.Task, t.NumTimeout, t.NumFailed) s.taskQueues.Failed = append(s.taskQueues.Failed, t) return } - log.Warningf("Task %v timed out %d times and failed %d times, discard.", t.Task, t.NumTimeout, t.FailedNum) + log.Warningf("Task %v timed out %d times and failed %d times, discard.", t.Task, t.NumTimeout, t.NumFailed) s.taskQueues.Todo = append(s.taskQueues.Todo, t) return } @@ -378,7 +378,7 @@ func (s *Service) TaskFinished(taskID int, dummy *int) error { // task finished, reset timeout t.NumTimeout = 0 - t.FailedNum = 0 + t.NumFailed = 0 s.taskQueues.Done = append(s.taskQueues.Done, t) delete(s.taskQueues.Pending, taskID) From 
ed18647e37f4e345f02171f29af6e22fab4790ea Mon Sep 17 00:00:00 2001 From: qijun Date: Tue, 4 Jul 2017 11:00:59 +0800 Subject: [PATCH 246/542] finish test --- paddle/platform/CMakeLists.txt | 1 - paddle/platform/cuda.h | 1 + paddle/platform/device_context.h | 170 ------------------------- paddle/platform/device_context_test.cu | 29 ----- 4 files changed, 1 insertion(+), 200 deletions(-) delete mode 100644 paddle/platform/device_context.h delete mode 100644 paddle/platform/device_context_test.cu diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index c95b54a4df..ffdc23d599 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -4,4 +4,3 @@ cc_library(place SRCS place.cc) cc_test(place_test SRCS place_test.cc DEPS place glog gflags) cc_library(dynamic_loader SRCS dynamic_loader.cc) -nv_test(device_context_test SRCS device_context_test.cu DEPS place dynamic_loader glog gflags) diff --git a/paddle/platform/cuda.h b/paddle/platform/cuda.h index 05290b0e1e..5ed36c0f02 100644 --- a/paddle/platform/cuda.h +++ b/paddle/platform/cuda.h @@ -33,6 +33,7 @@ int GetDeviceCount(void) { throw_on_error(cudaGetDeviceCount(&count), "cudaGetDeviceCount failed"); return count; } + int GetCurrentDeviceId(void) { int device_id; throw_on_error(cudaGetDevice(&device_id), "cudaGetDevice failed"); diff --git a/paddle/platform/device_context.h b/paddle/platform/device_context.h deleted file mode 100644 index 65e76666a7..0000000000 --- a/paddle/platform/device_context.h +++ /dev/null @@ -1,170 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#ifndef PADDLE_ONLY_CPU -#include "paddle/platform/cublas.h" -#include "paddle/platform/cuda.h" -#include "paddle/platform/cudnn.h" -#include "paddle/platform/curand.h" -#define EIGEN_USE_GPU -#endif - -#include "paddle/framework/enforce.h" -#include "paddle/platform/place.h" -#include "unsupported/Eigen/CXX11/Tensor" - -namespace paddle { -namespace platform { - -class DeviceContext { - public: - virtual ~DeviceContext() {} -}; - -class CpuDeviceContext : public DeviceContext { - Eigen::DefaultDevice eigen_device() { - if (!eigen_device_) { - eigen_device_ = new Eigen::DefaultDevice(); - } - return *eigen_device_; - } - - private: - Eigen::DefaultDevice* eigen_device_{nullptr}; -}; - -#ifndef PADDLE_ONLY_CPU -class DeviceGuard { - public: - explicit DeviceGuard(GPUPlace new_place) : previous_(GetCurrentDeviceId()) { - if (previous_ != new_place) { - paddle::platform::SetDeviceId(new_place.device); - } - } - - ~DeviceGuard() { paddle::platform::SetDeviceId(previous_.device); } - - private: - GPUPlace previous_; -}; - -class CudaDeviceContext : public DeviceContext { - public: - explicit CudaDeviceContext(const GPUPlace gpu_place) : gpu_place_(gpu_place) { - DeviceGuard guard(gpu_place_); - paddle::platform::throw_on_error(cudaStreamCreate(&stream_), - "cudaStreamCreate failed"); - eigen_stream_ = new Eigen::CudaStreamDevice(&stream_); - eigen_device_ = new Eigen::GpuDevice(eigen_stream_); - } - - void Wait() { - paddle::platform::throw_on_error(cudaStreamSynchronize(stream_), - "cudaStreamSynchronize failed"); - } - - cudaStream_t stream() { return stream_; } - - Eigen::GpuDevice eigen_device() { return *eigen_device_; } - - cublasHandle_t cublas_handle() { - if (!blas_handle_) { - DeviceGuard guard(gpu_place_); - PADDLE_ENFORCE( - paddle::dyload::cublasCreate(&blas_handle_) == CUBLAS_STATUS_SUCCESS, - "cublasCreate failed"); - PADDLE_ENFORCE(paddle::dyload::cublasSetStream(blas_handle_, stream_) == - CUBLAS_STATUS_SUCCESS, - "cublasSetStream failed"); - } - return blas_handle_; - } - - cudnnHandle_t cudnn_handle() { - if (!dnn_handle_) { - DeviceGuard guard(gpu_place_); - PADDLE_ENFORCE( - paddle::dyload::cudnnCreate(&dnn_handle_) == CUDNN_STATUS_SUCCESS, - "cudnnCreate failed"); - PADDLE_ENFORCE(paddle::dyload::cudnnSetStream(dnn_handle_, stream_) == - CUDNN_STATUS_SUCCESS, - "cudnnSetStream failed"); - } - return dnn_handle_; - } - - curandGenerator_t curand_generator() { - if (!rand_generator_) { - DeviceGuard guard(gpu_place_); - PADDLE_ENFORCE(paddle::dyload::curandCreateGenerator( - &rand_generator_, CURAND_RNG_PSEUDO_DEFAULT) == - CURAND_STATUS_SUCCESS, - "curandCreateGenerator failed"); - PADDLE_ENFORCE( - paddle::dyload::curandSetPseudoRandomGeneratorSeed( - rand_generator_, random_seed_) == CURAND_STATUS_SUCCESS, - "curandSetPseudoRandomGeneratorSeed failed"); - PADDLE_ENFORCE(paddle::dyload::curandSetStream( - rand_generator_, stream_) == CURAND_STATUS_SUCCESS, - "curandSetStream failed"); - } - return rand_generator_; - } - - ~CudaDeviceContext() { - Wait(); - if (blas_handle_) { - PADDLE_ENFORCE( - paddle::dyload::cublasDestroy(blas_handle_) == CUBLAS_STATUS_SUCCESS, - "cublasDestroy failed"); - } - - if (dnn_handle_) { - PADDLE_ENFORCE( - paddle::dyload::cudnnDestroy(dnn_handle_) == CUDNN_STATUS_SUCCESS, - "cudnnDestroy failed"); - } - - if (rand_generator_) { - PADDLE_ENFORCE(paddle::dyload::curandDestroyGenerator(rand_generator_) == - CURAND_STATUS_SUCCESS, - "curandDestroyGenerator failed"); - } - - delete eigen_stream_; - delete eigen_device_; - - 
paddle::platform::throw_on_error(cudaStreamDestroy(stream_), - "cudaStreamDestroy failed"); - } - - private: - GPUPlace gpu_place_; - cudaStream_t stream_; - - Eigen::CudaStreamDevice* eigen_stream_; - Eigen::GpuDevice* eigen_device_; - - cublasHandle_t blas_handle_{nullptr}; - - cudnnHandle_t dnn_handle_{nullptr}; - - int random_seed_; - curandGenerator_t rand_generator_{nullptr}; -}; -#endif -} // namespace platform -} // namespace paddle diff --git a/paddle/platform/device_context_test.cu b/paddle/platform/device_context_test.cu deleted file mode 100644 index a15fb53b71..0000000000 --- a/paddle/platform/device_context_test.cu +++ /dev/null @@ -1,29 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/platform/device_context.h" -#include "gtest/gtest.h" - - -TEST(DeviceContext, CudaDevice) { - int count = paddle::platform::GetDeviceCount(); - for (int i = 0; i < count; i++) { - paddle::platform::CudaDeviceContext* device_context = new paddle::platform::CudaDeviceContext(i); - __attribute__((unused)) Eigen::GpuDevice gpu_device = device_context->eigen_device(); - __attribute__((unused)) cudnnHandle_t cudnn_handle = device_context->cudnn_handle(); - __attribute__((unused)) cublasHandle_t cublas_handle = device_context->cublas_handle(); - __attribute__((unused)) curandGenerator_t curand_handle = device_context->curand_generator(); - delete device_context; - } -} From 76b7be46da5fe211d25e62712673cc01bea98d54 Mon Sep 17 00:00:00 2001 From: qijun Date: Tue, 4 Jul 2017 11:16:49 +0800 Subject: [PATCH 247/542] add deps for dyload cc_library --- paddle/platform/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index ffdc23d599..4f6381b8af 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -3,4 +3,4 @@ nv_test(cuda_test SRCS cuda_test.cu) cc_library(place SRCS place.cc) cc_test(place_test SRCS place_test.cc DEPS place glog gflags) -cc_library(dynamic_loader SRCS dynamic_loader.cc) +cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags) From 9f365d36364d34f2cf186d5bc0569189145c612d Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Tue, 4 Jul 2017 11:23:49 +0800 Subject: [PATCH 248/542] "add net proto" --- paddle/framework/CMakeLists.txt | 4 +++ paddle/framework/net.h | 48 ++++++++++---------------------- paddle/framework/net_proto.proto | 16 +++++++++++ 3 files changed, 35 insertions(+), 33 deletions(-) create mode 100644 paddle/framework/net_proto.proto diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index f7e5753ac2..8c34a77c20 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -7,4 +7,8 @@ cc_test(scope_test SRCS scope_test.cc) cc_test(enforce_test SRCS enforce_test.cc) proto_library(attr_type SRCS attr_type.proto) proto_library(op_proto SRCS op_proto.proto) + cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto attr_type 
protobuf) + +proto_library(net_proto SRCS net_proto.proto) +cc_library(net SRCS net.cc DEPS net_proto attr_type op_proto) diff --git a/paddle/framework/net.h b/paddle/framework/net.h index 88bdf0bb68..b3064e4f90 100644 --- a/paddle/framework/net.h +++ b/paddle/framework/net.h @@ -14,6 +14,8 @@ #pragma once +#include "paddle/framework/net_proto.pb.h" +#include "paddle/framework/op_proto.pb.h" #include "paddle/framework/scope.h" namespace paddle { @@ -27,31 +29,11 @@ typedef int OpIndex; * keep updating if the concepts related are implemented. */ -// Operator's runtime context. -struct OpContext { - int dev_id; - DevType dev_type{kCPU}; - enum DevType { kCPU, kGPU }; -}; - -// Proto definitions, use `struct`s for simpility. -struct VarDesc { - std::string type; - std::vector dims; -}; -struct OpDesc { - std::string type; - std::vector inputs; - std::vector outputs; -}; -struct struct NetDesc { - std::vector ops; -}; class Operator { public: Operator(const OpDesc &def) {} - Error InferShape() {} - Error Run() {} + bool InferShape() {} + bool Run() {} }; /** @@ -73,7 +55,7 @@ class Net { /** * @brief Infer shapes of all inputs and outputs of operators. */ - virtual Error InferShape(Scope *scope) override; + virtual bool InferShape(Scope *scope) override; /** * @brief Run the network. * @@ -82,8 +64,8 @@ class Net { * environment for ops. `begin` and `end` specify the scope of `ops_` to run, * If no positive indexes are provided, all operators in `ops_` will run. */ - virtual Error Run(Scope *scope, OpContext *context, OpIndex begin = -1, - OpIndex end = -1) const = 0; + virtual bool Run(Scope *scope, OpContext *context, OpIndex begin = -1, + OpIndex end = -1) const = 0; /** * @brief Add an Operator according to `def`. @@ -93,12 +75,12 @@ class Net { /** * @brief Add optimizer operators acctording to `attrs`. */ - virtual Error AddOptimizerOps(const OptAttrs &attrs) = 0; + virtual bool AddOptimizerOps(const OptAttrs &attrs) = 0; /** * @brief Add backward operators. */ - virtual Error AddBackwardOps() = 0; + virtual bool AddBackwardOps() = 0; /** * @brief Create a network. @@ -126,7 +108,7 @@ class PlainNet : public Net { * Infer all the operators' input and output varialbes' shapes, will be called * before every mini-batch */ - virtual Error InferShape(Scope *scope) override; + virtual bool InferShape(Scope *scope) override; /** * @brief Run the network. @@ -135,8 +117,8 @@ class PlainNet : public Net { * scope will be used instead. If no OpContext is provicded, default context * will be used. */ - virtual Error Run(Scope *scope = nullptr, OpContext *context = nullptr, - OpIndex begin = -1, OpIndex end = -1) const override; + virtual bool Run(Scope *scope = nullptr, OpContext *context = nullptr, + OpIndex begin = -1, OpIndex end = -1) const override; /** * @brief Add an operator to this network. @@ -146,12 +128,12 @@ class PlainNet : public Net { /** * @brief Add all optimizer operators related into the network. */ - virtual Error AddOptimizerOps(const OptAttrs &attrs) override; + virtual bool AddOptimizerOps(const OptAttrs &attrs) override; /** * @brief Add all backward operators related into the network. */ - virtual Error AddBackwardOps() override; + virtual bool AddBackwardOps() override; protected: /** @@ -159,7 +141,7 @@ class PlainNet : public Net { * * Create operators accordding to `def`, will be called by the constructor. */ - Error BuildNet(const NetDesc &def); + bool BuildNet(const NetDesc &def); /** * @brief Add an operator into this network. 
diff --git a/paddle/framework/net_proto.proto b/paddle/framework/net_proto.proto new file mode 100644 index 0000000000..e9aed8f349 --- /dev/null +++ b/paddle/framework/net_proto.proto @@ -0,0 +1,16 @@ +syntax="proto2"; +package paddle.framework; + +import "op_proto.proto" + +message NetDesc { + // network identification + optional string name = 1; + // operator contains in network + repeated OpProto operators = 2; + // network type to run with. e.g "plainNet", "DAG" + optional string type = 3; + // num worker always + optional int32 num_workers = 4; +} + From 3b073fdc2be1c808db27519e01e3a61c07927959 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Tue, 4 Jul 2017 11:25:11 +0800 Subject: [PATCH 249/542] fix error in test_LayerGrad --- paddle/gserver/tests/test_LayerGrad.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index ed067e7c3a..d3c99eb8b9 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -873,7 +873,7 @@ TEST(Layer, SequenceLastInstanceLayer) { TEST(Layer, AverageLayer) { testDegradeLayer(false, "average", "non-seq", -1); // seq average to non-seq testDegradeLayer(false, - "max", + "average", "non-seq", 5); // seq average to a shorten seq, stride window = 5 testDegradeLayer( From f535b79820ae97ade802053dc421a893460367c8 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Tue, 4 Jul 2017 12:05:52 +0800 Subject: [PATCH 250/542] sort the Author.md with Alphabetical order --- AUTHORS.md | 74 +++++++++++++++++++++++++++--------------------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/AUTHORS.md b/AUTHORS.md index 09698ac140..4db4a4a8e7 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -1,48 +1,48 @@ | Github account | name | |---|---| +| backyes | Yan-Fei Wang | | beckett1124 | Bin Qi | -| Canpio | Jiayi Feng | -| chengxiaohua1105 | Xiaohua Cheng | -| xushaoyong | Shaoyong Xu | -| liuyuan | Yuan Liu | -| xujun05 | Jun Xu | -| dzhwinter | Zhihong Dong | -| Guo Sheng | Sheng Guo | -| kuke | Yibing Liu | -| llxxxll | YongFeng Liu | -| cxysteven | Xingyi Cheng | -| NHZlX | Zhaolong Xing | -| pakchoi | Chuanjiang Song | -| pkuyym | Yaming Yang | -| Superjom | Chunwei Yan | -| wanghaoshuang | Haoshuang Wang | -| wangzhen-nlp | Zhen Wang | -| wwhu | Weiwei Hu | -| xinghai-sun | XingHai Sun | -| zhaopu7 | Pu Zhao | -| reyoung | Yang Yu | +| Canpio | Jia-Yi Feng | +| chengxiaohua1105 | Xiao-Hua Cheng | +| cxwangyi, yiwangbaidu, wangkuiyi | Yi Wang | +| cxysteven | Xing-Yi Cheng | +| dzhwinter | Zhi-Hong Dong | +| emailweixu | Wei Xu | | gangliao | Gang Liao | -| luotao01 | Tao Luo | -| jacquesqiao | Long-Fei Qiao | -| qingqing01 | Qing-Qing Dang | +| gongweibao | Wei-Bao Gong | +| Guo Sheng | Sheng Guo | +| Haichao-Zhang | Hai-Chao Zhang | | hedaoyuan | Dao-Yuan He | -| wangyang59 | Yang Wang | +| helinwang | He-Lin Wang | +| jacquesqiao | Long-Fei Qiao | +| kuke | Yi-Bing Liu | +| lcy-seso | Ying Cao | +| lipeng-unisound | Peng Li | +| liuyuan | Yuan Liu | +| livc | Zhao Li | +| llxxxll | Yong-Feng Liu | +| luotao01 | Tao Luo | +| lzhao4ever | Liang Zhao | +| NHZlX | Zhao-Long Xing | +| pakchoi | Chuan-Jiang Song | +| pengli09 | Peng Li | +| pkuyym | Ya-Ming Yang | | QiJune | Jun Qi | +| qingqing01 | Qing-Qing Dang | +| reyoung | Yang Yu | +| Superjom | Chun-Wei Yan | | tianbingsz | Tian-Bing Xu | -| cxwangyi, yiwangbaidu, wangkuiyi | Yi Wang | | typhoonzero | Yi Wu | -| backyes | Yan-Fei Wang | -| pengli09 | Peng Li | -| livc | Zhao 
Li |
+| wanghaoshuang | Hao-Shuang Wang |
+| wangyang59 | Yang Wang |
+| wangzhen-nlp | Zhen Wang |
+| wen-bo-yang | Wen-Bo Yang |
+| wwhu | Wei-Wei Hu |
+| xinghai-sun | Xing-Hai Sun |
 | Xreki | Yi-Qun Liu |
+| xujun05 | Jun Xu |
+| xushaoyong | Shao-Yong Xu |
 | Yancey1989 | Xu Yan |
-| emailweixu | Wei Xu |
-| wen-bo-yang | Wen-Bo Yang |
-| helinwang | He-Lin Wang |
-| lcy-seso | Ying Cao |
-| Zrachel | Rui-Qing Zhang |
-| Haichao-Zhang | Hai-Chao Zhang |
-| gongweibao | Wei-Bao Gong |
-| lzhao4ever | Liang Zhao |
+| zhaopu7 | Pu Zhao |
 | zhouxiao-coder | Xiao Zhou |
-| lipeng-unisound | Peng Li |
+| Zrachel | Rui-Qing Zhang |

From 06156daa281e55fe5d06217cc545cd8c09aa4c9d Mon Sep 17 00:00:00 2001
From: "Superjom (Chunwei Yan)"
Date: Tue, 4 Jul 2017 12:07:16 +0800
Subject: [PATCH 251/542] net design with NetBuilder (#2598)

* move net_design to framework
* change CreateNet result to unique_ptr
* rename "ScratchNet" -> "PlainNet"
* add three methods to NetBase
* add NetBuilder
* add InferShape to NetBuilder.Run
* rename ApplyGradient, ApplyOptimizer -> AddGradientOps, AddOptimiz
* rename PlainNet::CreateNet -> BuildNet
* add Error and other rename actions
---
 paddle/framework/net_design.md | 250 +++++++++++++++++++++++++++++++++
 1 file changed, 250 insertions(+)
 create mode 100644 paddle/framework/net_design.md

diff --git a/paddle/framework/net_design.md b/paddle/framework/net_design.md
new file mode 100644
index 0000000000..a5f0483081
--- /dev/null
+++ b/paddle/framework/net_design.md
@@ -0,0 +1,250 @@
+# Network Design
+
+`Network` is the container and controller of a set of operators;
+users can build a real network from a `NetDesc`, which is a protobuf message,
+and use `Network.Run()` to run all the operators in the network.
+
+A network object knows all Operators belonging to this network. Variables,
+which are inputs and outputs of these operators,
+are created and managed by a hierarchy of Scope objects.
+
+# API
+
+## Net
+To make the `Network` extendable, a base class is defined like this
+
+```c++
+// operator's index stored in a network.
+typedef int OpIndex;
+
+// The minimum interface a network should implement.
+class Net {
+ public:
+  // Run all the operators and return success(true) or not, with all the
+  // variables located in `scope`. `context` describes the detail execution
+  // environment for ops. `begin` and `end` specify the scope of `ops_` to run;
+  // if no positive indexes are provided, all operators in `ops_` will run.
+  virtual Error Run(Scope *scope, OpContext *context, OpIndex begin = -1,
+                    OpIndex end = -1) const = 0;
+
+  // Add an Operator according to `def`.
+  virtual OpIndex AddOp(const proto::OpDef &def) = 0;
+
+  // Add optimizer operators according to `attrs`.
+  virtual Error AddOptimizerOps(const OptAttrs &attrs) = 0;
+
+  // Add backward operators.
+  virtual Error AddBackwardOps() = 0;
+
+  // Infer the shapes of variables required by operators in the network. The
+  // `scope` will be mutated according to the inferred shapes.
+
+  static std::unique_ptr<Net> Create(const NetDesc &def = NetDesc());
+};
+```
+
+All network implementations should build networks from a protobuf message which
+describes the structure of a real network; the `Run` method should be implemented by
+all implementations to offer a universal method to forward or backward compute a network.
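
[Editor's note: a minimal sketch of the protobuf-to-factory flow described above — not part of the original patch. It assumes the `NetDesc` message added in `net_proto.proto` earlier in this series (fields `name`, `operators`, `type`, `num_workers`) with the standard protobuf-generated accessors; the type string `"plainNet"` comes from the comment in that proto file.]

```c++
#include "paddle/framework/net.h"           // Net and Net::Create
#include "paddle/framework/net_proto.pb.h"  // generated from net_proto.proto

void BuildExampleNet() {
  // Describe the network in a NetDesc protobuf, then hand it to the factory.
  paddle::framework::NetDesc desc;
  desc.set_name("example_net");  // optional string name = 1;
  desc.set_type("plainNet");     // optional string type = 3; selects PlainNet
  desc.set_num_workers(1);       // optional int32 num_workers = 4;
  desc.add_operators();          // repeated OpProto operators = 2; one per op

  auto net = paddle::framework::Net::Create(desc);  // std::unique_ptr<Net>
}
```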
+
+`Net::Create` is a method of factory pattern and can be implemented like
+
+```c++
+std::unique_ptr<Net> Net::Create(const NetDesc& def) {
+  switch (def.model_type()) {
+    case NN:
+      return new Network(def);
+    case Recursive:
+      return new RecursiveNet(def);
+    case Recurrent:
+      return new RecurrentNet(def);
+  }
+  return nullptr;
+}
+```
+
+Network is designed as the container of operators. To make it more extendable,
+we decouple it from the related variable resources.
+
+`Run(Scope* scope)` takes the scope as an argument so that it can run in different scopes.
+
+Finally, `Net` can be used as follows
+
+```c++
+Scope default_scope;
+OpContext default_context;
+auto net = Net::CreateNet(def);
+
+if (net) {
+  net->Run(&default_scope, &default_context);
+}
+```
+
+## `PlainNet` as a simple implementation of `BaseNet`
+
+A very basic implementation is as follows. All it does is simply run every operator in sequence.
+
+```c++
+class PlainNet : public Net {
+ public:
+  // Create a network described by `def`. NetDesc is the definition of a network.
+  PlainNet(const NetDesc &def);
+
+  // Infer all the operators' input and output variables' shapes; will be
+  // called before every mini-batch training.
+  virtual Error InferShape(Scope *scope) override;
+
+  // Run all the operators with the `scope`; if no scope is provided, the default
+  // scope will be used instead. If no OpContext is provided, the default context will be used.
+  virtual Error Run(Scope *scope = nullptr, OpContext *context = nullptr, OpIndex begin = -1,
+                    OpIndex end = -1) const override;
+
+  virtual OpIndex AddOp(const proto::OpDef &def) override;
+
+  virtual Error AddOptimizerOps(const OptAttrs &attrs) override;
+
+  virtual Error AddBackwardOps() override;
+
+ protected:
+  // Create operators according to `def`; will be called by the constructor.
+  Error BuildNet(const NetDesc &def);
+
+  // Add an operator which is identified as `type` and has attributes described
+  // in `attrs`; the `inputs` are the keys of read-only input variables,
+  // `outputs` are keys of mutable output variables. An `OpIndex` will be
+  // returned to indicate the offset of the new operator in `ops_`.
+  OpIndex AddOp(const std::string &type, const std::vector<std::string> &inputs,
+                const std::vector<std::string> &outputs,
+                const OprAttr &attrs = OprAttr());
+
+ private:
+  // the operators owned by `Network`.
+  std::vector<Operator> ops_;
+};
+```
+
+`PlainNet` will create operators so that a private member `ops_` is defined;
+the operators are created by `BuildNet`, and each operator is created by `AddOp`.
+
+
+## PlainNet Usage
+`PlainNet` can be used to define and run a network as follows
+
+```c++
+// create an empty scope located on CPU device.
+Scope scope(CPUPlace());
+
+// create and init variables described in `net_desc`.
+scope.CreateVariables(net_desc);
+scope.InitVariables(net_desc);
+
+// create a network according to `net_desc`
+auto net = Net::CreateNet(net_desc);
+// Add more operators if needed.
+net->AddOp(add...);
+net->AddOp(fc...);
+
+net->AddBackwardOps();
+net->AddOptimizerOps();
+
+// run the network providing the `scope`.
+net->Run(&scope);
+```
+
+## `NetBuilder` as a C++ syntax wrapper
+This is a detailed description of the user-facing C++ network API, and may not be needed in the prototype development stage.
+
+The `NetBuilder` will give users a much simpler syntax as follows to create a network, and demonstrates how to use the `BaseNet`'s raw interfaces.
+
+```c++
+Variable* fc_out = builder.AddOp("fc", input=image, size=100, activation="Sigmoid");
+Variable* prediction = builder.AddOp("fc", input=fc_out, size=10, activation="Sigmoid");
+Variable* loss = builder.AddOp("cross_entropy", input=prediction, label=label);
+Variable* avg_loss = builder.AddOp("mean", loss);
+
+builder.BackwardFrom(avg_loss);
+builder.AddOptimization(1e-4, "adam");
+builder.Run();
+```
+
+`NetBuilder` will call `Net`'s virtual functions to change the real network structure; here is a sample definition
+
+```c++
+class NetBuilder final {
+ public:
+  NetBuilder(Net* net) : net_(net) {}
+
+  Variable* AddOp(const string& type, const vector<Variable*>& inputs,
+                  size_t size, Activation act) {
+    // much code here.
+    // ...
+    net_->AddOp(def);
+    need_rebuild_net_ = true;
+    net_->InferShape();
+    // ...
+  }
+
+  Error BackwardFrom(const Variable& cost);
+
+  Error Run(Scope* scope, OpContext* context, bool need_backward = true) {
+    // backward.
+    if (need_backward) {
+      if (need_rebuild_net_) {
+        AddBackwardOps();
+        AddOptimizerOps();
+      }
+      net_->Run(scope, context);
+      return;
+    }
+    // just forward.
+    net_->Run(scope, context, 0, last_forward_op_);
+  }
+
+ protected:
+  Error AddBackwardOps();
+  Error AddOptimizerOps();
+
+ private:
+  Net* net_;
+  OpIndex last_forward_op_{-1};
+  bool need_rebuild_net_{true};
+};
+```
+
+## Compatibility with RNN
+
+Benefiting from the decoupling of `PlainNet.Run` and `Scope`, `PlainNet` is compatible with future RNN design;
+for example, we can implement a simple recurrent neural network as follows
+
+```c++
+// copy some `vars` from `source` to `target`
+void Copy(const Scope &source, Scope &target,
+          const std::vector<std::string> &vars);
+
+Scope default_scope;
+// some initial mutations on `default_scope` here.
+
+auto rnn_step_net = PlainNet(rnn_step_net_def);
+
+// Create rnn's states; the last scope is used to store rnn outputs.
+Scope *rnn_states = new Scope[num_states + 1];
+
+for (int i = 0; i < num_states + 1; i++) {
+  // Initialize all rnn state scopes, copy parameters and so on.
+  rnn_states[i].CreateVars(rnn_step_net_def);
+  Copy(default_scope, rnn_states[i], rnn_related_vars);
+  // Prepare rnn's inlinks, just copy inlink variables to each state.
+  Copy(default_scope, rnn_states[i], inlink_vars);
+}
+
+// Run the rnn.
+for (int i = 0; i < num_states; i++) {
+  rnn_step_net.Run(&rnn_states[i]);
+  // Copy current state's state variables to next state; the related variables
+  // are named like "previous_state_xxx".
+  Copy(rnn_states[i], rnn_states[i + 1], pre_state_vars);
+}
+
+// Copy rnn's final outputs to `default_scope`.
+Copy(rnn_states[num_states], default_scope, outlink_vars);
+```

From 3de3894b821c06daf596c0818b6c89f4dd185928 Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Tue, 4 Jul 2017 12:53:00 +0800
Subject: [PATCH 252/542] Add DEPS to `proto_library`

Missing DEPS will cause compile errors when the build parallelism is large.
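
[Editor's note: a short sketch of the usage this `DEPS` argument enables; both lines appear verbatim in the framework/CMakeLists.txt changes elsewhere in this series.]

```cmake
# Declaring proto-to-proto dependencies lets highly parallel builds
# generate op_proto's headers before net_proto's sources compile.
proto_library(op_proto SRCS op_proto.proto DEPS attr_type)
proto_library(net_proto SRCS net_proto.proto DEPS op_proto)
```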
--- cmake/generic.cmake | 4 ++-- paddle/framework/CMakeLists.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index ca358da8f1..fb2222440c 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -323,10 +323,10 @@ endfunction(go_test) function(proto_library TARGET_NAME) set(oneValueArgs "") - set(multiValueArgs SRCS) + set(multiValueArgs SRCS DEPS) cmake_parse_arguments(proto_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) set(proto_srcs) set(proto_hdrs) protobuf_generate_cpp(proto_srcs proto_hdrs ${proto_library_SRCS}) - cc_library(${TARGET_NAME} SRCS ${proto_srcs} DEPS protobuf) + cc_library(${TARGET_NAME} SRCS ${proto_srcs} DEPS ${proto_library_DEPS} protobuf) endfunction() diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index f7e5753ac2..e781866759 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -6,5 +6,5 @@ cc_test(variable_test SRCS variable_test.cc) cc_test(scope_test SRCS scope_test.cc) cc_test(enforce_test SRCS enforce_test.cc) proto_library(attr_type SRCS attr_type.proto) -proto_library(op_proto SRCS op_proto.proto) +proto_library(op_proto SRCS op_proto.proto DEPS attr_type) cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto attr_type protobuf) From 4e1617d05994bda1a9eb0e0b5b563249cc12f271 Mon Sep 17 00:00:00 2001 From: liaogang Date: Tue, 4 Jul 2017 13:15:00 +0800 Subject: [PATCH 253/542] ENH: add buddy alloctor Free --- paddle/memory/detail/CMakeLists.txt | 3 + paddle/memory/detail/buddy_allocator.cc | 19 ++-- paddle/memory/detail/buddy_allocator.h | 10 +- paddle/memory/detail/memory_block.cc | 145 ++++++++++++++++++++++++ paddle/memory/detail/memory_block.h | 97 ++++++++++++++++ paddle/memory/detail/system_allocator.h | 1 + 6 files changed, 262 insertions(+), 13 deletions(-) create mode 100644 paddle/memory/detail/memory_block.cc create mode 100644 paddle/memory/detail/memory_block.h diff --git a/paddle/memory/detail/CMakeLists.txt b/paddle/memory/detail/CMakeLists.txt index 6caa97a76b..dbc98a8a62 100644 --- a/paddle/memory/detail/CMakeLists.txt +++ b/paddle/memory/detail/CMakeLists.txt @@ -7,3 +7,6 @@ else(${WITH_GPU}) cc_library(system_allocator SRCS system_allocator.cc DEPS gflags) cc_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator gflags) endif(${WITH_GPU}) + +cc_library(metadata SRCS metadata.cc) +cc_library(buddy_allocator SRCS buddy_allocator.cc) diff --git a/paddle/memory/detail/buddy_allocator.cc b/paddle/memory/detail/buddy_allocator.cc index 2462ba084b..e8d694327d 100644 --- a/paddle/memory/detail/buddy_allocator.cc +++ b/paddle/memory/detail/buddy_allocator.cc @@ -58,17 +58,16 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) { // refill the pool if failure if (it == pool_.end()) { it = RefillPool(); + // if still failure, fail fatally + if (it == pool_.end()) { + return nullptr; + } } else { DLOG(INFO) << " Allocation from existing memory block " << std::get<2>(*it) << " at address " << reinterpret_cast(std::get<2>(*it))->data(); } - // if still failure, fail fatally - if (it == pool_.end()) { - return nullptr; - } - total_used_ += size; total_free_ -= size; @@ -76,6 +75,13 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) { return reinterpret_cast(SplitToAlloc(it, size))->data(); } +void BuddyAllocator::Free(void* p) { + auto block = static_cast(p)->metadata(); + + // acquire the allocator lock + std::lock_guard lock(mutex_); +} + void* 
BuddyAllocator::SystemAlloc(size_t size) { size_t index = 0; void* p = system_allocator_->Alloc(index, size); @@ -140,17 +146,14 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::FindExistChunk(size_t size) { void* BuddyAllocator::SplitToAlloc(BuddyAllocator::PoolSet::iterator it, size_t size) { auto block = static_cast(std::get<2>(*it)); - pool_.erase(it); DLOG(INFO) << " Split block (" << block << ", " << block->total_size(cache_) << ") into"; - block->split(cache_, size); DLOG(INFO) << " Left block (" << block << ", " << block->total_size(cache_) << ")"; - block->set_type(cache_, MemoryBlock::ARENA_CHUNK); // the rest of memory if exist diff --git a/paddle/memory/detail/buddy_allocator.h b/paddle/memory/detail/buddy_allocator.h index 38bedc9a18..4006bdcce8 100644 --- a/paddle/memory/detail/buddy_allocator.h +++ b/paddle/memory/detail/buddy_allocator.h @@ -14,16 +14,16 @@ #pragma once -#include "paddle/memory/detail/system_allocator.h" #include "paddle/memory/detail/metadata.h" +#include "paddle/memory/detail/system_allocator.h" #include "paddle/platform/assert.h" #include "paddle/platform/cpu_info.h" #include "paddle/platform/gpu_info.h" -#include #include -#include +#include #include +#include namespace paddle { namespace memory { @@ -57,9 +57,9 @@ class BuddyAllocator { /*! \brief If existing chunks are not suitable, refill pool */ PoolSet::iterator RefillPool(); - /** + /** * \brief Find the suitable chunk from existing pool - * + * * \param it pool iterator which contains suitable block. * \param size the size of allocation. */ diff --git a/paddle/memory/detail/memory_block.cc b/paddle/memory/detail/memory_block.cc new file mode 100644 index 0000000000..1c9e87df49 --- /dev/null +++ b/paddle/memory/detail/memory_block.cc @@ -0,0 +1,145 @@ +#include "paddle/memory/detail/memory_block.h" +#include "paddle/platform/assert.h" + +namespace paddle { +namespace memory { +namespace detail { + +void MemoryBlock::init(MetadataCache& cache, Type t, size_t index, size_t size, + void* left_buddy, void* right_buddy) { + cache.store(this, + MemoryBlockMetadata(t, index, size - overhead(), size, + static_cast(left_buddy), + static_cast(right_buddy))); +} + +MemoryBlock::Type MemoryBlock::type(MetadataCache& cache) const { + return cache.load(this).type; +} + +size_t MemoryBlock::size(MetadataCache& cache) const { + return cache.load(this).size; +} + +size_t MemoryBlock::total_size(MetadataCache& cache) const { + return cache.load(this).total_size; +} + +MemoryBlock* MemoryBlock::left_buddy(MetadataCache& cache) const { + return cache.load(this).left_buddy; +} + +MemoryBlock* MemoryBlock::right_buddy(MetadataCache& cache) const { + return cache.load(this).right_buddy; +} + +void MemoryBlock::split(MetadataCache& cache, size_t size) { + // make sure the split fits + assert(total_size(cache) >= size); + + // bail out if there is no room for another partition + if (total_size(cache) - size <= overhead()) { + return; + } + + // find the position of the split + void* right_partition = reinterpret_cast(this) + size; + + size_t remaining_size = total_size(cache) - size; + + // Add the new block as a buddy + auto metadata = cache.load(this); + + // Write the metadata for the new block + auto new_block_right_buddy = metadata.right_buddy; + + cache.store(static_cast(right_partition), + MemoryBlockMetadata(FREE_MEMORY, index(cache), + remaining_size - overhead(), remaining_size, + this, new_block_right_buddy)); + + metadata.right_buddy = static_cast(right_partition); + metadata.size = size - overhead(); + 
metadata.total_size = size; + + cache.store(this, metadata); + + // Write metadata for the new block's right buddy + if (new_block_right_buddy != nullptr) { + auto buddy_metadata = cache.load(new_block_right_buddy); + + buddy_metadata.left_buddy = static_cast(right_partition); + + cache.store(new_block_right_buddy, buddy_metadata); + } +} + +void MemoryBlock::merge(MetadataCache& cache, MemoryBlock* right_buddy) { + // only free blocks can be merged + assert(type(cache) == FREE_MEMORY); + assert(right_buddy->type(cache) == FREE_MEMORY); + + auto metadata = cache.load(this); + + // link this->buddy's buddy + metadata.right_buddy = right_buddy->right_buddy(cache); + + // link buddy's buddy -> this + if (metadata.right_buddy != nullptr) { + auto buddy_metadata = cache.load(metadata.right_buddy); + + buddy_metadata.left_buddy = this; + + cache.store(metadata.right_buddy, buddy_metadata); + } + + metadata.size += right_buddy->total_size(cache); + metadata.total_size += right_buddy->total_size(cache); + + cache.store(this, metadata); + cache.store(right_buddy, + MemoryBlockMetadata(INVALID_MEMORY, 0, 0, 0, nullptr, nullptr)); +} + +void MemoryBlock::mark_as_free(MetadataCache& cache) { + // check for double free or corruption + assert(type(cache) != FREE_MEMORY); + assert(type(cache) != INVALID_MEMORY); + + set_type(cache, FREE_MEMORY); +} + +void MemoryBlock::set_type(MetadataCache& cache, Type t) { + auto metadata = cache.load(this); + + metadata.type = t; + + cache.store(this, metadata); +} + +bool MemoryBlock::has_left_buddy(MetadataCache& cache) const { + return left_buddy(cache) != nullptr; +} + +bool MemoryBlock::has_right_buddy(MetadataCache& cache) const { + return right_buddy(cache) != nullptr; +} + +size_t MemoryBlock::index(MetadataCache& cache) const { + return cache.load(this).index; +} + +void* MemoryBlock::data() const { + return const_cast( + reinterpret_cast(this)) + + 1; +} + +MemoryBlock* MemoryBlock::metadata() const { + return const_cast(reinterpret_cast( + reinterpret_cast(this) - 1)); +} + +} // detail +} // memory +} // paddle diff --git a/paddle/memory/detail/memory_block.h b/paddle/memory/detail/memory_block.h new file mode 100644 index 0000000000..e2d39c31cf --- /dev/null +++ b/paddle/memory/detail/memory_block.h @@ -0,0 +1,97 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include "paddle/memory/detail/metadata.h" + +#include +#include + +namespace paddle { +namespace memory { +namespace detail { + +// Forward Declaration +class Metadata; + +/*! 
\brief A class used to interpret the contents of a memory block */ +class MemoryBlock { + public: + // Unify the metadata format between GPU and CPU allocations + using MetadataCache = std::unordered_map; + + enum Type { + FREE_CHUNK, // memory is free and idle + ARENA_CHUNK, // memory is being occupied + HUGE_CHUNK, // memory is out of management + INVALID_CHUNK // memory is invalid + }; + + public: + void init(MetadataCache& cache, Type t, size_t index, size_t size, + void* left_buddy, void* right_buddy); + + public: + /*! \brief The type of the allocation */ + Type type(MetadataCache& cache) const; + + /*! \brief The size of the data region */ + size_t size(MetadataCache& cache) const; + + /*! \brief An index to track the allocator */ + size_t index(MetadataCache& cache) const; + + /*! \brief The total size of the block */ + size_t total_size(MetadataCache& cache) const; + + /*! \brief Check the left buddy of the block */ + bool has_left_buddy(MetadataCache& cache) const; + + /*! \brief Check the right buddy of the block */ + bool has_right_buddy(MetadataCache& cache) const; + + /*! \brief Get the left buddy */ + MemoryBlock* left_buddy(MetadataCache& cache) const; + + /*! \brief Get the right buddy */ + MemoryBlock* right_buddy(MetadataCache& cache) const; + + public: + /*! \brief Split the allocation into left/right blocks */ + void split(MetadataCache& cache, size_t size); + + /*! \brief Merge left and right blocks together */ + void merge(MetadataCache& cache, MemoryBlock* right_buddy); + + /*! \brief Mark the allocation as free */ + void mark_as_free(MetadataCache& cache); + + /*! \brief Change the type of the allocation */ + void set_type(MetadataCache& cache, Type t); + + public: + /*! \brief Get a pointer to the memory block's data */ + void* data() const; + + /*! 
\brief Get a pointer to the memory block's metadata */ + MemoryBlock* metadata() const; + + public: + static size_t overhead(); +}; + +} // namespace detail +} // namespace memory +} // namespace paddle diff --git a/paddle/memory/detail/system_allocator.h b/paddle/memory/detail/system_allocator.h index f3bbfef843..555061a533 100644 --- a/paddle/memory/detail/system_allocator.h +++ b/paddle/memory/detail/system_allocator.h @@ -48,6 +48,7 @@ class GPUAllocator : public SystemAllocator { virtual void* Alloc(size_t& index, size_t size); virtual void Free(void* p, size_t size, size_t index); virtual bool UseGpu(); + private: size_t gpu_alloc_size_ = 0; size_t fallback_alloc_size_ = 0; From b8cc07920e3cf623250ea0b9b078049ff1348279 Mon Sep 17 00:00:00 2001 From: liaogang Date: Tue, 4 Jul 2017 13:20:55 +0800 Subject: [PATCH 254/542] FIX: add eigen3 interface deps --- cmake/external/eigen.cmake | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cmake/external/eigen.cmake b/cmake/external/eigen.cmake index 45f44f617d..39b16c3b2b 100644 --- a/cmake/external/eigen.cmake +++ b/cmake/external/eigen.cmake @@ -5,7 +5,7 @@ SET(EIGEN_SOURCE_DIR ${THIRD_PARTY_PATH}/eigen3) INCLUDE_DIRECTORIES(${EIGEN_SOURCE_DIR}/src/eigen3) ExternalProject_Add( - eigen3 + extern_eigen3 ${EXTERNAL_PROJECT_LOG_ARGS} # for latest version, please get from official website # URL "https://bitbucket.org/eigen/eigen/get/3.3.4.tar.gz" @@ -26,4 +26,7 @@ ExternalProject_Add( TEST_COMMAND "" ) -LIST(APPEND external_project_dependencies eigen3) +ADD_LIBRARY(eigen3 INTERFACE) +ADD_DEPENDENCIES(eigen3 extern_eigen3) + +LIST(APPEND external_project_dependencies extern_eigen3) From 414c2b1734bcf39135935a201f7244d79a72e172 Mon Sep 17 00:00:00 2001 From: liaogang Date: Tue, 4 Jul 2017 13:25:30 +0800 Subject: [PATCH 255/542] FIX: add any as interface dep --- cmake/external/any.cmake | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cmake/external/any.cmake b/cmake/external/any.cmake index 62eea42692..b61e421871 100644 --- a/cmake/external/any.cmake +++ b/cmake/external/any.cmake @@ -5,7 +5,7 @@ SET(ANY_SOURCE_DIR ${THIRD_PARTY_PATH}/any) INCLUDE_DIRECTORIES(${ANY_SOURCE_DIR}/src/linb_any) ExternalProject_Add( - linb_any + extern_lib_any ${EXTERNAL_PROJECT_LOG_ARGS} GIT_REPOSITORY "https://github.com/thelink2012/any.git" GIT_TAG "8fef1e93710a0edf8d7658999e284a1142c4c020" @@ -17,5 +17,8 @@ ExternalProject_Add( TEST_COMMAND "" ) +ADD_LIBRARY(lib_any INTERFACE) +ADD_DEPENDENCIES(lib_any extern_lib_any) + add_definitions(-DANY_IMPL_ANY_CAST_MOVEABLE) -LIST(APPEND external_project_dependencies linb_any) \ No newline at end of file +LIST(APPEND external_project_dependencies extern_lib_any) From c602e046132b7e4e38c34f348b2a7fa290d67361 Mon Sep 17 00:00:00 2001 From: Superjom Date: Tue, 4 Jul 2017 13:35:21 +0800 Subject: [PATCH 256/542] add fake interfaces to make compilable --- paddle/framework/net.cc | 10 +++++--- paddle/framework/net.h | 44 +++++++++++++++++++------------- paddle/framework/net_proto.proto | 3 +-- 3 files changed, 33 insertions(+), 24 deletions(-) diff --git a/paddle/framework/net.cc b/paddle/framework/net.cc index 0ce9296820..2d9e099dc0 100644 --- a/paddle/framework/net.cc +++ b/paddle/framework/net.cc @@ -5,7 +5,7 @@ namespace framework { PlainNet::PlainNet(const NetDesc& def) {} -virtual Error PlainNet::InferShape() { +Error PlainNet::InferShape(Scope* scope) { for (auto& op : ops_) { // wrong shape auto err = op.InferShape(); @@ -15,9 +15,11 @@ virtual Error PlainNet::InferShape() { 
return Error(); } -virtual Error PlainNet::Run(Scope* scope = nullptr, - OpContext* context = nullptr, OpIndex begin = -1, - OpIndex end = -1) const {} +Error PlainNet::Run(Scope* scope, OpContext* context, OpIndex begin, + OpIndex end) const { + // TODO Add implementation here. + return Error(); +} } // namespace framework } // namespace paddle diff --git a/paddle/framework/net.h b/paddle/framework/net.h index b3064e4f90..76e0ed9330 100644 --- a/paddle/framework/net.h +++ b/paddle/framework/net.h @@ -17,6 +17,7 @@ #include "paddle/framework/net_proto.pb.h" #include "paddle/framework/op_proto.pb.h" #include "paddle/framework/scope.h" +#include "paddle/utils/Error.h" namespace paddle { namespace framework { @@ -29,11 +30,16 @@ typedef int OpIndex; * keep updating if the concepts related are implemented. */ +struct OpDesc; +struct OpDef; +struct OpContext; +struct OpAttrs {}; + class Operator { public: Operator(const OpDesc &def) {} - bool InferShape() {} - bool Run() {} + Error InferShape() { return Error(); } + Error Run() { return Error(); } }; /** @@ -55,7 +61,7 @@ class Net { /** * @brief Infer shapes of all inputs and outputs of operators. */ - virtual bool InferShape(Scope *scope) override; + virtual Error InferShape(Scope *scope) = 0; /** * @brief Run the network. * @@ -64,28 +70,30 @@ class Net { * environment for ops. `begin` and `end` specify the scope of `ops_` to run, * If no positive indexes are provided, all operators in `ops_` will run. */ - virtual bool Run(Scope *scope, OpContext *context, OpIndex begin = -1, - OpIndex end = -1) const = 0; + virtual Error Run(Scope *scope, OpContext *context, OpIndex begin = -1, + OpIndex end = -1) const = 0; /** * @brief Add an Operator according to `def`. */ - virtual OpIndex AddOp(const proto::OpDef &def) = 0; + virtual OpIndex AddOp(const OpDef &def) = 0; /** * @brief Add optimizer operators acctording to `attrs`. */ - virtual bool AddOptimizerOps(const OptAttrs &attrs) = 0; + virtual Error AddOptimizerOps(const OpAttrs &attrs) = 0; /** * @brief Add backward operators. */ - virtual bool AddBackwardOps() = 0; + virtual Error AddBackwardOps() = 0; /** * @brief Create a network. */ static std::unique_ptr Create(const NetDesc &def = NetDesc()); + + virtual ~Net() = 0; }; /** @@ -108,7 +116,7 @@ class PlainNet : public Net { * Infer all the operators' input and output varialbes' shapes, will be called * before every mini-batch */ - virtual bool InferShape(Scope *scope) override; + virtual Error InferShape(Scope *scope) override; /** * @brief Run the network. @@ -117,23 +125,23 @@ class PlainNet : public Net { * scope will be used instead. If no OpContext is provicded, default context * will be used. */ - virtual bool Run(Scope *scope = nullptr, OpContext *context = nullptr, - OpIndex begin = -1, OpIndex end = -1) const override; + virtual Error Run(Scope *scope = nullptr, OpContext *context = nullptr, + OpIndex begin = -1, OpIndex end = -1) const override; /** * @brief Add an operator to this network. */ - virtual OpIndex AddOp(const proto::OpDef &def) override; + virtual OpIndex AddOp(const OpDef &def) override; /** * @brief Add all optimizer operators related into the network. */ - virtual bool AddOptimizerOps(const OptAttrs &attrs) override; + virtual Error AddOptimizerOps(const OpAttrs &attrs) override; /** * @brief Add all backward operators related into the network. 
*/ - virtual bool AddBackwardOps() override; + virtual Error AddBackwardOps() override; protected: /** @@ -141,7 +149,7 @@ class PlainNet : public Net { * * Create operators accordding to `def`, will be called by the constructor. */ - bool BuildNet(const NetDesc &def); + Error BuildNet(const NetDesc &def); /** * @brief Add an operator into this network. @@ -151,9 +159,9 @@ class PlainNet : public Net { * `outputs` are keys of mutable output variables. An `OpIndex` will be * returned to indicate the offset of the new operator in `ops_`. */ - OpIndex AddOp(const std::string &type, const std::vector &inputs, - const std::vector &outputs, - const OprAttr &attrs = OprAttr()); + OpIndex AddOp(const std::string &type, const std::vector &inputs, + const std::vector &outputs, + const OpAttrs &attrs = OpAttrs()); private: // the operators owned by `Network`. diff --git a/paddle/framework/net_proto.proto b/paddle/framework/net_proto.proto index e9aed8f349..2d042457e3 100644 --- a/paddle/framework/net_proto.proto +++ b/paddle/framework/net_proto.proto @@ -1,7 +1,7 @@ syntax="proto2"; package paddle.framework; -import "op_proto.proto" +import "op_proto.proto"; message NetDesc { // network identification @@ -13,4 +13,3 @@ message NetDesc { // num worker always optional int32 num_workers = 4; } - From 04e20034dfcbb0ceb1de30ddd5b1f8b8ee811d4f Mon Sep 17 00:00:00 2001 From: Superjom Date: Tue, 4 Jul 2017 13:44:01 +0800 Subject: [PATCH 257/542] replace Error with void --- paddle/framework/net.cc | 11 +++-------- paddle/framework/net.h | 23 +++++++++++------------ 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/paddle/framework/net.cc b/paddle/framework/net.cc index 2d9e099dc0..d49861c343 100644 --- a/paddle/framework/net.cc +++ b/paddle/framework/net.cc @@ -5,20 +5,15 @@ namespace framework { PlainNet::PlainNet(const NetDesc& def) {} -Error PlainNet::InferShape(Scope* scope) { +void PlainNet::InferShape(Scope* scope) { for (auto& op : ops_) { - // wrong shape - auto err = op.InferShape(); - if (!err) return err; + op.InferShape(); } - // ok - return Error(); } -Error PlainNet::Run(Scope* scope, OpContext* context, OpIndex begin, +void PlainNet::Run(Scope* scope, OpContext* context, OpIndex begin, OpIndex end) const { // TODO Add implementation here. - return Error(); } } // namespace framework diff --git a/paddle/framework/net.h b/paddle/framework/net.h index 76e0ed9330..55dcf147e1 100644 --- a/paddle/framework/net.h +++ b/paddle/framework/net.h @@ -17,7 +17,6 @@ #include "paddle/framework/net_proto.pb.h" #include "paddle/framework/op_proto.pb.h" #include "paddle/framework/scope.h" -#include "paddle/utils/Error.h" namespace paddle { namespace framework { @@ -38,8 +37,8 @@ struct OpAttrs {}; class Operator { public: Operator(const OpDesc &def) {} - Error InferShape() { return Error(); } - Error Run() { return Error(); } + void InferShape() {} + void Run() {} }; /** @@ -61,7 +60,7 @@ class Net { /** * @brief Infer shapes of all inputs and outputs of operators. */ - virtual Error InferShape(Scope *scope) = 0; + virtual void InferShape(Scope *scope) = 0; /** * @brief Run the network. * @@ -70,7 +69,7 @@ class Net { * environment for ops. `begin` and `end` specify the scope of `ops_` to run, * If no positive indexes are provided, all operators in `ops_` will run. 
*/ - virtual Error Run(Scope *scope, OpContext *context, OpIndex begin = -1, + virtual void Run(Scope *scope, OpContext *context, OpIndex begin = -1, OpIndex end = -1) const = 0; /** @@ -81,12 +80,12 @@ class Net { /** * @brief Add optimizer operators acctording to `attrs`. */ - virtual Error AddOptimizerOps(const OpAttrs &attrs) = 0; + virtual void AddOptimizerOps(const OpAttrs &attrs) = 0; /** * @brief Add backward operators. */ - virtual Error AddBackwardOps() = 0; + virtual void AddBackwardOps() = 0; /** * @brief Create a network. @@ -116,7 +115,7 @@ class PlainNet : public Net { * Infer all the operators' input and output varialbes' shapes, will be called * before every mini-batch */ - virtual Error InferShape(Scope *scope) override; + virtual void InferShape(Scope *scope) override; /** * @brief Run the network. @@ -125,7 +124,7 @@ class PlainNet : public Net { * scope will be used instead. If no OpContext is provicded, default context * will be used. */ - virtual Error Run(Scope *scope = nullptr, OpContext *context = nullptr, + virtual void Run(Scope *scope = nullptr, OpContext *context = nullptr, OpIndex begin = -1, OpIndex end = -1) const override; /** @@ -136,12 +135,12 @@ class PlainNet : public Net { /** * @brief Add all optimizer operators related into the network. */ - virtual Error AddOptimizerOps(const OpAttrs &attrs) override; + virtual void AddOptimizerOps(const OpAttrs &attrs) override; /** * @brief Add all backward operators related into the network. */ - virtual Error AddBackwardOps() override; + virtual void AddBackwardOps() override; protected: /** @@ -149,7 +148,7 @@ class PlainNet : public Net { * * Create operators accordding to `def`, will be called by the constructor. */ - Error BuildNet(const NetDesc &def); + void BuildNet(const NetDesc &def); /** * @brief Add an operator into this network. From b7397031e9c03363c4e3e3119ff371b0d8a13e7c Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 4 Jul 2017 13:47:02 +0800 Subject: [PATCH 258/542] Add target_link_libraries for cc_library It will fix #2728. Maybe it is silly to `target_link_libraries` for static library, because a static library do not need to link other libraries. But it will tell cmake how to propagate dependencies. The solution comes from [here](http://floooh.github.io/2016/01/12/cmake-dependency-juggling.html). * Also change op_proto_test DEPS for testing this fix works. 
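
[Editor's note: an illustrative sketch of the transitive propagation this commit enables; the target names are made up.]

```cmake
# With target_link_libraries now emitted by cc_library, a dependency
# chain A -> B -> C resolves automatically: linking A pulls in B and C.
cc_library(C SRCS c.cc)
cc_library(B SRCS b.cc DEPS C)
cc_library(A SRCS a.cc DEPS B)
cc_test(a_test SRCS a_test.cc DEPS A)  # a_test links B and C transitively
```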
--- cmake/generic.cmake | 2 ++ paddle/framework/CMakeLists.txt | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index fb2222440c..cae9524b2f 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -162,6 +162,7 @@ function(cc_library TARGET_NAME) endif() if (cc_library_DEPS) add_dependencies(${TARGET_NAME} ${cc_library_DEPS}) + target_link_libraries(${TARGET_NAME} ${cc_library_DEPS}) endif() else(cc_library_SRCS) if (cc_library_DEPS) @@ -211,6 +212,7 @@ function(nv_library TARGET_NAME) endif() if (nv_library_DEPS) add_dependencies(${TARGET_NAME} ${nv_library_DEPS}) + target_link_libraries(${TARGET_NAME} ${nv_library_DEPS}) endif() else(nv_library_SRCS) if (nv_library_DEPS) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index e781866759..baad38e3c1 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -7,4 +7,4 @@ cc_test(scope_test SRCS scope_test.cc) cc_test(enforce_test SRCS enforce_test.cc) proto_library(attr_type SRCS attr_type.proto) proto_library(op_proto SRCS op_proto.proto DEPS attr_type) -cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto attr_type protobuf) +cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf) From 9eeabe986d039b3fe3b28e5ef98f66d6dd2a3e31 Mon Sep 17 00:00:00 2001 From: qijun Date: Tue, 4 Jul 2017 14:03:58 +0800 Subject: [PATCH 259/542] follow comments --- paddle/platform/cublas.h | 58 +++++++++++++++++++++---------- paddle/platform/cudnn.h | 38 +++++++++++++++----- paddle/platform/curand.h | 40 +++++++++++++++------ paddle/platform/dynamic_loader.cc | 16 +++++++-- paddle/platform/dynamic_loader.h | 14 ++++---- 5 files changed, 119 insertions(+), 47 deletions(-) diff --git a/paddle/platform/cublas.h b/paddle/platform/cublas.h index d60eb501e9..90704f37e6 100644 --- a/paddle/platform/cublas.h +++ b/paddle/platform/cublas.h @@ -1,7 +1,24 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + #include #include "paddle/platform/dynamic_loader.h" namespace paddle { +namespace platform { namespace dyload { std::once_flag cublas_dso_flag; @@ -15,15 +32,17 @@ void *cublas_dso_handle = nullptr; * note: default dynamic linked libs */ #ifdef PADDLE_USE_DSO -#define DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ - struct DynLoad__##__name { \ - template \ - cublasStatus_t operator()(Args... args) { \ - typedef cublasStatus_t (*cublasFunc)(Args...); \ - std::call_once(cublas_dso_flag, GetCublasDsoHandle, &cublas_dso_handle); \ - void *p_##__name = dlsym(cublas_dso_handle, #__name); \ - return reinterpret_cast(p_##__name)(args...); \ - } \ +#define DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + cublasStatus_t operator()(Args... 
args) { \ + typedef cublasStatus_t (*cublasFunc)(Args...); \ + std::call_once(cublas_dso_flag, \ + paddle::platform::dyload::GetCublasDsoHandle, \ + &cublas_dso_handle); \ + void *p_##__name = dlsym(cublas_dso_handle, #__name); \ + return reinterpret_cast(p_##__name)(args...); \ + } \ } __name; // struct DynLoad__##__name #else #define DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ @@ -68,17 +87,18 @@ CUBLAS_BLAS_ROUTINE_EACH(DYNAMIC_LOAD_CUBLAS_V2_WRAP) // clang-format on #ifndef PADDLE_TYPE_DOUBLE -#define CUBLAS_GEAM dynload::cublasSgeam -#define CUBLAS_GEMV dynload::cublasSgemv -#define CUBLAS_GEMM dynload::cublasSgemm -#define CUBLAS_GETRF dynload::cublasSgetrfBatched -#define CUBLAS_GETRI dynload::cublasSgetriBatched +#define CUBLAS_GEAM paddle::platform::dynload::cublasSgeam +#define CUBLAS_GEMV paddle::platform::dynload::cublasSgemv +#define CUBLAS_GEMM paddle::platform::dynload::cublasSgemm +#define CUBLAS_GETRF paddle::platform::dynload::cublasSgetrfBatched +#define CUBLAS_GETRI paddle::platform::dynload::cublasSgetriBatched #else -#define CUBLAS_GEAM dynload::cublasDgeam -#define CUBLAS_GEMV dynload::cublasDgemv -#define CUBLAS_GEMM dynload::cublasDgemm -#define CUBLAS_GETRF dynload::cublasDgetrfBatched -#define CUBLAS_GETRI dynload::cublasDgetriBatched +#define CUBLAS_GEAM paddle::platform::dynload::cublasDgeam +#define CUBLAS_GEMV paddle::platform::dynload::cublasDgemv +#define CUBLAS_GEMM paddle::platform::dynload::cublasDgemm +#define CUBLAS_GETRF paddle::platform::dynload::cublasDgetrfBatched +#define CUBLAS_GETRI paddle::platform::dynload::cublasDgetriBatched #endif } // namespace dyload +} // namespace platform } // namespace paddle diff --git a/paddle/platform/cudnn.h b/paddle/platform/cudnn.h index ab878cd555..06e2a05d86 100644 --- a/paddle/platform/cudnn.h +++ b/paddle/platform/cudnn.h @@ -1,7 +1,24 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + #include #include "paddle/platform/dynamic_loader.h" namespace paddle { +namespace platform { namespace dyload { std::once_flag cudnn_dso_flag; @@ -9,15 +26,17 @@ void* cudnn_dso_handle = nullptr; #ifdef PADDLE_USE_DSO -#define DYNAMIC_LOAD_CUDNN_WRAP(__name) \ - struct DynLoad__##__name { \ - template \ - auto operator()(Args... args) -> decltype(__name(args...)) { \ - using cudnn_func = decltype(__name(args...)) (*)(Args...); \ - std::call_once(cudnn_dso_flag, GetCudnnDsoHandle, &cudnn_dso_handle); \ - void* p_##__name = dlsym(cudnn_dso_handle, #__name); \ - return reinterpret_cast(p_##__name)(args...); \ - } \ +#define DYNAMIC_LOAD_CUDNN_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + auto operator()(Args... 
args) -> decltype(__name(args...)) { \ + using cudnn_func = decltype(__name(args...)) (*)(Args...); \ + std::call_once(cudnn_dso_flag, \ + paddle::platform::dyload::GetCudnnDsoHandle, \ + &cudnn_dso_handle); \ + void* p_##__name = dlsym(cudnn_dso_handle, #__name); \ + return reinterpret_cast(p_##__name)(args...); \ + } \ } __name; /* struct DynLoad__##__name */ #else @@ -111,4 +130,5 @@ CUDNN_DNN_ROUTINE_EACH_R5(DYNAMIC_LOAD_CUDNN_WRAP) #undef CUDNN_DNN_ROUTINE_EACH // clang-format on } // namespace dyload +} // namespace platform } // namespace paddle diff --git a/paddle/platform/curand.h b/paddle/platform/curand.h index edff6526bd..a9cbe48ef8 100644 --- a/paddle/platform/curand.h +++ b/paddle/platform/curand.h @@ -1,20 +1,39 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + #include #include "paddle/platform/dynamic_loader.h" namespace paddle { +namespace platform { namespace dyload { std::once_flag curand_dso_flag; void *curand_dso_handle = nullptr; #ifdef PADDLE_USE_DSO -#define DYNAMIC_LOAD_CURAND_WRAP(__name) \ - struct DynLoad__##__name { \ - template \ - curandStatus_t operator()(Args... args) { \ - typedef curandStatus_t (*curandFunc)(Args...); \ - std::call_once(curand_dso_flag, GetCurandDsoHandle, &curand_dso_handle); \ - void *p_##__name = dlsym(curand_dso_handle, #__name); \ - return reinterpret_cast(p_##__name)(args...); \ - } \ +#define DYNAMIC_LOAD_CURAND_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + curandStatus_t operator()(Args... args) { \ + typedef curandStatus_t (*curandFunc)(Args...); \ + std::call_once(curand_dso_flag, \ + paddle::platform::dyload::GetCurandDsoHandle, \ + &curand_dso_handle); \ + void *p_##__name = dlsym(curand_dso_handle, #__name); \ + return reinterpret_cast(p_##__name)(args...); \ + } \ } __name; /* struct DynLoad__##__name */ #else #define DYNAMIC_LOAD_CURAND_WRAP(__name) \ @@ -41,5 +60,6 @@ CURAND_RAND_ROUTINE_EACH(DYNAMIC_LOAD_CURAND_WRAP) #undef CURAND_RAND_ROUTINE_EACH #undef DYNAMIC_LOAD_CURAND_WRAP -} +} // namespace dyload +} // namespace platform } // namespace paddle diff --git a/paddle/platform/dynamic_loader.cc b/paddle/platform/dynamic_loader.cc index c34abc392c..9e0aadf8e2 100644 --- a/paddle/platform/dynamic_loader.cc +++ b/paddle/platform/dynamic_loader.cc @@ -13,8 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "dynamic_loader.h" -#include -#include +#include +#include +#include +#include +#include "gflags/gflags.h" +#include "glog/logging.h" DEFINE_string(cudnn_dir, "", "Specify path for loading libcudnn.so. 
For instance, " @@ -30,6 +34,10 @@ DEFINE_string(warpctc_dir, "", "Specify path for loading libwarpctc.so."); DEFINE_string(lapack_dir, "", "Specify path for loading liblapack.so."); +namespace paddle { +namespace platform { +namespace dyload { + static inline std::string join(const std::string& part1, const std::string& part2) { // directory separator @@ -155,3 +163,7 @@ void GetLapackDsoHandle(void** dso_handle) { GetDsoHandleFromSearchPath(FLAGS_lapack_dir, "liblapacke.so", dso_handle); #endif } + +} // namespace dyload +} // namespace platform +} // namespace paddle diff --git a/paddle/platform/dynamic_loader.h b/paddle/platform/dynamic_loader.h index 9b5ad21724..bb58fcba17 100644 --- a/paddle/platform/dynamic_loader.h +++ b/paddle/platform/dynamic_loader.h @@ -12,13 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#ifndef DYNAMIC_LOAD_H_ -#define DYNAMIC_LOAD_H_ +#pragma once -#include -#include -#include -#include +namespace paddle { +namespace platform { +namespace dyload { /** * @brief load the DSO of CUBLAS @@ -60,4 +58,6 @@ void GetWarpCTCDsoHandle(void** dso_handle); */ void GetLapackDsoHandle(void** dso_handle); -#endif // DYNAMIC_LOAD_H_ +} // namespace dyload +} // namespace platform +} // namespace paddle From 3567ea6d7c85bdfdd42f57f6db98e5bc9bbac5e2 Mon Sep 17 00:00:00 2001 From: qijun Date: Tue, 4 Jul 2017 14:58:20 +0800 Subject: [PATCH 260/542] move to dynload directory --- paddle/platform/CMakeLists.txt | 4 +-- paddle/platform/dynload/CMakeLists.txt | 1 + paddle/platform/{ => dynload}/cublas.h | 26 +++++++++---------- paddle/platform/{ => dynload}/cudnn.h | 26 +++++++++---------- paddle/platform/{ => dynload}/curand.h | 26 +++++++++---------- .../platform/{ => dynload}/dynamic_loader.cc | 4 +-- .../platform/{ => dynload}/dynamic_loader.h | 4 +-- 7 files changed, 46 insertions(+), 45 deletions(-) create mode 100644 paddle/platform/dynload/CMakeLists.txt rename paddle/platform/{ => dynload}/cublas.h (95%) rename paddle/platform/{ => dynload}/cudnn.h (97%) rename paddle/platform/{ => dynload}/curand.h (93%) rename paddle/platform/{ => dynload}/dynamic_loader.cc (99%) rename paddle/platform/{ => dynload}/dynamic_loader.h (96%) diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index 4f6381b8af..cc6b52e927 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -1,6 +1,6 @@ +add_subdirectory(dynload) + nv_test(cuda_test SRCS cuda_test.cu) cc_library(place SRCS place.cc) cc_test(place_test SRCS place_test.cc DEPS place glog gflags) - -cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags) diff --git a/paddle/platform/dynload/CMakeLists.txt b/paddle/platform/dynload/CMakeLists.txt new file mode 100644 index 0000000000..9f829b7012 --- /dev/null +++ b/paddle/platform/dynload/CMakeLists.txt @@ -0,0 +1 @@ +cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags) diff --git a/paddle/platform/cublas.h b/paddle/platform/dynload/cublas.h similarity index 95% rename from paddle/platform/cublas.h rename to paddle/platform/dynload/cublas.h index 90704f37e6..c9150ac573 100644 --- a/paddle/platform/cublas.h +++ b/paddle/platform/dynload/cublas.h @@ -19,7 +19,7 @@ limitations under the License. 
*/ namespace paddle { namespace platform { -namespace dyload { +namespace dynload { std::once_flag cublas_dso_flag; void *cublas_dso_handle = nullptr; @@ -32,17 +32,17 @@ void *cublas_dso_handle = nullptr; * note: default dynamic linked libs */ #ifdef PADDLE_USE_DSO -#define DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ - struct DynLoad__##__name { \ - template \ - cublasStatus_t operator()(Args... args) { \ - typedef cublasStatus_t (*cublasFunc)(Args...); \ - std::call_once(cublas_dso_flag, \ - paddle::platform::dyload::GetCublasDsoHandle, \ - &cublas_dso_handle); \ - void *p_##__name = dlsym(cublas_dso_handle, #__name); \ - return reinterpret_cast(p_##__name)(args...); \ - } \ +#define DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + cublasStatus_t operator()(Args... args) { \ + typedef cublasStatus_t (*cublasFunc)(Args...); \ + std::call_once(cublas_dso_flag, \ + paddle::platform::dynload::GetCublasDsoHandle, \ + &cublas_dso_handle); \ + void *p_##__name = dlsym(cublas_dso_handle, #__name); \ + return reinterpret_cast(p_##__name)(args...); \ + } \ } __name; // struct DynLoad__##__name #else #define DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ @@ -99,6 +99,6 @@ CUBLAS_BLAS_ROUTINE_EACH(DYNAMIC_LOAD_CUBLAS_V2_WRAP) #define CUBLAS_GETRF paddle::platform::dynload::cublasDgetrfBatched #define CUBLAS_GETRI paddle::platform::dynload::cublasDgetriBatched #endif -} // namespace dyload +} // namespace dynload } // namespace platform } // namespace paddle diff --git a/paddle/platform/cudnn.h b/paddle/platform/dynload/cudnn.h similarity index 97% rename from paddle/platform/cudnn.h rename to paddle/platform/dynload/cudnn.h index 06e2a05d86..c03424b375 100644 --- a/paddle/platform/cudnn.h +++ b/paddle/platform/dynload/cudnn.h @@ -19,24 +19,24 @@ limitations under the License. */ namespace paddle { namespace platform { -namespace dyload { +namespace dynload { std::once_flag cudnn_dso_flag; void* cudnn_dso_handle = nullptr; #ifdef PADDLE_USE_DSO -#define DYNAMIC_LOAD_CUDNN_WRAP(__name) \ - struct DynLoad__##__name { \ - template \ - auto operator()(Args... args) -> decltype(__name(args...)) { \ - using cudnn_func = decltype(__name(args...)) (*)(Args...); \ - std::call_once(cudnn_dso_flag, \ - paddle::platform::dyload::GetCudnnDsoHandle, \ - &cudnn_dso_handle); \ - void* p_##__name = dlsym(cudnn_dso_handle, #__name); \ - return reinterpret_cast(p_##__name)(args...); \ - } \ +#define DYNAMIC_LOAD_CUDNN_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + auto operator()(Args... args) -> decltype(__name(args...)) { \ + using cudnn_func = decltype(__name(args...)) (*)(Args...); \ + std::call_once(cudnn_dso_flag, \ + paddle::platform::dynload::GetCudnnDsoHandle, \ + &cudnn_dso_handle); \ + void* p_##__name = dlsym(cudnn_dso_handle, #__name); \ + return reinterpret_cast(p_##__name)(args...); \ + } \ } __name; /* struct DynLoad__##__name */ #else @@ -129,6 +129,6 @@ CUDNN_DNN_ROUTINE_EACH_R5(DYNAMIC_LOAD_CUDNN_WRAP) #undef CUDNN_DNN_ROUTINE_EACH // clang-format on -} // namespace dyload +} // namespace dynload } // namespace platform } // namespace paddle diff --git a/paddle/platform/curand.h b/paddle/platform/dynload/curand.h similarity index 93% rename from paddle/platform/curand.h rename to paddle/platform/dynload/curand.h index a9cbe48ef8..1ef7a8c833 100644 --- a/paddle/platform/curand.h +++ b/paddle/platform/dynload/curand.h @@ -19,21 +19,21 @@ limitations under the License. 
*/ namespace paddle { namespace platform { -namespace dyload { +namespace dynload { std::once_flag curand_dso_flag; void *curand_dso_handle = nullptr; #ifdef PADDLE_USE_DSO -#define DYNAMIC_LOAD_CURAND_WRAP(__name) \ - struct DynLoad__##__name { \ - template \ - curandStatus_t operator()(Args... args) { \ - typedef curandStatus_t (*curandFunc)(Args...); \ - std::call_once(curand_dso_flag, \ - paddle::platform::dyload::GetCurandDsoHandle, \ - &curand_dso_handle); \ - void *p_##__name = dlsym(curand_dso_handle, #__name); \ - return reinterpret_cast(p_##__name)(args...); \ - } \ +#define DYNAMIC_LOAD_CURAND_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + curandStatus_t operator()(Args... args) { \ + typedef curandStatus_t (*curandFunc)(Args...); \ + std::call_once(curand_dso_flag, \ + paddle::platform::dynload::GetCurandDsoHandle, \ + &curand_dso_handle); \ + void *p_##__name = dlsym(curand_dso_handle, #__name); \ + return reinterpret_cast(p_##__name)(args...); \ + } \ } __name; /* struct DynLoad__##__name */ #else #define DYNAMIC_LOAD_CURAND_WRAP(__name) \ @@ -60,6 +60,6 @@ CURAND_RAND_ROUTINE_EACH(DYNAMIC_LOAD_CURAND_WRAP) #undef CURAND_RAND_ROUTINE_EACH #undef DYNAMIC_LOAD_CURAND_WRAP -} // namespace dyload +} // namespace dynload } // namespace platform } // namespace paddle diff --git a/paddle/platform/dynamic_loader.cc b/paddle/platform/dynload/dynamic_loader.cc similarity index 99% rename from paddle/platform/dynamic_loader.cc rename to paddle/platform/dynload/dynamic_loader.cc index 9e0aadf8e2..8ef67bad8c 100644 --- a/paddle/platform/dynamic_loader.cc +++ b/paddle/platform/dynload/dynamic_loader.cc @@ -36,7 +36,7 @@ DEFINE_string(lapack_dir, "", "Specify path for loading liblapack.so."); namespace paddle { namespace platform { -namespace dyload { +namespace dynload { static inline std::string join(const std::string& part1, const std::string& part2) { @@ -164,6 +164,6 @@ void GetLapackDsoHandle(void** dso_handle) { #endif } -} // namespace dyload +} // namespace dynload } // namespace platform } // namespace paddle diff --git a/paddle/platform/dynamic_loader.h b/paddle/platform/dynload/dynamic_loader.h similarity index 96% rename from paddle/platform/dynamic_loader.h rename to paddle/platform/dynload/dynamic_loader.h index bb58fcba17..a99b05443f 100644 --- a/paddle/platform/dynamic_loader.h +++ b/paddle/platform/dynload/dynamic_loader.h @@ -16,7 +16,7 @@ limitations under the License. 
*/ namespace paddle { namespace platform { -namespace dyload { +namespace dynload { /** * @brief load the DSO of CUBLAS @@ -58,6 +58,6 @@ void GetWarpCTCDsoHandle(void** dso_handle); */ void GetLapackDsoHandle(void** dso_handle); -} // namespace dyload +} // namespace dynload } // namespace platform } // namespace paddle From a211374d53090733667f2be2cf629cf858757c6d Mon Sep 17 00:00:00 2001 From: liaogang Date: Tue, 4 Jul 2017 15:01:05 +0800 Subject: [PATCH 261/542] FIX: interface deps under cmake < 3.3 --- cmake/external/any.cmake | 13 ++++++++++--- cmake/external/eigen.cmake | 13 ++++++++++--- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/cmake/external/any.cmake b/cmake/external/any.cmake index b61e421871..edf6edc0bd 100644 --- a/cmake/external/any.cmake +++ b/cmake/external/any.cmake @@ -17,8 +17,15 @@ ExternalProject_Add( TEST_COMMAND "" ) -ADD_LIBRARY(lib_any INTERFACE) -ADD_DEPENDENCIES(lib_any extern_lib_any) +if (${CMAKE_VERSION} VERSION_LESS "3.3.0") + set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/lib_any_dummy.c) + file(WRITE ${dummyfile} "const char * dummy_any = \"${dummyfile}\";") + add_library(lib_any STATIC ${dummyfile}) +else() + add_library(lib_any INTERFACE) +endif() + +add_dependencies(lib_any extern_lib_any) add_definitions(-DANY_IMPL_ANY_CAST_MOVEABLE) -LIST(APPEND external_project_dependencies extern_lib_any) +LIST(APPEND external_project_dependencies lib_any) diff --git a/cmake/external/eigen.cmake b/cmake/external/eigen.cmake index 39b16c3b2b..1f2fdcac65 100644 --- a/cmake/external/eigen.cmake +++ b/cmake/external/eigen.cmake @@ -26,7 +26,14 @@ ExternalProject_Add( TEST_COMMAND "" ) -ADD_LIBRARY(eigen3 INTERFACE) -ADD_DEPENDENCIES(eigen3 extern_eigen3) +if (${CMAKE_VERSION} VERSION_LESS "3.3.0") + set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/eigen3_dummy.c) + file(WRITE ${dummyfile} "const char * dummy_eigen3 = \"${dummyfile}\";") + add_library(eigen3 STATIC ${dummyfile}) +else() + add_library(eigen3 INTERFACE) +endif() -LIST(APPEND external_project_dependencies extern_eigen3) +add_dependencies(eigen3 extern_eigen3) + +LIST(APPEND external_project_dependencies eigen3) From 109937b8d512904d04a1773bdf19ddb756ecd087 Mon Sep 17 00:00:00 2001 From: Superjom Date: Tue, 4 Jul 2017 15:18:20 +0800 Subject: [PATCH 262/542] fix ci error --- paddle/framework/CMakeLists.txt | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 42600277f6..ceff1d3581 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -7,8 +7,6 @@ cc_test(scope_test SRCS scope_test.cc) cc_test(enforce_test SRCS enforce_test.cc) proto_library(attr_type SRCS attr_type.proto) proto_library(op_proto SRCS op_proto.proto DEPS attr_type) - +proto_library(net_proto SRCS net_proto.proto DEPS op_proto) cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto attr_type protobuf) - -proto_library(net_proto SRCS net_proto.proto) -cc_library(net SRCS net.cc DEPS net_proto attr_type op_proto) \ No newline at end of file +cc_library(net SRCS net.cc DEPS net_proto attr_type op_proto) From ff36389452c1af6cc6a5f03b5ca52404ab20f108 Mon Sep 17 00:00:00 2001 From: liaogang Date: Tue, 4 Jul 2017 15:21:24 +0800 Subject: [PATCH 263/542] ENH: code style --- paddle/memory/detail/buddy_allocator.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/paddle/memory/detail/buddy_allocator.cc b/paddle/memory/detail/buddy_allocator.cc index e8d694327d..eddfd9d13c 100644 --- a/paddle/memory/detail/buddy_allocator.cc 
+++ b/paddle/memory/detail/buddy_allocator.cc @@ -48,7 +48,6 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) { // if the allocation is huge, send directly to the system allocator if (size > max_chunk_size_) { DLOG(INFO) << "Allocate from system allocator."; - return SystemAlloc(size); } From 817f317bef82eb2c024927e6a62b048a1ba93d4a Mon Sep 17 00:00:00 2001 From: liaogang Date: Tue, 4 Jul 2017 15:39:08 +0800 Subject: [PATCH 264/542] FIX: INTERFACE path --- cmake/external/any.cmake | 2 +- cmake/external/eigen.cmake | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/external/any.cmake b/cmake/external/any.cmake index edf6edc0bd..45e3764e84 100644 --- a/cmake/external/any.cmake +++ b/cmake/external/any.cmake @@ -2,7 +2,7 @@ INCLUDE(ExternalProject) SET(ANY_SOURCE_DIR ${THIRD_PARTY_PATH}/any) -INCLUDE_DIRECTORIES(${ANY_SOURCE_DIR}/src/linb_any) +INCLUDE_DIRECTORIES(${ANY_SOURCE_DIR}/src/extern_lib_any) ExternalProject_Add( extern_lib_any diff --git a/cmake/external/eigen.cmake b/cmake/external/eigen.cmake index 1f2fdcac65..3e6cedbb0d 100644 --- a/cmake/external/eigen.cmake +++ b/cmake/external/eigen.cmake @@ -2,7 +2,7 @@ INCLUDE(ExternalProject) SET(EIGEN_SOURCE_DIR ${THIRD_PARTY_PATH}/eigen3) -INCLUDE_DIRECTORIES(${EIGEN_SOURCE_DIR}/src/eigen3) +INCLUDE_DIRECTORIES(${EIGEN_SOURCE_DIR}/src/extern_eigen3) ExternalProject_Add( extern_eigen3 From 9045063b535c400ff8ebf20d0b8534103ec6d9ab Mon Sep 17 00:00:00 2001 From: Qiao Longfei Date: Tue, 4 Jul 2017 15:58:15 +0800 Subject: [PATCH 265/542] pserver etcd client (#2559) * init etcd cclient * add etcd * add etcd.go * fix compile problem * move code to etcd.go * add etcd_lister.go for pserver client * add etcd_client_test.go * merge etcd_client_test and client_test * refine client_test.go * refine code * format code * add TODO and use interface instead of struct * fix typo of initDesiredPservers * optimize dir structure of go/pserver/client * add a flag to config index for pserver * follow comment * fix path * optimize code * remove err in pserver NewEtcd * restore comment about /ps_desired --- CMakeLists.txt | 2 +- go/CMakeLists.txt | 2 +- go/cmd/pserver/pserver.go | 16 ++- go/master/etcd_client.go | 4 +- .../{cclient => client/c}/CMakeLists.txt | 2 +- go/pserver/{cclient => client/c}/cclient.go | 26 ++-- .../{cclient => client/c}/test/CMakeLists.txt | 0 .../{cclient => client/c}/test/test_cclient.c | 0 .../{cclient => client/c}/test/test_mnist.py | 0 .../{cclient => client/c}/test/test_train.py | 0 .../c}/test/testdata/optimizer.pb | Bin go/pserver/{ => client}/client.go | 17 +-- go/pserver/{ => client}/client_test.go | 77 +++++++++-- go/pserver/client/etcd_client.go | 125 ++++++++++++++++++ go/pserver/etcd_client.go | 13 +- go/pserver/optimizer.go | 2 +- go/pserver/optimizer_test.go | 2 +- go/pserver/service.go | 3 - go/pserver/service_test.go | 8 +- 19 files changed, 246 insertions(+), 53 deletions(-) rename go/pserver/{cclient => client/c}/CMakeLists.txt (67%) rename go/pserver/{cclient => client/c}/cclient.go (88%) rename go/pserver/{cclient => client/c}/test/CMakeLists.txt (100%) rename go/pserver/{cclient => client/c}/test/test_cclient.c (100%) rename go/pserver/{cclient => client/c}/test/test_mnist.py (100%) rename go/pserver/{cclient => client/c}/test/test_train.py (100%) rename go/pserver/{cclient => client/c}/test/testdata/optimizer.pb (100%) rename go/pserver/{ => client}/client.go (92%) rename go/pserver/{ => client}/client_test.go (54%) create mode 100644 go/pserver/client/etcd_client.go diff --git 
a/CMakeLists.txt b/CMakeLists.txt index 5349f59805..5bedbbefa8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -113,7 +113,7 @@ include(coveralls) # set code coverage
 include_directories("${PROJ_ROOT}")
 include_directories("${PROJ_ROOT}/paddle/cuda/include")
 include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto")
-include_directories("${CMAKE_CURRENT_BINARY_DIR}/go/pserver/cclient")
+include_directories("${CMAKE_CURRENT_BINARY_DIR}/go/pserver/client/c")
 include_directories(${Boost_INCLUDE_DIRS})
 set(EXTERNAL_LIBS
diff --git a/go/CMakeLists.txt b/go/CMakeLists.txt index 014697d155..f00c70a058 100644 --- a/go/CMakeLists.txt +++ b/go/CMakeLists.txt @@ -13,7 +13,7 @@ # limitations under the License. #
-add_subdirectory(pserver/cclient)
+add_subdirectory(pserver/client/c)
 add_subdirectory(cmd/pserver)
 add_subdirectory(cmd/master)
 add_subdirectory(master/c)
diff --git a/go/cmd/pserver/pserver.go b/go/cmd/pserver/pserver.go index 8a42d4f8af..31ef450f03 100644 --- a/go/cmd/pserver/pserver.go +++ b/go/cmd/pserver/pserver.go @@ -15,6 +15,7 @@ import (
 func main() {
 	port := flag.Int("port", 0, "port of the pserver")
+	index := flag.Int("index", -1, "index of this pserver, should be greater than or equal to 0")
 	etcdEndpoint := flag.String("etcd-endpoint", "http://127.0.0.1:2379", "comma separated endpoint string for pserver to connect to etcd")
 	etcdTimeout := flag.Int("etcd-timeout", 5, "timeout for etcd calls")
@@ -29,11 +30,16 @@ func main() { }
 	log.SetLevel(level)
-	timeout := time.Second * time.Duration((*etcdTimeout))
-	e := pserver.NewEtcdClient(*etcdEndpoint, *numPservers, timeout)
-	idx, err := e.Register()
-	if err != nil {
-		panic(err)
+	var idx int
+	if *index >= 0 {
+		idx = *index
+	} else {
+		timeout := time.Second * time.Duration((*etcdTimeout))
+		e := pserver.NewEtcdClient(*etcdEndpoint, *numPservers, timeout)
+		idx, err = e.Register()
+		if err != nil {
+			panic(err)
+		}
 	}
 	s, err := pserver.NewService(idx)
diff --git a/go/master/etcd_client.go b/go/master/etcd_client.go index e27c014792..04c1394e96 100644 --- a/go/master/etcd_client.go +++ b/go/master/etcd_client.go @@ -50,7 +50,7 @@ func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePat
 	lock := concurrency.NewMutex(sess, lockPath)
 	// It's fine for the lock to get stuck, in this case we have
 	// multiple master servers running (only configured to have
-	// one master running, but split-brain problem may cuase
+	// one master running, but split-brain problem may cause
 	// multiple master servers running), and the cluster management
 	// software will kill one of them.
 	log.Debugf("Trying to acquire lock at %s.", lockPath)
@@ -98,7 +98,7 @@ func (e *EtcdClient) Save(state []byte) error {
 		// We lost the master lock and can not acquire
 		// it back, it means some other master is
 		// already started. We don't want cluster
-		// managment system to kill the master server
+		// management system to kill the master server
 		// who is holding the lock and running
 		// correctly. So the most feasible solution is
 		// to kill current master server.
The current diff --git a/go/pserver/cclient/CMakeLists.txt b/go/pserver/client/c/CMakeLists.txt similarity index 67% rename from go/pserver/cclient/CMakeLists.txt rename to go/pserver/client/c/CMakeLists.txt index 7fe74c62f1..a3fcaeef19 100644 --- a/go/pserver/cclient/CMakeLists.txt +++ b/go/pserver/client/c/CMakeLists.txt @@ -1,5 +1,5 @@ cc_library(paddle_go_optimizer DEPS paddle_optimizer paddle_proto glog gflags protobuf) -go_library(paddle_pserver_cclient STATIC) +go_library(paddle_pserver_cclient STATIC DEPS paddle_go_optimizer) if(WITH_TESTING) add_subdirectory(test) endif() diff --git a/go/pserver/cclient/cclient.go b/go/pserver/client/c/cclient.go similarity index 88% rename from go/pserver/cclient/cclient.go rename to go/pserver/client/c/cclient.go index bbaf43d9f1..7ddaceb7ed 100644 --- a/go/pserver/cclient/cclient.go +++ b/go/pserver/client/c/cclient.go @@ -30,15 +30,16 @@ import ( "unsafe" "github.com/PaddlePaddle/Paddle/go/pserver" + "github.com/PaddlePaddle/Paddle/go/pserver/client" log "github.com/sirupsen/logrus" ) var nullPtr = unsafe.Pointer(uintptr(0)) var mu sync.Mutex -var handleMap = make(map[C.paddle_pserver_client]*pserver.Client) +var handleMap = make(map[C.paddle_pserver_client]*client.Client) var curHandle C.paddle_pserver_client -func add(c *pserver.Client) C.paddle_pserver_client { +func add(c *client.Client) C.paddle_pserver_client { mu.Lock() defer mu.Unlock() client := curHandle @@ -47,13 +48,13 @@ func add(c *pserver.Client) C.paddle_pserver_client { return client } -func get(client C.paddle_pserver_client) *pserver.Client { +func get(client C.paddle_pserver_client) *client.Client { mu.Lock() defer mu.Unlock() return handleMap[client] } -func remove(client C.paddle_pserver_client) *pserver.Client { +func remove(client C.paddle_pserver_client) *client.Client { mu.Lock() defer mu.Unlock() h := handleMap[client] @@ -80,9 +81,9 @@ func (s selector) Select() bool { return bool(s) } -type lister []pserver.Server +type lister []client.Server -func (l lister) List() []pserver.Server { +func (l lister) List() []client.Server { return l } @@ -90,19 +91,22 @@ func (l lister) List() []pserver.Server { func paddle_new_pserver_client(addrs *C.char, selected int) C.paddle_pserver_client { a := C.GoString(addrs) as := strings.Split(a, ",") - servers := make([]pserver.Server, len(as)) + servers := make([]client.Server, len(as)) for i := range as { servers[i].Index = i servers[i].Addr = as[i] } - c := pserver.NewClient(lister(servers), len(as), selector(selected != 0)) + c := client.NewClient(lister(servers), len(as), selector(selected != 0)) return add(c) } //export paddle_new_etcd_pserver_client -func paddle_new_etcd_pserver_client(etcd_addr *C.char) C.paddle_pserver_client { - // TODO(helin): fault tolerant pserver client using etcd. 
-	panic("not implemented.")
+func paddle_new_etcd_pserver_client(etcd_endpoints *C.char, selected int) C.paddle_pserver_client {
+	// TODO(Longfei): use etcd lock to decide which trainer initializes the parameters
+	addr := C.GoString(etcd_endpoints)
+	etcd_client := client.NewEtcd(addr)
+	c := client.NewClient(etcd_client, etcd_client.Desired(), selector(selected != 0))
+	return add(c)
 }
 //export paddle_pserver_client_release
diff --git a/go/pserver/cclient/test/CMakeLists.txt b/go/pserver/client/c/test/CMakeLists.txt similarity index 100% rename from go/pserver/cclient/test/CMakeLists.txt rename to go/pserver/client/c/test/CMakeLists.txt
diff --git a/go/pserver/cclient/test/test_cclient.c b/go/pserver/client/c/test/test_cclient.c similarity index 100% rename from go/pserver/cclient/test/test_cclient.c rename to go/pserver/client/c/test/test_cclient.c
diff --git a/go/pserver/cclient/test/test_mnist.py b/go/pserver/client/c/test/test_mnist.py similarity index 100% rename from go/pserver/cclient/test/test_mnist.py rename to go/pserver/client/c/test/test_mnist.py
diff --git a/go/pserver/cclient/test/test_train.py b/go/pserver/client/c/test/test_train.py similarity index 100% rename from go/pserver/cclient/test/test_train.py rename to go/pserver/client/c/test/test_train.py
diff --git a/go/pserver/cclient/test/testdata/optimizer.pb b/go/pserver/client/c/test/testdata/optimizer.pb similarity index 100% rename from go/pserver/cclient/test/testdata/optimizer.pb rename to go/pserver/client/c/test/testdata/optimizer.pb
diff --git a/go/pserver/client.go b/go/pserver/client/client.go similarity index 92% rename from go/pserver/client.go rename to go/pserver/client/client.go index 6938b9d5ce..aa8bfe30c2 100644 --- a/go/pserver/client.go +++ b/go/pserver/client/client.go @@ -1,4 +1,4 @@
-package pserver
+package client
 import (
 	"errors"
@@ -7,6 +7,7 @@ import (
 	"time"
 	"github.com/PaddlePaddle/Paddle/go/connection"
+	"github.com/PaddlePaddle/Paddle/go/pserver"
 	log "github.com/sirupsen/logrus"
 )
@@ -105,7 +106,7 @@ func (c *Client) BeginInitParams() bool { }
 // InitParam initializes the parameter on parameter servers.
-func (c *Client) InitParam(paramWithConfigs ParameterWithConfig) error {
+func (c *Client) InitParam(paramWithConfigs pserver.ParameterWithConfig) error {
 	return c.pservers[c.partition(paramWithConfigs.Param.Name)].Call("Service.InitParam", paramWithConfigs, nil)
 }
@@ -123,13 +124,13 @@ func (c *Client) FinishInitParams() error {
 // SendGrads sends gradients to parameter servers for updating
 // parameters.
-func (c *Client) SendGrads(grads []Gradient) error {
+func (c *Client) SendGrads(grads []pserver.Gradient) error {
 	if len(grads) == 0 {
 		return errors.New("no gradient received")
 	}
 	errCh := make(chan error, len(grads))
 	for _, g := range grads {
-		go func(g Gradient) {
+		go func(g pserver.Gradient) {
 			err := c.pservers[c.partition(g.Name)].Call("Service.SendGrad", g, nil)
 			errCh <- err
 		}(g)
@@ -151,7 +152,7 @@ func (c *Client) SendGrads(grads []Gradient) error {
 type result struct {
 	idx   int
-	param Parameter
+	param pserver.Parameter
 	err   error
 }
@@ -170,12 +171,12 @@ func (r results) Swap(i int, j int) { }
 // GetParams gets parameters from parameter servers.
-func (c *Client) GetParams(names []string) ([]Parameter, error) {
+func (c *Client) GetParams(names []string) ([]pserver.Parameter, error) {
 	rCh := make(chan result, len(names))
 	for idx, name := range names {
 		go func(name string, idx int) {
-			var parameter Parameter
+			var parameter pserver.Parameter
 			err := c.pservers[c.partition(name)].Call("Service.GetParam", name, &parameter)
 			rCh <- result{idx: idx, param: parameter, err: err}
 		}(name, idx)
@@ -196,7 +197,7 @@ func (c *Client) GetParams(names []string) ([]Parameter, error) {
 	}
 	sort.Sort(rs)
-	ps := make([]Parameter, len(rs))
+	ps := make([]pserver.Parameter, len(rs))
 	for i := range rs {
 		ps[i] = rs[i].param
 	}
diff --git a/go/pserver/client_test.go b/go/pserver/client/client_test.go similarity index 54% rename from go/pserver/client_test.go rename to go/pserver/client/client_test.go index b805efa921..29b400812c 100644 --- a/go/pserver/client_test.go +++ b/go/pserver/client/client_test.go @@ -1,6 +1,7 @@
-package pserver_test
+package client_test
 import (
+	"context"
 	"io/ioutil"
 	"net"
 	"net/http"
@@ -8,15 +9,25 @@ import (
 	"strconv"
 	"strings"
 	"testing"
+	"time"
 	"github.com/PaddlePaddle/Paddle/go/pserver"
+	"github.com/PaddlePaddle/Paddle/go/pserver/client"
+	"github.com/coreos/etcd/clientv3"
+	log "github.com/sirupsen/logrus"
 )
-const numPserver = 10
+const (
+	numPserver    = 10
+	etcdEndpoints = "127.0.0.1:2379"
+	timeout       = 2 * time.Second
+)
-var port [numPserver]int
+var pserverClientPorts [numPserver]int
-func init() {
+// this function initializes pservers and returns their ports in an array.
+func initClient() [numPserver]int {
+	var ports [numPserver]int
 	for i := 0; i < numPserver; i++ {
 		l, err := net.Listen("tcp", ":0")
 		if err != nil {
@@ -28,7 +39,7 @@ func init() {
 		if err != nil {
 			panic(err)
 		}
-		port[i] = p
+		ports[i] = p
 		go func(l net.Listener) {
 			s, err := pserver.NewService(0)
@@ -49,6 +60,31 @@ func init() {
 		}
 		}(l)
 	}
+	return ports
+}
+
+func initNativeClient() {
+	pserverClientPorts = initClient()
+}
+
+func initEtcdClient() {
+	client, err := clientv3.New(clientv3.Config{
+		Endpoints:   []string{etcdEndpoints},
+		DialTimeout: time.Second * time.Duration(1),
+	})
+	if err != nil {
+		log.Errorf("err %v", err)
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), timeout)
+	client.Delete(ctx, pserver.PsDesired)
+	client.Delete(ctx, pserver.PsPath)
+	client.Put(ctx, pserver.PsDesired, strconv.Itoa(numPserver))
+	ports := initClient()
+	for i := 0; i < numPserver; i++ {
+		client.Put(ctx, pserver.PsPath+strconv.Itoa(i), ":"+strconv.Itoa(ports[i]))
+	}
+	cancel()
+	client.Close()
 }
 type selector bool
@@ -57,25 +93,20 @@ func (s selector) Select() bool {
 	return bool(s)
 }
-type lister []pserver.Server
+type lister []client.Server
-func (l lister) List() []pserver.Server {
+func (l lister) List() []client.Server {
 	return l
 }
-func TestClientFull(t *testing.T) {
-	servers := make([]pserver.Server, numPserver)
-	for i := 0; i < numPserver; i++ {
-		servers[i] = pserver.Server{Index: i, Addr: ":" + strconv.Itoa(port[i])}
-	}
-	c := pserver.NewClient(lister(servers), len(servers), selector(true))
+func ClientTest(t *testing.T, c *client.Client) {
 	selected := c.BeginInitParams()
 	if !selected {
 		t.Fatal("should be selected.")
 	}
 	const numParameter = 100
-	config, err := ioutil.ReadFile("./cclient/test/testdata/optimizer.pb")
+	config, err := ioutil.ReadFile("./c/test/testdata/optimizer.pb")
 	if err != nil {
 		t.Fatalf("read optimizer proto failed")
 	}
@@ -129,3 +160,21 @@ func TestClientFull(t *testing.T) {
 	}
 	}
 }
+
+func TestNativeClient(t
*testing.T) {
+	initNativeClient()
+	servers := make([]client.Server, numPserver)
+	for i := 0; i < numPserver; i++ {
+		servers[i] = client.Server{Index: i, Addr: ":" + strconv.Itoa(pserverClientPorts[i])}
+	}
+	c1 := client.NewClient(lister(servers), len(servers), selector(true))
+	ClientTest(t, c1)
+}
+
+// TODO: temporarily disable etcdClient test for dependency of etcd
+func EtcdClient(t *testing.T) {
+	initEtcdClient()
+	etcd_client := client.NewEtcd(etcdEndpoints)
+	c2 := client.NewClient(etcd_client, etcd_client.Desired(), selector(true))
+	ClientTest(t, c2)
+}
diff --git a/go/pserver/client/etcd_client.go b/go/pserver/client/etcd_client.go new file mode 100644 index 0000000000..1fd3479aa8 --- /dev/null +++ b/go/pserver/client/etcd_client.go @@ -0,0 +1,125 @@
+package client
+
+import (
+	"context"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/PaddlePaddle/Paddle/go/pserver"
+	"github.com/coreos/etcd/clientv3"
+	log "github.com/sirupsen/logrus"
+)
+
+const (
+	DefaultEtcdTimeout time.Duration = 5 * time.Second
+)
+
+// EtcdClient is used by the pserver client that is a part of the trainer process.
+// TODO:
+// 1. add watcher to watch the changed state of pservers
+// 2. add etcd lock
+type EtcdClient struct {
+	client    *clientv3.Client
+	timeout   time.Duration
+	endpoints []string
+}
+
+// Desired reads the desired number of pservers from etcd.
+func (p *EtcdClient) Desired() int {
+	var psDesired int
+	for {
+		ctx, cancel := context.WithTimeout(context.Background(), p.timeout)
+		resp, err := p.client.Get(ctx, pserver.PsDesired)
+		cancel()
+		if err != nil {
+			log.Errorf("Get ps desired number failed! reconnecting..., %v", err)
+			time.Sleep(p.timeout)
+			continue
+		}
+
+		kvs := resp.Kvs
+		if len(kvs) == 0 {
+			log.Infoln("Waiting for ps desired registered ...")
+			time.Sleep(p.timeout)
+			continue
+		}
+
+		psDesired, err = strconv.Atoi(string(resp.Kvs[0].Value))
+		if err != nil {
+			log.Errorf("psDesired %d invalid %v", psDesired, err)
+			time.Sleep(p.timeout)
+			continue
+		}
+
+		log.Debugf("Get psDesired number: %d", psDesired)
+		break
+	}
+	return psDesired
+}
+
+// List returns the pserver list read from etcd.
+func (p *EtcdClient) List() []Server {
+	psDesired := p.Desired()
+
+	servers := make([]Server, psDesired)
+	for {
+		for i := 0; i < psDesired; i++ {
+			ctx, cancel := context.WithTimeout(context.Background(), p.timeout)
+			psKey := pserver.PsPath + strconv.Itoa(i)
+			log.Debugf("checking %s", psKey)
+			resp, err := p.client.Get(ctx, psKey)
+			cancel()
+			if err != nil {
+				log.Infof("Get psKey= %s error, %v", psKey, err)
+				time.Sleep(p.timeout)
+				continue
+			}
+			kvs := resp.Kvs
+			if len(kvs) == 0 {
+				log.Infof("Waiting for ps addr registered ...")
+				time.Sleep(p.timeout)
+				continue
+			}
+
+			psAddr := string(resp.Kvs[0].Value)
+			// TODO(Longfei) check the ps address
+			if psAddr == "" {
+				log.Infof("Get psKey = %s, psAddr is empty", psKey)
+				time.Sleep(p.timeout)
+				continue
+			}
+			log.Infof("got value (%s) for key: %s", psAddr, psKey)
+			servers[i].Index = i
+			servers[i].Addr = psAddr
+		}
+		break
+	}
+	return servers
+}
+
+// NewEtcd creates an etcd client to return the state of pservers on etcd.
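+// A minimal trainer-side usage sketch (sel stands for a hypothetical
+// Selector value; the cgo wrapper in client/c/cclient.go wires one up
+// the same way):
+//
+//	e := NewEtcd("127.0.0.1:2379")
+//	c := NewClient(e, e.Desired(), sel)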
+func NewEtcd(endpoints string) *EtcdClient {
+	ep := strings.Split(endpoints, ",")
+	var cli *clientv3.Client
+	var err error
+	for {
+		cli, err = clientv3.New(clientv3.Config{
+			Endpoints:   ep,
+			DialTimeout: DefaultEtcdTimeout,
+		})
+		if err != nil {
+			log.Errorf("Init etcd connection failed: %v", err)
+			time.Sleep(DefaultEtcdTimeout)
+			continue
+		}
+		break
+	}
+	log.Infof("Connected to etcd: %s\n", endpoints)
+	client := &EtcdClient{
+		client:    cli,
+		timeout:   DefaultEtcdTimeout,
+		endpoints: ep,
+	}
+	return client
+}
diff --git a/go/pserver/etcd_client.go b/go/pserver/etcd_client.go index 4d88243edd..37b8d522c1 100644 --- a/go/pserver/etcd_client.go +++ b/go/pserver/etcd_client.go @@ -13,6 +13,13 @@ import (
 	log "github.com/sirupsen/logrus"
 )
+const (
+	// PsDesired is the etcd path for storing the desired pserver count
+	PsDesired = "/ps_desired"
+	// PsPath is the base dir for pservers to store their addrs
+	PsPath = "/ps/"
+)
+
 // EtcdClient is the etcd client that the pserver uses for fault
 // tolerance, service registry and coordination.
 type EtcdClient struct {
@@ -68,7 +75,7 @@ func (e *EtcdClient) Register() (int, error) {
 	// it at the same time.
 	for {
 		ctx, cancel := context.WithTimeout(context.Background(), time.Second)
-		_, err := e.initDesiredPsercers(ctx, e.numPservers)
+		_, err := e.initDesiredPservers(ctx, e.numPservers)
 		cancel()
 		if err != nil {
 			log.Warn(err)
@@ -120,7 +127,7 @@ func (e *EtcdClient) Register() (int, error) {
 	return pserverIdx, nil
 }
-func (e *EtcdClient) initDesiredPsercers(ctx context.Context, numPservers int) (*clientv3.TxnResponse, error) {
+func (e *EtcdClient) initDesiredPservers(ctx context.Context, numPservers int) (*clientv3.TxnResponse, error) {
 	return concurrency.NewSTM(e.etcdClient, func(c concurrency.STM) error {
 		dsStr := c.Get(PsDesired)
 		if dsStr == "" {
@@ -136,7 +143,7 @@ func (e *EtcdClient) registerPserverEtcd(ctx context.Context) (int, error) {
 	_, err := concurrency.NewSTM(e.etcdClient, func(c concurrency.STM) error {
 		registered := false
 		for i := 0; i < e.desired; i++ {
-			psKey := "/ps/" + strconv.Itoa(i)
+			psKey := PsPath + strconv.Itoa(i)
 			log.Debugf("checking %s", psKey)
 			ps := c.Get(psKey)
 			log.Debugf("got value (%s) for key: %s", ps, psKey)
diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go index b4a040f46b..bca3718af3 100644 --- a/go/pserver/optimizer.go +++ b/go/pserver/optimizer.go @@ -2,7 +2,7 @@ package pserver
 // #cgo CFLAGS: -I ../../
 // //FIXME: ldflags contain "build" path
-// #cgo LDFLAGS: ../../build/go/pserver/cclient/libpaddle_go_optimizer.a -lstdc++
+// #cgo LDFLAGS: ../../build/go/pserver/client/c/libpaddle_go_optimizer.a -lstdc++
 // #include "paddle/optimizer/optimizer.h"
 // #include
 // #include
diff --git a/go/pserver/optimizer_test.go b/go/pserver/optimizer_test.go index b99b5a5f0b..0b2f4cfa41 100644 --- a/go/pserver/optimizer_test.go +++ b/go/pserver/optimizer_test.go @@ -11,7 +11,7 @@ func TestOptimizerCreateRelease(t *testing.T) {
 		ElementType: Int32,
 	}
 	p.Content = []byte{1, 3}
-	config, err := ioutil.ReadFile("./cclient/test/testdata/optimizer.pb")
+	config, err := ioutil.ReadFile("./client/c/test/testdata/optimizer.pb")
 	if err != nil {
 		t.Fatalf("read optimizer proto failed")
 	}
diff --git a/go/pserver/service.go b/go/pserver/service.go index e15a4e5a58..7711dc027e 100644 --- a/go/pserver/service.go +++ b/go/pserver/service.go @@ -24,9 +24,6 @@ const (
 	Float64
 )
-// PsDesired is etcd path for store desired pserver count
-const PsDesired = "/ps_desired"
-
 // Parameter is a piece of data to sync with the parameter
server. type Parameter struct { Name string diff --git a/go/pserver/service_test.go b/go/pserver/service_test.go index 30e3ac8ae1..b6d20d2c8b 100644 --- a/go/pserver/service_test.go +++ b/go/pserver/service_test.go @@ -10,6 +10,10 @@ import ( "github.com/PaddlePaddle/Paddle/go/pserver" ) +const ( + OptimizerConfig = "./client/c/test/testdata/optimizer.pb" +) + func TestServiceFull(t *testing.T) { s, err := pserver.NewService(0) if err != nil { @@ -19,7 +23,7 @@ func TestServiceFull(t *testing.T) { p.Name = "param_a" p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0} p.ElementType = pserver.Int32 - config, err := ioutil.ReadFile("./cclient/test/testdata/optimizer.pb") + config, err := ioutil.ReadFile(OptimizerConfig) if err != nil { t.Fatalf("read optimizer proto failed") } @@ -149,7 +153,7 @@ func TestBlockUntilInitialized(t *testing.T) { p.Name = "param_a" p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0} p.ElementType = pserver.Int32 - config, err := ioutil.ReadFile("./cclient/test/testdata/optimizer.pb") + config, err := ioutil.ReadFile(OptimizerConfig) if err != nil { t.Fatalf("read optimizer proto failed") } From 3f5e5a24c497714530e8f55f2f076fc4e3168d9c Mon Sep 17 00:00:00 2001 From: gongweibao Date: Tue, 4 Jul 2017 08:16:08 +0000 Subject: [PATCH 266/542] fix cmake error --- .travis.yml | 2 +- go/master/c/CMakeLists.txt | 2 +- go/pserver/optimizer.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index a53bd18094..4f72e2ca33 100644 --- a/.travis.yml +++ b/.travis.yml @@ -42,7 +42,7 @@ before_install: function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; } script: - | - timeout 2580 paddle/scripts/travis/${JOB}.sh # 43min timeout + timeout 2580 paddle/scripts/travis/${JOB}.sh -e "WITH_GOLANG=ON" # 43min timeout RESULT=$?; if [ $RESULT -eq 0 ] || [ $RESULT -eq 142 ]; then true; else false; fi; notifications: email: diff --git a/go/master/c/CMakeLists.txt b/go/master/c/CMakeLists.txt index 94d6bb0b2e..d900850be0 100644 --- a/go/master/c/CMakeLists.txt +++ b/go/master/c/CMakeLists.txt @@ -1 +1 @@ -go_library(paddle_master SHARED) +go_library(paddle_master SHARED DEPS paddle_go_optimizer) diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go index bca3718af3..d84f55b987 100644 --- a/go/pserver/optimizer.go +++ b/go/pserver/optimizer.go @@ -2,7 +2,7 @@ package pserver // #cgo CFLAGS: -I ../../ // //FIXME: ldflags contain "build" path -// #cgo LDFLAGS: ../../build/go/pserver/client/c/libpaddle_go_optimizer.a -lstdc++ +// #cgo LDFLAGS: ../../build/go/pserver/client/c/libpaddle_go_optimizer.a -lstdc++ -lm // #include "paddle/optimizer/optimizer.h" // #include // #include From d8941e67ec5da7333666b31264704dae7d830ca2 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Tue, 4 Jul 2017 08:24:28 +0000 Subject: [PATCH 267/542] fix bugs --- .travis.yml | 2 +- paddle/scripts/docker/build.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 4f72e2ca33..16432dac0c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -42,7 +42,7 @@ before_install: function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; } script: - | - timeout 2580 paddle/scripts/travis/${JOB}.sh -e "WITH_GOLANG=ON" # 43min timeout + export WITH_GOLANG=ON && timeout 2580 paddle/scripts/travis/${JOB}.sh # 43min timeout RESULT=$?; if [ $RESULT -eq 0 ] || [ $RESULT -eq 142 ]; then true; else false; fi; notifications: email: diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 
a182e5f4ae..1ccee686df 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -3,7 +3,7 @@
 set -xe
 # Set BASE_IMAGE according to env variables
-if [ ${WITH_GPU} == "ON" ]; then
+if [[ ${WITH_GPU} == "ON" ]]; then
   BASE_IMAGE="nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04"
 else
   BASE_IMAGE="ubuntu:16.04"
From e95299b58300afda0d61e868998dfceb28e999da Mon Sep 17 00:00:00 2001 From: Superjom Date: Tue, 4 Jul 2017 16:28:21 +0800 Subject: [PATCH 268/542] fix ci error
--- paddle/framework/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index ceff1d3581..0abc63a831 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -7,6 +7,6 @@ cc_test(scope_test SRCS scope_test.cc)
 cc_test(enforce_test SRCS enforce_test.cc)
 proto_library(attr_type SRCS attr_type.proto)
 proto_library(op_proto SRCS op_proto.proto DEPS attr_type)
+cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf)
 proto_library(net_proto SRCS net_proto.proto DEPS op_proto)
-cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto attr_type protobuf)
 cc_library(net SRCS net.cc DEPS net_proto attr_type op_proto)
From 86543f7f6a8f0fc073977794abee9ae5b033f78e Mon Sep 17 00:00:00 2001 From: yangyaming Date: Tue, 4 Jul 2017 16:40:00 +0800 Subject: [PATCH 269/542] Follow comments.
--- doc/api/v2/config/layer.rst | 2 +- paddle/gserver/layers/DetectionOutputLayer.h | 8 +- paddle/gserver/layers/MultiBoxLossLayer.cpp | 6 +- paddle/gserver/layers/MultiBoxLossLayer.h | 2 +- .../paddle/trainer_config_helpers/layers.py | 20 +++-- .../test_detection_output_layer.protostr | 66 ++++++++++++++++ .../test_multibox_loss_layer.protostr | 79 +++++++++++++++++++ 7 files changed, 164 insertions(+), 19 deletions(-) create mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_detection_output_layer.protostr create mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_multibox_loss_layer.protostr
diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst index 0a8465919d..4f4a9187bc 100644 --- a/doc/api/v2/config/layer.rst +++ b/doc/api/v2/config/layer.rst @@ -478,6 +478,6 @@ Detection output Layer
 ======================
 detection_output
----
+----------------
 .. autoclass:: paddle.v2.layer.detection_output
     :noindex:
diff --git a/paddle/gserver/layers/DetectionOutputLayer.h b/paddle/gserver/layers/DetectionOutputLayer.h index 9cc568219c..a232af0a69 100644 --- a/paddle/gserver/layers/DetectionOutputLayer.h +++ b/paddle/gserver/layers/DetectionOutputLayer.h @@ -22,14 +22,14 @@ limitations under the License. */
 namespace paddle {
 /**
- * The detection output layer for a SSD detection task. This layer apply the
- * Non-maximum suppression to the all predicted bounding box and keep the
+ * The detection output layer for a SSD detection task. This layer applies
+ * Non-maximum suppression to all the predicted bounding boxes and keeps the
 * Top-K bounding boxes.
- * - Input: This layer needs three input layers: This first input layer
+ * - Input: This layer needs three input layers: The first input layer
 *   is the priorbox layer. The rest two input layers are convolution
 *   layers for generating bbox location offset and the classification
 *   confidence.
- * - Output: The predict bounding box location.
+ * - Output: The predicted bounding box locations.
*/ class DetectionOutputLayer : public Layer {
diff --git a/paddle/gserver/layers/MultiBoxLossLayer.cpp b/paddle/gserver/layers/MultiBoxLossLayer.cpp index f2d7b8eb1d..bbf1166dce 100644 --- a/paddle/gserver/layers/MultiBoxLossLayer.cpp +++ b/paddle/gserver/layers/MultiBoxLossLayer.cpp @@ -258,8 +258,7 @@ void MultiBoxLossLayer::forward(PassType passType) {
 	}
 	real loss = locLoss_ + confLoss_;
 	MatrixPtr outV = getOutputValue();
-	std::vector tmp(batchSize, loss);
-	outV->copyFrom(&tmp[0], batchSize);
+	outV->assign(loss);
 }
 void MultiBoxLossLayer::backward(const UpdateCallback& callback) {
@@ -336,6 +335,9 @@ void MultiBoxLossLayer::backward(const UpdateCallback& callback) {
 	const MatrixPtr inLocG = getInputGrad(*getLocInputLayer(n));
 	const MatrixPtr inConfG = getInputGrad(*getConfInputLayer(n));
 	size_t height = getInput(*getLocInputLayer(n)).getFrameHeight();
+	// only for unittest, there is no width and height information
+	// when constructing matrices in unittest, so we should
+	// set the shape in the configuration
 	if (!height) height = layerConf.height();
 	size_t width = getInput(*getLocInputLayer(n)).getFrameWidth();
 	if (!width) width = layerConf.width();
diff --git a/paddle/gserver/layers/MultiBoxLossLayer.h b/paddle/gserver/layers/MultiBoxLossLayer.h index 9767fed7f1..9935da5644 100644 --- a/paddle/gserver/layers/MultiBoxLossLayer.h +++ b/paddle/gserver/layers/MultiBoxLossLayer.h @@ -30,7 +30,7 @@ namespace paddle {
 * The loss is composed by the location loss and the confidence loss.
 * The location loss is a smooth L1 loss and the confidence loss is
 * a softmax loss.
- * - Input: This layer need four input layers: This first input layer
+ * - Input: This layer needs four input layers: The first input layer
 *   is the priorbox layer and the second layer is a label layer.
 *   The rest two input layers are convolution layers for generating
 *   bbox location offset and the classification confidence.
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 1286ed198e..86e91e2c57 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -1072,10 +1072,10 @@ def multibox_loss_layer(input_loc,
     :param name: The Layer Name.
     :type name: basestring
-    :param input_loc: The input predict location.
-    :type input_loc: LayerOutput
+    :param input_loc: The input predicted locations.
+    :type input_loc: LayerOutput | List of LayerOutput
     :param input_conf: The input priorbox confidence.
-    :type input_conf: LayerOutput
+    :type input_conf: LayerOutput | List of LayerOutput
     :param priorbox: The input priorbox location and the variance.
     :type priorbox: LayerOutput
     :param label: The input label.
@@ -1146,10 +1146,10 @@ def detection_output_layer(input_loc,
     :param name: The Layer Name.
     :type name: basestring
-    :param input_loc: The input predict location.
-    :type input_loc: LayerOutput
+    :param input_loc: The input predicted locations.
+    :type input_loc: LayerOutput | List of LayerOutput.
     :param input_conf: The input priorbox confidence.
-    :type input_conf: LayerOutput
+    :type input_conf: LayerOutput | List of LayerOutput.
     :param priorbox: The input priorbox location and the variance.
     :type priorbox: LayerOutput
     :param num_classes: The number of the classification.
@@ -1166,22 +1166,20 @@ def detection_output_layer(input_loc, :type background_id: int :return: LayerOutput """ - input_loc_num = 0 - input_conf_num = 0 - if isinstance(input_loc, LayerOutput): input_loc = [input_loc] assert isinstance(input_loc, collections.Sequence) # list or tuple for each in input_loc: assert isinstance(each, LayerOutput) - input_loc_num += 1 + input_loc_num = len(input_loc) if isinstance(input_conf, LayerOutput): input_conf = [input_conf] assert isinstance(input_conf, collections.Sequence) # list or tuple for each in input_conf: assert isinstance(each, LayerOutput) - input_conf_num += 1 + input_conf_num = len(input_conf) + # Check the input layer number. assert input_loc_num == input_conf_num diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_detection_output_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_detection_output_layer.protostr new file mode 100644 index 0000000000..6690f9852a --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_detection_output_layer.protostr @@ -0,0 +1,66 @@ +type: "nn" +layers { + name: "input_loc" + type: "data" + size: 16 + active_type: "" + height: 16 + width: 1 +} +layers { + name: "input_conf" + type: "data" + size: 8 + active_type: "" + height: 1 + width: 8 +} +layers { + name: "priorbox" + type: "data" + size: 32 + active_type: "" + height: 4 + width: 8 +} +layers { + name: "test_detection_output" + type: "detection_output" + size: 1400 + active_type: "" + inputs { + input_layer_name: "priorbox" + detection_output_conf { + num_classes: 21 + nms_threshold: 0.45 + nms_top_k: 400 + background_id: 0 + input_num: 1 + keep_top_k: 200 + confidence_threshold: 0.01 + } + } + inputs { + input_layer_name: "input_loc" + } + inputs { + input_layer_name: "input_conf" + } +} +input_layer_names: "priorbox" +input_layer_names: "input_loc" +input_layer_names: "input_conf" +output_layer_names: "test_detection_output" +sub_models { + name: "root" + layer_names: "input_loc" + layer_names: "input_conf" + layer_names: "priorbox" + layer_names: "test_detection_output" + input_layer_names: "priorbox" + input_layer_names: "input_loc" + input_layer_names: "input_conf" + output_layer_names: "test_detection_output" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_multibox_loss_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_multibox_loss_layer.protostr new file mode 100644 index 0000000000..0ba84dcc6d --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_multibox_loss_layer.protostr @@ -0,0 +1,79 @@ +type: "nn" +layers { + name: "input_loc" + type: "data" + size: 16 + active_type: "" + height: 16 + width: 1 +} +layers { + name: "input_conf" + type: "data" + size: 8 + active_type: "" + height: 1 + width: 8 +} +layers { + name: "priorbox" + type: "data" + size: 32 + active_type: "" + height: 4 + width: 8 +} +layers { + name: "label" + type: "data" + size: 24 + active_type: "" + height: 4 + width: 6 +} +layers { + name: "test_multibox_loss" + type: "multibox_loss" + size: 1 + active_type: "" + inputs { + input_layer_name: "priorbox" + multibox_loss_conf { + num_classes: 21 + overlap_threshold: 0.5 + neg_pos_ratio: 3.0 + neg_overlap: 0.5 + background_id: 0 + input_num: 1 + } + } + inputs { + input_layer_name: "label" + } + inputs { + input_layer_name: "input_loc" + } + inputs { + input_layer_name: "input_conf" + } +} +input_layer_names: 
"priorbox" +input_layer_names: "label" +input_layer_names: "input_loc" +input_layer_names: "input_conf" +output_layer_names: "test_multibox_loss" +sub_models { + name: "root" + layer_names: "input_loc" + layer_names: "input_conf" + layer_names: "priorbox" + layer_names: "label" + layer_names: "test_multibox_loss" + input_layer_names: "priorbox" + input_layer_names: "label" + input_layer_names: "input_loc" + input_layer_names: "input_conf" + output_layer_names: "test_multibox_loss" + is_recurrent_layer_group: false +} + From 379434b243faeaf9fd4d38cf9f95dfe45cc563d5 Mon Sep 17 00:00:00 2001 From: liaogang Date: Tue, 4 Jul 2017 17:21:09 +0800 Subject: [PATCH 270/542] Delete cmake in dynload --- paddle/platform/dynload/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) delete mode 100644 paddle/platform/dynload/CMakeLists.txt diff --git a/paddle/platform/dynload/CMakeLists.txt b/paddle/platform/dynload/CMakeLists.txt deleted file mode 100644 index 9f829b7012..0000000000 --- a/paddle/platform/dynload/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags) From e6fcdd47e5c153497efb54ff5737f4fc6a13596d Mon Sep 17 00:00:00 2001 From: qijun Date: Tue, 4 Jul 2017 17:22:43 +0800 Subject: [PATCH 271/542] fix wrong including header-file in files in paddle/platform/dynload dir --- paddle/platform/dynload/cublas.h | 4 ++- paddle/platform/dynload/cudnn.h | 4 ++- paddle/platform/dynload/curand.h | 4 ++- paddle/platform/dynload/dynamic_loader.cc | 41 ++++++++++------------- 4 files changed, 26 insertions(+), 27 deletions(-) diff --git a/paddle/platform/dynload/cublas.h b/paddle/platform/dynload/cublas.h index c9150ac573..258cc88031 100644 --- a/paddle/platform/dynload/cublas.h +++ b/paddle/platform/dynload/cublas.h @@ -15,7 +15,9 @@ limitations under the License. */ #pragma once #include -#include "paddle/platform/dynamic_loader.h" +#include +#include +#include "paddle/platform/dynload/dynamic_loader.h" namespace paddle { namespace platform { diff --git a/paddle/platform/dynload/cudnn.h b/paddle/platform/dynload/cudnn.h index c03424b375..0a9562c573 100644 --- a/paddle/platform/dynload/cudnn.h +++ b/paddle/platform/dynload/cudnn.h @@ -15,7 +15,9 @@ limitations under the License. */ #pragma once #include -#include "paddle/platform/dynamic_loader.h" +#include +#include +#include "paddle/platform/dynload/dynamic_loader.h" namespace paddle { namespace platform { diff --git a/paddle/platform/dynload/curand.h b/paddle/platform/dynload/curand.h index 1ef7a8c833..9dc0a25c0f 100644 --- a/paddle/platform/dynload/curand.h +++ b/paddle/platform/dynload/curand.h @@ -15,7 +15,9 @@ limitations under the License. */ #pragma once #include -#include "paddle/platform/dynamic_loader.h" +#include +#include +#include "paddle/platform/dynload/dynamic_loader.h" namespace paddle { namespace platform { diff --git a/paddle/platform/dynload/dynamic_loader.cc b/paddle/platform/dynload/dynamic_loader.cc index 8ef67bad8c..dd914e006d 100644 --- a/paddle/platform/dynload/dynamic_loader.cc +++ b/paddle/platform/dynload/dynamic_loader.cc @@ -12,13 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/
-#include "dynamic_loader.h"
+#include "paddle/platform/dynload/dynamic_loader.h"
 #include
 #include
 #include
 #include
 #include "gflags/gflags.h"
 #include "glog/logging.h"
+#include "paddle/framework/enforce.h"
 DEFINE_string(cudnn_dir, "", "Specify path for loading libcudnn.so. For instance, "
@@ -72,13 +73,12 @@ static inline void GetDsoHandleFromDefaultPath(std::string& dso_path,
 	*dso_handle = dlopen(dso_path.c_str(), dynload_flags);
 	if (nullptr == *dso_handle) {
 		if (dso_path == "libcudnn.dylib") {
-			LOG(FATAL)
-			<< "Note: [Recommend] copy cudnn into /usr/local/cuda/ \n"  // NOLINT
-			<< "For instance, sudo tar -xzf "
 "cudnn-7.5-osx-x64-v5.0-ga.tgz -C "  // NOLINT
-			<< "/usr/local \n sudo chmod a+r "
 "/usr/local/cuda/include/cudnn.h "  // NOLINT
-			<< "/usr/local/cuda/lib/libcudnn*";
+			PADDLE_ENFORCE(false,
+					"Note: [Recommend] copy cudnn into /usr/local/cuda/ \n "
+					"For instance, sudo tar -xzf "
+					"cudnn-7.5-osx-x64-v5.0-ga.tgz -C /usr/local \n sudo "
+					"chmod a+r /usr/local/cuda/include/cudnn.h "
+					"/usr/local/cuda/lib/libcudnn*");
 		}
 	}
 }
@@ -106,22 +106,15 @@ static inline void GetDsoHandleFromSearchPath(const std::string& search_root,
 			GetDsoHandleFromDefaultPath(dlPath, dso_handle, dynload_flags);
 		}
 	}
-
-	CHECK(nullptr != *dso_handle) << "Failed to find dynamic library: " << dlPath
-	<< " (" << dlerror() << ") \n"
-	<< "Please specify its path correctly using "
 "following ways: \n"
-
-	<< "Method. set environment variable "
 "LD_LIBRARY_PATH on Linux or "
-	<< "DYLD_LIBRARY_PATH on Mac OS. \n"
-	<< "For instance, issue command: export "
 "LD_LIBRARY_PATH=... \n"
-
-	<< "Note: After Mac OS 10.11, using the "
 "DYLD_LIBRARY_PATH is impossible "
-	<< "unless System Integrity Protection (SIP) "
 "is disabled.";
+	PADDLE_ENFORCE(nullptr != *dso_handle,
+			"Failed to find dynamic library: %s ( %s ) \n Please specify "
+			"its path correctly using following ways: \n Method. set "
+			"environment variable LD_LIBRARY_PATH on Linux or "
+			"DYLD_LIBRARY_PATH on Mac OS. \n For instance, issue command: "
+			"export LD_LIBRARY_PATH=... \n Note: After Mac OS 10.11, "
+			"using the DYLD_LIBRARY_PATH is impossible unless System "
+			"Integrity Protection (SIP) is disabled.",
+			dlPath, dlerror());
 }
 void GetCublasDsoHandle(void** dso_handle) {
From 571714159aeb42903fca14d614dcb1e6942b5cc4 Mon Sep 17 00:00:00 2001 From: Qiao Longfei Date: Tue, 4 Jul 2017 20:04:32 +0800 Subject: [PATCH 272/542] add op_desc.proto (#2736)
* add op_desc.proto
In Operator design, we need a proto message to describe an Operator. Third-party languages such as Python can build this proto message and use AddOp(const OpDesc& op_desc) of Paddle core to construct an Op in the Network.
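For illustration, a minimal Python sketch of building such a message, assuming protoc-generated modules named op_desc_pb2 and attr_type_pb2 (the module names are hypothetical and depend on the build setup):

    # Hypothetical sketch: assumes protoc generated these modules from
    # op_desc.proto and attr_type.proto.
    import attr_type_pb2
    import op_desc_pb2

    op_desc = op_desc_pb2.OpDesc()
    op_desc.type = "cosine"
    op_desc.inputs.extend(["X", "Y"])
    op_desc.outputs.append("Out")

    # scale=3.0, mirroring the cosine op example in the proto comments.
    attr = op_desc.attrs.add()
    attr.name = "scale"
    attr.type = attr_type_pb2.FLOAT
    attr.f = 3.0

    assert op_desc.IsInitialized()
    serialized = op_desc.SerializeToString()  # bytes handed to AddOp()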
--- paddle/framework/CMakeLists.txt | 3 ++ paddle/framework/op_desc.proto | 56 ++++++++++++++++++++++++++++++++ paddle/framework/op_desc_test.cc | 35 ++++++++++++++++++++ 3 files changed, 94 insertions(+) create mode 100644 paddle/framework/op_desc.proto create mode 100644 paddle/framework/op_desc_test.cc
diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index baad38e3c1..a016f57b3e 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -8,3 +8,6 @@ cc_test(enforce_test SRCS enforce_test.cc)
 proto_library(attr_type SRCS attr_type.proto)
 proto_library(op_proto SRCS op_proto.proto DEPS attr_type)
 cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf)
+
+proto_library(op_desc SRCS op_desc.proto DEPS attr_type)
+cc_test(op_desc_test SRCS op_desc_test.cc DEPS op_desc protobuf)
diff --git a/paddle/framework/op_desc.proto b/paddle/framework/op_desc.proto new file mode 100644 index 0000000000..89497f3c16 --- /dev/null +++ b/paddle/framework/op_desc.proto @@ -0,0 +1,56 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+syntax="proto2";
+package paddle.framework;
+
+import "attr_type.proto";
+
+// AttrDesc is used to describe Attributes of an Operator. It contains the
+// name, type, and value of an Attribute.
+//
+// e.g., for scale=3.0: name=scale, type=AttrType.FLOAT, value=3.0
+message AttrDesc {
+    required string name = 1;
+    required AttrType type = 2;
+    optional int32 i = 3;
+    optional float f = 4;
+    optional string s = 5;
+    repeated int32 ints = 6;
+    repeated float floats = 7;
+    repeated string strings = 8;
+};
+
+// Protocol Message to describe an Operator.
+//
+// In PaddlePaddle, Operator is used to do a certain computation such
+// as "add", "sub", "cosine", etc.
+// (1) Operator needs to know the input and output variable names.
+// (2) Some ops may have special attributes such as "scale" in "CosineOp".
+//
+// 3rd-party languages can build this proto message and call
+// AddOp(const OpDesc& op_desc) of Paddle core to create an Operator.
+message OpDesc {
+    // input names of this Operator.
+    repeated string inputs = 1;
+
+    // output names of this Operator.
+    repeated string outputs = 2;
+
+    // type of this Operator, such as "add", "sub", "fc".
+    required string type = 3;
+
+    // Attributes of this Operator. e.g., scale=3.0 in cosine op.
+    repeated AttrDesc attrs = 4;
+};
\ No newline at end of file
diff --git a/paddle/framework/op_desc_test.cc b/paddle/framework/op_desc_test.cc new file mode 100644 index 0000000000..d0c52523b6 --- /dev/null +++ b/paddle/framework/op_desc_test.cc @@ -0,0 +1,35 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include + +TEST(OpDesc, Create) { + paddle::framework::OpDesc op_desc; + op_desc.set_type("add"); + op_desc.add_inputs("X"); + op_desc.add_inputs("Y"); + op_desc.add_outputs("Z"); + + auto attr = op_desc.mutable_attrs()->Add(); + attr->set_type(paddle::framework::AttrType::FLOAT); + attr->set_f(3.14); + + // required field name is not set, so IsInitialized should be false. + ASSERT_FALSE(op_desc.IsInitialized()); + + attr->set_name("add"); + // after all required fields are set, IsInitialized should be true now. + ASSERT_TRUE(op_desc.IsInitialized()); +} \ No newline at end of file From 1ecddd8174fea793e70071163b7e47a750064499 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 4 Jul 2017 21:21:02 +0800 Subject: [PATCH 273/542] Remove buggy BarrierStat The implementation of BarrierStat is buggy, and it is not necessary for Paddle to diagnose which node in cluster is slow. --- paddle/parameter/tests/test_common.cpp | 50 --- paddle/pserver/ParameterServer2.cpp | 215 ------------- paddle/pserver/ParameterServer2.h | 49 --- paddle/utils/BarrierStat.cpp | 340 -------------------- paddle/utils/BarrierStat.h | 425 ------------------------- paddle/utils/Stat.cpp | 61 ---- paddle/utils/Stat.h | 17 - 7 files changed, 1157 deletions(-) delete mode 100644 paddle/utils/BarrierStat.cpp delete mode 100644 paddle/utils/BarrierStat.h diff --git a/paddle/parameter/tests/test_common.cpp b/paddle/parameter/tests/test_common.cpp index 8bab5a6289..64d204aea1 100644 --- a/paddle/parameter/tests/test_common.cpp +++ b/paddle/parameter/tests/test_common.cpp @@ -172,53 +172,3 @@ TEST_F(CommonTest, syncThreadPool) { EXPECT_EQ((int)0, nums[i]); } } - -TEST_F(CommonTest, barrierStat) { - const int threadNum = 10; - - SyncThreadPool pool(threadNum); - -#define TEST_BARRIER_RANDOM(statName, numConnThreads, ...) \ - pool.exec([&](int tid, size_t numThreads) { \ - struct timeval time; \ - gettimeofday(&time, nullptr); \ - uint64_t usec = timeToMicroSecond(time); \ - std::srand(usec); \ - auto value = std::rand() % 100000; \ - usleep(value); \ - REGISTER_SLOW_NODES_PROBE( \ - globalStat, statName, numConnThreads, tid, __VA_ARGS__); \ - }); - - for (auto i = 0; i < 10; i++) { - TEST_BARRIER_RANDOM("synThreadBarrier1", threadNum); - TEST_BARRIER_RANDOM("synThreadBarrier2", threadNum); - } - - globalStat.printAllStatus(); - globalStat.reset(); - - for (auto i = 0; i < 10; i++) { - TEST_BARRIER_RANDOM("synThreadBarrier3", threadNum, "tag0"); - TEST_BARRIER_RANDOM("synThreadBarrier4", threadNum, "tag1"); - } - - globalStat.printAllStatus(); - globalStat.reset(); - -// use it to test accurate barrier gap -#define TEST_BARRIER(statName, numConnThreads, ...) 
\ - pool.exec([&](int tid, size_t numThreads) { \ - usleep(tid * 10000); \ - REGISTER_SLOW_NODES_PROBE( \ - globalStat, statName, numConnThreads, tid, __VA_ARGS__); \ - }); - - for (auto i = 0; i < 10; i++) { - TEST_BARRIER("synThreadBarrier3", threadNum, "tag0"); - TEST_BARRIER("synThreadBarrier4", threadNum, "tag1"); - } - - globalStat.printAllStatus(); - globalStat.reset(); -} diff --git a/paddle/pserver/ParameterServer2.cpp b/paddle/pserver/ParameterServer2.cpp index 41ac15336d..d7c1d4f788 100644 --- a/paddle/pserver/ParameterServer2.cpp +++ b/paddle/pserver/ParameterServer2.cpp @@ -217,10 +217,6 @@ void ParameterServer2::setConfig(const SetConfigRequest& request, SetConfigResponse response; callback(response); - - /// always defined, barrier slowest node function need it. - statSet_.reset(new StatSet("ParameterServer" + - str::to_string(static_cast(serverId_)))); } real bufferSum(const std::vector& buffers) { @@ -369,50 +365,7 @@ void ParameterServer2::addGradient(const SendParameterRequest& request, std::vector* outputBuffers) { VLOG(1) << "pserver: addGradient"; - // forwardbackward delta from all trainers - // indicate the fluctuation caused by forwardbackward. - if (!numPassFinishClients_) { - REGISTER_BARRIER_DELTA_SERVER_SET( - *statSet_, - "forwardbackwardDelta", - FLAGS_num_gradient_servers, - request.trainer_id(), - request.forwardbackward_time(), - isSparseServer_ ? "_sparseUpdater" : "_denseUpdater"); - } - { - /// approximately pure network overhead - REGISTER_TIMER_DYNAMIC_SET( - "pushRecv", timeToMicroSecond(*handleRequestBegin_), -1, *statSet_); - } - -#ifndef PADDLE_DISABLE_TIMER - gettimeofday(&(*addGradBegin_), nullptr); -#endif - - /// barrier fluctuation caused by network and previous forwardbackward - if (!numPassFinishClients_) { - REGISTER_BARRIER_TIMER_SERVER_SET( - *statSet_, - "handleReqBegin", - FLAGS_num_gradient_servers, - request.trainer_id(), - (*handleRequestBegin_), - isSparseServer_ ? "_sparseUpdater" : "_denseUpdater"); - } - - if (!numPassFinishClients_) { - REGISTER_BARRIER_TIMER_SERVER( - *statSet_, - "addGradBegin", - FLAGS_num_gradient_servers, - request.trainer_id(), - isSparseServer_ ? "_sparseUpdater" : "_denseUpdater"); - } - - { - REGISTER_TIMER_DYNAMIC("addGradCore", -1, *statSet_); ReadLockGuard guard(parameterMutex_); int bufferIndex = 0; for (const auto& block : request.blocks()) { @@ -444,15 +397,6 @@ void ParameterServer2::addGradient(const SendParameterRequest& request, std::lock_guard guard(*info.lock); simd::addTo(gradientSumBuffer, gradientBuffer, size); } - - if (!numPassFinishClients_) { - REGISTER_BARRIER_TIMER_SERVER( - *statSet_, - "addGradCoreFinish", - FLAGS_num_gradient_servers, - request.trainer_id(), - isSparseServer_ ? "_sparseUpdater" : "_denseUpdater"); - } } if (request.batch_status() == BATCH_FINISH || request.batch_status() == BATCH_START_AND_FINISH) { @@ -461,47 +405,12 @@ void ParameterServer2::addGradient(const SendParameterRequest& request, VLOG(1) << "num samples: " << numSamplesProcessed_ << ", new cost:" << cost_; - /// numPassFinishClients_ means some trainer has entered finishPass - if (!numPassFinishClients_) { - REGISTER_SLOW_NODES_PROBE( - *statSet_, - "SLOW_NODES", - FLAGS_num_gradient_servers, - request.trainer_id(), - isSparseServer_ ? 
"_sparseUpdater" : "_denseUpdater"); - } - /// notify doOperation gradient ready gradientReadyBarrier_.wait(); - /// if wait pass finish does not start, do check - if (!numPassFinishClients_) { - CHECK_BARRIER_TIMER(*statSet_, - "SLOW_NODES", - FLAGS_num_gradient_servers, - isSparseServer_ ? "_sparseUpdater" : "_denseUpdater"); - } - - /// barrier performance while all parameter add is finished - /// can indicate the fluctation caused by computation at pserver. - if (!numPassFinishClients_) { - REGISTER_BARRIER_TIMER_SERVER( - *statSet_, - "paraReady", - FLAGS_num_gradient_servers, - request.trainer_id(), - isSparseServer_ ? "_sparseUpdater" : "_denseUpdater"); - } /// wait doOperation finish parameterReadyBarrier_.wait(); VLOG(1) << "start send back"; - { - /// total time except overhead of network. - REGISTER_TIMER_DYNAMIC_SET("sendParaNoRecvNoSend", - timeToMicroSecond(*addGradBegin_), - -1, - *statSet_); - } } } @@ -543,57 +452,6 @@ bool ParameterServer2::asyncGrdientCommitCheckAndStat( return commitGradient; } -void ParameterServer2::printAsyncGradientCommitStatAndReset() { - std::stringstream statFormat; - if (asyncUpdateSteps_) { - statFormat << "async discard gradients stat: " << std::endl; - statFormat << "serverId: " << serverId_ - << " serverType: " << isSparseServer_ - << " total updates: " << asyncUpdateSteps_ - << " discard updates: " << asyncLaggedGradientsNum_ - << " discard ratio: " - << (real)asyncLaggedGradientsNum_ / (real)asyncUpdateSteps_; - statFormat << std::endl; - statFormat << std::endl; - - statFormat << "Async Gradient Update Steps distribution: " << std::endl - << "Sample: 1:1912(0.00284449) means " - << "the updates step=1 count 1912 times " - << "and account for 0.284449% of total updates" << std::endl; - size_t index = 0; - for (const auto& stat : asyncUpdateStat_) { - statFormat << index << ":" << stat << "(" - << (real)stat / (real)asyncUpdateSteps_ << ") "; - index++; - } - statFormat << std::endl; - statFormat << std::endl; - - statFormat << "Async Gradient Discard based on trainer_id: " << std::endl - << "Sample: 2:22(0.0016363) means " - << "total discarded updates from trainer_id=2 count 22 " - << "and account for 0.16363% of all updates from trainer_id=2" - << std::endl; - for (auto i = 0; i < FLAGS_num_gradient_servers; i++) { - real ratio = - (real)asyncTrainerDiscardStat_[i] / - (real)(asyncTrainerCommitStat_[i] + asyncTrainerDiscardStat_[i]); - statFormat << i << ":" << asyncTrainerDiscardStat_[i] << "(" << ratio - << ")" - << " "; - } - LOG(INFO) << statFormat.str(); - - /// reset stat - asyncUpdateSteps_ = 0; - asyncTrainerSteps_.assign(asyncTrainerSteps_.size(), 0); - asyncLaggedGradientsNum_ = 0; - asyncUpdateStat_.assign(asyncUpdateStat_.size(), 0); - asyncTrainerDiscardStat_.assign(asyncTrainerDiscardStat_.size(), 0); - asyncTrainerCommitStat_.assign(asyncTrainerCommitStat_.size(), 0); - } -} - static ThreadLocal> localBlockBitset_; void ParameterServer2::asyncSGD(const SendParameterRequest& request, @@ -695,7 +553,6 @@ void ParameterServer2::asyncSGD(const SendParameterRequest& request, if (request.trainer_id() == 0) { /// batchId_ is approximately equal to "real batchId_" batchId_++; - tuningAsyncsgdMidOutput(); } } @@ -881,34 +738,6 @@ void ParameterServer2::sendParameter(const SendParameterRequest& request, } (*requestVec_).clear(); (*callbackVec_).clear(); - - /// barrier perfromance while all data are send finished. - /// indicates network flucatuation for big message. 
- if (!numPassFinishClients_) { - REGISTER_BARRIER_TIMER_SERVER( - *statSet_, - "sendParamFinish", - FLAGS_num_gradient_servers, - request.trainer_id(), - isSparseServer_ ? "_sparseUpdater" : "_denseUpdater"); - } - /// all time exhausted in parameterServer for big message. - /// it contains network and computation at pserver. - { - /// total time including overhead of network. - REGISTER_TIMER_DYNAMIC_SET("sendParaTotal", - timeToMicroSecond(*handleRequestBegin_), - -1, - *statSet_); - } - /// all time exhausted in pserverServer except recieve network. - { - /// total time except overhead of network receive - REGISTER_TIMER_DYNAMIC_SET("sendParaNoRecv", - timeToMicroSecond(*addGradBegin_), - -1, - *statSet_); - } } break; case PSERVER_UPDATE_MODE_SET_PARAM: @@ -1088,8 +917,6 @@ void ParameterServer2::op_SGD(const Operation& operation, } { - REGISTER_TIMER_DYNAMIC("op_SGD", -1, *statSet_); - parallelExecForEachBlock([&](int64_t blockId, const VectorPtr vecs[]) { BlockInfo& info = blockInfos_[blockId]; const ParameterConfig& config = getParameterConfig(blockId); @@ -1113,7 +940,6 @@ void ParameterServer2::op_SGD(const Operation& operation, } batchId_++; - tuningSgdMidOutput(); } void ParameterServer2::op_start_pass(const Operation& operation, @@ -1146,8 +972,6 @@ void ParameterServer2::op_finish_pass(const Operation& operation, /// finish pass info.optimizer->finishPass(); }); - - tuningSgdFinished(); batchId_ = 0; } @@ -1515,7 +1339,6 @@ void ParameterServer2::asyncFinishPass(const SynchronizeRequest& request, callback(SynchronizeResponse()); if (request.trainer_id() == 0) { - tuningAsyncsgdFinished(); batchId_ = 0; } } @@ -1574,42 +1397,4 @@ void ParameterServer2::releaseMatrix(const ReleaseMatrixRequest& request, callback(response); } -void ParameterServer2::tuningSgdMidOutput() { - if (batchId_ && batchId_ % FLAGS_log_period_server == 0) { - LOG(INFO) << "======== Batch=" << batchId_ << "======="; - statSet_->setThreadInfo(true); - statSet_->printAllStatus(); - /// not reset raw data for reducing the overhead of performance tuning - statSet_->reset(false); - } -} - -void ParameterServer2::tuningSgdFinished() { - LOG(INFO) << "======== Batch=" << batchId_ << " pass END" - << "======="; - statSet_->setThreadInfo(true); - statSet_->printAllStatus(); - /** - * reset raw data at end of pass since some raw data could be not - * complete. Otherwise the raw data will pollute next pass performance - * tuning - */ - statSet_->reset(); -} - -void ParameterServer2::tuningAsyncsgdMidOutput() { -#ifndef PADDLE_DISABLE_TIMER - if (batchId_ && batchId_ % FLAGS_log_period_server == 0) { - LOG(INFO) << "======== [not accurate] Batch=" << batchId_ << "======="; - printAsyncGradientCommitStatAndReset(); - } -#endif -} - -void ParameterServer2::tuningAsyncsgdFinished() { - LOG(INFO) << "======== [not accurate] Batch=" << batchId_ << " pass END" - << "======="; - printAsyncGradientCommitStatAndReset(); -} - } // namespace paddle diff --git a/paddle/pserver/ParameterServer2.h b/paddle/pserver/ParameterServer2.h index 0f5a589590..f7d3587b88 100644 --- a/paddle/pserver/ParameterServer2.h +++ b/paddle/pserver/ParameterServer2.h @@ -298,24 +298,6 @@ protected: /// barrier performance tuning sync-sgd required std::atomic batchId_; - /// the beginning of addGradient without network overhead - ThreadLocal addGradBegin_; - - /** - * tuning barrier performance - * to better control log for sparse and dense parameter, - * we use different log entities for different parameterServer - * objects. 
- * it will output lots of performance stats to perceive the - * overhead of network, fluctuation of computation from - * forwardbackward and network, computation from optimization - * at pserver end, barrier overhead, etc. to understand tuning - * data, focus on the synchronization between addGradient and - * doOperation which indirectly call op_SGD operation controlled - * by remote updater controller - */ - std::unique_ptr statSet_; - public: struct Buffer { real* base; @@ -325,7 +307,6 @@ public: protected: /// async gradient commit control bool asyncGrdientCommitCheckAndStat(const SendParameterRequest& request); - void printAsyncGradientCommitStatAndReset(); public: /// disable default parameter for overloading @@ -710,36 +691,6 @@ public: void op_load(const Operation& operation, OperationResult* result); void op_save(const Operation& operation, OperationResult* result); - - /** - * @brief output log in at the middle stage of training - * - * @note flush log histroy and state at the end for sgd - */ - void tuningSgdMidOutput(); - - /** - * @brief output log in at the end stage of training - * - * @note flush log histroy and state at the end for sgd. it will also - * flush some stateful stat for next pass. - */ - void tuningSgdFinished(); - - /** - * @brief output log in at the middle stage of training - * - * @note flush log histroy and state at the end for async-sgd. - * it will log some performance log if some lagged node are found - */ - void tuningAsyncsgdMidOutput(); - - /** - * @brief output log in at the end stage of training - * - * @note flush log histroy and state at the end for async-sgd. - */ - void tuningAsyncsgdFinished(); }; } // namespace paddle diff --git a/paddle/utils/BarrierStat.cpp b/paddle/utils/BarrierStat.cpp deleted file mode 100644 index a6dbdcae3f..0000000000 --- a/paddle/utils/BarrierStat.cpp +++ /dev/null @@ -1,340 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/utils/BarrierStat.h" -#include -#include -#include -#include -#include "paddle/utils/Flags.h" -#include "paddle/utils/Stat.h" - -DEFINE_bool(log_barrier_abstract, - true, - "if true, show abstract of barrier performance"); -DEFINE_int32(log_barrier_lowest_nodes, - 5, - "how many lowest node will be logged"); -DEFINE_bool(log_barrier_show_log, - false, // for performance tuning insight - "if true, always show barrier abstract even with little gap"); - -namespace paddle { - -std::ostream &operator<<(std::ostream &output, const BarrierStatBase &stat) { - if (FLAGS_log_barrier_abstract) { - std::lock_guard guard(stat.lock_); - stat.showAbstract(output); - } - return output; -} - -BarrierStatBase::BarrierStatBase(uint16_t numConnThreads, - const std::string &name) - : totSamples_(0), numConnThreads_(numConnThreads), name_(name) { - abstract_.resize(numConnThreads_); - if (FLAGS_log_barrier_show_log) { - rateThreshold_ = 0.0; - } else { - /* probablity of abnormal node - * p = 1/n + (n/8)/(n+1), n = nodes, n > 1 - * if the freq of lowest trainerId larger than p, - * output FLAGS_log_barrier_lowest_nodes lastTrainerId. - * numConnThreads_ indicates nodes - */ - float n = (float)numConnThreads; - rateThreshold_ = 1.0 / n + (n / 8.0) / (n + 1.0); - } -} - -BarrierEndStat::BarrierEndStat(uint16_t numConnThreads, const std::string &name) - : BarrierStatBase(numConnThreads, name) { - timeVector_.reset(new TimeVectorEnd(numConnThreads_)); - reset(true); - LOG(INFO) << " create barrierEndStat: " << name - << " endBarrier warning rate: " << rateThreshold_; -} - -/* - * Note: - * the design different pserver entity owns different statSet to obey - * the background that different pserver runs separately. - */ -void BarrierEndStat::updateStat(struct timeval &cur, int32_t trainerId) { - CHECK_LT(trainerId, numConnThreads_) << "trainerId is invalid in barrier"; - - std::lock_guard guard(lock_); - timeVector_->addTimeval(cur, trainerId); - - if (timeVector_->full()) { - std::lock_guard abstractGuard(abstractLock_); - auto id = timeVector_->getLastTrainerId(); - auto delta = timeToMicroSecond(timeVector_->getDelta()); - auto secondDelta = timeToMicroSecond(timeVector_->get1NDelta()); - auto lastTwoDelta = timeToMicroSecond(timeVector_->getMinus1NDelta()); - auto midDelta = timeToMicroSecond(timeVector_->getMidNDelta()); - // discard first sample, since first sample probably is abnormal. 
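    // [Editor's note: the rateThreshold_ formula above, p = 1/n + (n/8)/(n+1),
    // works out to 0.25 + 0.5/5 = 0.35 for n = 4 and to
    // 0.03125 + 4/33 ~= 0.152 for n = 32, approaching 1/8 as n grows; so the
    // larger the cluster, the lower the frequency with which a single trainer
    // may keep arriving last before it is reported as a slow node.]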
- if (totSamples_) { - abstract_[id].freq++; - - if (delta < abstract_[id].minDelta) { - abstract_[id].minDelta = delta; - } - if (delta > abstract_[id].maxDelta) { - abstract_[id].maxDelta = delta; - } - abstract_[id].totDelta += delta; - abstract_[id].totSecondDelta += secondDelta; - abstract_[id].totLastTwoDelta += lastTwoDelta; - abstract_[id].totMidDelta += midDelta; - - // update totAbstract_ - totAbstract_.freq++; - if (delta < totAbstract_.minDelta) { - totAbstract_.minDelta = delta; - } - if (delta > totAbstract_.maxDelta) { - totAbstract_.maxDelta = delta; - } - totAbstract_.totDelta += delta; - totAbstract_.totSecondDelta += secondDelta; - totAbstract_.totLastTwoDelta += lastTwoDelta; - totAbstract_.totMidDelta += midDelta; - } - - totSamples_++; - timeVector_->reset(); - } -} - -void BarrierEndStat::reset(bool clearRawData) { - int32_t i = 0; - - totSamples_ = 0; - - std::lock_guard guard(abstractLock_); - - if (clearRawData) { - timeVector_->reset(); - } - - for (auto &abstract : abstract_) { - memset((void *)&abstract, 0, sizeof(abstract)); - abstract.minDelta = UINT64_MAX; - abstract.trainerId = i++; - } - memset((void *)&totAbstract_, 0, sizeof(Abstract)); - totAbstract_.minDelta = UINT64_MAX; -} - -void BarrierEndStat::showAbstract(std::ostream &output) const { - // do not support the case "<=2 pserver" - if (numConnThreads_ <= 2 || !totSamples_) { - return; - } - - // duplicate freq info - std::vector outputAbstract = abstract_; - std::sort(outputAbstract.begin(), - outputAbstract.end(), - [](const struct Abstract &a, const struct Abstract &b) { - return a.freq > b.freq; - }); - - auto rate = (float)outputAbstract[0].freq / (float)totSamples_; - if (rate < rateThreshold_) { - return; - } - - output << std::setw(20) << name_ << std::endl; - - /* - * Note: - * avgGap: the average delta between 1 -- n arriving trainers - * avgSecondGap: the average delta between 2 -- n arriving trainers - * avgLastTwoGap: the average delta between n-1 -- n arriving trainers - * avgMidGap: the average delta between n/2 -- n arriving trainers - * rato: samples / totSamples - * - * the stat is based on per trainer if trainer_id is set, totAbstract is - * stat based on all trainers scope. 
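 * Editor's worked example (hypothetical): four trainers arriving at
 * t = 0, 2, 9, 10 ms give gap = 10, secondGap = 8, lastTwoGap = 1 and
 * midGap = 1. A persistently large lastTwoGap singles out one straggling
 * trainer, while gap far exceeding secondGap means the first arrival is
 * unusually early and is skewing the window.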
- */ - output << std::setw(42) << " " << std::setw(15) << "trainerId" - << std::setw(15) << "avgGap" << std::setw(15) << "avgSecondGap" - << std::setw(15) << "avgLastTwoGap" << std::setw(15) << "avgMidGap" - << std::setw(10) << "rate" << std::setw(10) << "samples" - << std::setw(10) << "totSamples" << std::endl; - // show totAbstract, it's valuable when lastTrainerId is even-distributed' - if (!totAbstract_.freq) return; - output << std::setw(42) << " " << std::setw(15) << "totAbstract" - << std::setw(15) << (totAbstract_.totDelta / totAbstract_.freq) * 0.001 - << std::setw(15) - << (totAbstract_.totSecondDelta / totAbstract_.freq) * 0.001 - << std::setw(15) - << (totAbstract_.totLastTwoDelta / totAbstract_.freq) * 0.001 - << std::setw(15) - << (totAbstract_.totMidDelta / totAbstract_.freq) * 0.001 - << std::setw(10) << (float)totAbstract_.freq / (float)totSamples_ - << std::setw(10) << (float)totAbstract_.freq << std::setw(10) - << (float)totSamples_ << std::endl; - - // show lastTrainerId abstract - int count = 0; - for (auto &abstract : outputAbstract) { - if (!abstract.freq || count++ >= FLAGS_log_barrier_lowest_nodes) { - break; - } - // output format control - output << std::setw(42) << " " << std::setw(15) << abstract.trainerId - << std::setw(15) << (abstract.totDelta / abstract.freq) * 0.001 - << std::setw(15) << (abstract.totSecondDelta / abstract.freq) * 0.001 - << std::setw(15) - << (abstract.totLastTwoDelta / abstract.freq) * 0.001 - << std::setw(15) << (abstract.totMidDelta / abstract.freq) * 0.001 - << std::setw(10) << (float)abstract.freq / (float)totSamples_ - << std::setw(10) << (float)abstract.freq << std::setw(10) - << (float)totSamples_ << std::endl; - } -} - -BarrierDeltaStat::BarrierDeltaStat(uint16_t numConnThreads, - const std::string &name) - : BarrierStatBase(numConnThreads, name) { - timeVector_.reset(new TimeVectorDelta(numConnThreads_)); - reset(true); - LOG(INFO) << " create barrierDeltaStat: " << name - << " barrierDelta warning rate: " << rateThreshold_; -} - -void BarrierDeltaStat::updateStat(uint64_t delta, int32_t trainerId) { - CHECK_LT(trainerId, numConnThreads_) << "trainerId is invalid in barrier"; - - std::lock_guard guard(lock_); - timeVector_->addTimeval(delta, trainerId); - - if (timeVector_->full()) { - std::lock_guard abstractGuard(abstractLock_); - auto id = timeVector_->getMaxTrainerId(); - auto delta = timeVector_->getDelta(); - // discard first sample, since first sample probably is abnormal. 
- if (totSamples_) { - abstract_[id].freq++; - - if (delta < abstract_[id].minDelta) { - abstract_[id].minDelta = delta; - } - if (delta > abstract_[id].maxDelta) { - abstract_[id].maxDelta = delta; - } - abstract_[id].totDelta += delta; - - // update totAbstract_ - totAbstract_.freq++; - if (delta < totAbstract_.minDelta) { - totAbstract_.minDelta = delta; - } - if (delta > totAbstract_.maxDelta) { - totAbstract_.maxDelta = delta; - } - totAbstract_.totDelta += delta; - } - - totSamples_++; - timeVector_->reset(); - } -} - -void BarrierDeltaStat::reset(bool clearRawData) { - int32_t i = 0; - - totSamples_ = 0; - - std::lock_guard guard(abstractLock_); - - if (clearRawData) { - timeVector_->reset(); - } - - for (auto &abstract : abstract_) { - memset((void *)&abstract, 0, sizeof(abstract)); - abstract.minDelta = UINT64_MAX; - abstract.trainerId = i++; - } - memset((void *)&totAbstract_, 0, sizeof(Abstract)); - totAbstract_.minDelta = UINT64_MAX; -} - -void BarrierDeltaStat::showAbstract(std::ostream &output) const { - // do not support the case "<=2 pserver" - if (numConnThreads_ <= 2 || !totSamples_) { - return; - } - - // duplicate freq info - std::vector outputAbstract = abstract_; - std::sort(outputAbstract.begin(), - outputAbstract.end(), - [](const struct Abstract &a, const struct Abstract &b) { - return a.freq > b.freq; - }); - - auto rate = (float)outputAbstract[0].freq / (float)totSamples_; - if (rate < rateThreshold_) { - return; - } - - output << std::setw(20) << name_ << std::endl; - - /* Note: - * Gap means the delta from all trainers' forwardbackward - * avgGap: average Gap in log_period batches - * minGap: min Gap in log_period batches - * maxGap: max Gap in log_period batches - * trainerId: the slowest trainer_id - * - * the stat is based on per trainer if trainer_id is set, totAbstract is - * stat based on all trainers scope. - */ - output << std::setw(42) << " " << std::setw(15) << "trainerId" - << std::setw(15) << "avgGap" << std::setw(10) << "minGap" - << std::setw(10) << "maxGap" << std::setw(10) << "rate" - << std::setw(10) << "samples" << std::setw(10) << "totSamples" - << std::endl; - // show totAbstract, it's valuable when lastTrainerId is even-distributed' - if (!totAbstract_.freq) return; - output << std::setw(42) << " " << std::setw(15) << "totAbstract" - << std::setw(15) << (totAbstract_.totDelta / totAbstract_.freq) * 0.001 - << std::setw(10) << totAbstract_.minDelta * 0.001 << std::setw(10) - << totAbstract_.maxDelta * 0.001 << std::setw(10) - << (float)totAbstract_.freq / (float)totSamples_ << std::setw(10) - << (float)totAbstract_.freq << std::setw(10) << (float)totSamples_ - << std::endl; - - // show lastTrainerId abstract - int count = 0; - for (auto &abstract : outputAbstract) { - if (!abstract.freq || count++ >= FLAGS_log_barrier_lowest_nodes) { - break; - } - // output format control - output << std::setw(42) << " " << std::setw(15) << abstract.trainerId - << std::setw(15) << (abstract.totDelta / abstract.freq) * 0.001 - << std::setw(10) << abstract.minDelta * 0.001 << std::setw(10) - << abstract.maxDelta * 0.001 << std::setw(10) - << (float)abstract.freq / (float)totSamples_ << std::setw(10) - << (float)abstract.freq << std::setw(10) << (float)totSamples_ - << std::endl; - } -} -} // namespace paddle diff --git a/paddle/utils/BarrierStat.h b/paddle/utils/BarrierStat.h deleted file mode 100644 index a9c925eff6..0000000000 --- a/paddle/utils/BarrierStat.h +++ /dev/null @@ -1,425 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "Locks.h" -#include "Logging.h" -#include "ThreadLocal.h" - -namespace paddle { - -inline uint64_t timeToMicroSecond(struct timeval time) { - return time.tv_sec * 1000000LU + time.tv_usec; -} - -class TimeVectorEnd { - /* - * help class for gathering all barrier performance data - * which shows time point property. - * freqently used in barrier performance tuning API, such - * as tuning which is slowest node in sync-sgd mode training. - */ -public: - explicit TimeVectorEnd(uint16_t size) : size_(size) { - index_ = 0; - timeArray_.resize(size); - trainerIds_.resize(size); - } - ~TimeVectorEnd() {} - - uint16_t size() { return size_; } - - bool full() { return index_ == size_; } - - bool empty() { return index_ == 0; } - - void reset() { index_ = 0; } - - void addTimeval(struct timeval time, int32_t trainerId) { - timeArray_[index_] = time; - trainerIds_[index_] = trainerId; - index_++; - } - - struct timeval getDelta() const { - struct timeval delta; - CHECK_GT(size_, 1) << "not support with 1 pserver"; - timersub(&timeArray_[size_ - 1], &timeArray_[0], &delta); - return delta; - } - - /* 2, n delta */ - struct timeval get1NDelta() const { - CHECK_GT(size_, 2) << "not support with less than 2 pservers"; - struct timeval delta; - timersub(&timeArray_[size_ - 1], &timeArray_[1], &delta); - return delta; - } - - /* n-1, n delta */ - struct timeval getMinus1NDelta() const { - CHECK_GT(size_, 2) << "not support with less than 2 pservers"; - struct timeval delta; - timersub(&timeArray_[size_ - 1], &timeArray_[size_ - 2], &delta); - return delta; - } - - /* n/2, n delta */ - struct timeval getMidNDelta() const { - CHECK_GT(size_, 2) << "not support with less than 2 pservers"; - struct timeval delta; - timersub(&timeArray_[size_ - 1], &timeArray_[size_ / 2], &delta); - return delta; - } - - int32_t getLastTrainerId() const { return trainerIds_[index_ - 1]; } - -private: - uint16_t size_; - uint16_t index_; - std::vector timeArray_; - std::vector trainerIds_; -}; - -class TimeVectorDelta { - /* - * help class for gathering performance data which shows time - * delta property, such as tuning the time distribution of - * forwardBackward time from all cluster nodes. 
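 * Editor's usage sketch (hedged; the 4-trainer setup and the variable
 * names are hypothetical):
 *
 *   TimeVectorDelta bpTimes(4);
 *   bpTimes.addTimeval(bpDeltaUs, trainerId);  // one sample per trainer
 *   if (bpTimes.full()) {
 *     uint64_t spreadUs = bpTimes.getDelta();  // max - min over trainers
 *     int32_t slowest = bpTimes.getMaxTrainerId();
 *     bpTimes.reset();
 *   }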
- */ -public: - explicit TimeVectorDelta(uint16_t size) - : size_(size), min_(UINT64_MAX), max_(0) { - index_ = 0; - timeArray_.resize(size); - } - ~TimeVectorDelta() {} - - uint16_t size() { return size_; } - - bool full() { return index_ == size_; } - - bool empty() { return index_ == 0; } - - void reset() { - index_ = 0; - min_ = UINT64_MAX; - max_ = 0; - } - - void addTimeval(uint64_t delta, int32_t trainerId) { - timeArray_[index_] = delta; - index_++; - if (delta < min_) { - min_ = delta; - } - if (delta > max_) { - max_ = delta; - maxTrainerId_ = trainerId; - } - } - - uint64_t getDelta() const { - CHECK_GT(size_, 1) << "not support with 1 pserver"; - return max_ - min_; - } - - /* 2, n delta */ - uint64_t get1NDelta() const { - CHECK_GT(size_, 2) << "not support with less than 2 pservers"; - LOG(FATAL) << "Not implemented"; - } - - /* n-1, n delta */ - uint64_t getMinus1NDelta() const { - CHECK_GT(size_, 2) << "not support with less than 2 pservers"; - LOG(FATAL) << "Not implemented"; - } - - /* n/2, n delta */ - uint64_t getMidNDelta() const { - CHECK_GT(size_, 2) << "not support with less than 2 pservers"; - LOG(FATAL) << "Not implemented"; - } - - int32_t getMaxTrainerId() const { return maxTrainerId_; } - -private: - uint16_t size_; - uint16_t index_; - std::vector timeArray_; - -private: - uint64_t min_; - uint64_t max_; - int32_t maxTrainerId_; -}; - -// total samples stats, us -struct Abstract { - // last trainerId for barrier end, maxDelta trainerId for barrier delta - int32_t trainerId; - uint64_t minDelta; - uint64_t maxDelta; - uint64_t totDelta; - // first one is probably itself, so discard it. - uint64_t totSecondDelta; - // to confirm if last node destroy barrier performance. - uint64_t totLastTwoDelta; - // n/2-n delta - uint64_t totMidDelta; - uint64_t freq; -}; - -// barrier performance tunning stats -class BarrierStatBase { -public: - BarrierStatBase(uint16_t numConnThreads, const std::string &name); - - virtual ~BarrierStatBase() {} - - // if called at pserver end, then trainId means trainer's id. - // by default trainer does not use trainerId, so set it to -1 - virtual void updateStat(struct timeval &cur, int32_t trainerId = -1) = 0; - virtual void updateStat(uint64_t delta, int32_t trainerId = -1) = 0; - - const std::string &getName() { return name_; } - - virtual void reset(bool clearRawData = true) {} - // since the timeVector_ is not stateful, so it's not clear whether the - // the barrier delta is correct. if one timestamp was lost, the all data - // from barrier stat becomes rubbish. 
-_- - virtual bool checkPassBarrier() { - LOG(INFO) << "bug implementation found"; - return false; - } - -protected: - virtual void showAbstract(std::ostream &output) const {} - friend std::ostream &operator<<(std::ostream &output, - const BarrierStatBase &stat); - -protected: - mutable std::mutex lock_; - std::mutex abstractLock_; // see note on updaterStat - // each freqency for each barrier trainer - std::vector abstract_; - // it is valuable when do perf-tuining, if lastTrainerId acts uniform - // distribution - struct Abstract totAbstract_; - uint64_t totSamples_; - -protected: - uint16_t numConnThreads_; // total updates needed - float rateThreshold_; - std::string name_; -}; - -// the end-time of arriving real/forged barrier position -class BarrierEndStat : public BarrierStatBase { -public: - BarrierEndStat(uint16_t numConnThreads, const std::string &name); - ~BarrierEndStat() {} - - virtual void updateStat(struct timeval &cur, int32_t trainerId = -1); - virtual void updateStat(uint64_t delta, int32_t trainerId = -1) { - LOG(INFO) << "have no delta updateStat in BarrierEndStat"; - } - virtual void reset(bool clearRawData = true); - virtual bool checkPassBarrier() { return timeVector_->empty(); } - -protected: - /* - * LOG: - * readAllBlocks_denseUpdater - * trainerId avgGap avgSecondGap avgLastTwoGap avgMidGap rate - * 44 86.702 81.022 9.984 50.472 0.144737 - * 46 87.723 82.939 8.737 50.019 0.118421 - * 35 100.923 96.752 14.305 61.979 - * 0.0657895 - * log_barrier_abstract, log_barrier_lowest_nodes, log_barrier_threshold - * control details. - */ - virtual void showAbstract(std::ostream &output) const; - -private: - std::unique_ptr timeVector_; -}; - -// the delta-time from different trainers, -// eg, find the degree of imbalance of BP time at pserver end -// the entry value in timerVector_ is BP delta, do evaluation to BP delta. -class BarrierDeltaStat : public BarrierStatBase { -public: - BarrierDeltaStat(uint16_t numConnThreads, const std::string &name); - ~BarrierDeltaStat() {} - - virtual void updateStat(uint64_t delta, int32_t trainerId = -1); - virtual void updateStat(struct timeval &cur, int32_t trainerId = -1) { - LOG(INFO) << "have no timeval updateStat in BarrierDeltaStat"; - } - - virtual void reset(bool clearRawData = true); - - virtual bool checkPassBarrier() { return timeVector_->empty(); } - -protected: - virtual void showAbstract(std::ostream &outPut) const; - -private: - // store delta time in uint64_t, eg BP time of all trainers - std::unique_ptr timeVector_; -}; - -// to distinguish different contexts for same parallel threads, and different -// threads with same code-sgement, just use tagName to tag the run-time -// position. -// in Sparse, sendParallel threads can not only run in the stage of push&pull -// with same thread group, but also run in the stage of pull&push with different -// thread group, tag will be used to distinguish different run-time barrier -// position. -// trainerId in REGISTER_BARRIER_TIMER_SERVER is used to retreive lowest trainer -// nodes. - -// end barrier -#define __REGISTER_BARRIER_TIMER_SERVER( \ - set, statName, numConnThreads, trainerId, ...) 
\ - do { \ - if (numConnThreads > 2) { \ - std::string internalName = \ - std::string(statName) + std::string(__VA_ARGS__); \ - BarrierStatPtr __stat = \ - (set).getStat(numConnThreads, internalName, BARRIER_END); \ - struct timeval cur; \ - gettimeofday(&cur, nullptr); \ - __stat->updateStat(cur, trainerId); \ - } \ - } while (0); - -// end barrier with user-defined timer -#define __REGISTER_BARRIER_TIMER_SERVER_SET( \ - set, statName, numConnThreads, trainerId, cur, ...) \ - do { \ - if (numConnThreads > 2) { \ - std::string internalName = \ - std::string(statName) + std::string(__VA_ARGS__); \ - BarrierStatPtr __stat = \ - (set).getStat(numConnThreads, internalName, BARRIER_END); \ - __stat->updateStat(cur, trainerId); \ - } \ - } while (0); - -// delta barrier -#define __REGISTER_BARRIER_DELTA_SERVER_SET( \ - set, statName, numConnThreads, trainerId, delta, ...) \ - do { \ - if (numConnThreads > 2) { \ - std::string internalName = \ - std::string(statName) + std::string(__VA_ARGS__); \ - BarrierStatPtr __stat = \ - (set).getStat(numConnThreads, internalName, BARRIER_DELTA); \ - __stat->updateStat(delta, trainerId); \ - } \ - } while (0); - -// check end barrier -#define __CHECK_BARRIER_TIMER(set, statName, numConnThreads, ...) \ - do { \ - std::string internalName = \ - std::string(statName) + std::string(__VA_ARGS__); \ - BarrierStatPtr __stat = \ - (set).getStat(numConnThreads, internalName, BARRIER_END); \ - PCHECK(__stat->checkPassBarrier()) << internalName \ - << ": invalid barrier data"; \ - } while (0); - -/* - * Note: - * with sync-sgd algriothm in cluster mode, lots of synchronize action exsit at - * pserve end. these synchronizaton actions have impact on the efficiency of - * parameter exchange. the synchronizaton(barrier) GAP is composed of lots of - * factors, such as the forwardBackward variance, network fluncation. we try - * to have a quantitative analysis on these factor, so we design lots of barrier - * time to capture these performance. these barrier also can be placed at - * implict barrier position. - * - * example: - * in sync-sgd algorithm, each parameter server waits for all gradients from - * all trainers, thus, an explict barrier point exsit before doing optimization. - * the barrier timer located before the point can sense the barrier condition. - * - */ - -// try to capture which trainer is slowest node in sync-sgd at pserver. -#define REGISTER_SLOW_NODES_PROBE( \ - set, statName, numConnThreads, trainerId, ...) \ - __REGISTER_BARRIER_TIMER_SERVER( \ - (set), statName, numConnThreads, trainerId, __VA_ARGS__) -// try to check if all threads or trainers have passed barriers for data -// accuracy. -#define CHECK_BARRIER_TIMER(set, statName, numConnThreads, ...) \ - __CHECK_BARRIER_TIMER((set), statName, numConnThreads, __VA_ARGS__) - -#ifdef PADDLE_DISABLE_TIMER - -#define REGISTER_BARRIER_TIMER_SERVER( \ - set, statName, numConnThreads, trainerId, ...) -#define REGISTER_BARRIER_TIMER_SERVER_SET( \ - set, statName, numConnThreads, trainerId, cur, ...) -#define REGISTER_BARRIER_DELTA_SERVER_SET( \ - set, statName, numConnThreads, trainerId, cur, ...) - -#else - -/* - * sensing barrier time distribution for all parallelization threads. - * it provides low API for slow node check(REGISTER_SLOW_NODES_PROBE) - */ -#define REGISTER_BARRIER_TIMER_SERVER( \ - set, statName, numConnThreads, trainerId, ...) 
\ - __REGISTER_BARRIER_TIMER_SERVER( \ - (set), statName, numConnThreads, trainerId, __VA_ARGS__) - -/* - * sensing barrier time distribution for all parallelization threads. - * but time point for barrier performance is set by user. - * eg, with this api, you can get implict barrier point such as the beginning - * time distribution - * for receiving data. - */ -#define REGISTER_BARRIER_TIMER_SERVER_SET( \ - set, statName, numConnThreads, trainerId, cur, ...) \ - __REGISTER_BARRIER_TIMER_SERVER_SET( \ - (set), statName, numConnThreads, trainerId, cur, __VA_ARGS__) - -// try to capture time delta from all trainers, such as forwardBackward time -// which implies -// computation fluctuation -#define REGISTER_BARRIER_DELTA_SERVER_SET( \ - set, statName, numConnThreads, trainerId, delta, ...) \ - __REGISTER_BARRIER_DELTA_SERVER_SET( \ - (set), statName, numConnThreads, trainerId, delta, __VA_ARGS__) - -#endif // DISABLE_TIMER -} // namespace paddle diff --git a/paddle/utils/Stat.cpp b/paddle/utils/Stat.cpp index c7194d3bf1..ff1b1bf888 100644 --- a/paddle/utils/Stat.cpp +++ b/paddle/utils/Stat.cpp @@ -97,34 +97,6 @@ std::ostream& operator<<(std::ostream& outPut, const Stat& stat) { return outPut; } -BarrierStatPtr StatSet::getStat(uint16_t numConnThreads, - const std::string& name, - BarrierStatType bType) { - { - ReadLockGuard guard(lock_); - auto it = barrierStatSet_.find(name); - if (it != barrierStatSet_.end()) { - return it->second; - } - } - - std::lock_guard guard(lock_); - // test again with lock_guard - auto it = barrierStatSet_.find(name); - if (it != barrierStatSet_.end()) { - return it->second; - } - - BarrierStatPtr stat; - if (bType == BARRIER_END) { - stat = std::make_shared(numConnThreads, name); - } else if (bType == BARRIER_DELTA) { - stat = std::make_shared(numConnThreads, name); - } - auto ret = barrierStatSet_.insert(std::make_pair(name, stat)); - return ret.first->second; -} - void StatSet::printSegTimerStatus() { ReadLockGuard guard(lock_); LOG(INFO) << std::setiosflags(std::ios::left) << std::setfill(' ') @@ -135,46 +107,20 @@ void StatSet::printSegTimerStatus() { } } -void StatSet::printBarrierTimerStatus() { - ReadLockGuard guard(lock_); - if (barrierStatSet_.empty()) { - return; - } - // control barrierAbstact in runtime, so enable compliation - LOG(INFO) << std::setiosflags(std::ios::left) << std::setfill(' ') - << "======= BarrierStatSet status ======" << std::endl; - for (auto& stat : barrierStatSet_) { - LOG(INFO) << std::setiosflags(std::ios::left) << std::setfill(' ') - << *(stat.second); - } -} - void StatSet::printAllStatus() { #ifndef PADDLE_DISABLE_TIMER printSegTimerStatus(); #endif - printBarrierTimerStatus(); LOG(INFO) << std::setiosflags(std::ios::left) << "--------------------------------------------------" << std::endl; } -void StatSet::printStatus(const std::string& name) { - ReadLockGuard guard(lock_); - auto iter = statSet_.find(name); - CHECK(iter != statSet_.end()) << name << " is not registed in " << name_; - LOG(INFO) << *(iter->second); -} - void StatSet::reset(bool clearRawData) { ReadLockGuard guard(lock_); for (auto& stat : statSet_) { stat.second->reset(); } - // reset barrierStat - for (auto& stat : barrierStatSet_) { - stat.second->reset(clearRawData); - } } void StatSet::setThreadInfo(const std::string& name, bool flag) { @@ -184,13 +130,6 @@ void StatSet::setThreadInfo(const std::string& name, bool flag) { iter->second->setThreadInfo(flag); } -void StatSet::deleteStat(const std::string& name) { - std::lock_guard guard(lock_); - auto iter 
= statSet_.find(name); - CHECK(iter != statSet_.end()) << name << " is not registed in " << name_; - statSet_.erase(iter); -} - StatInfo::~StatInfo() { if (stat_) { std::lock_guard guard(stat_->lock_); diff --git a/paddle/utils/Stat.h b/paddle/utils/Stat.h index d9cc6e413a..79fd3b8cf0 100644 --- a/paddle/utils/Stat.h +++ b/paddle/utils/Stat.h @@ -23,7 +23,6 @@ limitations under the License. */ #include #include -#include "BarrierStat.h" #include "Locks.h" #include "Logging.h" #include "ThreadLocal.h" @@ -60,12 +59,6 @@ public: class Stat; typedef std::shared_ptr StatPtr; -typedef std::shared_ptr BarrierStatPtr; - -enum BarrierStatType { - BARRIER_END = 0, - BARRIER_DELTA = 1, -}; class StatSet { public: @@ -74,11 +67,8 @@ public: // print to LOG(INFO) void printSegTimerStatus(); - void printBarrierTimerStatus(); void printAllStatus(); - void printStatus(const std::string& name); - StatPtr getStat(const std::string& name) { { ReadLockGuard guard(lock_); @@ -93,12 +83,6 @@ public: return ret.first->second; } - BarrierStatPtr getStat(uint16_t numConnThreads, - const std::string& name, - BarrierStatType bType); - - void deleteStat(const std::string& name); - // true for showing stats for each thread // false for showing stats aggragated over threads void setThreadInfo(const std::string& name, bool flag); @@ -120,7 +104,6 @@ public: private: std::unordered_map statSet_; - std::unordered_map barrierStatSet_; const std::string name_; RWLock lock_; }; From 0ba63475659822bd146f1f1dcfc7eabca8b7047d Mon Sep 17 00:00:00 2001 From: liaogang Date: Tue, 4 Jul 2017 21:23:23 +0800 Subject: [PATCH 274/542] ENH: Add buddy allocator Free --- paddle/memory/detail/CMakeLists.txt | 9 ++-- paddle/memory/detail/buddy_allocator.cc | 41 ++++++++++++++++++- paddle/memory/detail/buddy_allocator.h | 2 +- paddle/memory/detail/memory_block.h | 2 +- .../detail/{metadata.cc => meta_data.cc} | 2 +- .../memory/detail/{metadata.h => meta_data.h} | 0 paddle/platform/cpu_info.h | 10 ----- paddle/platform/gpu_info.cc | 13 ++++++ paddle/platform/gpu_info.h | 6 +++ 9 files changed, 65 insertions(+), 20 deletions(-) rename paddle/memory/detail/{metadata.cc => meta_data.cc} (97%) rename paddle/memory/detail/{metadata.h => meta_data.h} (100%) diff --git a/paddle/memory/detail/CMakeLists.txt b/paddle/memory/detail/CMakeLists.txt index dbc98a8a62..c3167cd30a 100644 --- a/paddle/memory/detail/CMakeLists.txt +++ b/paddle/memory/detail/CMakeLists.txt @@ -1,12 +1,9 @@ if(${WITH_GPU}) - nv_library(system_allocator SRCS system_allocator.cc DEPS gflags) - nv_test(system_allocator_test - SRCS system_allocator_test.cc - DEPS system_allocator gpu_info gflags) + nv_library(system_allocator SRCS system_allocator.cc DEPS gflags gpu_info) else(${WITH_GPU}) cc_library(system_allocator SRCS system_allocator.cc DEPS gflags) - cc_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator gflags) endif(${WITH_GPU}) +cc_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator) -cc_library(metadata SRCS metadata.cc) +cc_library(meta_data SRCS meta_data.cc) cc_library(buddy_allocator SRCS buddy_allocator.cc) diff --git a/paddle/memory/detail/buddy_allocator.cc b/paddle/memory/detail/buddy_allocator.cc index eddfd9d13c..f677feda0d 100644 --- a/paddle/memory/detail/buddy_allocator.cc +++ b/paddle/memory/detail/buddy_allocator.cc @@ -75,10 +75,49 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) { } void BuddyAllocator::Free(void* p) { + // Point back to metadata auto block = static_cast(p)->metadata(); - // 
acquire the allocator lock + // Acquire the allocator lock std::lock_guard lock(mutex_); + + DLOG(INFO) << "Free from address " << block; + + if (block->type(cache_) == MemoryBlock::HUGE_CHUNK) { + DLOG(INFO) << "Free directly from system allocator"; + system_allocator_->Free(block, block->total_size(cache_), + block->index(cache_)); + + // Invalidate GPU allocation from cache + if (system_allocator_->UseGpu()) { + cache_.erase(block); + } + return; + } + + block->mark_as_free(cache_); + + total_used_ -= block->total_size(cache_); + total_free_ += block->total_size(cache_); + + // Trying to merge the right buddy + if (block->has_right_buddy(cache_)) { + DLOG(INFO) << "Merging this block " << block << " with its right buddy " + << block->right_buddy(cache_); + } + + // Trying to merge the left buddy + if (block->has_left_buddy(cache_)) { + DLOG(INFO) << "Merging this block " << block << " with its left buddy " + << block->left_buddy(cache_); + } + + // Dumping this block into pool + DLOG(INFO) << "Inserting free block (" << block << ", " + << block->total_size(cache_) << ")"; + pool_.insert({block->index(cache_), block->total_size(cache_), block}); + + // TODO(gangliao): Clean up if existing too much free memory } void* BuddyAllocator::SystemAlloc(size_t size) { diff --git a/paddle/memory/detail/buddy_allocator.h b/paddle/memory/detail/buddy_allocator.h index 4006bdcce8..49bd6cf901 100644 --- a/paddle/memory/detail/buddy_allocator.h +++ b/paddle/memory/detail/buddy_allocator.h @@ -14,7 +14,7 @@ #pragma once -#include "paddle/memory/detail/metadata.h" +#include "paddle/memory/detail/meta_data.h" #include "paddle/memory/detail/system_allocator.h" #include "paddle/platform/assert.h" #include "paddle/platform/cpu_info.h" diff --git a/paddle/memory/detail/memory_block.h b/paddle/memory/detail/memory_block.h index e2d39c31cf..2945520113 100644 --- a/paddle/memory/detail/memory_block.h +++ b/paddle/memory/detail/memory_block.h @@ -14,7 +14,7 @@ #pragma once -#include "paddle/memory/detail/metadata.h" +#include "paddle/memory/detail/meta_data.h" #include #include diff --git a/paddle/memory/detail/metadata.cc b/paddle/memory/detail/meta_data.cc similarity index 97% rename from paddle/memory/detail/metadata.cc rename to paddle/memory/detail/meta_data.cc index 4607cd8512..a3b7a9b4fe 100644 --- a/paddle/memory/detail/metadata.cc +++ b/paddle/memory/detail/meta_data.cc @@ -12,7 +12,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/memory/detail/metadata.h" +#include "paddle/memory/detail/meta_data.h" #include diff --git a/paddle/memory/detail/metadata.h b/paddle/memory/detail/meta_data.h similarity index 100% rename from paddle/memory/detail/metadata.h rename to paddle/memory/detail/meta_data.h diff --git a/paddle/platform/cpu_info.h b/paddle/platform/cpu_info.h index edd76517a6..8df7c7b4bc 100644 --- a/paddle/platform/cpu_info.h +++ b/paddle/platform/cpu_info.h @@ -28,15 +28,5 @@ size_t CpuMinChunkSize(); //! Get the maximum chunk size for buddy allocator. 
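// [Editor's aside on BuddyAllocator::Free() added earlier in this patch: the
// two "Merging this block ..." branches only log for now; the coalescing
// itself is left for a follow-up. A hedged sketch of what full coalescing
// could look like, reusing MemoryBlock::merge() from this same series (the
// pool_.erase() call signature is an assumption):
//
//   if (block->has_right_buddy(cache_)) {
//     MemoryBlock* right = block->right_buddy(cache_);
//     if (right->type(cache_) == MemoryBlock::FREE_CHUNK) {
//       pool_.erase({right->index(cache_), right->total_size(cache_), right});
//       block->merge(cache_, right);  // fold the free right buddy in
//     }
//   }
//   // symmetrically, a free left buddy would absorb `block` instead
//
// -- end aside; the cpu_info.h hunk resumes below.]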
size_t CpuMaxChunkSize(); -int GetCurrentDeviceId(void) { - int device_id; - throw_on_error(cudaGetDevice(&device_id), "cudaGetDevice failed"); - return device_id; -} - -void SetDeviceId(int device_id) { - throw_on_error(cudaSetDevice(device_id), "cudaSetDevice failed"); -} - } // namespace platform } // namespace paddle diff --git a/paddle/platform/gpu_info.cc b/paddle/platform/gpu_info.cc index d6c6fe34ef..05a243c506 100644 --- a/paddle/platform/gpu_info.cc +++ b/paddle/platform/gpu_info.cc @@ -31,6 +31,19 @@ int GpuDeviceCount() { return count; } +int GetCurrentDeviceId() { + int device_id; + throw_on_error( + cudaGetDevice(&device_id), + "cudaGetDevice failed in paddle::platform::GetCurrentDeviceId"); + return device_id; +} + +void SetDeviceId(int id) { + throw_on_error(cudaSetDevice(id), + "cudaSetDevice failed in paddle::platform::SetDeviceId"); +} + void GpuMemoryUsage(size_t& available, size_t& total) { throw_on_error(cudaMemGetInfo(&available, &total), "cudaMemGetInfo failed in paddle::platform::GetMemoryUsage"); diff --git a/paddle/platform/gpu_info.h b/paddle/platform/gpu_info.h index d7bf0f4093..81ee5f6e0a 100644 --- a/paddle/platform/gpu_info.h +++ b/paddle/platform/gpu_info.h @@ -24,6 +24,12 @@ namespace platform { //! Get the total number of GPU devices in system. int GpuDeviceCount(); +//! Get the current GPU device id in system. +int GetCurrentDeviceId(); + +//! Set the GPU device id for next execution. +void SetDeviceId(int device_id); + //!Get the memory usage of current GPU device. void GpuMemoryUsage(size_t& available, size_t& total); From 4dc3c9e0cc1b6ec5dbc324f4804974247ca6506f Mon Sep 17 00:00:00 2001 From: liaogang Date: Tue, 4 Jul 2017 23:28:15 +0800 Subject: [PATCH 275/542] ENH: Add paddle_memory for external usage --- paddle/memory/CMakeLists.txt | 10 ++++ paddle/memory/detail/CMakeLists.txt | 6 +++ paddle/memory/detail/buddy_allocator.cc | 12 ++--- paddle/memory/detail/buddy_allocator.h | 2 +- paddle/memory/detail/memory_block.cc | 56 +++++++++++-------- paddle/memory/detail/memory_block.h | 10 +--- paddle/memory/detail/meta_cache.cc | 57 ++++++++++++++++++++ paddle/memory/detail/meta_cache.h | 71 +++++++++++++++++++++++++ paddle/memory/detail/meta_data.cc | 8 +++ paddle/memory/detail/meta_data.h | 1 + 10 files changed, 196 insertions(+), 37 deletions(-) create mode 100644 paddle/memory/detail/meta_cache.cc create mode 100644 paddle/memory/detail/meta_cache.h diff --git a/paddle/memory/CMakeLists.txt b/paddle/memory/CMakeLists.txt index 3943c3cfad..8c290712fc 100644 --- a/paddle/memory/CMakeLists.txt +++ b/paddle/memory/CMakeLists.txt @@ -1 +1,11 @@ add_subdirectory(detail) + +cc_library(memory + SRCS + memory.cc) + +cc_library(paddle_memory + DEPS + memory meta_data + meta_cache memory_block + buddy_allocator system_allocator) diff --git a/paddle/memory/detail/CMakeLists.txt b/paddle/memory/detail/CMakeLists.txt index c3167cd30a..4fdabc8eeb 100644 --- a/paddle/memory/detail/CMakeLists.txt +++ b/paddle/memory/detail/CMakeLists.txt @@ -3,7 +3,13 @@ if(${WITH_GPU}) else(${WITH_GPU}) cc_library(system_allocator SRCS system_allocator.cc DEPS gflags) endif(${WITH_GPU}) + cc_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator) cc_library(meta_data SRCS meta_data.cc) + +cc_library(meta_cache SRCS meta_cache.cc) + +cc_library(memory_block SRCS memory_block.cc) + cc_library(buddy_allocator SRCS buddy_allocator.cc) diff --git a/paddle/memory/detail/buddy_allocator.cc b/paddle/memory/detail/buddy_allocator.cc index f677feda0d..aa5b6b557c 100644 
--- a/paddle/memory/detail/buddy_allocator.cc +++ b/paddle/memory/detail/buddy_allocator.cc @@ -20,14 +20,14 @@ namespace memory { namespace detail { BuddyAllocator::BuddyAllocator(SystemAllocator* system_allocator, - size_t min_chunk_size, size_t max_chunk_size) { + size_t min_chunk_size, size_t max_chunk_size) + : min_chunk_size_(min_chunk_size), + max_chunk_size_(max_chunk_size), + cache_(system_allocator->UseGpu()), + system_allocator_(std::move(system_allocator)) { PADDLE_ASSERT(min_chunk_size > 0); PADDLE_ASSERT(max_chunk_size > 0); PADDLE_ASSERT(system_allocator != nullptr); - - system_allocator_ = std::move(system_allocator); - min_chunk_size_ = min_chunk_size; - max_chunk_size_ = max_chunk_size; } inline size_t align(size_t size, size_t alignment) { @@ -90,7 +90,7 @@ void BuddyAllocator::Free(void* p) { // Invalidate GPU allocation from cache if (system_allocator_->UseGpu()) { - cache_.erase(block); + cache_.invalidate(block); } return; } diff --git a/paddle/memory/detail/buddy_allocator.h b/paddle/memory/detail/buddy_allocator.h index 49bd6cf901..ecf23b77ae 100644 --- a/paddle/memory/detail/buddy_allocator.h +++ b/paddle/memory/detail/buddy_allocator.h @@ -14,6 +14,7 @@ #pragma once +#include "paddle/memory/detail/meta_cache.h" #include "paddle/memory/detail/meta_data.h" #include "paddle/memory/detail/system_allocator.h" #include "paddle/platform/assert.h" @@ -80,7 +81,6 @@ class BuddyAllocator { private: // Unify the metadata format between GPU and CPU allocations - using MetadataCache = std::unordered_map; MetadataCache cache_; private: diff --git a/paddle/memory/detail/memory_block.cc b/paddle/memory/detail/memory_block.cc index 1c9e87df49..eaa97e7b4a 100644 --- a/paddle/memory/detail/memory_block.cc +++ b/paddle/memory/detail/memory_block.cc @@ -1,4 +1,20 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/
+
 #include "paddle/memory/detail/memory_block.h"
+#include "paddle/memory/detail/meta_cache.h"
+#include "paddle/memory/detail/meta_data.h"
 #include "paddle/platform/assert.h"

 namespace paddle {
@@ -7,10 +23,9 @@ namespace detail {

 void MemoryBlock::init(MetadataCache& cache, Type t, size_t index, size_t size,
                        void* left_buddy, void* right_buddy) {
-  cache.store(this,
-              MemoryBlockMetadata(t, index, size - overhead(), size,
-                                  static_cast<MemoryBlockMetadata*>(left_buddy),
-                                  static_cast<MemoryBlockMetadata*>(right_buddy)));
+  cache.store(this, Metadata(t, index, size - sizeof(Metadata), size,
+                             static_cast<MemoryBlock*>(left_buddy),
+                             static_cast<MemoryBlock*>(right_buddy)));
 }

 MemoryBlock::Type MemoryBlock::type(MetadataCache& cache) const {
@@ -35,10 +50,10 @@ MemoryBlock* MemoryBlock::right_buddy(MetadataCache& cache) const {

 void MemoryBlock::split(MetadataCache& cache, size_t size) {
   // make sure the split fits
-  assert(total_size(cache) >= size);
+  PADDLE_ASSERT(total_size(cache) >= size);

   // bail out if there is no room for another partition
-  if (total_size(cache) - size <= overhead()) {
+  if (total_size(cache) - size <= sizeof(Metadata)) {
     return;
   }

@@ -53,13 +68,13 @@ void MemoryBlock::split(MetadataCache& cache, size_t size) {
   // Write the metadata for the new block
   auto new_block_right_buddy = metadata.right_buddy;

-  cache.store(static_cast<MemoryBlock*>(right_partition),
-              MemoryBlockMetadata(FREE_MEMORY, index(cache),
-                                  remaining_size - overhead(), remaining_size,
-                                  this, new_block_right_buddy));
+  cache.store(
+      static_cast<MemoryBlock*>(right_partition),
+      Metadata(FREE_CHUNK, index(cache), remaining_size - sizeof(Metadata),
+               remaining_size, this, new_block_right_buddy));

   metadata.right_buddy = static_cast<MemoryBlock*>(right_partition);
-  metadata.size = size - overhead();
+  metadata.size = size - sizeof(Metadata);
   metadata.total_size = size;

   cache.store(this, metadata);
@@ -76,8 +91,8 @@ void MemoryBlock::split(MetadataCache& cache, size_t size) {

 void MemoryBlock::merge(MetadataCache& cache, MemoryBlock* right_buddy) {
   // only free blocks can be merged
-  assert(type(cache) == FREE_MEMORY);
-  assert(right_buddy->type(cache) == FREE_MEMORY);
+  PADDLE_ASSERT(type(cache) == FREE_CHUNK);
+  PADDLE_ASSERT(right_buddy->type(cache) == FREE_CHUNK);

   auto metadata = cache.load(this);
@@ -97,16 +112,15 @@ void MemoryBlock::merge(MetadataCache& cache, MemoryBlock* right_buddy) {
   metadata.total_size += right_buddy->total_size(cache);

   cache.store(this, metadata);
-  cache.store(right_buddy,
-              MemoryBlockMetadata(INVALID_MEMORY, 0, 0, 0, nullptr, nullptr));
+  cache.store(right_buddy, Metadata(INVALID_CHUNK, 0, 0, 0, nullptr, nullptr));
 }

 void MemoryBlock::mark_as_free(MetadataCache& cache) {
   // check for double free or corruption
-  assert(type(cache) != FREE_MEMORY);
-  assert(type(cache) != INVALID_MEMORY);
+  PADDLE_ASSERT(type(cache) != FREE_CHUNK);
+  PADDLE_ASSERT(type(cache) != INVALID_CHUNK);

-  set_type(cache, FREE_MEMORY);
+  set_type(cache, FREE_CHUNK);
 }

 void MemoryBlock::set_type(MetadataCache& cache, Type t) {
@@ -130,14 +144,12 @@ size_t MemoryBlock::index(MetadataCache& cache) const {
 }

 void* MemoryBlock::data() const {
-  return const_cast<MemoryBlockMetadata*>(
-             reinterpret_cast<const MemoryBlockMetadata*>(this)) +
-         1;
+  return const_cast<Metadata*>(reinterpret_cast<const Metadata*>(this)) + 1;
 }

 MemoryBlock* MemoryBlock::metadata() const {
   return const_cast<MemoryBlock*>(reinterpret_cast<const MemoryBlock*>(
-      reinterpret_cast<const MemoryBlockMetadata*>(this) - 1));
+      reinterpret_cast<const Metadata*>(this) - 1));
 }

 }  // detail
diff --git a/paddle/memory/detail/memory_block.h b/paddle/memory/detail/memory_block.h
index 2945520113..a5168b519f 100644
--- a/paddle/memory/detail/memory_block.h
+++ b/paddle/memory/detail/memory_block.h
@@ 
-14,24 +14,18 @@ #pragma once -#include "paddle/memory/detail/meta_data.h" - #include -#include namespace paddle { namespace memory { namespace detail { -// Forward Declaration -class Metadata; +// Forward Declarations +class MetadataCache; /*! \brief A class used to interpret the contents of a memory block */ class MemoryBlock { public: - // Unify the metadata format between GPU and CPU allocations - using MetadataCache = std::unordered_map; - enum Type { FREE_CHUNK, // memory is free and idle ARENA_CHUNK, // memory is being occupied diff --git a/paddle/memory/detail/meta_cache.cc b/paddle/memory/detail/meta_cache.cc new file mode 100644 index 0000000000..189ab4fc7b --- /dev/null +++ b/paddle/memory/detail/meta_cache.cc @@ -0,0 +1,57 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/memory/detail/meta_cache.h" +#include "paddle/memory/detail/memory_block.h" +#include "paddle/platform/assert.h" + +namespace paddle { +namespace memory { +namespace detail { + +MetadataCache::MetadataCache(bool uses_gpu) : uses_gpu_(uses_gpu) {} + +Metadata MetadataCache::load(const MemoryBlock* block) { + if (uses_gpu_) { + auto existing_metadata = cache_.find(block); + assert(existing_metadata->second.check_guards()); + return existing_metadata->second; + } else { + PADDLE_ASSERT(reinterpret_cast(block)->check_guards()); + return *reinterpret_cast(block); + } +} + +void MetadataCache::store(MemoryBlock* block, + const Metadata& original_metadata) { + auto metadata = original_metadata; + + metadata.update_guards(); + + if (uses_gpu_) { + cache_[block] = metadata; + } else { + *reinterpret_cast(block) = metadata; + } +} + +void MetadataCache::invalidate(MemoryBlock* block) { + if (uses_gpu_) { + cache_.erase(block); + } +} + +} // namespace detail +} // namespace memory +} // namespace paddle diff --git a/paddle/memory/detail/meta_cache.h b/paddle/memory/detail/meta_cache.h new file mode 100644 index 0000000000..3ca1020d22 --- /dev/null +++ b/paddle/memory/detail/meta_cache.h @@ -0,0 +1,71 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include "paddle/memory/detail/memory_block.h" +#include "paddle/memory/detail/meta_data.h" + +#include + +namespace paddle { +namespace memory { +namespace detail { + +/*! A cache for accessing memory block meta-data that may be expensive to access + directly. 
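   Editor's usage sketch (hedged; the sizes and the flag value are
   hypothetical):

     MetadataCache cache(true);     // uses_gpu: keep a host-side shadow map
     cache.store(block, Metadata(MemoryBlock::FREE_CHUNK, 0, size,
                                 total_size, nullptr, nullptr));
     Metadata m = cache.load(block);  // load() verifies the guard words
     cache.invalidate(block);         // drop the entry once the block is freed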
+ + Note: this class exists to unify the metadata format between GPU and CPU + allocations. + It should be removed when the CPU can access all GPU allocations directly + via UVM. +*/ +class MetadataCache { + public: + MetadataCache(bool uses_gpu); + + public: + /*! \brief Load the associated metadata for the specified memory block. */ + Metadata load(const MemoryBlock*); + + /*! \brief Store the associated metadata for the specified memory block. */ + void store(MemoryBlock*, const Metadata&); + + public: + /*! \brief Acquire any external metadata updates. */ + void acquire(MemoryBlock*); + + /*! \brief Publish any local updates externally. */ + void release(MemoryBlock*); + + /*! \brief Indicate that the specified metadata will no longer be used */ + void invalidate(MemoryBlock*); + + public: + MetadataCache(const MetadataCache&) = delete; + MetadataCache& operator=(const MetadataCache&) = delete; + + private: + bool uses_gpu_; + + private: + typedef std::unordered_map MetadataMap; + + private: + MetadataMap cache_; +}; + +} // namespace detail +} // namespace memory +} // namespace paddle diff --git a/paddle/memory/detail/meta_data.cc b/paddle/memory/detail/meta_data.cc index a3b7a9b4fe..70c5c1f439 100644 --- a/paddle/memory/detail/meta_data.cc +++ b/paddle/memory/detail/meta_data.cc @@ -29,6 +29,14 @@ Metadata::Metadata(MemoryBlock::Type t, size_t i, size_t s, size_t ts, left_buddy(l), right_buddy(r) {} +Metadata::Metadata() + : type(MemoryBlock::INVALID_CHUNK), + index(0), + size(0), + total_size(0), + left_buddy(nullptr), + right_buddy(nullptr) {} + template inline void hash_combine(std::size_t& seed, const T& v) { std::hash hasher; diff --git a/paddle/memory/detail/meta_data.h b/paddle/memory/detail/meta_data.h index ddb826571b..628cf1f2e3 100644 --- a/paddle/memory/detail/meta_data.h +++ b/paddle/memory/detail/meta_data.h @@ -26,6 +26,7 @@ class Metadata { public: Metadata(MemoryBlock::Type t, size_t i, size_t s, size_t ts, MemoryBlock* l, MemoryBlock* r); + Metadata(); public: /*! \brief Update the guards when metadata is changed */ From 9f2357561d939bdeae2a7bc0bd41be43d9ab0fe5 Mon Sep 17 00:00:00 2001 From: Superjom Date: Wed, 5 Jul 2017 10:08:23 +0800 Subject: [PATCH 276/542] fix ci error --- paddle/framework/net.cc | 2 +- paddle/framework/net.h | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/paddle/framework/net.cc b/paddle/framework/net.cc index d49861c343..8c565c28cb 100644 --- a/paddle/framework/net.cc +++ b/paddle/framework/net.cc @@ -12,7 +12,7 @@ void PlainNet::InferShape(Scope* scope) { } void PlainNet::Run(Scope* scope, OpContext* context, OpIndex begin, - OpIndex end) const { + OpIndex end) const { // TODO Add implementation here. } diff --git a/paddle/framework/net.h b/paddle/framework/net.h index 55dcf147e1..9564c831ee 100644 --- a/paddle/framework/net.h +++ b/paddle/framework/net.h @@ -70,7 +70,7 @@ class Net { * If no positive indexes are provided, all operators in `ops_` will run. */ virtual void Run(Scope *scope, OpContext *context, OpIndex begin = -1, - OpIndex end = -1) const = 0; + OpIndex end = -1) const = 0; /** * @brief Add an Operator according to `def`. @@ -125,7 +125,7 @@ class PlainNet : public Net { * will be used. */ virtual void Run(Scope *scope = nullptr, OpContext *context = nullptr, - OpIndex begin = -1, OpIndex end = -1) const override; + OpIndex begin = -1, OpIndex end = -1) const override; /** * @brief Add an operator to this network. 
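[Editor's note: PlainNet::Run above is still a stub ("TODO Add implementation
here"). A hedged sketch of the contract its comment describes -- run
ops_[begin, end), with -1 meaning "from the first op" / "through the last
op"; the loop body and the per-op Run signature are assumptions, not part of
this patch:

  size_t first = (begin == -1) ? 0 : begin;
  size_t last = (end == -1) ? ops_.size() : end;
  for (size_t i = first; i < last; ++i) {
    ops_[i]->Run(scope, context);  // hypothetical per-op interface
  }
]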
@@ -142,6 +142,8 @@ class PlainNet : public Net { */ virtual void AddBackwardOps() override; + virtual ~PlainNet() override {} + protected: /** * @brief Build the network. From 5c10a5ad555d834dac4785d8cd2feac18da9b67b Mon Sep 17 00:00:00 2001 From: Superjom Date: Wed, 5 Jul 2017 10:34:49 +0800 Subject: [PATCH 277/542] remove virtual --- paddle/framework/net.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/paddle/framework/net.h b/paddle/framework/net.h index 9564c831ee..e60356dc17 100644 --- a/paddle/framework/net.h +++ b/paddle/framework/net.h @@ -91,8 +91,6 @@ class Net { * @brief Create a network. */ static std::unique_ptr Create(const NetDesc &def = NetDesc()); - - virtual ~Net() = 0; }; /** @@ -142,8 +140,6 @@ class PlainNet : public Net { */ virtual void AddBackwardOps() override; - virtual ~PlainNet() override {} - protected: /** * @brief Build the network. From 166dfbb085ef4ebbccea190abc436524fb80ed57 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Wed, 5 Jul 2017 02:36:10 +0000 Subject: [PATCH 278/542] fix cmake errors --- cmake/generic.cmake | 7 ++++--- go/cmd/master/CMakeLists.txt | 2 +- go/cmd/pserver/CMakeLists.txt | 2 +- go/pserver/optimizer.go | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index cae9524b2f..97196114ff 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -284,7 +284,7 @@ function(go_library TARGET_NAME) add_custom_command(TARGET ${TARGET_NAME} POST_BUILD COMMAND rm "${${TARGET_NAME}_LIB_PATH}" # Golang build source code - COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE} + COMMAND env LIBRARY_PATH=${CMAKE_BINARY_DIR}/go/pserver/client/c/:$ENV{LIBRARY_PATH} GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE} -o "${${TARGET_NAME}_LIB_PATH}" "./${CMAKE_CURRENT_SOURCE_REL_DIR}/${GO_SOURCE}" # must run under GOPATH @@ -300,10 +300,11 @@ function(go_binary TARGET_NAME) string(REPLACE "${PADDLE_GO_PATH}/" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR}) add_custom_command(OUTPUT ${TARGET_NAME}_timestamp - COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build + COMMAND env LIBRARY_PATH=${CMAKE_BINARY_DIR}/go/pserver/client/c/:$ENV{LIBRARY_PATH} + GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build -o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}" "./${CMAKE_CURRENT_SOURCE_REL_DIR}/${go_binary_SRCS}" - WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go") + WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go") # TODO: don't know what ${TARGET_NAME}_link does add_custom_target(${TARGET_NAME} ALL DEPENDS go_vendor ${TARGET_NAME}_timestamp ${go_binary_DEPS}) install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME} DESTINATION bin) diff --git a/go/cmd/master/CMakeLists.txt b/go/cmd/master/CMakeLists.txt index 9e149967e7..1058ffa86b 100644 --- a/go/cmd/master/CMakeLists.txt +++ b/go/cmd/master/CMakeLists.txt @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -go_binary(master SRC master.go) +go_binary(master SRC master.go DEPS paddle_go_optimizer) diff --git a/go/cmd/pserver/CMakeLists.txt b/go/cmd/pserver/CMakeLists.txt index bc1da3348c..51db6dff04 100644 --- a/go/cmd/pserver/CMakeLists.txt +++ b/go/cmd/pserver/CMakeLists.txt @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
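# [Editor's note: the generic.cmake change above is what makes the line below
# work -- optimizer.go now links with "-lpaddle_go_optimizer" instead of a
# hard-coded build path, and `go build` can only resolve that flag because
# go_library()/go_binary() now export
# LIBRARY_PATH=${CMAKE_BINARY_DIR}/go/pserver/client/c/ before invoking the
# Go toolchain.]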
-go_binary(pserver SRCS pserver.go) +go_binary(pserver SRCS pserver.go DEPS paddle_go_optimizer) diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go index d84f55b987..2c9b0d5652 100644 --- a/go/pserver/optimizer.go +++ b/go/pserver/optimizer.go @@ -2,7 +2,7 @@ package pserver // #cgo CFLAGS: -I ../../ // //FIXME: ldflags contain "build" path -// #cgo LDFLAGS: ../../build/go/pserver/client/c/libpaddle_go_optimizer.a -lstdc++ -lm +// #cgo LDFLAGS: -lpaddle_go_optimizer -lstdc++ -lm // #include "paddle/optimizer/optimizer.h" // #include // #include From 013d0a268591829d7f757deeb3c23c58915c2d95 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Fri, 16 Jun 2017 19:02:46 +0800 Subject: [PATCH 279/542] add crop layer --- paddle/function/CMakeLists.txt | 1 + paddle/function/CropOp.cpp | 177 ++++++++++++++++++++++++++++ paddle/function/CropOp.h | 56 +++++++++ paddle/function/CropOpGpu.cu | 109 +++++++++++++++++ paddle/function/CropOpTest.cpp | 47 ++++++++ paddle/gserver/layers/CropLayer.cpp | 101 ++++++++++++++++ paddle/gserver/layers/CropLayer.h | 46 ++++++++ 7 files changed, 537 insertions(+) create mode 100644 paddle/function/CropOp.cpp create mode 100644 paddle/function/CropOp.h create mode 100644 paddle/function/CropOpGpu.cu create mode 100644 paddle/function/CropOpTest.cpp create mode 100644 paddle/gserver/layers/CropLayer.cpp create mode 100644 paddle/gserver/layers/CropLayer.h diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index 1518a8a654..f19a1eb777 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -37,6 +37,7 @@ if(WITH_GPU) add_simple_unittest(MulOpTest) add_simple_unittest(CosSimOpTest) add_simple_unittest(RowConvOpTest) + add_simple_unittest(CropOpTest) endif() add_simple_unittest(ConvOpTest) diff --git a/paddle/function/CropOp.cpp b/paddle/function/CropOp.cpp new file mode 100644 index 0000000000..4d47d9c149 --- /dev/null +++ b/paddle/function/CropOp.cpp @@ -0,0 +1,177 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "CropOp.h" +#include "paddle/math/Vector.h" +#include "paddle/function/TensorShape.h" +namespace paddle { + +static inline CropConf castToCropConf(const FuncConfig& conf) { + return {conf.get>("crop_corner"), + conf.get>("crop_shape")}; +} + +template <> +void Crop(real* outputs, + const real* inputs, + const TensorShape inShape, + const CropConf& crop) { + int cCrop = crop.corner[0]; + int hCrop = crop.corner[1]; + int wCrop = crop.corner[2]; + + int num = inShape[0]; + int inC = inShape[1]; + int inH = inShape[2]; + int inW = inShape[3]; + + int outC = crop.shape[0]; + int outH = crop.shape[1]; + int outW = crop.shape[2]; + + for (int n = 0; n < num; n++) { + for (int c = 0; c < outC; c++) { + for (int h = 0; h < outH; h++) { + int outoff = ((n * outC + c) * outH + h) * outW; + int inoff = ((n * inC + c + cCrop) * inH + h + hCrop) * inW + wCrop; + memcpy(outputs + outoff, inputs + inoff, outW * sizeof(real)); + } + } + } +} + +template <> +void CropGrad(const real* inGrad, + real* outGrad, + const TensorShape outShape, + const CropConf& crop) { + int cCrop = crop.corner[0]; + int hCrop = crop.corner[1]; + int wCrop = crop.corner[2]; + + int num = outShape[0]; + int outC = outShape[1]; + int outH = outShape[2]; + int outW = outShape[3]; + + int inC = crop.shape[0]; + int inH = crop.shape[1]; + int inW = crop.shape[2]; + + for (int n = 0; n < num; n++) { + for (int c = 0; c < inC; c++) { + for (int h = 0; h < inH; h++) { + int outoff = ((n * outC + c + cCrop) * outH + h + hCrop) * outW + wCrop; + int inoff = ((n * inC + c) * inH + h) * inW; + CpuVector inG = CpuVector(inW, const_cast(inGrad + inoff)); + CpuVector outG = CpuVector(inW, outGrad + outoff); + outG += inG; + } + } + } +} + +/** + * \brief Crop input according to the specify corner and shape. + * The input and output is a 4D tensor. In CropFunc, we only + * crop the 2nd to 4th dimension. + * + * Argument in this Function: + * \param pad_ A struct object contains the cropping corner and shape. + * \param inputs A 4D tensor, only one input. + * \param outputs A 4D tensor, the output value after cropping. + * + * For example, + * Input(2,2,2,3) = [ + * [ [[1,2,3], [3,4,5]], + * [[2,3,5], [1,6,7]] ], + * [ [[4,3,1], [1,8,7]], + * [[3,8,9], [2,3,5]] ] + * ] # the input shape is (2,2,2,3) + * + * pad_: if corner = (0,1,1) and crop_shape = (2,1,2) + * Output(2,2,1,2) = [ + * [ [[4,5]], + * [[6,7]] ], + * [ [[8,7]], + * [[3,5]] ] + * ] # the input shape is (2,2,2,3) + */ +template +class CropFunc : public FunctionBase { +public: + void init(const FuncConfig& config) override { + crop_ = castToCropConf(config); + } + + void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { + CHECK_EQ(1UL, inputs.size()); + CHECK_EQ(1UL, outputs.size()); + CHECK_EQ(outputs[0].shape()[1], crop_.shape[0]); + CHECK_EQ(outputs[0].shape()[2], crop_.shape[1]); + CHECK_EQ(outputs[0].shape()[3], crop_.shape[2]); + CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); + + TensorShape inShape = inputs[0].shape(); + + Crop( + outputs[0].data(), inputs[0].data(), inShape, crop_); + } + +private: + CropConf crop_; +}; + +/** + * \brief The backward propagation of cropping Function. + * + * Argument in this Function: + * \param crop_ The same meaning as it in CropFunc. + * \param inputs The gradient with respect to the output value of CropFunc. + * \param outputs The gradient with respect to the input value of CropFunc. 
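[Editor's note: the offset arithmetic in the worked example above can be checked with a few lines of standalone C++. The sketch below mirrors the CPU loop's indexing for the documented corner/shape; all names and the hard-coded extents are illustrative, not Paddle API.]

// Standalone check of the NCHW crop arithmetic used by the CPU kernel above.
#include <cstdio>
#include <cstring>
#include <vector>

int main() {
  const int num = 1, inC = 2, inH = 2, inW = 3;  // one sample of Input(2,2,2,3)
  const int cCrop = 0, hCrop = 1, wCrop = 1;     // corner = (0,1,1)
  const int outC = 2, outH = 1, outW = 2;        // crop_shape = (2,1,2)
  std::vector<float> in = {1, 2, 3, 3, 4, 5,     // channel 0
                           2, 3, 5, 1, 6, 7};    // channel 1
  std::vector<float> out(num * outC * outH * outW);
  for (int n = 0; n < num; n++)
    for (int c = 0; c < outC; c++)
      for (int h = 0; h < outH; h++) {
        int outoff = ((n * outC + c) * outH + h) * outW;
        int inoff = ((n * inC + c + cCrop) * inH + h + hCrop) * inW + wCrop;
        std::memcpy(&out[outoff], &in[inoff], outW * sizeof(float));
      }
  for (float v : out) std::printf("%g ", v);  // prints: 4 5 6 7
  std::printf("\n");
  return 0;
}

This reproduces the first sample of the documented output, [[4,5]] and [[6,7]].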
+ */
+
+template <DeviceType Device>
+class CropGradFunc : public FunctionBase {
+public:
+  void init(const FuncConfig& config) override {
+    crop_ = castToCropConf(config);
+  }
+
+  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
+    CHECK_EQ(1UL, inputs.size());
+    CHECK_EQ(1UL, outputs.size());
+    CHECK_EQ(inputs[0].shape()[1], crop_.shape[0]);
+    CHECK_EQ(inputs[0].shape()[2], crop_.shape[1]);
+    CHECK_EQ(inputs[0].shape()[3], crop_.shape[2]);
+    CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO);
+
+    TensorShape outShape = outputs[0].shape();
+
+    CropGrad<Device>(
+        inputs[0].data<real>(), outputs[0].data<real>(), outShape, crop_);
+  }
+
+private:
+  CropConf crop_;
+};
+
+REGISTER_TYPED_FUNC(Crop, CPU, CropFunc);
+REGISTER_TYPED_FUNC(CropGrad, CPU, CropGradFunc);
+#ifndef PADDLE_ONLY_CPU
+REGISTER_TYPED_FUNC(Crop, GPU, CropFunc);
+REGISTER_TYPED_FUNC(CropGrad, GPU, CropGradFunc);
+#endif
+
+}  // namespace paddle
diff --git a/paddle/function/CropOp.h b/paddle/function/CropOp.h
new file mode 100644
index 0000000000..78a55bd43e
--- /dev/null
+++ b/paddle/function/CropOp.h
@@ -0,0 +1,56 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "Function.h"
+
+namespace paddle {
+
+struct CropConf {
+  /// The upper left corner of the cropped result
+  std::vector<uint32_t> corner;
+  /// The shape of the cropped result
+  std::vector<uint32_t> shape;
+};
+
+/**
+ * \brief This function crops inputs according to the specified start point
+ *        and shape.
+ *
+ * \param[out] outputs save results.
+ * \param[in]  inputs  input data.
+ * \param[in]  inShape the shape of input tensor.
+ * \param[in]  crop    the cropping config
+ */
+template <DeviceType Device>
+void Crop(real* outputs,
+          const real* inputs,
+          const TensorShape inShape,
+          const CropConf& crop);
+
+/**
+ * \brief Cropping operation backward.
+ *
+ * \param[out] inGrad  gradients of previous layer
+ * \param[in]  outGrad output gradient
+ * \param[in]  inShape the shape of input tensor.
+ * \param[in]  crop    the cropping config
+ */
+template <DeviceType Device>
+void CropGrad(const real* inGrad,
+              real* outGrad,
+              const TensorShape inShape,
+              const CropConf& crop);
+}  // namespace paddle
diff --git a/paddle/function/CropOpGpu.cu b/paddle/function/CropOpGpu.cu
new file mode 100644
index 0000000000..f7d7d03abd
--- /dev/null
+++ b/paddle/function/CropOpGpu.cu
@@ -0,0 +1,109 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
*/ + +#include "hl_base.h" +#include "CropOp.h" + +namespace paddle { + +__global__ void KeCrop(real* outputs, const real* inputs, + int inC, int inH, int inW, + int cropC, int cropH, int cropW, + int outC, int outH, int outW, int nthreads) { + const int idx = threadIdx.x + blockIdx.x * blockDim.x; + if (idx < nthreads) { + const int w = idx % outW; + const int h = (idx / outW) % outH; + const int c = (idx / outW / outH) % outC; + const int n = idx / outW / outH / outC; + + const int off = ((n * inC + c + cropC) * inH + h + cropH) * inW + cropW + w; + outputs[idx] = inputs[off]; + } +} + +template <> +void Crop(real* outputs, + const real* inputs, + const TensorShape inShape, + const CropConf& crop) { + int cropC = crop.corner[0]; + int cropH = crop.corner[1]; + int cropW = crop.corner[2]; + + int num = inShape[0]; + int inC = inShape[1]; + int inH = inShape[2]; + int inW = inShape[3]; + + int outC = crop.shape[0]; + int outH = crop.shape[1]; + int outW = crop.shape[2]; + + size_t nth = num * outC * outH * outW; + int blockSize = 1024; + int gridSize = (nth + blockSize - 1) / blockSize; + + KeCrop<<>> + (outputs, inputs, inC, inH, inW, cropC, cropH, cropW, + outC, outH, outW, nth); + CHECK_SYNC("Crop"); +} + +__global__ void KeCropDiff(const real* inGrad, real* outGrad, + int inC, int inH, int inW, + int cropC, int cropH, int cropW, + int outC, int outH, int outW, int nthreads) { + const int idx = threadIdx.x + blockIdx.x * blockDim.x; + if (idx < nthreads) { + const int w = idx % inW; + const int h = (idx / inW) % inH; + const int c = (idx / inW / inH) % inC; + const int n = idx / inW / inH / inC; + + const int off = ((n * outC + c + cropC) * outH + h + cropH) * outW + cropW + w; + + outGrad[off] += inGrad[idx]; + } +} + +template <> +void CropGrad(const real* inGrad, + real* outGrad, + const TensorShape outShape, + const CropConf& crop) { + int cropC = crop.corner[0]; + int cropH = crop.corner[1]; + int cropW = crop.corner[2]; + + int num = outShape[0]; + int outC = outShape[1]; + int outH = outShape[2]; + int outW = outShape[3]; + + int inC = crop.shape[0]; + int inH = crop.shape[1]; + int inW = crop.shape[2]; + + size_t nth = num * inC * inH * inW; + int blockSize = 1024; + int gridSize = (nth + blockSize - 1) / blockSize; + + KeCropDiff <<>> + (inGrad, outGrad, inC, inH, inW, cropC, cropH, cropW, + outC, outH, outW, nth); + CHECK_SYNC("CropGrad"); +} + +} // namespace paddle diff --git a/paddle/function/CropOpTest.cpp b/paddle/function/CropOpTest.cpp new file mode 100644 index 0000000000..62b4bd9fde --- /dev/null +++ b/paddle/function/CropOpTest.cpp @@ -0,0 +1,47 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
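[Editor's note: both GPU kernels above recover (n, c, h, w) from a flat thread index and re-linearize it against the other tensor's strides. That decomposition can be sanity-checked on the host without CUDA; a minimal sketch with illustrative extents:]

// Round-trip check for the flat-index decomposition used by KeCrop above.
#include <cassert>
#include <cstdio>

int main() {
  const int num = 2, outC = 3, outH = 4, outW = 5;
  const int nth = num * outC * outH * outW;
  for (int idx = 0; idx < nth; ++idx) {
    const int w = idx % outW;
    const int h = (idx / outW) % outH;
    const int c = (idx / outW / outH) % outC;
    const int n = idx / outW / outH / outC;
    // Re-linearizing must land back on the same element.
    assert(((n * outC + c) * outH + h) * outW + w == idx);
  }
  // The launch uses one thread per output element, rounded up:
  const int blockSize = 1024;
  std::printf("%d indices ok, gridSize = %d\n", nth,
              (nth + blockSize - 1) / blockSize);
  return 0;
}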
*/ + +#include +#include "FunctionTest.h" + +namespace paddle { + +TEST(Crop, real) { + for (size_t numSamples : {5, 32}) { + for (size_t channels : {5, 5, 32}) { + for (size_t imgSizeH : {5, 33, 100}) { + for (size_t imgSizeW : {5, 32, 96}) { + VLOG(3) << " numSamples=" << numSamples << " channels=" << channels + << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW; + for (bool test_grad : {false, true}) { + FunctionCompare compare( + test_grad ? "CropGrad" : "Crop", + FuncConfig() + .set>("crop_corner", {1, 1, 1}) + .set>("crop_shape", {2, 3, 3})); + TensorShape inDims{numSamples, channels, imgSizeH, imgSizeW}; + TensorShape outDims{numSamples, 2, 3, 3}; + compare.addInputs( + BufferArg(VALUE_TYPE_FLOAT, test_grad ? outDims : inDims)); + compare.addOutputs(BufferArg( + VALUE_TYPE_FLOAT, test_grad ? inDims : outDims, ASSIGN_TO)); + compare.run(); + } + } + } + } + } +} + +} // namespace paddle diff --git a/paddle/gserver/layers/CropLayer.cpp b/paddle/gserver/layers/CropLayer.cpp new file mode 100644 index 0000000000..ab23d4617e --- /dev/null +++ b/paddle/gserver/layers/CropLayer.cpp @@ -0,0 +1,101 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "CropLayer.h" +#include "paddle/utils/Stat.h" + +namespace paddle { + +REGISTER_LAYER(crop, CropLayer); + +bool CropLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + /* Initialize the basic parent class */ + Layer::init(layerMap, parameterMap); + + auto& crop_conf = config_.inputs(0).crop_conf(); + auto& img_conf = crop_conf.image_conf(); + CHECK_EQ(config_.inputs_size(), 1); + inDims_ = TensorShape( + {0, + img_conf.channels(), + img_conf.has_img_size_y() ? 
img_conf.img_size_y() : img_conf.img_size(), + img_conf.img_size()}); + + crop_corner_ = {crop_conf.crop_corner(0), + crop_conf.crop_corner(1), + crop_conf.crop_corner(2)}; + crop_shape_ = {crop_conf.crop_shape(0), + crop_conf.crop_shape(1), + crop_conf.crop_shape(2)}; + + outDims_ = TensorShape(4); + setOutDims(0); + + createFunction(forward_, + "Crop", + FuncConfig() + .set("crop_corner", crop_corner_) + .set("crop_shape", crop_shape_)); + createFunction(backward_, + "CropGrad", + FuncConfig() + .set("crop_corner", crop_corner_) + .set("crop_shape", crop_shape_)); + + return true; +} + +void CropLayer::setOutDims(const size_t batchSize) { + outDims_.reshape({batchSize, crop_shape_[0], crop_shape_[1], crop_shape_[2]}); +} + +void CropLayer::setTensorDim(const size_t batchSize) { + CHECK_EQ(static_cast(inputLayers_.size()), 1); + inDims_.setDim(0, batchSize); + int h = inputLayers_[0]->getOutput().getFrameHeight(); + if (h != 0) inDims_.setDim(2, h); + int w = inputLayers_[0]->getOutput().getFrameWidth(); + if (w != 0) inDims_.setDim(3, w); + setOutDims(batchSize); +} + +void CropLayer::forward(PassType passType) { + Layer::forward(passType); + MatrixPtr input = inputLayers_[0]->getOutputValue(); + size_t batchSize = input->getHeight(); + setTensorDim(batchSize); + int size = outDims_[1] * outDims_[2] * outDims_[3]; + resetOutput(batchSize, size); + MatrixPtr outV = getOutputValue(); + REGISTER_TIMER_INFO("CropForward", getName().c_str()); + + BufferArgs inputs; + BufferArgs outputs; + inputs.addArg(*getInputValue(0), inDims_); + outputs.addArg(*getOutputValue(), outDims_, ASSIGN_TO); + forward_[0]->calc(inputs, outputs); +} + +void CropLayer::backward(const UpdateCallback& callback) { + (void)callback; + REGISTER_TIMER_INFO("CropBackward", getName().c_str()); + + BufferArgs inputs; + BufferArgs outputs; + inputs.addArg(*getOutputGrad(), outDims_); + outputs.addArg(*getInputGrad(0), inDims_, ADD_TO); + backward_[0]->calc(inputs, outputs); +} +} // namespace paddle diff --git a/paddle/gserver/layers/CropLayer.h b/paddle/gserver/layers/CropLayer.h new file mode 100644 index 0000000000..3ce89707ca --- /dev/null +++ b/paddle/gserver/layers/CropLayer.h @@ -0,0 +1,46 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "Layer.h" + +namespace paddle { + +/** + * \brief This layer crop inputs according to the specify corner and shape. + * The input and output is a 4D tensor. Cropping from the 2nd to + * the 4th dimenstion. 
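[Editor's note: CropLayer::init above wires the layer to its kernels purely by name, via createFunction(forward_, "Crop", ...) and createFunction(backward_, "CropGrad", ...). A minimal sketch of that name-to-factory registration pattern follows; the types are illustrative stand-ins, not the real FunctionBase/REGISTER_TYPED_FUNC machinery in paddle/function.]

#include <cassert>
#include <functional>
#include <map>
#include <memory>
#include <string>

// Illustrative stand-in for the function base class.
struct FuncBase {
  virtual ~FuncBase() {}
  virtual const char* name() const = 0;
};
struct CropFwd : FuncBase {
  const char* name() const override { return "Crop"; }
};

using Factory = std::function<std::unique_ptr<FuncBase>()>;
static std::map<std::string, Factory>& registry() {
  static std::map<std::string, Factory> r;  // one global name -> factory table
  return r;
}

int main() {
  // Conceptually what a REGISTER_TYPED_FUNC-style macro expands to.
  registry()["Crop"] = [] { return std::unique_ptr<FuncBase>(new CropFwd()); };
  // Conceptually what createFunction(forward_, "Crop", ...) does: lookup by name.
  auto f = registry().at("Crop")();
  assert(std::string(f->name()) == "Crop");
  return 0;
}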
+ */ +class CropLayer : public Layer { +public: + explicit CropLayer(const LayerConfig& config) : Layer(config) {} + + ~CropLayer() {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; + +protected: + void setOutDims(const size_t batchSize); + void setTensorDim(const size_t batchSize); + + std::vector crop_corner_; + std::vector crop_shape_; + TensorShape inDims_; + TensorShape outDims_; +}; +} // namespace paddle From 90ed2004a56a955dc6a1413e1d4c624caf31780b Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Thu, 22 Jun 2017 16:54:07 +0800 Subject: [PATCH 280/542] Refine configure option of crop layer 1. change configure content to 'axis, offset, shape' 2. add an optional input to crop layer as cropping reference --- paddle/function/CropOp.cpp | 63 ++++++++++++--------------- paddle/function/CropOp.h | 15 ++----- paddle/function/CropOpGpu.cu | 32 ++++++++------ paddle/function/CropOpTest.cpp | 4 +- paddle/gserver/layers/CropLayer.cpp | 67 ++++++++++++++++++++++------- paddle/gserver/layers/CropLayer.h | 13 ++++-- 6 files changed, 114 insertions(+), 80 deletions(-) diff --git a/paddle/function/CropOp.cpp b/paddle/function/CropOp.cpp index 4d47d9c149..0d511ceef5 100644 --- a/paddle/function/CropOp.cpp +++ b/paddle/function/CropOp.cpp @@ -17,28 +17,27 @@ limitations under the License. */ #include "paddle/function/TensorShape.h" namespace paddle { -static inline CropConf castToCropConf(const FuncConfig& conf) { - return {conf.get>("crop_corner"), - conf.get>("crop_shape")}; -} - template <> void Crop(real* outputs, const real* inputs, const TensorShape inShape, - const CropConf& crop) { - int cCrop = crop.corner[0]; - int hCrop = crop.corner[1]; - int wCrop = crop.corner[2]; + const FuncConfig& conf) { + std::vector crop_corner = + conf.get>("crop_corner"); + std::vector crop_shape = + conf.get>("crop_shape"); + int cCrop = crop_corner[1]; + int hCrop = crop_corner[2]; + int wCrop = crop_corner[3]; int num = inShape[0]; int inC = inShape[1]; int inH = inShape[2]; int inW = inShape[3]; - int outC = crop.shape[0]; - int outH = crop.shape[1]; - int outW = crop.shape[2]; + int outC = crop_shape[1]; + int outH = crop_shape[2]; + int outW = crop_shape[3]; for (int n = 0; n < num; n++) { for (int c = 0; c < outC; c++) { @@ -55,19 +54,23 @@ template <> void CropGrad(const real* inGrad, real* outGrad, const TensorShape outShape, - const CropConf& crop) { - int cCrop = crop.corner[0]; - int hCrop = crop.corner[1]; - int wCrop = crop.corner[2]; + const FuncConfig& conf) { + std::vector crop_corner = + conf.get>("crop_corner"); + std::vector crop_shape = + conf.get>("crop_shape"); + int cCrop = crop_corner[1]; + int hCrop = crop_corner[2]; + int wCrop = crop_corner[3]; int num = outShape[0]; int outC = outShape[1]; int outH = outShape[2]; int outW = outShape[3]; - int inC = crop.shape[0]; - int inH = crop.shape[1]; - int inW = crop.shape[2]; + int inC = crop_shape[1]; + int inH = crop_shape[2]; + int inW = crop_shape[3]; for (int n = 0; n < num; n++) { for (int c = 0; c < inC; c++) { @@ -111,26 +114,21 @@ void CropGrad(const real* inGrad, template class CropFunc : public FunctionBase { public: - void init(const FuncConfig& config) override { - crop_ = castToCropConf(config); - } + void init(const FuncConfig& config) override { conf_ = config; } void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { CHECK_EQ(1UL, inputs.size()); 
CHECK_EQ(1UL, outputs.size()); - CHECK_EQ(outputs[0].shape()[1], crop_.shape[0]); - CHECK_EQ(outputs[0].shape()[2], crop_.shape[1]); - CHECK_EQ(outputs[0].shape()[3], crop_.shape[2]); CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); TensorShape inShape = inputs[0].shape(); Crop( - outputs[0].data(), inputs[0].data(), inShape, crop_); + outputs[0].data(), inputs[0].data(), inShape, conf_); } private: - CropConf crop_; + FuncConfig conf_; }; /** @@ -145,26 +143,21 @@ private: template class CropGradFunc : public FunctionBase { public: - void init(const FuncConfig& config) override { - crop_ = castToCropConf(config); - } + void init(const FuncConfig& config) override { conf_ = config; } void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { CHECK_EQ(1UL, inputs.size()); CHECK_EQ(1UL, outputs.size()); - CHECK_EQ(inputs[0].shape()[1], crop_.shape[0]); - CHECK_EQ(inputs[0].shape()[2], crop_.shape[1]); - CHECK_EQ(inputs[0].shape()[3], crop_.shape[2]); CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); TensorShape outShape = outputs[0].shape(); CropGrad( - inputs[0].data(), outputs[0].data(), outShape, crop_); + inputs[0].data(), outputs[0].data(), outShape, conf_); } private: - CropConf crop_; + FuncConfig conf_; }; REGISTER_TYPED_FUNC(Crop, CPU, CropFunc); diff --git a/paddle/function/CropOp.h b/paddle/function/CropOp.h index 78a55bd43e..71e8c4c00e 100644 --- a/paddle/function/CropOp.h +++ b/paddle/function/CropOp.h @@ -18,13 +18,6 @@ limitations under the License. */ namespace paddle { -struct CropConf { - /// The upper left corner of croped result - std::vector corner; - /// The shape of croped result - std::vector shape; -}; - /** * \brief This funtion crops inputs according to the specify start point and *shape. @@ -32,13 +25,13 @@ struct CropConf { * \param[out] outputs save results. * \param[in] inputs input data. * \param[in] inShape the shape of input tensor. - * \param[in] crop the cropping config + * \param[in] conf the cropping config */ template void Crop(real* outputs, const real* inputs, const TensorShape inShape, - const CropConf& crop); + const FuncConfig& conf); /** * \brief Cropping operation backward. @@ -46,11 +39,11 @@ void Crop(real* outputs, * \param[out] inGrad gradients of previous layer * \param[in] outGrad output gradient * \param[in] inShape the shape of input tensor. 
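[Editor's note: the refactor above drops the pre-parsed CropConf struct; the kernels now pull "crop_corner" and "crop_shape" out of the config at call time, and both vectors grow to four entries so that index i lines up with tensor dimension i. A sketch of that keyed-lookup style, with a plain map standing in for paddle::FuncConfig, which is a typed variant map:]

#include <cassert>
#include <cstdint>
#include <map>
#include <string>
#include <vector>

// Illustrative stand-in for FuncConfig; only vector<uint32_t> values here.
using Conf = std::map<std::string, std::vector<uint32_t>>;

int main() {
  Conf conf;
  conf["crop_corner"] = {0, 1, 1, 1};  // (n, c, h, w): entry i = dimension i
  conf["crop_shape"] = {0, 2, 3, 3};
  // What the refactored kernels read, e.g. hCrop and outW:
  assert(conf.at("crop_corner")[2] == 1);
  assert(conf.at("crop_shape")[3] == 3);
  return 0;
}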
- * \param[in] crop the cropping config + * \param[in] conf the cropping config */ template void CropGrad(const real* inGrad, real* outGrad, const TensorShape inShape, - const CropConf& crop); + const FuncConfig& conf); } // namespace paddle diff --git a/paddle/function/CropOpGpu.cu b/paddle/function/CropOpGpu.cu index f7d7d03abd..cadb58b6e9 100644 --- a/paddle/function/CropOpGpu.cu +++ b/paddle/function/CropOpGpu.cu @@ -37,19 +37,21 @@ template <> void Crop(real* outputs, const real* inputs, const TensorShape inShape, - const CropConf& crop) { - int cropC = crop.corner[0]; - int cropH = crop.corner[1]; - int cropW = crop.corner[2]; + const FuncConfig& conf) { + std::vector crop_corner = conf.get>("crop_corner"); + std::vector crop_shape = conf.get>("crop_shape"); + int cropC = crop_corner[1]; + int cropH = crop_corner[2]; + int cropW = crop_corner[3]; int num = inShape[0]; int inC = inShape[1]; int inH = inShape[2]; int inW = inShape[3]; - int outC = crop.shape[0]; - int outH = crop.shape[1]; - int outW = crop.shape[2]; + int outC = crop_shape[1]; + int outH = crop_shape[2]; + int outW = crop_shape[3]; size_t nth = num * outC * outH * outW; int blockSize = 1024; @@ -82,19 +84,21 @@ template <> void CropGrad(const real* inGrad, real* outGrad, const TensorShape outShape, - const CropConf& crop) { - int cropC = crop.corner[0]; - int cropH = crop.corner[1]; - int cropW = crop.corner[2]; + const FuncConfig& conf) { + std::vector crop_corner = conf.get>("crop_corner"); + std::vector crop_shape = conf.get>("crop_shape"); + int cropC = crop_corner[1]; + int cropH = crop_corner[2]; + int cropW = crop_corner[3]; int num = outShape[0]; int outC = outShape[1]; int outH = outShape[2]; int outW = outShape[3]; - int inC = crop.shape[0]; - int inH = crop.shape[1]; - int inW = crop.shape[2]; + int inC = crop_shape[1]; + int inH = crop_shape[2]; + int inW = crop_shape[3]; size_t nth = num * inC * inH * inW; int blockSize = 1024; diff --git a/paddle/function/CropOpTest.cpp b/paddle/function/CropOpTest.cpp index 62b4bd9fde..c331a70d1f 100644 --- a/paddle/function/CropOpTest.cpp +++ b/paddle/function/CropOpTest.cpp @@ -28,8 +28,8 @@ TEST(Crop, real) { FunctionCompare compare( test_grad ? "CropGrad" : "Crop", FuncConfig() - .set>("crop_corner", {1, 1, 1}) - .set>("crop_shape", {2, 3, 3})); + .set>("crop_corner", {0, 1, 1, 1}) + .set>("crop_shape", {0, 2, 3, 3})); TensorShape inDims{numSamples, channels, imgSizeH, imgSizeW}; TensorShape outDims{numSamples, 2, 3, 3}; compare.addInputs( diff --git a/paddle/gserver/layers/CropLayer.cpp b/paddle/gserver/layers/CropLayer.cpp index ab23d4617e..198ceffb46 100644 --- a/paddle/gserver/layers/CropLayer.cpp +++ b/paddle/gserver/layers/CropLayer.cpp @@ -25,20 +25,57 @@ bool CropLayer::init(const LayerMap& layerMap, Layer::init(layerMap, parameterMap); auto& crop_conf = config_.inputs(0).crop_conf(); - auto& img_conf = crop_conf.image_conf(); - CHECK_EQ(config_.inputs_size(), 1); - inDims_ = TensorShape( - {0, - img_conf.channels(), - img_conf.has_img_size_y() ? img_conf.img_size_y() : img_conf.img_size(), - img_conf.img_size()}); - - crop_corner_ = {crop_conf.crop_corner(0), - crop_conf.crop_corner(1), - crop_conf.crop_corner(2)}; - crop_shape_ = {crop_conf.crop_shape(0), - crop_conf.crop_shape(1), - crop_conf.crop_shape(2)}; + crop_axis_ = crop_conf.axis(); + for (int i = 0; i < crop_conf.offset_size(); i++) { + crop_offsets_[i] = crop_conf.offset(i); + } + + // 1. 
get input_0 shape + auto& input0_img_conf = config_.inputs(0).image_conf(); + inDims_ = TensorShape({0, + input0_img_conf.channels(), + input0_img_conf.has_img_size_y() + ? input0_img_conf.img_size_y() + : input0_img_conf.img_size(), + input0_img_conf.img_size()}); + + // 2. get output shape from input_1 or crop shap conf + if (config_.inputs_size() == 2) { + auto& input1_img_conf = config_.inputs(1).image_conf(); + targetDims_ = TensorShape({0, + input1_img_conf.channels(), + input1_img_conf.has_img_size_y() + ? input1_img_conf.img_size_y() + : input1_img_conf.img_size(), + input1_img_conf.img_size()}); + } else { + targetDims_ = TensorShape({crop_conf.shape(0), + crop_conf.shape(1), + crop_conf.shape(2), + crop_conf.shape(3)}); + } + + // 3. get final crop shape + int dimSize = 4; + for (int i = 0; i < dimSize; i++) { + if (i >= crop_axis_) { + crop_shape_[i] = targetDims_[i]; + } else { + crop_shape_[i] = inDims_[i]; + } + } + + // 4. get final crop corner + crop_corner_ = {0, 0, 0, 0}; + for (int i = 0; i < dimSize; i++) { + if (i >= crop_axis_) { + if (crop_offsets_.size() > 1) { + crop_corner_[i] = crop_offsets_[i - crop_axis_]; + } else { + crop_corner_[i] = crop_offsets_[0]; + } + } + } outDims_ = TensorShape(4); setOutDims(0); @@ -58,7 +95,7 @@ bool CropLayer::init(const LayerMap& layerMap, } void CropLayer::setOutDims(const size_t batchSize) { - outDims_.reshape({batchSize, crop_shape_[0], crop_shape_[1], crop_shape_[2]}); + outDims_.reshape({batchSize, crop_shape_[1], crop_shape_[2], crop_shape_[3]}); } void CropLayer::setTensorDim(const size_t batchSize) { diff --git a/paddle/gserver/layers/CropLayer.h b/paddle/gserver/layers/CropLayer.h index 3ce89707ca..23cede1c3f 100644 --- a/paddle/gserver/layers/CropLayer.h +++ b/paddle/gserver/layers/CropLayer.h @@ -19,9 +19,13 @@ limitations under the License. */ namespace paddle { /** - * \brief This layer crop inputs according to the specify corner and shape. - * The input and output is a 4D tensor. Cropping from the 2nd to - * the 4th dimenstion. + * \brief This layer crop input according to the specify conf. 
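[Editor's note: steps 3 and 4 of init() above derive the effective corner and shape: dimensions before axis are kept from input_0, dimensions from axis onward come from the target shape, and a single offset is broadcast across all cropped axes. A standalone sketch of exactly that derivation, with illustrative values:]

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  const int axis = 2;                               // crop height and width
  std::vector<uint32_t> offsets = {1, 2};           // one per cropped axis
  std::vector<uint32_t> inDims = {0, 4, 16, 16};    // NCHW of input_0
  std::vector<uint32_t> targetDims = {0, 4, 8, 8};  // NCHW of the reference

  std::vector<uint32_t> shape(4), corner(4, 0);
  for (int i = 0; i < 4; i++) {
    shape[i] = (i >= axis) ? targetDims[i] : inDims[i];
    if (i >= axis)
      corner[i] = (offsets.size() > 1) ? offsets[i - axis] : offsets[0];
  }
  for (int i = 0; i < 4; i++)
    std::printf("dim %d: corner=%u shape=%u\n", i, corner[i], shape[i]);
  return 0;  // dims 2/3: corner 1/2, shape 8/8; dims 0/1 untouched
}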
+ * input_0: input to be cropped + * input_1: optional reference input + * axis: start dimension to be croped + * offset: offset of cropping in each dimension + * shape: if reference input layer was not setted, + * crop input as this shape conf */ class CropLayer : public Layer { public: @@ -38,9 +42,12 @@ protected: void setOutDims(const size_t batchSize); void setTensorDim(const size_t batchSize); + int32_t crop_axis_; + std::vector crop_offsets_; std::vector crop_corner_; std::vector crop_shape_; TensorShape inDims_; + TensorShape targetDims_; TensorShape outDims_; }; } // namespace paddle From 701827f59cb5727676818c2fffb2b07766528436 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Wed, 5 Jul 2017 00:53:32 +0800 Subject: [PATCH 281/542] Add grad test and python wrapper for crop layer --- paddle/function/CropOp.cpp | 2 +- paddle/function/CropOpTest.cpp | 2 +- paddle/gserver/layers/CropLayer.cpp | 23 ++++---- paddle/gserver/tests/CMakeLists.txt | 2 +- paddle/gserver/tests/test_LayerGrad.cpp | 28 ++++++++++ proto/ModelConfig.proto | 8 ++- python/paddle/trainer/config_parser.py | 45 ++++++++++++++++ .../paddle/trainer_config_helpers/layers.py | 54 +++++++++++++++++++ 8 files changed, 147 insertions(+), 17 deletions(-) diff --git a/paddle/function/CropOp.cpp b/paddle/function/CropOp.cpp index 0d511ceef5..1bb194a9bc 100644 --- a/paddle/function/CropOp.cpp +++ b/paddle/function/CropOp.cpp @@ -148,7 +148,7 @@ public: void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { CHECK_EQ(1UL, inputs.size()); CHECK_EQ(1UL, outputs.size()); - CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); + CHECK_EQ(outputs[0].getArgType(), ADD_TO); TensorShape outShape = outputs[0].shape(); diff --git a/paddle/function/CropOpTest.cpp b/paddle/function/CropOpTest.cpp index c331a70d1f..71d9b05812 100644 --- a/paddle/function/CropOpTest.cpp +++ b/paddle/function/CropOpTest.cpp @@ -25,7 +25,7 @@ TEST(Crop, real) { VLOG(3) << " numSamples=" << numSamples << " channels=" << channels << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW; for (bool test_grad : {false, true}) { - FunctionCompare compare( + CpuGpuFuncCompare compare( test_grad ? "CropGrad" : "Crop", FuncConfig() .set>("crop_corner", {0, 1, 1, 1}) diff --git a/paddle/gserver/layers/CropLayer.cpp b/paddle/gserver/layers/CropLayer.cpp index 198ceffb46..b2fa17b400 100644 --- a/paddle/gserver/layers/CropLayer.cpp +++ b/paddle/gserver/layers/CropLayer.cpp @@ -14,7 +14,6 @@ limitations under the License. */ #include "CropLayer.h" #include "paddle/utils/Stat.h" - namespace paddle { REGISTER_LAYER(crop, CropLayer); @@ -24,10 +23,9 @@ bool CropLayer::init(const LayerMap& layerMap, /* Initialize the basic parent class */ Layer::init(layerMap, parameterMap); - auto& crop_conf = config_.inputs(0).crop_conf(); - crop_axis_ = crop_conf.axis(); - for (int i = 0; i < crop_conf.offset_size(); i++) { - crop_offsets_[i] = crop_conf.offset(i); + crop_axis_ = config_.axis(); + for (int i = 0; i < config_.offset_size(); i++) { + crop_offsets_.push_back(config_.offset(i)); } // 1. get input_0 shape @@ -38,7 +36,6 @@ bool CropLayer::init(const LayerMap& layerMap, ? input0_img_conf.img_size_y() : input0_img_conf.img_size(), input0_img_conf.img_size()}); - // 2. 
get output shape from input_1 or crop shap conf if (config_.inputs_size() == 2) { auto& input1_img_conf = config_.inputs(1).image_conf(); @@ -49,19 +46,19 @@ bool CropLayer::init(const LayerMap& layerMap, : input1_img_conf.img_size(), input1_img_conf.img_size()}); } else { - targetDims_ = TensorShape({crop_conf.shape(0), - crop_conf.shape(1), - crop_conf.shape(2), - crop_conf.shape(3)}); + targetDims_ = TensorShape({config_.shape(0), + config_.shape(1), + config_.shape(2), + config_.shape(3)}); } // 3. get final crop shape int dimSize = 4; for (int i = 0; i < dimSize; i++) { if (i >= crop_axis_) { - crop_shape_[i] = targetDims_[i]; + crop_shape_.push_back(targetDims_[i]); } else { - crop_shape_[i] = inDims_[i]; + crop_shape_.push_back(inDims_[i]); } } @@ -99,7 +96,7 @@ void CropLayer::setOutDims(const size_t batchSize) { } void CropLayer::setTensorDim(const size_t batchSize) { - CHECK_EQ(static_cast(inputLayers_.size()), 1); + CHECK_EQ(static_cast(inputLayers_.size()), 2); inDims_.setDim(0, batchSize); int h = inputLayers_[0]->getOutput().getFrameHeight(); if (h != 0) inDims_.setDim(2, h); diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index 92f6cbcfe5..a43adc7ce7 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -56,7 +56,7 @@ add_test(NAME test_DetectionOutput add_unittest_without_exec(test_ConvUnify test_ConvUnify.cpp LayerGradUtil.cpp) - + add_test(NAME test_ConvUnify COMMAND test_ConvUnify) ################# test_BatchNorm ####################### diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 59d1e9273d..20a83d7aa1 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -1792,6 +1792,34 @@ TEST(Layer, RowConvLayer) { } } +TEST(Layer, CropLayer) { + TestConfig config; + // config input_0 + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 0}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + ImageConfig* img = input->mutable_image_conf(); + img->set_channels(4); + img->set_img_size(16); + config.layerConfig.set_axis(2); + config.layerConfig.add_offset(0); + config.layerConfig.add_offset(0); + + // config input_1 + config.inputDefs.push_back({INPUT_DATA, "layer_1", 128, 0}); + input = config.layerConfig.add_inputs(); + img = input->mutable_image_conf(); + img->set_channels(2); + img->set_img_size(8); + + // config crop layer + config.layerConfig.set_type("crop"); + config.layerConfig.set_name("cropLayer"); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "crop", 100, false, useGpu, false); + } +} + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); initMain(argc, argv); diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 37cd16c798..83f72c137b 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -472,10 +472,16 @@ message LayerConfig { // blank label used in ctc loss optional uint32 blank = 52 [default = 0]; - // stride parameter for seqlastins layer, AverageLayer, MaxLayer, which + // stride parameter for seqlastins layer, AverageLayer, MaxLayer, which // controls the scope of pooling operation. can be set > 0. // leave empty or set to -1 to disable this stride pooling. 
optional int32 seq_pool_stride = 53 [default = -1];
+
+  // for crop layer
+  optional int32 axis = 54 [default = 2];
+  repeated uint32 offset = 55;
+  repeated uint32 shape = 56;
+
 }
 
 message EvaluatorConfig {
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 370529ed97..8c529fdfd3 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -1986,6 +1986,51 @@ class PadLayer(LayerBase):
         self.config.size = out_ch * out_h * out_w
 
 
+@config_layer('crop')
+class CropLayer(LayerBase):
+    def __init__(self, inputs, axis, offset, shape, name, **xargs):
+        super(CropLayer, self).__init__(name, 'crop', 0, inputs=inputs, **xargs)
+        # axis/offset/shape now live on LayerConfig (see the proto change above)
+        self.config.axis = axis
+        self.config.offset.extend(offset)
+        self.config.shape.extend(shape)
+
+        # get channel, width and height from input_0 layer
+        input_layer = self.get_input_layer(0)
+        image_conf = self.config.inputs[0].image_conf
+        image_conf.img_size = input_layer.width
+        image_conf.img_size_y = input_layer.height
+        image_conf.channels = input_layer.size / (input_layer.width *
+                                                  input_layer.height)
+        out_ch = image_conf.channels
+        out_h = image_conf.img_size_y
+        out_w = image_conf.img_size
+        if len(self.inputs) == 2:
+            # get channels, width and height from input_1 layer
+            input_layer = self.get_input_layer(1)
+            image_conf = self.config.inputs[1].image_conf
+            image_conf.img_size = input_layer.width
+            image_conf.img_size_y = input_layer.height
+            image_conf.channels = input_layer.size / (input_layer.width *
+                                                      input_layer.height)
+            out_ch = image_conf.channels
+            out_h = image_conf.img_size_y
+            out_w = image_conf.img_size
+        else:
+            # set channels, width and height of current layer
+            if len(shape) > 2:
+                out_ch = shape[-3]
+            if len(shape) > 1:
+                out_h = shape[-2]
+            if len(shape) > 0:
+                out_w = shape[-1]
+        self.set_cnn_layer(name, out_h, out_w, out_ch)
+
+
 @config_layer('batch_norm')
 class BatchNormLayer(LayerBase):
     layer_type = 'batch_norm'
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 206de1f8e1..f9de086cba 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -217,6 +217,7 @@ class LayerType(object):
     SMOOTH_L1 = 'smooth_l1'
 
     PRELU = 'prelu'
+    CROP_LAYER = 'crop'
 
     @staticmethod
     def is_layer_type(type_name):
@@ -5853,3 +5854,56 @@ def prelu_layer(input,
         layer_type=LayerType.PRELU,
         parents=input,
         size=l.config.size)
+
+
+@wrap_name_default()
+@layer_support()
+def crop_layer(input, axis, offset, shape=None, name=None, layer_attr=None):
+    """
+    The crop layer crops images according to the given offset and shape.
+    The crop shape can be set explicitly through the 'shape' argument or
+    taken from a reference input layer.
+
+    The example usage is:
+
+    .. code-block:: python
+
+        crop = crop_layer(input=[image_input, reference_input], axis=2, offset=[2, 3])
+
+    :param input: The input layer. If two inputs are given, the second one
+                  is treated as the reference input for the crop shape.
+    :type input: LayerOutput or Sequence
+    :param axis: start axis to be cropped. For an image input layer:
+                 - 0: batch size
+                 - 1: channels
+                 - 2: height
+                 - 3: width
+    :type axis: int
+    :param offset: The crop offset.
+    :type offset: Sequence
+    :param shape: The shape to be cropped. Default is None.
+    :type shape: Sequence | None
+    :param name: Name of this layer.
+    :type name: basestring
+    :return: LayerOutput object.
+    :rtype: LayerOutput
+    """
+    if isinstance(input, LayerOutput):
+        input = [input]
+    elif isinstance(input, Projection):
+        input = [input]
+    else:
+        assert isinstance(input, collections.Sequence)
+    l = Layer(
+        inputs=[x.name for x in input],
+        axis=axis,
+        offset=offset,
+        shape=shape,
+        name=name,
+        type=LayerType.CROP_LAYER,
+        **ExtraLayerAttribute.to_kwargs(layer_attr))
+    return LayerOutput(
+        name=name,
+        layer_type=LayerType.CROP_LAYER,
+        parents=input,
+        size=l.config.size)

From cbd61c7719b148043f4b8a4f3feacca57c17f1ab Mon Sep 17 00:00:00 2001
From: wanghaoshuang
Date: Wed, 5 Jul 2017 10:36:22 +0800
Subject: [PATCH 282/542] fix crop function test

---
 paddle/function/CropOpTest.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/paddle/function/CropOpTest.cpp b/paddle/function/CropOpTest.cpp
index 71d9b05812..dcba972e10 100644
--- a/paddle/function/CropOpTest.cpp
+++ b/paddle/function/CropOpTest.cpp
@@ -34,8 +34,10 @@ TEST(Crop, real) {
           TensorShape outDims{numSamples, 2, 3, 3};
           compare.addInputs(
               BufferArg(VALUE_TYPE_FLOAT, test_grad ? outDims : inDims));
-          compare.addOutputs(BufferArg(
-              VALUE_TYPE_FLOAT, test_grad ? inDims : outDims, ASSIGN_TO));
+          compare.addOutputs(BufferArg(VALUE_TYPE_FLOAT,
+                                       test_grad ? inDims : outDims,
+                                       test_grad ? ADD_TO : ASSIGN_TO),
+                             test_grad ? ADD_TO : ASSIGN_TO);
           compare.run();
         }
       }

From 568c03ba1d311ac2af2cb9242cefb00537174e50 Mon Sep 17 00:00:00 2001
From: Superjom
Date: Wed, 5 Jul 2017 10:51:47 +0800
Subject: [PATCH 283/542] add virtual implementation

---
 paddle/framework/net.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/paddle/framework/net.h b/paddle/framework/net.h
index e60356dc17..2025bfa4b2 100644
--- a/paddle/framework/net.h
+++ b/paddle/framework/net.h
@@ -91,6 +91,8 @@ class Net {
    * @brief Create a network.
    */
   static std::unique_ptr<Net> Create(const NetDesc &def = NetDesc());
+
+  virtual ~Net() {}
 };
 
 /**
@@ -140,6 +142,8 @@ class PlainNet : public Net {
    */
   virtual void AddBackwardOps() override;
 
+  virtual ~PlainNet() override {}
+
 protected:
   /**
    * @brief Build the network.
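[Editor's note: the two net.h changes in this series (patch 277 removing `virtual ~Net() = 0;`, patch 283 adding `virtual ~Net() {}`) are two sides of the same C++ rule. A pure virtual destructor still needs an out-of-line definition or derived destructors fail to link, while omitting the virtual destructor entirely makes deleting a PlainNet through a Net pointer, as Net::Create's unique_ptr does, undefined behavior. A minimal illustration with stand-in types:]

#include <cstdio>
#include <memory>

struct Net {
  virtual void Run() = 0;
  virtual ~Net() {}  // empty body: safe to delete derived objects via Net*
};

struct PlainNet : Net {
  void Run() override { std::printf("run\n"); }
  ~PlainNet() override { std::printf("~PlainNet\n"); }
};

int main() {
  std::unique_ptr<Net> net(new PlainNet());  // mirrors Net::Create's ownership
  net->Run();
  return 0;  // ~PlainNet fires because ~Net is virtual
}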
From e10040ca8a9b4b9d9eb8275cab468edefd94caf9 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Fri, 16 Jun 2017 19:02:46 +0800 Subject: [PATCH 284/542] add crop layer --- paddle/function/CMakeLists.txt | 1 + paddle/function/CropOp.cpp | 177 ++++++++++++++++++++++++++++ paddle/function/CropOp.h | 56 +++++++++ paddle/function/CropOpGpu.cu | 109 +++++++++++++++++ paddle/function/CropOpTest.cpp | 47 ++++++++ paddle/gserver/layers/CropLayer.cpp | 101 ++++++++++++++++ paddle/gserver/layers/CropLayer.h | 46 ++++++++ 7 files changed, 537 insertions(+) create mode 100644 paddle/function/CropOp.cpp create mode 100644 paddle/function/CropOp.h create mode 100644 paddle/function/CropOpGpu.cu create mode 100644 paddle/function/CropOpTest.cpp create mode 100644 paddle/gserver/layers/CropLayer.cpp create mode 100644 paddle/gserver/layers/CropLayer.h diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index 1518a8a654..f19a1eb777 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -37,6 +37,7 @@ if(WITH_GPU) add_simple_unittest(MulOpTest) add_simple_unittest(CosSimOpTest) add_simple_unittest(RowConvOpTest) + add_simple_unittest(CropOpTest) endif() add_simple_unittest(ConvOpTest) diff --git a/paddle/function/CropOp.cpp b/paddle/function/CropOp.cpp new file mode 100644 index 0000000000..4d47d9c149 --- /dev/null +++ b/paddle/function/CropOp.cpp @@ -0,0 +1,177 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "CropOp.h" +#include "paddle/math/Vector.h" +#include "paddle/function/TensorShape.h" +namespace paddle { + +static inline CropConf castToCropConf(const FuncConfig& conf) { + return {conf.get>("crop_corner"), + conf.get>("crop_shape")}; +} + +template <> +void Crop(real* outputs, + const real* inputs, + const TensorShape inShape, + const CropConf& crop) { + int cCrop = crop.corner[0]; + int hCrop = crop.corner[1]; + int wCrop = crop.corner[2]; + + int num = inShape[0]; + int inC = inShape[1]; + int inH = inShape[2]; + int inW = inShape[3]; + + int outC = crop.shape[0]; + int outH = crop.shape[1]; + int outW = crop.shape[2]; + + for (int n = 0; n < num; n++) { + for (int c = 0; c < outC; c++) { + for (int h = 0; h < outH; h++) { + int outoff = ((n * outC + c) * outH + h) * outW; + int inoff = ((n * inC + c + cCrop) * inH + h + hCrop) * inW + wCrop; + memcpy(outputs + outoff, inputs + inoff, outW * sizeof(real)); + } + } + } +} + +template <> +void CropGrad(const real* inGrad, + real* outGrad, + const TensorShape outShape, + const CropConf& crop) { + int cCrop = crop.corner[0]; + int hCrop = crop.corner[1]; + int wCrop = crop.corner[2]; + + int num = outShape[0]; + int outC = outShape[1]; + int outH = outShape[2]; + int outW = outShape[3]; + + int inC = crop.shape[0]; + int inH = crop.shape[1]; + int inW = crop.shape[2]; + + for (int n = 0; n < num; n++) { + for (int c = 0; c < inC; c++) { + for (int h = 0; h < inH; h++) { + int outoff = ((n * outC + c + cCrop) * outH + h + hCrop) * outW + wCrop; + int inoff = ((n * inC + c) * inH + h) * inW; + CpuVector inG = CpuVector(inW, const_cast(inGrad + inoff)); + CpuVector outG = CpuVector(inW, outGrad + outoff); + outG += inG; + } + } + } +} + +/** + * \brief Crop input according to the specify corner and shape. + * The input and output is a 4D tensor. In CropFunc, we only + * crop the 2nd to 4th dimension. + * + * Argument in this Function: + * \param pad_ A struct object contains the cropping corner and shape. + * \param inputs A 4D tensor, only one input. + * \param outputs A 4D tensor, the output value after cropping. + * + * For example, + * Input(2,2,2,3) = [ + * [ [[1,2,3], [3,4,5]], + * [[2,3,5], [1,6,7]] ], + * [ [[4,3,1], [1,8,7]], + * [[3,8,9], [2,3,5]] ] + * ] # the input shape is (2,2,2,3) + * + * pad_: if corner = (0,1,1) and crop_shape = (2,1,2) + * Output(2,2,1,2) = [ + * [ [[4,5]], + * [[6,7]] ], + * [ [[8,7]], + * [[3,5]] ] + * ] # the input shape is (2,2,2,3) + */ +template +class CropFunc : public FunctionBase { +public: + void init(const FuncConfig& config) override { + crop_ = castToCropConf(config); + } + + void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { + CHECK_EQ(1UL, inputs.size()); + CHECK_EQ(1UL, outputs.size()); + CHECK_EQ(outputs[0].shape()[1], crop_.shape[0]); + CHECK_EQ(outputs[0].shape()[2], crop_.shape[1]); + CHECK_EQ(outputs[0].shape()[3], crop_.shape[2]); + CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); + + TensorShape inShape = inputs[0].shape(); + + Crop( + outputs[0].data(), inputs[0].data(), inShape, crop_); + } + +private: + CropConf crop_; +}; + +/** + * \brief The backward propagation of cropping Function. + * + * Argument in this Function: + * \param crop_ The same meaning as it in CropFunc. + * \param inputs The gradient with respect to the output value of CropFunc. + * \param outputs The gradient with respect to the input value of CropFunc. 
+ */ + +template +class CropGradFunc : public FunctionBase { +public: + void init(const FuncConfig& config) override { + crop_ = castToCropConf(config); + } + + void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { + CHECK_EQ(1UL, inputs.size()); + CHECK_EQ(1UL, outputs.size()); + CHECK_EQ(inputs[0].shape()[1], crop_.shape[0]); + CHECK_EQ(inputs[0].shape()[2], crop_.shape[1]); + CHECK_EQ(inputs[0].shape()[3], crop_.shape[2]); + CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); + + TensorShape outShape = outputs[0].shape(); + + CropGrad( + inputs[0].data(), outputs[0].data(), outShape, crop_); + } + +private: + CropConf crop_; +}; + +REGISTER_TYPED_FUNC(Crop, CPU, CropFunc); +REGISTER_TYPED_FUNC(CropGrad, CPU, CropGradFunc); +#ifndef PADDLE_ONLY_CPU +REGISTER_TYPED_FUNC(Crop, GPU, CropFunc); +REGISTER_TYPED_FUNC(CropGrad, GPU, CropGradFunc); +#endif + +} // namespace paddle diff --git a/paddle/function/CropOp.h b/paddle/function/CropOp.h new file mode 100644 index 0000000000..78a55bd43e --- /dev/null +++ b/paddle/function/CropOp.h @@ -0,0 +1,56 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "Function.h" + +namespace paddle { + +struct CropConf { + /// The upper left corner of croped result + std::vector corner; + /// The shape of croped result + std::vector shape; +}; + +/** + * \brief This funtion crops inputs according to the specify start point and + *shape. + * + * \param[out] outputs save results. + * \param[in] inputs input data. + * \param[in] inShape the shape of input tensor. + * \param[in] crop the cropping config + */ +template +void Crop(real* outputs, + const real* inputs, + const TensorShape inShape, + const CropConf& crop); + +/** + * \brief Cropping operation backward. + * + * \param[out] inGrad gradients of previous layer + * \param[in] outGrad output gradient + * \param[in] inShape the shape of input tensor. + * \param[in] crop the cropping config + */ +template +void CropGrad(const real* inGrad, + real* outGrad, + const TensorShape inShape, + const CropConf& crop); +} // namespace paddle diff --git a/paddle/function/CropOpGpu.cu b/paddle/function/CropOpGpu.cu new file mode 100644 index 0000000000..f7d7d03abd --- /dev/null +++ b/paddle/function/CropOpGpu.cu @@ -0,0 +1,109 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "hl_base.h" +#include "CropOp.h" + +namespace paddle { + +__global__ void KeCrop(real* outputs, const real* inputs, + int inC, int inH, int inW, + int cropC, int cropH, int cropW, + int outC, int outH, int outW, int nthreads) { + const int idx = threadIdx.x + blockIdx.x * blockDim.x; + if (idx < nthreads) { + const int w = idx % outW; + const int h = (idx / outW) % outH; + const int c = (idx / outW / outH) % outC; + const int n = idx / outW / outH / outC; + + const int off = ((n * inC + c + cropC) * inH + h + cropH) * inW + cropW + w; + outputs[idx] = inputs[off]; + } +} + +template <> +void Crop(real* outputs, + const real* inputs, + const TensorShape inShape, + const CropConf& crop) { + int cropC = crop.corner[0]; + int cropH = crop.corner[1]; + int cropW = crop.corner[2]; + + int num = inShape[0]; + int inC = inShape[1]; + int inH = inShape[2]; + int inW = inShape[3]; + + int outC = crop.shape[0]; + int outH = crop.shape[1]; + int outW = crop.shape[2]; + + size_t nth = num * outC * outH * outW; + int blockSize = 1024; + int gridSize = (nth + blockSize - 1) / blockSize; + + KeCrop<<>> + (outputs, inputs, inC, inH, inW, cropC, cropH, cropW, + outC, outH, outW, nth); + CHECK_SYNC("Crop"); +} + +__global__ void KeCropDiff(const real* inGrad, real* outGrad, + int inC, int inH, int inW, + int cropC, int cropH, int cropW, + int outC, int outH, int outW, int nthreads) { + const int idx = threadIdx.x + blockIdx.x * blockDim.x; + if (idx < nthreads) { + const int w = idx % inW; + const int h = (idx / inW) % inH; + const int c = (idx / inW / inH) % inC; + const int n = idx / inW / inH / inC; + + const int off = ((n * outC + c + cropC) * outH + h + cropH) * outW + cropW + w; + + outGrad[off] += inGrad[idx]; + } +} + +template <> +void CropGrad(const real* inGrad, + real* outGrad, + const TensorShape outShape, + const CropConf& crop) { + int cropC = crop.corner[0]; + int cropH = crop.corner[1]; + int cropW = crop.corner[2]; + + int num = outShape[0]; + int outC = outShape[1]; + int outH = outShape[2]; + int outW = outShape[3]; + + int inC = crop.shape[0]; + int inH = crop.shape[1]; + int inW = crop.shape[2]; + + size_t nth = num * inC * inH * inW; + int blockSize = 1024; + int gridSize = (nth + blockSize - 1) / blockSize; + + KeCropDiff <<>> + (inGrad, outGrad, inC, inH, inW, cropC, cropH, cropW, + outC, outH, outW, nth); + CHECK_SYNC("CropGrad"); +} + +} // namespace paddle diff --git a/paddle/function/CropOpTest.cpp b/paddle/function/CropOpTest.cpp new file mode 100644 index 0000000000..62b4bd9fde --- /dev/null +++ b/paddle/function/CropOpTest.cpp @@ -0,0 +1,47 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include +#include "FunctionTest.h" + +namespace paddle { + +TEST(Crop, real) { + for (size_t numSamples : {5, 32}) { + for (size_t channels : {5, 5, 32}) { + for (size_t imgSizeH : {5, 33, 100}) { + for (size_t imgSizeW : {5, 32, 96}) { + VLOG(3) << " numSamples=" << numSamples << " channels=" << channels + << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW; + for (bool test_grad : {false, true}) { + FunctionCompare compare( + test_grad ? "CropGrad" : "Crop", + FuncConfig() + .set>("crop_corner", {1, 1, 1}) + .set>("crop_shape", {2, 3, 3})); + TensorShape inDims{numSamples, channels, imgSizeH, imgSizeW}; + TensorShape outDims{numSamples, 2, 3, 3}; + compare.addInputs( + BufferArg(VALUE_TYPE_FLOAT, test_grad ? outDims : inDims)); + compare.addOutputs(BufferArg( + VALUE_TYPE_FLOAT, test_grad ? inDims : outDims, ASSIGN_TO)); + compare.run(); + } + } + } + } + } +} + +} // namespace paddle diff --git a/paddle/gserver/layers/CropLayer.cpp b/paddle/gserver/layers/CropLayer.cpp new file mode 100644 index 0000000000..ab23d4617e --- /dev/null +++ b/paddle/gserver/layers/CropLayer.cpp @@ -0,0 +1,101 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "CropLayer.h" +#include "paddle/utils/Stat.h" + +namespace paddle { + +REGISTER_LAYER(crop, CropLayer); + +bool CropLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + /* Initialize the basic parent class */ + Layer::init(layerMap, parameterMap); + + auto& crop_conf = config_.inputs(0).crop_conf(); + auto& img_conf = crop_conf.image_conf(); + CHECK_EQ(config_.inputs_size(), 1); + inDims_ = TensorShape( + {0, + img_conf.channels(), + img_conf.has_img_size_y() ? 
img_conf.img_size_y() : img_conf.img_size(), + img_conf.img_size()}); + + crop_corner_ = {crop_conf.crop_corner(0), + crop_conf.crop_corner(1), + crop_conf.crop_corner(2)}; + crop_shape_ = {crop_conf.crop_shape(0), + crop_conf.crop_shape(1), + crop_conf.crop_shape(2)}; + + outDims_ = TensorShape(4); + setOutDims(0); + + createFunction(forward_, + "Crop", + FuncConfig() + .set("crop_corner", crop_corner_) + .set("crop_shape", crop_shape_)); + createFunction(backward_, + "CropGrad", + FuncConfig() + .set("crop_corner", crop_corner_) + .set("crop_shape", crop_shape_)); + + return true; +} + +void CropLayer::setOutDims(const size_t batchSize) { + outDims_.reshape({batchSize, crop_shape_[0], crop_shape_[1], crop_shape_[2]}); +} + +void CropLayer::setTensorDim(const size_t batchSize) { + CHECK_EQ(static_cast(inputLayers_.size()), 1); + inDims_.setDim(0, batchSize); + int h = inputLayers_[0]->getOutput().getFrameHeight(); + if (h != 0) inDims_.setDim(2, h); + int w = inputLayers_[0]->getOutput().getFrameWidth(); + if (w != 0) inDims_.setDim(3, w); + setOutDims(batchSize); +} + +void CropLayer::forward(PassType passType) { + Layer::forward(passType); + MatrixPtr input = inputLayers_[0]->getOutputValue(); + size_t batchSize = input->getHeight(); + setTensorDim(batchSize); + int size = outDims_[1] * outDims_[2] * outDims_[3]; + resetOutput(batchSize, size); + MatrixPtr outV = getOutputValue(); + REGISTER_TIMER_INFO("CropForward", getName().c_str()); + + BufferArgs inputs; + BufferArgs outputs; + inputs.addArg(*getInputValue(0), inDims_); + outputs.addArg(*getOutputValue(), outDims_, ASSIGN_TO); + forward_[0]->calc(inputs, outputs); +} + +void CropLayer::backward(const UpdateCallback& callback) { + (void)callback; + REGISTER_TIMER_INFO("CropBackward", getName().c_str()); + + BufferArgs inputs; + BufferArgs outputs; + inputs.addArg(*getOutputGrad(), outDims_); + outputs.addArg(*getInputGrad(0), inDims_, ADD_TO); + backward_[0]->calc(inputs, outputs); +} +} // namespace paddle diff --git a/paddle/gserver/layers/CropLayer.h b/paddle/gserver/layers/CropLayer.h new file mode 100644 index 0000000000..3ce89707ca --- /dev/null +++ b/paddle/gserver/layers/CropLayer.h @@ -0,0 +1,46 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "Layer.h" + +namespace paddle { + +/** + * \brief This layer crop inputs according to the specify corner and shape. + * The input and output is a 4D tensor. Cropping from the 2nd to + * the 4th dimenstion. 
+ */ +class CropLayer : public Layer { +public: + explicit CropLayer(const LayerConfig& config) : Layer(config) {} + + ~CropLayer() {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; + +protected: + void setOutDims(const size_t batchSize); + void setTensorDim(const size_t batchSize); + + std::vector crop_corner_; + std::vector crop_shape_; + TensorShape inDims_; + TensorShape outDims_; +}; +} // namespace paddle From d1d70ec8319a55964231f2e925ef8cb881c94497 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Thu, 22 Jun 2017 16:54:07 +0800 Subject: [PATCH 285/542] Refine configure option of crop layer 1. change configure content to 'axis, offset, shape' 2. add an optional input to crop layer as cropping reference --- paddle/function/CropOp.cpp | 63 ++++++++++++--------------- paddle/function/CropOp.h | 15 ++----- paddle/function/CropOpGpu.cu | 32 ++++++++------ paddle/function/CropOpTest.cpp | 4 +- paddle/gserver/layers/CropLayer.cpp | 67 ++++++++++++++++++++++------- paddle/gserver/layers/CropLayer.h | 13 ++++-- 6 files changed, 114 insertions(+), 80 deletions(-) diff --git a/paddle/function/CropOp.cpp b/paddle/function/CropOp.cpp index 4d47d9c149..0d511ceef5 100644 --- a/paddle/function/CropOp.cpp +++ b/paddle/function/CropOp.cpp @@ -17,28 +17,27 @@ limitations under the License. */ #include "paddle/function/TensorShape.h" namespace paddle { -static inline CropConf castToCropConf(const FuncConfig& conf) { - return {conf.get>("crop_corner"), - conf.get>("crop_shape")}; -} - template <> void Crop(real* outputs, const real* inputs, const TensorShape inShape, - const CropConf& crop) { - int cCrop = crop.corner[0]; - int hCrop = crop.corner[1]; - int wCrop = crop.corner[2]; + const FuncConfig& conf) { + std::vector crop_corner = + conf.get>("crop_corner"); + std::vector crop_shape = + conf.get>("crop_shape"); + int cCrop = crop_corner[1]; + int hCrop = crop_corner[2]; + int wCrop = crop_corner[3]; int num = inShape[0]; int inC = inShape[1]; int inH = inShape[2]; int inW = inShape[3]; - int outC = crop.shape[0]; - int outH = crop.shape[1]; - int outW = crop.shape[2]; + int outC = crop_shape[1]; + int outH = crop_shape[2]; + int outW = crop_shape[3]; for (int n = 0; n < num; n++) { for (int c = 0; c < outC; c++) { @@ -55,19 +54,23 @@ template <> void CropGrad(const real* inGrad, real* outGrad, const TensorShape outShape, - const CropConf& crop) { - int cCrop = crop.corner[0]; - int hCrop = crop.corner[1]; - int wCrop = crop.corner[2]; + const FuncConfig& conf) { + std::vector crop_corner = + conf.get>("crop_corner"); + std::vector crop_shape = + conf.get>("crop_shape"); + int cCrop = crop_corner[1]; + int hCrop = crop_corner[2]; + int wCrop = crop_corner[3]; int num = outShape[0]; int outC = outShape[1]; int outH = outShape[2]; int outW = outShape[3]; - int inC = crop.shape[0]; - int inH = crop.shape[1]; - int inW = crop.shape[2]; + int inC = crop_shape[1]; + int inH = crop_shape[2]; + int inW = crop_shape[3]; for (int n = 0; n < num; n++) { for (int c = 0; c < inC; c++) { @@ -111,26 +114,21 @@ void CropGrad(const real* inGrad, template class CropFunc : public FunctionBase { public: - void init(const FuncConfig& config) override { - crop_ = castToCropConf(config); - } + void init(const FuncConfig& config) override { conf_ = config; } void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { CHECK_EQ(1UL, inputs.size()); 
CHECK_EQ(1UL, outputs.size()); - CHECK_EQ(outputs[0].shape()[1], crop_.shape[0]); - CHECK_EQ(outputs[0].shape()[2], crop_.shape[1]); - CHECK_EQ(outputs[0].shape()[3], crop_.shape[2]); CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); TensorShape inShape = inputs[0].shape(); Crop( - outputs[0].data(), inputs[0].data(), inShape, crop_); + outputs[0].data(), inputs[0].data(), inShape, conf_); } private: - CropConf crop_; + FuncConfig conf_; }; /** @@ -145,26 +143,21 @@ private: template class CropGradFunc : public FunctionBase { public: - void init(const FuncConfig& config) override { - crop_ = castToCropConf(config); - } + void init(const FuncConfig& config) override { conf_ = config; } void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { CHECK_EQ(1UL, inputs.size()); CHECK_EQ(1UL, outputs.size()); - CHECK_EQ(inputs[0].shape()[1], crop_.shape[0]); - CHECK_EQ(inputs[0].shape()[2], crop_.shape[1]); - CHECK_EQ(inputs[0].shape()[3], crop_.shape[2]); CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); TensorShape outShape = outputs[0].shape(); CropGrad( - inputs[0].data(), outputs[0].data(), outShape, crop_); + inputs[0].data(), outputs[0].data(), outShape, conf_); } private: - CropConf crop_; + FuncConfig conf_; }; REGISTER_TYPED_FUNC(Crop, CPU, CropFunc); diff --git a/paddle/function/CropOp.h b/paddle/function/CropOp.h index 78a55bd43e..71e8c4c00e 100644 --- a/paddle/function/CropOp.h +++ b/paddle/function/CropOp.h @@ -18,13 +18,6 @@ limitations under the License. */ namespace paddle { -struct CropConf { - /// The upper left corner of croped result - std::vector corner; - /// The shape of croped result - std::vector shape; -}; - /** * \brief This funtion crops inputs according to the specify start point and *shape. @@ -32,13 +25,13 @@ struct CropConf { * \param[out] outputs save results. * \param[in] inputs input data. * \param[in] inShape the shape of input tensor. - * \param[in] crop the cropping config + * \param[in] conf the cropping config */ template void Crop(real* outputs, const real* inputs, const TensorShape inShape, - const CropConf& crop); + const FuncConfig& conf); /** * \brief Cropping operation backward. @@ -46,11 +39,11 @@ void Crop(real* outputs, * \param[out] inGrad gradients of previous layer * \param[in] outGrad output gradient * \param[in] inShape the shape of input tensor. 
- * \param[in] crop the cropping config + * \param[in] conf the cropping config */ template void CropGrad(const real* inGrad, real* outGrad, const TensorShape inShape, - const CropConf& crop); + const FuncConfig& conf); } // namespace paddle diff --git a/paddle/function/CropOpGpu.cu b/paddle/function/CropOpGpu.cu index f7d7d03abd..cadb58b6e9 100644 --- a/paddle/function/CropOpGpu.cu +++ b/paddle/function/CropOpGpu.cu @@ -37,19 +37,21 @@ template <> void Crop(real* outputs, const real* inputs, const TensorShape inShape, - const CropConf& crop) { - int cropC = crop.corner[0]; - int cropH = crop.corner[1]; - int cropW = crop.corner[2]; + const FuncConfig& conf) { + std::vector crop_corner = conf.get>("crop_corner"); + std::vector crop_shape = conf.get>("crop_shape"); + int cropC = crop_corner[1]; + int cropH = crop_corner[2]; + int cropW = crop_corner[3]; int num = inShape[0]; int inC = inShape[1]; int inH = inShape[2]; int inW = inShape[3]; - int outC = crop.shape[0]; - int outH = crop.shape[1]; - int outW = crop.shape[2]; + int outC = crop_shape[1]; + int outH = crop_shape[2]; + int outW = crop_shape[3]; size_t nth = num * outC * outH * outW; int blockSize = 1024; @@ -82,19 +84,21 @@ template <> void CropGrad(const real* inGrad, real* outGrad, const TensorShape outShape, - const CropConf& crop) { - int cropC = crop.corner[0]; - int cropH = crop.corner[1]; - int cropW = crop.corner[2]; + const FuncConfig& conf) { + std::vector crop_corner = conf.get>("crop_corner"); + std::vector crop_shape = conf.get>("crop_shape"); + int cropC = crop_corner[1]; + int cropH = crop_corner[2]; + int cropW = crop_corner[3]; int num = outShape[0]; int outC = outShape[1]; int outH = outShape[2]; int outW = outShape[3]; - int inC = crop.shape[0]; - int inH = crop.shape[1]; - int inW = crop.shape[2]; + int inC = crop_shape[1]; + int inH = crop_shape[2]; + int inW = crop_shape[3]; size_t nth = num * inC * inH * inW; int blockSize = 1024; diff --git a/paddle/function/CropOpTest.cpp b/paddle/function/CropOpTest.cpp index 62b4bd9fde..c331a70d1f 100644 --- a/paddle/function/CropOpTest.cpp +++ b/paddle/function/CropOpTest.cpp @@ -28,8 +28,8 @@ TEST(Crop, real) { FunctionCompare compare( test_grad ? "CropGrad" : "Crop", FuncConfig() - .set>("crop_corner", {1, 1, 1}) - .set>("crop_shape", {2, 3, 3})); + .set>("crop_corner", {0, 1, 1, 1}) + .set>("crop_shape", {0, 2, 3, 3})); TensorShape inDims{numSamples, channels, imgSizeH, imgSizeW}; TensorShape outDims{numSamples, 2, 3, 3}; compare.addInputs( diff --git a/paddle/gserver/layers/CropLayer.cpp b/paddle/gserver/layers/CropLayer.cpp index ab23d4617e..198ceffb46 100644 --- a/paddle/gserver/layers/CropLayer.cpp +++ b/paddle/gserver/layers/CropLayer.cpp @@ -25,20 +25,57 @@ bool CropLayer::init(const LayerMap& layerMap, Layer::init(layerMap, parameterMap); auto& crop_conf = config_.inputs(0).crop_conf(); - auto& img_conf = crop_conf.image_conf(); - CHECK_EQ(config_.inputs_size(), 1); - inDims_ = TensorShape( - {0, - img_conf.channels(), - img_conf.has_img_size_y() ? img_conf.img_size_y() : img_conf.img_size(), - img_conf.img_size()}); - - crop_corner_ = {crop_conf.crop_corner(0), - crop_conf.crop_corner(1), - crop_conf.crop_corner(2)}; - crop_shape_ = {crop_conf.crop_shape(0), - crop_conf.crop_shape(1), - crop_conf.crop_shape(2)}; + crop_axis_ = crop_conf.axis(); + for (int i = 0; i < crop_conf.offset_size(); i++) { + crop_offsets_[i] = crop_conf.offset(i); + } + + // 1. 
get input_0 shape
+ auto& input0_img_conf = config_.inputs(0).image_conf();
+ inDims_ = TensorShape({0,
+ input0_img_conf.channels(),
+ input0_img_conf.has_img_size_y()
+ ? input0_img_conf.img_size_y()
+ : input0_img_conf.img_size(),
+ input0_img_conf.img_size()});
+
+ // 2. get output shape from input_1 or crop shape conf
+ if (config_.inputs_size() == 2) {
+ auto& input1_img_conf = config_.inputs(1).image_conf();
+ targetDims_ = TensorShape({0,
+ input1_img_conf.channels(),
+ input1_img_conf.has_img_size_y()
+ ? input1_img_conf.img_size_y()
+ : input1_img_conf.img_size(),
+ input1_img_conf.img_size()});
+ } else {
+ targetDims_ = TensorShape({crop_conf.shape(0),
+ crop_conf.shape(1),
+ crop_conf.shape(2),
+ crop_conf.shape(3)});
+ }
+
+ // 3. get final crop shape
+ int dimSize = 4;
+ for (int i = 0; i < dimSize; i++) {
+ if (i >= crop_axis_) {
+ crop_shape_[i] = targetDims_[i];
+ } else {
+ crop_shape_[i] = inDims_[i];
+ }
+ }
+
+ // 4. get final crop corner
+ crop_corner_ = {0, 0, 0, 0};
+ for (int i = 0; i < dimSize; i++) {
+ if (i >= crop_axis_) {
+ if (crop_offsets_.size() > 1) {
+ crop_corner_[i] = crop_offsets_[i - crop_axis_];
+ } else {
+ crop_corner_[i] = crop_offsets_[0];
+ }
+ }
+ }

 outDims_ = TensorShape(4);
 setOutDims(0);
@@ -58,7 +95,7 @@ bool CropLayer::init(const LayerMap& layerMap,
 }

 void CropLayer::setOutDims(const size_t batchSize) {
- outDims_.reshape({batchSize, crop_shape_[0], crop_shape_[1], crop_shape_[2]});
+ outDims_.reshape({batchSize, crop_shape_[1], crop_shape_[2], crop_shape_[3]});
 }

 void CropLayer::setTensorDim(const size_t batchSize) {
diff --git a/paddle/gserver/layers/CropLayer.h b/paddle/gserver/layers/CropLayer.h
index 3ce89707ca..23cede1c3f 100644
--- a/paddle/gserver/layers/CropLayer.h
+++ b/paddle/gserver/layers/CropLayer.h
@@ -19,9 +19,13 @@ limitations under the License. */
 namespace paddle {

 /**
- * \brief This layer crop inputs according to the specify corner and shape.
- * The input and output is a 4D tensor. Cropping from the 2nd to
- * the 4th dimenstion.
+ * \brief This layer crops the input according to the specified conf.
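+ * For example (illustrative values only): with axis = 2, offset = {2, 3} and
+ * a 1 x 8 x 16 x 16 reference input, a 1 x 8 x 32 x 32 input is cropped at
+ * corner (0, 0, 2, 3) down to shape (batch, 8, 16, 16). The inputs and
+ * conf fields are: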
+ * input_0: input to be cropped
+ * input_1: optional reference input
+ * axis: start dimension to be cropped
+ * offset: offset of cropping in each dimension
+ * shape: if the reference input layer is not set,
+ * crop the input according to this shape conf
 */
 class CropLayer : public Layer {
 public:
 explicit CropLayer(const LayerConfig& config) : Layer(config) {}

 ~CropLayer() {}

 bool init(const LayerMap& layerMap,
 const ParameterMap& parameterMap) override;
 void forward(PassType passType) override;
 void backward(const UpdateCallback& callback = nullptr) override;

protected:
 void setOutDims(const size_t batchSize);
 void setTensorDim(const size_t batchSize);

+ int32_t crop_axis_;
+ std::vector crop_offsets_;
 std::vector crop_corner_;
 std::vector crop_shape_;
 TensorShape inDims_;
+ TensorShape targetDims_;
 TensorShape outDims_;
};
} // namespace paddle
From 5e6e1f636a356b6ae7d25ff8494354349b3b4f5f Mon Sep 17 00:00:00 2001
From: wanghaoshuang
Date: Wed, 5 Jul 2017 00:53:32 +0800
Subject: [PATCH 286/542] Add grad test and python wrapper for crop layer

---
 paddle/function/CropOp.cpp | 2 +-
 paddle/function/CropOpTest.cpp | 2 +-
 paddle/gserver/layers/CropLayer.cpp | 23 ++++----
 paddle/gserver/tests/CMakeLists.txt | 2 +-
 paddle/gserver/tests/test_LayerGrad.cpp | 28 ++++++++++
 proto/ModelConfig.proto | 8 ++-
 python/paddle/trainer/config_parser.py | 45 ++++++++++++++++
 .../paddle/trainer_config_helpers/layers.py | 54 +++++++++++++++++++
 8 files changed, 147 insertions(+), 17 deletions(-)

diff --git a/paddle/function/CropOp.cpp b/paddle/function/CropOp.cpp
index 0d511ceef5..1bb194a9bc 100644
--- a/paddle/function/CropOp.cpp
+++ b/paddle/function/CropOp.cpp
@@ -148,7 +148,7 @@ public:
 void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
 CHECK_EQ(1UL, inputs.size());
 CHECK_EQ(1UL, outputs.size());
- CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO);
+ CHECK_EQ(outputs[0].getArgType(), ADD_TO);

 TensorShape outShape = outputs[0].shape();

diff --git a/paddle/function/CropOpTest.cpp b/paddle/function/CropOpTest.cpp
index 62b4bd9fde..71d9b05812 100644
--- a/paddle/function/CropOpTest.cpp
+++ b/paddle/function/CropOpTest.cpp
@@ -25,7 +25,7 @@ TEST(Crop, real) {
 VLOG(3) << " numSamples=" << numSamples << " channels=" << channels
 << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW;
 for (bool test_grad : {false, true}) {
- FunctionCompare compare(
+ CpuGpuFuncCompare compare(
 test_grad ? "CropGrad" : "Crop",
 FuncConfig()
 .set>("crop_corner", {0, 1, 1, 1})
 .set>("crop_shape", {0, 2, 3, 3}));
diff --git a/paddle/gserver/layers/CropLayer.cpp b/paddle/gserver/layers/CropLayer.cpp
index 198ceffb46..b2fa17b400 100644
--- a/paddle/gserver/layers/CropLayer.cpp
+++ b/paddle/gserver/layers/CropLayer.cpp
@@ -14,7 +14,6 @@ limitations under the License. */

 #include "CropLayer.h"
 #include "paddle/utils/Stat.h"
-
 namespace paddle {

 REGISTER_LAYER(crop, CropLayer);
@@ -24,10 +23,9 @@ bool CropLayer::init(const LayerMap& layerMap,
 /* Initialize the basic parent class */
 Layer::init(layerMap, parameterMap);

- auto& crop_conf = config_.inputs(0).crop_conf();
- crop_axis_ = crop_conf.axis();
- for (int i = 0; i < crop_conf.offset_size(); i++) {
- crop_offsets_[i] = crop_conf.offset(i);
+ crop_axis_ = config_.axis();
+ for (int i = 0; i < config_.offset_size(); i++) {
+ crop_offsets_.push_back(config_.offset(i));
 }

 // 1. get input_0 shape
@@ -38,7 +36,6 @@ bool CropLayer::init(const LayerMap& layerMap,
 ? input0_img_conf.img_size_y()
 : input0_img_conf.img_size(),
 input0_img_conf.img_size()});
-
 // 2.
get output shape from input_1 or crop shape conf
 if (config_.inputs_size() == 2) {
 auto& input1_img_conf = config_.inputs(1).image_conf();
@@ -49,19 +46,19 @@ bool CropLayer::init(const LayerMap& layerMap,
 : input1_img_conf.img_size(),
 input1_img_conf.img_size()});
 } else {
- targetDims_ = TensorShape({crop_conf.shape(0),
- crop_conf.shape(1),
- crop_conf.shape(2),
- crop_conf.shape(3)});
+ targetDims_ = TensorShape({config_.shape(0),
+ config_.shape(1),
+ config_.shape(2),
+ config_.shape(3)});
 }

 // 3. get final crop shape
 int dimSize = 4;
 for (int i = 0; i < dimSize; i++) {
 if (i >= crop_axis_) {
- crop_shape_[i] = targetDims_[i];
+ crop_shape_.push_back(targetDims_[i]);
 } else {
- crop_shape_[i] = inDims_[i];
+ crop_shape_.push_back(inDims_[i]);
 }
 }
@@ -99,7 +96,7 @@ void CropLayer::setOutDims(const size_t batchSize) {
 }

 void CropLayer::setTensorDim(const size_t batchSize) {
- CHECK_EQ(static_cast(inputLayers_.size()), 1);
+ CHECK_EQ(static_cast(inputLayers_.size()), 2);
 inDims_.setDim(0, batchSize);
 int h = inputLayers_[0]->getOutput().getFrameHeight();
diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt
index 92f6cbcfe5..a43adc7ce7 100644
--- a/paddle/gserver/tests/CMakeLists.txt
+++ b/paddle/gserver/tests/CMakeLists.txt
@@ -56,7 +56,7 @@ add_test(NAME test_DetectionOutput
 add_unittest_without_exec(test_ConvUnify
 test_ConvUnify.cpp
 LayerGradUtil.cpp)
-
+
 add_test(NAME test_ConvUnify
 COMMAND test_ConvUnify)
 ################# test_BatchNorm #######################
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index 59d1e9273d..20a83d7aa1 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -1792,6 +1792,34 @@ TEST(Layer, RowConvLayer) {
 }
 }

+TEST(Layer, CropLayer) {
+ TestConfig config;
+ // config input_0
+ config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 0});
+ LayerInputConfig* input = config.layerConfig.add_inputs();
+ ImageConfig* img = input->mutable_image_conf();
+ img->set_channels(4);
+ img->set_img_size(16);
+ config.layerConfig.set_axis(2);
+ config.layerConfig.add_offset(0);
+ config.layerConfig.add_offset(0);
+
+ // config input_1
+ config.inputDefs.push_back({INPUT_DATA, "layer_1", 128, 0});
+ input = config.layerConfig.add_inputs();
+ img = input->mutable_image_conf();
+ img->set_channels(2);
+ img->set_img_size(8);
+
+ // config crop layer
+ config.layerConfig.set_type("crop");
+ config.layerConfig.set_name("cropLayer");
+
+ for (auto useGpu : {false, true}) {
+ testLayerGrad(config, "crop", 100, false, useGpu, false);
+ }
+}
+
 int main(int argc, char** argv) {
 testing::InitGoogleTest(&argc, argv);
 initMain(argc, argv);
diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto
index 37cd16c798..83f72c137b 100644
--- a/proto/ModelConfig.proto
+++ b/proto/ModelConfig.proto
@@ -472,10 +472,16 @@ message LayerConfig {
 // blank label used in ctc loss
 optional uint32 blank = 52 [default = 0];

- // stride parameter for seqlastins layer, AverageLayer, MaxLayer, which
+ // stride parameter for seqlastins layer, AverageLayer, MaxLayer, which
 // controls the scope of pooling operation. can be set > 0.
 // leave empty or set to -1 to disable this stride pooling.
optional int32 seq_pool_stride = 53 [default = -1];
+
+ // for crop layer
+ optional int32 axis = 54 [default = 2];
+ repeated uint32 offset = 55;
+ repeated uint32 shape = 56;
+
 }

 message EvaluatorConfig {
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 370529ed97..8c529fdfd3 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -1986,6 +1986,51 @@ class PadLayer(LayerBase):
         self.config.size = out_ch * out_h * out_w


+@config_layer('crop')
+class CropLayer(LayerBase):
+    def __init__(self, inputs, axis, offset, shape, name, **xargs):
+        super(CropLayer, self).__init__(name, 'crop', 0, inputs=inputs, **xargs)
+        self.config.axis = axis
+        self.config.offset.extend(offset)
+        if shape is not None:
+            self.config.shape.extend(shape)
+
+        # get channel, width and height from input_0 layer
+        input_layer = self.get_input_layer(0)
+        image_conf = self.config.inputs[0].image_conf
+        image_conf.img_size = input_layer.width
+        image_conf.img_size_y = input_layer.height
+        image_conf.channels = input_layer.size / (input_layer.width *
+                                                  input_layer.height)
+        out_ch = image_conf.channels
+        out_h = image_conf.img_size_y
+        out_w = image_conf.img_size
+        if len(self.inputs) == 2:
+            # get channels, width and height from input_1 layer
+            input_layer = self.get_input_layer(1)
+            image_conf = self.config.inputs[1].image_conf
+            image_conf.img_size = input_layer.width
+            image_conf.img_size_y = input_layer.height
+            image_conf.channels = input_layer.size / (input_layer.width *
+                                                      input_layer.height)
+            out_ch = image_conf.channels
+            out_h = image_conf.img_size_y
+            out_w = image_conf.img_size
+        else:
+            # set channels, width and height of current layer
+            if len(shape) > 2:
+                out_ch = shape[-3]
+            if len(shape) > 1:
+                out_h = shape[-2]
+            if len(shape) > 0:
+                out_w = shape[-1]
+        self.set_cnn_layer(name, out_h, out_w, out_ch)
+
+
 @config_layer('batch_norm')
 class BatchNormLayer(LayerBase):
     layer_type = 'batch_norm'
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 206de1f8e1..f9de086cba 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -217,6 +217,7 @@ class LayerType(object):
     SMOOTH_L1 = 'smooth_l1'

     PRELU = 'prelu'
+    CROP_LAYER = 'crop'

     @staticmethod
     def is_layer_type(type_name):
@@ -5853,3 +5854,56 @@ def prelu_layer(input,
         layer_type=LayerType.PRELU,
         parents=input,
         size=l.config.size)
+
+
+@wrap_name_default()
+@layer_support()
+def crop_layer(input, axis, offset, shape=None, name=None, layer_attr=None):
+    """
+    The crop layer crops images according to the given offset and shape. The
+    crop shape can be set explicitly through the 'shape' argument, or taken
+    from a reference input layer.
+
+    The example usage is:
+
+    .. code-block:: python
+
+        crop = crop_layer(input=[image_input, reference_input], axis=2, offset=[2, 3])
+
+    :param input: The input layer. If two inputs are given,
+                  the second one is regarded as the reference input
+    :type input: LayerOutput or Sequence
+    :param axis: start axis to be cropped. For an image input layer:
+                 - 0: batch size
+                 - 1: channels
+                 - 2: height
+                 - 3: width
+    :type axis: int
+    :param offset: The crop offset
+    :type offset: Sequence
+    :param shape: The shape to be cropped. Default is None.
+    :type shape: Sequence | None
+    :param name: Name of this layer.
+ :type name: basestring + :return: LayerOutput object. + :rtype: LayerOutput + """ + if isinstance(input, LayerOutput): + input = [input] + elif isinstance(input, Projection): + input = [input] + else: + assert isinstance(input, collections.Sequence) + l = Layer( + inputs=[x.name for x in input], + axis=axis, + offset=offset, + shape=shape, + name=name, + type=LayerType.CROP_LAYER, + **ExtraLayerAttribute.to_kwargs(layer_attr)) + return LayerOutput( + name=name, + layer_type=LayerType.CROP_LAYER, + parents=input, + size=l.config.size) From 86bdb2f33fa9e9e806e8248b14a172ce4e0557c6 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Wed, 5 Jul 2017 10:36:22 +0800 Subject: [PATCH 287/542] fix crop function test --- paddle/function/CropOpTest.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/paddle/function/CropOpTest.cpp b/paddle/function/CropOpTest.cpp index 71d9b05812..dcba972e10 100644 --- a/paddle/function/CropOpTest.cpp +++ b/paddle/function/CropOpTest.cpp @@ -34,8 +34,10 @@ TEST(Crop, real) { TensorShape outDims{numSamples, 2, 3, 3}; compare.addInputs( BufferArg(VALUE_TYPE_FLOAT, test_grad ? outDims : inDims)); - compare.addOutputs(BufferArg( - VALUE_TYPE_FLOAT, test_grad ? inDims : outDims, ASSIGN_TO)); + compare.addOutputs(BufferArg(VALUE_TYPE_FLOAT, + test_grad ? inDims : outDims, + tes_grad ? ADD_TO : ASSIGN_TO), + test_grad ? ADD_TO : ASSIGN_TO); compare.run(); } } From cf868918012f29b94628cff7e80cfc6e65bf0ee6 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Wed, 5 Jul 2017 11:34:16 +0800 Subject: [PATCH 288/542] fix unittest of crop layer --- paddle/function/CropOpTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/function/CropOpTest.cpp b/paddle/function/CropOpTest.cpp index dcba972e10..6f11abfdf6 100644 --- a/paddle/function/CropOpTest.cpp +++ b/paddle/function/CropOpTest.cpp @@ -36,7 +36,7 @@ TEST(Crop, real) { BufferArg(VALUE_TYPE_FLOAT, test_grad ? outDims : inDims)); compare.addOutputs(BufferArg(VALUE_TYPE_FLOAT, test_grad ? inDims : outDims, - tes_grad ? ADD_TO : ASSIGN_TO), + test_grad ? ADD_TO : ASSIGN_TO), test_grad ? ADD_TO : ASSIGN_TO); compare.run(); } From acfd2fc6dfc1bf06bbfd6e25496ca1dfde881551 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Wed, 5 Jul 2017 11:54:47 +0800 Subject: [PATCH 289/542] fix cpp format --- paddle/function/CropOp.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/paddle/function/CropOp.cpp b/paddle/function/CropOp.cpp index 1bb194a9bc..39e06fc120 100644 --- a/paddle/function/CropOp.cpp +++ b/paddle/function/CropOp.cpp @@ -13,8 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "CropOp.h" -#include "paddle/math/Vector.h" #include "paddle/function/TensorShape.h" +#include "paddle/math/Vector.h" + namespace paddle { template <> From cd437f5072b0482685d107c386e587bc1fe59044 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Wed, 5 Jul 2017 05:16:41 +0000 Subject: [PATCH 290/542] fix bugs --- go/pserver/client/c/test/CMakeLists.txt | 4 +++- go/pserver/optimizer.go | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/go/pserver/client/c/test/CMakeLists.txt b/go/pserver/client/c/test/CMakeLists.txt index f287f85071..44bc183738 100644 --- a/go/pserver/client/c/test/CMakeLists.txt +++ b/go/pserver/client/c/test/CMakeLists.txt @@ -1,2 +1,4 @@ -cc_test(test_cclient SRCS test_cclient.c DEPS paddle_pserver_cclient) +# FIXME:It's ugly +#cc_test(test_cclient SRCS test_cclient.c DEPS paddle_pserver_cclient paddle_go_optimizer) +cc_test(test_cclient SRCS test_cclient.c DEPS paddle_pserver_cclient paddle_optimizer paddle_proto glog gflags protobuf) add_style_check_target(test_cclient test_cclient.c) diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go index 2c9b0d5652..93389b93a7 100644 --- a/go/pserver/optimizer.go +++ b/go/pserver/optimizer.go @@ -1,7 +1,6 @@ package pserver // #cgo CFLAGS: -I ../../ -// //FIXME: ldflags contain "build" path // #cgo LDFLAGS: -lpaddle_go_optimizer -lstdc++ -lm // #include "paddle/optimizer/optimizer.h" // #include From 1409b17e4f20afdd922b8566be324581ed3f0e54 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Wed, 5 Jul 2017 06:06:13 +0000 Subject: [PATCH 291/542] add fixme --- cmake/generic.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 97196114ff..74396abdbb 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -281,6 +281,7 @@ function(go_library TARGET_NAME) file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go") string(REPLACE "${PADDLE_GO_PATH}/" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + # FIXME: link path add_custom_command(TARGET ${TARGET_NAME} POST_BUILD COMMAND rm "${${TARGET_NAME}_LIB_PATH}" # Golang build source code @@ -299,6 +300,7 @@ function(go_binary TARGET_NAME) cmake_parse_arguments(go_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) string(REPLACE "${PADDLE_GO_PATH}/" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + # FIXME: link path add_custom_command(OUTPUT ${TARGET_NAME}_timestamp COMMAND env LIBRARY_PATH=${CMAKE_BINARY_DIR}/go/pserver/client/c/:$ENV{LIBRARY_PATH} GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build From 7364fcd4c3c6b08b569ed2bb809bed9904b55030 Mon Sep 17 00:00:00 2001 From: wuyi05 Date: Wed, 5 Jul 2017 15:42:17 +0800 Subject: [PATCH 292/542] add golang precommit --- .pre-commit-config.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4cd8eb12f6..a7c450176d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,3 +21,10 @@ sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29 hooks: - id: clang-formater +- repo: https://github.com/dnephin/pre-commit-golang + sha: e4693a4c282b4fc878eda172a929f7a6508e7d16 + hooks: + - id: go-fmt + - id: go-vet + - id: go-lint + - id: gometalinter From e7b071f33a2af3168586ef2710835b694f61e958 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Wed, 5 Jul 2017 15:55:26 +0800 Subject: [PATCH 293/542] update annotation with comments --- paddle/gserver/layers/AverageLayer.h | 4 ++++ paddle/gserver/layers/MaxLayer.h | 7 +++---- 
paddle/gserver/layers/SequenceLastInstanceLayer.cpp | 7 +++----
 paddle/gserver/layers/SequencePoolLayer.h | 5 +++--
 python/paddle/trainer_config_helpers/layers.py | 11 +++++++----
 5 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/paddle/gserver/layers/AverageLayer.h b/paddle/gserver/layers/AverageLayer.h
index 332552a304..db4a17bfb0 100644
--- a/paddle/gserver/layers/AverageLayer.h
+++ b/paddle/gserver/layers/AverageLayer.h
@@ -25,6 +25,10 @@ namespace paddle {
 * If SequenceLevel = kNonSeq:
 * Output: output size is the number of input sequences (NOT input instances)
 * output[i] = average_{for each instance in this sequence}{input[i]}
+ * If stride_ > 0:
+ * Output: a shortened sequence. Stride is the step size by which we slide a
+ * window upon the input sequence, and the average pooling
+ * operation is then applied to each interval independently.
 * If SequenceLevel = kSeq:
 * Check input sequence must has sub-sequence
 * Output: output size is the number of input sub-sequences
diff --git a/paddle/gserver/layers/MaxLayer.h b/paddle/gserver/layers/MaxLayer.h
index adf7ab4ae4..fa536fce2b 100644
--- a/paddle/gserver/layers/MaxLayer.h
+++ b/paddle/gserver/layers/MaxLayer.h
@@ -27,10 +27,9 @@ namespace paddle {
 * Output: output size is the number of input sequences (NOT input instances)
 * output[i] = max_{for each instance in this sequence}{input[i]}
 * If stride_ > 0:
- * Output: a shorten sequence. The operation of getting max instance of a
- * sequence is independently performed on every slice of the input
- * sequence, which is obtained by sliding a window with the window
- * size set to stride_.
+ * Output: a shortened sequence. Stride is the step size by which we slide a
+ * window upon the input sequence, and the max pooling operation is
+ * then applied to each interval independently.
 * If SequenceLevel = kSeq:
 * Check input sequence must has sub-sequence
 * Output: output size is the number of input sub-sequences
diff --git a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
index 8127cbf09c..323cc47df1 100644
--- a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
+++ b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
@@ -26,10 +26,9 @@ namespace paddle {
 * If SequenceLevel = kNonseq:
 * Output: a sequence containing only the last instance of the input sequence
 * If stride_ > 0:
- * Output: a shorten sequence. The operation of getting last instance of a
- * sequence is independently performed on every slice of the input
- * sequence, which is obtained by sliding a window with the window
- * size set to stride_.
+ * Output: a shortened sequence. Stride is the step size by which we slide a
+ * window upon the input sequence, and the get-last-instance
+ * operation is then applied to each interval independently.
 * If SequenceLevel = kSeq:
 * Check input sequence must has sub-sequence
 * Output: a sequence containing only the last instance of each sub-sequence
diff --git a/paddle/gserver/layers/SequencePoolLayer.h b/paddle/gserver/layers/SequencePoolLayer.h
index 058627def8..e207afd1dc 100644
--- a/paddle/gserver/layers/SequencePoolLayer.h
+++ b/paddle/gserver/layers/SequencePoolLayer.h
@@ -28,8 +28,9 @@ namespace paddle {
 * sequence}{input[i]}
 * If stride_ > 0:
 * Check input sequence must not have sub-sequence
- * Output: a shorten sequence, pooling is performed upon a small local
- * area
+ * Output: a shortened sequence.
Stride is the step size by which we slide
+ * a window upon the input sequence, and the pooling operation
+ * is then applied to each interval independently.
 * If SequenceLevel = kSeq:
 * Check input sequence must has sub-sequence
 * Output: output size is the number of input sub-sequences
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 5e8bf4b203..2f52a27e60 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -1097,7 +1097,10 @@ def pooling_layer(input,

     If stride > 0, this layer slides a window whose size is determined by stride,
     and return the pooling value of the window as the output. Thus, a long sequence
-    will be shorten. Note that for sequence with sub-sequence, the default value
+    will be shortened.
+
+    The parameter stride specifies the intervals at which to apply the pooling
+    operation. Note that for sequence with sub-sequence, the default value
     of stride is -1.

     The example usage is:
@@ -1118,7 +1121,7 @@
     :param pooling_type: Type of pooling, MaxPooling(default), AvgPooling,
                          SumPooling, SquareRootNPooling.
     :type pooling_type: BasePoolingType|None
-    :param stride: window size.
+    :param stride: The step size between successive pooling regions.
     :type stride: Int
     :param bias_attr: Bias parameter attribute. False if no bias.
     :type bias_attr: ParameterAttribute|None|False
@@ -1408,7 +1411,7 @@ def last_seq(input,
     :type name: basestring
     :param input: Input layer name.
     :type input: LayerOutput
-    :param stride: window size.
+    :param stride: The step size between successive pooling regions.
     :type stride: Int
     :param layer_attr: extra layer attributes.
     :type layer_attr: ExtraLayerAttribute.
@@ -1464,7 +1467,7 @@ def first_seq(input,
     :type name: basestring
     :param input: Input layer name.
     :type input: LayerOutput
-    :param stride: window size.
+    :param stride: The step size between successive pooling regions.
     :type stride: Int
     :param layer_attr: extra layer attributes.
     :type layer_attr: ExtraLayerAttribute.
From 7ed6463ee91e0b71e7beca313554eae36da1c4e4 Mon Sep 17 00:00:00 2001
From: yangyaming
Date: Wed, 24 May 2017 13:55:58 +0800
Subject: [PATCH 294/542] fix bugs for CrossChannelNormLayer

---
 .../gserver/layers/CrossChannelNormLayer.cpp | 32 ++++++++++++++-----
 paddle/gserver/layers/NormLayer.cpp | 10 ------
 paddle/gserver/tests/LayerGradUtil.cpp | 7 +++-
 paddle/gserver/tests/LayerGradUtil.h | 6 ++++
 paddle/gserver/tests/test_LayerGrad.cpp | 5 ++-
 5 files changed, 40 insertions(+), 20 deletions(-)

diff --git a/paddle/gserver/layers/CrossChannelNormLayer.cpp b/paddle/gserver/layers/CrossChannelNormLayer.cpp
index 3fbccc1103..4dfe460561 100644
--- a/paddle/gserver/layers/CrossChannelNormLayer.cpp
+++ b/paddle/gserver/layers/CrossChannelNormLayer.cpp
@@ -36,6 +36,16 @@ MatrixPtr CrossChannelNormLayer::createSpatialMatrix(MatrixPtr data,
 data->getData() + iter * spatialDim, 1, spatialDim, false, useGpu_);
 }

+bool CrossChannelNormLayer::init(const LayerMap& layerMap,
+ const ParameterMap& parameterMap) {
+ Layer::init(layerMap, parameterMap);
+ CHECK(parameters_[0]);
+ const NormConfig& conf = config_.inputs(0).norm_conf();
+ channels_ = conf.channels();
+ scale_.reset(new Weight(channels_, 1, parameters_[0]));
+ return true;
+}
+
 void CrossChannelNormLayer::forward(PassType passType) {
 Layer::forward(passType);
 MatrixPtr inV = getInputValue(0);
@@ -63,6 +73,7 @@ void CrossChannelNormLayer::forward(PassType passType) {

 // compute norm.
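 // (per spatial position p this is norm[p] = sqrt(sum over channels of
 // x[c][p]^2); dataBuffer_ already holds the squared inputs, so the column
 // sums taken below supply the inner term)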
spatialBuffer_->sumCols(*dataTmp, 1, 0); + spatialBuffer_->add(*normTmp); spatialBuffer_->sqrt2(*spatialBuffer_); normTmp->copyFrom(*spatialBuffer_); outVTmp->copyFrom(*inVTmp); @@ -82,6 +93,9 @@ void CrossChannelNormLayer::backward(const UpdateCallback& callback) { size_t dataDim = inG->getWidth(); size_t spatialDim = dataDim / channels_; + MatrixPtr inGBuffer; + Matrix::resizeOrCreate(inGBuffer, channels_, spatialDim, false, useGpu_); + dataBuffer_->dotMul(*outG, *outV); Matrix::resizeOrCreate(scaleDiff_, channels_, 1, false, useGpu_); Matrix::resizeOrCreate(channelBuffer_, channels_, 1, false, useGpu_); @@ -100,22 +114,24 @@ void CrossChannelNormLayer::backward(const UpdateCallback& callback) { scaleDiff_->add(*channelBuffer_, 1.); sampleBuffer_->dotMul(*inVTmp, *outGTmp); - spatialBuffer_->sumCols(*sampleBuffer_, 1., 1.); + spatialBuffer_->sumCols(*sampleBuffer_, 1., 0.); // scale the grad - inGTmp->copyFrom(*inVTmp); - inGTmp->mulRowVector(*spatialBuffer_); + inGBuffer->copyFrom(*inVTmp); + inGBuffer->mulRowVector(*spatialBuffer_); // divide by square of norm spatialBuffer_->dotMul(*normTmp, *normTmp); - inGTmp->divRowVector(*spatialBuffer_); + inGBuffer->divRowVector(*spatialBuffer_); // subtract - inGTmp->add(*outGTmp, -1, 1); + inGBuffer->add(*outGTmp, -1, 1); // divide by norm - inGTmp->divRowVector(*normTmp); + inGBuffer->divRowVector(*normTmp); // scale the diff - inGTmp->mulColVector(*scale_->getW()); + inGBuffer->mulColVector(*scale_->getW()); + + inGTmp->add(*inGBuffer); } // updata scale - if (scale_->getWGrad()) scale_->getWGrad()->copyFrom(*scaleDiff_); + if (scale_->getWGrad()) scale_->getWGrad()->add(*scaleDiff_); scale_->getParameterPtr()->incUpdate(callback); } diff --git a/paddle/gserver/layers/NormLayer.cpp b/paddle/gserver/layers/NormLayer.cpp index e094078bfe..caef710092 100644 --- a/paddle/gserver/layers/NormLayer.cpp +++ b/paddle/gserver/layers/NormLayer.cpp @@ -56,14 +56,4 @@ bool ResponseNormLayer::init(const LayerMap& layerMap, return true; } -bool CrossChannelNormLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - Layer::init(layerMap, parameterMap); - CHECK(parameters_[0]); - const NormConfig& conf = config_.inputs(0).norm_conf(); - channels_ = conf.channels(); - scale_.reset(new Weight(channels_, 1, parameters_[0])); - return true; -} - } // namespace paddle diff --git a/paddle/gserver/tests/LayerGradUtil.cpp b/paddle/gserver/tests/LayerGradUtil.cpp index e3591ba4df..66aafba844 100644 --- a/paddle/gserver/tests/LayerGradUtil.cpp +++ b/paddle/gserver/tests/LayerGradUtil.cpp @@ -465,7 +465,6 @@ void initTestLayer(TestConfig testConf, ParameterConfig paraConfig) { paraConfig.set_name(paraName); paraConfig.set_size(paraSize); - paraConfig.set_initial_std(1); paraConfig.set_is_static(isStatic); auto para = std::make_shared(paraConfig, FLAGS_use_gpu, initialize); @@ -499,6 +498,12 @@ void initTestLayer(TestConfig testConf, paraConfig.add_dims((*layerMap)[input.input_layer_name()]->getSize()); paraConfig.add_dims(testConf.layerConfig.size()); } + if (testConf.hasParamInitialValue) { + paraConfig.set_initial_mean(testConf.paramInitialMean); + paraConfig.set_initial_std(testConf.paramInitialStd); + } else { + paraConfig.set_initial_std(1); + } initParameter(paraName, paraSize, inputDef.isStatic, false, paraConfig); } } diff --git a/paddle/gserver/tests/LayerGradUtil.h b/paddle/gserver/tests/LayerGradUtil.h index 18a6525a14..5ea7ca0f24 100644 --- a/paddle/gserver/tests/LayerGradUtil.h +++ b/paddle/gserver/tests/LayerGradUtil.h @@ -125,12 
+125,18 @@ struct TestConfig { LayerConfig layerConfig; std::vector inputDefs; size_t biasSize; + real paramInitialMean; + real paramInitialStd; + bool hasParamInitialValue; bool testAccumulate; bool testState; bool staticBias; bool testBatchState; TestConfig() : biasSize(0), + paramInitialMean(0), + paramInitialStd(1), + hasParamInitialValue(false), testAccumulate(true), testState(false), staticBias(false), diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 59d1e9273d..6441e08b48 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -1661,6 +1661,9 @@ TEST(Layer, PadLayer) { TEST(Layer, CrossChannelNormLayer) { TestConfig config; + config.hasParamInitialValue = true; + config.paramInitialMean = 1.; + config.paramInitialStd = 0.; config.layerConfig.set_type("norm"); config.layerConfig.set_size(100); LayerInputConfig* input = config.layerConfig.add_inputs(); @@ -1674,7 +1677,7 @@ TEST(Layer, CrossChannelNormLayer) { config.inputDefs.push_back({INPUT_DATA, "layer_0", 100, 10}); for (auto useGpu : {false, true}) { - testLayerGrad(config, "cross-channel-norm", 10, false, useGpu, false, 5); + testLayerGrad(config, "cross-channel-norm", 10, false, useGpu, false); } } From 2bf4f1bbc1e4abc9c173b89aeb96c40b404e94f4 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Wed, 24 May 2017 14:22:41 +0800 Subject: [PATCH 295/542] make adding eps more clear --- paddle/gserver/layers/CrossChannelNormLayer.cpp | 7 +++---- paddle/gserver/tests/LayerGradUtil.h | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/paddle/gserver/layers/CrossChannelNormLayer.cpp b/paddle/gserver/layers/CrossChannelNormLayer.cpp index 4dfe460561..d72503217f 100644 --- a/paddle/gserver/layers/CrossChannelNormLayer.cpp +++ b/paddle/gserver/layers/CrossChannelNormLayer.cpp @@ -61,9 +61,7 @@ void CrossChannelNormLayer::forward(PassType passType) { Matrix::resizeOrCreate(dataBuffer_, batchSize, dataDim, false, useGpu_); Matrix::resizeOrCreate(spatialBuffer_, 1, spatialDim, false, useGpu_); Matrix::resizeOrCreate(normBuffer_, batchSize, spatialDim, false, useGpu_); - normBuffer_->zeroMem(); - // add eps to avoid overflow - normBuffer_->addScalar(*normBuffer_, 1e-6); + inV->square2(*dataBuffer_); for (size_t i = 0; i < batchSize; i++) { const MatrixPtr inVTmp = createSampleMatrix(inV, i, spatialDim); @@ -73,7 +71,8 @@ void CrossChannelNormLayer::forward(PassType passType) { // compute norm. 
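 // (the eps now goes into the per-position squared sums right below, where
 // the sqrt consumes it, instead of pre-seeding normBuffer_)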
spatialBuffer_->sumCols(*dataTmp, 1, 0); - spatialBuffer_->add(*normTmp); + // add eps to avoid overflow + spatialBuffer_->add(1e-6); spatialBuffer_->sqrt2(*spatialBuffer_); normTmp->copyFrom(*spatialBuffer_); outVTmp->copyFrom(*inVTmp); diff --git a/paddle/gserver/tests/LayerGradUtil.h b/paddle/gserver/tests/LayerGradUtil.h index 5ea7ca0f24..9dbd202757 100644 --- a/paddle/gserver/tests/LayerGradUtil.h +++ b/paddle/gserver/tests/LayerGradUtil.h @@ -134,8 +134,8 @@ struct TestConfig { bool testBatchState; TestConfig() : biasSize(0), - paramInitialMean(0), - paramInitialStd(1), + paramInitialMean(0.0), + paramInitialStd(1.0), hasParamInitialValue(false), testAccumulate(true), testState(false), From 7c6aa04f6185e92082b9a742d5c746b335406711 Mon Sep 17 00:00:00 2001 From: wuyi05 Date: Wed, 5 Jul 2017 16:24:53 +0800 Subject: [PATCH 296/542] add go pre-commit and travis build --- .pre-commit-config.yaml | 4 ++-- .travis.yml | 4 ++-- go/pserver/service.go | 6 ++++-- paddle/scripts/travis/build_doc.sh | 11 ++++++----- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a7c450176d..61b989dc69 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,6 +25,6 @@ sha: e4693a4c282b4fc878eda172a929f7a6508e7d16 hooks: - id: go-fmt - - id: go-vet + files: (.*\.go) - id: go-lint - - id: gometalinter + files: (.*\.go) diff --git a/.travis.yml b/.travis.yml index 16432dac0c..aafeeba027 100644 --- a/.travis.yml +++ b/.travis.yml @@ -33,7 +33,7 @@ addons: - ccache before_install: - if [[ "$JOB" == "check_style" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi - # Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python + # Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python # protobuf version. - pip install numpy wheel 'protobuf==3.1' sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker - pip install rarfile @@ -42,7 +42,7 @@ before_install: function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; } script: - | - export WITH_GOLANG=ON && timeout 2580 paddle/scripts/travis/${JOB}.sh # 43min timeout + timeout 2580 paddle/scripts/travis/${JOB}.sh # 43min timeout RESULT=$?; if [ $RESULT -eq 0 ] || [ $RESULT -eq 142 ]; then true; else false; fi; notifications: email: diff --git a/go/pserver/service.go b/go/pserver/service.go index 7711dc027e..ad16a5708d 100644 --- a/go/pserver/service.go +++ b/go/pserver/service.go @@ -10,8 +10,10 @@ import ( type ElementType int const ( + // AlreadyInitialized is true if pserver is initialized AlreadyInitialized = "pserver already initialized" - Uninitialized = "pserver not fully initialized" + // Uninitialized is true if pserver not fully initialized + Uninitialized = "pserver not fully initialized" ) // Supported element types @@ -55,7 +57,7 @@ func NewService(idx int) (*Service, error) { s := &Service{ idx: idx, } - s.optMap = make(map[string]*optimizer) + s.optMap = make(map[string]*optimizer) s.initialized = make(chan struct{}) return s, nil } diff --git a/paddle/scripts/travis/build_doc.sh b/paddle/scripts/travis/build_doc.sh index a44bd35357..a443851580 100755 --- a/paddle/scripts/travis/build_doc.sh +++ b/paddle/scripts/travis/build_doc.sh @@ -5,13 +5,14 @@ set -e mkdir -p $TRAVIS_BUILD_DIR/build cd $TRAVIS_BUILD_DIR/build -# Compile Documentation only. -cmake .. 
-DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_STYLE_CHECK=OFF +# Compile paddle binaries first +cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_GOLANG=ON -DWITH_STYLE_CHECK=OFF mkdir output make -j `nproc` find .. -name '*whl' | xargs pip install # install all wheels. rm -rf * +# Compile Documentation only. cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=ON make -j `nproc` paddle_docs paddle_docs_cn @@ -25,7 +26,7 @@ SSH_REPO=${REPO/https:\/\/github.com\//git@github.com:} SHA=`git rev-parse --verify HEAD` # Documentation branch name -# gh-pages branch is used for PaddlePaddle.org. The English version of +# gh-pages branch is used for PaddlePaddle.org. The English version of # documentation in `doc` directory, and the chinese version in `doc_cn` # directory. TARGET_BRANCH="gh-pages" @@ -51,7 +52,7 @@ function deploy_docs() { # checkout github page branch git checkout $TARGET_BRANCH || git checkout --orphan $TARGET_BRANCH - + mkdir -p ${DIR} # remove old docs. mv new docs. set +e @@ -62,7 +63,7 @@ function deploy_docs() { git add . } -deploy_docs "master" "." +deploy_docs "master" "." deploy_docs "develop" "./develop/" # Check is there anything changed. From 81bfd47eb3fdbf7a0c398f6ad7e62f1d6e7350c1 Mon Sep 17 00:00:00 2001 From: wuyi05 Date: Wed, 5 Jul 2017 16:32:14 +0800 Subject: [PATCH 297/542] add glide in travis --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index aafeeba027..498674469b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -37,6 +37,7 @@ before_install: # protobuf version. - pip install numpy wheel 'protobuf==3.1' sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker - pip install rarfile + - curl https://glide.sh/get | bash - eval "$(GIMME_GO_VERSION=1.8.3 gimme)" - | function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; } From d0ad0314bb868b9e0c1aa77f74ca0d2d3e8b8ef0 Mon Sep 17 00:00:00 2001 From: liaogang Date: Wed, 5 Jul 2017 16:33:18 +0800 Subject: [PATCH 298/542] FIX: glog dependency --- paddle/memory/detail/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/memory/detail/CMakeLists.txt b/paddle/memory/detail/CMakeLists.txt index 4fdabc8eeb..6cb6422e47 100644 --- a/paddle/memory/detail/CMakeLists.txt +++ b/paddle/memory/detail/CMakeLists.txt @@ -12,4 +12,4 @@ cc_library(meta_cache SRCS meta_cache.cc) cc_library(memory_block SRCS memory_block.cc) -cc_library(buddy_allocator SRCS buddy_allocator.cc) +cc_library(buddy_allocator SRCS buddy_allocator.cc DEPS glog) From 2f085a7bcf11f5501bded27862988022e32299a0 Mon Sep 17 00:00:00 2001 From: wuyi05 Date: Wed, 5 Jul 2017 17:08:19 +0800 Subject: [PATCH 299/542] add go pserver deps --- go/cmd/pserver/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/cmd/pserver/CMakeLists.txt b/go/cmd/pserver/CMakeLists.txt index bc1da3348c..51db6dff04 100644 --- a/go/cmd/pserver/CMakeLists.txt +++ b/go/cmd/pserver/CMakeLists.txt @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -go_binary(pserver SRCS pserver.go) +go_binary(pserver SRCS pserver.go DEPS paddle_go_optimizer) From 5eb8bf0324ba7de923760dc05aa7e850a9ae103f Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 5 Jul 2017 17:23:41 +0800 Subject: [PATCH 300/542] Correct GLOG CHECK in Paddle Use CHECK instead of PCHECK, because PCHECK is used for errno. 
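
PCHECK appends strerror(errno) to the failure message, so it is only
meaningful immediately after a failing system call; for ordinary invariants
a plain CHECK is the right assertion. A minimal sketch of the difference
under glog (illustrative only, not part of this patch):

#include <fcntl.h>
#include <glog/logging.h>

int main(int argc, char** argv) {
  google::InitGoogleLogging(argv[0]);
  int fd = open("/no/such/file", O_RDONLY);
  // PCHECK also reports the errno text, e.g.
  //   Check failed: fd >= 0 open failed: No such file or directory [2]
  PCHECK(fd >= 0) << "open failed";
  // CHECK is a plain assertion and ignores errno, which is what conditions
  // such as message-size mismatches want. (A failing check aborts the
  // program, so in practice only the first of these would fire.)
  CHECK(fd >= 0) << "open failed";
  return 0;
}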
--- paddle/pserver/LightNetwork.cpp | 28 ++++++++++++++-------------- paddle/pserver/SocketChannel.cpp | 22 +++++++++++----------- paddle/pserver/test/SocketTest.cpp | 28 ++++++++++++++-------------- paddle/trainer/Tester.cpp | 2 +- paddle/utils/ThreadLocal.h | 12 ++++++------ 5 files changed, 46 insertions(+), 46 deletions(-) diff --git a/paddle/pserver/LightNetwork.cpp b/paddle/pserver/LightNetwork.cpp index 922f25734d..8616fd2d5a 100644 --- a/paddle/pserver/LightNetwork.cpp +++ b/paddle/pserver/LightNetwork.cpp @@ -142,7 +142,7 @@ SocketServer::SocketServer(const std::string &addr, int port, int rdmaCpu) } /// trigger to initialize RDMA lib - PCHECK(RdmaClientDaemons::get()) << "initilizate RDMA failed\n"; + CHECK(RdmaClientDaemons::get()) << "initilizate RDMA failed\n"; } SocketServer::~SocketServer() { @@ -168,7 +168,7 @@ void SocketServer::tcpServer() { /// First call to socket() function socket_ = socket(AF_INET, SOCK_STREAM, 0); - PCHECK(socket_ >= 0) << "ERROR opening socket"; + CHECK(socket_ >= 0) << "ERROR opening socket"; /// Initialize socket structure bzero((char *)&serv_addr, sizeof(serv_addr)); @@ -176,7 +176,7 @@ void SocketServer::tcpServer() { serv_addr.sin_port = htons(port_); if (!addr_.empty()) { server = gethostbyname(addr_.c_str()); - PCHECK(server) << "ERROR, no such host: " << addr_; + CHECK(server) << "ERROR, no such host: " << addr_; bcopy((char *)server->h_addr, (char *)&serv_addr.sin_addr.s_addr, server->h_length); @@ -187,7 +187,7 @@ void SocketServer::tcpServer() { setOption(socket_); /// Now bind the host address using bind() call. - PCHECK(bind(socket_, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) >= 0) + CHECK(bind(socket_, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) >= 0) << "ERROR on binding " << addr_; /// Now start listening for the clients, here process will @@ -201,7 +201,7 @@ void SocketServer::tcpServer() { if (stopping_) { break; } - PCHECK(newsockfd >= 0) << "ERROR on accept"; + CHECK(newsockfd >= 0) << "ERROR on accept"; constexpr int kPeerNameLen = 128; char peerName[kPeerNameLen]; CHECK(inet_ntop(AF_INET, &cli_addr.sin_addr, peerName, kPeerNameLen)); @@ -227,14 +227,14 @@ void SocketServer::rdmaServer() { /// First call to socket() function rdmaSocket_ = rdma::ssocket(rdmaCpu_); - PCHECK(rdmaSocket_) << "ERROR opening RDMA socket"; + CHECK(rdmaSocket_) << "ERROR opening RDMA socket"; - PCHECK(rdma::bind(rdmaSocket_, rdmaUri_.c_str()) == 0) + CHECK(rdma::bind(rdmaSocket_, rdmaUri_.c_str()) == 0) << "ERROR bind RDMA socket"; /// Now start listening for the clients, here process will /// go in sleep mode and will wait for the incoming connection - PCHECK(rdma::listen(rdmaSocket_) == 0) << "ERROR listen RDMA socket"; + CHECK(rdma::listen(rdmaSocket_) == 0) << "ERROR listen RDMA socket"; while (true) { /// Accept actual connection from the client @@ -242,7 +242,7 @@ void SocketServer::rdmaServer() { if (stopping_) { break; } - PCHECK(newsock) << "ERROR on accept"; + CHECK(newsock) << "ERROR on accept"; constexpr int kPeerNameLen = 128; char peerName[kPeerNameLen]; @@ -290,7 +290,7 @@ RdmaClientDaemons::RdmaClientDaemons() { onlineCpus_ = rdma::numCpus(); for (auto i = 0; i < onlineCpus_; i++) { socket = rdma::csocket(i); - PCHECK(socket) << "ERROR open client socket daemon"; + CHECK(socket) << "ERROR open client socket daemon"; rdmaClientSocket_.push_back(socket); } @@ -355,7 +355,7 @@ void SocketClient::TcpClient(const std::string &serverAddr, int serverPort) { /// Create a socket point int sockfd = socket(AF_INET, SOCK_STREAM, 0); - 
PCHECK(sockfd >= 0) << "ERROR opening socket"; + CHECK(sockfd >= 0) << "ERROR opening socket"; #if defined(__OSX__) || defined(__APPLE__) server = getipnodebyname(serverAddr.c_str(), AF_INET, AI_DEFAULT, &errRet); @@ -396,8 +396,8 @@ void SocketClient::TcpClient(const std::string &serverAddr, int serverPort) { } std::this_thread::sleep_for(std::chrono::seconds(1)); } else { - PCHECK(errno != 0) << "ERROR connecting to " << serverAddr << ":" - << serverPort << "errorno: " << errno; + CHECK(errno != 0) << "ERROR connecting to " << serverAddr << ":" + << serverPort << "errorno: " << errno; } } while (errno == ECONNREFUSED); @@ -426,7 +426,7 @@ void SocketClient::RdmaClient(const std::string &serverAddr, int serverPort) { /// connect to server with socket daemon sock = rdma::connect(socketDaemon_, rdmaUri.c_str()); - PCHECK(sock) << "ERROR connect to server" << rdmaUri; + CHECK(sock) << "ERROR connect to server" << rdmaUri; std::vector seg; str::split(rdmaUri, '/', &seg); diff --git a/paddle/pserver/SocketChannel.cpp b/paddle/pserver/SocketChannel.cpp index 0599889164..12e3bc6552 100644 --- a/paddle/pserver/SocketChannel.cpp +++ b/paddle/pserver/SocketChannel.cpp @@ -51,7 +51,7 @@ size_t SocketChannel::read(void* buf, size_t size) { else len = rdma::read(rdmaSocket_, (char*)buf + total, size - total); - PCHECK(len >= 0) << " peer=" << peerName_; + CHECK(len >= 0) << " peer=" << peerName_; if (len <= 0) { return total; } @@ -69,7 +69,7 @@ size_t SocketChannel::write(const void* buf, size_t size) { else len = rdma::write(rdmaSocket_, (char*)buf + total, size - total); - PCHECK(len >= 0) << " peer=" << peerName_; + CHECK(len >= 0) << " peer=" << peerName_; if (len <= 0) { return total; } @@ -98,10 +98,10 @@ static size_t readwritev(IOFunc iofunc, while (size < total) { ssize_t len = iofunc(socket, &iovs[curIov], std::min(iovcnt - curIov, maxiovs)); - PCHECK(len > 0) << " peer=" << peerName << " curIov=" << curIov - << " iovCnt=" << iovcnt - << " iovs[curIov].base=" << iovs[curIov].iov_base - << " iovs[curIov].iov_len=" << iovs[curIov].iov_len; + CHECK(len > 0) << " peer=" << peerName << " curIov=" << curIov + << " iovCnt=" << iovcnt + << " iovs[curIov].base=" << iovs[curIov].iov_base + << " iovs[curIov].iov_len=" << iovs[curIov].iov_len; size += len; /// restore iovs[curIov] to the original value @@ -183,7 +183,7 @@ void SocketChannel::writeMessage(const std::vector& userIovs) { header.totalLength += iov.iov_len; } - PCHECK(writev(iovs) == (size_t)header.totalLength); + CHECK(writev(iovs) == (size_t)header.totalLength); } std::unique_ptr SocketChannel::readMessage() { @@ -194,7 +194,7 @@ std::unique_ptr SocketChannel::readMessage() { return nullptr; } - PCHECK(len == sizeof(header)); + CHECK(len == sizeof(header)); std::unique_ptr msgReader(new MsgReader(this, header.numIovs)); @@ -209,7 +209,7 @@ std::unique_ptr SocketChannel::readMessage() { MsgReader::MsgReader(SocketChannel* channel, size_t numBlocks) : channel_(channel), blockLengths_(numBlocks), currentBlockIndex_(0) { size_t size = numBlocks * sizeof(blockLengths_[0]); - PCHECK(channel_->read(&blockLengths_[0], size) == size); + CHECK(channel_->read(&blockLengths_[0], size) == size); } void MsgReader::readBlocks(const std::vector& bufs) { @@ -223,12 +223,12 @@ void MsgReader::readBlocks(const std::vector& bufs) { ++currentBlockIndex_; } - PCHECK(channel_->readv(&iovs) == totalLength); + CHECK(channel_->readv(&iovs) == totalLength); } void MsgReader::readNextBlock(void* buf) { CHECK_LT(currentBlockIndex_, blockLengths_.size()); - 
PCHECK(channel_->read(buf, getNextBlockLength()) == getNextBlockLength()); + CHECK(channel_->read(buf, getNextBlockLength()) == getNextBlockLength()); ++currentBlockIndex_; } diff --git a/paddle/pserver/test/SocketTest.cpp b/paddle/pserver/test/SocketTest.cpp index 066a6c0293..6f6c9e596c 100644 --- a/paddle/pserver/test/SocketTest.cpp +++ b/paddle/pserver/test/SocketTest.cpp @@ -113,7 +113,7 @@ void SocketServer::run() { /* First call to socket() function */ socket_ = socket(AF_INET, SOCK_STREAM, 0); - PCHECK(socket_ >= 0) << "ERROR opening socket"; + CHECK(socket_ >= 0) << "ERROR opening socket"; /* Initialize socket structure */ bzero((char*)&serv_addr, sizeof(serv_addr)); @@ -122,7 +122,7 @@ void SocketServer::run() { serv_addr.sin_port = htons(port_); /* Now bind the host address using bind() call.*/ - PCHECK(bind(socket_, (struct sockaddr*)&serv_addr, sizeof(serv_addr)) >= 0) + CHECK(bind(socket_, (struct sockaddr*)&serv_addr, sizeof(serv_addr)) >= 0) << "ERROR on binding"; /* Now start listening for the clients, here process will @@ -134,7 +134,7 @@ void SocketServer::run() { while (true) { /* Accept actual connection from the client */ newsockfd = accept(socket_, (struct sockaddr*)&cli_addr, &clilen); - PCHECK(newsockfd >= 0) << "ERROR on accept"; + CHECK(newsockfd >= 0) << "ERROR on accept"; SocketWorker* worker = new SocketWorker(newsockfd); worker->start(); @@ -146,17 +146,17 @@ void SocketWorker::run() { while (true) { int64_t n = channel_.readAll(&header, sizeof(header)); - PCHECK(n == sizeof(header)) << "ERROR reading from socket"; + CHECK(n == sizeof(header)) << "ERROR reading from socket"; buffer_.resize(header.dataLength); n = channel_.readAll(&buffer_[0], header.dataLength); - PCHECK(n == header.dataLength) << "ERROR reading from socket"; + CHECK(n == header.dataLength) << "ERROR reading from socket"; /* Write a response to the client */ n = channel_.writeAll(&header, sizeof(header)); - PCHECK(n == sizeof(header)) << "ERROR reading from socket"; + CHECK(n == sizeof(header)) << "ERROR reading from socket"; n = channel_.writeAll(buffer_.data(), buffer_.size()); - PCHECK(n == header.dataLength) << "ERROR writing to socket"; + CHECK(n == header.dataLength) << "ERROR writing to socket"; } } @@ -177,9 +177,9 @@ SocketClient::SocketClient(const std::string& serverAddr, int serverPort) { /* Create a socket point */ int sockfd = socket(AF_INET, SOCK_STREAM, 0); - PCHECK(sockfd >= 0) << "ERROR opening socket"; + CHECK(sockfd >= 0) << "ERROR opening socket"; server = gethostbyname(serverAddr.c_str()); - PCHECK(server) << "ERROR, no such host: " << serverAddr; + CHECK(server) << "ERROR, no such host: " << serverAddr; bzero((char*)&serv_addr, sizeof(serv_addr)); serv_addr.sin_family = AF_INET; @@ -189,7 +189,7 @@ SocketClient::SocketClient(const std::string& serverAddr, int serverPort) { serv_addr.sin_port = htons(serverPort); /* Now connect to the server */ - PCHECK(connect(sockfd, (sockaddr*)&serv_addr, sizeof(serv_addr)) >= 0) + CHECK(connect(sockfd, (sockaddr*)&serv_addr, sizeof(serv_addr)) >= 0) << "ERROR connecting"; channel_.reset(new SocketChannel(sockfd)); @@ -234,18 +234,18 @@ int main(int argc, char** argv) { cpuGrad.copyFrom(gpuGrad); header.dataLength = dataSize; - PCHECK(channel->writeAll(&header, sizeof(header)) == sizeof(header)) + CHECK(channel->writeAll(&header, sizeof(header)) == sizeof(header)) << "Client write header error"; - PCHECK(channel->writeAll(cpuGrad.getData(), dataSize) == dataSize) + CHECK(channel->writeAll(cpuGrad.getData(), dataSize) == dataSize) << 
"Client write data error"; /* Now read server response */ - PCHECK(channel->readAll(&header, sizeof(header)) == sizeof(header)) + CHECK(channel->readAll(&header, sizeof(header)) == sizeof(header)) << "Client read header error"; CHECK_EQ((uint64_t)header.dataLength, dataSize); - PCHECK(channel->readAll(cpuParam.getData(), dataSize) == dataSize) + CHECK(channel->readAll(cpuParam.getData(), dataSize) == dataSize) << "Client read data error"; gpuParam.copyFrom(cpuParam); diff --git a/paddle/trainer/Tester.cpp b/paddle/trainer/Tester.cpp index 80664fa877..16e676d602 100644 --- a/paddle/trainer/Tester.cpp +++ b/paddle/trainer/Tester.cpp @@ -175,7 +175,7 @@ real Tester::forwardOneBatch(const DataBatch& dataBatch, } hl_stream_synchronize(HPPL_STREAM_DEFAULT); FILE* fp = fopen(featFile.c_str(), "ab+"); - PCHECK(!ferror(fp)) << "Fail to open " << featFile; + CHECK(!ferror(fp)) << "Fail to open " << featFile; size_t sampleNum = featMatrices[0]->getHeight(); for (size_t i = 0; i < sampleNum; ++i) { diff --git a/paddle/utils/ThreadLocal.h b/paddle/utils/ThreadLocal.h index a4987c9ec2..b5e2862546 100644 --- a/paddle/utils/ThreadLocal.h +++ b/paddle/utils/ThreadLocal.h @@ -51,7 +51,7 @@ template class ThreadLocal { public: ThreadLocal() { - PCHECK(pthread_key_create(&threadSpecificKey_, dataDestructor) == 0); + CHECK(pthread_key_create(&threadSpecificKey_, dataDestructor) == 0); } ~ThreadLocal() { pthread_key_delete(threadSpecificKey_); } @@ -65,7 +65,7 @@ public: if (!p && createLocal) { p = new T(); int ret = pthread_setspecific(threadSpecificKey_, p); - PCHECK(ret == 0); + CHECK(ret == 0); } return p; } @@ -79,7 +79,7 @@ public: if (T* q = get(false)) { dataDestructor(q); } - PCHECK(pthread_setspecific(threadSpecificKey_, p) == 0); + CHECK(pthread_setspecific(threadSpecificKey_, p) == 0); } /** @@ -112,7 +112,7 @@ private: template class ThreadLocalD { public: - ThreadLocalD() { PCHECK(pthread_key_create(&threadSpecificKey_, NULL) == 0); } + ThreadLocalD() { CHECK(pthread_key_create(&threadSpecificKey_, NULL) == 0); } ~ThreadLocalD() { pthread_key_delete(threadSpecificKey_); for (auto t : threadMap_) { @@ -127,7 +127,7 @@ public: T* p = (T*)pthread_getspecific(threadSpecificKey_); if (!p) { p = new T(); - PCHECK(pthread_setspecific(threadSpecificKey_, p) == 0); + CHECK(pthread_setspecific(threadSpecificKey_, p) == 0); updateMap(p); } return p; @@ -141,7 +141,7 @@ public: if (T* q = (T*)pthread_getspecific(threadSpecificKey_)) { dataDestructor(q); } - PCHECK(pthread_setspecific(threadSpecificKey_, p) == 0); + CHECK(pthread_setspecific(threadSpecificKey_, p) == 0); updateMap(p); } From 2e302085d7c0a79a8516533b29450a1febc25d79 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Wed, 5 Jul 2017 09:49:30 +0000 Subject: [PATCH 301/542] fix bugs --- cmake/generic.cmake | 4 ++-- go/pserver/client/c/CMakeLists.txt | 3 ++- go/pserver/client/c/test/CMakeLists.txt | 4 +--- paddle/api/CMakeLists.txt | 1 + 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 74396abdbb..d51b95a5d7 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -192,9 +192,9 @@ function(cc_test TARGET_NAME) set(multiValueArgs SRCS DEPS) cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) add_executable(${TARGET_NAME} ${cc_test_SRCS}) - target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} gtest gtest_main) + target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} gtest gtest_main -lstdc++ -lm) add_dependencies(${TARGET_NAME} ${cc_test_DEPS} gtest 
gtest_main) - add_test(${TARGET_NAME} ${TARGET_NAME}) + add_test(NAME ${TARGET_NAME} COMMAND ${TARGET_NAME} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endif() endfunction(cc_test) diff --git a/go/pserver/client/c/CMakeLists.txt b/go/pserver/client/c/CMakeLists.txt index a3fcaeef19..d2ac20e25c 100644 --- a/go/pserver/client/c/CMakeLists.txt +++ b/go/pserver/client/c/CMakeLists.txt @@ -1,5 +1,6 @@ cc_library(paddle_go_optimizer DEPS paddle_optimizer paddle_proto glog gflags protobuf) go_library(paddle_pserver_cclient STATIC DEPS paddle_go_optimizer) if(WITH_TESTING) - add_subdirectory(test) + # TODO: add unit test + #add_subdirectory(test) endif() diff --git a/go/pserver/client/c/test/CMakeLists.txt b/go/pserver/client/c/test/CMakeLists.txt index 44bc183738..dce8645ce7 100644 --- a/go/pserver/client/c/test/CMakeLists.txt +++ b/go/pserver/client/c/test/CMakeLists.txt @@ -1,4 +1,2 @@ -# FIXME:It's ugly -#cc_test(test_cclient SRCS test_cclient.c DEPS paddle_pserver_cclient paddle_go_optimizer) -cc_test(test_cclient SRCS test_cclient.c DEPS paddle_pserver_cclient paddle_optimizer paddle_proto glog gflags protobuf) +cc_test(test_cclient SRCS test_cclient.c DEPS paddle_pserver_cclient paddle_go_optimizer) add_style_check_target(test_cclient test_cclient.c) diff --git a/paddle/api/CMakeLists.txt b/paddle/api/CMakeLists.txt index 39d8aa075b..84da89a142 100644 --- a/paddle/api/CMakeLists.txt +++ b/paddle/api/CMakeLists.txt @@ -66,6 +66,7 @@ SWIG_LINK_LIBRARIES(swig_paddle paddle_trainer_lib paddle_network paddle_parameter + paddle_optimizer paddle_math paddle_utils paddle_proto From 204869c2dae9b03b1155be106484ef328e942132 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Wed, 5 Jul 2017 10:10:18 +0000 Subject: [PATCH 302/542] fix bugs --- paddle/scripts/docker/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 1ccee686df..ab60f1a38d 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -78,7 +78,7 @@ paddle version # PaddlePaddle. This awkwardness is due to # https://github.com/PaddlePaddle/Paddle/issues/1854. It also # describes a solution. 
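# A sketch of the failure the next change guards against (assuming WITH_DOC
# can be unset when build.sh runs; the commit message does not say):
#   unset WITH_DOC
#   [ ${WITH_DOC} == "ON" ]    # expands to `[ == "ON" ]` -> "unary operator expected"
#   [[ ${WITH_DOC} == "ON" ]]  # [[ is a bash keyword: the empty expansion is
#                              # handled and the test simply evaluates to false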
-if [ ${WITH_DOC} == "ON" ]; then +if [[ ${WITH_DOC} == "ON" ]]; then cat < Date: Wed, 5 Jul 2017 18:18:32 +0800 Subject: [PATCH 303/542] fix auto cgo LDFLAGS --- go/pserver/optimizer.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go index d84f55b987..54d1082094 100644 --- a/go/pserver/optimizer.go +++ b/go/pserver/optimizer.go @@ -2,7 +2,7 @@ package pserver // #cgo CFLAGS: -I ../../ // //FIXME: ldflags contain "build" path -// #cgo LDFLAGS: ../../build/go/pserver/client/c/libpaddle_go_optimizer.a -lstdc++ -lm +// #cgo LDFLAGS: ${SRCDIR}/../../build/go/pserver/client/c/libpaddle_go_optimizer.a -lstdc++ -lm // #include "paddle/optimizer/optimizer.h" // #include // #include @@ -56,8 +56,8 @@ func newOptimizer(paramWithConfigs ParameterWithConfig) *optimizer { func (o *optimizer) GetWeights() []byte { var buffer unsafe.Pointer - buffer_len := C.paddle_optimizer_get_weights(o.opt, &buffer) - return cArrayToSlice(buffer, int(buffer_len)*C.sizeof_float) + bufferLen := C.paddle_optimizer_get_weights(o.opt, &buffer) + return cArrayToSlice(buffer, int(bufferLen)*C.sizeof_float) } func (o *optimizer) UpdateParameter(g Gradient) error { From c37da0bd3ba14318198bfc6dd8f8ba5e13c1a269 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Wed, 5 Jul 2017 18:36:47 +0800 Subject: [PATCH 304/542] Remove hasParamInitialValue flag. --- paddle/gserver/tests/LayerGradUtil.cpp | 9 +++------ paddle/gserver/tests/LayerGradUtil.h | 2 -- paddle/gserver/tests/test_LayerGrad.cpp | 1 - 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/paddle/gserver/tests/LayerGradUtil.cpp b/paddle/gserver/tests/LayerGradUtil.cpp index 66aafba844..15b8cedeb8 100644 --- a/paddle/gserver/tests/LayerGradUtil.cpp +++ b/paddle/gserver/tests/LayerGradUtil.cpp @@ -498,12 +498,9 @@ void initTestLayer(TestConfig testConf, paraConfig.add_dims((*layerMap)[input.input_layer_name()]->getSize()); paraConfig.add_dims(testConf.layerConfig.size()); } - if (testConf.hasParamInitialValue) { - paraConfig.set_initial_mean(testConf.paramInitialMean); - paraConfig.set_initial_std(testConf.paramInitialStd); - } else { - paraConfig.set_initial_std(1); - } + CHECK_GE(testConf.paramInitialStd, 0); + paraConfig.set_initial_mean(testConf.paramInitialMean); + paraConfig.set_initial_std(testConf.paramInitialStd); initParameter(paraName, paraSize, inputDef.isStatic, false, paraConfig); } } diff --git a/paddle/gserver/tests/LayerGradUtil.h b/paddle/gserver/tests/LayerGradUtil.h index 9dbd202757..d299b4dd09 100644 --- a/paddle/gserver/tests/LayerGradUtil.h +++ b/paddle/gserver/tests/LayerGradUtil.h @@ -127,7 +127,6 @@ struct TestConfig { size_t biasSize; real paramInitialMean; real paramInitialStd; - bool hasParamInitialValue; bool testAccumulate; bool testState; bool staticBias; @@ -136,7 +135,6 @@ struct TestConfig { : biasSize(0), paramInitialMean(0.0), paramInitialStd(1.0), - hasParamInitialValue(false), testAccumulate(true), testState(false), staticBias(false), diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 6441e08b48..bf0136a10f 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -1661,7 +1661,6 @@ TEST(Layer, PadLayer) { TEST(Layer, CrossChannelNormLayer) { TestConfig config; - config.hasParamInitialValue = true; config.paramInitialMean = 1.; config.paramInitialStd = 0.; config.layerConfig.set_type("norm"); From ada1c20bbc2520d566b7d2bd2a56cf94cbcddd27 Mon Sep 17 00:00:00 2001 
From: liaogang Date: Wed, 5 Jul 2017 19:16:02 +0800 Subject: [PATCH 305/542] FIX: Buddy Allocator Free with Merge feature --- paddle/memory/detail/buddy_allocator.cc | 33 ++++++++++++++++++++++--- paddle/memory/detail/buddy_allocator.h | 15 +++++++---- paddle/memory/detail/memory_block.cc | 4 +-- paddle/platform/CMakeLists.txt | 2 +- 4 files changed, 42 insertions(+), 12 deletions(-) diff --git a/paddle/memory/detail/buddy_allocator.cc b/paddle/memory/detail/buddy_allocator.cc index aa5b6b557c..9f334a7048 100644 --- a/paddle/memory/detail/buddy_allocator.cc +++ b/paddle/memory/detail/buddy_allocator.cc @@ -89,9 +89,8 @@ void BuddyAllocator::Free(void* p) { block->index(cache_)); // Invalidate GPU allocation from cache - if (system_allocator_->UseGpu()) { - cache_.invalidate(block); - } + cache_.invalidate(block); + return; } @@ -104,12 +103,35 @@ void BuddyAllocator::Free(void* p) { if (block->has_right_buddy(cache_)) { DLOG(INFO) << "Merging this block " << block << " with its right buddy " << block->right_buddy(cache_); + + auto right_buddy = block->right_buddy(cache_); + + if (right_buddy->type(cache_) == MemoryBlock::FREE_CHUNK) { + // Take away right buddy from pool + pool_.erase({right_buddy->index(cache_), right_buddy->total_size(cache_), + right_buddy}); + + // merge its right buddy to the block + block->merge(cache_, right_buddy); + } } // Trying to merge the left buddy if (block->has_left_buddy(cache_)) { DLOG(INFO) << "Merging this block " << block << " with its left buddy " << block->left_buddy(cache_); + + auto left_buddy = block->left_buddy(cache_); + + if (left_buddy->type(cache_) == MemoryBlock::FREE_CHUNK) { + // Take away right buddy from pool + pool_.erase({left_buddy->index(cache_), left_buddy->total_size(cache_), + left_buddy}); + + // merge the block to its left buddy + left_buddy->merge(cache_, block); + block = left_buddy; + } } // Dumping this block into pool @@ -167,13 +189,16 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::FindExistChunk(size_t size) { while (1) { auto it = pool_.lower_bound({index, size, nullptr}); + + // no match chunk memory if (it == pool_.end()) return it; if (std::get<0>(*it) > index) { + // find suitable one if (std::get<1>(*it) >= size) { return it; } - + // update and continue index = std::get<0>(*it); continue; } diff --git a/paddle/memory/detail/buddy_allocator.h b/paddle/memory/detail/buddy_allocator.h index ecf23b77ae..2fd9c8162a 100644 --- a/paddle/memory/detail/buddy_allocator.h +++ b/paddle/memory/detail/buddy_allocator.h @@ -42,14 +42,14 @@ class BuddyAllocator { void Free(void*); size_t Used(); - public: + private: // Disable copy and assignment. BuddyAllocator(const BuddyAllocator&) = delete; BuddyAllocator& operator=(const BuddyAllocator&) = delete; - private: - // Tuple type: allocator index, memory size, memory address + // Tuple (allocator index, memory size, memory address) using IndexSizeAddress = std::tuple; + // Each element in PoolSet is a free allocation using PoolSet = std::set; /*! \brief Allocate fixed-size memory from system */ @@ -57,7 +57,6 @@ class BuddyAllocator { /*! 
\brief If existing chunks are not suitable, refill pool */ PoolSet::iterator RefillPool(); - /** * \brief Find the suitable chunk from existing pool * @@ -77,13 +76,19 @@ class BuddyAllocator { size_t max_chunk_size_; // the maximum size of each chunk private: + /** + * \brief A list of free allocation + * + * \note Only store free chunk memory in pool + */ PoolSet pool_; private: - // Unify the metadata format between GPU and CPU allocations + /*! Unify the metadata format between GPU and CPU allocations */ MetadataCache cache_; private: + /*! Allocate CPU/GPU memory from system */ SystemAllocator* system_allocator_; std::mutex mutex_; }; diff --git a/paddle/memory/detail/memory_block.cc b/paddle/memory/detail/memory_block.cc index eaa97e7b4a..bc67bcef0f 100644 --- a/paddle/memory/detail/memory_block.cc +++ b/paddle/memory/detail/memory_block.cc @@ -91,8 +91,8 @@ void MemoryBlock::split(MetadataCache& cache, size_t size) { void MemoryBlock::merge(MetadataCache& cache, MemoryBlock* right_buddy) { // only free blocks can be merged - PADDLE_ASSERT(type(cache) == FREE_MEMORY); - PADDLE_ASSERT(right_buddy->type(cache) == FREE_MEMORY); + PADDLE_ASSERT(type(cache) == FREE_CHUNK); + PADDLE_ASSERT(right_buddy->type(cache) == FREE_CHUNK); auto metadata = cache.load(this); diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index 2f3d1c061e..0ad11f1b10 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -6,4 +6,4 @@ nv_library(gpu_info SRCS gpu_info.cc DEPS gflags) cc_library(place SRCS place.cc) cc_test(place_test SRCS place_test.cc DEPS place glog gflags) -cc_library(dynamic_loader SRCS dynload/dynamic_loader.cc) +cc_library(dynamic_loader SRCS dynload/dynamic_loader.cc DEPS gflags) From b68e90be820f7a925e114f76f27156e728fc9e79 Mon Sep 17 00:00:00 2001 From: "yi.wu" Date: Wed, 5 Jul 2017 21:30:28 +0800 Subject: [PATCH 306/542] fix go test building --- go/pserver/client/c/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/go/pserver/client/c/CMakeLists.txt b/go/pserver/client/c/CMakeLists.txt index a3fcaeef19..34aa7ca5ff 100644 --- a/go/pserver/client/c/CMakeLists.txt +++ b/go/pserver/client/c/CMakeLists.txt @@ -1,4 +1,5 @@ cc_library(paddle_go_optimizer DEPS paddle_optimizer paddle_proto glog gflags protobuf) +target_link_libraries(paddle_go_optimizer stdc++ m) go_library(paddle_pserver_cclient STATIC DEPS paddle_go_optimizer) if(WITH_TESTING) add_subdirectory(test) From 78f1274d6e2c75d0036ae2a7da6cbccfc844b8f0 Mon Sep 17 00:00:00 2001 From: "yi.wu" Date: Wed, 5 Jul 2017 21:40:12 +0800 Subject: [PATCH 307/542] remove unnessesary cc_test link --- cmake/generic.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index d51b95a5d7..c2962e35ef 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -192,7 +192,7 @@ function(cc_test TARGET_NAME) set(multiValueArgs SRCS DEPS) cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) add_executable(${TARGET_NAME} ${cc_test_SRCS}) - target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} gtest gtest_main -lstdc++ -lm) + target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} gtest gtest_main) add_dependencies(${TARGET_NAME} ${cc_test_DEPS} gtest gtest_main) add_test(NAME ${TARGET_NAME} COMMAND ${TARGET_NAME} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endif() @@ -285,7 +285,7 @@ function(go_library TARGET_NAME) add_custom_command(TARGET ${TARGET_NAME} POST_BUILD COMMAND rm 
"${${TARGET_NAME}_LIB_PATH}" # Golang build source code - COMMAND env LIBRARY_PATH=${CMAKE_BINARY_DIR}/go/pserver/client/c/:$ENV{LIBRARY_PATH} GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE} + COMMAND GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE} -o "${${TARGET_NAME}_LIB_PATH}" "./${CMAKE_CURRENT_SOURCE_REL_DIR}/${GO_SOURCE}" # must run under GOPATH From 4d2a83c750c6168d16a4ee302b0c69e553bd0b34 Mon Sep 17 00:00:00 2001 From: "yi.wu" Date: Wed, 5 Jul 2017 21:58:46 +0800 Subject: [PATCH 308/542] update again --- go/pserver/client/c/test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/pserver/client/c/test/CMakeLists.txt b/go/pserver/client/c/test/CMakeLists.txt index f287f85071..dce8645ce7 100644 --- a/go/pserver/client/c/test/CMakeLists.txt +++ b/go/pserver/client/c/test/CMakeLists.txt @@ -1,2 +1,2 @@ -cc_test(test_cclient SRCS test_cclient.c DEPS paddle_pserver_cclient) +cc_test(test_cclient SRCS test_cclient.c DEPS paddle_pserver_cclient paddle_go_optimizer) add_style_check_target(test_cclient test_cclient.c) From 74691789e9e5ee782adb003642f66699603b20e2 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 6 Jul 2017 01:16:03 +0800 Subject: [PATCH 309/542] ENH: add memory unit test --- paddle/memory/CMakeLists.txt | 6 ++-- paddle/memory/detail/CMakeLists.txt | 4 +-- paddle/memory/detail/buddy_allocator.cc | 33 +++++++++++------ paddle/memory/detail/buddy_allocator.h | 27 -------------- paddle/memory/memory.cc | 42 ++++++++++++++++++---- paddle/memory/memory_test.cc | 48 +++++++++++++++++++++++++ paddle/platform/gpu_info.cc | 2 +- 7 files changed, 112 insertions(+), 50 deletions(-) create mode 100644 paddle/memory/memory_test.cc diff --git a/paddle/memory/CMakeLists.txt b/paddle/memory/CMakeLists.txt index 8c290712fc..fac442cca5 100644 --- a/paddle/memory/CMakeLists.txt +++ b/paddle/memory/CMakeLists.txt @@ -1,11 +1,11 @@ add_subdirectory(detail) -cc_library(memory - SRCS - memory.cc) +cc_library(memory SRCS memory.cc) cc_library(paddle_memory DEPS memory meta_data meta_cache memory_block buddy_allocator system_allocator) + +cc_test(memory_test SRCS memory_test.cc DEPS place paddle_memory) diff --git a/paddle/memory/detail/CMakeLists.txt b/paddle/memory/detail/CMakeLists.txt index 6cb6422e47..b9c3fc31c1 100644 --- a/paddle/memory/detail/CMakeLists.txt +++ b/paddle/memory/detail/CMakeLists.txt @@ -1,7 +1,7 @@ if(${WITH_GPU}) - nv_library(system_allocator SRCS system_allocator.cc DEPS gflags gpu_info) + nv_library(system_allocator SRCS system_allocator.cc DEPS gflags cpu_info gpu_info) else(${WITH_GPU}) - cc_library(system_allocator SRCS system_allocator.cc DEPS gflags) + cc_library(system_allocator SRCS system_allocator.cc DEPS gflags cpu_info) endif(${WITH_GPU}) cc_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator) diff --git a/paddle/memory/detail/buddy_allocator.cc b/paddle/memory/detail/buddy_allocator.cc index 9f334a7048..ed2eedf9af 100644 --- a/paddle/memory/detail/buddy_allocator.cc +++ b/paddle/memory/detail/buddy_allocator.cc @@ -24,10 +24,20 @@ BuddyAllocator::BuddyAllocator(SystemAllocator* system_allocator, : min_chunk_size_(min_chunk_size), max_chunk_size_(max_chunk_size), cache_(system_allocator->UseGpu()), - system_allocator_(std::move(system_allocator)) { - PADDLE_ASSERT(min_chunk_size > 0); - PADDLE_ASSERT(max_chunk_size > 0); - PADDLE_ASSERT(system_allocator != nullptr); + system_allocator_(std::move(system_allocator)) {} + +BuddyAllocator::~BuddyAllocator() { + DLOG(INFO) << 
"BuddyAllocator Disconstructor makes sure that all of these " + "have actually been freed"; + while (!pool_.empty()) { + auto block = static_cast(std::get<2>(*pool_.begin())); + DLOG(INFO) << "Free from block (" << block << ", " << max_chunk_size_ + << ")"; + + system_allocator_->Free(block, max_chunk_size_, block->index(cache_)); + cache_.invalidate(block); + pool_.erase(pool_.begin()); + } } inline size_t align(size_t size, size_t alignment) { @@ -62,7 +72,7 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) { return nullptr; } } else { - DLOG(INFO) << " Allocation from existing memory block " << std::get<2>(*it) + DLOG(INFO) << "Allocation from existing memory block " << std::get<2>(*it) << " at address " << reinterpret_cast(std::get<2>(*it))->data(); } @@ -142,6 +152,8 @@ void BuddyAllocator::Free(void* p) { // TODO(gangliao): Clean up if existing too much free memory } +size_t BuddyAllocator::Used() { return total_used_; } + void* BuddyAllocator::SystemAlloc(size_t size) { size_t index = 0; void* p = system_allocator_->Alloc(index, size); @@ -172,7 +184,7 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() { if (p == nullptr) return pool_.end(); - DLOG(INFO) << " Creating and inserting new block " << p + DLOG(INFO) << "Creating and inserting new block " << p << " from system allocator"; static_cast(p)->init(cache_, MemoryBlock::FREE_CHUNK, index, @@ -211,20 +223,19 @@ void* BuddyAllocator::SplitToAlloc(BuddyAllocator::PoolSet::iterator it, auto block = static_cast(std::get<2>(*it)); pool_.erase(it); - DLOG(INFO) << " Split block (" << block << ", " << block->total_size(cache_) + DLOG(INFO) << "Split block (" << block << ", " << block->total_size(cache_) << ") into"; block->split(cache_, size); - DLOG(INFO) << " Left block (" << block << ", " << block->total_size(cache_) + DLOG(INFO) << "Left block (" << block << ", " << block->total_size(cache_) << ")"; block->set_type(cache_, MemoryBlock::ARENA_CHUNK); // the rest of memory if exist if (block->has_right_buddy(cache_)) { if (block->right_buddy(cache_)->type(cache_) == MemoryBlock::FREE_CHUNK) { - DLOG(INFO) << " Insert right block (" << block->right_buddy(cache_) - << ", " << block->right_buddy(cache_)->total_size(cache_) - << ")"; + DLOG(INFO) << "Insert right block (" << block->right_buddy(cache_) << ", " + << block->right_buddy(cache_)->total_size(cache_) << ")"; pool_.insert({block->right_buddy(cache_)->index(cache_), block->right_buddy(cache_)->total_size(cache_), diff --git a/paddle/memory/detail/buddy_allocator.h b/paddle/memory/detail/buddy_allocator.h index 2fd9c8162a..eeb2dc8836 100644 --- a/paddle/memory/detail/buddy_allocator.h +++ b/paddle/memory/detail/buddy_allocator.h @@ -93,33 +93,6 @@ class BuddyAllocator { std::mutex mutex_; }; -BuddyAllocator* GetCPUBuddyAllocator() { - static BuddyAllocator* a = nullptr; - if (a == nullptr) { - a = new BuddyAllocator(new CPUAllocator, platform::CpuMinChunkSize(), - platform::CpuMaxChunkSize()); - } - return a; -} - -#ifndef PADDLE_ONLY_CPU // The following code are for CUDA. 
- -BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { - static BuddyAllocator** as = NULL; - if (as == NULL) { - int gpu_num = platform::GpuDeviceCount(); - as = new BuddyAllocator*[gpu_num]; - for (int gpu = 0; gpu < gpu_num; gpu++) { - as[gpu] = - new BuddyAllocator(new GPUAllocator, platform::GpuMinChunkSize(), - platform::GpuMaxChunkSize()); - } - } - return as[gpu_id]; -} - -#endif // PADDLE_ONLY_CPU - } // namespace detail } // namespace memory } // namespace paddle diff --git a/paddle/memory/memory.cc b/paddle/memory/memory.cc index 0d123d99e2..dde6ff0ef3 100644 --- a/paddle/memory/memory.cc +++ b/paddle/memory/memory.cc @@ -22,37 +22,67 @@ limitations under the License. */ namespace paddle { namespace memory { +detail::BuddyAllocator* GetCPUBuddyAllocator() { + static detail::BuddyAllocator* a = nullptr; + if (a == nullptr) { + a = new detail::BuddyAllocator(new detail::CPUAllocator, + platform::CpuMinChunkSize(), + platform::CpuMaxChunkSize()); + } + return a; +} + +#ifndef PADDLE_ONLY_CPU // The following code are for CUDA. + +detail::BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { + static detail::BuddyAllocator** as = NULL; + if (as == NULL) { + int gpu_num = platform::GpuDeviceCount(); + as = new detail::BuddyAllocator*[gpu_num]; + for (int gpu = 0; gpu < gpu_num; gpu++) { + platform::SetDeviceId(gpu); + as[gpu] = new detail::BuddyAllocator(new detail::GPUAllocator, + platform::GpuMinChunkSize(), + platform::GpuMaxChunkSize()); + } + } + return as[gpu_id]; +} + +#endif // PADDLE_ONLY_CPU + void* Alloc(platform::Place pl, size_t size) { #ifndef PADDLE_ONLY_CPU if (paddle::platform::is_gpu_place(pl)) { size_t gpu_id = boost::get(pl).device; - return detail::GetGPUBuddyAllocator(gpu_id)->Alloc(size); + return GetGPUBuddyAllocator(gpu_id)->Alloc(size); } #endif // PADDLE_ONLY_CPU PADDLE_ASSERT(paddle::platform::is_cpu_place(pl)); - return detail::GetCPUBuddyAllocator()->Alloc(size); + return GetCPUBuddyAllocator()->Alloc(size); } void Free(paddle::platform::Place pl, void* p) { #ifndef PADDLE_ONLY_CPU if (paddle::platform::is_gpu_place(pl)) { size_t gpu_id = boost::get(pl).device; - detail::GetGPUBuddyAllocator(gpu_id)->Free(p); + GetGPUBuddyAllocator(gpu_id)->Free(p); + return; } #endif // PADDLE_ONLY_CPU PADDLE_ASSERT(paddle::platform::is_cpu_place(pl)); - detail::GetCPUBuddyAllocator()->Free(p); + GetCPUBuddyAllocator()->Free(p); } size_t Used(paddle::platform::Place pl) { #ifndef PADDLE_ONLY_CPU if (paddle::platform::is_gpu_place(pl)) { size_t gpu_id = boost::get(pl).device; - return detail::GetGPUBuddyAllocator(gpu_id)->Used(); + return GetGPUBuddyAllocator(gpu_id)->Used(); } #endif // PADDLE_ONLY_CPU PADDLE_ASSERT(paddle::platform::is_cpu_place(pl)); - return detail::GetCPUBuddyAllocator()->Used(); + return GetCPUBuddyAllocator()->Used(); } } // namespace memory diff --git a/paddle/memory/memory_test.cc b/paddle/memory/memory_test.cc new file mode 100644 index 0000000000..a98ccbb806 --- /dev/null +++ b/paddle/memory/memory_test.cc @@ -0,0 +1,48 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/memory/memory.h" +#include "paddle/platform/place.h" + +#include "gtest/gtest.h" + +TEST(BuddyAllocator, CPUAllocation) { + void *p = nullptr; + + EXPECT_EQ(p, nullptr); + + paddle::platform::CPUPlace cpu; + p = paddle::memory::Alloc(cpu, 4096); + + EXPECT_NE(p, nullptr); + + paddle::memory::Free(cpu, p); +} + +#ifndef PADDLE_ONLY_CPU + +TEST(BuddyAllocator, GPUAllocation) { + void *p = nullptr; + + EXPECT_EQ(p, nullptr); + + paddle::platform::GPUPlace gpu(0); + p = paddle::memory::Alloc(gpu, 4096); + + EXPECT_NE(p, nullptr); + + paddle::memory::Free(gpu, p); +} + +#endif // PADDLE_ONLY_CPU diff --git a/paddle/platform/gpu_info.cc b/paddle/platform/gpu_info.cc index 05a243c506..fe475d23ce 100644 --- a/paddle/platform/gpu_info.cc +++ b/paddle/platform/gpu_info.cc @@ -56,7 +56,7 @@ size_t GpuMaxAllocSize() { GpuMemoryUsage(available, total); // Reserve the rest for page tables, etc. - return total * FLAGS_fraction_of_gpu_memory_to_use; + return static_cast(total * FLAGS_fraction_of_gpu_memory_to_use); } size_t GpuMinChunkSize() { From 936cd1e11885daf35ae34b9fbf11a798e9e51022 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 6 Jul 2017 01:34:51 +0800 Subject: [PATCH 310/542] FIX: code format --- paddle/memory/memory.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/memory/memory.cc b/paddle/memory/memory.cc index dde6ff0ef3..43f2084e8d 100644 --- a/paddle/memory/memory.cc +++ b/paddle/memory/memory.cc @@ -29,7 +29,7 @@ detail::BuddyAllocator* GetCPUBuddyAllocator() { platform::CpuMinChunkSize(), platform::CpuMaxChunkSize()); } - return a; + return a; } #ifndef PADDLE_ONLY_CPU // The following code are for CUDA. From 5d2e8edb3ec86e13b7684cbe372650d21fe7954d Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 6 Jul 2017 01:37:20 +0800 Subject: [PATCH 311/542] FIX: dynamic loader deps --- paddle/platform/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index 0ad11f1b10..bf3e8f31ab 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -6,4 +6,4 @@ nv_library(gpu_info SRCS gpu_info.cc DEPS gflags) cc_library(place SRCS place.cc) cc_test(place_test SRCS place_test.cc DEPS place glog gflags) -cc_library(dynamic_loader SRCS dynload/dynamic_loader.cc DEPS gflags) +cc_library(dynamic_loader SRCS dynload/dynamic_loader.cc DEPS gflags glog) From 7848a3fb5c6de5c21a6c1c34a9d12e8e866c760c Mon Sep 17 00:00:00 2001 From: wuyi05 Date: Thu, 6 Jul 2017 09:45:01 +0800 Subject: [PATCH 312/542] remove cclient test --- go/pserver/client/c/CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/go/pserver/client/c/CMakeLists.txt b/go/pserver/client/c/CMakeLists.txt index a3fcaeef19..d5c1ed38e5 100644 --- a/go/pserver/client/c/CMakeLists.txt +++ b/go/pserver/client/c/CMakeLists.txt @@ -1,5 +1,7 @@ cc_library(paddle_go_optimizer DEPS paddle_optimizer paddle_proto glog gflags protobuf) go_library(paddle_pserver_cclient STATIC DEPS paddle_go_optimizer) if(WITH_TESTING) - add_subdirectory(test) + # FIXME: this test requires pserver which is not managed by the test + # we need some kind of e2e testing machanism. 
+ # add_subdirectory(test) endif() From 1264480b048cf68e29f3dffa91e228425df55908 Mon Sep 17 00:00:00 2001 From: Superjom Date: Thu, 6 Jul 2017 10:48:00 +0800 Subject: [PATCH 313/542] fix ci --- paddle/framework/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index b33014210f..fc2fbf88f1 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -11,5 +11,6 @@ proto_library(op_proto SRCS op_proto.proto DEPS attr_type) cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf) proto_library(net_proto SRCS net_proto.proto DEPS op_proto) #cc_library(net SRCS net.cc DEPS net_proto attr_type op_proto) + proto_library(op_desc SRCS op_desc.proto DEPS attr_type) cc_test(op_desc_test SRCS op_desc_test.cc DEPS op_desc protobuf) From 3ad8e364715915fba5909c137834e34f38b6e9ac Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 6 Jul 2017 11:24:01 +0800 Subject: [PATCH 314/542] FIX: merge static libs with propagation dependencies --- cmake/generic.cmake | 51 ++++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index cae9524b2f..87d8caaec4 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -99,15 +99,37 @@ function(merge_static_libs TARGET_NAME) set(libs ${ARGN}) list(REMOVE_DUPLICATES libs) - # First get the file names of the libraries to be merged + # Get all propagation dependencies from the merged libraries foreach(lib ${libs}) + list(APPEND libs_deps ${${lib}_LIB_DEPENDS}) + endforeach() + + # To produce a library we need at least one source file. + # It is created by add_custom_command below and will helps + # also help to track dependencies. + set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c) + + # Make the generated dummy source file depended on all static input + # libs. If input lib changes,the source file is touched + # which causes the desired effect (relink). 
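# A usage sketch for merge_static_libs (hypothetical targets foo/bar, not part
# of this patch), showing what the dummy-source trick being set up here is for:
#   cc_library(foo SRCS foo.cc)
#   cc_library(bar SRCS bar.cc DEPS glog)
#   merge_static_libs(foobar foo bar)
# libfoobar.a then archives the objects of foo and bar, carries the glog
# dependency forward through the propagated ${bar_LIB_DEPENDS}, and is
# re-created whenever foo or bar is rebuilt, since the generated dummy source
# depends on both member libraries.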
+ add_custom_command(OUTPUT ${dummyfile} + COMMAND ${CMAKE_COMMAND} -E touch ${dummyfile} + DEPENDS ${libs}) + + # Generate dummy staic lib + file(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";") + add_library(${TARGET_NAME} STATIC ${dummyfile}) + target_link_libraries(${TARGET_NAME} ${libs_deps}) + + foreach(lib ${libs}) + # Get the file names of the libraries to be merged set(libfiles ${libfiles} $) endforeach() + # Get the file name of the generated library + set(outlibfile "$") + if(APPLE) # Use OSX's libtool to merge archives - set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c) - file(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";") - add_library(${TARGET_NAME} STATIC ${dummyfile}) add_custom_command(TARGET ${TARGET_NAME} POST_BUILD COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" COMMAND /usr/bin/libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles}) @@ -117,7 +139,8 @@ function(merge_static_libs TARGET_NAME) set(objdir ${lib}.objdir) add_custom_command(OUTPUT ${objdir} - COMMAND ${CMAKE_COMMAND} -E make_directory ${objdir}) + COMMAND ${CMAKE_COMMAND} -E make_directory ${objdir} + DEPENDS ${lib}) add_custom_command(OUTPUT ${objlistfile} COMMAND ${CMAKE_AR} -x "$" @@ -125,23 +148,9 @@ function(merge_static_libs TARGET_NAME) DEPENDS ${lib} ${objdir} WORKING_DIRECTORY ${objdir}) - # Empty dummy source file that goes into merged library - set(mergebase ${lib}.mergebase.c) - add_custom_command(OUTPUT ${mergebase} - COMMAND ${CMAKE_COMMAND} -E touch ${mergebase} - DEPENDS ${objlistfile}) - - list(APPEND mergebases "${mergebase}") - endforeach() - - # We need a target for the output merged library - add_library(${TARGET_NAME} STATIC ${mergebases}) - set(outlibfile "$") - - foreach(lib ${libs}) add_custom_command(TARGET ${TARGET_NAME} POST_BUILD - COMMAND ${CMAKE_AR} ru ${outlibfile} @"../${lib}.objlist" - WORKING_DIRECTORY ${lib}.objdir) + COMMAND ${CMAKE_AR} ru ${outlibfile} *.o + WORKING_DIRECTORY ${objdir}) endforeach() add_custom_command(TARGET ${TARGET_NAME} POST_BUILD From d6ecae779a28d51e669a4c029d00ec57a98f2bc8 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 6 Jul 2017 11:25:28 +0800 Subject: [PATCH 315/542] FIX: propagation dependencies and out of date rebuild --- cmake/generic.cmake | 51 ++++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index cae9524b2f..87d8caaec4 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -99,15 +99,37 @@ function(merge_static_libs TARGET_NAME) set(libs ${ARGN}) list(REMOVE_DUPLICATES libs) - # First get the file names of the libraries to be merged + # Get all propagation dependencies from the merged libraries foreach(lib ${libs}) + list(APPEND libs_deps ${${lib}_LIB_DEPENDS}) + endforeach() + + # To produce a library we need at least one source file. + # It is created by add_custom_command below and will helps + # also help to track dependencies. + set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c) + + # Make the generated dummy source file depended on all static input + # libs. If input lib changes,the source file is touched + # which causes the desired effect (relink). 
+ add_custom_command(OUTPUT ${dummyfile} + COMMAND ${CMAKE_COMMAND} -E touch ${dummyfile} + DEPENDS ${libs}) + + # Generate dummy staic lib + file(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";") + add_library(${TARGET_NAME} STATIC ${dummyfile}) + target_link_libraries(${TARGET_NAME} ${libs_deps}) + + foreach(lib ${libs}) + # Get the file names of the libraries to be merged set(libfiles ${libfiles} $) endforeach() + # Get the file name of the generated library + set(outlibfile "$") + if(APPLE) # Use OSX's libtool to merge archives - set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c) - file(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";") - add_library(${TARGET_NAME} STATIC ${dummyfile}) add_custom_command(TARGET ${TARGET_NAME} POST_BUILD COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" COMMAND /usr/bin/libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles}) @@ -117,7 +139,8 @@ function(merge_static_libs TARGET_NAME) set(objdir ${lib}.objdir) add_custom_command(OUTPUT ${objdir} - COMMAND ${CMAKE_COMMAND} -E make_directory ${objdir}) + COMMAND ${CMAKE_COMMAND} -E make_directory ${objdir} + DEPENDS ${lib}) add_custom_command(OUTPUT ${objlistfile} COMMAND ${CMAKE_AR} -x "$" @@ -125,23 +148,9 @@ function(merge_static_libs TARGET_NAME) DEPENDS ${lib} ${objdir} WORKING_DIRECTORY ${objdir}) - # Empty dummy source file that goes into merged library - set(mergebase ${lib}.mergebase.c) - add_custom_command(OUTPUT ${mergebase} - COMMAND ${CMAKE_COMMAND} -E touch ${mergebase} - DEPENDS ${objlistfile}) - - list(APPEND mergebases "${mergebase}") - endforeach() - - # We need a target for the output merged library - add_library(${TARGET_NAME} STATIC ${mergebases}) - set(outlibfile "$") - - foreach(lib ${libs}) add_custom_command(TARGET ${TARGET_NAME} POST_BUILD - COMMAND ${CMAKE_AR} ru ${outlibfile} @"../${lib}.objlist" - WORKING_DIRECTORY ${lib}.objdir) + COMMAND ${CMAKE_AR} ru ${outlibfile} *.o + WORKING_DIRECTORY ${objdir}) endforeach() add_custom_command(TARGET ${TARGET_NAME} POST_BUILD From 3e4ba647eec7bc16511e1146d5a696cd124c6a27 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 6 Jul 2017 11:28:52 +0800 Subject: [PATCH 316/542] FIX: remove duplicate --- cmake/generic.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 87d8caaec4..1a4600ef4b 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -103,6 +103,7 @@ function(merge_static_libs TARGET_NAME) foreach(lib ${libs}) list(APPEND libs_deps ${${lib}_LIB_DEPENDS}) endforeach() + list(REMOVE_DUPLICATES libs_deps) # To produce a library we need at least one source file. 
# It is created by add_custom_command below and will helps From a669bf48d966a92206c57d72258bb625b5ff2fbc Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 6 Jul 2017 13:38:11 +0800 Subject: [PATCH 317/542] FIX: explicit construct pool element --- paddle/memory/detail/buddy_allocator.cc | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/paddle/memory/detail/buddy_allocator.cc b/paddle/memory/detail/buddy_allocator.cc index ed2eedf9af..2cfacec46c 100644 --- a/paddle/memory/detail/buddy_allocator.cc +++ b/paddle/memory/detail/buddy_allocator.cc @@ -118,8 +118,9 @@ void BuddyAllocator::Free(void* p) { if (right_buddy->type(cache_) == MemoryBlock::FREE_CHUNK) { // Take away right buddy from pool - pool_.erase({right_buddy->index(cache_), right_buddy->total_size(cache_), - right_buddy}); + pool_.erase(IndexSizeAddress(right_buddy->index(cache_), + right_buddy->total_size(cache_), + right_buddy)); // merge its right buddy to the block block->merge(cache_, right_buddy); @@ -135,8 +136,8 @@ void BuddyAllocator::Free(void* p) { if (left_buddy->type(cache_) == MemoryBlock::FREE_CHUNK) { // Take away right buddy from pool - pool_.erase({left_buddy->index(cache_), left_buddy->total_size(cache_), - left_buddy}); + pool_.erase(IndexSizeAddress(left_buddy->index(cache_), + left_buddy->total_size(cache_), left_buddy)); // merge the block to its left buddy left_buddy->merge(cache_, block); @@ -147,7 +148,8 @@ void BuddyAllocator::Free(void* p) { // Dumping this block into pool DLOG(INFO) << "Inserting free block (" << block << ", " << block->total_size(cache_) << ")"; - pool_.insert({block->index(cache_), block->total_size(cache_), block}); + pool_.insert( + IndexSizeAddress(block->index(cache_), block->total_size(cache_), block)); // TODO(gangliao): Clean up if existing too much free memory } @@ -193,14 +195,14 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() { total_free_ += max_chunk_size_; // dump the block into pool - return pool_.insert({index, max_chunk_size_, p}).first; + return pool_.insert(IndexSizeAddress(index, max_chunk_size_, p)).first; } BuddyAllocator::PoolSet::iterator BuddyAllocator::FindExistChunk(size_t size) { size_t index = 0; while (1) { - auto it = pool_.lower_bound({index, size, nullptr}); + auto it = pool_.lower_bound(IndexSizeAddress(index, size, nullptr)); // no match chunk memory if (it == pool_.end()) return it; @@ -237,9 +239,10 @@ void* BuddyAllocator::SplitToAlloc(BuddyAllocator::PoolSet::iterator it, DLOG(INFO) << "Insert right block (" << block->right_buddy(cache_) << ", " << block->right_buddy(cache_)->total_size(cache_) << ")"; - pool_.insert({block->right_buddy(cache_)->index(cache_), - block->right_buddy(cache_)->total_size(cache_), - block->right_buddy(cache_)}); + pool_.insert( + IndexSizeAddress(block->right_buddy(cache_)->index(cache_), + block->right_buddy(cache_)->total_size(cache_), + block->right_buddy(cache_))); } } From 108b0fad2ffdf8faf281e34ea64437abe7a3eca3 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Thu, 6 Jul 2017 06:40:58 +0000 Subject: [PATCH 318/542] fix by helin and wuyi's comments --- go/master/service.go | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/go/master/service.go b/go/master/service.go index c47319317a..29ff63bcc9 100644 --- a/go/master/service.go +++ b/go/master/service.go @@ -39,9 +39,9 @@ type Task struct { } type taskEntry struct { - NumTimeout int - Task Task - NumFailed int + Task Task + // A task fails if it's 
timed out or the trainer reports it exited abnormally. + NumFailure int } type taskQueues struct { @@ -53,11 +53,11 @@ type taskQueues struct { // Service is the master server service. type Service struct { - chunksPerTask int - timeoutDur time.Duration - failortimeoutMax int - ready chan struct{} - store Store + chunksPerTask int + timeoutDur time.Duration + failureMax int + ready chan struct{} + store Store mu sync.Mutex initDone bool @@ -92,11 +92,11 @@ func partition(chunks []Chunk, chunksPerTask int) []taskEntry { } // NewService creates a new service. -func NewService(store Store, chunksPerTask int, timeoutDur time.Duration, failortimeoutMax int) (*Service, error) { +func NewService(store Store, chunksPerTask int, timeoutDur time.Duration, failureMax int) (*Service, error) { s := &Service{} s.chunksPerTask = chunksPerTask s.timeoutDur = timeoutDur - s.failortimeoutMax = failortimeoutMax + s.failureMax = failureMax s.taskQueues = taskQueues{} s.taskQueues.Pending = make(map[int]taskEntry) s.ready = make(chan struct{}) @@ -258,7 +258,7 @@ func (s *Service) SetDataset(globPaths []string, dummy *int) error { return nil } -func (s *Service) checkTaskStatus(t taskEntry, epoch int) { +func (s *Service) procFailedTask(t taskEntry, epoch int) { if t.Task.Epoch != epoch { // new epoch, task launched after the // schedule of this timeout check or failed status report. @@ -274,14 +274,14 @@ func (s *Service) checkTaskStatus(t taskEntry, epoch int) { delete(s.taskQueues.Pending, t.Task.ID) - t.NumTimeout++ - if t.NumTimeout+t.NumFailed > s.failortimeoutMax { - log.Warningf("Task %v timed out %d times and failed %d times, discard.", t.Task, t.NumTimeout, t.NumFailed) + t.NumFailure++ + if t.NumFailure > s.failureMax { + log.Warningf("Task %v failed %d times, discard.", t.Task, t.NumFailure) s.taskQueues.Failed = append(s.taskQueues.Failed, t) return } - log.Warningf("Task %v timed out %d times and failed %d times, discard.", t.Task, t.NumTimeout, t.NumFailed) + log.Warningf("Task %v failed %d times, discard.", t.Task, t.NumFailure) s.taskQueues.Todo = append(s.taskQueues.Todo, t) return } @@ -296,7 +296,7 @@ func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() { return } - s.checkTaskStatus(t, epoch) + s.procFailedTask(t, epoch) } } @@ -377,8 +377,7 @@ func (s *Service) TaskFinished(taskID int, dummy *int) error { } // task finished, reset timeout - t.NumTimeout = 0 - t.NumFailed = 0 + t.NumFailure = 0 s.taskQueues.Done = append(s.taskQueues.Done, t) delete(s.taskQueues.Pending, taskID) @@ -413,6 +412,6 @@ func (s *Service) TaskFailed(taskID int, epoch int) error { return err } - s.checkTaskStatus(t, epoch) + s.procFailedTask(t, epoch) return nil } From 1b366dc2fff2b896fc92c1aa161183e6c88f6b7e Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 6 Jul 2017 14:44:40 +0800 Subject: [PATCH 319/542] Fix CI error on test_LayerGrad.LSTM * We should not EXPECT_EQ between a float value and an int value. Use ASSERT_NEAR instead.
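A minimal gtest sketch of the failure mode (illustrative values, not the actual CI output):

    float sum = 1e-6f;          // tiny floating-point residual, not exactly zero
    EXPECT_EQ(0, sum);          // exact comparison, fails on any nonzero residual
    ASSERT_NEAR(0, sum, 1e-5);  // passes while |0 - sum| <= 1e-5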
--- paddle/gserver/tests/LayerGradUtil.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/gserver/tests/LayerGradUtil.cpp b/paddle/gserver/tests/LayerGradUtil.cpp index 15b8cedeb8..9eca58f1a1 100644 --- a/paddle/gserver/tests/LayerGradUtil.cpp +++ b/paddle/gserver/tests/LayerGradUtil.cpp @@ -241,7 +241,7 @@ void testBatchState(LayerPtr testLayer, std::vector args; args.push_back(out); - EXPECT_EQ(0, Argument::sum(args)) << "testBatchState failed"; + ASSERT_NEAR(0, Argument::sum(args), 1e-5) << "testBatchState failed"; for (size_t seqId = 0; seqId < numSequences; ++seqId) { start[seqId] += seqLens[seqId]; } From a94d217487a222526e303c443aaa3370321447ae Mon Sep 17 00:00:00 2001 From: gongweibao Date: Thu, 6 Jul 2017 07:09:55 +0000 Subject: [PATCH 320/542] add TaskID --- go/master/client.go | 4 ++-- go/master/service.go | 14 ++++++++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/go/master/client.go b/go/master/client.go index b6ca8cad15..bf2612d91b 100644 --- a/go/master/client.go +++ b/go/master/client.go @@ -113,8 +113,8 @@ func (c *Client) taskFinished(taskID int) error { } // TaskFailed tell the master server as task is failed. -func (c *Client) taskFailed(taskID int, epoch int) error { - return c.conn.Call("Service.TaskFinished", taskID, epoch) +func (c *Client) taskFailed(taskID TaskID) error { + return c.conn.Call("Service.TaskFinished", taskID, nil) } // NextRecord returns next record in the dataset. diff --git a/go/master/service.go b/go/master/service.go index 29ff63bcc9..b1334a2d8e 100644 --- a/go/master/service.go +++ b/go/master/service.go @@ -396,8 +396,14 @@ func (s *Service) TaskFinished(taskID int, dummy *int) error { return err } -// TaskFailed tell the service that a task is failed. -func (s *Service) TaskFailed(taskID int, epoch int) error { +// TaskID is a struct which client uses for reports failure. +type TaskID struct { + ID int + Epoch int +} + +// TaskFailed tells the service that a task is failed. +func (s *Service) TaskFailed(taskID TaskID, dummy *int) error { select { case <-s.ready: } @@ -405,13 +411,13 @@ func (s *Service) TaskFailed(taskID int, epoch int) error { s.mu.Lock() defer s.mu.Unlock() - t, ok := s.taskQueues.Pending[taskID] + t, ok := s.taskQueues.Pending[taskID.ID] if !ok { err := errors.New("pending task not found") log.WithFields(s.logFields()).Warningln("TaskFailed:Pending task #%d not found.", taskID) return err } - s.procFailedTask(t, epoch) + s.procFailedTask(t, taskID.Epoch) return nil } From 8f7088590c7031dedd554f62762a559a4efe6b9c Mon Sep 17 00:00:00 2001 From: gongweibao Date: Thu, 6 Jul 2017 07:14:10 +0000 Subject: [PATCH 321/542] fix bugs --- go/master/service.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/master/service.go b/go/master/service.go index b1334a2d8e..daf3928230 100644 --- a/go/master/service.go +++ b/go/master/service.go @@ -414,7 +414,7 @@ func (s *Service) TaskFailed(taskID TaskID, dummy *int) error { t, ok := s.taskQueues.Pending[taskID.ID] if !ok { err := errors.New("pending task not found") - log.WithFields(s.logFields()).Warningln("TaskFailed:Pending task #%d not found.", taskID) + log.WithFields(s.logFields()).Warningln("TaskFailed:Pending task #%v not found.", taskID) return err } From e2ea1f42e9202e5591e2de1ce5f96c573dcc6484 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 6 Jul 2017 14:12:45 +0800 Subject: [PATCH 322/542] Generate python protobufs for paddle.v2.framework Python should be able to manipulate Protobuf message because: 1. 
Python's `create_op_creation_methods` takes the `OpProto` array to generate all `op_creation_methods` at runtime. 2. All `op_creation_methods` will create an `OpDesc` and pass it to Paddle C++ method `CreateOp` and return the Op handle. Here is the list of what is added in this commit: * Add `protobuf_generate_python` if it is not defined. * Before cmake 3.4, `protobuf_generate_python` is not defined. Just copy the implementation of that function in `protobuf.cmake`. * Add `py_proto_compile` function in `cmake/generic.cmake`. * It follows bazel's API interface. * https://github.com/pubref/rules_protobuf#rules * Add an empty package named `paddle.v2.framework`, all python code of `paddle::framework` will be in that package. * Generate protobuf's python module `__init__.py` by `touch` while compiling. * Change setup.py.in, make `paddle.v2.framework.proto` use the generated protobuf Python modules. --- cmake/external/protobuf.cmake | 59 +++++++++++++++++++++ cmake/generic.cmake | 9 +++ paddle/framework/CMakeLists.txt | 5 +- python/CMakeLists.txt | 3 +- python/paddle/v2/framework/__init__.py | 1 + .../paddle/v2/framework/tests/CMakeLists.txt | 1 + .../v2/framework/tests/test_protobuf.py | 26 ++++++++ python/setup.py.in | 9 ++- 8 files changed, 109 insertions(+), 4 deletions(-) create mode 100644 python/paddle/v2/framework/__init__.py create mode 100644 python/paddle/v2/framework/tests/CMakeLists.txt create mode 100644 python/paddle/v2/framework/tests/test_protobuf.py diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index 3c74944bc2..e629d61585 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -17,6 +17,65 @@ INCLUDE(ExternalProject) FIND_PACKAGE(Protobuf QUIET) SET(PROTOBUF_FOUND "OFF") +if(NOT COMMAND protobuf_generate_python) # before cmake 3.4, protobuf_generate_python is not defined.
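# For orientation (behavior of the upstream FindProtobuf function copied
# below): protobuf_generate_python(PY_SRCS foo.proto) runs `protoc
# --python_out` and returns the generated foo_pb2.py paths in PY_SRCS, e.g.
#   protobuf_generate_python(PROTO_PY op_proto.proto)
#   add_custom_target(op_proto_py ALL DEPENDS ${PROTO_PY})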
+ function(protobuf_generate_python SRCS) + # shameless copy from https://github.com/Kitware/CMake/blob/master/Modules/FindProtobuf.cmake + if(NOT ARGN) + message(SEND_ERROR "Error: PROTOBUF_GENERATE_PYTHON() called without any proto files") + return() + endif() + + if(PROTOBUF_GENERATE_CPP_APPEND_PATH) + # Create an include path for each file specified + foreach(FIL ${ARGN}) + get_filename_component(ABS_FIL ${FIL} ABSOLUTE) + get_filename_component(ABS_PATH ${ABS_FIL} PATH) + list(FIND _protobuf_include_path ${ABS_PATH} _contains_already) + if(${_contains_already} EQUAL -1) + list(APPEND _protobuf_include_path -I ${ABS_PATH}) + endif() + endforeach() + else() + set(_protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR}) + endif() + + if(DEFINED PROTOBUF_IMPORT_DIRS AND NOT DEFINED Protobuf_IMPORT_DIRS) + set(Protobuf_IMPORT_DIRS "${PROTOBUF_IMPORT_DIRS}") + endif() + + if(DEFINED Protobuf_IMPORT_DIRS) + foreach(DIR ${Protobuf_IMPORT_DIRS}) + get_filename_component(ABS_PATH ${DIR} ABSOLUTE) + list(FIND _protobuf_include_path ${ABS_PATH} _contains_already) + if(${_contains_already} EQUAL -1) + list(APPEND _protobuf_include_path -I ${ABS_PATH}) + endif() + endforeach() + endif() + + set(${SRCS}) + foreach(FIL ${ARGN}) + get_filename_component(ABS_FIL ${FIL} ABSOLUTE) + get_filename_component(FIL_WE ${FIL} NAME_WE) + if(NOT PROTOBUF_GENERATE_CPP_APPEND_PATH) + get_filename_component(FIL_DIR ${FIL} DIRECTORY) + if(FIL_DIR) + set(FIL_WE "${FIL_DIR}/${FIL_WE}") + endif() + endif() + + list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}_pb2.py") + add_custom_command( + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}_pb2.py" + COMMAND ${Protobuf_PROTOC_EXECUTABLE} --python_out ${CMAKE_CURRENT_BINARY_DIR} ${_protobuf_include_path} ${ABS_FIL} + DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE} + COMMENT "Running Python protocol buffer compiler on ${FIL}" + VERBATIM ) + endforeach() + + set(${SRCS} ${${SRCS}} PARENT_SCOPE) + endfunction() +endif() # Print and set the protobuf library information, # finish this cmake process and exit from this file. diff --git a/cmake/generic.cmake b/cmake/generic.cmake index d51b95a5d7..a92671ae62 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -335,3 +335,12 @@ function(proto_library TARGET_NAME) protobuf_generate_cpp(proto_srcs proto_hdrs ${proto_library_SRCS}) cc_library(${TARGET_NAME} SRCS ${proto_srcs} DEPS ${proto_library_DEPS} protobuf) endfunction() + +function(py_proto_compile TARGET_NAME) + set(oneValueArgs "") + set(multiValueArgs SRCS) + cmake_parse_arguments(py_proto_compile "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + set(py_srcs) + protobuf_generate_python(py_srcs ${py_proto_compile_SRCS}) + add_custom_target(${TARGET_NAME} ALL DEPENDS ${py_srcs}) +endfunction() \ No newline at end of file diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index dcd70d2851..970b2b9abd 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -9,6 +9,9 @@ cc_test(enforce_test SRCS enforce_test.cc) proto_library(attr_type SRCS attr_type.proto) proto_library(op_proto SRCS op_proto.proto DEPS attr_type) cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf) - proto_library(op_desc SRCS op_desc.proto DEPS attr_type) cc_test(op_desc_test SRCS op_desc_test.cc DEPS op_desc protobuf) +py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc.proto) +# Generate an empty __init__.py to make framework_py_proto as a valid python module. 
+add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) +add_dependencies(framework_py_proto framework_py_proto_init) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 361e764e25..13a1802ee3 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -29,7 +29,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT_DIR}/.timestamp - DEPENDS gen_proto_py ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER}) + DEPENDS gen_proto_py framework_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER}) add_custom_target(paddle_python ALL DEPENDS ${OUTPUT_DIR}/.timestamp) @@ -43,6 +43,7 @@ if (WITH_TESTING) add_subdirectory(paddle/v2/tests) add_subdirectory(paddle/v2/reader/tests) add_subdirectory(paddle/v2/plot/tests) + add_subdirectory(paddle/v2/framework/tests) endif() endif() install(DIRECTORY ${PADDLE_PYTHON_PACKAGE_DIR} diff --git a/python/paddle/v2/framework/__init__.py b/python/paddle/v2/framework/__init__.py new file mode 100644 index 0000000000..c942373c66 --- /dev/null +++ b/python/paddle/v2/framework/__init__.py @@ -0,0 +1 @@ +__all__ = ['proto'] diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt new file mode 100644 index 0000000000..8cb0c5c376 --- /dev/null +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -0,0 +1 @@ +add_python_test(test_framework test_protobuf.py) diff --git a/python/paddle/v2/framework/tests/test_protobuf.py b/python/paddle/v2/framework/tests/test_protobuf.py new file mode 100644 index 0000000000..f0e6019199 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_protobuf.py @@ -0,0 +1,26 @@ +import paddle.v2.framework.proto.op_proto_pb2 +import paddle.v2.framework.proto.attr_type_pb2 +import unittest + + +class TestFrameworkProto(unittest.TestCase): + def test_all(self): + op_proto_lib = paddle.v2.framework.proto.op_proto_pb2 + attr_type_lib = paddle.v2.framework.proto.attr_type_pb2 + op_proto = op_proto_lib.OpProto() + ipt0 = op_proto.inputs.add() + ipt0.name = "a" + ipt0.comment = "the input of cosine op" + ipt1 = op_proto.inputs.add() + ipt1.name = "b" + ipt1.comment = "the other input of cosine op" + opt = op_proto.outputs.add() + opt.name = "output" + opt.comment = "the output of cosine op" + op_proto.comment = "cosine op, output = scale*cos(a, b)" + attr = op_proto.attrs.add() + attr.name = "scale" + attr.comment = "scale of cosine op" + attr.type = attr_type_lib.FLOAT + op_proto.type = "cos" + self.assertTrue(op_proto.IsInitialized()) diff --git a/python/setup.py.in b/python/setup.py.in index dae0166487..78423614a6 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -9,7 +9,9 @@ packages=['paddle', 'paddle.v2.dataset', 'paddle.v2.reader', 'paddle.v2.master', - 'paddle.v2.plot'] + 'paddle.v2.plot', + 'paddle.v2.framework', + 'paddle.v2.framework.proto'] setup_requires=["requests", "numpy", @@ -29,6 +31,9 @@ setup(name='paddle', packages=packages, package_data={'paddle.v2.master': ['${paddle_master_LIB_NAME}'], }, package_dir={ - '': '${CMAKE_CURRENT_SOURCE_DIR}' + '': '${CMAKE_CURRENT_SOURCE_DIR}', + # The paddle.v2.framework.proto will be generated while compiling. + # So that package points to other directory. 
+ 'paddle.v2.framework.proto': '${CMAKE_BINARY_DIR}/paddle/framework' }, ) From adf8c95b62fc5ef1f608bc06dce32bb4b396828c Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 6 Jul 2017 15:40:22 +0800 Subject: [PATCH 323/542] FIX: propagation dependencies under linux --- cmake/generic.cmake | 68 ++++++++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 29 deletions(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 87d8caaec4..3900ea2604 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -104,36 +104,32 @@ function(merge_static_libs TARGET_NAME) list(APPEND libs_deps ${${lib}_LIB_DEPENDS}) endforeach() - # To produce a library we need at least one source file. - # It is created by add_custom_command below and will helps - # also help to track dependencies. - set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c) - - # Make the generated dummy source file depended on all static input - # libs. If input lib changes,the source file is touched - # which causes the desired effect (relink). - add_custom_command(OUTPUT ${dummyfile} - COMMAND ${CMAKE_COMMAND} -E touch ${dummyfile} - DEPENDS ${libs}) - - # Generate dummy staic lib - file(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";") - add_library(${TARGET_NAME} STATIC ${dummyfile}) - target_link_libraries(${TARGET_NAME} ${libs_deps}) + if(APPLE) # Use OSX's libtool to merge archives + # To produce a library we need at least one source file. + # It is created by add_custom_command below and will helps + # also help to track dependencies. + set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c) - foreach(lib ${libs}) - # Get the file names of the libraries to be merged - set(libfiles ${libfiles} $) - endforeach() + # Make the generated dummy source file depended on all static input + # libs. If input lib changes,the source file is touched + # which causes the desired effect (relink). 
+ add_custom_command(OUTPUT ${dummyfile} + COMMAND ${CMAKE_COMMAND} -E touch ${dummyfile} + DEPENDS ${libs}) - # Get the file name of the generated library - set(outlibfile "$") + # Generate dummy staic lib + file(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";") + add_library(${TARGET_NAME} STATIC ${dummyfile}) + target_link_libraries(${TARGET_NAME} ${libs_deps}) - if(APPLE) # Use OSX's libtool to merge archives + foreach(lib ${libs}) + # Get the file names of the libraries to be merged + set(libfiles ${libfiles} $) + endforeach() add_custom_command(TARGET ${TARGET_NAME} POST_BUILD COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" COMMAND /usr/bin/libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles}) - else() # general UNIX: use "ar" to extract objects and re-add to a common lib + else() # general UNIX: use "ar" to extract objects and re-add to a common lib foreach(lib ${libs}) set(objlistfile ${lib}.objlist) # list of objects in the input library set(objdir ${lib}.objdir) @@ -148,13 +144,27 @@ function(merge_static_libs TARGET_NAME) DEPENDS ${lib} ${objdir} WORKING_DIRECTORY ${objdir}) - add_custom_command(TARGET ${TARGET_NAME} POST_BUILD - COMMAND ${CMAKE_AR} ru ${outlibfile} *.o - WORKING_DIRECTORY ${objdir}) + # Empty dummy source file that goes into merged library + set(mergebase ${lib}.mergebase.c) + add_custom_command(OUTPUT ${mergebase} + COMMAND ${CMAKE_COMMAND} -E touch ${mergebase} + DEPENDS ${objlistfile}) + + list(APPEND mergebases "${mergebase}") endforeach() - add_custom_command(TARGET ${TARGET_NAME} POST_BUILD - COMMAND ${CMAKE_RANLIB} ${outlibfile}) + add_library(${TARGET_NAME} STATIC ${mergebases}) + target_link_libraries(${TARGET_NAME} ${libs_deps}) + + # Get the file name of the generated library + set(outlibfile "$") + + foreach(lib ${libs}) + add_custom_command(TARGET ${TARGET_NAME} POST_BUILD + COMMAND ${CMAKE_AR} cr ${outlibfile} *.o + COMMAND ${CMAKE_RANLIB} ${outlibfile} + WORKING_DIRECTORY ${lib}.objdir) + endforeach() endif() endfunction(merge_static_libs) From 847535f4fe6cea0b954a67fffea4c7b9ed96bd77 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 6 Jul 2017 15:42:29 +0800 Subject: [PATCH 324/542] FIX: propagation dependencies under linux --- cmake/generic.cmake | 69 +++++++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 30 deletions(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 1a4600ef4b..3900ea2604 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -103,38 +103,33 @@ function(merge_static_libs TARGET_NAME) foreach(lib ${libs}) list(APPEND libs_deps ${${lib}_LIB_DEPENDS}) endforeach() - list(REMOVE_DUPLICATES libs_deps) - # To produce a library we need at least one source file. - # It is created by add_custom_command below and will helps - # also help to track dependencies. - set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c) - - # Make the generated dummy source file depended on all static input - # libs. If input lib changes,the source file is touched - # which causes the desired effect (relink). - add_custom_command(OUTPUT ${dummyfile} - COMMAND ${CMAKE_COMMAND} -E touch ${dummyfile} - DEPENDS ${libs}) - - # Generate dummy staic lib - file(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";") - add_library(${TARGET_NAME} STATIC ${dummyfile}) - target_link_libraries(${TARGET_NAME} ${libs_deps}) + if(APPLE) # Use OSX's libtool to merge archives + # To produce a library we need at least one source file. 
+ # It is created by add_custom_command below and will helps + # also help to track dependencies. + set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c) - foreach(lib ${libs}) - # Get the file names of the libraries to be merged - set(libfiles ${libfiles} $) - endforeach() + # Make the generated dummy source file depended on all static input + # libs. If input lib changes,the source file is touched + # which causes the desired effect (relink). + add_custom_command(OUTPUT ${dummyfile} + COMMAND ${CMAKE_COMMAND} -E touch ${dummyfile} + DEPENDS ${libs}) - # Get the file name of the generated library - set(outlibfile "$") + # Generate dummy staic lib + file(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";") + add_library(${TARGET_NAME} STATIC ${dummyfile}) + target_link_libraries(${TARGET_NAME} ${libs_deps}) - if(APPLE) # Use OSX's libtool to merge archives + foreach(lib ${libs}) + # Get the file names of the libraries to be merged + set(libfiles ${libfiles} $) + endforeach() add_custom_command(TARGET ${TARGET_NAME} POST_BUILD COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" COMMAND /usr/bin/libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles}) - else() # general UNIX: use "ar" to extract objects and re-add to a common lib + else() # general UNIX: use "ar" to extract objects and re-add to a common lib foreach(lib ${libs}) set(objlistfile ${lib}.objlist) # list of objects in the input library set(objdir ${lib}.objdir) @@ -149,13 +144,27 @@ function(merge_static_libs TARGET_NAME) DEPENDS ${lib} ${objdir} WORKING_DIRECTORY ${objdir}) - add_custom_command(TARGET ${TARGET_NAME} POST_BUILD - COMMAND ${CMAKE_AR} ru ${outlibfile} *.o - WORKING_DIRECTORY ${objdir}) + # Empty dummy source file that goes into merged library + set(mergebase ${lib}.mergebase.c) + add_custom_command(OUTPUT ${mergebase} + COMMAND ${CMAKE_COMMAND} -E touch ${mergebase} + DEPENDS ${objlistfile}) + + list(APPEND mergebases "${mergebase}") endforeach() - add_custom_command(TARGET ${TARGET_NAME} POST_BUILD - COMMAND ${CMAKE_RANLIB} ${outlibfile}) + add_library(${TARGET_NAME} STATIC ${mergebases}) + target_link_libraries(${TARGET_NAME} ${libs_deps}) + + # Get the file name of the generated library + set(outlibfile "$") + + foreach(lib ${libs}) + add_custom_command(TARGET ${TARGET_NAME} POST_BUILD + COMMAND ${CMAKE_AR} cr ${outlibfile} *.o + COMMAND ${CMAKE_RANLIB} ${outlibfile} + WORKING_DIRECTORY ${lib}.objdir) + endforeach() endif() endfunction(merge_static_libs) From 203364281ed8b86c53c520142b881f00aca5485e Mon Sep 17 00:00:00 2001 From: caoying03 Date: Thu, 6 Jul 2017 16:44:54 +0800 Subject: [PATCH 325/542] enable error clipping in FC layer. --- python/paddle/trainer/config_parser.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 370529ed97..e020be9378 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -13,6 +13,7 @@ # limitations under the License. 
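# (Illustrative aside, not part of the patch: with the keyword argument added
# below, a trainer config could enable clipping on a single layer, e.g. with
# the hypothetical values
#   fc = FCLayer(name='fc1', size=128, inputs=[Input('data')],
#                error_clipping_threshold=10.0)
# the threshold is copied into the layer's protobuf config, so the layer's
# error (gradient) is clipped during the backward pass.)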
from __future__ import print_function +import pdb ''' The following functions are available in the config file: @@ -761,8 +762,8 @@ class DotMulOperator(Operator): def check_dims(self): for i in range(2): - config_assert(self.operator_conf.input_sizes[i] == - self.operator_conf.output_size, + config_assert(self.operator_conf.input_sizes[ + i] == self.operator_conf.output_size, "DotMul input_size != output_size") def calc_output_size(self, input_sizes): @@ -1193,8 +1194,7 @@ def parse_image(image, input_layer_name, image_conf): def parse_norm(norm, input_layer_name, norm_conf): norm_conf.norm_type = norm.norm_type config_assert( - norm.norm_type in - ['rnorm', 'cmrnorm-projection', 'cross-channel-norm'], + norm.norm_type in ['rnorm', 'cmrnorm-projection', 'cross-channel-norm'], "norm-type %s is not in [rnorm, cmrnorm-projection, cross-channel-norm]" % norm.norm_type) norm_conf.channels = norm.channels @@ -1571,7 +1571,13 @@ class MultiClassCrossEntropySelfNormCostLayer(LayerBase): @config_layer('fc') class FCLayer(LayerBase): - def __init__(self, name, size, inputs, bias=True, **xargs): + def __init__(self, + name, + size, + inputs, + bias=True, + error_clipping_threshold=None, + **xargs): super(FCLayer, self).__init__(name, 'fc', size, inputs=inputs, **xargs) for input_index in xrange(len(self.inputs)): input_layer = self.get_input_layer(input_index) @@ -1589,6 +1595,9 @@ class FCLayer(LayerBase): format) self.create_bias_parameter(bias, self.config.size) + if error_clipping_threshold is not None: + self.config.error_clipping_threshold = error_clipping_threshold + @config_layer('selective_fc') class SelectiveFCLayer(LayerBase): @@ -3425,7 +3434,8 @@ DEFAULT_SETTING = dict( settings = copy.deepcopy(DEFAULT_SETTING) -settings_deprecated = dict(usage_ratio=1., ) +settings_deprecated = dict( + usage_ratio=1., ) trainer_settings = dict( save_dir="./output/model", From ddfa6cf0d1fe91f8bf2e1d55841afee9e30d1859 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 6 Jul 2017 17:07:04 +0800 Subject: [PATCH 326/542] FIX: remove boost from memory folder --- paddle/memory/memory.cc | 56 +++++++++++++++++++---------------------- paddle/memory/memory.h | 11 +++++--- 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/paddle/memory/memory.cc b/paddle/memory/memory.cc index 43f2084e8d..def580f7a4 100644 --- a/paddle/memory/memory.cc +++ b/paddle/memory/memory.cc @@ -32,7 +32,22 @@ detail::BuddyAllocator* GetCPUBuddyAllocator() { return a; } -#ifndef PADDLE_ONLY_CPU // The following code are for CUDA. 
+template <>
+void* Alloc<platform::CPUPlace>(platform::CPUPlace place, size_t size) {
+  return GetCPUBuddyAllocator()->Alloc(size);
+}
+
+template <>
+void Free<platform::CPUPlace>(platform::CPUPlace place, void* p) {
+  GetCPUBuddyAllocator()->Free(p);
+}
+
+template <>
+size_t Used<platform::CPUPlace>(platform::CPUPlace place) {
+  return GetCPUBuddyAllocator()->Used();
+}
+
+#ifndef PADDLE_ONLY_CPU

 detail::BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
   static detail::BuddyAllocator** as = NULL;
@@ -49,41 +64,22 @@ detail::BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
   return as[gpu_id];
 }

-#endif  // PADDLE_ONLY_CPU
+template <>
+void* Alloc<platform::GPUPlace>(platform::GPUPlace place, size_t size) {
+  return GetGPUBuddyAllocator(place.device)->Alloc(size);
+}

-void* Alloc(platform::Place pl, size_t size) {
-#ifndef PADDLE_ONLY_CPU
-  if (paddle::platform::is_gpu_place(pl)) {
-    size_t gpu_id = boost::get<platform::GPUPlace>(pl).device;
-    return GetGPUBuddyAllocator(gpu_id)->Alloc(size);
-  }
-#endif  // PADDLE_ONLY_CPU
-  PADDLE_ASSERT(paddle::platform::is_cpu_place(pl));
-  return GetCPUBuddyAllocator()->Alloc(size);
+template <>
+void Free<platform::GPUPlace>(platform::GPUPlace place, void* p) {
+  GetGPUBuddyAllocator(place.device)->Free(p);
 }

-void Free(paddle::platform::Place pl, void* p) {
-#ifndef PADDLE_ONLY_CPU
-  if (paddle::platform::is_gpu_place(pl)) {
-    size_t gpu_id = boost::get<platform::GPUPlace>(pl).device;
-    GetGPUBuddyAllocator(gpu_id)->Free(p);
-    return;
-  }
-#endif  // PADDLE_ONLY_CPU
-  PADDLE_ASSERT(paddle::platform::is_cpu_place(pl));
-  GetCPUBuddyAllocator()->Free(p);
+template <>
+size_t Used<platform::GPUPlace>(platform::GPUPlace place) {
+  return GetGPUBuddyAllocator(place.device)->Used();
 }

-size_t Used(paddle::platform::Place pl) {
-#ifndef PADDLE_ONLY_CPU
-  if (paddle::platform::is_gpu_place(pl)) {
-    size_t gpu_id = boost::get<platform::GPUPlace>(pl).device;
-    return GetGPUBuddyAllocator(gpu_id)->Used();
-  }
 #endif  // PADDLE_ONLY_CPU
-  PADDLE_ASSERT(paddle::platform::is_cpu_place(pl));
-  return GetCPUBuddyAllocator()->Used();
-}

 }  // namespace memory
 }  // namespace paddle
diff --git a/paddle/memory/memory.h b/paddle/memory/memory.h
index a33092bade..2d6f4fd2a0 100644
--- a/paddle/memory/memory.h
+++ b/paddle/memory/memory.h
@@ -19,9 +19,14 @@ limitations under the License. */
 namespace paddle {
 namespace memory {

-void* Alloc(paddle::platform::Place, size_t);
-void Free(paddle::platform::Place, void*);
-size_t Used(paddle::platform::Place);
+template <typename Place>
+void* Alloc(Place, size_t);
+
+template <typename Place>
+void Free(Place, void*);
+
+template <typename Place>
+size_t Used(Place);

 }  // namespace memory
 }  // namespace paddle

From 075954c17ceaf422478961d9a5d6aaa364458415 Mon Sep 17 00:00:00 2001
From: caoying03
Date: Thu, 6 Jul 2017 17:40:58 +0800
Subject: [PATCH 327/542] follow comment.
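A minimal calling sketch for the templated interface above (illustration only, not part of the patch; it assumes nothing beyond the paddle::memory and paddle::platform declarations shown in this diff):

  paddle::platform::CPUPlace cpu;
  void* p = paddle::memory::Alloc(cpu, 1024);  // Place deduced as CPUPlace,
                                               // served by the CPU buddy allocator
  size_t in_use = paddle::memory::Used(cpu);   // bytes currently allocated
  paddle::memory::Free(cpu, p);

Passing the wrong place type now fails at compile time, where the removed Place-variant version could only catch it at runtime via PADDLE_ASSERT.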
--- python/paddle/trainer/config_parser.py | 28 +++++++------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 1fed6db33c..826ba2834a 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1353,7 +1353,8 @@ class LayerBase(object): device=None, active_type="", drop_rate=0., - coeff=None): + coeff=None, + error_clipping_threshold=None): config_assert('@' not in name, "layer name: %s contain special character @" % name) global g_current_submodel @@ -1387,6 +1388,9 @@ class LayerBase(object): elif g_default_device is not None: self.config.device = g_default_device + if error_clipping_threshold is not None: + self.config.error_clipping_threshold = error_clipping_threshold + for input_index in xrange(len(self.inputs)): input = self.inputs[input_index] input_config = None @@ -1571,13 +1575,7 @@ class MultiClassCrossEntropySelfNormCostLayer(LayerBase): @config_layer('fc') class FCLayer(LayerBase): - def __init__(self, - name, - size, - inputs, - bias=True, - error_clipping_threshold=None, - **xargs): + def __init__(self, name, size, inputs, bias=True, **xargs): super(FCLayer, self).__init__(name, 'fc', size, inputs=inputs, **xargs) for input_index in xrange(len(self.inputs)): input_layer = self.get_input_layer(input_index) @@ -1595,9 +1593,6 @@ class FCLayer(LayerBase): format) self.create_bias_parameter(bias, self.config.size) - if error_clipping_threshold is not None: - self.config.error_clipping_threshold = error_clipping_threshold - @config_layer('selective_fc') class SelectiveFCLayer(LayerBase): @@ -2791,13 +2786,7 @@ class TensorLayer(LayerBase): @config_layer('mixed') class MixedLayer(LayerBase): - def __init__(self, - name, - inputs, - size=0, - bias=True, - error_clipping_threshold=None, - **xargs): + def __init__(self, name, inputs, size=0, bias=True, **xargs): config_assert(inputs, 'inputs cannot be empty') super(MixedLayer, self).__init__( name, 'mixed', size, inputs=inputs, **xargs) @@ -2879,9 +2868,6 @@ class MixedLayer(LayerBase): self.config.bias_size = psize self.create_bias_parameter(bias, psize) - if error_clipping_threshold is not None: - self.config.error_clipping_threshold = error_clipping_threshold - # like MixedLayer, but no bias parameter @config_func From f2a82b16a25c2eb825ddb0a46b4966b01f248f22 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Thu, 6 Jul 2017 11:58:43 +0000 Subject: [PATCH 328/542] add print messages --- python/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 361e764e25..7a57d922ef 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -17,15 +17,21 @@ add_custom_target(copy_paddle_master) SET(COPY_PADDLE_MASTER "") if(WITH_GOLANG) SET(COPY_PADDLE_MASTER "copy_paddle_master") + message("paddle_master_lib_path:" ${paddle_master_LIB_PATH}) + message("PROJ_ROOT:" ${PROJ_ROOT}) add_custom_command(TARGET ${COPY_PADDLE_MASTER} COMMAND cp ${paddle_master_LIB_PATH} ${PROJ_ROOT}/python/paddle/v2/master/ ) add_dependencies(copy_paddle_master paddle_master) endif(WITH_GOLANG) +message("paddle_master_LIB_NAME:" ${paddle_master_LIB_NAME}) +message("CMAKE_CURRENT_BINARY_DIR:" ${CMAKE_CURRENT_BINARY_DIR}) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in ${CMAKE_CURRENT_BINARY_DIR}/setup.py) +message("OUTPUT_DIR:" ${OUTPUT_DIR}) +message("py_env:" ${py_env}) add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp COMMAND env 
${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT_DIR}/.timestamp From 660475b5ab1c6cc295420a527d549dc1f38ba03a Mon Sep 17 00:00:00 2001 From: gongweibao Date: Thu, 6 Jul 2017 12:14:30 +0000 Subject: [PATCH 329/542] modify to add paddle_master name --- python/CMakeLists.txt | 1 + python/setup.py.in | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 7a57d922ef..633d2b3786 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -27,6 +27,7 @@ endif(WITH_GOLANG) message("paddle_master_LIB_NAME:" ${paddle_master_LIB_NAME}) message("CMAKE_CURRENT_BINARY_DIR:" ${CMAKE_CURRENT_BINARY_DIR}) +message("CMAKE_CURRENT_SOURCE_DIR:" ${CMAKE_CURRENT_SOURCE_DIR}) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in ${CMAKE_CURRENT_BINARY_DIR}/setup.py) diff --git a/python/setup.py.in b/python/setup.py.in index dae0166487..9c77bed15f 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -27,7 +27,7 @@ setup(name='paddle', description='Parallel Distributed Deep Learning', install_requires=setup_requires, packages=packages, - package_data={'paddle.v2.master': ['${paddle_master_LIB_NAME}'], }, + package_data={'paddle.v2.master': ['libpaddle_master.so'], }, package_dir={ '': '${CMAKE_CURRENT_SOURCE_DIR}' }, From b396055499c5bd34bea5753e7ca19e18e2f7044b Mon Sep 17 00:00:00 2001 From: gongweibao Date: Thu, 6 Jul 2017 13:34:40 +0000 Subject: [PATCH 330/542] add -V --- paddle/scripts/docker/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index ab60f1a38d..0579bfcc7a 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -60,7 +60,7 @@ EOF make -j `nproc` if [ ${WITH_TESTING:-OFF} == "ON" ] && [ ${RUN_TEST:-OFF} == "ON" ] ; then pip uninstall -y py-paddle paddle || true - ctest --output-on-failure + ctest -V --output-on-failure fi From 4daa247d80a3f94b8f60fe084bd3887b4b5c698e Mon Sep 17 00:00:00 2001 From: gongweibao Date: Fri, 7 Jul 2017 01:12:48 +0000 Subject: [PATCH 331/542] rm -v --- paddle/scripts/docker/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 0579bfcc7a..ab60f1a38d 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -60,7 +60,7 @@ EOF make -j `nproc` if [ ${WITH_TESTING:-OFF} == "ON" ] && [ ${RUN_TEST:-OFF} == "ON" ] ; then pip uninstall -y py-paddle paddle || true - ctest -V --output-on-failure + ctest --output-on-failure fi From 126e64fc830ba5b787a787fdd2e2b7f7e2ef1939 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Fri, 7 Jul 2017 01:35:16 +0000 Subject: [PATCH 332/542] add cmake --- python/CMakeLists.txt | 7 ------- 1 file changed, 7 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 633d2b3786..361e764e25 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -17,22 +17,15 @@ add_custom_target(copy_paddle_master) SET(COPY_PADDLE_MASTER "") if(WITH_GOLANG) SET(COPY_PADDLE_MASTER "copy_paddle_master") - message("paddle_master_lib_path:" ${paddle_master_LIB_PATH}) - message("PROJ_ROOT:" ${PROJ_ROOT}) add_custom_command(TARGET ${COPY_PADDLE_MASTER} COMMAND cp ${paddle_master_LIB_PATH} ${PROJ_ROOT}/python/paddle/v2/master/ ) add_dependencies(copy_paddle_master paddle_master) endif(WITH_GOLANG) -message("paddle_master_LIB_NAME:" ${paddle_master_LIB_NAME}) -message("CMAKE_CURRENT_BINARY_DIR:" 
${CMAKE_CURRENT_BINARY_DIR})
-message("CMAKE_CURRENT_SOURCE_DIR:" ${CMAKE_CURRENT_SOURCE_DIR})

 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
     ${CMAKE_CURRENT_BINARY_DIR}/setup.py)

-message("OUTPUT_DIR:" ${OUTPUT_DIR})
-message("py_env:" ${py_env})
 add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp
     COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
     COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT_DIR}/.timestamp

From c78f41a331ddc181f98e5885f0aa64c29acb8182 Mon Sep 17 00:00:00 2001
From: liaogang
Date: Fri, 7 Jul 2017 11:53:29 +0800
Subject: [PATCH 333/542] FIX: explicitly specify glog install path

---
 cmake/external/glog.cmake | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cmake/external/glog.cmake b/cmake/external/glog.cmake
index b70e94a170..bd401faa6e 100644
--- a/cmake/external/glog.cmake
+++ b/cmake/external/glog.cmake
@@ -38,12 +38,14 @@ ExternalProject_Add(
     CMAKE_ARGS      -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
     CMAKE_ARGS      -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
     CMAKE_ARGS      -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR}
+    CMAKE_ARGS      -DCMAKE_INSTALL_LIBDIR=${GLOG_INSTALL_DIR}/lib
     CMAKE_ARGS      -DCMAKE_POSITION_INDEPENDENT_CODE=ON
     CMAKE_ARGS      -DWITH_GFLAGS=ON
     CMAKE_ARGS      -Dgflags_DIR=${GFLAGS_INSTALL_DIR}/lib/cmake/gflags
     CMAKE_ARGS      -DBUILD_TESTING=OFF
     CMAKE_ARGS      -DCMAKE_BUILD_TYPE=Release
     CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GLOG_INSTALL_DIR}
+                     -DCMAKE_INSTALL_LIBDIR:PATH=${GLOG_INSTALL_DIR}/lib
                      -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
                      -DCMAKE_BUILD_TYPE:STRING=Release
 )

From 1d2ef1db82136de8817229252774a797323f8eac Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Fri, 7 Jul 2017 14:19:47 +0800
Subject: [PATCH 334/542] [draft] add registry for Op, OpProto and OpAttrChecker (#2739)

* init op_registry.h

* dev op_registry.h

* add 'attr_checker.h', which is a draft of the op attribute checker.

* rename some macro parameters

* 1. Use `Attribute` and `AttributeMap` instead of `OpDesc`. `AttributeMap` is
  an unordered_map of `<string, Attribute>`, and `Attribute` is a boost::variant
  object that holds multiple types of attribute values.
2. Use `PADDLE_ENFORCE` to print checkers' failure messages.
3. Abstract default value operations into a new function: `DefaultChecker`.

* rename DefaultChecker to DefaultValueSetter

* Finish op_registry
1. Complete the development of the interfaces between OpRegistry and Protobuf.
2. Add unit test for op_registry.h

* Add demo and test of custom checker

* fix merge conflict
---
 paddle/framework/CMakeLists.txt      |   1 +
 paddle/framework/attr_checker.h      | 119 +++++++++++++
 paddle/framework/op_registry.h       | 253 +++++++++++++++++++++++++++
 paddle/framework/op_registry_test.cc | 122 +++++++++++++
 4 files changed, 495 insertions(+)
 create mode 100644 paddle/framework/attr_checker.h
 create mode 100644 paddle/framework/op_registry.h
 create mode 100644 paddle/framework/op_registry_test.cc

diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt
index 970b2b9abd..4409c6feae 100644
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@@ -11,6 +11,7 @@ proto_library(op_proto SRCS op_proto.proto DEPS attr_type)
 cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf)
 proto_library(op_desc SRCS op_desc.proto DEPS attr_type)
 cc_test(op_desc_test SRCS op_desc_test.cc DEPS op_desc protobuf)
+cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_proto op_desc)
 py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc.proto)
 # Generate an empty __init__.py to make framework_py_proto as a valid python module.
 add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)

diff --git a/paddle/framework/attr_checker.h b/paddle/framework/attr_checker.h
new file mode 100644
index 0000000000..c0c33d8114
--- /dev/null
+++ b/paddle/framework/attr_checker.h
@@ -0,0 +1,119 @@
+#pragma once
+
+#include <boost/variant.hpp>
+#include <functional>
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include "paddle/framework/enforce.h"
+
+namespace paddle {
+namespace framework {
+
+typedef boost::variant<boost::blank, int, float, std::string, std::vector<int>,
+                       std::vector<float>, std::vector<std::string>>
+    Attribute;
+typedef std::unordered_map<std::string, Attribute> AttributeMap;
+
+// check whether a value (attribute) fits a certain limit
+template <typename T>
+class LargerThanChecker {
+ public:
+  LargerThanChecker(T lower_bound) : lower_bound_(lower_bound) {}
+  void operator()(T& value) const {
+    PADDLE_ENFORCE(value > lower_bound_, "larger_than check fail");
+  }
+
+ private:
+  T lower_bound_;
+};
+
+// we can provide users with more common checkers, like 'LessThanChecker',
+// 'BetweenChecker'...
+
+template <typename T>
+class DefaultValueSetter {
+ public:
+  DefaultValueSetter(T default_value) : default_value_(default_value) {}
+  void operator()(T& value) const { value = default_value_; }
+
+ private:
+  T default_value_;
+};
+
+// check whether a certain attribute fits its limits
+// an attribute can have more than one limit
+template <typename T>
+class TypedAttrChecker {
+  typedef std::function<void(T&)> ValueChecker;
+
+ public:
+  TypedAttrChecker(const std::string& attr_name) : attr_name_(attr_name) {}
+
+  TypedAttrChecker& LargerThan(const T& lower_bound) {
+    value_checkers_.push_back(LargerThanChecker<T>(lower_bound));
+    return *this;
+  }
+
+  // we can add more common limits, like LessThan(), Between()...
+
+  TypedAttrChecker& SetDefault(const T& default_value) {
+    PADDLE_ENFORCE(default_value_setter_.empty(),
+                   "%s can't have more than one default value!", attr_name_);
+    default_value_setter_.push_back(DefaultValueSetter<T>(default_value));
+    return *this;
+  }
+
+  // allow users to provide their own checker
+  TypedAttrChecker& AddCustomChecker(const ValueChecker& checker) {
+    value_checkers_.push_back(checker);
+    return *this;
+  }
+
+  void operator()(AttributeMap& attr_map) const {
+    if (!attr_map.count(attr_name_)) {
+      // the user did not set this attr
+      PADDLE_ENFORCE(!default_value_setter_.empty(),
+                     "Attribute '%s' is required!", attr_name_);
+      // default_value_setter_ has no more than one element
+      T val;
+      (default_value_setter_[0])(val);
+      attr_map[attr_name_] = val;
+    }
+    Attribute& attr = attr_map.at(attr_name_);
+    T& attr_value = boost::get<T>(attr);
+    for (const auto& checker : value_checkers_) {
+      checker(attr_value);
+    }
+  }
+
+ private:
+  std::string attr_name_;
+  std::vector<ValueChecker> value_checkers_;
+  std::vector<ValueChecker> default_value_setter_;
+};
+
+// check whether all attributes of an op fit their own limits
+class OpAttrChecker {
+  typedef std::function<void(AttributeMap&)> AttrChecker;
+
+ public:
+  template <typename T>
+  TypedAttrChecker<T>& AddAttrChecker(const std::string& attr_name) {
+    attr_checkers_.push_back(TypedAttrChecker<T>(attr_name));
+    AttrChecker& checker = attr_checkers_.back();
+    return *(checker.target<TypedAttrChecker<T>>());
+  }
+
+  void Check(AttributeMap& attr_map) const {
+    for (const auto& checker : attr_checkers_) {
+      checker(attr_map);
+    }
+  }
+
+ private:
+  std::vector<AttrChecker> attr_checkers_;
+};
+
+}  // namespace framework
+}  // namespace paddle

diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h
new file mode 100644
index 0000000000..81241b5342
--- /dev/null
+++ b/paddle/framework/op_registry.h
@@ -0,0 +1,253 @@
+#pragma once
+
+#include "paddle/framework/attr_checker.h"
+
+//#include "paddle/framework/op_base.h"
+#include "paddle/framework/op_desc.pb.h"
+#include "paddle/framework/op_proto.pb.h"
+
+namespace paddle {
+namespace framework {
+
+//==================For test================//
+class OpBase {
+ public:
+  std::vector<std::string> inputs_;
+  std::vector<std::string> outputs_;
+  AttributeMap attr_map_;
+
+  virtual std::string Run() const = 0;
+  virtual ~OpBase() {}
+};
+//=========================================//
+
+// helper class to set attribute type
+struct AttrTypeHelper {
+  template <typename T>
+  static void SetAttrType(AttrProto* attr);
+
+  static Attribute GetAttrValue(const AttrDesc& attr_desc) {
+    switch (attr_desc.type()) {
+      case paddle::framework::AttrType::INT: {
+        return attr_desc.i();
+      }
+      case paddle::framework::AttrType::FLOAT: {
+        return attr_desc.f();
+      }
+      case paddle::framework::AttrType::STRING: {
+        return attr_desc.s();
+      }
+      case paddle::framework::AttrType::INTS: {
+        std::vector<int> val(attr_desc.ints_size());
+        for (int i = 0; i < attr_desc.ints_size(); ++i) {
+          val[i] = attr_desc.ints(i);
+        }
+        return val;
+      }
+      case paddle::framework::AttrType::FLOATS: {
+        std::vector<float> val(attr_desc.floats_size());
+        for (int i = 0; i < attr_desc.floats_size(); ++i) {
+          val[i] = attr_desc.floats(i);
+        }
+        return val;
+      }
+      case paddle::framework::AttrType::STRINGS: {
+        std::vector<std::string> val(attr_desc.strings_size());
+        for (int i = 0; i < attr_desc.strings_size(); ++i) {
+          val[i] = attr_desc.strings(i);
+        }
+        return val;
+      }
+    }
+    PADDLE_ENFORCE(false, "Unknown OpDesc::AttrDesc::type !");
+    return boost::blank();
+  }
+};
+
+template <>
+void AttrTypeHelper::SetAttrType<int>(AttrProto* attr) {
+  attr->set_type(paddle::framework::AttrType::INT);
+}
+
+template <>
+void AttrTypeHelper::SetAttrType<float>(AttrProto* attr) {
+  attr->set_type(paddle::framework::AttrType::FLOAT);
+}
+
+template <>
+void AttrTypeHelper::SetAttrType<std::string>(AttrProto* attr) {
+  attr->set_type(paddle::framework::AttrType::STRING);
+}
+
+template <>
+void AttrTypeHelper::SetAttrType<std::vector<int>>(AttrProto* attr) {
+  attr->set_type(paddle::framework::AttrType::INTS);
+}
+
+template <>
+void AttrTypeHelper::SetAttrType<std::vector<float>>(AttrProto* attr) {
+  attr->set_type(paddle::framework::AttrType::FLOATS);
+}
+
+template <>
+void AttrTypeHelper::SetAttrType<std::vector<std::string>>(AttrProto* attr) {
+  attr->set_type(paddle::framework::AttrType::STRINGS);
+}
+
+// This class not only builds the op proto but also initializes the attribute
+// checkers.
+class OpProtoAndCheckerMaker {
+ public:
+  OpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker)
+      : proto_(proto), op_checker_(op_checker) {}
+
+ protected:
+  void AddInput(const std::string& name, const std::string& comment) {
+    auto input = proto_->mutable_inputs()->Add();
+    *(input->mutable_name()) = name;
+    *(input->mutable_comment()) = comment;
+  }
+
+  void AddOutput(const std::string& name, const std::string& comment) {
+    auto output = proto_->mutable_outputs()->Add();
+    *(output->mutable_name()) = name;
+    *(output->mutable_comment()) = comment;
+  }
+
+  template <typename T>
+  TypedAttrChecker<T>& AddAttr(const std::string& name,
+                               const std::string& comment) {
+    auto attr = proto_->mutable_attrs()->Add();
+    *(attr->mutable_name()) = name;
+    *(attr->mutable_comment()) = comment;
+    AttrTypeHelper::SetAttrType<T>(attr);
+    return op_checker_->AddAttrChecker<T>(name);
+  }
+
+  void AddType(const std::string& op_type) { proto_->set_type(op_type); }
+
+  void AddComment(const std::string& comment) {
+    *(proto_->mutable_comment()) = comment;
+  }
+
+  OpProto* proto_;
+  OpAttrChecker* op_checker_;
+};
+
+class OpRegistry {
+  typedef std::function<OpBase*()> OpCreator;
+
+ public:
+  template <typename OpType, typename ProtoMakerType>
+  static void RegisterOp(const std::string& op_type) {
+    creators_[op_type] = []() { return new OpType; };
+    OpProto& op_proto = protos_[op_type];
+    OpAttrChecker& op_checker = op_checkers_[op_type];
+    ProtoMakerType(&op_proto, &op_checker);
+    PADDLE_ENFORCE(op_proto.IsInitialized() == true,
+                   "Fail to initialize %s's OpProto !", op_type);
+  }
+
+  static OpBase* CreateOp(const OpDesc& op_desc) {
+    std::string op_type = op_desc.type();
+    OpBase* op = (creators_.at(op_type))();
+    (op->inputs_).resize(op_desc.inputs_size());
+    for (int i = 0; i < op_desc.inputs_size(); ++i) {
+      (op->inputs_)[i] = op_desc.inputs(i);
+    }
+    (op->outputs_).resize(op_desc.outputs_size());
+    for (int i = 0; i < op_desc.outputs_size(); ++i) {
+      (op->outputs_)[i] = op_desc.outputs(i);
+    }
+    for (int i = 0; i < op_desc.attrs_size(); ++i) {
+      const AttrDesc& ith_attr = op_desc.attrs(i);
+      std::string name = ith_attr.name();
+      (op->attr_map_)[name] = AttrTypeHelper::GetAttrValue(ith_attr);
+    }
+    const OpAttrChecker& op_checker = op_checkers_.at(op_type);
+    op_checker.Check(op->attr_map_);
+    return op;
+  }
+
+ private:
+  static std::unordered_map<std::string, OpCreator> creators_;
+  static std::unordered_map<std::string, OpProto> protos_;
+  static std::unordered_map<std::string, OpAttrChecker> op_checkers_;
+};
+
+std::unordered_map<std::string, std::function<OpBase*()>> OpRegistry::creators_;
+std::unordered_map<std::string, OpProto> OpRegistry::protos_;
+std::unordered_map<std::string, OpAttrChecker> OpRegistry::op_checkers_;
+
+template <typename OpType, typename ProtoMakerType>
+class OpRegisterHelper {
+ public:
+  OpRegisterHelper(std::string op_type) {
+    OpRegistry::RegisterOp<OpType, ProtoMakerType>(op_type);
+  }
+};
+
+#define REGISTER_OP(__op_class, __op_maker_class, __op_type)          \
+  class __op_class##Register {                                        \
+   private:                                                           \
+    const static OpRegisterHelper<__op_class, __op_maker_class> reg;  \
+  };                                                                  \
+  const OpRegisterHelper<__op_class, __op_maker_class>                \
+      __op_class##Register::reg(#__op_type);
+
+// Demos
+
+class CosineOp : public OpBase {
+ public:
+  virtual std::string Run() const {
+    std::string msg = "CosineOp runs! scale = " +
+                      std::to_string(boost::get<float>(attr_map_.at("scale")));
+    return msg;
+  }
+};
+
+class CosineOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
+ public:
+  CosineOpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("input", "input of cosine op");
+    AddOutput("output", "output of cosine op");
+    AddAttr<float>("scale", "scale of cosine op")
+        .SetDefault(1.0)
+        .LargerThan(0.0);
+    AddType("cos");
+    AddComment("This is cos op");
+  }
+};
+
+REGISTER_OP(CosineOp, CosineOpProtoAndCheckerMaker, cos_sim)
+
+class MyTestOp : public OpBase {
+ public:
+  virtual std::string Run() const {
+    std::string msg =
+        "MyTestOp runs! test_attr = " +
+        std::to_string(boost::get<int>(attr_map_.at("test_attr")));
+    return msg;
+  }
+};
+
+class MyTestOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
+ public:
+  MyTestOpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("input", "input of my_test op");
+    AddOutput("output", "output of my_test op");
+    auto my_checker = [](int i) {
+      PADDLE_ENFORCE(i % 2 == 0, "'test_attr' must be even!");
+    };
+    AddAttr<int>("test_attr", "a simple test attribute")
+        .AddCustomChecker(my_checker);
+    AddType("my_test_op");
+    AddComment("This is my_test op");
+  }
+};
+
+REGISTER_OP(MyTestOp, MyTestOpProtoAndCheckerMaker, my_test_op)
+
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/framework/op_registry_test.cc b/paddle/framework/op_registry_test.cc
new file mode 100644
index 0000000000..17849ca019
--- /dev/null
+++ b/paddle/framework/op_registry_test.cc
@@ -0,0 +1,122 @@
+#include "paddle/framework/op_registry.h"
+#include <gtest/gtest.h>
+
+TEST(OpRegistry, CreateOp) {
+  paddle::framework::OpDesc op_desc;
+  op_desc.set_type("cos_sim");
+  op_desc.add_inputs("aa");
+  op_desc.add_outputs("bb");
+
+  auto attr = op_desc.mutable_attrs()->Add();
+  attr->set_name("scale");
+  attr->set_type(paddle::framework::AttrType::FLOAT);
+  attr->set_f(3.3);
+
+  paddle::framework::OpBase* op =
+      paddle::framework::OpRegistry::CreateOp(op_desc);
+  std::string debug_str = op->Run();
+  std::string str = "CosineOp runs! scale = " + std::to_string(3.3);
+  ASSERT_EQ(str.size(), debug_str.size());
+  for (size_t i = 0; i < debug_str.length(); ++i) {
+    ASSERT_EQ(debug_str[i], str[i]);
+  }
+}
+
+TEST(OpRegistry, IllegalAttr) {
+  paddle::framework::OpDesc op_desc;
+  op_desc.set_type("cos_sim");
+  op_desc.add_inputs("aa");
+  op_desc.add_outputs("bb");
+
+  auto attr = op_desc.mutable_attrs()->Add();
+  attr->set_name("scale");
+  attr->set_type(paddle::framework::AttrType::FLOAT);
+  attr->set_f(-2.0);
+
+  bool caught = false;
+  try {
+    paddle::framework::OpBase* op __attribute__((unused)) =
+        paddle::framework::OpRegistry::CreateOp(op_desc);
+  } catch (paddle::framework::EnforceNotMet err) {
+    caught = true;
+    std::string msg = "larger_than check fail";
+    const char* err_msg = err.what();
+    for (size_t i = 0; i < msg.length(); ++i) {
+      ASSERT_EQ(err_msg[i], msg[i]);
+    }
+  }
+  ASSERT_TRUE(caught);
+}
+
+TEST(OpRegistry, DefaultValue) {
+  paddle::framework::OpDesc op_desc;
+  op_desc.set_type("cos_sim");
+  op_desc.add_inputs("aa");
+  op_desc.add_outputs("bb");
+
+  paddle::framework::OpBase* op =
+      paddle::framework::OpRegistry::CreateOp(op_desc);
+  std::string debug_str = op->Run();
+  float default_value = 1.0;
+  std::string str = "CosineOp runs! scale = " + std::to_string(default_value);
+  ASSERT_EQ(str.size(), debug_str.size());
+  for (size_t i = 0; i < debug_str.length(); ++i) {
+    ASSERT_EQ(debug_str[i], str[i]);
+  }
+}
+
+TEST(OpRegistry, CustomChecker) {
+  paddle::framework::OpDesc op_desc;
+  op_desc.set_type("my_test_op");
+  op_desc.add_inputs("ii");
+  op_desc.add_outputs("oo");
+
+  // attr 'test_attr' is not set
+  bool caught = false;
+  try {
+    paddle::framework::OpBase* op __attribute__((unused)) =
+        paddle::framework::OpRegistry::CreateOp(op_desc);
+  } catch (paddle::framework::EnforceNotMet err) {
+    caught = true;
+    std::string msg = "Attribute 'test_attr' is required!";
+    const char* err_msg = err.what();
+    for (size_t i = 0; i < msg.length(); ++i) {
+      ASSERT_EQ(err_msg[i], msg[i]);
+    }
+  }
+  ASSERT_TRUE(caught);
+
+  // set 'test_attr' to an illegal value
+  auto attr = op_desc.mutable_attrs()->Add();
+  attr->set_name("test_attr");
+  attr->set_type(paddle::framework::AttrType::INT);
+  attr->set_i(3);
+  caught = false;
+  try {
+    paddle::framework::OpBase* op __attribute__((unused)) =
+        paddle::framework::OpRegistry::CreateOp(op_desc);
+  } catch (paddle::framework::EnforceNotMet err) {
+    caught = true;
+    std::string msg = "'test_attr' must be even!";
+    const char* err_msg = err.what();
+    for (size_t i = 0; i < msg.length(); ++i) {
+      ASSERT_EQ(err_msg[i], msg[i]);
+    }
+  }
+  ASSERT_TRUE(caught);
+
+  // set 'test_attr' to a legal value
+  op_desc.mutable_attrs()->Clear();
+  attr = op_desc.mutable_attrs()->Add();
+  attr->set_name("test_attr");
+  attr->set_type(paddle::framework::AttrType::INT);
+  attr->set_i(4);
+  paddle::framework::OpBase* op =
+      paddle::framework::OpRegistry::CreateOp(op_desc);
+  std::string debug_str = op->Run();
+  std::string str = "MyTestOp runs! test_attr = " + std::to_string(4);
+  ASSERT_EQ(str.size(), debug_str.size());
+  for (size_t i = 0; i < debug_str.length(); ++i) {
+    ASSERT_EQ(debug_str[i], str[i]);
+  }
+}
\ No newline at end of file

From 50e29bac38f485dca831b62ddcc40da2f38521ff Mon Sep 17 00:00:00 2001
From: Luo Tao
Date: Fri, 7 Jul 2017 15:39:00 +0800
Subject: [PATCH 335/542] mistaken: Folk -> Fork in develop branch

---
 doc_theme/templates/layout.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc_theme/templates/layout.html b/doc_theme/templates/layout.html
index 65e61c5f29..9fca69dc4e 100644
--- a/doc_theme/templates/layout.html
+++ b/doc_theme/templates/layout.html
@@ -101,7 +101,7 @@