From 597a58c3efe015be43e1e20a20a04921a9ae7c60 Mon Sep 17 00:00:00 2001
From: yangyaming
Date: Tue, 13 Jun 2017 23:52:16 +0800
Subject: [PATCH 01/10] Add DetectionMAPEvaluator.

---
 .../evaluators/DetectionMAPEvaluator.cpp      | 312 ++++++++++++++++++
 paddle/gserver/tests/test_Evaluator.cpp       |  17 +
 proto/ModelConfig.proto                       |   9 +
 python/paddle/trainer/config_parser.py        |  43 ++-
 .../trainer_config_helpers/evaluators.py      | 105 ++++--
 5 files changed, 453 insertions(+), 33 deletions(-)
 create mode 100644 paddle/gserver/evaluators/DetectionMAPEvaluator.cpp

diff --git a/paddle/gserver/evaluators/DetectionMAPEvaluator.cpp b/paddle/gserver/evaluators/DetectionMAPEvaluator.cpp
new file mode 100644
index 0000000000..7d326c2db1
--- /dev/null
+++ b/paddle/gserver/evaluators/DetectionMAPEvaluator.cpp
@@ -0,0 +1,312 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "Evaluator.h"
+#include "paddle/gserver/layers/DetectionUtil.h"
+
+using std::map;
+using std::vector;
+using std::pair;
+using std::make_pair;
+
+namespace paddle {
+
+/**
+ * @brief detection map Evaluator
+ *
+ * The config file api is detection_map_evaluator.
+ */
+class DetectionMAPEvaluator : public Evaluator {
+public:
+  DetectionMAPEvaluator()
+      : evaluateDifficult_(false), cpuOutput_(nullptr), cpuLabel_(nullptr) {}
+
+  virtual void start() {
+    Evaluator::start();
+    allTruePos_.clear();
+    allFalsePos_.clear();
+    numPos_.clear();
+  }
+
+  virtual real evalImp(std::vector<Argument>& arguments) {
+    overlapThreshold_ = config_.overlap_threshold();
+    backgroundId_ = config_.background_id();
+    evaluateDifficult_ = config_.evaluate_difficult();
+    apType_ = config_.ap_type();
+
+    MatrixPtr detectTmpValue = arguments[0].value;
+    Matrix::resizeOrCreate(cpuOutput_,
+                           detectTmpValue->getHeight(),
+                           detectTmpValue->getWidth(),
+                           false,
+                           false);
+
+    MatrixPtr labelTmpValue = arguments[1].value;
+    Matrix::resizeOrCreate(cpuLabel_,
+                           labelTmpValue->getHeight(),
+                           labelTmpValue->getWidth(),
+                           false,
+                           false);
+
+    cpuOutput_->copyFrom(*detectTmpValue);
+    cpuLabel_->copyFrom(*labelTmpValue);
+
+    Argument label = arguments[1];
+    const int* labelIndex = label.sequenceStartPositions->getData(false);
+    size_t batchSize = label.getNumSequences();
+
+    vector<map<size_t, vector<NormalizedBBox>>> allGTBBoxes;
+    vector<map<size_t, vector<pair<real, NormalizedBBox>>>> allDetectBBoxes;
+
+    for (size_t n = 0; n < batchSize; ++n) {
+      map<size_t, vector<NormalizedBBox>> bboxes;
+      for (int i = labelIndex[n]; i < labelIndex[n + 1]; ++i) {
+        vector<NormalizedBBox> bbox;
+        getBBoxFromLabelData(cpuLabel_->getData() + i * 6, 1, bbox);
+        int c = cpuLabel_->getData()[i * 6];
+        bboxes[c].push_back(bbox[0]);
+      }
+      allGTBBoxes.push_back(bboxes);
+    }
+
+    size_t imgId = 0;
+    for (size_t n = 0; n < cpuOutput_->getHeight();) {
+      map<size_t, vector<pair<real, NormalizedBBox>>> bboxes;
+      while (cpuOutput_->getData()[n * 7] == imgId &&
+             n < cpuOutput_->getHeight()) {
+        vector<real> label;
+        vector<real> score;
+        vector<NormalizedBBox> bbox;
+        getBBoxFromDetectData(
+            cpuOutput_->getData() + n * 7, 1, label, score, bbox);
+        bboxes[label[0]].push_back(make_pair(score[0], bbox[0]));
+        ++n;
+      }
+      ++imgId;
+      if (imgId > batchSize) break;
+      allDetectBBoxes.push_back(bboxes);
+    }
+
+    for (size_t n = 0; n < batchSize; ++n) {
+      for (map<size_t, vector<NormalizedBBox>>::iterator it =
+               allGTBBoxes[n].begin();
+           it != allGTBBoxes[n].end();
+           ++it) {
+        size_t count = 0;
+        if (evaluateDifficult_) {
+          count = it->second.size();
+        } else {
+          for (size_t i = 0; i < it->second.size(); ++i)
+            if (!(it->second[i].isDifficult)) ++count;
+        }
+        if (numPos_.find(it->first) == numPos_.end() && count != 0) {
+          numPos_[it->first] = count;
+        } else {
+          numPos_[it->first] += count;
+        }
+      }
+    }
+
+    // calcTFPos
+    calcTFPos(
+        batchSize, allGTBBoxes, allDetectBBoxes, &allTruePos_, &allFalsePos_);
+
+    return 0;
+  }
+
+  virtual void printStats(std::ostream& os) const {
+    real mAP = calcMAP();
+    os << "Detection mAP=" << mAP * 100;
+  }
+
+  virtual void distributeEval(ParameterClient2* client) {
+    LOG(FATAL) << "Distribute detection evaluation not implemented.";
+  }
+
+protected:
+  void calcTFPos(const size_t batchSize,
+                 const vector<map<size_t, vector<NormalizedBBox>>>& allGTBBoxes,
+                 const vector<map<size_t, vector<pair<real, NormalizedBBox>>>>&
+                     allDetectBBoxes,
+                 map<size_t, vector<pair<real, int>>>* allTruePos,
+                 map<size_t, vector<pair<real, int>>>* allFalsePos) {
+    for (size_t n = 0; n < allDetectBBoxes.size(); ++n) {
+      if (allGTBBoxes[n].size() == 0) {
+        for (map<size_t, vector<pair<real, NormalizedBBox>>>::const_iterator
+                 it = allDetectBBoxes[n].begin();
+             it != allDetectBBoxes[n].end();
+             ++it) {
+          size_t label = it->first;
+          for (size_t i = 0; i < it->second.size(); ++i) {
+            (*allTruePos)[label].push_back(make_pair(it->second[i].first, 0));
+            (*allFalsePos)[label].push_back(make_pair(it->second[i].first, 1));
+          }
+        }
+      } else {
+        for (map<size_t, vector<pair<real, NormalizedBBox>>>::const_iterator
+                 it = allDetectBBoxes[n].begin();
+             it != allDetectBBoxes[n].end();
+             ++it) {
+          size_t label = it->first;
+          vector<pair<real, NormalizedBBox>> predBBoxes = it->second;
+          if (allGTBBoxes[n].find(label) == allGTBBoxes[n].end()) {
+            for (size_t i = 0; i < predBBoxes.size(); ++i) {
+              (*allTruePos)[label].push_back(make_pair(predBBoxes[i].first, 0));
+              (*allFalsePos)[label].push_back(
+                  make_pair(predBBoxes[i].first, 1));
+            }
+          } else {
+            vector<NormalizedBBox> gtBBoxes =
+                allGTBBoxes[n].find(label)->second;
+            vector<bool> visited(gtBBoxes.size(), false);
+            // Sort detections in descending order based on scores
+            std::sort(predBBoxes.begin(),
+                      predBBoxes.end(),
+                      sortScorePairDescend<NormalizedBBox>);
+            for (size_t i = 0; i < predBBoxes.size(); ++i) {
+              real maxOverlap = -1.0;
+              size_t maxIdx = 0;
+              for (size_t j = 0; j < gtBBoxes.size(); ++j) {
+                real overlap =
+                    jaccardOverlap(predBBoxes[i].second, gtBBoxes[j]);
+                if (overlap > maxOverlap) {
+                  maxOverlap = overlap;
+                  maxIdx = j;
+                }
+              }
+              if (maxOverlap > overlapThreshold_) {
+                if (evaluateDifficult_ ||
+                    (!evaluateDifficult_ && !gtBBoxes[maxIdx].isDifficult)) {
+                  if (!visited[maxIdx]) {
+                    (*allTruePos)[label].push_back(
+                        make_pair(predBBoxes[i].first, 1));
+                    (*allFalsePos)[label].push_back(
+                        make_pair(predBBoxes[i].first, 0));
+                    visited[maxIdx] = true;
+                  } else {
+                    (*allTruePos)[label].push_back(
+                        make_pair(predBBoxes[i].first, 0));
+                    (*allFalsePos)[label].push_back(
+                        make_pair(predBBoxes[i].first, 1));
+                  }
+                }
+              } else {
+                (*allTruePos)[label].push_back(
+                    make_pair(predBBoxes[i].first, 0));
+                (*allFalsePos)[label].push_back(
+                    make_pair(predBBoxes[i].first, 1));
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  real calcMAP() const {
+    real mAP = 0.0;
+    size_t count = 0;
+    for (map<size_t, size_t>::const_iterator it = numPos_.begin();
+         it != numPos_.end();
+         ++it) {
+      size_t label = it->first;
+      size_t labelNumPos = it->second;
+      if (labelNumPos == 0 || allTruePos_.find(label) == allTruePos_.end())
+        continue;
+      vector<pair<real, int>> labelTruePos = allTruePos_.find(label)->second;
+      vector<pair<real, int>> labelFalsePos =
+          allFalsePos_.find(label)->second;
+      // Compute average precision.
+      vector<size_t> tpCumSum;
+      getAccumulation(labelTruePos, &tpCumSum);
+      vector<size_t> fpCumSum;
+      getAccumulation(labelFalsePos, &fpCumSum);
+      std::vector<real> precision, recall;
+      size_t num = tpCumSum.size();
+      // Compute Precision.
+      for (size_t i = 0; i < num; ++i) {
+        CHECK_LE(tpCumSum[i], labelNumPos);
+        precision.push_back(static_cast<real>(tpCumSum[i]) /
+                            static_cast<real>(tpCumSum[i] + fpCumSum[i]));
+        recall.push_back(static_cast<real>(tpCumSum[i]) / labelNumPos);
+      }
+      // VOC2007 style
+      if (apType_ == "11point") {
+        vector<real> maxPrecisions(11, 0.0);
+        int startIdx = num - 1;
+        for (int j = 10; j >= 0; --j)
+          for (int i = startIdx; i >= 0; --i) {
+            if (recall[i] < j / 10.) {
+              startIdx = i;
+              if (j > 0) maxPrecisions[j - 1] = maxPrecisions[j];
+              break;
+            } else {
+              if (maxPrecisions[j] < precision[i])
+                maxPrecisions[j] = precision[i];
+            }
+          }
+        for (int j = 10; j >= 0; --j) mAP += maxPrecisions[j] / 11;
+        ++count;
+      } else if (apType_ == "Integral") {
+        // Nature integral
+        real averagePrecisions = 0.;
+        real prevRecall = 0.;
+        for (size_t i = 0; i < num; ++i) {
+          if (fabs(recall[i] - prevRecall) > 1e-6)
+            averagePrecisions += precision[i] * fabs(recall[i] - prevRecall);
+          prevRecall = recall[i];
+        }
+        mAP += averagePrecisions;
+        ++count;
+      } else {
+        LOG(FATAL) << "Unknown ap version: " << apType_;
+      }
+    }
+    if (count != 0) mAP /= count;
+    return mAP;
+  }
+
+  void getAccumulation(vector<pair<real, int>> inPairs,
+                       vector<size_t>* accuVec) const {
+    std::stable_sort(
+        inPairs.begin(), inPairs.end(), sortScorePairDescend<int>);
+    accuVec->clear();
+    size_t sum = 0;
+    for (size_t i = 0; i < inPairs.size(); ++i) {
+      sum += inPairs[i].second;
+      accuVec->push_back(sum);
+    }
+  }
+
+  std::string getTypeImpl() const { return "detection_map"; }
+
+  real getValueImpl() const { return calcMAP() * 100; }
+
+private:
+  real overlapThreshold_;
+  bool evaluateDifficult_;
+  size_t backgroundId_;
+  std::string apType_;
+
+  MatrixPtr cpuOutput_;
+  MatrixPtr cpuLabel_;
+
+  map<size_t, size_t> numPos_;
+  map<size_t, vector<pair<real, int>>> allTruePos_;
+  map<size_t, vector<pair<real, int>>> allFalsePos_;
+};
+
+REGISTER_EVALUATOR(detection_map, DetectionMAPEvaluator);
+
+}  // namespace paddle
diff --git a/paddle/gserver/tests/test_Evaluator.cpp b/paddle/gserver/tests/test_Evaluator.cpp
index 4f5fdbb37c..93996392d2 100644
--- a/paddle/gserver/tests/test_Evaluator.cpp
+++ b/paddle/gserver/tests/test_Evaluator.cpp
@@ -138,6 +138,23 @@ void testEvaluatorAll(TestConfig testConf,
   testEvaluator(testConf, testEvaluatorName, batchSize, false);
 }
 
+TEST(Evaluator, detection_map) {
+  TestConfig config;
+  config.evaluatorConfig.set_type("detection_map");
+  config.evaluatorConfig.set_overlap_threshold(0.5);
+  config.evaluatorConfig.set_background_id(0);
+  config.evaluatorConfig.set_ap_type("Integral");
+  config.evaluatorConfig.set_evaluate_difficult(0);
+
+  config.inputDefs.push_back({INPUT_DATA, "output", 7});
+  config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "label", 6});
+  config.evaluatorConfig.set_evaluate_difficult(false);
+  testEvaluatorAll(config, "detection_map", 100);
+
+  config.evaluatorConfig.set_evaluate_difficult(true);
+  testEvaluatorAll(config, "detection_map", 100);
+}
+
 TEST(Evaluator, classification_error) {
   TestConfig config;
   config.evaluatorConfig.set_type("classification_error");
diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto
index 29270829bb..ebe4f5cbb5 100644
--- a/proto/ModelConfig.proto
+++ b/proto/ModelConfig.proto
@@ -489,6 +489,15 @@ message EvaluatorConfig {
   // Used by ClassificationErrorEvaluator
   // top # classification error
   optional int32 top_k = 13 [default = 1];
+
+  // Used by DetectionMAPEvaluator
+  optional double overlap_threshold = 14 [default = 0.5];
+
+  optional int32 background_id = 15 [default = 0];
+
+  optional bool evaluate_difficult = 16 [default = false];
+
+  optional string ap_type = 17 [default = "11point"];
 }
 
 message LinkConfig {
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 0792e2d40b..e78dc4f3b4 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -1300,20 +1300,23 @@ def parse_maxout(maxout, input_layer_name, maxout_conf):
 
 # Define an evaluator
 @config_func
-def Evaluator(
-        name,
-        type,
-        inputs,
-        chunk_scheme=None,
-        num_chunk_types=None,
-        classification_threshold=None,
-        positive_label=None,
-        dict_file=None,
-        result_file=None,
-        num_results=None,
-        top_k=None,
-        delimited=None,
-        excluded_chunk_types=None, ):
+def Evaluator(name,
+              type,
+              inputs,
+              chunk_scheme=None,
+              num_chunk_types=None,
+              classification_threshold=None,
+              positive_label=None,
+              dict_file=None,
+              result_file=None,
+              num_results=None,
+              top_k=None,
+              delimited=None,
+              excluded_chunk_types=None,
+              overlap_threshold=None,
+              background_id=None,
+              evaluate_difficult=None,
+              ap_type=None):
     evaluator = g_config.model_config.evaluators.add()
     evaluator.type = type
     evaluator.name = MakeLayerNameInSubmodel(name)
@@ -1347,6 +1350,18 @@ def Evaluator(
     if excluded_chunk_types:
         evaluator.excluded_chunk_types.extend(excluded_chunk_types)
 
+    if overlap_threshold is not None:
+        evaluator.overlap_threshold = overlap_threshold
+
+    if background_id is not None:
+        evaluator.background_id = background_id
+
+    if evaluate_difficult is not None:
+        evaluator.evaluate_difficult = evaluate_difficult
+
+    if ap_type is not None:
+        evaluator.ap_type = ap_type
+
 
 class LayerBase(object):
     def __init__(
diff --git a/python/paddle/trainer_config_helpers/evaluators.py b/python/paddle/trainer_config_helpers/evaluators.py
index a5234f3e47..1dcd804803 100644
--- a/python/paddle/trainer_config_helpers/evaluators.py
+++ b/python/paddle/trainer_config_helpers/evaluators.py
@@ -21,7 +21,8 @@ __all__ = [
     "chunk_evaluator", "sum_evaluator", "column_sum_evaluator",
     "value_printer_evaluator", "gradient_printer_evaluator",
     "maxid_printer_evaluator", "maxframe_printer_evaluator",
-    "seqtext_printer_evaluator", "classification_error_printer_evaluator"
+    "seqtext_printer_evaluator", "classification_error_printer_evaluator",
+    "detection_map_evaluator"
 ]
 
 
@@ -31,10 +32,11 @@ class EvaluatorAttribute(object):
     FOR_RANK = 1 << 2
     FOR_PRINT = 1 << 3
     FOR_UTILS = 1 << 4
+    FOR_DETECTION = 1 << 5
 
     KEYS = [
         "for_classification", "for_regression", "for_rank", "for_print",
-        "for_utils"
+        "for_utils", "for_detection"
     ]
 
     @staticmethod
@@ -57,22 +59,25 @@ def evaluator(*attrs):
     return impl
 
 
-def evaluator_base(
-        input,
-        type,
-        label=None,
-        weight=None,
-        name=None,
-        chunk_scheme=None,
-        num_chunk_types=None,
-        classification_threshold=None,
-        positive_label=None,
-        dict_file=None,
-        result_file=None,
-        num_results=None,
-        delimited=None,
-        top_k=None,
-        excluded_chunk_types=None, ):
+def evaluator_base(input,
+                   type,
+                   label=None,
+                   weight=None,
+                   name=None,
+                   chunk_scheme=None,
+                   num_chunk_types=None,
+                   classification_threshold=None,
+                   positive_label=None,
+                   dict_file=None,
+                   result_file=None,
+                   num_results=None,
+                   delimited=None,
+                   top_k=None,
+                   excluded_chunk_types=None,
+                   overlap_threshold=None,
+                   background_id=None,
+                   evaluate_difficult=None,
+                   ap_type=None):
     """
     Evaluator will evaluate the network status while training/testing.
@@ -107,6 +112,14 @@ def evaluator_base(
     :type weight: LayerOutput.
     :param top_k: number k in top-k error rate
     :type top_k: int
+    :param overlap_threshold: The overlap threshold used to filter detection
+                              results in detection tasks
+    :type overlap_threshold: float
+    :param background_id: Identifier of background class
+    :type background_id: int
+    :param evaluate_difficult: Whether to evaluate difficult objects
+    :type evaluate_difficult: bool
+    :param ap_type: How to calculate average precision
+    :type ap_type: str
     """
     # inputs type assertions.
     assert classification_threshold is None or isinstance(
@@ -136,7 +149,61 @@ def evaluator_base(
         delimited=delimited,
         num_results=num_results,
         top_k=top_k,
-        excluded_chunk_types=excluded_chunk_types, )
+        excluded_chunk_types=excluded_chunk_types,
+        overlap_threshold=overlap_threshold,
+        background_id=background_id,
+        evaluate_difficult=evaluate_difficult,
+        ap_type=ap_type)
+
+
+@evaluator(EvaluatorAttribute.FOR_DETECTION)
+@wrap_name_default()
+def detection_map_evaluator(input,
+                            label,
+                            overlap_threshold=0.5,
+                            background_id=0,
+                            evaluate_difficult=False,
+                            ap_type="11point",
+                            name=None):
+    """
+    Detection mAP Evaluator. It will print mean Average Precision for detection.
+
+    The detection mAP Evaluator according to the detection_output's output count
+    the true positive and the false positive bbox and integral them to get the
+    mAP.
+
+    The simple usage is:
+
+    .. code-block:: python
+
+       eval = detection_map_evaluator(input=det_output, label=lbl)
+
+    :param input: Input layer.
+    :type input: LayerOutput
+    :param label: Label layer.
+    :type label: LayerOutput
+    :param overlap_threshold: The bbox overlap threshold of a true positive.
+    :type overlap_threshold: float
+    :param background_id: The background class index.
+    :type background_id: int
+    :param evaluate_difficult: Wether evaluate a difficult ground truth.
+    :type evaluate_difficult: bool
+    """
+    if not isinstance(input, list):
+        input = [input]
+
+    if label:
+        input.append(label)
+
+    evaluator_base(
+        name=name,
+        type="detection_map",
+        input=input,
+        label=label,
+        overlap_threshold=overlap_threshold,
+        background_id=background_id,
+        evaluate_difficult=evaluate_difficult,
+        ap_type=ap_type)
 
 
 @evaluator(EvaluatorAttribute.FOR_CLASSIFICATION)

From 5f924d5d533831c29f1f5243eb1790467c9aac1a Mon Sep 17 00:00:00 2001
From: yangyaming
Date: Mon, 19 Jun 2017 18:15:15 +0800
Subject: [PATCH 02/10] Follow comments.

---
 doc/api/v2/config/evaluators.rst              |  9 +++
 .../evaluators/DetectionMAPEvaluator.cpp      | 66 +++++++++----------
 .../trainer_config_helpers/evaluators.py      |  6 +-
 3 files changed, 43 insertions(+), 38 deletions(-)

diff --git a/doc/api/v2/config/evaluators.rst b/doc/api/v2/config/evaluators.rst
index 39db51fa4a..9ac972fb19 100644
--- a/doc/api/v2/config/evaluators.rst
+++ b/doc/api/v2/config/evaluators.rst
@@ -99,3 +99,12 @@ value_printer
 ..  automodule:: paddle.v2.evaluator
     :members: value_printer
     :noindex:
+
+Detection
+=========
+
+detection_map
+-------------
+..  automodule:: paddle.v2.evaluator
+    :members: detection_map
+    :noindex:
diff --git a/paddle/gserver/evaluators/DetectionMAPEvaluator.cpp b/paddle/gserver/evaluators/DetectionMAPEvaluator.cpp
index 7d326c2db1..9b825db574 100644
--- a/paddle/gserver/evaluators/DetectionMAPEvaluator.cpp
+++ b/paddle/gserver/evaluators/DetectionMAPEvaluator.cpp
@@ -80,21 +80,20 @@ public:
       allGTBBoxes.push_back(bboxes);
     }
 
-    size_t imgId = 0;
-    for (size_t n = 0; n < cpuOutput_->getHeight();) {
+    size_t n = 0;
+    const real* cpuOutputData = cpuOutput_->getData();
+    for (size_t imgId = 0; imgId < batchSize; ++imgId) {
       map<size_t, vector<pair<real, NormalizedBBox>>> bboxes;
-      while (cpuOutput_->getData()[n * 7] == imgId &&
-             n < cpuOutput_->getHeight()) {
+      size_t curImgId = static_cast<size_t>((cpuOutputData + n * 7)[0]);
+      while (curImgId == imgId && n < cpuOutput_->getHeight()) {
         vector<real> label;
         vector<real> score;
         vector<NormalizedBBox> bbox;
-        getBBoxFromDetectData(
-            cpuOutput_->getData() + n * 7, 1, label, score, bbox);
+        getBBoxFromDetectData(cpuOutputData + n * 7, 1, label, score, bbox);
         bboxes[label[0]].push_back(make_pair(score[0], bbox[0]));
         ++n;
+        curImgId = static_cast<size_t>((cpuOutputData + n * 7)[0]);
       }
-      ++imgId;
-      if (imgId > batchSize) break;
       allDetectBBoxes.push_back(bboxes);
     }
 
@@ -119,15 +118,14 @@ public:
     }
 
     // calcTFPos
-    calcTFPos(
-        batchSize, allGTBBoxes, allDetectBBoxes, &allTruePos_, &allFalsePos_);
+    calcTFPos(batchSize, allGTBBoxes, allDetectBBoxes);
 
     return 0;
   }
 
   virtual void printStats(std::ostream& os) const {
     real mAP = calcMAP();
-    os << "Detection mAP=" << mAP * 100;
+    os << "Detection mAP=" << mAP;
   }
 
   virtual void distributeEval(ParameterClient2* client) {
@@ -138,9 +136,7 @@ protected:
   void calcTFPos(const size_t batchSize,
                  const vector<map<size_t, vector<NormalizedBBox>>>& allGTBBoxes,
                  const vector<map<size_t, vector<pair<real, NormalizedBBox>>>>&
-                     allDetectBBoxes,
-                 map<size_t, vector<pair<real, int>>>* allTruePos,
-                 map<size_t, vector<pair<real, int>>>* allFalsePos) {
+                     allDetectBBoxes) {
     for (size_t n = 0; n < allDetectBBoxes.size(); ++n) {
       if (allGTBBoxes[n].size() == 0) {
         for (map<size_t, vector<pair<real, NormalizedBBox>>>::const_iterator
@@ -149,8 +145,8 @@ protected:
              ++it) {
           size_t label = it->first;
           for (size_t i = 0; i < it->second.size(); ++i) {
-            (*allTruePos)[label].push_back(make_pair(it->second[i].first, 0));
-            (*allFalsePos)[label].push_back(make_pair(it->second[i].first, 1));
+            allTruePos_[label].push_back(make_pair(it->second[i].first, 0));
+            allFalsePos_[label].push_back(make_pair(it->second[i].first, 1));
           }
         }
       } else {
@@ -162,9 +158,8 @@ protected:
           size_t label = it->first;
           vector<pair<real, NormalizedBBox>> predBBoxes = it->second;
           if (allGTBBoxes[n].find(label) == allGTBBoxes[n].end()) {
             for (size_t i = 0; i < predBBoxes.size(); ++i) {
-              (*allTruePos)[label].push_back(make_pair(predBBoxes[i].first, 0));
-              (*allFalsePos)[label].push_back(
-                  make_pair(predBBoxes[i].first, 1));
+              allTruePos_[label].push_back(make_pair(predBBoxes[i].first, 0));
+              allFalsePos_[label].push_back(make_pair(predBBoxes[i].first, 1));
             }
           } else {
             vector<NormalizedBBox> gtBBoxes =
@@ -189,22 +184,21 @@ protected:
               if (maxOverlap > overlapThreshold_) {
                 if (evaluateDifficult_ ||
                     (!evaluateDifficult_ && !gtBBoxes[maxIdx].isDifficult)) {
                   if (!visited[maxIdx]) {
-                    (*allTruePos)[label].push_back(
+                    allTruePos_[label].push_back(
                         make_pair(predBBoxes[i].first, 1));
-                    (*allFalsePos)[label].push_back(
+                    allFalsePos_[label].push_back(
                         make_pair(predBBoxes[i].first, 0));
                     visited[maxIdx] = true;
                   } else {
-                    (*allTruePos)[label].push_back(
+                    allTruePos_[label].push_back(
                         make_pair(predBBoxes[i].first, 0));
-                    (*allFalsePos)[label].push_back(
+                    allFalsePos_[label].push_back(
                         make_pair(predBBoxes[i].first, 1));
                   }
                 }
               } else {
-                (*allTruePos)[label].push_back(
-                    make_pair(predBBoxes[i].first, 0));
-                (*allFalsePos)[label].push_back(
+                allTruePos_[label].push_back(make_pair(predBBoxes[i].first, 0));
+                allFalsePos_[label].push_back(
                     make_pair(predBBoxes[i].first, 1));
               }
             }
@@ -274,7 +268,7 @@ protected:
       }
     }
     if (count != 0) mAP /= count;
-    return mAP;
+    return mAP * 100;
   }
 
   void getAccumulation(vector<pair<real, int>> inPairs,
@@ -291,20 +285,22 @@ protected:
 
   std::string getTypeImpl() const { return "detection_map"; }
 
-  real getValueImpl() const { return calcMAP() * 100; }
+  real getValueImpl() const { return calcMAP(); }
 
 private:
-  real overlapThreshold_;
-  bool evaluateDifficult_;
-  size_t backgroundId_;
-  std::string apType_;
+  real overlapThreshold_;   // overlap threshold when determining whether matched
+  bool evaluateDifficult_;  // whether evaluate difficult ground truth
+  size_t backgroundId_;     // class index of background
+  std::string apType_;      // how to calculate mAP (Integral or 11point)
 
   MatrixPtr cpuOutput_;
   MatrixPtr cpuLabel_;
 
-  map<size_t, size_t> numPos_;
-  map<size_t, vector<pair<real, int>>> allTruePos_;
-  map<size_t, vector<pair<real, int>>> allFalsePos_;
+  map<size_t, size_t> numPos_;  // counts of true objects each classification
+  map<size_t, vector<pair<real, int>>>
+      allTruePos_;  // true positive prediction
+  map<size_t, vector<pair<real, int>>>
+      allFalsePos_;  // false positive prediction
 };
 
 REGISTER_EVALUATOR(detection_map, DetectionMAPEvaluator);
diff --git a/python/paddle/trainer_config_helpers/evaluators.py b/python/paddle/trainer_config_helpers/evaluators.py
index 1dcd804803..44d52edfa7 100644
--- a/python/paddle/trainer_config_helpers/evaluators.py
+++ b/python/paddle/trainer_config_helpers/evaluators.py
@@ -166,9 +166,9 @@ def detection_map_evaluator(input,
                             ap_type="11point",
                             name=None):
     """
-    Detection mAP Evaluator. It will print mean Average Precision for detection.
+    Detection mAP Evaluator. It will print mean Average Precision (mAP) for detection.
 
-    The detection mAP Evaluator according to the detection_output's output count
+    The detection mAP Evaluator based on the output of detection_output layer counts
     the true positive and the false positive bbox and integral them to get the
     mAP.
 
     The simple usage is:
@@ -186,7 +186,7 @@ def detection_map_evaluator(input,
     :type overlap_threshold: float
     :param background_id: The background class index.
     :type background_id: int
-    :param evaluate_difficult: Wether evaluate a difficult ground truth.
+    :param evaluate_difficult: Whether evaluate a difficult ground truth.
     :type evaluate_difficult: bool
     """
     if not isinstance(input, list):

From 3438d650edee11f3488994370a95ab11696d28d1 Mon Sep 17 00:00:00 2001
From: xuwei06
Date: Mon, 19 Jun 2017 23:41:49 -0700
Subject: [PATCH 03/10] Fix bugs for rnn generation

1. v2.layer.parse_network does not correctly handle the generation output.
2. GatherAgentLayer does not correctly handle generation output when
   batch_size > 1.
3. Fix CustomStackTrace for rnn group.
---
 .../gradientmachines/NeuralNetwork.cpp           |  9 +--
 .../RecurrentGradientMachine.cpp                 | 16 +++--
 .../RecurrentGradientMachine.h                   |  1 +
 paddle/gserver/layers/AgentLayer.cpp             | 69 +++++++++----------
 .../tests/sample_trainer_nest_rnn_gen.conf       |  8 +--
 paddle/trainer/tests/sample_trainer_rnn_gen.conf |  6 +-
 paddle/utils/CustomStackTrace.h                  |  6 +-
 paddle/utils/tests/test_CustomStackTrace.cpp     |  1 -
 python/paddle/v2/layer.py                        | 16 ++++-
 9 files changed, 76 insertions(+), 56 deletions(-)

diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
index 4512aacc81..a361d7deac 100644
--- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp
+++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
@@ -241,11 +241,14 @@ void NeuralNetwork::forward(const std::vector<Argument>& inArgs,
     dataLayers_[i]->setData(inArgs[i]);
   }
 
+  gLayerStackTrace.set_stage(true);
+
   {
     for (auto& layer : layers_) {
       REGISTER_TIMER_INFO("ForwardTimer", layer->getName().c_str());
       gLayerStackTrace.push(layer->getName());
       layer->forward(passType);
+      gLayerStackTrace.pop(layer->getName());
     }
   }
 
@@ -254,9 +257,6 @@ void NeuralNetwork::forward(const std::vector<Argument>& inArgs,
   for (auto& layer : outputLayers_) {
     outArgs->push_back(layer->getOutput());
   }
-  if (passType == PASS_TEST) {
-    gLayerStackTrace.clear();
-  }
 }
 
 void NeuralNetwork::resetState() {
@@ -283,9 +283,10 @@ void NeuralNetwork::getState(MachineState& machineState) {
 }
 
 void NeuralNetwork::backward(const UpdateCallback& callback) {
-  gLayerStackTrace.pop("");  // tell layer trace is during backward.
+  gLayerStackTrace.set_stage(false);
   FOR_EACH_R(layer, layers_) {
     REGISTER_TIMER_INFO("BackwardTimer", (*layer)->getName().c_str());
+    gLayerStackTrace.push((*layer)->getName());
    if ((*layer)->needGradient()) {
       (*layer)->backward(callback);
     }
diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
index 3e93038022..867c99ede3 100644
--- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
@@ -208,6 +208,7 @@ void RecurrentGradientMachine::init(
       });
   CHECK(subModelConfig != config.sub_models().end());
   reversed_ = subModelConfig->reversed();
+  generating_ = subModelConfig->has_generator();
 
   inFrameLines_.resize(subModelConfig->in_links_size());
   for (size_t i = 0; i < inFrameLines_.size(); ++i) {
@@ -538,7 +539,7 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
     The outputs are outFramesLines_[i].agentLayer
   */
 
-  if (inFrameLines_.empty() && passType == PASS_TEST) {
+  if (generating_) {
     generateSequence();
     return;
   }  // else forward..
@@ -569,6 +570,9 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
 }
 
 void RecurrentGradientMachine::backward(const UpdateCallback& callback) {
+  if (generating_) {
+    return;
+  }
   REGISTER_TIMER_INFO("RecurrentBwTime", "RecurrentBwTime");
   AsyncGpuBlock asyncGpuBlock;
   for (int i = maxSequenceLength_ - 1; i >= 0; --i) {
@@ -1321,11 +1325,10 @@ void RecurrentGradientMachine::fillGenOutputs() {
   batchMachineIdVec_.clear();
   generator_.ids.clear();
+  int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false);
+  starts[0] = 0;
   if (numResults > 1) {
     real* probs = generator_.outArg.in->getData();
-    int* starts =
-        generator_.outArg.sequenceStartPositions->getMutableData(false);
-    starts[0] = 0;
     for (size_t i = 0; i < finalPaths_.size(); ++i) {
       for (size_t j = 0; j < finalPaths_[i].size(); ++j) {
         Path& path = finalPaths_[i][j];
@@ -1348,7 +1351,10 @@ void RecurrentGradientMachine::fillGenOutputs() {
   } else {
     for (size_t i = 0; i < finalPaths_.size(); ++i) {
       CHECK(!finalPaths_[i].empty());
-      generator_.ids = finalPaths_[i][0].ids;
+      generator_.ids.insert(generator_.ids.begin(),
+                            finalPaths_[i][0].ids.begin(),
+                            finalPaths_[i][0].ids.end());
+      starts[i + 1] = starts[i] + finalPaths_[i][0].ids.size();
     }
   }
 }
diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h
index 8d94d7e2df..8e30883ac7 100644
--- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h
+++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h
@@ -414,6 +414,7 @@ protected:
     std::vector<int> ids;  // store generated sequences
     Argument outArg;       // final output argument
   };
+  bool generating_;
   Generator generator_;
 
   std::vector<std::unique_ptr<NeuralNetwork>> frames_;
diff --git a/paddle/gserver/layers/AgentLayer.cpp b/paddle/gserver/layers/AgentLayer.cpp
index 31463823b3..512932d9a5 100644
--- a/paddle/gserver/layers/AgentLayer.cpp
+++ b/paddle/gserver/layers/AgentLayer.cpp
@@ -109,6 +109,40 @@ void GatherAgentLayer::forwardValue(PassType passType) {
   }
 }
 
+namespace {
+
+// dest[index[i]] <- src[i] for each i
+void copyElements(const IVector& srcVec,
+                  const IVector& indexVec,
+                  IVector& destVec) {
+  const int* src = srcVec.getData();
+  const int* index = indexVec.getData();
+  int* dest = destVec.getData();
+  int len = indexVec.getSize();
+  CHECK_EQ(srcVec.getSize(), indexVec.getSize());
+  for (int i = 0; i < len; ++i) {
+    dest[index[i]] = src[i];
+  }
+}
+}
+
+void GatherAgentLayer::forwardIds(PassType passType) {
+  IVectorPtr realId = realLayers_[0]->getOutputLabel();
+  if (!realId) return;
+
+  IVector::resizeOrCreate(output_.ids, allIds_->getSize(), useGpu_);
+  IVectorPtr outId = output_.ids;
+  idsVec_.resize(idIndex_.size());
+
+  for (size_t i = 0; i < realLayers_.size(); ++i) {
+    const IVectorPtr& realId = realLayers_[i]->getOutputLabel();
+    idsVec_[i] = IVector::create(allIds_->getData() + idIndex_[i],
+                                 /* size */ realId->getSize(),
+                                 useGpu_);
+    execViaCpu(&copyElements, *realId, *idsVec_[i], *outId);
+  }
+}
+
 void GatherAgentLayer::backward(const UpdateCallback& callback) {
   (void)callback;
   const MatrixPtr& outputGrad = getOutputGrad();
@@ -174,41 +208,6 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) {
 REGISTER_LAYER(gather_agent, GatherAgentLayer);
 REGISTER_LAYER(scatter_agent, ScatterAgentLayer);
 
-void GatherAgentLayer::forwardIds(PassType passType) {
-  int height = 0;
-  IVectorPtr idReal = realLayers_[0]->getOutputLabel();
-
-  if (!idReal) return;
-
-  if (output_.subSequenceStartPositions) {
-    int* starts = output_.subSequenceStartPositions->getMutableData(false);
-    // Gather generator.idsVec
-    // if is beam search generation result. Get first result.
-    if (idReal->getData()[idReal->getSize() - 1] == -1) {
-      for (size_t i = 0; i < realLayers_.size(); ++i) {
-        // The first element stores first result size
-        idReal = realLayers_[i]->getOutputLabel();
-        idReal->subVecFrom(*idReal, 1, idReal->getData()[0]);
-      }
-    }
-    for (size_t i = 0; i < realLayers_.size(); ++i) {
-      CHECK(realLayers_[i]->getOutputLabel());
-      starts[i] = height;
-      height += realLayers_[i]->getOutputLabel()->getSize();
-    }
-    starts[realLayers_.size()] = height;
-    output_.sequenceStartPositions->getMutableData(false)[1] = height;
-
-    IVector::resizeOrCreate(output_.ids, height, false);
-    for (size_t i = 0; i < realLayers_.size(); ++i) {
-      output_.ids->subVec(starts[i], starts[i + 1] - starts[i])
-          ->copyFrom(*realLayers_[i]->getOutputLabel());
-    }
-  } else {
-    LOG(FATAL) << "Not implemented";
-  }
-}
-
 void ScatterAgentLayer::forwardSequence(PassType passType) {
   Layer::forward(passType);
   CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId());
diff --git a/paddle/trainer/tests/sample_trainer_nest_rnn_gen.conf b/paddle/trainer/tests/sample_trainer_nest_rnn_gen.conf
index d669fbc40c..741a0aa71d 100644
--- a/paddle/trainer/tests/sample_trainer_nest_rnn_gen.conf
+++ b/paddle/trainer/tests/sample_trainer_nest_rnn_gen.conf
@@ -35,7 +35,7 @@ def outer_step(dummy_data):
                   embedding_size=num_words)]
 
     def inner_step(dummy_memory, predict_word):
-        
+
         # simplified RNN for testing
         with mixed_layer(size=num_words) as layer:
             layer += full_matrix_projection(input=predict_word,
@@ -46,15 +46,15 @@ def outer_step(dummy_data):
                              param_attr=ParamAttr(name="wordvec"))
 
         return out
-        
+
     beam_gen = beam_search(name="rnn_gen",
                            step=inner_step,
                            input=gen_inputs,
                            bos_id=0,
                            eos_id=num_words-1,
                            beam_size=2 if beam_flag else 1,
-                           num_results_per_sample=2 if beam_flag else 1,
-                           max_length=10) 
+                           num_results_per_sample=1,
+                           max_length=10)
     return beam_gen
 
 beam_gen_concat = recurrent_group(name="rnn_gen_concat",
diff --git a/paddle/trainer/tests/sample_trainer_rnn_gen.conf b/paddle/trainer/tests/sample_trainer_rnn_gen.conf
index 2b337282f6..58d27f15ae 100644
--- a/paddle/trainer/tests/sample_trainer_rnn_gen.conf
+++ b/paddle/trainer/tests/sample_trainer_rnn_gen.conf
@@ -33,7 +33,7 @@ gen_inputs = [StaticInput(input=dummy_data, size=2),
               embedding_size=num_words)]
 
 def step(dummy_memory, predict_word):
-    
+
     # simplified RNN for testing
     with mixed_layer(size=num_words) as layer:
         layer += full_matrix_projection(input=predict_word,
@@ -44,7 +44,7 @@ def step(dummy_memory, predict_word):
                          param_attr=ParamAttr(name="wordvec"))
 
     return out
-    
+
 beam_gen = beam_search(name="rnn_gen",
                        step=step,
                        input=gen_inputs,
@@ -52,7 +52,7 @@ beam_gen = beam_search(name="rnn_gen",
                        eos_id=num_words-1,
                        beam_size=2 if beam_flag else 1,
                        num_results_per_sample=2 if beam_flag else 1,
-                       max_length=10) 
+                       max_length=10)
 
 seqtext_printer_evaluator(input=beam_gen,
                           id_input=sent_id,
diff --git a/paddle/utils/CustomStackTrace.h b/paddle/utils/CustomStackTrace.h
index 6992e85622..52a6df9497 100644
--- a/paddle/utils/CustomStackTrace.h
+++ b/paddle/utils/CustomStackTrace.h
@@ -55,13 +55,17 @@ public:
    * Else, just set status to popping.
    */
  void pop(const T& item) {
-    pushing() = false;
     auto& s = this->stack();
     if (item == s.top()) {
       s.pop();
     }
   }
 
+  /**
+   * @brief Indicate whether we are at forward or backward stage of computation
+   */
+  void set_stage(bool isForward) { pushing() = isForward; }
+
   /**
    * @brief clear current thread stack.
   */
diff --git a/paddle/utils/tests/test_CustomStackTrace.cpp b/paddle/utils/tests/test_CustomStackTrace.cpp
index b5d9f93f13..c320074fba 100644
--- a/paddle/utils/tests/test_CustomStackTrace.cpp
+++ b/paddle/utils/tests/test_CustomStackTrace.cpp
@@ -72,7 +72,6 @@ TEST(CustomStackTrace, normalTrain) {
     for (size_t i = 0; i < layerSize; ++i) {
       tracer.push("layer_" + paddle::str::to_string(i));
     }
-    tracer.pop("");
     for (size_t i = 0; i < layerSize; ++i) {
       tracer.pop("layer_" + paddle::str::to_string(layerSize - 1 - i));
     }
diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py
index bbb9c3ea8c..4ade1c6f32 100644
--- a/python/paddle/v2/layer.py
+++ b/python/paddle/v2/layer.py
@@ -45,12 +45,12 @@ __all__ = ['data', 'parse_network']
 def __need_to_keep__(name):
     return name in [
         'StaticInput', 'SubsequenceInput', 'GeneratedInput', 'LayerType',
-        'layer_support'
+        'layer_support', 'BaseGeneratedInput'
     ]
 
 
 def __need_to_wrap__(name):
-    return name not in ['AggregateLevel', 'ExpandLevel']
+    return name not in ['AggregateLevel', 'ExpandLevel', 'BaseGeneratedInput']
 
 
 def __convert_name__(inname):
@@ -199,6 +199,15 @@ def __get_used_submodels__(layer_names):
     return submodel_names
 
 
+def __get_submodel_data_out_links__():
+    data_links = set()
+    for submodel in cp.g_config.model_config.sub_models:
+        for link in submodel.out_links:
+            if cp.g_layer_map[link.link_name].type == 'data':
+                data_links.add(link.link_name)
+    return data_links
+
+
 def __get_used_evaluators__(layer_names):
     evaluator_names = set()
     for e in cp.g_config.model_config.evaluators:
@@ -264,6 +273,7 @@ def parse_network(output_layers, extra_layers=None):
     submodel_names = __get_used_submodels__(layer_names)
     submodel_names.add('root')
     evaluator_names = __get_used_evaluators__(layer_names)
+    data_out_links = __get_submodel_data_out_links__()
     input_layer_names = set()
     output_layer_names = set()
 
@@ -279,7 +289,7 @@ def parse_network(output_layers, extra_layers=None):
             continue
         model_config.layers.extend([l])
         if l.type == 'data':
-            if l.name in model_config.output_layer_names:
+            if l.name in data_out_links:
                 """
                 In text generation, the outlink to save the generated word
                 indices is a data_layer defined in recurrent_group. This

From 02cc7d90a606875a44d605c18e17855ce8339652 Mon Sep 17 00:00:00 2001
From: xuwei06
Date: Mon, 19 Jun 2017 13:23:24 -0700
Subject: [PATCH 04/10] Evaluator for recurrent group

Make the evaluators inside a recurrent group true evaluators, meaning
that their evaluation results are incorporated into the whole evaluator
result.
---
 .../gradientmachines/NeuralNetwork.cpp           | 35 ++++++++++++++++++-
 .../gserver/gradientmachines/NeuralNetwork.h     |  2 ++
 .../RecurrentGradientMachine.cpp                 | 20 ++---------
 .../RecurrentGradientMachine.h                   |  2 --
 4 files changed, 38 insertions(+), 21 deletions(-)

diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
index 4512aacc81..f245c16bfe 100644
--- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp
+++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
@@ -320,7 +320,7 @@ public:
     }
   }
 
-  virtual void eval(const NeuralNetwork& nn) {
+  virtual void eval(const NeuralNetwork& nn) override {
     for (auto& evaluator : evaluators_) {
       evaluator->eval(nn);
     }
@@ -395,6 +395,30 @@ private:
   }
 };
 
+class SubnetEvaluator : public CombinedEvaluator {
+public:
+  SubnetEvaluator(const std::string& layerName,
+                  std::unique_ptr<Evaluator>&& evaluator)
+      : layerName_(layerName) {
+    addEvaluator(std::move(evaluator));
+  }
+  virtual void eval(const NeuralNetwork& nn) override {
+    const LayerPtr& layer = nn.getLayer(layerName_);
+    CHECK(layer) << "Nonexistent layer: " << layerName_ << " in submodel "
+                 << nn.getName();
+    bool accessed = false;
+    layer->accessSubNetwork([this, &accessed](NeuralNetwork& subnet) {
+      subnet.eval(evaluators_[0].get());
+      accessed = true;
+    });
+    CHECK(accessed) << "There is no subnetwork for layer " << layerName_
+                    << " in submodel " << nn.getName();
+  }
+
+protected:
+  std::string layerName_;
+};
+
 Evaluator* NeuralNetwork::makeEvaluator() const {
   CombinedEvaluator* combinedEvaluator = new CombinedEvaluator();
   auto subModelConfig = std::find_if(config_.sub_models().begin(),
@@ -421,6 +445,15 @@ Evaluator* NeuralNetwork::makeEvaluator() const {
       combinedEvaluator->addEvaluator(std::move(evaluator));
     }
   }
+    for (auto& layer : layers_) {
+      layer->accessSubNetwork(
+          [layer, combinedEvaluator](NeuralNetwork& subnet) {
+            std::unique_ptr<Evaluator> subEvaluator(new SubnetEvaluator(
+                layer->getName(),
+                std::unique_ptr<Evaluator>(subnet.makeEvaluator())));
+            combinedEvaluator->addEvaluator(std::move(subEvaluator));
+          });
+    }
  } else {
     for (const EvaluatorConfig& evalConfig : config_.evaluators()) {
       std::unique_ptr<Evaluator> evaluator(Evaluator::create(evalConfig));
diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.h b/paddle/gserver/gradientmachines/NeuralNetwork.h
index e7b6c43840..12810f6425 100644
--- a/paddle/gserver/gradientmachines/NeuralNetwork.h
+++ b/paddle/gserver/gradientmachines/NeuralNetwork.h
@@ -129,6 +129,8 @@ public:
   static NeuralNetwork* newNeuralNetwork(const std::string& name = "",
                                          NeuralNetwork* rootNetwork = nullptr);
 
+  const std::string& getName() const { return subModelName_; }
+
 protected:
   /**
   * The constructor of NeuralNetwork.
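
Note: the user-visible effect of this patch can be sketched with a minimal
trainer-config fragment. This is an illustration only -- the layer and input
names below (emb, lbl, num_classes) are hypothetical and not part of the
patch. An evaluator declared inside a recurrent_group step is now aggregated,
frame by frame, into the enclosing network's evaluation result instead of
merely printing per-frame values:

    # Hypothetical sketch; assumes `emb` (a sequence input), `lbl` (a label
    # layer) and `num_classes` are defined elsewhere in the config.
    def step(y):
        out = fc_layer(input=y, size=num_classes, act=SoftmaxActivation())
        # With this patch, the evaluator below contributes to the overall
        # evaluation result via the new SubnetEvaluator wiring.
        classification_error_evaluator(input=out, label=lbl)
        return out

    rnn_out = recurrent_group(name="rnn", step=step, input=[SubsequenceInput(emb)])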
diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
index 3e93038022..5d4b67da84 100644
--- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
@@ -287,10 +287,6 @@ void RecurrentGradientMachine::init(
       parameterIds_.push_back(para->getID());
     }
   }
-
-  if (subModelConfig->evaluator_names_size() > 0) {
-    evaluator_.reset(frames_[0]->makeEvaluator());
-  }
 }
 
 void RecurrentGradientMachine::resizeOrCreateFrames(int numFrames) {
@@ -561,9 +557,6 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
     std::vector<Argument> outArgs;
     frames_[i]->forward(inArgs, &outArgs, passType);
   }
-  if (evaluator_ && passType == PASS_TEST) {
-    this->eval(evaluator_.get());
-  }
 
   reorganizeOutput(passType);
 }
@@ -577,11 +570,6 @@ void RecurrentGradientMachine::backward(const UpdateCallback& callback) {
   for (auto& memoryFrameLine : memoryFrameLines_) {
     memoryFrameLine.bootLayer->backward(nullptr);
   }
-
-  // call printers here so the gradient can be printed
-  if (evaluator_) {
-    this->eval(evaluator_.get());
-  }
 }
 
 void RecurrentGradientMachine::forwardBackward(
@@ -595,9 +583,9 @@ void RecurrentGradientMachine::forwardBackward(
 void RecurrentGradientMachine::eval(Evaluator* evaluator) const {
   // call printers frame by frame
   for (int i = 0; i < maxSequenceLength_; ++i) {
-    LOG(INFO) << "Recurrent Layer Group eval frame " << i << " begin";
+    VLOG(2) << "Recurrent Layer Group eval frame " << i << " begin";
     evaluator->eval(*(frames_[i].get()));
-    LOG(INFO) << "Recurrent Layer Group eval frame " << i << " end";
+    VLOG(2) << "Recurrent Layer Group eval frame " << i << " end";
   }
 }
 
@@ -1093,10 +1081,6 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) {
 
   copyDataOutlinkFrame(machineCur);
 
-  // call value printer
-  if (evaluator_) {
-    evaluator_->eval(*(frames_[machineCur].get()));
-  }
   // check eos
   const IVectorPtr& eosVec =
       eosFrameLine_->layers[machineCur]->getOutput().ids;
diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h
index 8d94d7e2df..9f957a9401 100644
--- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h
+++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h
@@ -428,8 +428,6 @@ protected:
   std::vector<int> parameterIds_;  // parameters actually used by this Layer
                                    // Group
 
-  std::unique_ptr<Evaluator> evaluator_;  // frame printers in this layer group
-
   // store final argument of outFrameLines_
   std::vector<Argument> dataArgs_;
   // store each frame's output argument of outFrameLines_

From b6910529181cdd1d0f560bf71d77a3fed43886f6 Mon Sep 17 00:00:00 2001
From: xuwei06
Date: Tue, 20 Jun 2017 15:10:46 -0700
Subject: [PATCH 05/10] Fix bug of ScatterAgentLayer for generation

---
 paddle/gserver/layers/AgentLayer.cpp | 61 +++++++++++++-----------
 paddle/gserver/layers/AgentLayer.h   |  7 +++-
 2 files changed, 38 insertions(+), 30 deletions(-)

diff --git a/paddle/gserver/layers/AgentLayer.cpp b/paddle/gserver/layers/AgentLayer.cpp
index 512932d9a5..15e7411b5f 100644
--- a/paddle/gserver/layers/AgentLayer.cpp
+++ b/paddle/gserver/layers/AgentLayer.cpp
@@ -170,23 +170,22 @@ void ScatterAgentLayer::forward(PassType passType) {
   CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId());
 
   int width = this->getSize();
-  if (realOutArg_.hasSeq()) {
-    forwardSequence(passType);
-  } else if (realOutArg_.value || realOutArg_.ids) {
-    output_.subArgFrom(
-        realOutArg_, /* offset */ idIndex_, idSize_, width, useGpu_);
-  } else {  // used in generation
-    if (realLayer_->getOutput().ids) {
-      IVector::resizeOrCreate(output_.ids, ids_->getSize(), useGpu_);
-      output_.ids->selectFrom(*realLayer_->getOutput().ids, *ids_);
-    }
-    if (realLayer_->getOutput().value) {
-      int height = ids_->getSize();
-      resetOutput(height, width);
-
-      const MatrixPtr& outV = getOutputValue();
-      const MatrixPtr& realV = realLayer_->getOutputValue();
-      outV->selectRows(*realV, *ids_);
+  if (selectionMode_) {
+    forwardWithSelection(passType);
+  } else {
+    if (realOutArg_.hasSeq()) {
+      output_.subArgFrom(realOutArg_,
+                         /* offset */ idIndex_,
+                         idSize_,
+                         width,
+                         useGpu_,
+                         /* trans */ false,
+                         /* seqFlag */ true,
+                         /* seqStart */ seqStartPosIndex_,
+                         /* seqSize */ numSequences_);
+    } else {
+      output_.subArgFrom(
+          realOutArg_, /* offset */ idIndex_, idSize_, width, useGpu_);
     }
   }
 }
@@ -194,6 +193,8 @@ void ScatterAgentLayer::forward(PassType passType) {
 void ScatterAgentLayer::backward(const UpdateCallback& callback) {
   (void)callback;
 
+  CHECK(!selectionMode_);
+
   const MatrixPtr& outputGrad = realOutArg_.grad;
   const MatrixPtr& realGrad = realLayer_->getOutputGrad();
   if (realGrad) {
@@ -208,7 +209,7 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) {
 REGISTER_LAYER(gather_agent, GatherAgentLayer);
 REGISTER_LAYER(scatter_agent, ScatterAgentLayer);
 
-void ScatterAgentLayer::forwardSequence(PassType passType) {
+void ScatterAgentLayer::forwardWithSelection(PassType passType) {
   Layer::forward(passType);
   CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId());
 
@@ -219,17 +220,19 @@ void ScatterAgentLayer::forwardSequence(PassType passType) {
   AsyncGpuBlock asyncGpuBlock;
   REGISTER_TIMER_INFO("SequenceAgentLayerForward", getName().c_str());
 
-  if (realOutArg_.value || realOutArg_.ids) {
-    CHECK(realOutArg_.sequenceStartPositions);
-    output_.subArgFrom(realOutArg_,
-                       /* offset */ idIndex_,
-                       idSize_,
-                       width,
-                       useGpu_,
-                       /* trans */ false,
-                       /* seqFlag */ true,
-                       /* seqStart */ seqStartPosIndex_,
-                       /* seqSize */ numSequences_);
+  if (!input.hasSeq()) {
+    if (realLayer_->getOutput().ids) {
+      IVector::resizeOrCreate(output_.ids, ids_->getSize(), useGpu_);
+      output_.ids->selectFrom(*realLayer_->getOutput().ids, *ids_);
+    }
+    if (realLayer_->getOutput().value) {
+      int height = ids_->getSize();
+      resetOutput(height, width);
+
+      const MatrixPtr& outV = getOutputValue();
+      const MatrixPtr& realV = realLayer_->getOutputValue();
+      outV->selectRows(*realV, *ids_);
+    }
   } else {
     // Putting the generation logic here is really an ugly hack!
     // used in generation
diff --git a/paddle/gserver/layers/AgentLayer.h b/paddle/gserver/layers/AgentLayer.h
index 461b84b17e..29681b29c6 100644
--- a/paddle/gserver/layers/AgentLayer.h
+++ b/paddle/gserver/layers/AgentLayer.h
@@ -110,6 +110,9 @@ protected:
   // of real layer.
  ICpuGpuVectorPtr inputStartPos_;
 
+  // true for setRealLayer, false for setRealLayerAndOutput
+  bool selectionMode_;
+
 public:
   explicit ScatterAgentLayer(const LayerConfig& config) : Layer(config) {}
 
@@ -137,6 +140,7 @@ public:
     } else {
       cpuIds_ = ids_;
     }
+    selectionMode_ = true;
   }
 
   // set real layer and output, [idIndex, idIndex + idSize) of *ids*
@@ -153,6 +157,7 @@ public:
     idIndex_ = idIndex;
     idSize_ = idSize;
     handleBackward_ = handleBackward;
+    selectionMode_ = false;
   }
 
   void setSequenceStartPositions(const ICpuGpuVectorPtr& sequenceStartPositions,
@@ -166,7 +171,7 @@ public:
   void forward(PassType passType) override;
   void backward(const UpdateCallback& callback) override;
 
-  void forwardSequence(PassType passType);
+  void forwardWithSelection(PassType passType);
 };
 
 }  // namespace paddle

From badcdfe1e539ffcad75f601e687a83fd1512cff1 Mon Sep 17 00:00:00 2001
From: wuyi05
Date: Wed, 21 Jun 2017 15:05:41 +0800
Subject: [PATCH 06/10] pserver etcd registration

---
 go/cmd/pserver/pserver.go  |  20 ++++++-
 go/pserver/client_test.go  |   8 ++-
 go/pserver/service.go      | 112 ++++++++++++++++++++++++++++++++++++-
 go/pserver/service_test.go |  25 ++++++---
 go/utils/helper.go         |  45 +++++++++++++++
 go/utils/helper_test.go    |  10 ++++
 6 files changed, 206 insertions(+), 14 deletions(-)
 create mode 100644 go/utils/helper.go
 create mode 100644 go/utils/helper_test.go

diff --git a/go/cmd/pserver/pserver.go b/go/cmd/pserver/pserver.go
index f0be251c24..ddf5ad40fd 100644
--- a/go/cmd/pserver/pserver.go
+++ b/go/cmd/pserver/pserver.go
@@ -5,18 +5,34 @@ import (
 	"net/http"
 	"net/rpc"
 	"strconv"
+	"time"
 
 	"github.com/namsral/flag"
 
 	"github.com/PaddlePaddle/Paddle/go/pserver"
+	log "github.com/sirupsen/logrus"
 )
 
 func main() {
 	port := flag.Int("port", 0, "port of the pserver")
+	etcdEndpoint := flag.String("etcd-endpoint", "http://127.0.0.1:2379",
+		"comma separated endpoint string for pserver to connect to etcd")
+	etcdTimeout := flag.Int("etcd-timeout", 5, "timeout for etcd calls")
+	logLevel := flag.String("log-level", "info", "log level, one of debug")
 	flag.Parse()
 
-	s := pserver.NewService()
-	err := rpc.Register(s)
+	level, err := log.ParseLevel(*logLevel)
+	if err != nil {
+		panic(err)
+	}
+	log.SetLevel(level)
+
+	timeout := time.Second * time.Duration((*etcdTimeout))
+	s, err := pserver.NewService(*etcdEndpoint, timeout)
+	if err != nil {
+		panic(err)
+	}
+	err = rpc.Register(s)
 	if err != nil {
 		panic(err)
 	}
diff --git a/go/pserver/client_test.go b/go/pserver/client_test.go
index d0371a26a1..6ecf1fa08a 100644
--- a/go/pserver/client_test.go
+++ b/go/pserver/client_test.go
@@ -7,6 +7,7 @@ import (
 	"strconv"
 	"strings"
 	"testing"
+	"time"
 
 	"github.com/PaddlePaddle/Paddle/go/pserver"
 )
@@ -30,9 +31,12 @@ func init() {
 		port[i] = p
 
 		go func(l net.Listener) {
-			s := pserver.NewService()
+			s, err := pserver.NewService("", time.Second*5)
+			if err != nil {
+				panic(err)
+			}
 			server := rpc.NewServer()
-			err := server.Register(s)
+			err = server.Register(s)
 			if err != nil {
 				panic(err)
 			}
diff --git a/go/pserver/service.go b/go/pserver/service.go
index 78a2bfaf63..a5c76857ab 100644
--- a/go/pserver/service.go
+++ b/go/pserver/service.go
@@ -1,9 +1,18 @@
 package pserver
 
 import (
+	"context"
 	"errors"
 	"fmt"
+	"strconv"
+	"strings"
 	"sync"
+	"time"
+
+	"github.com/PaddlePaddle/Paddle/go/utils"
+	"github.com/coreos/etcd/clientv3"
+	"github.com/coreos/etcd/clientv3/concurrency"
+	log "github.com/sirupsen/logrus"
 )
 
 // ElementType is the type of elements of a Parameter.
@@ -47,14 +56,113 @@ type Service struct {
 	mu       sync.Mutex
 	opt      *optimizer
 	paramMap map[string]Parameter
+
+	etcdEndpoints string
+	etcdClient    *clientv3.Client
+	// etcdTimeout is also used as retry intervals.
+	etcdTimeout time.Duration
+	// desired number of pservers in the job.
+	// assume desired will not change during one training job.
+	desired int
+	// FIXME: ensure GetExternalIP gets the correct ip for trainers to connect.
+	externalIP string
 }
 
 // NewService creates a new service.
-func NewService() *Service {
+func NewService(endpoints string, timeout time.Duration) (*Service, error) {
 	s := &Service{opt: newOptimizer(sgd, 0.005)}
 	s.paramMap = make(map[string]Parameter)
 	s.initialized = make(chan struct{})
-	return s
+	s.etcdEndpoints = endpoints
+	s.etcdTimeout = timeout
+
+	var err error
+	s.externalIP, err = utils.GetExternalIP()
+	if err != nil {
+		return nil, err
+	}
+
+	if endpoints != "" {
+		// initialize connection to etcd, try
+		ep := strings.Split(s.etcdEndpoints, ",")
+		for {
+			cli, err := clientv3.New(clientv3.Config{
+				Endpoints:   ep,
+				DialTimeout: s.etcdTimeout,
+			})
+			if err != nil {
+				log.Errorf("connect to etcd error: %v", err)
+				time.Sleep(s.etcdTimeout)
+				continue
+			}
+			s.etcdClient = cli
+			log.Debugf("inited client to %s", s.etcdEndpoints)
+			break
+		}
+		// wait and set s.desired init value
+		for {
+			ctx, cancel := context.WithTimeout(context.Background(), time.Second)
+			resp, err := s.etcdClient.Get(ctx, "/ps_desired")
+			cancel()
+			if err != nil {
+				log.Errorf("getting /ps_desired error: %v", err)
+				time.Sleep(s.etcdTimeout)
+				continue
+			}
+			for _, ev := range resp.Kvs {
+				log.Debugf("key: %s, value: %s", ev.Key, ev.Value)
+				if string(ev.Key) == "/ps_desired" {
+					s.desired, err = strconv.Atoi(string(ev.Value))
+					if err != nil {
+						log.Errorf("value of /ps_desired invalid %v\n", err)
+						time.Sleep(s.etcdTimeout)
+						// NOTE: wait util ps_desired value change
+						continue
+					}
+				}
+			}
+			break
+		}
+		s.registerPserverEtcd()
+	} // if endpoints != ""
+	// Bypass etcd registration if no endpoints specified
+	return s, nil
+}
+
+// registerPserverEtcd registers pserver node on etcd using transaction.
+func (s *Service) registerPserverEtcd() (*clientv3.TxnResponse, error) {
+	return concurrency.NewSTMRepeatable(context.TODO(), s.etcdClient, func(c concurrency.STM) error {
+		for i := 0; i < s.desired; i++ {
+			psKey := "/ps/" + strconv.Itoa(i)
+			log.Debugf("checking %s", psKey)
+			ps := c.Get(psKey)
+			log.Debugf("got value (%s) for key: %s", ps, psKey)
+
+			resp, err := s.etcdClient.Grant(context.TODO(), 5)
+			if err != nil {
+				log.Fatal(err)
+			}
+
+			if ps == "" {
+				// find the first id and write info
+				c.Put(psKey, s.externalIP, clientv3.WithLease(resp.ID))
+				log.Debugf("set pserver node %s with value %s", psKey, s.externalIP)
+				ch, kaerr := s.etcdClient.KeepAlive(context.TODO(), resp.ID)
+				if kaerr != nil {
+					log.Errorf("keepalive etcd node error: %v", kaerr)
+					return kaerr
+				}
+				// FIXME: does this really needed?
+				go func(ch <-chan *clientv3.LeaseKeepAliveResponse) {
+					ka := <-ch
+					log.Debugf("keepalive: %d\n", ka.TTL)
+				}(ch)
+				break
+			}
+		}
+		log.Debug("register finished")
+		return nil
+	})
+}
 
 // InitParam initializes a parameter.
diff --git a/go/pserver/service_test.go b/go/pserver/service_test.go
index b746d13e1c..f317535592 100644
--- a/go/pserver/service_test.go
+++ b/go/pserver/service_test.go
@@ -10,12 +10,15 @@ import (
 )
 
 func TestFull(t *testing.T) {
-	s := pserver.NewService()
+	s, err := pserver.NewService("", time.Second*5)
+	if err != nil {
+		t.Error(err)
+	}
 	var p pserver.Parameter
 	p.Name = "param_a"
 	p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0}
 	p.ElementType = pserver.Int32
-	err := s.InitParam(pserver.ParameterWithConfig{Param: p, Config: nil}, nil)
+	err = s.InitParam(pserver.ParameterWithConfig{Param: p, Config: nil}, nil)
 	if err != nil {
 		t.FailNow()
 	}
@@ -72,8 +75,11 @@ func TestFull(t *testing.T) {
 }
 
 func TestMultipleInit(t *testing.T) {
-	s := pserver.NewService()
-	err := s.FinishInitParams(0, nil)
+	s, err := pserver.NewService("", time.Second*5)
+	if err != nil {
+		t.Error(err)
+	}
+	err = s.FinishInitParams(0, nil)
 	if err != nil {
 		t.FailNow()
 	}
@@ -85,15 +91,18 @@ func TestMultipleInit(t *testing.T) {
 }
 
 func TestUninitialized(t *testing.T) {
-	s := pserver.NewService()
-	err := s.SendGrad(pserver.Gradient{}, nil)
+	s, err := pserver.NewService("", time.Second*5)
+	err = s.SendGrad(pserver.Gradient{}, nil)
 	if err.Error() != pserver.Uninitialized {
 		t.FailNow()
 	}
 }
 
 func TestBlockUntilInitialized(t *testing.T) {
-	s := pserver.NewService()
+	s, err := pserver.NewService("", time.Second*5)
+	if err != nil {
+		t.Error(err)
+	}
 	ch := make(chan struct{}, 2)
 	errCh := make(chan error, 2)
 	var wg sync.WaitGroup
@@ -133,7 +142,7 @@ func TestBlockUntilInitialized(t *testing.T) {
 	p.Name = "param_a"
 	p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0}
 	p.ElementType = pserver.Int32
-	err := s.InitParam(pserver.ParameterWithConfig{Param: p, Config: nil}, nil)
+	err = s.InitParam(pserver.ParameterWithConfig{Param: p, Config: nil}, nil)
 	if err != nil {
 		t.FailNow()
 	}
diff --git a/go/utils/helper.go b/go/utils/helper.go
new file mode 100644
index 0000000000..3220fd6c78
--- /dev/null
+++ b/go/utils/helper.go
@@ -0,0 +1,45 @@
+package utils
+
+import (
+	"errors"
+	"net"
+)
+
+// GetExternalIP returns the ip address of local network interface, not the
+// loopback device.
+func GetExternalIP() (string, error) {
+	ifaces, err := net.Interfaces()
+	if err != nil {
+		return "", err
+	}
+	for _, iface := range ifaces {
+		if iface.Flags&net.FlagUp == 0 {
+			continue // interface down
+		}
+		if iface.Flags&net.FlagLoopback != 0 {
+			continue // loopback interface
+		}
+		addrs, err := iface.Addrs()
+		if err != nil {
+			return "", err
+		}
+		for _, addr := range addrs {
+			var ip net.IP
+			switch v := addr.(type) {
+			case *net.IPNet:
+				ip = v.IP
+			case *net.IPAddr:
+				ip = v.IP
+			}
+			if ip == nil || ip.IsLoopback() {
+				continue
+			}
+			ip = ip.To4()
+			if ip == nil {
+				continue // not an ipv4 address
+			}
+			return ip.String(), nil
+		}
+	}
+	return "", errors.New("are you connected to the network?")
+}
diff --git a/go/utils/helper_test.go b/go/utils/helper_test.go
new file mode 100644
index 0000000000..aa7c509768
--- /dev/null
+++ b/go/utils/helper_test.go
@@ -0,0 +1,10 @@
+package utils
+
+import "testing"
+
+func TestGetIP(t *testing.T) {
+	_, err := GetExternalIP()
+	if err != nil {
+		t.Errorf("GetExternalIP returns error : %v\n", err)
+	}
+}

From b7a52bd9767de41d65382929b1629e95e35a3fe5 Mon Sep 17 00:00:00 2001
From: wuyi05
Date: Wed, 21 Jun 2017 15:25:02 +0800
Subject: [PATCH 07/10] add started info log

---
 go/cmd/pserver/pserver.go | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/go/cmd/pserver/pserver.go b/go/cmd/pserver/pserver.go
index ddf5ad40fd..f42c90c6c6 100644
--- a/go/cmd/pserver/pserver.go
+++ b/go/cmd/pserver/pserver.go
@@ -43,7 +43,9 @@ func main() {
 		panic(err)
 	}
 
+	log.Infof("start pserver at port %d", *port)
 	err = http.Serve(l, nil)
+
 	if err != nil {
 		panic(err)
 	}

From 106dd4bed2de2b3e71700de4487e9f4ca009df8e Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Thu, 22 Jun 2017 12:37:51 +0800
Subject: [PATCH 08/10] Using previous image for travis-ci

Because travis-ci has updated its Ubuntu Trusty image, Paddle CI builds
are failing. Just use the old image for now as a hot-fix; I will add
another issue to fix the Paddle build on the new Travis CI image.

Related link:
https://blog.travis-ci.com/2017-06-21-trusty-updates-2017-Q2-launch
---
 .travis.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis.yml b/.travis.yml
index f9b4a7e083..87cef10b2b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,3 +1,4 @@
+group: deprecated-2017Q2
 language: cpp
 cache:
   directories:

From 0b936e9399f2a5f01f6fde1d1b78b56306a8f9ac Mon Sep 17 00:00:00 2001
From: wuyi05
Date: Thu, 22 Jun 2017 15:00:39 +0800
Subject: [PATCH 09/10] update pserver etcd

---
 go/cmd/pserver/pserver.go                   |  3 +-
 go/pserver/service.go                       | 75 ++++++++++++---------
 go/utils/{ => networkhelper}/helper.go      |  2 +-
 go/utils/{ => networkhelper}/helper_test.go |  2 +-
 4 files changed, 47 insertions(+), 35 deletions(-)
 rename go/utils/{ => networkhelper}/helper.go (97%)
 rename go/utils/{ => networkhelper}/helper_test.go (87%)

diff --git a/go/cmd/pserver/pserver.go b/go/cmd/pserver/pserver.go
index f42c90c6c6..fe1fe5f6f0 100644
--- a/go/cmd/pserver/pserver.go
+++ b/go/cmd/pserver/pserver.go
@@ -18,7 +18,8 @@ func main() {
 	etcdEndpoint := flag.String("etcd-endpoint", "http://127.0.0.1:2379",
 		"comma separated endpoint string for pserver to connect to etcd")
 	etcdTimeout := flag.Int("etcd-timeout", 5, "timeout for etcd calls")
-	logLevel := flag.String("log-level", "info", "log level, one of debug")
+	logLevel := flag.String("log-level", "info",
+		"log level, possible values: debug, info, warning, error, fatal, panic")
 	flag.Parse()
 
 	level, err := log.ParseLevel(*logLevel)
diff --git a/go/pserver/service.go b/go/pserver/service.go
index a5c76857ab..7400b48832 100644
--- a/go/pserver/service.go
+++ b/go/pserver/service.go
@@ -9,7 +9,7 @@ import (
 	"sync"
 	"time"
 
-	"github.com/PaddlePaddle/Paddle/go/utils"
+	"github.com/PaddlePaddle/Paddle/go/utils/networkhelper"
 	"github.com/coreos/etcd/clientv3"
 	"github.com/coreos/etcd/clientv3/concurrency"
 	log "github.com/sirupsen/logrus"
)
@@ -33,6 +33,9 @@ const (
 	Float64
 )
 
+// PsDesired is etcd path for store desired pserver count
+const PsDesired = "/ps_desired"
+
 // Parameter is a piece of data to sync with the parameter server.
 type Parameter struct {
 	Name string
@@ -68,7 +71,8 @@ type Service struct {
 	externalIP string
 }
 
-// NewService creates a new service.
+// NewService creates a new service, will bypass etcd registration if no
+// endpoints specified.
 func NewService(endpoints string, timeout time.Duration) (*Service, error) {
 	s := &Service{opt: newOptimizer(sgd, 0.005)}
 	s.paramMap = make(map[string]Parameter)
@@ -77,7 +81,7 @@ func NewService(endpoints string, timeout time.Duration) (*Service, error) {
 	s.etcdTimeout = timeout
 
 	var err error
-	s.externalIP, err = utils.GetExternalIP()
+	s.externalIP, err = networkhelper.GetExternalIP()
 	if err != nil {
 		return nil, err
 	}
@@ -102,67 +106,74 @@ func NewService(endpoints string, timeout time.Duration) (*Service, error) {
 		// wait and set s.desired init value
 		for {
 			ctx, cancel := context.WithTimeout(context.Background(), time.Second)
-			resp, err := s.etcdClient.Get(ctx, "/ps_desired")
+			resp, err := s.etcdClient.Get(ctx, PsDesired)
 			cancel()
 			if err != nil {
-				log.Errorf("getting /ps_desired error: %v", err)
+				log.Errorf("getting %s error: %v", PsDesired, err)
 				time.Sleep(s.etcdTimeout)
 				continue
 			}
-			for _, ev := range resp.Kvs {
-				log.Debugf("key: %s, value: %s", ev.Key, ev.Value)
-				if string(ev.Key) == "/ps_desired" {
-					s.desired, err = strconv.Atoi(string(ev.Value))
-					if err != nil {
-						log.Errorf("value of /ps_desired invalid %v\n", err)
-						time.Sleep(s.etcdTimeout)
-						// NOTE: wait util ps_desired value change
-						continue
-					}
+			if len(resp.Kvs) != 0 {
+				s.desired, err = strconv.Atoi(string(resp.Kvs[0].Value))
+				if err != nil {
+					log.Errorf("value of %s invalid %v\n", PsDesired, err)
+					time.Sleep(s.etcdTimeout)
+					// NOTE: wait util ps_desired value change
+					continue
 				}
+				break
+			}
+		}
+		// try register pserver node on etcd
+		for {
+			ctx, cancel := context.WithTimeout(context.Background(), time.Second)
+			_, err := s.registerPserverEtcd(ctx)
+			cancel()
+			if err != nil {
+				log.Warn(err)
+				time.Sleep(s.etcdTimeout)
+				continue
 			}
 			break
 		}
-		s.registerPserverEtcd()
 	} // if endpoints != ""
 	// Bypass etcd registration if no endpoints specified
 	return s, nil
 }
 
 // registerPserverEtcd registers pserver node on etcd using transaction.
-func (s *Service) registerPserverEtcd() (*clientv3.TxnResponse, error) {
-	return concurrency.NewSTMRepeatable(context.TODO(), s.etcdClient, func(c concurrency.STM) error {
+func (s *Service) registerPserverEtcd(ctx context.Context) (*clientv3.TxnResponse, error) {
+	return concurrency.NewSTM(s.etcdClient, func(c concurrency.STM) error {
+		registered := false
 		for i := 0; i < s.desired; i++ {
 			psKey := "/ps/" + strconv.Itoa(i)
 			log.Debugf("checking %s", psKey)
 			ps := c.Get(psKey)
 			log.Debugf("got value (%s) for key: %s", ps, psKey)
 
-			resp, err := s.etcdClient.Grant(context.TODO(), 5)
-			if err != nil {
-				log.Fatal(err)
-			}
-
 			if ps == "" {
+				resp, err := s.etcdClient.Grant(context.TODO(), 5)
+				if err != nil {
+					log.Fatal(err)
+				}
 				// find the first id and write info
 				c.Put(psKey, s.externalIP, clientv3.WithLease(resp.ID))
 				log.Debugf("set pserver node %s with value %s", psKey, s.externalIP)
-				ch, kaerr := s.etcdClient.KeepAlive(context.TODO(), resp.ID)
+				_, kaerr := s.etcdClient.KeepAlive(context.TODO(), resp.ID)
 				if kaerr != nil {
 					log.Errorf("keepalive etcd node error: %v", kaerr)
 					return kaerr
 				}
-				// FIXME: does this really needed?
-				go func(ch <-chan *clientv3.LeaseKeepAliveResponse) {
-					ka := <-ch
-					log.Debugf("keepalive: %d\n", ka.TTL)
-				}(ch)
+				log.Debug("register finished")
+				registered = true
 				break
 			}
 		}
-		log.Debug("register finished")
-		return nil
-	})
+		if registered == true {
+			return nil
+		}
+		return errors.New("not registered, maybe because there are already enough pservers")
+	}, concurrency.WithAbortContext(ctx), concurrency.WithIsolation(concurrency.RepeatableReads))
}
 
 // InitParam initializes a parameter.
diff --git a/go/utils/helper.go b/go/utils/networkhelper/helper.go
similarity index 97%
rename from go/utils/helper.go
rename to go/utils/networkhelper/helper.go
index 3220fd6c78..fbeaea8f5e 100644
--- a/go/utils/helper.go
+++ b/go/utils/networkhelper/helper.go
@@ -1,4 +1,4 @@
-package utils
+package networkhelper
 
 import (
 	"errors"
 	"net"
 )
diff --git a/go/utils/helper_test.go b/go/utils/networkhelper/helper_test.go
similarity index 87%
rename from go/utils/helper_test.go
rename to go/utils/networkhelper/helper_test.go
@@ -1,4 +1,4 @@
-package utils
+package networkhelper
 
 import "testing"
 
 func TestGetIP(t *testing.T) {

From 7cf640b58ddeb2cc91d027ade8a6f326d42b5a8d Mon Sep 17 00:00:00 2001
From: Peng Li
Date: Fri, 23 Jun 2017 10:26:46 +0800
Subject: [PATCH 10/10] add coeff parameter to classification_cost

---
 python/paddle/trainer_config_helpers/layers.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index b8ce0373c0..84ed160773 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -3839,7 +3839,8 @@ def classification_cost(input,
                         weight=None,
                         name=None,
                         evaluator=classification_error_evaluator,
-                        layer_attr=None):
+                        layer_attr=None,
+                        coeff=1.):
     """
     classification cost Layer.
 
@@ -3855,6 +3856,8 @@ def classification_cost(input,
     :param evaluator: Evaluator method.
     :param layer_attr: layer's extra attribute.
     :type layer_attr: ExtraLayerAttribute
+    :param coeff: The coefficient affects the gradient in the backward.
+    :type coeff: float
     :return: LayerOutput object.
     :rtype: LayerOutput
     """
@@ -3868,6 +3871,7 @@ def classification_cost(input,
         name=name,
         type="multi-class-cross-entropy",
         inputs=ipts,
+        coeff=coeff,
         **ExtraLayerAttribute.to_kwargs(layer_attr))
 
     def __add_evaluator__(e):
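
Taken together, the user-facing additions in this series can be exercised from
a trainer config roughly as follows. This is a sketch only: det_output, lbl,
prob and cls_lbl are placeholder layers, not defined anywhere in this series.

    # Detection mAP evaluator from PATCH 01/10 and 02/10. The C++ evaluator
    # reads 7 values per detection and 6 values per ground-truth box (class
    # id first; ground-truth boxes also carry an isDifficult flag).
    detection_map_evaluator(
        input=det_output,          # output of a detection_output layer
        label=lbl,                 # ground-truth bbox label layer
        overlap_threshold=0.5,     # IoU above this counts as a true positive
        background_id=0,
        evaluate_difficult=False,
        ap_type="11point")         # or "Integral"

    # The coeff argument from PATCH 10/10 scales this cost's contribution
    # to the gradient in the backward pass.
    cost = classification_cost(input=prob, label=cls_lbl, coeff=0.5)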