From 0f4c7332969bdb057f855cd4a37174f3c06de281 Mon Sep 17 00:00:00 2001 From: guosheng Date: Thu, 20 Jul 2017 12:03:23 +0800 Subject: [PATCH 01/10] add ROIPooling for Fast(er) R-CNN --- paddle/gserver/layers/ROIPoolLayer.cpp | 154 ++++++++++++++++++ paddle/gserver/layers/ROIPoolLayer.h | 53 ++++++ paddle/gserver/tests/test_LayerGrad.cpp | 34 ++++ proto/ModelConfig.proto | 9 + python/paddle/trainer/config_parser.py | 11 ++ .../paddle/trainer_config_helpers/layers.py | 37 +++++ 6 files changed, 298 insertions(+) create mode 100644 paddle/gserver/layers/ROIPoolLayer.cpp create mode 100644 paddle/gserver/layers/ROIPoolLayer.h diff --git a/paddle/gserver/layers/ROIPoolLayer.cpp b/paddle/gserver/layers/ROIPoolLayer.cpp new file mode 100644 index 0000000000..04763fd152 --- /dev/null +++ b/paddle/gserver/layers/ROIPoolLayer.cpp @@ -0,0 +1,154 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "ROIPoolLayer.h" + +namespace paddle { + +REGISTER_LAYER(roi_pool, ROIPoolLayer); + +bool ROIPoolLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + Layer::init(layerMap, parameterMap); + + const ROIPoolConfig& layerConf = config_.inputs(0).roi_pool_conf(); + pooledWidth_ = layerConf.pooled_width(); + pooledHeight_ = layerConf.pooled_height(); + spatialScale_ = layerConf.spatial_scale(); + + return true; +} + +void ROIPoolLayer::forward(PassType passType) { + Layer::forward(passType); + + const ROIPoolConfig& layerConf = config_.inputs(0).roi_pool_conf(); + height_ = getInput(0).getFrameHeight(); + if (!height_) height_ = layerConf.height(); + width_ = getInput(0).getFrameWidth(); + if (!width_) width_ = layerConf.width(); + channels_ = getInputValue(0)->getWidth() / width_ / height_; + + size_t batchSize = getInput(0).getBatchSize(); + size_t numROIs = getInput(1).getBatchSize(); + + real* bottomData = getInputValue(0)->getData(); + size_t batchOffset = getInputValue(0)->getWidth(); + size_t channelOffset = height_ * width_; + real* bottomROIs = getInputValue(1)->getData(); + size_t roiOffset = getInputValue(1)->getWidth(); + size_t poolChannelOffset = pooledHeight_ * pooledWidth_; + + resetOutput(numROIs, channels_ * pooledHeight_ * pooledWidth_); + real* outputData = getOutputValue()->getData(); + Matrix::resizeOrCreate(maxIdxs_, + numROIs, + channels_ * pooledHeight_ * pooledWidth_, + false, + false); + real* argmaxData = maxIdxs_->getData(); + + size_t uZero = 0; + size_t uOne = 1; + + for (size_t n = 0; n < numROIs; ++n) { + size_t roiBatchIdx = bottomROIs[0]; + size_t roiStartW = std::round(bottomROIs[1] * spatialScale_); + size_t roiStartH = std::round(bottomROIs[2] * spatialScale_); + size_t roiEndW = std::round(bottomROIs[3] * spatialScale_); + size_t roiEndH = std::round(bottomROIs[4] * spatialScale_); + CHECK_GE(roiBatchIdx, 0); + CHECK_LT(roiBatchIdx, batchSize); + size_t roiHeight = std::max(roiEndH - roiStartH + 1, uOne); + size_t roiWidth = std::max(roiEndW - roiStartW + 1, uOne); + real binSizeH = + 
static_cast<real>(roiHeight) / static_cast<real>(pooledHeight_);
+    real binSizeW =
+        static_cast<real>(roiWidth) / static_cast<real>(pooledWidth_);
+    real* batchData = bottomData + batchOffset * roiBatchIdx;
+    for (size_t c = 0; c < channels_; ++c) {
+      for (size_t ph = 0; ph < pooledHeight_; ++ph) {
+        for (size_t pw = 0; pw < pooledWidth_; ++pw) {
+          size_t hstart = static_cast<size_t>(std::floor(ph * binSizeH));
+          size_t wstart = static_cast<size_t>(std::floor(pw * binSizeW));
+          size_t hend = static_cast<size_t>(std::ceil((ph + 1) * binSizeH));
+          size_t wend = static_cast<size_t>(std::ceil((pw + 1) * binSizeW));
+          hstart = std::min(std::max(hstart + roiStartH, uZero), height_);
+          wstart = std::min(std::max(wstart + roiStartW, uZero), width_);
+          hend = std::min(std::max(hend + roiStartH, uZero), height_);
+          wend = std::min(std::max(wend + roiStartW, uZero), width_);
+
+          bool isEmpty = (hend <= hstart) || (wend <= wstart);
+          size_t poolIndex = ph * pooledWidth_ + pw;
+          if (isEmpty) {
+            outputData[poolIndex] = 0;
+            argmaxData[poolIndex] = -1;
+          }
+
+          for (size_t h = hstart; h < hend; ++h) {
+            for (size_t w = wstart; w < wend; ++w) {
+              size_t index = h * width_ + w;
+              if (batchData[index] > outputData[poolIndex]) {
+                outputData[poolIndex] = batchData[index];
+                argmaxData[poolIndex] = index;
+              }
+            }
+          }
+        }
+      }
+      batchData += channelOffset;
+      outputData += poolChannelOffset;
+      argmaxData += poolChannelOffset;
+    }
+    bottomROIs += roiOffset;
+  }
+}
+
+void ROIPoolLayer::backward(const UpdateCallback& callback) {
+  real* bottomROIs = getInputValue(1)->getData();
+  size_t numROIs = getInput(1).getBatchSize();
+  size_t roiOffset = getInputValue(1)->getWidth();
+
+  MatrixPtr inGrad = getInputGrad(0);
+  real* inDiffData = inGrad->getData();
+  size_t batchOffset = getInputValue(0)->getWidth();
+  size_t channelOffset = height_ * width_;
+
+  MatrixPtr outGrad = getOutputGrad();
+  real* outDiffData = outGrad->getData();
+  size_t poolChannelOffset = pooledHeight_ * pooledWidth_;
+  real* argmaxData = maxIdxs_->getData();
+
+  for (size_t n = 0; n < numROIs; ++n) {
+    size_t roiBatchIdx = bottomROIs[0];
+    real* batchDiffData = inDiffData + batchOffset * roiBatchIdx;
+    for (size_t c = 0; c < channels_; ++c) {
+      for (size_t ph = 0; ph < pooledHeight_; ++ph) {
+        for (size_t pw = 0; pw < pooledWidth_; ++pw) {
+          size_t poolIndex = ph * pooledWidth_ + pw;
+          if (argmaxData[poolIndex] > 0) {
+            size_t index = static_cast<size_t>(argmaxData[poolIndex]);
+            batchDiffData[index] += outDiffData[poolIndex];
+          }
+        }
+      }
+      batchDiffData += channelOffset;
+      outDiffData += poolChannelOffset;
+      argmaxData += poolChannelOffset;
+    }
+    bottomROIs += roiOffset;
+  }
+}
+
+}  // namespace paddle
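A standalone illustration of the floor/ceil bin arithmetic used in forward() above (plain C++, not Paddle code; the numbers are made up): a 5-column RoI pooled to width 2 gives binSizeW = 2.5, so the two bins cover columns [0, 3) and [2, 5) and can overlap by one column.

    #include <cmath>
    #include <cstdio>

    int main() {
      const int roiWidth = 5, pooledWidth = 2;
      const double binSizeW = static_cast<double>(roiWidth) / pooledWidth;
      for (int pw = 0; pw < pooledWidth; ++pw) {
        // Same floor/ceil rule as in ROIPoolLayer::forward above.
        const int wstart = static_cast<int>(std::floor(pw * binSizeW));
        const int wend = static_cast<int>(std::ceil((pw + 1) * binSizeW));
        std::printf("bin %d covers columns [%d, %d)\n", pw, wstart, wend);
      }
      return 0;  // prints [0, 3) and [2, 5)
    }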
diff --git a/paddle/gserver/layers/ROIPoolLayer.h b/paddle/gserver/layers/ROIPoolLayer.h
new file mode 100644
index 0000000000..ca412d2845
--- /dev/null
+++ b/paddle/gserver/layers/ROIPoolLayer.h
@@ -0,0 +1,53 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "Layer.h"
+
+namespace paddle {
+
+/**
+ * A layer used by Fast R-CNN to extract feature maps of ROIs from the last
+ * feature map.
+ * - Input: This layer needs two input layers: The first input layer is a
+ *          convolution layer; The second input layer contains the ROI data,
+ *          which is the output of ProposalLayer in Faster R-CNN, i.e. the
+ *          layer for generating the bbox location offsets and the
+ *          classification confidences.
+ * - Output: The ROIs' feature map.
+ * Reference:
+ *    Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun.
+ *    Faster R-CNN: Towards Real-Time Object Detection with Region Proposal
+ */
+
+class ROIPoolLayer : public Layer {
+protected:
+  size_t channels_;
+  size_t width_;
+  size_t height_;
+  size_t pooledWidth_;
+  size_t pooledHeight_;
+  real spatialScale_;
+
+  MatrixPtr maxIdxs_;
+
+public:
+  explicit ROIPoolLayer(const LayerConfig& config) : Layer(config) {}
+
+  bool init(const LayerMap& layerMap,
+            const ParameterMap& parameterMap) override;
+
+  void forward(PassType passType) override;
+  void backward(const UpdateCallback& callback = nullptr) override;
+};
+}  // namespace paddle
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index 9af083468c..77feb6d4c9 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -1830,6 +1830,40 @@ TEST(Layer, CropLayer) {
   }
 }
 
+TEST(Layer, roi_pool) {
+  TestConfig config;
+  config.layerConfig.set_type("roi_pool");
+  config.biasSize = 0;
+  LayerInputConfig* input = config.layerConfig.add_inputs();
+  ROIPoolConfig* roiPoolConf = input->mutable_roi_pool_conf();
+  roiPoolConf->set_pooled_width(7);
+  roiPoolConf->set_pooled_height(7);
+  roiPoolConf->set_spatial_scale(1. / 16);
+  roiPoolConf->set_width(14);
+  roiPoolConf->set_height(14);
+
+  MatrixPtr roiValue = Matrix::create(10, 10, false, false);
+  roiValue->zeroMem();
+  real* roiData = roiValue->getData();
+  for (size_t i = 0; i < roiValue->getElementCnt() / 5; ++i) {
+    *roiData++ = std::rand() % 2;
+    *roiData++ = std::rand() % 224;
+    *roiData++ = std::rand() % 224;
+    size_t xMin = static_cast<size_t>(*(roiData - 2));
+    size_t yMin = static_cast<size_t>(*(roiData - 1));
+    *roiData++ = xMin + std::rand() % (224 - xMin);
+    *roiData++ = yMin + std::rand() % (224 - yMin);
+  }
+
+  config.inputDefs.push_back({INPUT_DATA, "input", 3 * 14 * 14, {}});
+  config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, "rois", roiValue, {}});
+  config.layerConfig.add_inputs();
+
+  for (auto useGpu : {false, true}) {
+    testLayerGrad(config, "roi_pool", 5, false, useGpu, false);
+  }
+}
+
 int main(int argc, char** argv) {
   testing::InitGoogleTest(&argc, argv);
   initMain(argc, argv);
diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto
index 83f72c137b..275723272b 100644
--- a/proto/ModelConfig.proto
+++ b/proto/ModelConfig.proto
@@ -289,6 +289,14 @@ message DetectionOutputConfig {
   optional uint32 width = 9 [default = 1];
 }
 
+message ROIPoolConfig {
+  required uint32 pooled_width = 1;
+  required uint32 pooled_height = 2;
+  required float spatial_scale = 3;
+  optional uint32 height = 4 [default = 1];
+  optional uint32 width = 5 [default = 1];
+}
+
 message LayerInputConfig {
   required string input_layer_name = 1;
   optional string input_parameter_name = 2;
@@ -309,6 +317,7 @@ message LayerInputConfig {
   optional RowConvConfig row_conv_conf = 15;
   optional MultiBoxLossConfig multibox_loss_conf = 16;
   optional DetectionOutputConfig detection_output_conf = 17;
+  optional ROIPoolConfig roi_pool_conf = 18;
 }
 
 message LayerConfig {
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index ab81e67579..bfb9dd7f1d 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -1732,6 +1732,17 @@ class DetectionOutputLayer(LayerBase):
         self.config.size = size
 
 
+@config_layer('roi_pool')
+class ROIPoolLayer(LayerBase):
+    def __init__(self, name, inputs, pooled_width, pooled_height,
+                 spatial_scale):
+        super(ROIPoolLayer, self).__init__(name, 'roi_pool', 0, inputs)
+        config_assert(len(inputs) == 2, 'ROIPoolLayer must have 2 inputs')
+        self.config.inputs[0].roi_pool_conf.pooled_width = pooled_width
+        self.config.inputs[0].roi_pool_conf.pooled_height = pooled_height
+        self.config.inputs[0].roi_pool_conf.spatial_scale = spatial_scale
+
+
 @config_layer('data')
 class DataLayer(LayerBase):
     def __init__(self, name, size, height=None, width=None, device=None):
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index fdb6f83f2b..c1bdeb6808 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -117,6 +117,7 @@ __all__ = [
     'cross_channel_norm_layer',
     'multibox_loss_layer',
     'detection_output_layer',
+    'roi_pool_layer',
     'spp_layer',
     'pad_layer',
     'eos_layer',
@@ -201,6 +202,7 @@ class LayerType(object):
     PRIORBOX_LAYER = 'priorbox'
     MULTIBOX_LOSS_LAYER = 'multibox_loss'
     DETECTION_OUTPUT_LAYER = 'detection_output'
+    ROI_POOL_LAYER = 'roi_pool'
 
     CTC_LAYER = 'ctc'
     WARP_CTC_LAYER = 'warp_ctc'
@@ -1200,6 +1202,41 @@ def detection_output_layer(input_loc,
         name, LayerType.DETECTION_OUTPUT_LAYER, parents=parents, size=size)
 
 
+@wrap_name_default("roi_pool")
+def roi_pool_layer(input,
+                   rois,
+                   pooled_width,
+                   pooled_height,
+                   spatial_scale,
+                   name=None):
+    """
+    A layer used by Fast R-CNN to extract feature maps of ROIs from the last
+    feature map.
+
+    :param name: The name of this layer.
+    :type name: basestring
+    :param input: The input layer.
+    :type input: LayerOutput
+    :param rois: The input ROIs' data.
+    :type rois: LayerOutput
+    :param pooled_width: The width after pooling.
+    :type pooled_width: int
+    :param pooled_height: The height after pooling.
+    :type pooled_height: int
+    :param spatial_scale: The spatial scale between the image and feature map.
+    :type spatial_scale: float
+    :return: LayerOutput
+    """
+    Layer(
+        name=name,
+        type=LayerType.ROI_POOL_LAYER,
+        inputs=[input.name, rois.name],
+        pooled_width=pooled_width,
+        pooled_height=pooled_height,
+        spatial_scale=spatial_scale)
+    return LayerOutput(name, LayerType.ROI_POOL_LAYER, parents=[input, rois])
+
+
 @wrap_name_default("cross_channel_norm")
 def cross_channel_norm_layer(input, name=None, param_attr=None):
     """
From d5384e640f1f972e9685e51cf018d0ff478c4362 Mon Sep 17 00:00:00 2001
From: guosheng
Date: Thu, 20 Jul 2017 13:12:10 +0800
Subject: [PATCH 02/10] refine layer gradient test of ROIPoolLayer

---
 paddle/gserver/tests/test_LayerGrad.cpp | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index 77feb6d4c9..b6282b472f 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -1842,17 +1842,20 @@ TEST(Layer, roi_pool) {
   roiPoolConf->set_width(14);
   roiPoolConf->set_height(14);
 
-  MatrixPtr roiValue = Matrix::create(10, 10, false, false);
+  const size_t roiNum = 10;
+  const size_t roiDim = 10;
+  const size_t batchSize = 5;
+  MatrixPtr roiValue = Matrix::create(roiNum, roiDim, false, false);
   roiValue->zeroMem();
   real* roiData = roiValue->getData();
-  for (size_t i = 0; i < roiValue->getElementCnt() / 5; ++i) {
-    *roiData++ = std::rand() % 2;
-    *roiData++ = std::rand() % 224;
-    *roiData++ = std::rand() % 224;
-    size_t xMin = static_cast<size_t>(*(roiData - 2));
-    size_t yMin = static_cast<size_t>(*(roiData - 1));
-    *roiData++ = xMin + std::rand() % (224 - xMin);
-    *roiData++ = yMin + std::rand() % (224 - yMin);
+  for (size_t i = 0; i < roiNum; ++i) {
+    roiData[i * roiDim + 0] = std::rand() % batchSize;
+    roiData[i * roiDim + 1] = std::rand() % 224;  // xMin
+    roiData[i * roiDim + 2] = std::rand() % 224;  // yMin
+    size_t xMin = static_cast<size_t>(roiData[i * roiDim + 1]);
+    size_t yMin = static_cast<size_t>(roiData[i * roiDim + 2]);
+    roiData[i * roiDim + 3] = xMin + std::rand() % (224 - xMin);  // xMax
+    roiData[i * roiDim + 4] = yMin + std::rand() % (224 - yMin);  // yMax
   }
 
   config.inputDefs.push_back({INPUT_DATA, "input", 3 * 14 * 14, {}});
@@ -1860,7 +1863,7 @@ TEST(Layer, roi_pool) {
   config.layerConfig.add_inputs();
 
   for (auto useGpu : {false, true}) {
-    testLayerGrad(config, "roi_pool", 5, false, useGpu, false);
+    testLayerGrad(config, "roi_pool", batchSize, false, useGpu, false);
   }
 }
 
From 1c00767731e2cf6d16abfd7b3c5002015fe5fd27 Mon Sep 17 00:00:00 2001
From: guosheng
Date: Thu, 20 Jul 2017 15:21:45 +0800
Subject: [PATCH 03/10] fix ci bug on android building

---
 paddle/gserver/layers/ROIPoolLayer.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/paddle/gserver/layers/ROIPoolLayer.cpp b/paddle/gserver/layers/ROIPoolLayer.cpp
index 04763fd152..34ba9030f7 100644
--- a/paddle/gserver/layers/ROIPoolLayer.cpp
+++ b/paddle/gserver/layers/ROIPoolLayer.cpp
@@ -64,10 +64,10 @@ void ROIPoolLayer::forward(PassType passType) {
 
   for (size_t n = 0; n < numROIs; ++n) {
     size_t roiBatchIdx = bottomROIs[0];
-    size_t roiStartW = std::round(bottomROIs[1] * spatialScale_);
-    size_t roiStartH = std::round(bottomROIs[2] * spatialScale_);
-    size_t roiEndW = std::round(bottomROIs[3] * spatialScale_);
-    size_t roiEndH = std::round(bottomROIs[4] * spatialScale_);
+    size_t roiStartW = round(bottomROIs[1] * spatialScale_);
+    size_t roiStartH = round(bottomROIs[2] * spatialScale_);
+    size_t roiEndW = round(bottomROIs[3] * spatialScale_);
+    size_t
roiEndH = round(bottomROIs[4] * spatialScale_); CHECK_GE(roiBatchIdx, 0); CHECK_LT(roiBatchIdx, batchSize); size_t roiHeight = std::max(roiEndH - roiStartH + 1, uOne); From 687b3749b4a4217c7f5d8b7e85c7b0c922cc4f6c Mon Sep 17 00:00:00 2001 From: guosheng Date: Sat, 22 Jul 2017 13:57:21 +0800 Subject: [PATCH 04/10] fix bug on GPU test --- paddle/gserver/layers/ROIPoolLayer.cpp | 89 ++++++++++++++++++++++---- 1 file changed, 78 insertions(+), 11 deletions(-) diff --git a/paddle/gserver/layers/ROIPoolLayer.cpp b/paddle/gserver/layers/ROIPoolLayer.cpp index 34ba9030f7..3d26286376 100644 --- a/paddle/gserver/layers/ROIPoolLayer.cpp +++ b/paddle/gserver/layers/ROIPoolLayer.cpp @@ -43,15 +43,46 @@ void ROIPoolLayer::forward(PassType passType) { size_t batchSize = getInput(0).getBatchSize(); size_t numROIs = getInput(1).getBatchSize(); - real* bottomData = getInputValue(0)->getData(); - size_t batchOffset = getInputValue(0)->getWidth(); + MatrixPtr dataValue = getInputValue(0); + MatrixPtr roiValue = getInputValue(1); + resetOutput(numROIs, channels_ * pooledHeight_ * pooledWidth_); + MatrixPtr outputValue = getOutputValue(); + + if (useGpu_) { + MatrixPtr dataCpuBuffer; + Matrix::resizeOrCreate(dataCpuBuffer, + dataValue->getHeight(), + dataValue->getWidth(), + false, + false); + MatrixPtr roiCpuBuffer; + Matrix::resizeOrCreate(roiCpuBuffer, + roiValue->getHeight(), + roiValue->getWidth(), + false, + false); + dataCpuBuffer->copyFrom(*dataValue); + roiCpuBuffer->copyFrom(*roiValue); + dataValue = dataCpuBuffer; + roiValue = roiCpuBuffer; + MatrixPtr outputCpuBuffer; + Matrix::resizeOrCreate(outputCpuBuffer, + outputValue->getHeight(), + outputValue->getWidth(), + false, + false); + outputCpuBuffer->copyFrom(*outputValue); + outputValue = outputCpuBuffer; + } + + real* bottomData = dataValue->getData(); + size_t batchOffset = dataValue->getWidth(); size_t channelOffset = height_ * width_; - real* bottomROIs = getInputValue(1)->getData(); - size_t roiOffset = getInputValue(1)->getWidth(); + real* bottomROIs = roiValue->getData(); + size_t roiOffset = roiValue->getWidth(); size_t poolChannelOffset = pooledHeight_ * pooledWidth_; - resetOutput(numROIs, channels_ * pooledHeight_ * pooledWidth_); - real* outputData = getOutputValue()->getData(); + real* outputData = outputValue->getData(); Matrix::resizeOrCreate(maxIdxs_, numROIs, channels_ * pooledHeight_ * pooledWidth_, @@ -113,20 +144,52 @@ void ROIPoolLayer::forward(PassType passType) { } bottomROIs += roiOffset; } + if (useGpu_) { + getOutputValue()->copyFrom(*outputValue); + } } void ROIPoolLayer::backward(const UpdateCallback& callback) { - real* bottomROIs = getInputValue(1)->getData(); + MatrixPtr inGradValue = getInputGrad(0); + MatrixPtr outGradValue = getOutputGrad(); + MatrixPtr roiValue = getInputValue(1); + + if (useGpu_) { + MatrixPtr inGradCpuBuffer; + Matrix::resizeOrCreate(inGradCpuBuffer, + inGradValue->getHeight(), + inGradValue->getWidth(), + false, + false); + MatrixPtr outGradCpuBuffer; + Matrix::resizeOrCreate(outGradCpuBuffer, + outGradValue->getHeight(), + outGradValue->getWidth(), + false, + false); + MatrixPtr roiCpuBuffer; + Matrix::resizeOrCreate(roiCpuBuffer, + roiValue->getHeight(), + roiValue->getWidth(), + false, + false); + inGradCpuBuffer->copyFrom(*inGradValue); + outGradCpuBuffer->copyFrom(*outGradValue); + roiCpuBuffer->copyFrom(*roiValue); + inGradValue = inGradCpuBuffer; + outGradValue = outGradCpuBuffer; + roiValue = roiCpuBuffer; + } + + real* bottomROIs = roiValue->getData(); size_t numROIs = 
getInput(1).getBatchSize();
   size_t roiOffset = getInputValue(1)->getWidth();
 
-  MatrixPtr inGrad = getInputGrad(0);
-  real* inDiffData = inGrad->getData();
+  real* inDiffData = inGradValue->getData();
   size_t batchOffset = getInputValue(0)->getWidth();
   size_t channelOffset = height_ * width_;
 
-  MatrixPtr outGrad = getOutputGrad();
-  real* outDiffData = outGrad->getData();
+  real* outDiffData = outGradValue->getData();
   size_t poolChannelOffset = pooledHeight_ * pooledWidth_;
   real* argmaxData = maxIdxs_->getData();
 
@@ -149,6 +212,10 @@ void ROIPoolLayer::backward(const UpdateCallback& callback) {
     }
     bottomROIs += roiOffset;
   }
+
+  if (useGpu_) {
+    getInputGrad(0)->copyFrom(*inGradValue);
+  }
 }
 
 }  // namespace paddle
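The workaround in PATCH 04 stages GPU matrices through CPU buffers, runs the CPU kernel, and copies the result back. A minimal sketch of that fallback pattern follows; ToyBuffer and the copy helpers are illustrative stand-ins, not Paddle's Matrix API:

    #include <algorithm>
    #include <vector>

    struct ToyBuffer {
      std::vector<float> data;
      bool onGpu;
    };

    // Pretend device transfers; in Paddle these are Matrix::copyFrom calls.
    void copyToHost(const ToyBuffer& src, ToyBuffer* dst) {
      dst->data = src.data;
      dst->onGpu = false;
    }
    void copyToDevice(const ToyBuffer& src, ToyBuffer* dst) {
      dst->data = src.data;
      dst->onGpu = true;
    }

    void forwardWithCpuFallback(ToyBuffer* inout) {
      ToyBuffer host = *inout;
      if (inout->onGpu) copyToHost(*inout, &host);  // stage input on the host
      for (float& v : host.data) v = std::max(v, 0.f);  // CPU-only kernel
      if (inout->onGpu) copyToDevice(host, inout);  // stage the result back
      else *inout = host;
    }

    int main() {
      ToyBuffer b{{-1.f, 2.f}, true};
      forwardWithCpuFallback(&b);
      return b.data[0] == 0.f ? 0 : 1;
    }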
+          hstart = std::min(std::max(hstart + roiStartH, 0UL), height_);
+          wstart = std::min(std::max(wstart + roiStartW, 0UL), width_);
+          hend = std::min(std::max(hend + roiStartH, 0UL), height_);
+          wend = std::min(std::max(wend + roiStartW, 0UL), width_);
 
           bool isEmpty = (hend <= hstart) || (wend <= wstart);
           size_t poolIndex = ph * pooledWidth_ + pw;
diff --git a/paddle/gserver/layers/ROIPoolLayer.h b/paddle/gserver/layers/ROIPoolLayer.h
index d04362f0d4..796467a5c8 100644
--- a/paddle/gserver/layers/ROIPoolLayer.h
+++ b/paddle/gserver/layers/ROIPoolLayer.h
@@ -29,6 +29,7 @@ namespace paddle {
  * Reference:
  *    Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun.
  *    Faster R-CNN: Towards Real-Time Object Detection with Region Proposal
+ *    Networks
  */
 
 class ROIPoolLayer : public Layer {
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 590097b96b..6703db5f0b 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -1257,6 +1257,7 @@ def roi_pool_layer(input,
                    pooled_width,
                    pooled_height,
                    spatial_scale,
+                   num_channels=None,
                    name=None):
     """
     A layer used by Fast R-CNN to extract feature maps of ROIs from the last
@@ -1274,8 +1275,14 @@ def roi_pool_layer(input,
     :type pooled_height: int
     :param spatial_scale: The spatial scale between the image and feature map.
     :type spatial_scale: float
+    :param num_channels: The number of input channels.
+    :type num_channels: int
     :return: LayerOutput
     """
+    if num_channels is None:
+        assert input.num_filters is not None
+        num_channels = input.num_filters
+    size = num_channels * pooled_width * pooled_height
     Layer(
         name=name,
         type=LayerType.ROI_POOL_LAYER,
@@ -1283,7 +1290,8 @@ def roi_pool_layer(input,
         pooled_width=pooled_width,
         pooled_height=pooled_height,
         spatial_scale=spatial_scale)
-    return LayerOutput(name, LayerType.ROI_POOL_LAYER, parents=[input, rois])
+    return LayerOutput(
+        name, LayerType.ROI_POOL_LAYER, parents=[input, rois], size=size)
 
 
 @wrap_name_default("cross_channel_norm")
diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
index a61beb871a..58e36eb333 100755
--- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
+++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
@@ -8,6 +8,6 @@ test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
 test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
 test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer
 test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer
-test_kmax_seq_socre_layer test_seq_select_layers)
+test_kmax_seq_socre_layer test_seq_select_layers test_roi_pool_layer)
 
 export whole_configs=(test_split_datasource)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr
new file mode 100644
index 0000000000..e8c379b17b
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr
@@ -0,0 +1,45 @@
+type: "nn"
+layers {
+  name: "data"
+  type: "data"
+  size: 588
+  active_type: ""
+  height: 14
+  width: 14
+}
+layers {
+  name: "rois"
+  type: "data"
+  size: 10
+  active_type: ""
"" +} +layers { + name: "__roi_pool_0__" + type: "roi_pool" + active_type: "" + inputs { + input_layer_name: "data" + roi_pool_conf { + pooled_width: 7 + pooled_height: 7 + spatial_scale: 0.0625 + } + } + inputs { + input_layer_name: "rois" + } +} +input_layer_names: "data" +input_layer_names: "rois" +output_layer_names: "__roi_pool_0__" +sub_models { + name: "root" + layer_names: "data" + layer_names: "rois" + layer_names: "__roi_pool_0__" + input_layer_names: "data" + input_layer_names: "rois" + output_layer_names: "__roi_pool_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py new file mode 100644 index 0000000000..0d6ca9f1bb --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py @@ -0,0 +1,14 @@ +from paddle.trainer_config_helpers import * + +data = data_layer(name='data', size=3 * 14 * 14, height=14, width=14) + +rois = data_layer(name='rois', size=10) + +roi_pool = roi_pool_layer( + input=data, + rois=rois, + pooled_width=7, + pooled_height=7, + spatial_scale=1. / 16) + +outputs(roi_pool) From 7829034da441ab3eddbc111c19ff433f8f843e0a Mon Sep 17 00:00:00 2001 From: guosheng Date: Fri, 10 Nov 2017 18:40:55 +0800 Subject: [PATCH 06/10] Refine ROIPoolLayer by following comments --- paddle/gserver/layers/ROIPoolLayer.cpp | 2 ++ paddle/gserver/layers/ROIPoolLayer.h | 1 + python/paddle/trainer/config_parser.py | 5 +++-- python/paddle/trainer_config_helpers/layers.py | 3 ++- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/paddle/gserver/layers/ROIPoolLayer.cpp b/paddle/gserver/layers/ROIPoolLayer.cpp index 131fd7e52b..99cfddb0cf 100644 --- a/paddle/gserver/layers/ROIPoolLayer.cpp +++ b/paddle/gserver/layers/ROIPoolLayer.cpp @@ -91,6 +91,8 @@ void ROIPoolLayer::forward(PassType passType) { real* argmaxData = maxIdxs_->getData(); for (size_t n = 0; n < numROIs; ++n) { + // the first five elememts of each RoI should be: + // batch_idx, roi_x_start, roi_y_start, roi_x_end, roi_y_end size_t roiBatchIdx = bottomROIs[0]; size_t roiStartW = round(bottomROIs[1] * spatialScale_); size_t roiStartH = round(bottomROIs[2] * spatialScale_); diff --git a/paddle/gserver/layers/ROIPoolLayer.h b/paddle/gserver/layers/ROIPoolLayer.h index 796467a5c8..4f07e49d6f 100644 --- a/paddle/gserver/layers/ROIPoolLayer.h +++ b/paddle/gserver/layers/ROIPoolLayer.h @@ -41,6 +41,7 @@ protected: size_t pooledHeight_; real spatialScale_; + // Since there is no int matrix, use real maxtrix instead. 
MatrixPtr maxIdxs_; public: diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index f31252882e..43d02bf70e 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1971,13 +1971,14 @@ class DetectionOutputLayer(LayerBase): @config_layer('roi_pool') class ROIPoolLayer(LayerBase): - def __init__(self, name, inputs, pooled_width, pooled_height, - spatial_scale): + def __init__(self, name, inputs, pooled_width, pooled_height, spatial_scale, + num_channels, **xargs): super(ROIPoolLayer, self).__init__(name, 'roi_pool', 0, inputs) config_assert(len(inputs) == 2, 'ROIPoolLayer must have 2 inputs') self.config.inputs[0].roi_pool_conf.pooled_width = pooled_width self.config.inputs[0].roi_pool_conf.pooled_height = pooled_height self.config.inputs[0].roi_pool_conf.spatial_scale = spatial_scale + self.set_cnn_layer(name, pooled_height, pooled_width, num_channels) @config_layer('data') diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 623ca047cd..617fbff948 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -1345,7 +1345,8 @@ def roi_pool_layer(input, inputs=[input.name, rois.name], pooled_width=pooled_width, pooled_height=pooled_height, - spatial_scale=spatial_scale) + spatial_scale=spatial_scale, + num_channels=num_channels) return LayerOutput( name, LayerType.ROI_POOL_LAYER, parents=[input, rois], size=size) From 79e0a26a6472a047ff5b3ebaedc3da6c6eeb6d2a Mon Sep 17 00:00:00 2001 From: guosheng Date: Fri, 10 Nov 2017 20:03:36 +0800 Subject: [PATCH 07/10] Fix test_roi_pool_layer.py --- .../protostr/test_roi_pool_layer.protostr | 55 ++++++++++++++++++- .../tests/configs/test_roi_pool_layer.py | 11 +++- 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr index e8c379b17b..f1bc65b3ae 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr @@ -13,12 +13,44 @@ layers { size: 10 active_type: "" } +layers { + name: "__conv_0__" + type: "exconv" + size: 3136 + active_type: "" + inputs { + input_layer_name: "data" + input_parameter_name: "___conv_0__.w0" + conv_conf { + filter_size: 3 + channels: 3 + stride: 1 + padding: 1 + groups: 1 + filter_channels: 3 + output_x: 14 + img_size: 14 + caffe_mode: true + filter_size_y: 3 + padding_y: 1 + stride_y: 1 + output_y: 14 + img_size_y: 14 + } + } + bias_parameter_name: "___conv_0__.wbias" + num_filters: 16 + shared_biases: true + height: 14 + width: 14 +} layers { name: "__roi_pool_0__" type: "roi_pool" + size: 784 active_type: "" inputs { - input_layer_name: "data" + input_layer_name: "__conv_0__" roi_pool_conf { pooled_width: 7 pooled_height: 7 @@ -28,6 +60,26 @@ layers { inputs { input_layer_name: "rois" } + height: 7 + width: 7 +} +parameters { + name: "___conv_0__.w0" + size: 432 + initial_mean: 0.0 + initial_std: 0.272165526976 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___conv_0__.wbias" + size: 16 + initial_mean: 0.0 + initial_std: 0.0 + dims: 16 + dims: 1 + initial_strategy: 0 + initial_smart: false } input_layer_names: "data" input_layer_names: "rois" @@ -36,6 +88,7 @@ sub_models { name: 
"root" layer_names: "data" layer_names: "rois" + layer_names: "__conv_0__" layer_names: "__roi_pool_0__" input_layer_names: "data" input_layer_names: "rois" diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py index 0d6ca9f1bb..b739a81b85 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py @@ -4,8 +4,17 @@ data = data_layer(name='data', size=3 * 14 * 14, height=14, width=14) rois = data_layer(name='rois', size=10) -roi_pool = roi_pool_layer( +conv = img_conv_layer( input=data, + filter_size=3, + num_channels=3, + num_filters=16, + padding=1, + act=LinearActivation(), + bias_attr=True) + +roi_pool = roi_pool_layer( + input=conv, rois=rois, pooled_width=7, pooled_height=7, From 2378679a9e4344d513654838726cb97ac2f318ff Mon Sep 17 00:00:00 2001 From: emailweixu Date: Fri, 10 Nov 2017 09:05:06 -0800 Subject: [PATCH 08/10] Fix a dead lock bug for dyload/nccl.h when nccl lib cannot be loaded (#5533) It caused by a bug of std::call_once described in https://stackoverflow.com/questions/41717579/stdcall-once-hangs-on-second-call-after-callable-threw-on-first-call. It is likely caused by a deeper bug of pthread_once, which is discussed in https://patchwork.ozlabs.org/patch/482350/ --- paddle/operators/nccl/nccl_gpu_common.h | 11 ++++-- paddle/platform/call_once.h | 50 +++++++++++++++++++++++++ paddle/platform/dynload/nccl.h | 25 +++++++------ 3 files changed, 71 insertions(+), 15 deletions(-) create mode 100644 paddle/platform/call_once.h diff --git a/paddle/operators/nccl/nccl_gpu_common.h b/paddle/operators/nccl/nccl_gpu_common.h index 5858cd4839..48e322f993 100644 --- a/paddle/operators/nccl/nccl_gpu_common.h +++ b/paddle/operators/nccl/nccl_gpu_common.h @@ -35,6 +35,7 @@ constexpr int kInvalidGPUId = -1; struct Communicator { std::vector comms_; std::unordered_map comm_id_map_; + bool inited_; Communicator() {} @@ -42,17 +43,21 @@ struct Communicator { void InitAll(const std::vector& gpus) { comms_.resize(gpus.size()); + inited_ = false; for (size_t i = 0; i < gpus.size(); ++i) { comm_id_map_[gpus[i]] = i; } PADDLE_ENFORCE( dynload::ncclCommInitAll(comms_.data(), gpus.size(), gpus.data())); + inited_ = true; } ~Communicator() { - for (size_t i = 0; i < comms_.size(); ++i) { - // FIXME(dzh) : PADDLE_ENFORCE return void - dynload::ncclCommDestroy(comms_[i]); + if (inited_) { + for (size_t i = 0; i < comms_.size(); ++i) { + // FIXME(dzh) : PADDLE_ENFORCE return void + dynload::ncclCommDestroy(comms_[i]); + } } } diff --git a/paddle/platform/call_once.h b/paddle/platform/call_once.h new file mode 100644 index 0000000000..248baf6613 --- /dev/null +++ b/paddle/platform/call_once.h @@ -0,0 +1,50 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/paddle/platform/call_once.h b/paddle/platform/call_once.h
new file mode 100644
index 0000000000..248baf6613
--- /dev/null
+++ b/paddle/platform/call_once.h
@@ -0,0 +1,50 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#pragma once
+
+#include <mutex>
+
+namespace paddle {
+namespace platform {
+
+/*
+  The current implementation of std::call_once has a bug described in
+  https://stackoverflow.com/questions/41717579/stdcall-once-hangs-on-second-call-after-callable-threw-on-first-call.
+  This is likely caused by a deeper bug of pthread_once, which is discussed in
+  https://patchwork.ozlabs.org/patch/482350/
+
+  This wrapper is a hack to avoid this bug.
+*/
+template <typename Callable, typename... Args>
+inline void call_once(std::once_flag& flag, Callable&& f, Args&&... args) {
+  bool good = false;
+  std::exception ex;
+  std::call_once(flag, [&]() {
+    try {
+      f(args...);
+      good = true;
+    } catch (const std::exception& e) {
+      ex = e;
+    } catch (...) {
+      ex = std::runtime_error("exception caught in call_once");
+    }
+  });
+  if (!good) {
+    throw std::exception(ex);
+  }
+}
+
+}  // namespace platform
+}  // namespace paddle
diff --git a/paddle/platform/dynload/nccl.h b/paddle/platform/dynload/nccl.h
index 0618c7414f..981b2ab258 100644
--- a/paddle/platform/dynload/nccl.h
+++ b/paddle/platform/dynload/nccl.h
@@ -17,6 +17,7 @@
 #include <dlfcn.h>
 #include <mutex>
 #include <nccl.h>
+#include "paddle/platform/call_once.h"
 #include "paddle/platform/dynload/dynamic_loader.h"
 
 namespace paddle {
@@ -27,18 +28,18 @@ extern std::once_flag nccl_dso_flag;
 extern void* nccl_dso_handle;
 
 #ifdef PADDLE_USE_DSO
-#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name)                         \
-  struct DynLoad__##__name {                                           \
-    template <typename... Args>                                        \
-    auto operator()(Args... args) -> decltype(__name(args...)) {       \
-      using nccl_func = decltype(__name(args...)) (*)(Args...);        \
-      std::call_once(nccl_dso_flag,                                    \
-                     paddle::platform::dynload::GetNCCLDsoHandle,      \
-                     &nccl_dso_handle);                                \
-      void* p_##__name = dlsym(nccl_dso_handle, #__name);              \
-      return reinterpret_cast<nccl_func>(p_##__name)(args...);         \
-    }                                                                  \
-  };                                                                   \
+#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name)                           \
+  struct DynLoad__##__name {                                             \
+    template <typename... Args>                                          \
+    auto operator()(Args... args) -> decltype(__name(args...)) {         \
+      using nccl_func = decltype(__name(args...)) (*)(Args...);          \
+      platform::call_once(nccl_dso_flag,                                 \
+                          paddle::platform::dynload::GetNCCLDsoHandle,   \
+                          &nccl_dso_handle);                             \
+      void* p_##__name = dlsym(nccl_dso_handle, #__name);                \
+      return reinterpret_cast<nccl_func>(p_##__name)(args...);           \
+    }                                                                     \
+  };                                                                      \
   extern DynLoad__##__name __name
 #else
 #define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \
From edb22c2f0c10bd8e70e3e917a6e2c10a2ab044b3 Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Fri, 10 Nov 2017 10:54:21 -0800
Subject: [PATCH 09/10] Add Scope::Rename (#5534)

It is useful in the gradient phase of an operator with block
---
 paddle/framework/scope.cc        | 18 ++++++++++++++++++
 paddle/framework/scope.h         |  9 ++++++++-
 paddle/operators/recurrent_op.cc | 24 ++++++++----------------
 3 files changed, 34 insertions(+), 17 deletions(-)

diff --git a/paddle/framework/scope.cc b/paddle/framework/scope.cc
index fb2c691056..9428b8a07e 100644
--- a/paddle/framework/scope.cc
+++ b/paddle/framework/scope.cc
@@ -98,5 +98,23 @@ void Scope::DeleteScope(Scope* scope) {
   delete scope;
 }
 
+void Scope::Rename(const std::string& origin_name,
+                   const std::string& new_name) const {
+  auto origin_it = vars_.find(origin_name);
+  PADDLE_ENFORCE(origin_it != vars_.end(),
+                 "Cannot find original variable with name %s", origin_name);
+  auto new_it = vars_.find(new_name);
+  PADDLE_ENFORCE(new_it == vars_.end(),
+                 "The variable with name %s is already in the scope", new_name);
+  vars_[new_name] = origin_it->second;
+  vars_.erase(origin_it);
+}
+
+std::string Scope::Rename(const std::string& origin_name) const {
+  auto var_name = string::Sprintf("%p.%d", this, vars_.size());
+  Rename(origin_name, var_name);
+  return var_name;
+}
+
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/framework/scope.h b/paddle/framework/scope.h
index fb66094939..c2aafb6ad8 100644
--- a/paddle/framework/scope.h
+++ b/paddle/framework/scope.h
@@ -68,11 +68,18 @@ class Scope {
   // enumerate all the variables the current scope contains.
   std::vector<std::string> GetAllNames(bool recursive = false) const;
 
+  // Rename variable to a new name
+  void Rename(const std::string& origin_name,
+              const std::string& new_name) const;
+
+  // Rename variable to a new name and return the new name
+  std::string Rename(const std::string& origin_name) const;
+
  private:
   // Call Scope::NewScope for a sub-scope.
   explicit Scope(Scope const* parent) : parent_(parent) {}
 
-  std::unordered_map<std::string, Variable*> vars_;
+  mutable std::unordered_map<std::string, Variable*> vars_;
   mutable std::list<Scope*> kids_;
   Scope const* parent_{nullptr};
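Scope::Rename enables a rename/accumulate/rename-back trick in the recurrent_op.cc diff that follows: the step gradient is moved to a unique temporary name, summed into the outside gradient, and then restored. A toy sketch of the idea; ToyScope is an illustrative stand-in for framework::Scope that maps names to floats instead of Variables:

    #include <cassert>
    #include <string>
    #include <unordered_map>

    struct ToyScope {
      std::unordered_map<std::string, float> vars;

      void Rename(const std::string& from, const std::string& to) {
        auto it = vars.find(from);
        assert(it != vars.end() && vars.count(to) == 0);
        float value = it->second;
        vars.erase(it);
        vars[to] = value;
      }
    };

    int main() {
      ToyScope scope;
      scope.vars["x@GRAD"] = 1.0f;  // step gradient inside the RNN scope
      scope.vars["x_sum"] = 2.0f;   // accumulated outside gradient
      // Move the step gradient to a unique temporary name, accumulate,
      // then restore the original name for the next time step:
      scope.Rename("x@GRAD", "tmp0");
      scope.vars["x_sum"] += scope.vars["tmp0"];  // what the sum op does
      scope.Rename("tmp0", "x@GRAD");
      assert(scope.vars["x_sum"] == 3.0f);
      return 0;
    }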
diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc
index b0e87b7059..0075ccd242 100644
--- a/paddle/operators/recurrent_op.cc
+++ b/paddle/operators/recurrent_op.cc
@@ -387,8 +387,8 @@ class RecurrentGradOp : public RecurrentBase {
     auto &p_names = Inputs(kParameters);
     PADDLE_ENFORCE_EQ(pg_names.size(), p_names.size());
 
-    for (size_t prog_id = 0; prog_id < pg_names.size(); ++prog_id) {
-      auto inside_grad_name = framework::GradVarName(p_names[prog_id]);
+    for (size_t param_id = 0; param_id < pg_names.size(); ++param_id) {
+      auto inside_grad_name = framework::GradVarName(p_names[param_id]);
 
       // If does not compute gradient of that variable inside rnn, just
       // continue
@@ -406,27 +406,19 @@ class RecurrentGradOp : public RecurrentBase {
         attrs["value"] = 0.0f;
 
         auto zero_op = framework::OpRegistry::CreateOp(
-            "fill_constant", {}, {{"Out", {pg_names[prog_id]}}}, attrs);
+            "fill_constant", {}, {{"Out", {pg_names[param_id]}}}, attrs);
         zero_op->Run(scope, dev_ctx);
       }
 
+      auto new_inside_name = cur_scope.Rename(inside_grad_name);
       // sum gradient
-      auto *outside_var = scope.FindVar(pg_names[prog_id]);
-      PADDLE_ENFORCE(outside_var != nullptr);
-      auto &outside_tensor =
-          *outside_var->GetMutable<framework::LoDTensor>();
-
-      std::string result_var_name;
-      auto *local_result_var = cur_scope.Var(&result_var_name);
-      auto &local_result_tensor =
-          *local_result_var->GetMutable<framework::LoDTensor>();
-
-      local_result_tensor.ShareDataWith(outside_tensor);
 
       auto sum_op = framework::OpRegistry::CreateOp(
-          "sum", {{"X", {result_var_name, inside_grad_name}}},
-          {{"Out", {result_var_name}}}, {});
+          "sum", {{"X", {pg_names[param_id], new_inside_name}}},
+          {{"Out", {pg_names[param_id]}}}, {});
       sum_op->Run(cur_scope, dev_ctx);
+
+      cur_scope.Rename(new_inside_name, inside_grad_name);
     }
   }
   VLOG(5) << "Accumulate Parameter finished ";
From 58b4c9af34da909c81ff8ce1c6f6f1e114c97537 Mon Sep 17 00:00:00 2001
From: emailweixu
Date: Fri, 10 Nov 2017 11:45:11 -0800
Subject: [PATCH 10/10] Fixing duplicate struct name TensorSetConstant. (#5532)

The TensorSetConstant struct is used in both math_function.cc and
math_function.cu. Somehow the release build can handle it correctly, but in
the debug build, set_constant_with_place() in math_function.cu uses the
TensorSetConstant from math_function.cc and causes a crash.
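The crash is a classic one-definition-rule (ODR) violation: two translation units define different classes under the same name, and the linker silently keeps just one of the implicitly inline member functions. A minimal two-file sketch of the hazard (file names and output are illustrative, not from Paddle):

    // tu_a.cc
    #include <cstdio>
    struct TensorSetConstant {  // definition #1
      void run() const { std::puts("cc version"); }
    };
    void set_constant_cc() { TensorSetConstant().run(); }

    // tu_b.cc
    #include <cstdio>
    struct TensorSetConstant {  // definition #2: same name, different body
      void run() const { std::puts("cu version"); }  // ODR violation, UB
    };
    void set_constant_cu() { TensorSetConstant().run(); }

    // main.cc
    void set_constant_cc();
    void set_constant_cu();
    int main() {
      set_constant_cc();
      set_constant_cu();  // may print "cc version" again: the linker kept
      return 0;           // only one inline TensorSetConstant::run
    }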
---
 paddle/operators/math/math_function.cc | 6 +++---
 paddle/operators/math/math_function.cu | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/paddle/operators/math/math_function.cc b/paddle/operators/math/math_function.cc
index 09c3f0b1e6..1b0d4c8bdc 100644
--- a/paddle/operators/math/math_function.cc
+++ b/paddle/operators/math/math_function.cc
@@ -234,8 +234,8 @@ void gemv(const platform::DeviceContext& context,
 
 template struct SetConstant<platform::CPUPlace, float>;
 
-struct TensorSetConstant {
-  TensorSetConstant(framework::Tensor* tensor, float value)
+struct TensorSetConstantCPU {
+  TensorSetConstantCPU(framework::Tensor* tensor, float value)
       : tensor_(tensor), value_(value) {}
   template <typename T>
   void operator()() const {
@@ -252,7 +252,7 @@ void set_constant_with_place(
     const platform::DeviceContext& context, framework::Tensor* tensor,
     float value) {
   framework::VisitDataType(framework::ToDataType(tensor->type()),
-                           TensorSetConstant(tensor, value));
+                           TensorSetConstantCPU(tensor, value));
 }
 
 struct TensorSetConstantWithPlace : public boost::static_visitor<void> {
diff --git a/paddle/operators/math/math_function.cu b/paddle/operators/math/math_function.cu
index 255e480680..817deec943 100644
--- a/paddle/operators/math/math_function.cu
+++ b/paddle/operators/math/math_function.cu
@@ -233,8 +233,8 @@ void gemv(const platform::DeviceContext& context,
 
 template struct SetConstant<platform::GPUPlace, float>;
 
-struct TensorSetConstant {
-  TensorSetConstant(const platform::DeviceContext& context,
+struct TensorSetConstantGPU {
+  TensorSetConstantGPU(const platform::DeviceContext& context,
                     framework::Tensor* tensor, float value)
       : context_(context), tensor_(tensor), value_(value) {}
 
@@ -254,7 +254,7 @@ void set_constant_with_place(
     const platform::DeviceContext& context, framework::Tensor* tensor,
     float value) {
   framework::VisitDataType(framework::ToDataType(tensor->type()),
-                           TensorSetConstant(context, tensor, value));
+                           TensorSetConstantGPU(context, tensor, value));
 }
 
 }  // namespace math
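Renaming the helpers to TensorSetConstantCPU and TensorSetConstantGPU gives each translation unit a unique type, which resolves the clash. A common alternative, sketched below as a general C++ idiom rather than what this patch does, is to give each file-local helper internal linkage via an anonymous namespace, so same-named types in different translation units can never collide at link time:

    // Inside a single translation unit (sketch): the anonymous namespace
    // makes this TensorSetConstant distinct from any same-named type in
    // every other translation unit.
    #include <cstdio>

    namespace {  // internal linkage: invisible to other translation units

    struct TensorSetConstant {
      explicit TensorSetConstant(float value) : value_(value) {}
      void apply() const { std::printf("set to %f\n", value_); }
      float value_;
    };

    }  // namespace

    int main() {
      TensorSetConstant(1.0f).apply();
      return 0;
    }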