From 0f4c7332969bdb057f855cd4a37174f3c06de281 Mon Sep 17 00:00:00 2001 From: guosheng Date: Thu, 20 Jul 2017 12:03:23 +0800 Subject: [PATCH 01/37] add ROIPooling for Fast(er) R-CNN --- paddle/gserver/layers/ROIPoolLayer.cpp | 154 ++++++++++++++++++ paddle/gserver/layers/ROIPoolLayer.h | 53 ++++++ paddle/gserver/tests/test_LayerGrad.cpp | 34 ++++ proto/ModelConfig.proto | 9 + python/paddle/trainer/config_parser.py | 11 ++ .../paddle/trainer_config_helpers/layers.py | 37 +++++ 6 files changed, 298 insertions(+) create mode 100644 paddle/gserver/layers/ROIPoolLayer.cpp create mode 100644 paddle/gserver/layers/ROIPoolLayer.h diff --git a/paddle/gserver/layers/ROIPoolLayer.cpp b/paddle/gserver/layers/ROIPoolLayer.cpp new file mode 100644 index 0000000000..04763fd152 --- /dev/null +++ b/paddle/gserver/layers/ROIPoolLayer.cpp @@ -0,0 +1,154 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "ROIPoolLayer.h" + +namespace paddle { + +REGISTER_LAYER(roi_pool, ROIPoolLayer); + +bool ROIPoolLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + Layer::init(layerMap, parameterMap); + + const ROIPoolConfig& layerConf = config_.inputs(0).roi_pool_conf(); + pooledWidth_ = layerConf.pooled_width(); + pooledHeight_ = layerConf.pooled_height(); + spatialScale_ = layerConf.spatial_scale(); + + return true; +} + +void ROIPoolLayer::forward(PassType passType) { + Layer::forward(passType); + + const ROIPoolConfig& layerConf = config_.inputs(0).roi_pool_conf(); + height_ = getInput(0).getFrameHeight(); + if (!height_) height_ = layerConf.height(); + width_ = getInput(0).getFrameWidth(); + if (!width_) width_ = layerConf.width(); + channels_ = getInputValue(0)->getWidth() / width_ / height_; + + size_t batchSize = getInput(0).getBatchSize(); + size_t numROIs = getInput(1).getBatchSize(); + + real* bottomData = getInputValue(0)->getData(); + size_t batchOffset = getInputValue(0)->getWidth(); + size_t channelOffset = height_ * width_; + real* bottomROIs = getInputValue(1)->getData(); + size_t roiOffset = getInputValue(1)->getWidth(); + size_t poolChannelOffset = pooledHeight_ * pooledWidth_; + + resetOutput(numROIs, channels_ * pooledHeight_ * pooledWidth_); + real* outputData = getOutputValue()->getData(); + Matrix::resizeOrCreate(maxIdxs_, + numROIs, + channels_ * pooledHeight_ * pooledWidth_, + false, + false); + real* argmaxData = maxIdxs_->getData(); + + size_t uZero = 0; + size_t uOne = 1; + + for (size_t n = 0; n < numROIs; ++n) { + size_t roiBatchIdx = bottomROIs[0]; + size_t roiStartW = std::round(bottomROIs[1] * spatialScale_); + size_t roiStartH = std::round(bottomROIs[2] * spatialScale_); + size_t roiEndW = std::round(bottomROIs[3] * spatialScale_); + size_t roiEndH = std::round(bottomROIs[4] * spatialScale_); + CHECK_GE(roiBatchIdx, 0); + CHECK_LT(roiBatchIdx, batchSize); + size_t roiHeight = std::max(roiEndH - roiStartH + 1, uOne); + size_t roiWidth = std::max(roiEndW - roiStartW + 1, uOne); + real binSizeH = + static_cast(roiHeight) / static_cast(pooledHeight_); + real binSizeW = + static_cast(roiWidth) / static_cast(pooledWidth_); + real* batchData = bottomData + batchOffset * roiBatchIdx; + for (size_t c = 0; c < channels_; ++c) { + for (size_t ph = 0; ph < pooledHeight_; ++ph) { + for (size_t pw = 0; pw < pooledWidth_; ++pw) { + size_t hstart = static_cast(std::floor(ph * binSizeH)); + size_t wstart = static_cast(std::floor(pw * binSizeW)); + size_t hend = static_cast(std::ceil((ph + 1) * binSizeH)); + size_t wend = static_cast(std::ceil((pw + 1) * binSizeW)); + hstart = std::min(std::max(hstart + roiStartH, uZero), height_); + wstart = std::min(std::max(wstart + roiStartW, uZero), width_); + hend = std::min(std::max(hend + roiStartH, uZero), height_); + wend = std::min(std::max(wend + roiStartW, uZero), width_); + + bool isEmpty = (hend <= hstart) || (wend <= wstart); + size_t poolIndex = ph * pooledWidth_ + pw; + if (isEmpty) { + outputData[poolIndex] = 0; + argmaxData[poolIndex] = -1; + } + + for (size_t h = hstart; h < hend; ++h) { + for (size_t w = wstart; w < wend; ++w) { + size_t index = h * width_ + w; + if (batchData[index] > outputData[poolIndex]) { + outputData[poolIndex] = batchData[index]; + argmaxData[poolIndex] = index; + } + } + } + } + } + batchData += channelOffset; + outputData += poolChannelOffset; + argmaxData += poolChannelOffset; + } + bottomROIs += roiOffset; + } +} + +void ROIPoolLayer::backward(const UpdateCallback& callback) { + real* bottomROIs = getInputValue(1)->getData(); + size_t numROIs = getInput(1).getBatchSize(); + size_t roiOffset = getInputValue(1)->getWidth(); + + MatrixPtr inGrad = getInputGrad(0); + real* inDiffData = inGrad->getData(); + size_t batchOffset = getInputValue(0)->getWidth(); + size_t channelOffset = height_ * width_; + + MatrixPtr outGrad = getOutputGrad(); + real* outDiffData = outGrad->getData(); + size_t poolChannelOffset = pooledHeight_ * pooledWidth_; + real* argmaxData = maxIdxs_->getData(); + + for (size_t n = 0; n < numROIs; ++n) { + size_t roiBatchIdx = bottomROIs[0]; + real* batchDiffData = inDiffData + batchOffset * roiBatchIdx; + for (size_t c = 0; c < channels_; ++c) { + for (size_t ph = 0; ph < pooledHeight_; ++ph) { + for (size_t pw = 0; pw < pooledWidth_; ++pw) { + size_t poolIndex = ph * pooledWidth_ + pw; + if (argmaxData[poolIndex] > 0) { + size_t index = static_cast(argmaxData[poolIndex]); + batchDiffData[index] += outDiffData[poolIndex]; + } + } + } + batchDiffData += channelOffset; + outDiffData += poolChannelOffset; + argmaxData += poolChannelOffset; + } + bottomROIs += roiOffset; + } +} + +} // namespace paddle diff --git a/paddle/gserver/layers/ROIPoolLayer.h b/paddle/gserver/layers/ROIPoolLayer.h new file mode 100644 index 0000000000..ca412d2845 --- /dev/null +++ b/paddle/gserver/layers/ROIPoolLayer.h @@ -0,0 +1,53 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "Layer.h" + +namespace paddle { + +/** + * A layer used by Fast R-CNN to extract feature maps of ROIs from the last + * feature map. + * - Input: This layer needs two input layers: The first input layer is a + * convolution layer; The second input layer contains the ROI data which is the + * output of ProposalLayer in Faster R-CNN. layers for generating bbox + * location offset and the classification confidence. - Output: The + * ROIs' feature map. Reference: Shaoqing Ren, Kaiming He, Ross Girshick, and + * Jian Sun. Faster R-CNN: Towards Real-Time Object Detection with Region + * Proposal + */ + +class ROIPoolLayer : public Layer { +protected: + size_t channels_; + size_t width_; + size_t height_; + size_t pooledWidth_; + size_t pooledHeight_; + real spatialScale_; + + MatrixPtr maxIdxs_; + +public: + explicit ROIPoolLayer(const LayerConfig& config) : Layer(config) {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; +}; +} // namespace paddle diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 9af083468c..77feb6d4c9 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -1830,6 +1830,40 @@ TEST(Layer, CropLayer) { } } +TEST(Layer, roi_pool) { + TestConfig config; + config.layerConfig.set_type("roi_pool"); + config.biasSize = 0; + LayerInputConfig* input = config.layerConfig.add_inputs(); + ROIPoolConfig* roiPoolConf = input->mutable_roi_pool_conf(); + roiPoolConf->set_pooled_width(7); + roiPoolConf->set_pooled_height(7); + roiPoolConf->set_spatial_scale(1. / 16); + roiPoolConf->set_width(14); + roiPoolConf->set_height(14); + + MatrixPtr roiValue = Matrix::create(10, 10, false, false); + roiValue->zeroMem(); + real* roiData = roiValue->getData(); + for (size_t i = 0; i < roiValue->getElementCnt() / 5; ++i) { + *roiData++ = std::rand() % 2; + *roiData++ = std::rand() % 224; + *roiData++ = std::rand() % 224; + size_t xMin = static_cast(*(roiData - 2)); + size_t yMin = static_cast(*(roiData - 1)); + *roiData++ = xMin + std::rand() % (224 - xMin); + *roiData++ = yMin + std::rand() % (224 - yMin); + } + + config.inputDefs.push_back({INPUT_DATA, "input", 3 * 14 * 14, {}}); + config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, "rois", roiValue, {}}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "roi_pool", 5, false, useGpu, false); + } +} + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); initMain(argc, argv); diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 83f72c137b..275723272b 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -289,6 +289,14 @@ message DetectionOutputConfig { optional uint32 width = 9 [default = 1]; } +message ROIPoolConfig { + required uint32 pooled_width = 1; + required uint32 pooled_height = 2; + required float spatial_scale = 3; + optional uint32 height = 4 [default = 1]; + optional uint32 width = 5 [default = 1]; +} + message LayerInputConfig { required string input_layer_name = 1; optional string input_parameter_name = 2; @@ -309,6 +317,7 @@ message LayerInputConfig { optional RowConvConfig row_conv_conf = 15; optional MultiBoxLossConfig multibox_loss_conf = 16; optional DetectionOutputConfig detection_output_conf = 17; + optional ROIPoolConfig roi_pool_conf = 18; } message LayerConfig { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index ab81e67579..bfb9dd7f1d 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1732,6 +1732,17 @@ class DetectionOutputLayer(LayerBase): self.config.size = size +@config_layer('roi_pool') +class ROIPoolLayer(LayerBase): + def __init__(self, name, inputs, pooled_width, pooled_height, + spatial_scale): + super(ROIPoolLayer, self).__init__(name, 'roi_pool', 0, inputs) + config_assert(len(inputs) == 2, 'ROIPoolLayer must have 2 inputs') + self.config.inputs[0].roi_pool_conf.pooled_width = pooled_width + self.config.inputs[0].roi_pool_conf.pooled_height = pooled_height + self.config.inputs[0].roi_pool_conf.spatial_scale = spatial_scale + + @config_layer('data') class DataLayer(LayerBase): def __init__(self, name, size, height=None, width=None, device=None): diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index fdb6f83f2b..c1bdeb6808 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -117,6 +117,7 @@ __all__ = [ 'cross_channel_norm_layer', 'multibox_loss_layer', 'detection_output_layer', + 'roi_pool_layer', 'spp_layer', 'pad_layer', 'eos_layer', @@ -201,6 +202,7 @@ class LayerType(object): PRIORBOX_LAYER = 'priorbox' MULTIBOX_LOSS_LAYER = 'multibox_loss' DETECTION_OUTPUT_LAYER = 'detection_output' + ROI_POOL_LAYER = 'roi_pool' CTC_LAYER = 'ctc' WARP_CTC_LAYER = 'warp_ctc' @@ -1200,6 +1202,41 @@ def detection_output_layer(input_loc, name, LayerType.DETECTION_OUTPUT_LAYER, parents=parents, size=size) +@wrap_name_default("roi_pool") +def roi_pool_layer(input, + rois, + pooled_width, + pooled_height, + spatial_scale, + name=None): + """ + A layer used by Fast R-CNN to extract feature maps of ROIs from the last + feature map. + + :param name: The Layer Name. + :type name: basestring + :param input: The input layer. + :type input: LayerOutput. + :param rois: The input ROIs' data. + :type rois: LayerOutput. + :param pooled_width: The width after pooling. + :type pooled_width: int + :param pooled_height: The height after pooling. + :type pooled_height: int + :param spatial_scale: The spatial scale between the image and feature map. + :type spatial_scale: float + :return: LayerOutput + """ + Layer( + name=name, + type=LayerType.ROI_POOL_LAYER, + inputs=[input.name, rois.name], + pooled_width=pooled_width, + pooled_height=pooled_height, + spatial_scale=spatial_scale) + return LayerOutput(name, LayerType.ROI_POOL_LAYER, parents=[input, rois]) + + @wrap_name_default("cross_channel_norm") def cross_channel_norm_layer(input, name=None, param_attr=None): """ From d5384e640f1f972e9685e51cf018d0ff478c4362 Mon Sep 17 00:00:00 2001 From: guosheng Date: Thu, 20 Jul 2017 13:12:10 +0800 Subject: [PATCH 02/37] refine layer gradient test of ROIPoolLayer --- paddle/gserver/tests/test_LayerGrad.cpp | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 77feb6d4c9..b6282b472f 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -1842,17 +1842,20 @@ TEST(Layer, roi_pool) { roiPoolConf->set_width(14); roiPoolConf->set_height(14); - MatrixPtr roiValue = Matrix::create(10, 10, false, false); + const size_t roiNum = 10; + const size_t roiDim = 10; + const size_t batchSize = 5; + MatrixPtr roiValue = Matrix::create(roiNum, roiDim, false, false); roiValue->zeroMem(); real* roiData = roiValue->getData(); - for (size_t i = 0; i < roiValue->getElementCnt() / 5; ++i) { - *roiData++ = std::rand() % 2; - *roiData++ = std::rand() % 224; - *roiData++ = std::rand() % 224; - size_t xMin = static_cast(*(roiData - 2)); - size_t yMin = static_cast(*(roiData - 1)); - *roiData++ = xMin + std::rand() % (224 - xMin); - *roiData++ = yMin + std::rand() % (224 - yMin); + for (size_t i = 0; i < roiNum; ++i) { + roiData[i * roiDim + 0] = std::rand() % batchSize; + roiData[i * roiDim + 1] = std::rand() % 224; // xMin + roiData[i * roiDim + 2] = std::rand() % 224; // yMin + size_t xMin = static_cast(roiData[i * roiDim + 1]); + size_t yMin = static_cast(roiData[i * roiDim + 2]); + roiData[i * roiDim + 3] = xMin + std::rand() % (224 - xMin); // xMax + roiData[i * roiDim + 4] = yMin + std::rand() % (224 - yMin); // yMax } config.inputDefs.push_back({INPUT_DATA, "input", 3 * 14 * 14, {}}); @@ -1860,7 +1863,7 @@ TEST(Layer, roi_pool) { config.layerConfig.add_inputs(); for (auto useGpu : {false, true}) { - testLayerGrad(config, "roi_pool", 5, false, useGpu, false); + testLayerGrad(config, "roi_pool", batchSize, false, useGpu, false); } } From 1c00767731e2cf6d16abfd7b3c5002015fe5fd27 Mon Sep 17 00:00:00 2001 From: guosheng Date: Thu, 20 Jul 2017 15:21:45 +0800 Subject: [PATCH 03/37] fix ci bug on andriod building --- paddle/gserver/layers/ROIPoolLayer.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/paddle/gserver/layers/ROIPoolLayer.cpp b/paddle/gserver/layers/ROIPoolLayer.cpp index 04763fd152..34ba9030f7 100644 --- a/paddle/gserver/layers/ROIPoolLayer.cpp +++ b/paddle/gserver/layers/ROIPoolLayer.cpp @@ -64,10 +64,10 @@ void ROIPoolLayer::forward(PassType passType) { for (size_t n = 0; n < numROIs; ++n) { size_t roiBatchIdx = bottomROIs[0]; - size_t roiStartW = std::round(bottomROIs[1] * spatialScale_); - size_t roiStartH = std::round(bottomROIs[2] * spatialScale_); - size_t roiEndW = std::round(bottomROIs[3] * spatialScale_); - size_t roiEndH = std::round(bottomROIs[4] * spatialScale_); + size_t roiStartW = round(bottomROIs[1] * spatialScale_); + size_t roiStartH = round(bottomROIs[2] * spatialScale_); + size_t roiEndW = round(bottomROIs[3] * spatialScale_); + size_t roiEndH = round(bottomROIs[4] * spatialScale_); CHECK_GE(roiBatchIdx, 0); CHECK_LT(roiBatchIdx, batchSize); size_t roiHeight = std::max(roiEndH - roiStartH + 1, uOne); From 687b3749b4a4217c7f5d8b7e85c7b0c922cc4f6c Mon Sep 17 00:00:00 2001 From: guosheng Date: Sat, 22 Jul 2017 13:57:21 +0800 Subject: [PATCH 04/37] fix bug on GPU test --- paddle/gserver/layers/ROIPoolLayer.cpp | 89 ++++++++++++++++++++++---- 1 file changed, 78 insertions(+), 11 deletions(-) diff --git a/paddle/gserver/layers/ROIPoolLayer.cpp b/paddle/gserver/layers/ROIPoolLayer.cpp index 34ba9030f7..3d26286376 100644 --- a/paddle/gserver/layers/ROIPoolLayer.cpp +++ b/paddle/gserver/layers/ROIPoolLayer.cpp @@ -43,15 +43,46 @@ void ROIPoolLayer::forward(PassType passType) { size_t batchSize = getInput(0).getBatchSize(); size_t numROIs = getInput(1).getBatchSize(); - real* bottomData = getInputValue(0)->getData(); - size_t batchOffset = getInputValue(0)->getWidth(); + MatrixPtr dataValue = getInputValue(0); + MatrixPtr roiValue = getInputValue(1); + resetOutput(numROIs, channels_ * pooledHeight_ * pooledWidth_); + MatrixPtr outputValue = getOutputValue(); + + if (useGpu_) { + MatrixPtr dataCpuBuffer; + Matrix::resizeOrCreate(dataCpuBuffer, + dataValue->getHeight(), + dataValue->getWidth(), + false, + false); + MatrixPtr roiCpuBuffer; + Matrix::resizeOrCreate(roiCpuBuffer, + roiValue->getHeight(), + roiValue->getWidth(), + false, + false); + dataCpuBuffer->copyFrom(*dataValue); + roiCpuBuffer->copyFrom(*roiValue); + dataValue = dataCpuBuffer; + roiValue = roiCpuBuffer; + MatrixPtr outputCpuBuffer; + Matrix::resizeOrCreate(outputCpuBuffer, + outputValue->getHeight(), + outputValue->getWidth(), + false, + false); + outputCpuBuffer->copyFrom(*outputValue); + outputValue = outputCpuBuffer; + } + + real* bottomData = dataValue->getData(); + size_t batchOffset = dataValue->getWidth(); size_t channelOffset = height_ * width_; - real* bottomROIs = getInputValue(1)->getData(); - size_t roiOffset = getInputValue(1)->getWidth(); + real* bottomROIs = roiValue->getData(); + size_t roiOffset = roiValue->getWidth(); size_t poolChannelOffset = pooledHeight_ * pooledWidth_; - resetOutput(numROIs, channels_ * pooledHeight_ * pooledWidth_); - real* outputData = getOutputValue()->getData(); + real* outputData = outputValue->getData(); Matrix::resizeOrCreate(maxIdxs_, numROIs, channels_ * pooledHeight_ * pooledWidth_, @@ -113,20 +144,52 @@ void ROIPoolLayer::forward(PassType passType) { } bottomROIs += roiOffset; } + if (useGpu_) { + getOutputValue()->copyFrom(*outputValue); + } } void ROIPoolLayer::backward(const UpdateCallback& callback) { - real* bottomROIs = getInputValue(1)->getData(); + MatrixPtr inGradValue = getInputGrad(0); + MatrixPtr outGradValue = getOutputGrad(); + MatrixPtr roiValue = getInputValue(1); + + if (useGpu_) { + MatrixPtr inGradCpuBuffer; + Matrix::resizeOrCreate(inGradCpuBuffer, + inGradValue->getHeight(), + inGradValue->getWidth(), + false, + false); + MatrixPtr outGradCpuBuffer; + Matrix::resizeOrCreate(outGradCpuBuffer, + outGradValue->getHeight(), + outGradValue->getWidth(), + false, + false); + MatrixPtr roiCpuBuffer; + Matrix::resizeOrCreate(roiCpuBuffer, + roiValue->getHeight(), + roiValue->getWidth(), + false, + false); + inGradCpuBuffer->copyFrom(*inGradValue); + outGradCpuBuffer->copyFrom(*outGradValue); + roiCpuBuffer->copyFrom(*roiValue); + inGradValue = inGradCpuBuffer; + outGradValue = outGradCpuBuffer; + roiValue = roiCpuBuffer; + } + + real* bottomROIs = roiValue->getData(); size_t numROIs = getInput(1).getBatchSize(); size_t roiOffset = getInputValue(1)->getWidth(); - MatrixPtr inGrad = getInputGrad(0); - real* inDiffData = inGrad->getData(); + real* inDiffData = inGradValue->getData(); size_t batchOffset = getInputValue(0)->getWidth(); size_t channelOffset = height_ * width_; - MatrixPtr outGrad = getOutputGrad(); - real* outDiffData = outGrad->getData(); + real* outDiffData = outGradValue->getData(); size_t poolChannelOffset = pooledHeight_ * pooledWidth_; real* argmaxData = maxIdxs_->getData(); @@ -149,6 +212,10 @@ void ROIPoolLayer::backward(const UpdateCallback& callback) { } bottomROIs += roiOffset; } + + if (useGpu_) { + getInputGrad(0)->copyFrom(*inGradValue); + } } } // namespace paddle From 3cf01b5d52616e1605d3d089ceb798bb16ab8f80 Mon Sep 17 00:00:00 2001 From: guosheng Date: Wed, 16 Aug 2017 17:19:02 +0800 Subject: [PATCH 05/37] refine ROIPoolLayer --- doc/api/v2/config/layer.rst | 5 +++ paddle/gserver/layers/ROIPoolLayer.cpp | 17 +++---- paddle/gserver/layers/ROIPoolLayer.h | 1 + .../paddle/trainer_config_helpers/layers.py | 10 ++++- .../tests/configs/file_list.sh | 2 +- .../protostr/test_roi_pool_layer.protostr | 45 +++++++++++++++++++ .../tests/configs/test_roi_pool_layer.py | 14 ++++++ 7 files changed, 82 insertions(+), 12 deletions(-) create mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr create mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst index cb330ea5e1..3b2ee37628 100644 --- a/doc/api/v2/config/layer.rst +++ b/doc/api/v2/config/layer.rst @@ -82,6 +82,11 @@ maxout .. autoclass:: paddle.v2.layer.maxout :noindex: +roi_pool +-------- +.. autoclass:: paddle.v2.layer.roi_pool + :noindex: + Norm Layer ========== diff --git a/paddle/gserver/layers/ROIPoolLayer.cpp b/paddle/gserver/layers/ROIPoolLayer.cpp index 3d26286376..131fd7e52b 100644 --- a/paddle/gserver/layers/ROIPoolLayer.cpp +++ b/paddle/gserver/layers/ROIPoolLayer.cpp @@ -48,7 +48,7 @@ void ROIPoolLayer::forward(PassType passType) { resetOutput(numROIs, channels_ * pooledHeight_ * pooledWidth_); MatrixPtr outputValue = getOutputValue(); - if (useGpu_) { + if (useGpu_) { // TODO(guosheng): implement on GPU later MatrixPtr dataCpuBuffer; Matrix::resizeOrCreate(dataCpuBuffer, dataValue->getHeight(), @@ -90,9 +90,6 @@ void ROIPoolLayer::forward(PassType passType) { false); real* argmaxData = maxIdxs_->getData(); - size_t uZero = 0; - size_t uOne = 1; - for (size_t n = 0; n < numROIs; ++n) { size_t roiBatchIdx = bottomROIs[0]; size_t roiStartW = round(bottomROIs[1] * spatialScale_); @@ -101,8 +98,8 @@ void ROIPoolLayer::forward(PassType passType) { size_t roiEndH = round(bottomROIs[4] * spatialScale_); CHECK_GE(roiBatchIdx, 0); CHECK_LT(roiBatchIdx, batchSize); - size_t roiHeight = std::max(roiEndH - roiStartH + 1, uOne); - size_t roiWidth = std::max(roiEndW - roiStartW + 1, uOne); + size_t roiHeight = std::max(roiEndH - roiStartH + 1, 1UL); + size_t roiWidth = std::max(roiEndW - roiStartW + 1, 1UL); real binSizeH = static_cast(roiHeight) / static_cast(pooledHeight_); real binSizeW = @@ -115,10 +112,10 @@ void ROIPoolLayer::forward(PassType passType) { size_t wstart = static_cast(std::floor(pw * binSizeW)); size_t hend = static_cast(std::ceil((ph + 1) * binSizeH)); size_t wend = static_cast(std::ceil((pw + 1) * binSizeW)); - hstart = std::min(std::max(hstart + roiStartH, uZero), height_); - wstart = std::min(std::max(wstart + roiStartW, uZero), width_); - hend = std::min(std::max(hend + roiStartH, uZero), height_); - wend = std::min(std::max(wend + roiStartW, uZero), width_); + hstart = std::min(std::max(hstart + roiStartH, 0UL), height_); + wstart = std::min(std::max(wstart + roiStartW, 0UL), width_); + hend = std::min(std::max(hend + roiStartH, 0UL), height_); + wend = std::min(std::max(wend + roiStartW, 0UL), width_); bool isEmpty = (hend <= hstart) || (wend <= wstart); size_t poolIndex = ph * pooledWidth_ + pw; diff --git a/paddle/gserver/layers/ROIPoolLayer.h b/paddle/gserver/layers/ROIPoolLayer.h index d04362f0d4..796467a5c8 100644 --- a/paddle/gserver/layers/ROIPoolLayer.h +++ b/paddle/gserver/layers/ROIPoolLayer.h @@ -29,6 +29,7 @@ namespace paddle { * Reference: * Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. * Faster R-CNN: Towards Real-Time Object Detection with Region Proposal + * Networks */ class ROIPoolLayer : public Layer { diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 590097b96b..6703db5f0b 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -1257,6 +1257,7 @@ def roi_pool_layer(input, pooled_width, pooled_height, spatial_scale, + num_channels=None, name=None): """ A layer used by Fast R-CNN to extract feature maps of ROIs from the last @@ -1274,8 +1275,14 @@ def roi_pool_layer(input, :type pooled_height: int :param spatial_scale: The spatial scale between the image and feature map. :type spatial_scale: float + :param num_channels: number of input channel. + :type num_channels: int :return: LayerOutput """ + if num_channels is None: + assert input.num_filters is not None + num_channels = input.num_filters + size = num_channels * pooled_width * pooled_height Layer( name=name, type=LayerType.ROI_POOL_LAYER, @@ -1283,7 +1290,8 @@ def roi_pool_layer(input, pooled_width=pooled_width, pooled_height=pooled_height, spatial_scale=spatial_scale) - return LayerOutput(name, LayerType.ROI_POOL_LAYER, parents=[input, rois]) + return LayerOutput( + name, LayerType.ROI_POOL_LAYER, parents=[input, rois], size=size) @wrap_name_default("cross_channel_norm") diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index a61beb871a..58e36eb333 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -8,6 +8,6 @@ test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer -test_kmax_seq_socre_layer test_seq_select_layers) +test_kmax_seq_socre_layer test_seq_select_layers test_roi_pool_layer) export whole_configs=(test_split_datasource) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr new file mode 100644 index 0000000000..e8c379b17b --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr @@ -0,0 +1,45 @@ +type: "nn" +layers { + name: "data" + type: "data" + size: 588 + active_type: "" + height: 14 + width: 14 +} +layers { + name: "rois" + type: "data" + size: 10 + active_type: "" +} +layers { + name: "__roi_pool_0__" + type: "roi_pool" + active_type: "" + inputs { + input_layer_name: "data" + roi_pool_conf { + pooled_width: 7 + pooled_height: 7 + spatial_scale: 0.0625 + } + } + inputs { + input_layer_name: "rois" + } +} +input_layer_names: "data" +input_layer_names: "rois" +output_layer_names: "__roi_pool_0__" +sub_models { + name: "root" + layer_names: "data" + layer_names: "rois" + layer_names: "__roi_pool_0__" + input_layer_names: "data" + input_layer_names: "rois" + output_layer_names: "__roi_pool_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py new file mode 100644 index 0000000000..0d6ca9f1bb --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py @@ -0,0 +1,14 @@ +from paddle.trainer_config_helpers import * + +data = data_layer(name='data', size=3 * 14 * 14, height=14, width=14) + +rois = data_layer(name='rois', size=10) + +roi_pool = roi_pool_layer( + input=data, + rois=rois, + pooled_width=7, + pooled_height=7, + spatial_scale=1. / 16) + +outputs(roi_pool) From ad5e7cc0319c01e64600b0383e83fac89d3e91f7 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Wed, 13 Sep 2017 15:57:07 +0800 Subject: [PATCH 06/37] Implemented by boost preprocessor. --- paddle/operators/expand_op.cc | 103 ++++++++++++ paddle/operators/expand_op.cu | 23 +++ paddle/operators/expand_op.h | 152 ++++++++++++++++++ paddle/pybind/pybind.cc | 1 + .../paddle/v2/framework/tests/CMakeLists.txt | 1 + .../v2/framework/tests/test_expand_op.py | 67 ++++++++ 6 files changed, 347 insertions(+) create mode 100644 paddle/operators/expand_op.cc create mode 100644 paddle/operators/expand_op.cu create mode 100644 paddle/operators/expand_op.h create mode 100644 python/paddle/v2/framework/tests/test_expand_op.py diff --git a/paddle/operators/expand_op.cc b/paddle/operators/expand_op.cc new file mode 100644 index 0000000000..9d1d76a290 --- /dev/null +++ b/paddle/operators/expand_op.cc @@ -0,0 +1,103 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/expand_op.h" + +namespace paddle { +namespace operators { + +using framework::Tensor; + +class ExpandOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext& ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "X must be initialized."); + std::vector expand_times = Attr>("expandTimes"); + auto* x = ctx.Input("X"); + auto x_dims = x->dims(); + + PADDLE_ENFORCE_EQ(static_cast(framework::arity(x_dims)), + expand_times.size(), + "Number of attribute (expandTimes) value must be equal " + "to rank of X."); + PADDLE_ENFORCE_LE(framework::arity(x_dims), 6, + "Rank of X must not be greater than 6."); + + std::vector out_shape(x_dims.size()); + for (size_t i = 0; i < expand_times.size(); ++i) { + PADDLE_ENFORCE_GE(expand_times[i], 1, + "Each value of expand times should not be " + "less than 1."); + out_shape[i] = x_dims[i] * expand_times[i]; + } + auto* out = ctx.Output("Out"); + out->Resize(framework::make_ddim(out_shape)); + } +}; + +class ExpandOpMaker : public framework::OpProtoAndCheckerMaker { + public: + ExpandOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "Input tensor."); + AddOutput("Out", "Expanded result by tiling input X."); + AddAttr>("expandTimes", + "Expand times for each dimension."); + AddComment(R"DOC( +Expand operator tiles the input by given times. You should set times for each +dimension by providing attribute 'expandTimes'. Rank of input tensor should be +in [1, 6]. Please draw an inttention that size of 'expandTimes' must be same +with rank of input tensor. +)DOC"); + } +}; + +class ExpandGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext& ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "X must be initialized."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")), + "Input(Out@GRAD) should not be null."); + auto x_dims = ctx.Input("X")->dims(); + std::vector expand_times = Attr>("expandTimes"); + auto out_dims = ctx.Input(framework::GradVarName("Out"))->dims(); + auto* x_grad = ctx.Output(framework::GradVarName("X")); + + for (size_t i = 0; i < expand_times.size(); ++i) { + PADDLE_ENFORCE_EQ(x_dims[i] * expand_times[i], out_dims[i], + "Size of each dimension of Input(Out@GRAD) should be " + "equal to multiplication of crroresponding sizes of " + "Input(X) and expandTimes."); + } + + if (x_grad) x_grad->Resize(x_dims); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(expand, ops::ExpandOp, ops::ExpandOpMaker, expand_grad, + ops::ExpandGradOp); +REGISTER_OP_CPU_KERNEL(expand, + ops::ExpandKernel); +REGISTER_OP_CPU_KERNEL( + expand_grad, ops::ExpandGradKernel); diff --git a/paddle/operators/expand_op.cu b/paddle/operators/expand_op.cu new file mode 100644 index 0000000000..6744562b6c --- /dev/null +++ b/paddle/operators/expand_op.cu @@ -0,0 +1,23 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#define EIGEN_USE_GPU + +#include "paddle/operators/expand_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(expand, + ops::ExpandKernel); +REGISTER_OP_GPU_KERNEL( + expand_grad, ops::ExpandGradKernel); diff --git a/paddle/operators/expand_op.h b/paddle/operators/expand_op.h new file mode 100644 index 0000000000..5285d7525b --- /dev/null +++ b/paddle/operators/expand_op.h @@ -0,0 +1,152 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + You may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" +#include "paddle/framework/operator.h" + +#define EXPAND_TEMPLATE(z, n, data) \ + case n + 1: { \ + Expand(context); \ + break; \ + } +#define REP_EXPAND_TEMPLATE(n) BOOST_PP_REPEAT(n, EXPAND_TEMPLATE, ~) + +#define COND(n) BOOST_PP_GREATER_EQUAL(BOOST_PP_DIV(n, 6), BOOST_PP_MOD(n, 6)) +#define EXPAND_GRAD_CASE(n) \ + case n: { \ + ExpandBackward(context, reshape_dims_vec, reduce_dims_vec); \ + break; \ + } +#define EXPAND_TEMPLATE_GRAD(z, n, data) \ + BOOST_PP_IF(COND(n), EXPAND_GRAD_CASE(n), ) +#define REP_EXPAND_GRAD_TEMPLATE(n) BOOST_PP_REPEAT(n, EXPAND_TEMPLATE_GRAD, ~) + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +template +using EigenVector = framework::EigenVector; +template +using EigenTensor = framework::EigenTensor; + +template +class ExpandKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto rank = framework::arity(context.Input("X")->dims()); + switch (rank) { + REP_EXPAND_TEMPLATE(6) + default: + PADDLE_ENFORCE(false, "Only support tensor whose rank in [1, 6]."); + }; + } + + protected: + template + void Expand(const framework::ExecutionContext& context) const { + auto* in0 = context.Input("X"); + auto expand_times = context.Attr>("expandTimes"); + auto* out0 = context.Output("Out"); + Eigen::DSizes bcast_dims; + auto x_dims = in0->dims(); + for (size_t i = 0; i < expand_times.size(); ++i) { + bcast_dims[i] = expand_times[i]; + } + auto x = EigenTensor::From(*in0); + out0->mutable_data(context.GetPlace()); + auto y = EigenTensor::From(*out0); + auto place = context.GetEigenDevice(); + y.device(place) = x.broadcast(bcast_dims); + } +}; + +template +class ExpandGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* in0 = context.Input("X"); + auto expand_times = context.Attr>("expandTimes"); + auto x_dims = in0->dims(); + std::vector reshape_dims_vec; + std::vector reduce_dims_vec; + for (size_t i = 0; i < expand_times.size(); ++i) { + if (expand_times[i] == 1) { + reshape_dims_vec.push_back(x_dims[i]); + } else { + if (x_dims[i] == 1) { + reduce_dims_vec.push_back(reshape_dims_vec.size()); + reshape_dims_vec.push_back(expand_times[i]); + } else { + reduce_dims_vec.push_back(reshape_dims_vec.size()); + reshape_dims_vec.push_back(expand_times[i]); + reshape_dims_vec.push_back(x_dims[i]); + } + } + } + + int dims = reshape_dims_vec.size() * 6 + reduce_dims_vec.size() - 7; + switch (dims) { + REP_EXPAND_GRAD_TEMPLATE(72) + default: + PADDLE_ENFORCE(false, "Only support tensor whose rank in [1, 6]."); + }; + } + + protected: + template + void ExpandBackward(const framework::ExecutionContext& context, + const std::vector& reshape_dims_vec, + const std::vector& reduce_dims_vec) const { + size_t reshape_size = Dims / 6 + 1; + size_t reduce_size = Dims % 6 + 1; + PADDLE_ENFORCE_EQ(reshape_size, reshape_dims_vec.size(), + "Inconsistent size between Dims and " + "reshape dimensions."); + PADDLE_ENFORCE_EQ(reduce_size, reduce_dims_vec.size(), + "Inconsistent size between Dims and " + "reduce dimensions."); + auto* in0 = context.Input(framework::GradVarName("Out")); + auto* out0 = context.Output(framework::GradVarName("X")); + auto x = EigenVector::Flatten(*(context.Input("X"))); + out0->mutable_data(context.GetPlace()); + auto x_grad = EigenVector::Flatten(*out0); + Eigen::DSizes reshape_dims; + for (size_t i = 0; i < reshape_size; ++i) { + reshape_dims[i] = reshape_dims_vec[i]; + } + Eigen::DSizes reduce_dims; + for (size_t i = 0; i < reduce_size; ++i) { + reduce_dims[i] = reduce_dims_vec[i]; + } + auto out_grad = EigenVector::Flatten(*in0); + x_grad.device(context.GetEigenDevice()) = + out_grad.reshape(reshape_dims).sum(reduce_dims).reshape(x.dimensions()); + } +}; + +} // operators +} // paddle diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 3958b53c22..ea09287f95 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -54,6 +54,7 @@ USE_CPU_ONLY_OP(concat); USE_OP(top_k); USE_OP(squared_l2_distance); USE_OP(sum); +USE_OP(expand); namespace paddle { namespace framework { diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index 3de9e69e34..e141013a69 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -35,3 +35,4 @@ py_test(test_sum_op SRCS test_sum_op.py) py_test(mnist SRCS mnist.py) py_test(test_concat_op SRCS test_concat_op.py) py_test(test_squared_l2_distance_op SRCS test_squared_l2_distance_op.py) +py_test(test_expand_op SRCS test_expand_op.py) diff --git a/python/paddle/v2/framework/tests/test_expand_op.py b/python/paddle/v2/framework/tests/test_expand_op.py new file mode 100644 index 0000000000..9f5bd5f522 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_expand_op.py @@ -0,0 +1,67 @@ +import unittest +import numpy as np +from op_test import OpTest + + +class TestExpandOpRank1(OpTest): + def setUp(self): + self.op_type = "expand" + self.inputs = {'X': np.random.random(12).astype("float32")} + self.attrs = {'expandTimes': [2]} + output = np.tile(self.inputs['X'], 2) + self.outputs = {'Out': output} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + +class TestExpandOpRank2(OpTest): + def setUp(self): + self.op_type = "expand" + self.inputs = {'X': np.random.random((12, 14)).astype("float32")} + self.attrs = {'expandTimes': [3, 4]} + output = np.tile(self.inputs['X'], (3, 4)) + self.outputs = {'Out': output} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + +class TestExpandOpRank3(OpTest): + def setUp(self): + self.op_type = "expand" + self.inputs = {'X': np.random.random((2, 4, 5)).astype("float32")} + self.attrs = {'expandTimes': [3, 2, 1]} + output = np.tile(self.inputs['X'], (3, 2, 1)) + self.outputs = {'Out': output} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + +class TestExpandOpRank4(OpTest): + def setUp(self): + self.op_type = "expand" + self.inputs = {'X': np.random.random((2, 4, 5, 7)).astype("float32")} + self.attrs = {'expandTimes': [3, 2, 1, 2]} + output = np.tile(self.inputs['X'], (3, 2, 1, 2)) + self.outputs = {'Out': output} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + +if __name__ == "__main__": + unittest.main() From f2d596d41dafb64ae5616921c433559265d106dc Mon Sep 17 00:00:00 2001 From: yangyaming Date: Wed, 13 Sep 2017 16:29:08 +0800 Subject: [PATCH 07/37] Fix typos. --- paddle/operators/expand_op.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/paddle/operators/expand_op.cc b/paddle/operators/expand_op.cc index 9d1d76a290..7d22d8a9f0 100644 --- a/paddle/operators/expand_op.cc +++ b/paddle/operators/expand_op.cc @@ -58,10 +58,10 @@ class ExpandOpMaker : public framework::OpProtoAndCheckerMaker { AddAttr>("expandTimes", "Expand times for each dimension."); AddComment(R"DOC( -Expand operator tiles the input by given times. You should set times for each -dimension by providing attribute 'expandTimes'. Rank of input tensor should be -in [1, 6]. Please draw an inttention that size of 'expandTimes' must be same -with rank of input tensor. +Expand operator tiles the input by given times number. You should set times +number for each dimension by providing attribute 'expandTimes'. Rank of input +tensor should be in [1, 6]. Please draw an attention that size of +'expandTimes' must be same with rank of input tensor. )DOC"); } }; From 4520afcf3e8255b97325d1d4ab79d77e13a0655f Mon Sep 17 00:00:00 2001 From: yangyaming Date: Wed, 13 Sep 2017 17:07:00 +0800 Subject: [PATCH 08/37] Consider corner case. --- paddle/operators/expand_op.h | 22 ++++++++++++++----- .../v2/framework/tests/test_expand_op.py | 8 +++---- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/paddle/operators/expand_op.h b/paddle/operators/expand_op.h index 5285d7525b..2de849c484 100644 --- a/paddle/operators/expand_op.h +++ b/paddle/operators/expand_op.h @@ -109,11 +109,23 @@ class ExpandGradKernel : public framework::OpKernel { } int dims = reshape_dims_vec.size() * 6 + reduce_dims_vec.size() - 7; - switch (dims) { - REP_EXPAND_GRAD_TEMPLATE(72) - default: - PADDLE_ENFORCE(false, "Only support tensor whose rank in [1, 6]."); - }; + // no need reduce, just copy + if (reduce_dims_vec.size() == 0) { + auto* in0 = context.Input(framework::GradVarName("Out")); + auto* out0 = context.Output(framework::GradVarName("X")); + out0->mutable_data(context.GetPlace()); + if (platform::is_cpu_place(context.GetPlace())) { + out0->CopyFrom(*in0, platform::CPUPlace()); + } else { + out0->CopyFrom(*in0, platform::GPUPlace()); + } + } else { + switch (dims) { + REP_EXPAND_GRAD_TEMPLATE(72) + default: + PADDLE_ENFORCE(false, "Only support tensor whose rank in [1, 6]."); + }; + } } protected: diff --git a/python/paddle/v2/framework/tests/test_expand_op.py b/python/paddle/v2/framework/tests/test_expand_op.py index 9f5bd5f522..1bf9a91298 100644 --- a/python/paddle/v2/framework/tests/test_expand_op.py +++ b/python/paddle/v2/framework/tests/test_expand_op.py @@ -22,8 +22,8 @@ class TestExpandOpRank2(OpTest): def setUp(self): self.op_type = "expand" self.inputs = {'X': np.random.random((12, 14)).astype("float32")} - self.attrs = {'expandTimes': [3, 4]} - output = np.tile(self.inputs['X'], (3, 4)) + self.attrs = {'expandTimes': [1, 1]} + output = np.tile(self.inputs['X'], (1, 1)) self.outputs = {'Out': output} def test_check_output(self): @@ -37,8 +37,8 @@ class TestExpandOpRank3(OpTest): def setUp(self): self.op_type = "expand" self.inputs = {'X': np.random.random((2, 4, 5)).astype("float32")} - self.attrs = {'expandTimes': [3, 2, 1]} - output = np.tile(self.inputs['X'], (3, 2, 1)) + self.attrs = {'expandTimes': [1, 1, 1]} + output = np.tile(self.inputs['X'], (1, 1, 1)) self.outputs = {'Out': output} def test_check_output(self): From 611ee68b7888c8680b1c8ee967ad964d3c1e7f4c Mon Sep 17 00:00:00 2001 From: peterzhang2029 Date: Mon, 23 Oct 2017 17:33:23 +0800 Subject: [PATCH 09/37] add bilinear tensor product op --- .../operators/bilinear_tensor_product_op.cc | 153 +++++++++++++++ .../operators/bilinear_tensor_product_op.cu | 24 +++ paddle/operators/bilinear_tensor_product_op.h | 176 ++++++++++++++++++ .../tests/test_bilinear_tensor_product_op.py | 30 +++ 4 files changed, 383 insertions(+) create mode 100644 paddle/operators/bilinear_tensor_product_op.cc create mode 100644 paddle/operators/bilinear_tensor_product_op.cu create mode 100644 paddle/operators/bilinear_tensor_product_op.h create mode 100644 python/paddle/v2/framework/tests/test_bilinear_tensor_product_op.py diff --git a/paddle/operators/bilinear_tensor_product_op.cc b/paddle/operators/bilinear_tensor_product_op.cc new file mode 100644 index 0000000000..64569e5fe7 --- /dev/null +++ b/paddle/operators/bilinear_tensor_product_op.cc @@ -0,0 +1,153 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/bilinear_tensor_product_op.h" + +namespace paddle { +namespace operators { + +using framework::Tensor; + +class BilinearTensorProductOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Weight"), + "Input(Weight) should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should not be null."); + auto x_dims = ctx->GetInputDim("X"); + auto y_dims = ctx->GetInputDim("Y"); + auto weight_dims = ctx->GetInputDim("Weight"); + + PADDLE_ENFORCE_EQ(x_dims.size(), 1, "The input X must be a vector."); + PADDLE_ENFORCE_EQ(y_dims.size(), 1, "The input Y must be a vector."); + PADDLE_ENFORCE_EQ(weight_dims.size(), 3, + "The input Weight must be a 3D tensor."); + PADDLE_ENFORCE_GT(weight_dims[0], 0, + "The first dimension of Weight must be larger than 0."); + PADDLE_ENFORCE_GT(weight_dims[1], 0, + "The second dimension of Weight must be larger than 0."); + PADDLE_ENFORCE_GT(weight_dims[2], 0, + "The third dimension of Weight must be larger than 0."); + PADDLE_ENFORCE_EQ(x_dims[0], weight_dims[1], + "The dimension of X must be equal with the second " + "dimension of the Weight."); + PADDLE_ENFORCE_EQ(y_dims[0], weight_dims[2], + "The dimension of Y must be equal with the third " + "dimension of the Weight."); + + auto bias = Input("Bias"); + if (bias != framework::kEmptyVarName) { + auto bias_dims = ctx->GetInputDim("Bias"); + PADDLE_ENFORCE_EQ(bias_dims.size(), 1, + "The input Bias must be a vector."); + PADDLE_ENFORCE_EQ(bias_dims[0], weight_dims[0], + "The dimension of Bias must be equal with the first " + "dimension of the Weight."); + } + + ctx->SetOutputDim("Out", {weight_dims[0]}); + } +}; + +class BilinearTensorProductOpMaker : public framework::OpProtoAndCheckerMaker { + public: + BilinearTensorProductOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "The first input of tensor op"); + AddInput("Y", "The second input of tensor op"); + AddInput("Weight", "The input weight of tensor op"); + AddInput("Bias", "The input bias of tensor op"); + AddOutput("Out", "The output of tensor op"); + AddComment(R"DOC( +Bilinear Tensor Product operator. +Given input X and Y, a 3D tensor weight, and bias. Each entry of the output is +computed by one slice i = 1, . . . , k of the tensor: Out_i = X*W_i*Y + Bias_i . + +The equation of this operator is: + + Out = \sum_{i} X*W_i*Y + Bias + +)DOC"); + } +}; + +class BilinearTensorProductOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null"); + PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null"); + PADDLE_ENFORCE(ctx->HasInput("Weight"), "Input(Weight) should not be null"); + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), + "Input (Out@GRAD) should not be null"); + auto x_dims = ctx->GetInputDim("X"); + auto y_dims = ctx->GetInputDim("Y"); + auto weight_dims = ctx->GetInputDim("Weight"); + auto out_dims = ctx->GetInputDim(framework::GradVarName("Out")); + + PADDLE_ENFORCE_EQ(out_dims.size(), 1, "The Out@GRAD must be a vector."); + PADDLE_ENFORCE_EQ( + weight_dims[0], out_dims[0], + "The dimension of Out@GRAD must be equal with the third dimension of " + "the Weight."); + + auto bias = Input("Bias"); + if (bias != framework::kEmptyVarName) { + auto bias_dims = ctx->GetInputDim("Bias"); + PADDLE_ENFORCE_EQ(bias_dims.size(), 1, "Input Bias must be a vector."); + PADDLE_ENFORCE_EQ( + bias_dims[0], out_dims[0], + "The dimension of Bias must be equal with the Out@GRAD "); + auto bias_grad_name = framework::GradVarName("Bias"); + if (ctx->HasOutput(bias_grad_name)) + ctx->SetOutputDim(bias_grad_name, bias_dims); + } + + auto x_grad_name = framework::GradVarName("X"); + auto y_grad_name = framework::GradVarName("Y"); + auto weight_grad_name = framework::GradVarName("Weight"); + + if (ctx->HasOutput(x_grad_name)) { + ctx->SetOutputDim(x_grad_name, x_dims); + } + if (ctx->HasOutput(y_grad_name)) { + ctx->SetOutputDim(y_grad_name, y_dims); + } + if (ctx->HasOutput(weight_grad_name)) { + ctx->SetOutputDim(weight_grad_name, weight_dims); + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(bilinear_tensor_product, ops::BilinearTensorProductOp, + ops::BilinearTensorProductOpMaker, bilinear_tensor_product_grad, + ops::BilinearTensorProductOpGrad); +REGISTER_OP_CPU_KERNEL( + bilinear_tensor_product, + ops::BilinearTensorProductKernel); +REGISTER_OP_CPU_KERNEL( + bilinear_tensor_product_grad, + ops::BilinearTensorProductGradKernel); diff --git a/paddle/operators/bilinear_tensor_product_op.cu b/paddle/operators/bilinear_tensor_product_op.cu new file mode 100644 index 0000000000..a212460560 --- /dev/null +++ b/paddle/operators/bilinear_tensor_product_op.cu @@ -0,0 +1,24 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#define EIGEN_USE_GPU +#include "paddle/operators/bilinear_tensor_product_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL( + bilinear_tensor_product, + ops::BilinearTensorProductKernel); +REGISTER_OP_GPU_KERNEL( + bilinear_tensor_product_grad, + ops::BilinearTensorProductGradKernel); diff --git a/paddle/operators/bilinear_tensor_product_op.h b/paddle/operators/bilinear_tensor_product_op.h new file mode 100644 index 0000000000..b816d6d7c2 --- /dev/null +++ b/paddle/operators/bilinear_tensor_product_op.h @@ -0,0 +1,176 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + You may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include "paddle/framework/op_registry.h" +#include "paddle/operators/math/math_function.h" +#include "paddle/platform/transform.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +using platform::Transform; + +template +class BilinearTensorProductKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* weight = ctx.Input("Weight"); + auto* bias = ctx.Input("Bias"); + auto* out = ctx.Output("Out"); + out->mutable_data(ctx.GetPlace()); + + auto weight_dims = weight->dims(); + Tensor left_mul_vec; + left_mul_vec.mutable_data(framework::make_ddim({weight_dims[2]}), + ctx.GetPlace()); + if (bias) { + out->CopyFrom(*bias, ctx.GetPlace(), ctx.device_context()); + } + for (int i = 0; i < weight_dims[0]; ++i) { + Tensor weight_mat = weight->Slice(i, i + 1).Resize( + framework::make_ddim({weight_dims[1], weight_dims[2]})); + math::gemm(ctx.device_context(), CblasNoTrans, CblasNoTrans, 1, + weight_dims[2], weight_dims[1], 1, x->data(), + weight_mat.data(), 0, left_mul_vec.data()); + if (bias) { + math::gemm(ctx.device_context(), CblasNoTrans, CblasNoTrans, + 1, 1, weight_dims[2], 1, left_mul_vec.data(), + y->data(), 1, &(out->data()[i])); + } else { + math::gemm(ctx.device_context(), CblasNoTrans, CblasNoTrans, + 1, 1, weight_dims[2], 1, left_mul_vec.data(), + y->data(), 0, &(out->data()[i])); + } + } + } +}; + +template +class ScaleFunctor { + public: + explicit ScaleFunctor(const T* scale) : scale_(scale) {} + + HOSTDEVICE T operator()(const T& x) const { return x * (*scale_); } + + private: + const T* scale_; +}; + +template +class BilinearTensorProductGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + const Tensor* x = ctx.Input("X"); + const Tensor* y = ctx.Input("Y"); + const Tensor* weight = ctx.Input("Weight"); + Tensor* d_x = ctx.Output(framework::GradVarName("X")); + Tensor* d_y = ctx.Output(framework::GradVarName("Y")); + Tensor* d_weight = ctx.Output(framework::GradVarName("Weight")); + Tensor* d_bias = ctx.Output(framework::GradVarName("Bias")); + const Tensor* d_out = ctx.Input(framework::GradVarName("Out")); + auto* d_out_ptr = d_out->data(); + auto weight_dims = weight->dims(); + + // Get the first matrix of Weight. + Tensor weight_mat_0 = weight->Slice(0, 1).Resize( + framework::make_ddim({weight_dims[1], weight_dims[2]})); + + // Create the intermediate variable for gradient. + int numel_x = x->numel(); + int numel_y = y->numel(); + const T* x_ptr = x->data(); + const T* y_ptr = y->data(); + Tensor x_scale; + T* x_scale_ptr = x_scale.mutable_data( + framework::make_ddim({weight_dims[1]}), ctx.GetPlace()); + Tensor y_scale; + T* y_scale_ptr = y_scale.mutable_data( + framework::make_ddim({weight_dims[2]}), ctx.GetPlace()); + Transform trans; + + // Caculate the gradient of X according to the first matrix of Weight. + if (d_x) { + d_x->mutable_data(ctx.GetPlace()); + trans(ctx.device_context(), y_ptr, y_ptr + numel_y, y_scale_ptr, + ScaleFunctor(&d_out_ptr[0])); + math::gemm(ctx.device_context(), CblasNoTrans, CblasTrans, 1, + weight_dims[1], weight_dims[2], 1, y_scale.data(), + weight_mat_0.data(), 0, d_x->data()); + } + + // Caculate the gradient of Y according to the first matrix of Weight. + if (d_y) { + d_y->mutable_data(ctx.GetPlace()); + trans(ctx.device_context(), x_ptr, x_ptr + numel_x, x_scale_ptr, + ScaleFunctor(&d_out_ptr[0])); + math::gemm(ctx.device_context(), CblasTrans, CblasNoTrans, + weight_dims[2], 1, weight_dims[1], 1, + weight_mat_0.data(), x_scale.data(), 0, + d_y->data()); + } + + // Caculate the gradient of X and Y completly. + if (d_x || d_y) { + for (int i = 1; i < weight_dims[0]; ++i) { + Tensor weight_mat = weight->Slice(i, i + 1).Resize( + framework::make_ddim({weight_dims[1], weight_dims[2]})); + if (d_x) { + trans(ctx.device_context(), y_ptr, y_ptr + numel_y, y_scale_ptr, + ScaleFunctor(&d_out_ptr[i])); + math::gemm(ctx.device_context(), CblasNoTrans, CblasTrans, + 1, weight_dims[1], weight_dims[2], 1, + y_scale.data(), weight_mat.data(), 1, + d_x->data()); + } + if (d_y) { + trans(ctx.device_context(), x_ptr, x_ptr + numel_x, x_scale_ptr, + ScaleFunctor(&d_out_ptr[i])); + math::gemm(ctx.device_context(), CblasTrans, CblasNoTrans, + weight_dims[2], 1, weight_dims[1], 1, + weight_mat.data(), x_scale.data(), 1, + d_y->data()); + } + } + } + + // Caculate the gradient of Weight. + if (d_weight) { + d_weight->mutable_data(ctx.GetPlace()); + for (int i = 0; i < weight_dims[0]; ++i) { + Tensor d_weight_mat = d_weight->Slice(i, i + 1).Resize( + framework::make_ddim({weight_dims[1], weight_dims[2]})); + trans(ctx.device_context(), x_ptr, x_ptr + numel_x, x_scale_ptr, + ScaleFunctor(&d_out_ptr[i])); + math::gemm(ctx.device_context(), CblasTrans, CblasNoTrans, + weight_dims[1], weight_dims[2], 1, 1, + x_scale.data(), y->data(), 0, + d_weight_mat.data()); + } + } + + // Caculate the gradient of Bias. + if (d_bias) { + d_bias->mutable_data(ctx.GetPlace()); + d_bias->CopyFrom(*d_out, ctx.GetPlace(), ctx.device_context()); + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/python/paddle/v2/framework/tests/test_bilinear_tensor_product_op.py b/python/paddle/v2/framework/tests/test_bilinear_tensor_product_op.py new file mode 100644 index 0000000000..10d90a9f0f --- /dev/null +++ b/python/paddle/v2/framework/tests/test_bilinear_tensor_product_op.py @@ -0,0 +1,30 @@ +import unittest +import numpy as np +from op_test import OpTest + + +class TestBilinearTensorProductOp(OpTest): + def setUp(self): + self.op_type = "bilinear_tensor_product" + self.inputs = { + 'X': np.random.random(3).astype("float32"), + 'Y': np.random.random(4).astype("float32"), + 'Weight': np.random.random((5, 3, 4)).astype("float32"), + 'Bias': np.random.random(5).astype("float32") + } + self.outputs = { + 'Out': np.matmul( + np.matmul(self.inputs['Weight'], self.inputs['Y']), + self.inputs['X']) + self.inputs['Bias'] + } + + def test_check_output(self): + self.check_output() + + def test_check_grad_normal(self): + self.check_grad( + ['X', 'Y', 'Weight', 'Bias'], 'Out', max_relative_error=0.5) + + +if __name__ == "__main__": + unittest.main() From 3ae14242da3e32350790711b6339b07787a231ea Mon Sep 17 00:00:00 2001 From: peterzhang2029 Date: Tue, 7 Nov 2017 20:26:04 +0800 Subject: [PATCH 10/37] update for mini-batch --- .../operators/bilinear_tensor_product_op.cc | 78 +++++---- .../operators/bilinear_tensor_product_op.cu | 79 ++++++++- paddle/operators/bilinear_tensor_product_op.h | 165 ++++++++++-------- .../tests/test_bilinear_tensor_product_op.py | 81 +++++++-- 4 files changed, 279 insertions(+), 124 deletions(-) diff --git a/paddle/operators/bilinear_tensor_product_op.cc b/paddle/operators/bilinear_tensor_product_op.cc index 64569e5fe7..3bd2d40cd2 100644 --- a/paddle/operators/bilinear_tensor_product_op.cc +++ b/paddle/operators/bilinear_tensor_product_op.cc @@ -34,8 +34,8 @@ class BilinearTensorProductOp : public framework::OperatorWithKernel { auto y_dims = ctx->GetInputDim("Y"); auto weight_dims = ctx->GetInputDim("Weight"); - PADDLE_ENFORCE_EQ(x_dims.size(), 1, "The input X must be a vector."); - PADDLE_ENFORCE_EQ(y_dims.size(), 1, "The input Y must be a vector."); + PADDLE_ENFORCE_EQ(x_dims.size(), 2, "The input X must be a 2D Tensor."); + PADDLE_ENFORCE_EQ(y_dims.size(), 2, "The input Y must be a 2D Tensor."); PADDLE_ENFORCE_EQ(weight_dims.size(), 3, "The input Weight must be a 3D tensor."); PADDLE_ENFORCE_GT(weight_dims[0], 0, @@ -44,24 +44,29 @@ class BilinearTensorProductOp : public framework::OperatorWithKernel { "The second dimension of Weight must be larger than 0."); PADDLE_ENFORCE_GT(weight_dims[2], 0, "The third dimension of Weight must be larger than 0."); - PADDLE_ENFORCE_EQ(x_dims[0], weight_dims[1], - "The dimension of X must be equal with the second " + PADDLE_ENFORCE_EQ(x_dims[0], y_dims[0], + "The first dimension(batch_size) of X must be " + "equal with the first dimension of the Y."); + PADDLE_ENFORCE_EQ(x_dims[1], weight_dims[1], + "The second dimension of X must be equal with the second " "dimension of the Weight."); - PADDLE_ENFORCE_EQ(y_dims[0], weight_dims[2], - "The dimension of Y must be equal with the third " + PADDLE_ENFORCE_EQ(y_dims[1], weight_dims[2], + "The second dimension of Y must be equal with the third " "dimension of the Weight."); - auto bias = Input("Bias"); - if (bias != framework::kEmptyVarName) { + if (ctx->HasInput("Bias")) { auto bias_dims = ctx->GetInputDim("Bias"); - PADDLE_ENFORCE_EQ(bias_dims.size(), 1, - "The input Bias must be a vector."); - PADDLE_ENFORCE_EQ(bias_dims[0], weight_dims[0], - "The dimension of Bias must be equal with the first " - "dimension of the Weight."); + PADDLE_ENFORCE_EQ(bias_dims.size(), 2, + "The input Bias must have 2 dimensions."); + PADDLE_ENFORCE_EQ(bias_dims[0], 1, + "The first dimention of input Bias must be 1."); + PADDLE_ENFORCE_EQ(bias_dims[1], weight_dims[0], + "The second dimension of Bias must be equal with the " + "first dimension of the Weight."); } - ctx->SetOutputDim("Out", {weight_dims[0]}); + ctx->SetOutputDim("Out", {x_dims[0], weight_dims[0]}); + ctx->ShareLoD("X", /*->*/ "Out"); } }; @@ -70,19 +75,19 @@ class BilinearTensorProductOpMaker : public framework::OpProtoAndCheckerMaker { BilinearTensorProductOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "The first input of tensor op"); - AddInput("Y", "The second input of tensor op"); - AddInput("Weight", "The input weight of tensor op"); - AddInput("Bias", "The input bias of tensor op"); - AddOutput("Out", "The output of tensor op"); + AddInput("X", "The first input of BilinearTensorProduct op"); + AddInput("Y", "The second input of BilinearTensorProduct op"); + AddInput("Weight", "The input weight of BilinearTensorProduct op"); + AddInput("Bias", "The input bias of BilinearTensorProduct op") + .AsDispensable(); + AddOutput("Out", "The output of BilinearTensorProduct op"); AddComment(R"DOC( Bilinear Tensor Product operator. -Given input X and Y, a 3D tensor weight, and bias. Each entry of the output is -computed by one slice i = 1, . . . , k of the tensor: Out_i = X*W_i*Y + Bias_i . +Given input X and Y, a 3D tensor weight, and bias. Each column of the +output is computed by one slice i = 1, . . . , k of the tensor: -The equation of this operator is: - - Out = \sum_{i} X*W_i*Y + Bias + M = (X W_i) \cdot Y + Out_i = \sum_i {M_i} + Bias_i )DOC"); } @@ -104,19 +109,20 @@ class BilinearTensorProductOpGrad : public framework::OperatorWithKernel { auto weight_dims = ctx->GetInputDim("Weight"); auto out_dims = ctx->GetInputDim(framework::GradVarName("Out")); - PADDLE_ENFORCE_EQ(out_dims.size(), 1, "The Out@GRAD must be a vector."); + PADDLE_ENFORCE_EQ(out_dims.size(), 2, "The Out@GRAD must be a 2D Tensor."); PADDLE_ENFORCE_EQ( - weight_dims[0], out_dims[0], - "The dimension of Out@GRAD must be equal with the third dimension of " - "the Weight."); - - auto bias = Input("Bias"); - if (bias != framework::kEmptyVarName) { + x_dims[0], out_dims[0], + "The first dimension(batch_size) of Out@GRAD must be equal with " + "the first dimension of the X."); + PADDLE_ENFORCE_EQ(weight_dims[0], out_dims[1], + "The second dimension of Out@GRAD must be equal with " + "the third dimension of the Weight."); + + if (ctx->HasInput("Bias")) { auto bias_dims = ctx->GetInputDim("Bias"); - PADDLE_ENFORCE_EQ(bias_dims.size(), 1, "Input Bias must be a vector."); - PADDLE_ENFORCE_EQ( - bias_dims[0], out_dims[0], - "The dimension of Bias must be equal with the Out@GRAD "); + PADDLE_ENFORCE_EQ(bias_dims[1], out_dims[1], + "The second dimension of Bias must be equal with " + "the second dimension of the Out@GRAD."); auto bias_grad_name = framework::GradVarName("Bias"); if (ctx->HasOutput(bias_grad_name)) ctx->SetOutputDim(bias_grad_name, bias_dims); @@ -150,4 +156,4 @@ REGISTER_OP_CPU_KERNEL( ops::BilinearTensorProductKernel); REGISTER_OP_CPU_KERNEL( bilinear_tensor_product_grad, - ops::BilinearTensorProductGradKernel); + ops::BilinearTensorProductGradKernel); \ No newline at end of file diff --git a/paddle/operators/bilinear_tensor_product_op.cu b/paddle/operators/bilinear_tensor_product_op.cu index a212460560..1d65c17f8c 100644 --- a/paddle/operators/bilinear_tensor_product_op.cu +++ b/paddle/operators/bilinear_tensor_product_op.cu @@ -15,10 +15,85 @@ #define EIGEN_USE_GPU #include "paddle/operators/bilinear_tensor_product_op.h" +namespace paddle { +namespace operators { + +template +class BilinearTensorProductCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* weight = ctx.Input("Weight"); + auto* bias = ctx.Input("Bias"); + auto* out = ctx.Output("Out"); + out->mutable_data(ctx.GetPlace()); + + auto y_mat = EigenMatrix::From(*y); + auto batch_size = x->dims()[0]; + auto weight_dims = weight->dims(); + + auto place = ctx.GetEigenDevice(); + auto cpu_place = ctx.GetEigenDevice(); + + // Copy the output to cpu. + Tensor output_cpu; + output_cpu.CopyFrom(*out, platform::CPUPlace(), ctx.device_context()); + auto* output_cpu_ptr = output_cpu.data(); + auto output_cpu_mat = EigenMatrix::From(output_cpu); + + // Create the temporary variables. + Tensor left_mul; + left_mul.mutable_data(framework::make_ddim({batch_size, weight_dims[2]}), + ctx.GetPlace()); + auto left_mul_mat = EigenMatrix::From(left_mul); + Tensor output_col; + output_col.mutable_data(framework::make_ddim({batch_size}), + ctx.GetPlace()); + auto output_col_vec = EigenVector::From(output_col); + + for (size_t i = 0; i < weight_dims[0]; ++i) { + Tensor weight_mat = weight->Slice(i, i + 1).Resize( + framework::make_ddim({weight_dims[1], weight_dims[2]})); + math::gemm(ctx.device_context(), CblasNoTrans, CblasNoTrans, + batch_size, weight_dims[2], weight_dims[1], 1, + x->data(), weight_mat.data(), 0, + left_mul.data()); + output_col_vec.device(place) = + (left_mul_mat * y_mat).sum(Eigen::DSizes(1)); + + // Copy the output_col to cpu. + Tensor output_col_cpu; + output_col_cpu.CopyFrom(output_col, platform::CPUPlace(), + ctx.device_context()); + auto* output_col_ptr = output_col_cpu.data(); + + for (size_t j = 0; j < batch_size; ++j) { + output_cpu_ptr[i + j * weight_dims[0]] = output_col_ptr[j]; + } + } + + if (bias) { + // Copy the bias to cpu. + Tensor bias_cpu; + bias_cpu.CopyFrom(*bias, platform::CPUPlace(), ctx.device_context()); + auto bias_vec = EigenMatrix::From(bias_cpu); + Eigen::DSizes bcast(batch_size, 1); + output_cpu_mat.device(cpu_place) = + bias_vec.broadcast(bcast) + output_cpu_mat; + } + + // Copy the output to gpu. + out->CopyFrom(output_cpu, platform::GPUPlace(), ctx.device_context()); + } +}; +} // namespace operators +} // namespace paddle + namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( bilinear_tensor_product, - ops::BilinearTensorProductKernel); + ops::BilinearTensorProductCUDAKernel); REGISTER_OP_GPU_KERNEL( bilinear_tensor_product_grad, - ops::BilinearTensorProductGradKernel); + ops::BilinearTensorProductGradKernel); \ No newline at end of file diff --git a/paddle/operators/bilinear_tensor_product_op.h b/paddle/operators/bilinear_tensor_product_op.h index b816d6d7c2..238d1d7749 100644 --- a/paddle/operators/bilinear_tensor_product_op.h +++ b/paddle/operators/bilinear_tensor_product_op.h @@ -14,15 +14,22 @@ #pragma once +#include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" #include "paddle/operators/math/math_function.h" -#include "paddle/platform/transform.h" namespace paddle { namespace operators { using Tensor = framework::Tensor; -using platform::Transform; + +template +using EigenMatrix = framework::EigenMatrix; + +template +using EigenVector = framework::EigenVector; template class BilinearTensorProductKernel : public framework::OpKernel { @@ -35,43 +42,45 @@ class BilinearTensorProductKernel : public framework::OpKernel { auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); + auto y_mat = EigenMatrix::From(*y); + auto output_mat = EigenMatrix::From(*out); + + auto batch_size = x->dims()[0]; auto weight_dims = weight->dims(); - Tensor left_mul_vec; - left_mul_vec.mutable_data(framework::make_ddim({weight_dims[2]}), - ctx.GetPlace()); - if (bias) { - out->CopyFrom(*bias, ctx.GetPlace(), ctx.device_context()); - } - for (int i = 0; i < weight_dims[0]; ++i) { + auto place = ctx.GetEigenDevice(); + + // Create the temporary variables. + Tensor left_mul; + left_mul.mutable_data(framework::make_ddim({batch_size, weight_dims[2]}), + ctx.GetPlace()); + auto left_mul_mat = EigenMatrix::From(left_mul); + Tensor output_col; + output_col.mutable_data(framework::make_ddim({weight_dims[0]}), + ctx.GetPlace()); + auto output_col_vec = EigenVector::From(output_col); + + for (size_t i = 0; i < weight_dims[0]; ++i) { Tensor weight_mat = weight->Slice(i, i + 1).Resize( framework::make_ddim({weight_dims[1], weight_dims[2]})); - math::gemm(ctx.device_context(), CblasNoTrans, CblasNoTrans, 1, - weight_dims[2], weight_dims[1], 1, x->data(), - weight_mat.data(), 0, left_mul_vec.data()); - if (bias) { - math::gemm(ctx.device_context(), CblasNoTrans, CblasNoTrans, - 1, 1, weight_dims[2], 1, left_mul_vec.data(), - y->data(), 1, &(out->data()[i])); - } else { - math::gemm(ctx.device_context(), CblasNoTrans, CblasNoTrans, - 1, 1, weight_dims[2], 1, left_mul_vec.data(), - y->data(), 0, &(out->data()[i])); + math::gemm(ctx.device_context(), CblasNoTrans, CblasNoTrans, + batch_size, weight_dims[2], weight_dims[1], 1, + x->data(), weight_mat.data(), 0, + left_mul.data()); + output_col_vec = (left_mul_mat * y_mat).sum(Eigen::DSizes(1)); + for (size_t j = 0; j < batch_size; ++j) { + output_mat(j, i) = output_col_vec(j); } } + if (bias) { + auto bias_vec = EigenMatrix::From(*bias); + Eigen::DSizes bcast(batch_size, 1); + output_mat.device(place) = bias_vec.broadcast(bcast) + output_mat; + } else { + output_mat.device(place) = output_mat; + } } }; -template -class ScaleFunctor { - public: - explicit ScaleFunctor(const T* scale) : scale_(scale) {} - - HOSTDEVICE T operator()(const T& x) const { return x * (*scale_); } - - private: - const T* scale_; -}; - template class BilinearTensorProductGradKernel : public framework::OpKernel { public: @@ -84,66 +93,65 @@ class BilinearTensorProductGradKernel : public framework::OpKernel { Tensor* d_weight = ctx.Output(framework::GradVarName("Weight")); Tensor* d_bias = ctx.Output(framework::GradVarName("Bias")); const Tensor* d_out = ctx.Input(framework::GradVarName("Out")); - auto* d_out_ptr = d_out->data(); + + auto batch_size = x->dims()[0]; auto weight_dims = weight->dims(); - // Get the first matrix of Weight. - Tensor weight_mat_0 = weight->Slice(0, 1).Resize( - framework::make_ddim({weight_dims[1], weight_dims[2]})); + auto x_mat = EigenMatrix::From(*x); + auto y_mat = EigenMatrix::From(*y); + auto d_out_mat = EigenMatrix::From(*d_out); + auto place = ctx.GetEigenDevice(); - // Create the intermediate variable for gradient. - int numel_x = x->numel(); - int numel_y = y->numel(); - const T* x_ptr = x->data(); - const T* y_ptr = y->data(); + // Create the temporary variables for gradient. Tensor x_scale; - T* x_scale_ptr = x_scale.mutable_data( - framework::make_ddim({weight_dims[1]}), ctx.GetPlace()); + x_scale.mutable_data(framework::make_ddim({batch_size, weight_dims[1]}), + ctx.GetPlace()); + auto x_scale_mat = EigenMatrix::From(x_scale); Tensor y_scale; - T* y_scale_ptr = y_scale.mutable_data( - framework::make_ddim({weight_dims[2]}), ctx.GetPlace()); - Transform trans; + y_scale.mutable_data(framework::make_ddim({batch_size, weight_dims[2]}), + ctx.GetPlace()); + auto y_scale_mat = EigenMatrix::From(y_scale); + + math::SetConstant set_zero; - // Caculate the gradient of X according to the first matrix of Weight. + // Set X@Grad be zero at first. if (d_x) { d_x->mutable_data(ctx.GetPlace()); - trans(ctx.device_context(), y_ptr, y_ptr + numel_y, y_scale_ptr, - ScaleFunctor(&d_out_ptr[0])); - math::gemm(ctx.device_context(), CblasNoTrans, CblasTrans, 1, - weight_dims[1], weight_dims[2], 1, y_scale.data(), - weight_mat_0.data(), 0, d_x->data()); + set_zero(ctx.device_context(), d_x, static_cast(0)); } - // Caculate the gradient of Y according to the first matrix of Weight. + // Set Y@Grad be zero at first. if (d_y) { d_y->mutable_data(ctx.GetPlace()); - trans(ctx.device_context(), x_ptr, x_ptr + numel_x, x_scale_ptr, - ScaleFunctor(&d_out_ptr[0])); - math::gemm(ctx.device_context(), CblasTrans, CblasNoTrans, - weight_dims[2], 1, weight_dims[1], 1, - weight_mat_0.data(), x_scale.data(), 0, - d_y->data()); + set_zero(ctx.device_context(), d_y, static_cast(0)); } - // Caculate the gradient of X and Y completly. + // Caculate the X@Grad and Y@Grad. if (d_x || d_y) { - for (int i = 1; i < weight_dims[0]; ++i) { - Tensor weight_mat = weight->Slice(i, i + 1).Resize( + Eigen::DSizes bcast_for_x(1, weight_dims[2]); + Eigen::DSizes bcast_for_y(1, weight_dims[1]); + for (int i = 0; i < weight_dims[0]; ++i) { + Tensor weight_i = weight->Slice(i, i + 1).Resize( framework::make_ddim({weight_dims[1], weight_dims[2]})); + auto output_vec = d_out_mat.chip(i, 1); if (d_x) { - trans(ctx.device_context(), y_ptr, y_ptr + numel_y, y_scale_ptr, - ScaleFunctor(&d_out_ptr[i])); + y_scale_mat.device(place) = + output_vec.reshape(Eigen::DSizes(batch_size, 1)) + .broadcast(bcast_for_x) * + y_mat; math::gemm(ctx.device_context(), CblasNoTrans, CblasTrans, - 1, weight_dims[1], weight_dims[2], 1, - y_scale.data(), weight_mat.data(), 1, + batch_size, weight_dims[1], weight_dims[2], 1, + y_scale.data(), weight_i.data(), 1, d_x->data()); } if (d_y) { - trans(ctx.device_context(), x_ptr, x_ptr + numel_x, x_scale_ptr, - ScaleFunctor(&d_out_ptr[i])); - math::gemm(ctx.device_context(), CblasTrans, CblasNoTrans, - weight_dims[2], 1, weight_dims[1], 1, - weight_mat.data(), x_scale.data(), 1, + x_scale_mat.device(place) = + output_vec.reshape(Eigen::DSizes(batch_size, 1)) + .broadcast(bcast_for_y) * + x_mat; + math::gemm(ctx.device_context(), CblasNoTrans, CblasNoTrans, + batch_size, weight_dims[2], weight_dims[1], 1, + x_scale.data(), weight_i.data(), 1, d_y->data()); } } @@ -152,22 +160,27 @@ class BilinearTensorProductGradKernel : public framework::OpKernel { // Caculate the gradient of Weight. if (d_weight) { d_weight->mutable_data(ctx.GetPlace()); + Eigen::DSizes bcast_for_weight(1, weight_dims[1]); for (int i = 0; i < weight_dims[0]; ++i) { - Tensor d_weight_mat = d_weight->Slice(i, i + 1).Resize( + Tensor d_weight_i = d_weight->Slice(i, i + 1).Resize( framework::make_ddim({weight_dims[1], weight_dims[2]})); - trans(ctx.device_context(), x_ptr, x_ptr + numel_x, x_scale_ptr, - ScaleFunctor(&d_out_ptr[i])); + auto output_vec = d_out_mat.chip(i, 1); + x_scale_mat.device(place) = + output_vec.reshape(Eigen::DSizes(batch_size, 1)) + .broadcast(bcast_for_weight) * + x_mat; math::gemm(ctx.device_context(), CblasTrans, CblasNoTrans, - weight_dims[1], weight_dims[2], 1, 1, + weight_dims[1], weight_dims[2], batch_size, 1, x_scale.data(), y->data(), 0, - d_weight_mat.data()); + d_weight_i.data()); } } // Caculate the gradient of Bias. if (d_bias) { d_bias->mutable_data(ctx.GetPlace()); - d_bias->CopyFrom(*d_out, ctx.GetPlace(), ctx.device_context()); + auto d_bias_mat = EigenMatrix::From(*d_bias); + d_bias_mat.device(place) = d_out_mat.sum(Eigen::DSizes(0)); } } }; diff --git a/python/paddle/v2/framework/tests/test_bilinear_tensor_product_op.py b/python/paddle/v2/framework/tests/test_bilinear_tensor_product_op.py index 10d90a9f0f..1c1f388098 100644 --- a/python/paddle/v2/framework/tests/test_bilinear_tensor_product_op.py +++ b/python/paddle/v2/framework/tests/test_bilinear_tensor_product_op.py @@ -6,24 +6,85 @@ from op_test import OpTest class TestBilinearTensorProductOp(OpTest): def setUp(self): self.op_type = "bilinear_tensor_product" + batch_size = 6 + size0 = 3 + size1 = 4 + size2 = 5 + a = np.random.random((batch_size, size0)).astype("float32") + b = np.random.random((batch_size, size1)).astype("float32") + w = np.random.random((size2, size0, size1)).astype("float32") + bias = np.random.random((1, size2)).astype("float32") + output = np.zeros((batch_size, size2)).astype("float32") + for i in range(size2): + w_i = w[i, :, :] + output[:, i] = np.sum(np.matmul(a, w_i) * b, axis=1) self.inputs = { - 'X': np.random.random(3).astype("float32"), - 'Y': np.random.random(4).astype("float32"), - 'Weight': np.random.random((5, 3, 4)).astype("float32"), - 'Bias': np.random.random(5).astype("float32") + 'X': a, + 'Y': b, + 'Weight': w, + 'Bias': bias, } - self.outputs = { - 'Out': np.matmul( - np.matmul(self.inputs['Weight'], self.inputs['Y']), - self.inputs['X']) + self.inputs['Bias'] + self.outputs = {'Out': output + bias} + + def test_check_output(self): + self.check_output() + + def test_check_grad_normal(self): + self.check_grad(['X', 'Y', 'Weight', 'Bias'], 'Out') + + +class TestBilinearTensorProductOp2(TestBilinearTensorProductOp): + def setUp(self): + self.op_type = "bilinear_tensor_product" + batch_size = 1 + size0 = 1 + size1 = 1 + size2 = 1 + a = np.random.random((batch_size, size0)).astype("float32") + b = np.random.random((batch_size, size1)).astype("float32") + w = np.random.random((size2, size0, size1)).astype("float32") + bias = np.random.random((1, size2)).astype("float32") + output = np.zeros((batch_size, size2)).astype("float32") + for i in range(size2): + w_i = w[i, :, :] + output[:, i] = np.sum(np.matmul(a, w_i) * b, axis=1) + self.inputs = { + 'X': a, + 'Y': b, + 'Weight': w, + 'Bias': bias, } + self.outputs = {'Out': output + bias} + + def test_check_output(self): + self.check_output() + + def test_check_grad_normal(self): + self.check_grad(['X', 'Y', 'Weight', 'Bias'], 'Out') + + +class TestBilinearTensorProductOp3(TestBilinearTensorProductOp): + def setUp(self): + self.op_type = "bilinear_tensor_product" + batch_size = 7 + size0 = 4 + size1 = 5 + size2 = 6 + a = np.random.random((batch_size, size0)).astype("float32") + b = np.random.random((batch_size, size1)).astype("float32") + w = np.random.random((size2, size0, size1)).astype("float32") + output = np.zeros((batch_size, size2)).astype("float32") + for i in range(size2): + w_i = w[i, :, :] + output[:, i] = np.sum(np.matmul(a, w_i) * b, axis=1) + self.inputs = {'X': a, 'Y': b, 'Weight': w} + self.outputs = {'Out': output} def test_check_output(self): self.check_output() def test_check_grad_normal(self): - self.check_grad( - ['X', 'Y', 'Weight', 'Bias'], 'Out', max_relative_error=0.5) + self.check_grad(['X', 'Y', 'Weight'], 'Out') if __name__ == "__main__": From 47269273ff15afc0156939de46f800a15def609c Mon Sep 17 00:00:00 2001 From: peterzhang2029 Date: Wed, 8 Nov 2017 14:53:21 +0800 Subject: [PATCH 11/37] refine memory transform --- .../operators/bilinear_tensor_product_op.cc | 64 +++++++------ .../operators/bilinear_tensor_product_op.cu | 95 ++----------------- paddle/operators/bilinear_tensor_product_op.h | 37 +++----- 3 files changed, 58 insertions(+), 138 deletions(-) diff --git a/paddle/operators/bilinear_tensor_product_op.cc b/paddle/operators/bilinear_tensor_product_op.cc index afb9678b64..dc02e5811e 100644 --- a/paddle/operators/bilinear_tensor_product_op.cc +++ b/paddle/operators/bilinear_tensor_product_op.cc @@ -34,34 +34,34 @@ class BilinearTensorProductOp : public framework::OperatorWithKernel { auto y_dims = ctx->GetInputDim("Y"); auto weight_dims = ctx->GetInputDim("Weight"); - PADDLE_ENFORCE_EQ(x_dims.size(), 2, "The input X must be a 2D Tensor."); - PADDLE_ENFORCE_EQ(y_dims.size(), 2, "The input Y must be a 2D Tensor."); - PADDLE_ENFORCE_EQ(weight_dims.size(), 3, + PADDLE_ENFORCE_EQ(x_dims.size(), 2UL, "The input X must be a 2D Tensor."); + PADDLE_ENFORCE_EQ(y_dims.size(), 2UL, "The input Y must be a 2D Tensor."); + PADDLE_ENFORCE_EQ(weight_dims.size(), 3UL, "The input Weight must be a 3D tensor."); - PADDLE_ENFORCE_GT(weight_dims[0], 0, - "The first dimension of Weight must be larger than 0."); - PADDLE_ENFORCE_GT(weight_dims[1], 0, - "The second dimension of Weight must be larger than 0."); - PADDLE_ENFORCE_GT(weight_dims[2], 0, - "The third dimension of Weight must be larger than 0."); + PADDLE_ENFORCE(weight_dims[0], + "The first dimension of Weight must be larger than 0."); + PADDLE_ENFORCE(weight_dims[1], + "The second dimension of Weight must be larger than 0."); + PADDLE_ENFORCE(weight_dims[2], + "The third dimension of Weight must be larger than 0."); PADDLE_ENFORCE_EQ(x_dims[0], y_dims[0], "The first dimension(batch_size) of X must be " - "equal with the first dimension of the Y."); + "equal to the first dimension of the Y."); PADDLE_ENFORCE_EQ(x_dims[1], weight_dims[1], - "The second dimension of X must be equal with the second " + "The second dimension of X must be equal to the second " "dimension of the Weight."); PADDLE_ENFORCE_EQ(y_dims[1], weight_dims[2], - "The second dimension of Y must be equal with the third " + "The second dimension of Y must be equal to the third " "dimension of the Weight."); if (ctx->HasInput("Bias")) { auto bias_dims = ctx->GetInputDim("Bias"); - PADDLE_ENFORCE_EQ(bias_dims.size(), 2, + PADDLE_ENFORCE_EQ(bias_dims.size(), 2UL, "The input Bias must have 2 dimensions."); - PADDLE_ENFORCE_EQ(bias_dims[0], 1, + PADDLE_ENFORCE_EQ(bias_dims[0], 1UL, "The first dimention of input Bias must be 1."); PADDLE_ENFORCE_EQ(bias_dims[1], weight_dims[0], - "The second dimension of Bias must be equal with the " + "The second dimension of Bias must be equal to the " "first dimension of the Weight."); } @@ -75,12 +75,12 @@ class BilinearTensorProductOpMaker : public framework::OpProtoAndCheckerMaker { BilinearTensorProductOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "The first input of BilinearTensorProduct op"); - AddInput("Y", "The second input of BilinearTensorProduct op"); - AddInput("Weight", "The input weight of BilinearTensorProduct op"); - AddInput("Bias", "The input bias of BilinearTensorProduct op") + AddInput("X", "The first input of BilinearTensorProduct op."); + AddInput("Y", "The second input of BilinearTensorProduct op."); + AddInput("Weight", "The input weight of BilinearTensorProduct op."); + AddInput("Bias", "The input bias of BilinearTensorProduct op.") .AsDispensable(); - AddOutput("Out", "The output of BilinearTensorProduct op"); + AddOutput("Out", "The output of BilinearTensorProduct op."); AddComment(R"DOC( Bilinear Tensor Product operator. Given input X and Y, a 3D tensor weight, and bias. Each column of the @@ -99,30 +99,32 @@ class BilinearTensorProductOpGrad : public framework::OperatorWithKernel { protected: void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null"); - PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null"); - PADDLE_ENFORCE(ctx->HasInput("Weight"), "Input(Weight) should not be null"); + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Weight"), + "Input(Weight) should not be null."); PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), - "Input (Out@GRAD) should not be null"); + "Input (Out@GRAD) should not be null."); auto x_dims = ctx->GetInputDim("X"); auto y_dims = ctx->GetInputDim("Y"); auto weight_dims = ctx->GetInputDim("Weight"); auto out_dims = ctx->GetInputDim(framework::GradVarName("Out")); - PADDLE_ENFORCE_EQ(out_dims.size(), 2, "The Out@GRAD must be a 2D Tensor."); + PADDLE_ENFORCE_EQ(out_dims.size(), 2UL, + "The Out@GRAD must be a 2D Tensor."); PADDLE_ENFORCE_EQ( x_dims[0], out_dims[0], - "The first dimension(batch_size) of Out@GRAD must be equal with " - "the first dimension of the X."); + "The first dimension(batch_size) of Out@GRAD must be equal to " + "the first dimension of the Input(X)."); PADDLE_ENFORCE_EQ(weight_dims[0], out_dims[1], - "The second dimension of Out@GRAD must be equal with " - "the third dimension of the Weight."); + "The second dimension of Out@GRAD must be equal to " + "the third dimension of the Input(Weight)."); if (ctx->HasInput("Bias")) { auto bias_dims = ctx->GetInputDim("Bias"); PADDLE_ENFORCE_EQ(bias_dims[1], out_dims[1], - "The second dimension of Bias must be equal with " - "the second dimension of the Out@GRAD."); + "The second dimension of Out@GRAD must be equal to " + "the second dimension of the Input(Bias)."); auto bias_grad_name = framework::GradVarName("Bias"); if (ctx->HasOutput(bias_grad_name)) ctx->SetOutputDim(bias_grad_name, bias_dims); diff --git a/paddle/operators/bilinear_tensor_product_op.cu b/paddle/operators/bilinear_tensor_product_op.cu index 1afdfe4b11..0f28a01c87 100644 --- a/paddle/operators/bilinear_tensor_product_op.cu +++ b/paddle/operators/bilinear_tensor_product_op.cu @@ -1,99 +1,24 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU #include "paddle/operators/bilinear_tensor_product_op.h" -namespace paddle { -namespace operators { - -template -class BilinearTensorProductCUDAKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* weight = ctx.Input("Weight"); - auto* bias = ctx.Input("Bias"); - auto* out = ctx.Output("Out"); - out->mutable_data(ctx.GetPlace()); - - auto y_mat = EigenMatrix::From(*y); - auto batch_size = x->dims()[0]; - auto weight_dims = weight->dims(); - - auto place = ctx.GetEigenDevice(); - auto cpu_place = ctx.GetEigenDevice(); - - // Copy the output to cpu. - Tensor output_cpu; - output_cpu.CopyFrom(*out, platform::CPUPlace(), ctx.device_context()); - auto* output_cpu_ptr = output_cpu.data(); - auto output_cpu_mat = EigenMatrix::From(output_cpu); - - // Create the temporary variables. - Tensor left_mul; - left_mul.mutable_data(framework::make_ddim({batch_size, weight_dims[2]}), - ctx.GetPlace()); - auto left_mul_mat = EigenMatrix::From(left_mul); - Tensor output_col; - output_col.mutable_data(framework::make_ddim({batch_size}), - ctx.GetPlace()); - auto output_col_vec = EigenVector::From(output_col); - - for (size_t i = 0; i < weight_dims[0]; ++i) { - Tensor weight_mat = weight->Slice(i, i + 1).Resize( - framework::make_ddim({weight_dims[1], weight_dims[2]})); - math::gemm(ctx.device_context(), CblasNoTrans, CblasNoTrans, - batch_size, weight_dims[2], weight_dims[1], 1, - x->data(), weight_mat.data(), 0, - left_mul.data()); - output_col_vec.device(place) = - (left_mul_mat * y_mat).sum(Eigen::DSizes(1)); - - // Copy the output_col to cpu. - Tensor output_col_cpu; - output_col_cpu.CopyFrom(output_col, platform::CPUPlace(), - ctx.device_context()); - auto* output_col_ptr = output_col_cpu.data(); - - for (size_t j = 0; j < batch_size; ++j) { - output_cpu_ptr[i + j * weight_dims[0]] = output_col_ptr[j]; - } - } - - if (bias) { - // Copy the bias to cpu. - Tensor bias_cpu; - bias_cpu.CopyFrom(*bias, platform::CPUPlace(), ctx.device_context()); - auto bias_vec = EigenMatrix::From(bias_cpu); - Eigen::DSizes bcast(batch_size, 1); - output_cpu_mat.device(cpu_place) = - bias_vec.broadcast(bcast) + output_cpu_mat; - } - - // Copy the output to gpu. - out->CopyFrom(output_cpu, platform::GPUPlace(), ctx.device_context()); - } -}; -} // namespace operators -} // namespace paddle - namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( bilinear_tensor_product, - ops::BilinearTensorProductCUDAKernel); + ops::BilinearTensorProductKernel); REGISTER_OP_GPU_KERNEL( bilinear_tensor_product_grad, ops::BilinearTensorProductGradKernel); diff --git a/paddle/operators/bilinear_tensor_product_op.h b/paddle/operators/bilinear_tensor_product_op.h index 238d1d7749..6b40f77c42 100644 --- a/paddle/operators/bilinear_tensor_product_op.h +++ b/paddle/operators/bilinear_tensor_product_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - You may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once @@ -21,7 +21,7 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using framework::Tensor; template @@ -49,34 +49,27 @@ class BilinearTensorProductKernel : public framework::OpKernel { auto weight_dims = weight->dims(); auto place = ctx.GetEigenDevice(); - // Create the temporary variables. + // Create the intermediate variables. Tensor left_mul; left_mul.mutable_data(framework::make_ddim({batch_size, weight_dims[2]}), ctx.GetPlace()); auto left_mul_mat = EigenMatrix::From(left_mul); - Tensor output_col; - output_col.mutable_data(framework::make_ddim({weight_dims[0]}), - ctx.GetPlace()); - auto output_col_vec = EigenVector::From(output_col); for (size_t i = 0; i < weight_dims[0]; ++i) { + auto output_col_vec = output_mat.chip(i, 1); Tensor weight_mat = weight->Slice(i, i + 1).Resize( framework::make_ddim({weight_dims[1], weight_dims[2]})); math::gemm(ctx.device_context(), CblasNoTrans, CblasNoTrans, batch_size, weight_dims[2], weight_dims[1], 1, x->data(), weight_mat.data(), 0, left_mul.data()); - output_col_vec = (left_mul_mat * y_mat).sum(Eigen::DSizes(1)); - for (size_t j = 0; j < batch_size; ++j) { - output_mat(j, i) = output_col_vec(j); - } + output_col_vec.device(place) = + (left_mul_mat * y_mat).sum(Eigen::DSizes(1)); } if (bias) { auto bias_vec = EigenMatrix::From(*bias); Eigen::DSizes bcast(batch_size, 1); output_mat.device(place) = bias_vec.broadcast(bcast) + output_mat; - } else { - output_mat.device(place) = output_mat; } } }; @@ -102,7 +95,7 @@ class BilinearTensorProductGradKernel : public framework::OpKernel { auto d_out_mat = EigenMatrix::From(*d_out); auto place = ctx.GetEigenDevice(); - // Create the temporary variables for gradient. + // Create the intermediate variables for gradient. Tensor x_scale; x_scale.mutable_data(framework::make_ddim({batch_size, weight_dims[1]}), ctx.GetPlace()); From 0d9ba3da9a8db4b9f25d7814fcdc8eec80de9ab5 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Thu, 9 Nov 2017 11:08:39 +0800 Subject: [PATCH 12/37] Adapt to new interface. --- paddle/operators/expand_op.cc | 69 +++++++++++++++++++---------------- paddle/operators/expand_op.h | 42 +++++++++------------ 2 files changed, 55 insertions(+), 56 deletions(-) diff --git a/paddle/operators/expand_op.cc b/paddle/operators/expand_op.cc index 3990b3751d..5d83b1d9d2 100644 --- a/paddle/operators/expand_op.cc +++ b/paddle/operators/expand_op.cc @@ -24,26 +24,28 @@ class ExpandOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; protected: - void InferShape(const framework::InferShapeContext& ctx) const override { - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "X must be initialized."); - std::vector expand_times = Attr>("expandTimes"); - auto x_dims = ctx.Input("X")->dims(); - - PADDLE_ENFORCE_EQ(x_dims.size(), expand_times.size(), - "The number of expandTimes's value must be equal " - "to the rank of X."); + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must be initialized."); + std::vector expand_times = + ctx->Attrs().Get>("expandTimes"); + auto x_dims = ctx->GetInputDim("X"); + + PADDLE_ENFORCE_EQ(static_cast(x_dims.size()), expand_times.size(), + "The number of Attr(expandTimes)'s value must be equal " + "to the rank of Input(X)."); PADDLE_ENFORCE_LE(x_dims.size(), 6, - "The rank of X must not be greater than 6."); + "The rank of Input(X) must not be greater than 6."); std::vector out_shape(x_dims.size()); for (size_t i = 0; i < expand_times.size(); ++i) { PADDLE_ENFORCE_GE(expand_times[i], 1, - "Each value of expandTimes should not be " + "Each value of Attr(expandTimes) should not be " "less than 1."); out_shape[i] = x_dims[i] * expand_times[i]; } - auto* out = ctx.Output("Out"); - out->Resize(framework::make_ddim(out_shape)); + + ctx->SetOutputDim("Out", framework::make_ddim(out_shape)); + ctx->ShareLoD("X", "Out"); } }; @@ -52,20 +54,21 @@ class ExpandOpMaker : public framework::OpProtoAndCheckerMaker { ExpandOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", - "The input tensor of expand op." - "The rank of X should be between in 1 and 6."); + "(Tensor, default Tensor) A tensor with rank in [1, 6]." + "X is the input tensor to be expanded."); AddOutput("Out", - "Output tensor of expand op." - "The rank of Out is same as X except that each dimension size " - "of Out equals to corresponding dimension size of X multiplying " - "corresponding value of expandTimes."); + "(Tensor, default Tensor) A tensor with rank in [1, 6]." + "The rank of Output(Out) is same as Input(X) except that each " + "dimension size of Output(Out) is equal to corresponding " + "dimension size of Input(X) multiplying corresponding value of " + "Attr(expandTimes)."); AddAttr>("expandTimes", "Expand times number for each dimension."); AddComment(R"DOC( Expand operator tiles the input by given times number. You should set times number for each dimension by providing attribute 'expandTimes'. The rank of X -should be between in 1 and 6. Please notice that size of 'expandTimes' must be -same with X's rank. +should be in [1, 6]. Please notice that size of 'expandTimes' must be same with +X's rank. )DOC"); } }; @@ -75,25 +78,27 @@ class ExpandGradOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; protected: - void InferShape(const framework::InferShapeContext& ctx) const override { - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "X must be initialized."); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")), - "Input(Out@GRAD) should not be null."); - auto x_dims = ctx.Input("X")->dims(); - std::vector expand_times = Attr>("expandTimes"); - auto out_dims = - ctx.Input(framework::GradVarName("Out"))->dims(); - auto* x_grad = - ctx.Output(framework::GradVarName("X")); + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null."); + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), + "Input(Out@GRAD) should not be null."); + auto x_dims = ctx->GetInputDim("X"); + std::vector expand_times = + ctx->Attrs().Get>("expandTimes"); + auto out_dims = ctx->GetInputDim(framework::GradVarName("Out")); for (size_t i = 0; i < expand_times.size(); ++i) { PADDLE_ENFORCE_EQ(x_dims[i] * expand_times[i], out_dims[i], "Each dimension size of Input(Out@GRAD) should be " "equal to multiplication of crroresponding dimension " - "size of Input(X) and expandTimes value."); + "size of Input(X) and Attr(expandTimes) value."); } - if (x_grad) x_grad->Resize(x_dims); + auto x_grad_name = framework::GradVarName("X"); + + if (ctx->HasOutput(x_grad_name)) { + ctx->SetOutputDim(x_grad_name, x_dims); + } } }; diff --git a/paddle/operators/expand_op.h b/paddle/operators/expand_op.h index f9cd519c70..bd17567c88 100644 --- a/paddle/operators/expand_op.h +++ b/paddle/operators/expand_op.h @@ -45,6 +45,8 @@ namespace paddle { namespace operators { +using Tensor = framework::Tensor; + template using EigenVector = framework::EigenVector; @@ -53,24 +55,24 @@ template ; template -class ExpandKernel : public framework::OpKernel { +class ExpandKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto rank = context.Input("X")->dims().size(); + auto rank = context.Input("X")->dims().size(); switch (rank) { REP_EXPAND_TEMPLATE(6) default: PADDLE_ENFORCE(false, "Only support tensor with rank being between 1 and 6."); - }; + } } protected: template void Expand(const framework::ExecutionContext& context) const { - auto* in0 = context.Input("X"); + auto* in0 = context.Input("X"); auto& expand_times = context.Attr>("expandTimes"); - auto* out0 = context.Output("Out"); + auto* out0 = context.Output("Out"); Eigen::DSizes bcast_dims; auto x_dims = in0->dims(); for (size_t i = 0; i < expand_times.size(); ++i) { @@ -85,10 +87,10 @@ class ExpandKernel : public framework::OpKernel { }; template -class ExpandGradKernel : public framework::OpKernel { +class ExpandGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* in0 = context.Input("X"); + auto* in0 = context.Input("X"); auto& expand_times = context.Attr>("expandTimes"); auto x_dims = in0->dims(); std::vector reshape_dims_vec; @@ -111,23 +113,17 @@ class ExpandGradKernel : public framework::OpKernel { int dims = reshape_dims_vec.size() * 6 + reduce_dims_vec.size() - 7; // no need reduce, just copy if (reduce_dims_vec.size() == 0) { - auto* in0 = - context.Input(framework::GradVarName("Out")); - auto* out0 = - context.Output(framework::GradVarName("X")); + auto* in0 = context.Input(framework::GradVarName("Out")); + auto* out0 = context.Output(framework::GradVarName("X")); out0->mutable_data(context.GetPlace()); - if (platform::is_cpu_place(context.GetPlace())) { - out0->CopyFrom(*in0, platform::CPUPlace()); - } else { - out0->CopyFrom(*in0, platform::GPUPlace()); - } + out0->CopyFrom(*in0, context.GetPlace(), context.device_context()); } else { switch (dims) { REP_EXPAND_GRAD_TEMPLATE(72) default: PADDLE_ENFORCE( false, "Only support tensor with rank being between 1 and 6."); - }; + } } } @@ -144,11 +140,9 @@ class ExpandGradKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(reduce_size, reduce_dims_vec.size(), "Inconsistent size between template Dims and " "reduce dimensions."); - auto* in0 = - context.Input(framework::GradVarName("Out")); - auto* out0 = - context.Output(framework::GradVarName("X")); - auto x = EigenVector::Flatten(*(context.Input("X"))); + auto* in0 = context.Input(framework::GradVarName("Out")); + auto* out0 = context.Output(framework::GradVarName("X")); + auto x = EigenVector::Flatten(*(context.Input("X"))); out0->mutable_data(context.GetPlace()); auto x_grad = EigenVector::Flatten(*out0); Eigen::DSizes reshape_dims; @@ -165,5 +159,5 @@ class ExpandGradKernel : public framework::OpKernel { } }; -} // operators -} // paddle +} // namespace operators +} // namespace paddle From 5cf8204171bbe11de9bff1eb6b6e59f2ad1a5263 Mon Sep 17 00:00:00 2001 From: peterzhang2029 Date: Thu, 9 Nov 2017 17:30:12 +0800 Subject: [PATCH 13/37] refine docString --- .../operators/bilinear_tensor_product_op.cc | 74 +++++++++---------- .../operators/bilinear_tensor_product_op.cu | 6 +- .../tests/test_bilinear_tensor_product_op.py | 54 -------------- 3 files changed, 40 insertions(+), 94 deletions(-) diff --git a/paddle/operators/bilinear_tensor_product_op.cc b/paddle/operators/bilinear_tensor_product_op.cc index dc02e5811e..c65ba7eb26 100644 --- a/paddle/operators/bilinear_tensor_product_op.cc +++ b/paddle/operators/bilinear_tensor_product_op.cc @@ -34,35 +34,28 @@ class BilinearTensorProductOp : public framework::OperatorWithKernel { auto y_dims = ctx->GetInputDim("Y"); auto weight_dims = ctx->GetInputDim("Weight"); - PADDLE_ENFORCE_EQ(x_dims.size(), 2UL, "The input X must be a 2D Tensor."); - PADDLE_ENFORCE_EQ(y_dims.size(), 2UL, "The input Y must be a 2D Tensor."); + PADDLE_ENFORCE_EQ(x_dims.size(), 2UL, "The input(X) must be a 2D Tensor."); + PADDLE_ENFORCE_EQ(y_dims.size(), 2UL, "The input(Y) must be a 2D Tensor."); PADDLE_ENFORCE_EQ(weight_dims.size(), 3UL, - "The input Weight must be a 3D tensor."); - PADDLE_ENFORCE(weight_dims[0], - "The first dimension of Weight must be larger than 0."); - PADDLE_ENFORCE(weight_dims[1], - "The second dimension of Weight must be larger than 0."); - PADDLE_ENFORCE(weight_dims[2], - "The third dimension of Weight must be larger than 0."); + "The input(Weight) must be a 3D tensor."); PADDLE_ENFORCE_EQ(x_dims[0], y_dims[0], - "The first dimension(batch_size) of X must be " - "equal to the first dimension of the Y."); + "The first dimension(batch_size) of input(X) must be " + "equal to the first dimension of the input(Y)."); PADDLE_ENFORCE_EQ(x_dims[1], weight_dims[1], - "The second dimension of X must be equal to the second " - "dimension of the Weight."); + "The second dimension of input(X) must be equal to " + "the second dimension of the input(Weight)."); PADDLE_ENFORCE_EQ(y_dims[1], weight_dims[2], - "The second dimension of Y must be equal to the third " - "dimension of the Weight."); + "The second dimension of input(Y) must be equal to " + "the third dimension of the input(Weight)."); if (ctx->HasInput("Bias")) { auto bias_dims = ctx->GetInputDim("Bias"); - PADDLE_ENFORCE_EQ(bias_dims.size(), 2UL, - "The input Bias must have 2 dimensions."); - PADDLE_ENFORCE_EQ(bias_dims[0], 1UL, - "The first dimention of input Bias must be 1."); + PADDLE_ENFORCE(bias_dims.size() == 2UL && bias_dims[0] == 1UL, + "The Input(Bias) must be a 2-D tensor with " + "the 2nd dimension fixed to 1 (a row vector)."); PADDLE_ENFORCE_EQ(bias_dims[1], weight_dims[0], - "The second dimension of Bias must be equal to the " - "first dimension of the Weight."); + "The second dimension of input(Bias) must be equal " + "to the first dimension of the input(Weight)."); } ctx->SetOutputDim("Out", {x_dims[0], weight_dims[0]}); @@ -75,12 +68,13 @@ class BilinearTensorProductOpMaker : public framework::OpProtoAndCheckerMaker { BilinearTensorProductOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "The first input of BilinearTensorProduct op."); - AddInput("Y", "The second input of BilinearTensorProduct op."); - AddInput("Weight", "The input weight of BilinearTensorProduct op."); - AddInput("Bias", "The input bias of BilinearTensorProduct op.") + AddInput("X", "The first input of bilinear_tensor_product operator."); + AddInput("Y", "The second input of bilinear_tensor_product operator."); + AddInput("Weight", + "The learnable parameters of bilinear_tensor_product operator."); + AddInput("Bias", "The learnable bias of bilinear_tensor_product operator.") .AsDispensable(); - AddOutput("Out", "The output of BilinearTensorProduct op."); + AddOutput("Out", "The output of bilinear_tensor_product operator."); AddComment(R"DOC( Bilinear Tensor Product operator. Given input X and Y, a 3D tensor weight, and bias. Each column of the @@ -104,27 +98,29 @@ class BilinearTensorProductOpGrad : public framework::OperatorWithKernel { PADDLE_ENFORCE(ctx->HasInput("Weight"), "Input(Weight) should not be null."); PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), - "Input (Out@GRAD) should not be null."); + "Input(Out@GRAD) should not be null."); auto x_dims = ctx->GetInputDim("X"); auto y_dims = ctx->GetInputDim("Y"); auto weight_dims = ctx->GetInputDim("Weight"); auto out_dims = ctx->GetInputDim(framework::GradVarName("Out")); PADDLE_ENFORCE_EQ(out_dims.size(), 2UL, - "The Out@GRAD must be a 2D Tensor."); + "The input(Out@GRAD) must be a 2D Tensor."); PADDLE_ENFORCE_EQ( x_dims[0], out_dims[0], - "The first dimension(batch_size) of Out@GRAD must be equal to " - "the first dimension of the Input(X)."); - PADDLE_ENFORCE_EQ(weight_dims[0], out_dims[1], - "The second dimension of Out@GRAD must be equal to " - "the third dimension of the Input(Weight)."); + "The first dimension(batch_size) of input(Out@GRAD) must be " + "equal to the first dimension of the Input(X)."); + PADDLE_ENFORCE_EQ( + weight_dims[0], out_dims[1], + "The second dimension of input(Out@GRAD) must be equal to " + "the third dimension of the Input(Weight)."); if (ctx->HasInput("Bias")) { auto bias_dims = ctx->GetInputDim("Bias"); - PADDLE_ENFORCE_EQ(bias_dims[1], out_dims[1], - "The second dimension of Out@GRAD must be equal to " - "the second dimension of the Input(Bias)."); + PADDLE_ENFORCE_EQ( + bias_dims[1], out_dims[1], + "The second dimension of input(Out@GRAD) must be equal to " + "the second dimension of the Input(Bias)."); auto bias_grad_name = framework::GradVarName("Bias"); if (ctx->HasOutput(bias_grad_name)) ctx->SetOutputDim(bias_grad_name, bias_dims); @@ -155,7 +151,9 @@ REGISTER_OP(bilinear_tensor_product, ops::BilinearTensorProductOp, ops::BilinearTensorProductOpGrad); REGISTER_OP_CPU_KERNEL( bilinear_tensor_product, - ops::BilinearTensorProductKernel); + ops::BilinearTensorProductKernel, + ops::BilinearTensorProductKernel); REGISTER_OP_CPU_KERNEL( bilinear_tensor_product_grad, - ops::BilinearTensorProductGradKernel); + ops::BilinearTensorProductGradKernel, + ops::BilinearTensorProductGradKernel); diff --git a/paddle/operators/bilinear_tensor_product_op.cu b/paddle/operators/bilinear_tensor_product_op.cu index 0f28a01c87..858d2668d0 100644 --- a/paddle/operators/bilinear_tensor_product_op.cu +++ b/paddle/operators/bilinear_tensor_product_op.cu @@ -18,7 +18,9 @@ limitations under the License. */ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( bilinear_tensor_product, - ops::BilinearTensorProductKernel); + ops::BilinearTensorProductKernel, + ops::BilinearTensorProductKernel); REGISTER_OP_GPU_KERNEL( bilinear_tensor_product_grad, - ops::BilinearTensorProductGradKernel); + ops::BilinearTensorProductGradKernel, + ops::BilinearTensorProductGradKernel); diff --git a/python/paddle/v2/framework/tests/test_bilinear_tensor_product_op.py b/python/paddle/v2/framework/tests/test_bilinear_tensor_product_op.py index 1c1f388098..080ca43b82 100644 --- a/python/paddle/v2/framework/tests/test_bilinear_tensor_product_op.py +++ b/python/paddle/v2/framework/tests/test_bilinear_tensor_product_op.py @@ -33,59 +33,5 @@ class TestBilinearTensorProductOp(OpTest): self.check_grad(['X', 'Y', 'Weight', 'Bias'], 'Out') -class TestBilinearTensorProductOp2(TestBilinearTensorProductOp): - def setUp(self): - self.op_type = "bilinear_tensor_product" - batch_size = 1 - size0 = 1 - size1 = 1 - size2 = 1 - a = np.random.random((batch_size, size0)).astype("float32") - b = np.random.random((batch_size, size1)).astype("float32") - w = np.random.random((size2, size0, size1)).astype("float32") - bias = np.random.random((1, size2)).astype("float32") - output = np.zeros((batch_size, size2)).astype("float32") - for i in range(size2): - w_i = w[i, :, :] - output[:, i] = np.sum(np.matmul(a, w_i) * b, axis=1) - self.inputs = { - 'X': a, - 'Y': b, - 'Weight': w, - 'Bias': bias, - } - self.outputs = {'Out': output + bias} - - def test_check_output(self): - self.check_output() - - def test_check_grad_normal(self): - self.check_grad(['X', 'Y', 'Weight', 'Bias'], 'Out') - - -class TestBilinearTensorProductOp3(TestBilinearTensorProductOp): - def setUp(self): - self.op_type = "bilinear_tensor_product" - batch_size = 7 - size0 = 4 - size1 = 5 - size2 = 6 - a = np.random.random((batch_size, size0)).astype("float32") - b = np.random.random((batch_size, size1)).astype("float32") - w = np.random.random((size2, size0, size1)).astype("float32") - output = np.zeros((batch_size, size2)).astype("float32") - for i in range(size2): - w_i = w[i, :, :] - output[:, i] = np.sum(np.matmul(a, w_i) * b, axis=1) - self.inputs = {'X': a, 'Y': b, 'Weight': w} - self.outputs = {'Out': output} - - def test_check_output(self): - self.check_output() - - def test_check_grad_normal(self): - self.check_grad(['X', 'Y', 'Weight'], 'Out') - - if __name__ == "__main__": unittest.main() From d04c8538a9f939b837e86d741037da873e1ccbd9 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Fri, 10 Nov 2017 15:11:41 +0800 Subject: [PATCH 14/37] Refine .cc and .h, more unit test more readable. --- paddle/operators/expand_op.cc | 27 +++++++++------- paddle/operators/expand_op.h | 31 ++++++++++++------- .../v2/framework/tests/test_expand_op.py | 20 ++++++------ 3 files changed, 46 insertions(+), 32 deletions(-) diff --git a/paddle/operators/expand_op.cc b/paddle/operators/expand_op.cc index 5d83b1d9d2..eddd359af2 100644 --- a/paddle/operators/expand_op.cc +++ b/paddle/operators/expand_op.cc @@ -25,13 +25,15 @@ class ExpandOp : public framework::OperatorWithKernel { protected: void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must be initialized."); + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should not be null."); + std::vector expand_times = - ctx->Attrs().Get>("expandTimes"); + ctx->Attrs().Get>("expand_times"); auto x_dims = ctx->GetInputDim("X"); PADDLE_ENFORCE_EQ(static_cast(x_dims.size()), expand_times.size(), - "The number of Attr(expandTimes)'s value must be equal " + "The number of Attr(expand_times)'s value must be equal " "to the rank of Input(X)."); PADDLE_ENFORCE_LE(x_dims.size(), 6, "The rank of Input(X) must not be greater than 6."); @@ -39,13 +41,15 @@ class ExpandOp : public framework::OperatorWithKernel { std::vector out_shape(x_dims.size()); for (size_t i = 0; i < expand_times.size(); ++i) { PADDLE_ENFORCE_GE(expand_times[i], 1, - "Each value of Attr(expandTimes) should not be " + "Each value of Attr(expand_times) should not be " "less than 1."); out_shape[i] = x_dims[i] * expand_times[i]; } ctx->SetOutputDim("Out", framework::make_ddim(out_shape)); - ctx->ShareLoD("X", "Out"); + if (out_shape[0] == x_dims[0]) { + ctx->ShareLoD("X", "Out"); + } } }; @@ -61,13 +65,13 @@ class ExpandOpMaker : public framework::OpProtoAndCheckerMaker { "The rank of Output(Out) is same as Input(X) except that each " "dimension size of Output(Out) is equal to corresponding " "dimension size of Input(X) multiplying corresponding value of " - "Attr(expandTimes)."); - AddAttr>("expandTimes", + "Attr(expand_times)."); + AddAttr>("expand_times", "Expand times number for each dimension."); AddComment(R"DOC( Expand operator tiles the input by given times number. You should set times -number for each dimension by providing attribute 'expandTimes'. The rank of X -should be in [1, 6]. Please notice that size of 'expandTimes' must be same with +number for each dimension by providing attribute 'expand_times'. The rank of X +should be in [1, 6]. Please notice that size of 'expand_times' must be same with X's rank. )DOC"); } @@ -82,16 +86,17 @@ class ExpandGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null."); PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), "Input(Out@GRAD) should not be null."); + auto x_dims = ctx->GetInputDim("X"); std::vector expand_times = - ctx->Attrs().Get>("expandTimes"); + ctx->Attrs().Get>("expand_times"); auto out_dims = ctx->GetInputDim(framework::GradVarName("Out")); for (size_t i = 0; i < expand_times.size(); ++i) { PADDLE_ENFORCE_EQ(x_dims[i] * expand_times[i], out_dims[i], "Each dimension size of Input(Out@GRAD) should be " "equal to multiplication of crroresponding dimension " - "size of Input(X) and Attr(expandTimes) value."); + "size of Input(X) and Attr(expand_times) value."); } auto x_grad_name = framework::GradVarName("X"); diff --git a/paddle/operators/expand_op.h b/paddle/operators/expand_op.h index bd17567c88..8ae2c11a5d 100644 --- a/paddle/operators/expand_op.h +++ b/paddle/operators/expand_op.h @@ -25,14 +25,17 @@ #include "paddle/framework/op_registry.h" #include "paddle/framework/operator.h" +#define MAX_RANK_SUPPORTED 6 + #define EXPAND_TEMPLATE(z, n, data) \ case n + 1: { \ Expand(context); \ break; \ } #define REP_EXPAND_TEMPLATE(n) BOOST_PP_REPEAT(n, EXPAND_TEMPLATE, ~) - -#define COND(n) BOOST_PP_GREATER_EQUAL(BOOST_PP_DIV(n, 6), BOOST_PP_MOD(n, 6)) +#define COND(n) \ + BOOST_PP_GREATER_EQUAL(BOOST_PP_DIV(n, MAX_RANK_SUPPORTED), \ + BOOST_PP_MOD(n, MAX_RANK_SUPPORTED)) #define EXPAND_GRAD_CASE(n) \ case n: { \ ExpandBackward(context, reshape_dims_vec, reduce_dims_vec); \ @@ -46,7 +49,6 @@ namespace paddle { namespace operators { using Tensor = framework::Tensor; - template using EigenVector = framework::EigenVector; @@ -60,7 +62,7 @@ class ExpandKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto rank = context.Input("X")->dims().size(); switch (rank) { - REP_EXPAND_TEMPLATE(6) + REP_EXPAND_TEMPLATE(MAX_RANK_SUPPORTED) default: PADDLE_ENFORCE(false, "Only support tensor with rank being between 1 and 6."); @@ -71,7 +73,7 @@ class ExpandKernel : public framework::OpKernel { template void Expand(const framework::ExecutionContext& context) const { auto* in0 = context.Input("X"); - auto& expand_times = context.Attr>("expandTimes"); + auto& expand_times = context.Attr>("expand_times"); auto* out0 = context.Output("Out"); Eigen::DSizes bcast_dims; auto x_dims = in0->dims(); @@ -91,8 +93,14 @@ class ExpandGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* in0 = context.Input("X"); - auto& expand_times = context.Attr>("expandTimes"); + auto& expand_times = context.Attr>("expand_times"); auto x_dims = in0->dims(); + // 1. reshape_dims_vec is the broadcast parameter. For each dimension i, + // if expand_times[i] > 1 and x_dims[i] > 1, i will be splitted to two + // dimensions [expand_times[i], x_dims[i]]. + // 2. reduce_dims_vec is the dimension parameter to compute gradients. For + // each dimension expanded, the gradients should be summed to original + // size. std::vector reshape_dims_vec; std::vector reduce_dims_vec; for (size_t i = 0; i < expand_times.size(); ++i) { @@ -110,7 +118,8 @@ class ExpandGradKernel : public framework::OpKernel { } } - int dims = reshape_dims_vec.size() * 6 + reduce_dims_vec.size() - 7; + int dims = reshape_dims_vec.size() * MAX_RANK_SUPPORTED + + reduce_dims_vec.size() - MAX_RANK_SUPPORTED - 1; // no need reduce, just copy if (reduce_dims_vec.size() == 0) { auto* in0 = context.Input(framework::GradVarName("Out")); @@ -132,8 +141,8 @@ class ExpandGradKernel : public framework::OpKernel { void ExpandBackward(const framework::ExecutionContext& context, const std::vector& reshape_dims_vec, const std::vector& reduce_dims_vec) const { - size_t reshape_size = Dims / 6 + 1; - size_t reduce_size = Dims % 6 + 1; + size_t reshape_size = Dims / MAX_RANK_SUPPORTED + 1; + size_t reduce_size = Dims % MAX_RANK_SUPPORTED + 1; PADDLE_ENFORCE_EQ(reshape_size, reshape_dims_vec.size(), "Inconsistent size between template Dims and " "reshape dimensions."); @@ -145,11 +154,11 @@ class ExpandGradKernel : public framework::OpKernel { auto x = EigenVector::Flatten(*(context.Input("X"))); out0->mutable_data(context.GetPlace()); auto x_grad = EigenVector::Flatten(*out0); - Eigen::DSizes reshape_dims; + Eigen::DSizes reshape_dims; for (size_t i = 0; i < reshape_size; ++i) { reshape_dims[i] = reshape_dims_vec[i]; } - Eigen::DSizes reduce_dims; + Eigen::DSizes reduce_dims; for (size_t i = 0; i < reduce_size; ++i) { reduce_dims[i] = reduce_dims_vec[i]; } diff --git a/python/paddle/v2/framework/tests/test_expand_op.py b/python/paddle/v2/framework/tests/test_expand_op.py index 1e286b9e81..0440f7a2bb 100644 --- a/python/paddle/v2/framework/tests/test_expand_op.py +++ b/python/paddle/v2/framework/tests/test_expand_op.py @@ -7,7 +7,7 @@ class TestExpandOpRank1(OpTest): def setUp(self): self.op_type = "expand" self.inputs = {'X': np.random.random(12).astype("float32")} - self.attrs = {'expandTimes': [2]} + self.attrs = {'expand_times': [2]} output = np.tile(self.inputs['X'], 2) self.outputs = {'Out': output} @@ -18,11 +18,11 @@ class TestExpandOpRank1(OpTest): self.check_grad(['X'], 'Out') -class TestExpandOpRank2_1(OpTest): +class TestExpandOpRank2_Corner(OpTest): def setUp(self): self.op_type = "expand" self.inputs = {'X': np.random.random((12, 14)).astype("float32")} - self.attrs = {'expandTimes': [1, 1]} + self.attrs = {'expand_times': [1, 1]} output = np.tile(self.inputs['X'], (1, 1)) self.outputs = {'Out': output} @@ -33,11 +33,11 @@ class TestExpandOpRank2_1(OpTest): self.check_grad(['X'], 'Out') -class TestExpandOpRank2_2(OpTest): +class TestExpandOpRank2(OpTest): def setUp(self): self.op_type = "expand" self.inputs = {'X': np.random.random((12, 14)).astype("float32")} - self.attrs = {'expandTimes': [2, 3]} + self.attrs = {'expand_times': [2, 3]} output = np.tile(self.inputs['X'], (2, 3)) self.outputs = {'Out': output} @@ -48,11 +48,11 @@ class TestExpandOpRank2_2(OpTest): self.check_grad(['X'], 'Out') -class TestExpandOpRank3_1(OpTest): +class TestExpandOpRank3_Corner(OpTest): def setUp(self): self.op_type = "expand" self.inputs = {'X': np.random.random((2, 4, 5)).astype("float32")} - self.attrs = {'expandTimes': [1, 1, 1]} + self.attrs = {'expand_times': [1, 1, 1]} output = np.tile(self.inputs['X'], (1, 1, 1)) self.outputs = {'Out': output} @@ -63,11 +63,11 @@ class TestExpandOpRank3_1(OpTest): self.check_grad(['X'], 'Out') -class TestExpandOpRank3_2(OpTest): +class TestExpandOpRank3(OpTest): def setUp(self): self.op_type = "expand" self.inputs = {'X': np.random.random((2, 4, 5)).astype("float32")} - self.attrs = {'expandTimes': [2, 1, 4]} + self.attrs = {'expand_times': [2, 1, 4]} output = np.tile(self.inputs['X'], (2, 1, 4)) self.outputs = {'Out': output} @@ -82,7 +82,7 @@ class TestExpandOpRank4(OpTest): def setUp(self): self.op_type = "expand" self.inputs = {'X': np.random.random((2, 4, 5, 7)).astype("float32")} - self.attrs = {'expandTimes': [3, 2, 1, 2]} + self.attrs = {'expand_times': [3, 2, 1, 2]} output = np.tile(self.inputs['X'], (3, 2, 1, 2)) self.outputs = {'Out': output} From e1b8f5fbffa7bed827d1ae0282dc82f7dde1e20c Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 10 Nov 2017 13:48:54 +0800 Subject: [PATCH 15/37] add resize of MKLDNNMatrix --- paddle/math/MKLDNNMatrix.cpp | 7 +------ paddle/math/MKLDNNMatrix.h | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/paddle/math/MKLDNNMatrix.cpp b/paddle/math/MKLDNNMatrix.cpp index 21a8f73c3e..a710479bab 100644 --- a/paddle/math/MKLDNNMatrix.cpp +++ b/paddle/math/MKLDNNMatrix.cpp @@ -152,12 +152,7 @@ void MKLDNNMatrix::downSpatial() { } memory::desc md = memory::desc(dstDims, getDtype(), dstFmt); memory::primitive_desc pd = memory::primitive_desc(md, getEngine()); - mkldnn_primitive_t result; - mkldnn::error::wrap_c_api( - mkldnn_primitive_create(&result, pd.get(), nullptr, nullptr), - "could not create a memory primitive"); - reset(result); - set_data_handle(data_); + resetMKLDNNMemory(pd, data_); } } // namespace paddle diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h index 54cfefe23b..39d40a1f61 100644 --- a/paddle/math/MKLDNNMatrix.h +++ b/paddle/math/MKLDNNMatrix.h @@ -145,6 +145,27 @@ public: m_.reset(); } + /** + * override the CpuMatrix::resize + */ + void resize(size_t newHeight, size_t newWidth) override { + m_->resize(newHeight, newWidth); + if (data_ == m_->getData() && elementCnt_ == newHeight * newWidth) { + return; + } + CpuMatrix::setData(data_); + height_ = newHeight; + width_ = newWidth; + elementCnt_ = newHeight * newWidth; + stride_ = width_; + auto pd = mkldnn::memory::primitive_desc( + mkldnn::memory::desc({(int)newHeight, (int)newWidth}, + getDtype(), + mkldnn::memory::format::nc), + getEngine()); + resetMKLDNNMemory(pd, data_); + } + /** * override Matrix::getData * check data before return @@ -215,6 +236,17 @@ protected: memory::format srcFmt, memory::format dstFmt, memory::dims dm); + /** + * reset this MKLDNN Memory from primitve desc + */ + void resetMKLDNNMemory(memory::primitive_desc pd, real* data) { + mkldnn_primitive_t result; + mkldnn::error::wrap_c_api( + mkldnn_primitive_create(&result, pd.get(), nullptr, nullptr), + "could not create a memory primitive"); + reset(result); + set_data_handle(data); + } private: // save the CpuMatrixPtr in case the buffer released outside From 7829034da441ab3eddbc111c19ff433f8f843e0a Mon Sep 17 00:00:00 2001 From: guosheng Date: Fri, 10 Nov 2017 18:40:55 +0800 Subject: [PATCH 16/37] Refine ROIPoolLayer by following comments --- paddle/gserver/layers/ROIPoolLayer.cpp | 2 ++ paddle/gserver/layers/ROIPoolLayer.h | 1 + python/paddle/trainer/config_parser.py | 5 +++-- python/paddle/trainer_config_helpers/layers.py | 3 ++- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/paddle/gserver/layers/ROIPoolLayer.cpp b/paddle/gserver/layers/ROIPoolLayer.cpp index 131fd7e52b..99cfddb0cf 100644 --- a/paddle/gserver/layers/ROIPoolLayer.cpp +++ b/paddle/gserver/layers/ROIPoolLayer.cpp @@ -91,6 +91,8 @@ void ROIPoolLayer::forward(PassType passType) { real* argmaxData = maxIdxs_->getData(); for (size_t n = 0; n < numROIs; ++n) { + // the first five elememts of each RoI should be: + // batch_idx, roi_x_start, roi_y_start, roi_x_end, roi_y_end size_t roiBatchIdx = bottomROIs[0]; size_t roiStartW = round(bottomROIs[1] * spatialScale_); size_t roiStartH = round(bottomROIs[2] * spatialScale_); diff --git a/paddle/gserver/layers/ROIPoolLayer.h b/paddle/gserver/layers/ROIPoolLayer.h index 796467a5c8..4f07e49d6f 100644 --- a/paddle/gserver/layers/ROIPoolLayer.h +++ b/paddle/gserver/layers/ROIPoolLayer.h @@ -41,6 +41,7 @@ protected: size_t pooledHeight_; real spatialScale_; + // Since there is no int matrix, use real maxtrix instead. MatrixPtr maxIdxs_; public: diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index f31252882e..43d02bf70e 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1971,13 +1971,14 @@ class DetectionOutputLayer(LayerBase): @config_layer('roi_pool') class ROIPoolLayer(LayerBase): - def __init__(self, name, inputs, pooled_width, pooled_height, - spatial_scale): + def __init__(self, name, inputs, pooled_width, pooled_height, spatial_scale, + num_channels, **xargs): super(ROIPoolLayer, self).__init__(name, 'roi_pool', 0, inputs) config_assert(len(inputs) == 2, 'ROIPoolLayer must have 2 inputs') self.config.inputs[0].roi_pool_conf.pooled_width = pooled_width self.config.inputs[0].roi_pool_conf.pooled_height = pooled_height self.config.inputs[0].roi_pool_conf.spatial_scale = spatial_scale + self.set_cnn_layer(name, pooled_height, pooled_width, num_channels) @config_layer('data') diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 623ca047cd..617fbff948 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -1345,7 +1345,8 @@ def roi_pool_layer(input, inputs=[input.name, rois.name], pooled_width=pooled_width, pooled_height=pooled_height, - spatial_scale=spatial_scale) + spatial_scale=spatial_scale, + num_channels=num_channels) return LayerOutput( name, LayerType.ROI_POOL_LAYER, parents=[input, rois], size=size) From 79e0a26a6472a047ff5b3ebaedc3da6c6eeb6d2a Mon Sep 17 00:00:00 2001 From: guosheng Date: Fri, 10 Nov 2017 20:03:36 +0800 Subject: [PATCH 17/37] Fix test_roi_pool_layer.py --- .../protostr/test_roi_pool_layer.protostr | 55 ++++++++++++++++++- .../tests/configs/test_roi_pool_layer.py | 11 +++- 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr index e8c379b17b..f1bc65b3ae 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr @@ -13,12 +13,44 @@ layers { size: 10 active_type: "" } +layers { + name: "__conv_0__" + type: "exconv" + size: 3136 + active_type: "" + inputs { + input_layer_name: "data" + input_parameter_name: "___conv_0__.w0" + conv_conf { + filter_size: 3 + channels: 3 + stride: 1 + padding: 1 + groups: 1 + filter_channels: 3 + output_x: 14 + img_size: 14 + caffe_mode: true + filter_size_y: 3 + padding_y: 1 + stride_y: 1 + output_y: 14 + img_size_y: 14 + } + } + bias_parameter_name: "___conv_0__.wbias" + num_filters: 16 + shared_biases: true + height: 14 + width: 14 +} layers { name: "__roi_pool_0__" type: "roi_pool" + size: 784 active_type: "" inputs { - input_layer_name: "data" + input_layer_name: "__conv_0__" roi_pool_conf { pooled_width: 7 pooled_height: 7 @@ -28,6 +60,26 @@ layers { inputs { input_layer_name: "rois" } + height: 7 + width: 7 +} +parameters { + name: "___conv_0__.w0" + size: 432 + initial_mean: 0.0 + initial_std: 0.272165526976 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___conv_0__.wbias" + size: 16 + initial_mean: 0.0 + initial_std: 0.0 + dims: 16 + dims: 1 + initial_strategy: 0 + initial_smart: false } input_layer_names: "data" input_layer_names: "rois" @@ -36,6 +88,7 @@ sub_models { name: "root" layer_names: "data" layer_names: "rois" + layer_names: "__conv_0__" layer_names: "__roi_pool_0__" input_layer_names: "data" input_layer_names: "rois" diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py index 0d6ca9f1bb..b739a81b85 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py @@ -4,8 +4,17 @@ data = data_layer(name='data', size=3 * 14 * 14, height=14, width=14) rois = data_layer(name='rois', size=10) -roi_pool = roi_pool_layer( +conv = img_conv_layer( input=data, + filter_size=3, + num_channels=3, + num_filters=16, + padding=1, + act=LinearActivation(), + bias_attr=True) + +roi_pool = roi_pool_layer( + input=conv, rois=rois, pooled_width=7, pooled_height=7, From d7e7a1d7a5d09cfc74389362ff43f1f891463914 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Fri, 10 Nov 2017 20:37:37 +0800 Subject: [PATCH 18/37] Add using case. --- paddle/operators/expand_op.cc | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/paddle/operators/expand_op.cc b/paddle/operators/expand_op.cc index eddd359af2..282775fcda 100644 --- a/paddle/operators/expand_op.cc +++ b/paddle/operators/expand_op.cc @@ -72,7 +72,24 @@ class ExpandOpMaker : public framework::OpProtoAndCheckerMaker { Expand operator tiles the input by given times number. You should set times number for each dimension by providing attribute 'expand_times'. The rank of X should be in [1, 6]. Please notice that size of 'expand_times' must be same with -X's rank. +X's rank. Following is a using case: + +Input(X) is a 3-D tensor with shape [2, 3, 1]: + + [ + [[1], [2], [3]], + [[4], [5], [6]] + ] + +Attr(expand_times): [1, 2, 2] + +Output(Out) is a 3-D tensor with shape [2, 6, 2]: + + [ + [[1, 1], [2, 2], [3, 3], [1, 1], [2, 2], [3, 3]], + [[4, 4], [5, 5], [6, 6], [4, 4], [5, 5], [6, 6]] + ] + )DOC"); } }; From 2378679a9e4344d513654838726cb97ac2f318ff Mon Sep 17 00:00:00 2001 From: emailweixu Date: Fri, 10 Nov 2017 09:05:06 -0800 Subject: [PATCH 19/37] Fix a dead lock bug for dyload/nccl.h when nccl lib cannot be loaded (#5533) It caused by a bug of std::call_once described in https://stackoverflow.com/questions/41717579/stdcall-once-hangs-on-second-call-after-callable-threw-on-first-call. It is likely caused by a deeper bug of pthread_once, which is discussed in https://patchwork.ozlabs.org/patch/482350/ --- paddle/operators/nccl/nccl_gpu_common.h | 11 ++++-- paddle/platform/call_once.h | 50 +++++++++++++++++++++++++ paddle/platform/dynload/nccl.h | 25 +++++++------ 3 files changed, 71 insertions(+), 15 deletions(-) create mode 100644 paddle/platform/call_once.h diff --git a/paddle/operators/nccl/nccl_gpu_common.h b/paddle/operators/nccl/nccl_gpu_common.h index 5858cd4839..48e322f993 100644 --- a/paddle/operators/nccl/nccl_gpu_common.h +++ b/paddle/operators/nccl/nccl_gpu_common.h @@ -35,6 +35,7 @@ constexpr int kInvalidGPUId = -1; struct Communicator { std::vector comms_; std::unordered_map comm_id_map_; + bool inited_; Communicator() {} @@ -42,17 +43,21 @@ struct Communicator { void InitAll(const std::vector& gpus) { comms_.resize(gpus.size()); + inited_ = false; for (size_t i = 0; i < gpus.size(); ++i) { comm_id_map_[gpus[i]] = i; } PADDLE_ENFORCE( dynload::ncclCommInitAll(comms_.data(), gpus.size(), gpus.data())); + inited_ = true; } ~Communicator() { - for (size_t i = 0; i < comms_.size(); ++i) { - // FIXME(dzh) : PADDLE_ENFORCE return void - dynload::ncclCommDestroy(comms_[i]); + if (inited_) { + for (size_t i = 0; i < comms_.size(); ++i) { + // FIXME(dzh) : PADDLE_ENFORCE return void + dynload::ncclCommDestroy(comms_[i]); + } } } diff --git a/paddle/platform/call_once.h b/paddle/platform/call_once.h new file mode 100644 index 0000000000..248baf6613 --- /dev/null +++ b/paddle/platform/call_once.h @@ -0,0 +1,50 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include + +namespace paddle { +namespace platform { + +/* + The current implementation of std::call_once has a bug described in + https://stackoverflow.com/questions/41717579/stdcall-once-hangs-on-second-call-after-callable-threw-on-first-call. + This is likely caused by a deeper bug of pthread_once, which is discussed in + https://patchwork.ozlabs.org/patch/482350/ + + This wrap is a hack to avoid this bug. +*/ +template +inline void call_once(std::once_flag& flag, Callable&& f, Args&&... args) { + bool good = false; + std::exception ex; + std::call_once(flag, [&]() { + try { + f(args...); + good = true; + } catch (const std::exception& e) { + ex = e; + } catch (...) { + ex = std::runtime_error("excption caught in call_once"); + } + }); + if (!good) { + throw std::exception(ex); + } +} + +} // namespace platform +} // namespace paddle diff --git a/paddle/platform/dynload/nccl.h b/paddle/platform/dynload/nccl.h index 0618c7414f..981b2ab258 100644 --- a/paddle/platform/dynload/nccl.h +++ b/paddle/platform/dynload/nccl.h @@ -17,6 +17,7 @@ #include #include #include +#include "paddle/platform/call_once.h" #include "paddle/platform/dynload/dynamic_loader.h" namespace paddle { @@ -27,18 +28,18 @@ extern std::once_flag nccl_dso_flag; extern void* nccl_dso_handle; #ifdef PADDLE_USE_DSO -#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \ - struct DynLoad__##__name { \ - template \ - auto operator()(Args... args) -> decltype(__name(args...)) { \ - using nccl_func = decltype(__name(args...)) (*)(Args...); \ - std::call_once(nccl_dso_flag, \ - paddle::platform::dynload::GetNCCLDsoHandle, \ - &nccl_dso_handle); \ - void* p_##__name = dlsym(nccl_dso_handle, #__name); \ - return reinterpret_cast(p_##__name)(args...); \ - } \ - }; \ +#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + auto operator()(Args... args) -> decltype(__name(args...)) { \ + using nccl_func = decltype(__name(args...)) (*)(Args...); \ + platform::call_once(nccl_dso_flag, \ + paddle::platform::dynload::GetNCCLDsoHandle, \ + &nccl_dso_handle); \ + void* p_##__name = dlsym(nccl_dso_handle, #__name); \ + return reinterpret_cast(p_##__name)(args...); \ + } \ + }; \ extern DynLoad__##__name __name #else #define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \ From edb22c2f0c10bd8e70e3e917a6e2c10a2ab044b3 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 10 Nov 2017 10:54:21 -0800 Subject: [PATCH 20/37] Add Scope::Rename (#5534) it is useful in gradient phase of an operator with block --- paddle/framework/scope.cc | 18 ++++++++++++++++++ paddle/framework/scope.h | 9 ++++++++- paddle/operators/recurrent_op.cc | 24 ++++++++---------------- 3 files changed, 34 insertions(+), 17 deletions(-) diff --git a/paddle/framework/scope.cc b/paddle/framework/scope.cc index fb2c691056..9428b8a07e 100644 --- a/paddle/framework/scope.cc +++ b/paddle/framework/scope.cc @@ -98,5 +98,23 @@ void Scope::DeleteScope(Scope* scope) { delete scope; } +void Scope::Rename(const std::string& origin_name, + const std::string& new_name) const { + auto origin_it = vars_.find(origin_name); + PADDLE_ENFORCE(origin_it != vars_.end(), + "Cannot find original variable with name %s", origin_name); + auto new_it = vars_.find(new_name); + PADDLE_ENFORCE(new_it == vars_.end(), + "The variable with name %s is already in the scope", new_name); + vars_[new_name] = origin_it->second; + vars_.erase(origin_it); +} + +std::string Scope::Rename(const std::string& origin_name) const { + auto var_name = string::Sprintf("%p.%d", this, vars_.size()); + Rename(origin_name, var_name); + return var_name; +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/scope.h b/paddle/framework/scope.h index fb66094939..c2aafb6ad8 100644 --- a/paddle/framework/scope.h +++ b/paddle/framework/scope.h @@ -68,11 +68,18 @@ class Scope { // enumerate all the variables current contains. std::vector GetAllNames(bool recursive = false) const; + // Rename variable to a new name + void Rename(const std::string& origin_name, + const std::string& new_name) const; + + // Rename variable to a new name and return the new name + std::string Rename(const std::string& origin_name) const; + private: // Call Scope::NewScope for a sub-scope. explicit Scope(Scope const* parent) : parent_(parent) {} - std::unordered_map vars_; + mutable std::unordered_map vars_; mutable std::list kids_; Scope const* parent_{nullptr}; diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index b0e87b7059..0075ccd242 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -387,8 +387,8 @@ class RecurrentGradOp : public RecurrentBase { auto &p_names = Inputs(kParameters); PADDLE_ENFORCE_EQ(pg_names.size(), p_names.size()); - for (size_t prog_id = 0; prog_id < pg_names.size(); ++prog_id) { - auto inside_grad_name = framework::GradVarName(p_names[prog_id]); + for (size_t param_id = 0; param_id < pg_names.size(); ++param_id) { + auto inside_grad_name = framework::GradVarName(p_names[param_id]); // If does not compute gradient of that variable inside rnn, just // continue @@ -406,27 +406,19 @@ class RecurrentGradOp : public RecurrentBase { attrs["value"] = 0.0f; auto zero_op = framework::OpRegistry::CreateOp( - "fill_constant", {}, {{"Out", {pg_names[prog_id]}}}, attrs); + "fill_constant", {}, {{"Out", {pg_names[param_id]}}}, attrs); zero_op->Run(scope, dev_ctx); } + auto new_inside_name = cur_scope.Rename(inside_grad_name); // sum gradient - auto *outside_var = scope.FindVar(pg_names[prog_id]); - PADDLE_ENFORCE(outside_var != nullptr); - auto &outside_tensor = - *outside_var->GetMutable(); - - std::string result_var_name; - auto *local_result_var = cur_scope.Var(&result_var_name); - auto &local_result_tensor = - *local_result_var->GetMutable(); - - local_result_tensor.ShareDataWith(outside_tensor); auto sum_op = framework::OpRegistry::CreateOp( - "sum", {{"X", {result_var_name, inside_grad_name}}}, - {{"Out", {result_var_name}}}, {}); + "sum", {{"X", {pg_names[param_id], new_inside_name}}}, + {{"Out", {pg_names[param_id]}}}, {}); sum_op->Run(cur_scope, dev_ctx); + + cur_scope.Rename(new_inside_name, inside_grad_name); } } VLOG(5) << "Accumulate Parameter finished "; From 58b4c9af34da909c81ff8ce1c6f6f1e114c97537 Mon Sep 17 00:00:00 2001 From: emailweixu Date: Fri, 10 Nov 2017 11:45:11 -0800 Subject: [PATCH 21/37] Fixing duplicate struct name TensorSetConstant. (#5532) TensorSetConstant struct is used both in math_function.cc and math_function.cu. Somehow the release version can correctly handle it. But in debug version, set_constant_with_place() in math_function.cu uses the TensorSetConstant in math_function.cc and causes crash. --- paddle/operators/math/math_function.cc | 6 +++--- paddle/operators/math/math_function.cu | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/paddle/operators/math/math_function.cc b/paddle/operators/math/math_function.cc index 09c3f0b1e6..1b0d4c8bdc 100644 --- a/paddle/operators/math/math_function.cc +++ b/paddle/operators/math/math_function.cc @@ -234,8 +234,8 @@ void gemv(const platform::DeviceContext& context, template struct SetConstant; -struct TensorSetConstant { - TensorSetConstant(framework::Tensor* tensor, float value) +struct TensorSetConstantCPU { + TensorSetConstantCPU(framework::Tensor* tensor, float value) : tensor_(tensor), value_(value) {} template void operator()() const { @@ -252,7 +252,7 @@ void set_constant_with_place( const platform::DeviceContext& context, framework::Tensor* tensor, float value) { framework::VisitDataType(framework::ToDataType(tensor->type()), - TensorSetConstant(tensor, value)); + TensorSetConstantCPU(tensor, value)); } struct TensorSetConstantWithPlace : public boost::static_visitor { diff --git a/paddle/operators/math/math_function.cu b/paddle/operators/math/math_function.cu index 255e480680..817deec943 100644 --- a/paddle/operators/math/math_function.cu +++ b/paddle/operators/math/math_function.cu @@ -233,8 +233,8 @@ void gemv(const platform::DeviceContext& context, template struct SetConstant; -struct TensorSetConstant { - TensorSetConstant(const platform::DeviceContext& context, +struct TensorSetConstantGPU { + TensorSetConstantGPU(const platform::DeviceContext& context, framework::Tensor* tensor, float value) : context_(context), tensor_(tensor), value_(value) {} @@ -254,7 +254,7 @@ void set_constant_with_place( const platform::DeviceContext& context, framework::Tensor* tensor, float value) { framework::VisitDataType(framework::ToDataType(tensor->type()), - TensorSetConstant(context, tensor, value)); + TensorSetConstantGPU(context, tensor, value)); } } // namespace math From 23b9bc0a6fcc800c5ad28d02f9c4c5d6f29d6fdd Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Fri, 10 Nov 2017 22:28:11 -0800 Subject: [PATCH 22/37] "fix ci failed" (#5567) * "fix ci failed" * "comment out seq_concate op to unblock PRs" --- paddle/operators/math/CMakeLists.txt | 2 +- python/paddle/v2/framework/tests/test_seq_concat_op.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/paddle/operators/math/CMakeLists.txt b/paddle/operators/math/CMakeLists.txt index 90bc9f4f92..ab7f23f570 100644 --- a/paddle/operators/math/CMakeLists.txt +++ b/paddle/operators/math/CMakeLists.txt @@ -13,7 +13,7 @@ if(WITH_GPU) nv_library(context_project SRCS context_project.cc context_project.cu DEPS device_context) nv_library(sequence2batch SRCS sequence2batch.cc sequence2batch.cu DEPS device_context) nv_library(lstm_compute SRCS lstm_compute.cc lstm_compute.cu DEPS device_context activation_functions) - nv_library(gru_compute SRCS gru_compute.cc gru_compute.cu DEPS device_context activation_functions) + nv_library(gru_compute SRCS gru_compute.cc gru_compute.cu DEPS device_context activation_functions math_function) else() cc_library(math_function SRCS math_function.cc im2col.cc DEPS cblas device_context operator) cc_library(selected_rows_functor SRCS selected_rows_functor.cc DEPS selected_rows math_function) diff --git a/python/paddle/v2/framework/tests/test_seq_concat_op.py b/python/paddle/v2/framework/tests/test_seq_concat_op.py index 7659fa8789..dccc6ed8af 100644 --- a/python/paddle/v2/framework/tests/test_seq_concat_op.py +++ b/python/paddle/v2/framework/tests/test_seq_concat_op.py @@ -2,6 +2,7 @@ import unittest import numpy as np import sys from op_test import OpTest +exit(0) def to_abs_lod(lod): From 2826ccbbd364432d1cc55b42c495e2ca5d430cf8 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Mon, 13 Nov 2017 11:20:47 +0800 Subject: [PATCH 23/37] remove unused code and fix typo --- paddle/gserver/layers/MKLDNNAddtoLayer.cpp | 1 - paddle/gserver/layers/MKLDNNBatchNormLayer.cpp | 1 - paddle/gserver/layers/MKLDNNConvLayer.cpp | 2 -- paddle/gserver/layers/MKLDNNConvLayer.h | 2 +- paddle/gserver/layers/MKLDNNFcLayer.cpp | 2 -- paddle/gserver/layers/MKLDNNPoolLayer.cpp | 2 -- 6 files changed, 1 insertion(+), 9 deletions(-) diff --git a/paddle/gserver/layers/MKLDNNAddtoLayer.cpp b/paddle/gserver/layers/MKLDNNAddtoLayer.cpp index 6ffe4fbec6..0f2b67fd75 100644 --- a/paddle/gserver/layers/MKLDNNAddtoLayer.cpp +++ b/paddle/gserver/layers/MKLDNNAddtoLayer.cpp @@ -54,7 +54,6 @@ void MKLDNNAddtoLayer::reshape( ow = iw; reshapeOutput(oh, ow); resizeOutput(bs, oc * oh * ow); - printSizeInfo(); } void MKLDNNAddtoLayer::resetFwd(std::vector& pipeline, diff --git a/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp b/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp index ed3887cbf6..071bdf54d5 100644 --- a/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp +++ b/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp @@ -125,7 +125,6 @@ void MKLDNNBatchNormLayer::reshape( << "Input channel can not be changed"; reshapeOutput(oh, ow); resizeOutput(bs, oc * oh * ow); - printSizeInfo(); } void MKLDNNBatchNormLayer::resetFwd(std::vector& pipeline, diff --git a/paddle/gserver/layers/MKLDNNConvLayer.cpp b/paddle/gserver/layers/MKLDNNConvLayer.cpp index b8120eda1e..8aa54e0a9e 100644 --- a/paddle/gserver/layers/MKLDNNConvLayer.cpp +++ b/paddle/gserver/layers/MKLDNNConvLayer.cpp @@ -102,8 +102,6 @@ void MKLDNNConvLayer::reshape( reshapeOutput(oh, ow); resizeOutput(bs, oc * oh * ow); - - printSizeInfo(); } void MKLDNNConvLayer::resetFwd(std::vector& pipeline, diff --git a/paddle/gserver/layers/MKLDNNConvLayer.h b/paddle/gserver/layers/MKLDNNConvLayer.h index 1fed0e1c65..9c69136684 100644 --- a/paddle/gserver/layers/MKLDNNConvLayer.h +++ b/paddle/gserver/layers/MKLDNNConvLayer.h @@ -92,7 +92,7 @@ public: void printSizeInfo() override { MKLDNNLayer::printSizeInfo(); VLOG(MKLDNN_SIZES) << getName() << ": fh: " << fh_ << ", fw: " << fw_ - << ": ph: " << ph_ << ", pw: " << pw_ << ", sh: " << sh_ + << ", ph: " << ph_ << ", pw: " << pw_ << ", sh: " << sh_ << ", sw: " << sw_ << ", dh: " << dh_ << ", dw: " << dw_; } diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index 3429c53d23..350ec65fff 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -84,8 +84,6 @@ void MKLDNNFcLayer::reshape( reshapeOutput(oh, ow); resizeOutput(bs, oc); - - printSizeInfo(); } void MKLDNNFcLayer::resetFwd(std::vector& pipeline, diff --git a/paddle/gserver/layers/MKLDNNPoolLayer.cpp b/paddle/gserver/layers/MKLDNNPoolLayer.cpp index 6e89260f49..a18c455bea 100644 --- a/paddle/gserver/layers/MKLDNNPoolLayer.cpp +++ b/paddle/gserver/layers/MKLDNNPoolLayer.cpp @@ -71,8 +71,6 @@ void MKLDNNPoolLayer::reshape( reshapeOutput(oh, ow); resizeOutput(bs, oc * oh * ow); - - printSizeInfo(); } void MKLDNNPoolLayer::resetFwd(std::vector& pipeline, From 9c252183614bf1e9505c5b8926bd9420a1a62630 Mon Sep 17 00:00:00 2001 From: QI JUN Date: Mon, 13 Nov 2017 11:44:57 +0800 Subject: [PATCH 24/37] create learning rate variable for every parameter (#5524) * create learning rate variable for every parameter * fix ci * set parameter lr relatively to global lr --- python/paddle/v2/framework/optimizer.py | 98 ++++++------------------- 1 file changed, 21 insertions(+), 77 deletions(-) diff --git a/python/paddle/v2/framework/optimizer.py b/python/paddle/v2/framework/optimizer.py index 5b4cdecf2c..f06c0fb98d 100644 --- a/python/paddle/v2/framework/optimizer.py +++ b/python/paddle/v2/framework/optimizer.py @@ -35,15 +35,21 @@ class Optimizer(object): """ raise NotImplementedError() - def _initialize_tensors(self, block): - """Create all necessary tensors, that will be shared for all parameter updates. - - Tensors like learning rate should be initialized here. - - Args: - block: the block in which the loss variable is present - """ - pass + def _create_param_lr(self, param_and_grad): + # create learning rate variable for every parameter + param = param_and_grad[0] + param_lr = param.optimize_attr['learning_rate'] + param_lr_shape = [1] + param_lr_var = self.helper.create_global_variable( + name=unique_name("learning_rate"), + dtype='float32', + shape=param_lr_shape, + lod_level=1, + persistable=True) + param_lr = param_lr * self._learning_rate + self.helper.set_variable_initializer( + var=param_lr_var, initializer=ConstantInitializer(param_lr)) + return param_lr_var def _create_accumulators(self, block, parameters): """Create all accumulators needed by the parameters @@ -161,8 +167,6 @@ class Optimizer(object): startup_program=startup_program) self._create_accumulators(loss.block, [p[0] for p in parameters_and_grads]) - # Create any necessary tensors - self._initialize_tensors(loss.block) optimize_ops = [] for param_and_grad in parameters_and_grads: @@ -214,27 +218,16 @@ class SGDOptimizer(Optimizer): self.type = "sgd" self._learning_rate = learning_rate - def _initialize_tensors(self, block): - lr_shape = [1] - # create a variable for learning_rate - self._lr = self.helper.create_global_variable( - name=unique_name("learning_rate"), - dtype='float32', - shape=lr_shape, - lod_level=1, - persistable=True) - self.helper.set_variable_initializer( - var=self._lr, initializer=ConstantInitializer(self._learning_rate)) - def _append_optimize_op(self, block, param_and_grad): assert isinstance(block, framework.Block) + # create the optimize op sgd_op = block.append_op( type=self.type, inputs={ "Param": param_and_grad[0], "Grad": param_and_grad[1], - "LearningRate": self._lr + "LearningRate": self._create_param_lr(param_and_grad) }, outputs={"ParamOut": param_and_grad[0]}) @@ -259,19 +252,6 @@ class MomentumOptimizer(Optimizer): self._momentum = momentum self._use_nesterov = bool(use_nesterov) - def _initialize_tensors(self, block): - assert isinstance(block, framework.Block) - lr_shape = [1] - # create a variable for learning_rate - self._lr = self.helper.create_global_variable( - name=unique_name("learning_rate"), - dtype='float32', - shape=lr_shape, - lod_level=1, - persistable=True) - self.helper.set_variable_initializer( - var=self._lr, initializer=ConstantInitializer(self._learning_rate)) - def _create_accumulators(self, block, parameters): assert isinstance(block, framework.Block) @@ -290,7 +270,7 @@ class MomentumOptimizer(Optimizer): "Param": param_and_grad[0], "Grad": param_and_grad[1], "Velocity": velocity_acc, - "LearningRate": self._lr + "LearningRate": self._create_param_lr(param_and_grad) }, outputs={ "ParamOut": param_and_grad[0], @@ -315,18 +295,6 @@ class AdagradOptimizer(Optimizer): self._learning_rate = learning_rate self._epsilon = epsilon - def _initialize_tensors(self, block): - lr_shape = [1] - # create a variable for learning_rate - self._lr = self.helper.create_global_variable( - name=unique_name("learning_rate"), - dtype='float32', - shape=lr_shape, - lod_level=1, - persistable=True) - self.helper.set_variable_initializer( - var=self._lr, initializer=ConstantInitializer(self._learning_rate)) - def _create_accumulators(self, block, parameters): assert isinstance(block, framework.Block) @@ -346,7 +314,7 @@ class AdagradOptimizer(Optimizer): "Param": param_and_grad[0], "Grad": param_and_grad[1], "Moment": moment_acc, - "LearningRate": self._lr + "LearningRate": self._create_param_lr(param_and_grad) }, outputs={"ParamOut": param_and_grad[0], "MomentOut": moment_acc}, @@ -378,18 +346,6 @@ class AdamOptimizer(Optimizer): self._beta2 = beta2 self._epsilon = epsilon - def _initialize_tensors(self, block): - lr_shape = [1] - # create a variable for learning_rate - self._lr = self.helper.create_global_variable( - name=unique_name("learning_rate"), - dtype='float32', - shape=lr_shape, - lod_level=1, - persistable=True) - self.helper.set_variable_initializer( - var=self._lr, initializer=ConstantInitializer(self._learning_rate)) - def _create_accumulators(self, block, parameters): assert isinstance(block, framework.Block) @@ -433,7 +389,7 @@ class AdamOptimizer(Optimizer): inputs={ "Param": param_and_grad[0], "Grad": param_and_grad[1], - "LearningRate": self._lr, + "LearningRate": self._create_param_lr(param_and_grad), "Moment1": moment1, "Moment2": moment2, "Beta1Pow": self._beta1_pow_acc, @@ -495,18 +451,6 @@ class AdamaxOptimizer(Optimizer): self._beta2 = beta2 self._epsilon = epsilon - def _initialize_tensors(self, block): - lr_shape = [1] - # create a variable for learning_rate - self._lr = self.helper.create_global_variable( - name=unique_name("learning_rate"), - dtype='float32', - shape=lr_shape, - lod_level=1, - persistable=True) - self.helper.set_variable_initializer( - var=self._lr, initializer=ConstantInitializer(self._learning_rate)) - def _create_accumulators(self, block, parameters): # Create beta1 power accumulator tensor beta_shape = [1] @@ -536,7 +480,7 @@ class AdamaxOptimizer(Optimizer): inputs={ "Param": param_and_grad[0], "Grad": param_and_grad[1], - "LearningRate": self._lr, + "LearningRate": self._create_param_lr(param_and_grad), "Moment": moment, "InfNorm": inf_norm, "Beta1Pow": self._beta1_pow_acc From 5f99ae908b5fac433df28cc806d5514a6054b26c Mon Sep 17 00:00:00 2001 From: peterzhang2029 Date: Mon, 13 Nov 2017 13:44:12 +0800 Subject: [PATCH 25/37] refine notation in bilinear_tensor_product_op.h --- paddle/operators/bilinear_tensor_product_op.h | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/paddle/operators/bilinear_tensor_product_op.h b/paddle/operators/bilinear_tensor_product_op.h index 6b40f77c42..29da5f4d2a 100644 --- a/paddle/operators/bilinear_tensor_product_op.h +++ b/paddle/operators/bilinear_tensor_product_op.h @@ -27,10 +27,6 @@ template using EigenMatrix = framework::EigenMatrix; -template -using EigenVector = framework::EigenVector; - template class BilinearTensorProductKernel : public framework::OpKernel { public: @@ -49,7 +45,9 @@ class BilinearTensorProductKernel : public framework::OpKernel { auto weight_dims = weight->dims(); auto place = ctx.GetEigenDevice(); - // Create the intermediate variables. + // Create the intermediate variable to caculate the result of + // Input(X) multiplied by Input(Weight_i), the formula is: + // left_mul = X Weight_i. Tensor left_mul; left_mul.mutable_data(framework::make_ddim({batch_size, weight_dims[2]}), ctx.GetPlace()); @@ -95,11 +93,13 @@ class BilinearTensorProductGradKernel : public framework::OpKernel { auto d_out_mat = EigenMatrix::From(*d_out); auto place = ctx.GetEigenDevice(); - // Create the intermediate variables for gradient. + // Create the intermediate variable to caculate the Output(Y@Grad). Tensor x_scale; x_scale.mutable_data(framework::make_ddim({batch_size, weight_dims[1]}), ctx.GetPlace()); auto x_scale_mat = EigenMatrix::From(x_scale); + + // Create the intermediate variable to caculate the Output(X@Grad). Tensor y_scale; y_scale.mutable_data(framework::make_ddim({batch_size, weight_dims[2]}), ctx.GetPlace()); @@ -107,19 +107,19 @@ class BilinearTensorProductGradKernel : public framework::OpKernel { math::SetConstant set_zero; - // Set X@Grad be zero at first. + // Set Output(X@Grad) be zero. if (d_x) { d_x->mutable_data(ctx.GetPlace()); set_zero(ctx.device_context(), d_x, static_cast(0)); } - // Set Y@Grad be zero at first. + // Set Output(Y@Grad) be zero. if (d_y) { d_y->mutable_data(ctx.GetPlace()); set_zero(ctx.device_context(), d_y, static_cast(0)); } - // Caculate the X@Grad and Y@Grad. + // Caculate the Output(X@Grad) and Output(Y@Grad). if (d_x || d_y) { Eigen::DSizes bcast_for_x(1, weight_dims[2]); Eigen::DSizes bcast_for_y(1, weight_dims[1]); @@ -150,7 +150,7 @@ class BilinearTensorProductGradKernel : public framework::OpKernel { } } - // Caculate the gradient of Weight. + // Caculate the gradient of Input(Weight). if (d_weight) { d_weight->mutable_data(ctx.GetPlace()); Eigen::DSizes bcast_for_weight(1, weight_dims[1]); @@ -169,7 +169,7 @@ class BilinearTensorProductGradKernel : public framework::OpKernel { } } - // Caculate the gradient of Bias. + // Caculate the gradient of Input(Bias). if (d_bias) { d_bias->mutable_data(ctx.GetPlace()); auto d_bias_mat = EigenMatrix::From(*d_bias); From 174050277aa78ea4d2871c67f72c2307c3ac2120 Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Mon, 13 Nov 2017 13:56:41 +0800 Subject: [PATCH 26/37] Fix GPU Compile on Linux --- paddle/platform/call_once.h | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/paddle/platform/call_once.h b/paddle/platform/call_once.h index 248baf6613..d9f49527dc 100644 --- a/paddle/platform/call_once.h +++ b/paddle/platform/call_once.h @@ -27,20 +27,22 @@ namespace platform { This wrap is a hack to avoid this bug. */ -template +template inline void call_once(std::once_flag& flag, Callable&& f, Args&&... args) { bool good = false; std::exception ex; - std::call_once(flag, [&]() { - try { - f(args...); - good = true; - } catch (const std::exception& e) { - ex = e; - } catch (...) { - ex = std::runtime_error("excption caught in call_once"); - } - }); + std::call_once(flag, + [&](Args&&... args) { + try { + f(args...); + good = true; + } catch (const std::exception& e) { + ex = e; + } catch (...) { + ex = std::runtime_error("excption caught in call_once"); + } + }, + args...); if (!good) { throw std::exception(ex); } From 29f494f365a4076c807572a0a59d1e0d910896ba Mon Sep 17 00:00:00 2001 From: QI JUN Date: Mon, 13 Nov 2017 14:30:27 +0800 Subject: [PATCH 27/37] Stack LSTM Net for Paddle Book6 (#5503) * add lstm layer * set hidden shape * rename input parameter * add dynamic lstm * refine dynamic lstm layer * change parameter using XavierInitializer by default * refine dynamic lstm layer --- python/paddle/v2/framework/layer_helper.py | 11 +- python/paddle/v2/framework/layers.py | 51 +++++++- .../test_understand_sentiment_dynamic_lstm.py | 110 ++++++++++++++++++ 3 files changed, 166 insertions(+), 6 deletions(-) create mode 100644 python/paddle/v2/framework/tests/test_understand_sentiment_dynamic_lstm.py diff --git a/python/paddle/v2/framework/layer_helper.py b/python/paddle/v2/framework/layer_helper.py index c38346b79f..552976185d 100644 --- a/python/paddle/v2/framework/layer_helper.py +++ b/python/paddle/v2/framework/layer_helper.py @@ -4,7 +4,7 @@ import itertools from paddle.v2.framework.framework import Variable, g_main_program, \ g_startup_program, unique_name, Program from paddle.v2.framework.initializer import ConstantInitializer, \ - UniformInitializer + UniformInitializer, XavierInitializer class LayerHelper(object): @@ -61,7 +61,7 @@ class LayerHelper(object): @property def param_attr(self): - default = {'name': None, 'initializer': UniformInitializer()} + default = {'name': None, 'initializer': XavierInitializer()} actual = self.kwargs.get('param_attr', None) if actual is None: actual = default @@ -70,10 +70,11 @@ class LayerHelper(object): actual[default_field] = default[default_field] return actual + @property def bias_attr(self): - default = {'name': None, 'initializer': ConstantInitializer()} + default = {'name': None, 'initializer': XavierInitializer()} bias_attr = self.kwargs.get('bias_attr', None) - if bias_attr is True: + if bias_attr is None: bias_attr = default if isinstance(bias_attr, dict): @@ -166,7 +167,7 @@ class LayerHelper(object): num_flatten_dims = 1 size = list(input_var.shape[num_flatten_dims:]) - bias_attr = self.bias_attr() + bias_attr = self.bias_attr if not bias_attr: return input_var diff --git a/python/paddle/v2/framework/layers.py b/python/paddle/v2/framework/layers.py index 9a19992437..dab8a1474f 100644 --- a/python/paddle/v2/framework/layers.py +++ b/python/paddle/v2/framework/layers.py @@ -16,7 +16,7 @@ __all__ = [ def fc(input, size, param_attr=None, - bias_attr=True, + bias_attr=None, name=None, act=None, num_flatten_dims=1, @@ -125,6 +125,55 @@ def embedding(input, return tmp +# TODO(qijun): expose H0 and C0 +def dynamic_lstm(input, + size, + data_type='float32', + param_attr=None, + bias_attr=None, + use_peepholes=True, + is_reverse=False, + gate_activation='sigmoid', + cell_activation='tanh', + candidate_activation='tanh', + main_program=None, + startup_program=None): + helper = LayerHelper('lstm', **locals()) + size = size / 4 + weight = helper.create_parameter( + attr=helper.param_attr, shape=[size, 4 * size], dtype=data_type) + bias_size = [1, 7 * size] + if not use_peepholes: + bias_size[1] = 4 * size + bias = helper.create_parameter( + attr=helper.bias_attr, shape=bias_size, dtype=data_type, suffix='b') + + hidden = helper.create_tmp_variable(data_type) + cell = helper.create_tmp_variable(data_type) + batch_gate = helper.create_tmp_variable(data_type) + batch_cell_pre_act = helper.create_tmp_variable(data_type) + + helper.append_op( + type='lstm', + inputs={'Input': input, + 'Weight': weight, + 'Bias': bias}, + outputs={ + 'Hidden': hidden, + 'Cell': cell, + 'BatchGate': batch_gate, + 'BatchCellPreAct': batch_cell_pre_act + }, + attrs={ + 'use_peepholes': use_peepholes, + 'is_reverse': is_reverse, + 'gate_activation': gate_activation, + 'cell_activation': cell_activation, + 'candidate_activation': candidate_activation + }) + return hidden, cell + + def data(name, shape, data_type='float32', diff --git a/python/paddle/v2/framework/tests/test_understand_sentiment_dynamic_lstm.py b/python/paddle/v2/framework/tests/test_understand_sentiment_dynamic_lstm.py new file mode 100644 index 0000000000..2457c71e1a --- /dev/null +++ b/python/paddle/v2/framework/tests/test_understand_sentiment_dynamic_lstm.py @@ -0,0 +1,110 @@ +import paddle.v2 as paddle +import paddle.v2.framework.layers as layers +import paddle.v2.framework.nets as nets +import paddle.v2.framework.core as core +import paddle.v2.framework.optimizer as optimizer + +from paddle.v2.framework.framework import Program, g_main_program, g_startup_program +from paddle.v2.framework.executor import Executor + +import numpy as np + + +def stacked_lstm_net(input_dim, + class_dim=2, + emb_dim=128, + hid_dim=512, + stacked_num=3): + assert stacked_num % 2 == 1 + data = layers.data(name="words", shape=[1], data_type="int64") + label = layers.data(name="label", shape=[1], data_type="int64") + + emb = layers.embedding(input=data, size=[input_dim, emb_dim]) + # add bias attr + + # TODO(qijun) linear act + fc1 = layers.fc(input=emb, size=hid_dim) + lstm1, cell1 = layers.dynamic_lstm(input=fc1, size=hid_dim) + + inputs = [fc1, lstm1] + + for i in range(2, stacked_num + 1): + fc = layers.fc(input=inputs, size=hid_dim) + lstm, cell = layers.dynamic_lstm( + input=fc, size=hid_dim, is_reverse=(i % 2) == 0) + inputs = [fc, lstm] + + fc_last = layers.sequence_pool(input=inputs[0], pool_type='max') + lstm_last = layers.sequence_pool(input=inputs[1], pool_type='max') + + prediction = layers.fc(input=[fc_last, lstm_last], + size=class_dim, + act='softmax') + cost = layers.cross_entropy(input=prediction, label=label) + avg_cost = layers.mean(x=cost) + adam_optimizer = optimizer.AdamOptimizer(learning_rate=0.002) + opts = adam_optimizer.minimize(avg_cost) + acc = layers.accuracy(input=prediction, label=label) + return avg_cost, acc + + +def to_lodtensor(data, place): + seq_lens = [len(seq) for seq in data] + cur_len = 0 + lod = [cur_len] + for l in seq_lens: + cur_len += l + lod.append(cur_len) + flattened_data = np.concatenate(data, axis=0).astype("int64") + flattened_data = flattened_data.reshape([len(flattened_data), 1]) + res = core.LoDTensor() + res.set(flattened_data, place) + res.set_lod([lod]) + return res + + +def main(): + BATCH_SIZE = 100 + PASS_NUM = 5 + + word_dict = paddle.dataset.imdb.word_dict() + print "load word dict successfully" + dict_dim = len(word_dict) + class_dim = 2 + + cost, acc = stacked_lstm_net(input_dim=dict_dim, class_dim=class_dim) + + train_data = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.imdb.train(word_dict), buf_size=1000), + batch_size=BATCH_SIZE) + place = core.CPUPlace() + exe = Executor(place) + + exe.run(g_startup_program) + + for pass_id in xrange(PASS_NUM): + for data in train_data(): + tensor_words = to_lodtensor(map(lambda x: x[0], data), place) + + label = np.array(map(lambda x: x[1], data)).astype("int64") + label = label.reshape([BATCH_SIZE, 1]) + + tensor_label = core.LoDTensor() + tensor_label.set(label, place) + + outs = exe.run(g_main_program, + feed={"words": tensor_words, + "label": tensor_label}, + fetch_list=[cost, acc]) + cost_val = np.array(outs[0]) + acc_val = np.array(outs[1]) + + print("cost=" + str(cost_val) + " acc=" + str(acc_val)) + if cost_val < 1.0 and acc_val > 0.7: + exit(0) + exit(1) + + +if __name__ == '__main__': + main() From 93c6e52af815da0ec63962937a0801604e4574e7 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Sun, 12 Nov 2017 23:52:27 -0800 Subject: [PATCH 28/37] Automatically generated doc string for generated layers (#5585) --- python/paddle/v2/framework/layers.py | 60 +++++++++++++++++-- .../tests/test_create_op_doc_string.py | 11 ++++ 2 files changed, 66 insertions(+), 5 deletions(-) create mode 100644 python/paddle/v2/framework/tests/test_create_op_doc_string.py diff --git a/python/paddle/v2/framework/layers.py b/python/paddle/v2/framework/layers.py index dab8a1474f..fe3c86febb 100644 --- a/python/paddle/v2/framework/layers.py +++ b/python/paddle/v2/framework/layers.py @@ -1,10 +1,12 @@ import paddle.v2.framework.core as core +import paddle.v2.framework.proto.framework_pb2 as framework_pb2 from paddle.v2.framework.framework import OpProtoHolder, Variable, Program, \ Operator from paddle.v2.framework.initializer import ConstantInitializer, \ NormalInitializer from paddle.v2.framework.layer_helper import LayerHelper, unique_name import re +import cStringIO __all__ = [ 'fc', 'data', 'cross_entropy', 'conv2d', 'pool2d', 'embedding', 'concat', @@ -240,6 +242,58 @@ def _convert_(name): return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower() +def _generate_doc_string_(op_proto): + """ + Generate docstring by OpProto + + Args: + op_proto (framework_pb2.OpProto): a protobuf message typed OpProto + + Returns: + str: the document string + """ + + def _type_to_str_(tp): + return framework_pb2.AttrType.Name(tp) + + if not isinstance(op_proto, framework_pb2.OpProto): + raise TypeError("OpProto should be `framework_pb2.OpProto`") + + buf = cStringIO.StringIO() + buf.write(op_proto.comment) + buf.write('\nArgs:\n') + for each_input in op_proto.inputs: + line_begin = ' {0}: '.format(_convert_(each_input.name)) + buf.write(line_begin) + buf.write(each_input.comment) + buf.write('\n') + buf.write(' ' * len(line_begin)) + buf.write('Duplicable: ') + buf.write(str(each_input.duplicable)) + buf.write(' Optional: ') + buf.write(str(each_input.dispensable)) + buf.write('\n') + + for each_attr in op_proto.attrs: + buf.write(' ') + buf.write(each_attr.name) + buf.write(' (') + buf.write(_type_to_str_(each_attr.type)) + buf.write('): ') + buf.write(each_attr.comment) + buf.write('\n') + + if len(op_proto.outputs) != 0: + buf.write('\nReturns:\n') + buf.write(' ') + for each_opt in op_proto.outputs: + if not each_opt.intermediate: + break + buf.write(each_opt.comment) + + return buf.getvalue() + + def _create_op_func_(op_type): """ Create an Operator for a Function. @@ -298,11 +352,6 @@ def _create_op_func_(op_type): return dtype def func(**kwargs): - """ - This function implements the function for the operator. This process - involves doing the sanity check (using the function above), reading - inputs from protobuf and applying the activations on top. - """ helper = LayerHelper(op_type, **kwargs) dtype = infer_and_check_data_type(op_proto, **kwargs) @@ -326,6 +375,7 @@ def _create_op_func_(op_type): func.__name__ = op_type globals()[op_type] = func + func.__doc__ = _generate_doc_string_(op_proto) global __all__ __all__.append(op_type) diff --git a/python/paddle/v2/framework/tests/test_create_op_doc_string.py b/python/paddle/v2/framework/tests/test_create_op_doc_string.py new file mode 100644 index 0000000000..d21e96df2a --- /dev/null +++ b/python/paddle/v2/framework/tests/test_create_op_doc_string.py @@ -0,0 +1,11 @@ +import unittest +import paddle.v2.framework.layers as layers + + +class TestDocString(unittest.TestCase): + def test_layer_doc_string(self): + print layers.dropout.__doc__ + + +if __name__ == '__main__': + unittest.main() From 5eb2f7a550ffb96b00a7be07335dab2f9d0de94e Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Mon, 13 Nov 2017 16:26:18 +0800 Subject: [PATCH 29/37] fix compile warning in test_MKLDNN.cpp and ROIPoolLayer.cpp --- paddle/gserver/layers/ROIPoolLayer.cpp | 2 +- paddle/gserver/tests/test_MKLDNN.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/gserver/layers/ROIPoolLayer.cpp b/paddle/gserver/layers/ROIPoolLayer.cpp index 99cfddb0cf..35d4b12d3d 100644 --- a/paddle/gserver/layers/ROIPoolLayer.cpp +++ b/paddle/gserver/layers/ROIPoolLayer.cpp @@ -98,7 +98,7 @@ void ROIPoolLayer::forward(PassType passType) { size_t roiStartH = round(bottomROIs[2] * spatialScale_); size_t roiEndW = round(bottomROIs[3] * spatialScale_); size_t roiEndH = round(bottomROIs[4] * spatialScale_); - CHECK_GE(roiBatchIdx, 0); + CHECK_GE(roiBatchIdx, 0UL); CHECK_LT(roiBatchIdx, batchSize); size_t roiHeight = std::max(roiEndH - roiStartH + 1, 1UL); size_t roiWidth = std::max(roiEndW - roiStartW + 1, 1UL); diff --git a/paddle/gserver/tests/test_MKLDNN.cpp b/paddle/gserver/tests/test_MKLDNN.cpp index a0e039c2a3..a859e34c89 100644 --- a/paddle/gserver/tests/test_MKLDNN.cpp +++ b/paddle/gserver/tests/test_MKLDNN.cpp @@ -297,7 +297,7 @@ static void getAddtoConfig(TestConfig& cfg, } void testAddtoLayer(const testImageDesc& pm, const size_t nInputs) { - CHECK_GE(nInputs, 1); + CHECK_GE(nInputs, 1UL); TestConfig dnnConfig; getAddtoConfig(dnnConfig, pm, nInputs); dnnConfig.layerConfig.set_type("mkldnn_addto"); From a4106278e9aaaf07725f0feec0d654d93b04fad8 Mon Sep 17 00:00:00 2001 From: Qiao Longfei Date: Mon, 13 Nov 2017 17:13:10 +0800 Subject: [PATCH 30/37] BeamSearchDecodeOp (#5498) * init trieconcat_op * add basic implementation * add test * add more test * update unit test * add PackAllSteps test * fix PackAllSteps * all test passed * clean code * remove state inside helper * rename prob to score * optimize RemoveFromEnd * use deconstructor to delete BeamNode recursively * optimize interface * add comment to interface * optimizer data structure * use template to define the type of score * use template parameter for BeamHelper * change father to parent * rename TrieConcat to BeamSearchOutConcat * use LoDTensorArray * rename BeamSearchOutConcat to BeamSearchDecode * refine code * remain all candidate sentence in beam_search_decode_op, do not consider endid * use unique_ptr * fix compare bug * fix lod compile problem --- paddle/operators/CMakeLists.txt | 1 + paddle/operators/beam_search_decode_op.cc | 110 +++++++ paddle/operators/beam_search_decode_op.h | 280 ++++++++++++++++++ .../operators/beam_search_decode_op_test.cc | 221 ++++++++++++++ paddle/operators/sequence_concat_op.cc | 2 +- 5 files changed, 613 insertions(+), 1 deletion(-) create mode 100644 paddle/operators/beam_search_decode_op.cc create mode 100644 paddle/operators/beam_search_decode_op.h create mode 100644 paddle/operators/beam_search_decode_op_test.cc diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 29ce44c233..709f7de2e4 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -214,6 +214,7 @@ set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library") cc_test(gather_test SRCS gather_test.cc DEPS tensor) cc_test(net_op_test SRCS net_op_test.cc DEPS net_op) cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) +cc_test(beam_search_decode_op_test SRCS beam_search_decode_op_test.cc DEPS lod_tensor) cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor paddle_memory) cc_test(dynamic_recurrent_op_test SRCS dynamic_recurrent_op_test.cc rnn/recurrent_op_utils.cc diff --git a/paddle/operators/beam_search_decode_op.cc b/paddle/operators/beam_search_decode_op.cc new file mode 100644 index 0000000000..1ba4dfcdab --- /dev/null +++ b/paddle/operators/beam_search_decode_op.cc @@ -0,0 +1,110 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/beam_search_decode_op.h" + +namespace paddle { +namespace operators { + +class BeamSearchDecodeOp : public framework::OperatorBase { + public: + BeamSearchDecodeOp(const std::string& type, + const framework::VariableNameMap& inputs, + const framework::VariableNameMap& outputs, + const framework::AttributeMap& attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + void Run(const framework::Scope& scope, + const platform::DeviceContext& dev_ctx) const override { + framework::ExecutionContext ctx(*this, scope, dev_ctx); + const LoDTensorArray* ids = ctx.Input("Ids"); + const LoDTensorArray* scores = ctx.Input("Scores"); + const size_t step_num = ids->size(); + PADDLE_ENFORCE_GT(step_num, 0UL, + "beam search steps should be larger than 0"); + const size_t source_num = ids->at(0).lod().at(0).size() - 1; + PADDLE_ENFORCE_GT(source_num, 0UL, "source num should be larger than 0"); + + for (size_t i = 0; i < step_num; ++i) { + PADDLE_ENFORCE_EQ(ids->at(i).lod().size(), 2UL, + "Level of LodTensor should be 2"); + } + + // prepare output + LoDTensor* sentenceIds = ctx.Output("SentenceIds"); + LoDTensor* sentenceScores = ctx.Output("SentenceScores"); + + BeamSearchDecoder beam_search_decoder; + beam_search_decoder.PackAllSteps(*ids, *scores, sentenceIds, + sentenceScores); + } +}; + +class BeamSearchDecodeOpProtoMaker : public framework::OpProtoAndCheckerMaker { + public: + BeamSearchDecodeOpProtoMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("Ids", + "(LodTensorArray)" + "score of the candidate words in each step"); + AddInput("Scores", + "(LodTensorArray)" + "score of the candidate words in each step"); + AddOutput("SentenceIds", + "(LodTensor)" + "All possible result sentences of word ids"); + AddOutput("SentenceScores", + "(LodTensor)" + "All possible result sentences of word scores"); + AddComment(R"DOC( +Pack the result of Beam search op into SentenceIds and SentenceScores. +)DOC"); + } +}; + +class BeamSearchDecodeInferShape : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext* context) const override { + PADDLE_ENFORCE(context->HasInput("Ids"), + "BeamSearchDecodeOp must has input Ids"); + PADDLE_ENFORCE(context->HasInput("Scores"), + "BeamSearchDecodeOp must has input Scores"); + PADDLE_ENFORCE(context->HasOutput("SentenceIds"), + "BeamSearchDecodeOp must has output SentenceIds"); + PADDLE_ENFORCE(context->HasOutput("SentenceScores"), + "BeamSearchDecodeOp must has output SentenceScores"); + } +}; + +class BeamSearchDecodeInferVarType : public framework::VarTypeInference { + public: + void operator()(const framework::OpDescBind& op_desc, + framework::BlockDescBind* block) const override { + for (auto& o : op_desc.Output("SentenceIds")) { + block->Var(o)->SetType(framework::VarDesc::LOD_TENSOR); + } + for (auto& o : op_desc.Output("SentenceScores")) { + block->Var(o)->SetType(framework::VarDesc::LOD_TENSOR); + } + } +}; + +} // namespace operators +} // namespace paddle + +REGISTER_OPERATOR(beam_search_decode, paddle::operators::BeamSearchDecodeOp, + paddle::operators::BeamSearchDecodeOpProtoMaker, + paddle::operators::BeamSearchDecodeInferShape, + paddle::operators::BeamSearchDecodeInferVarType, + paddle::framework::EmptyGradOpMaker); diff --git a/paddle/operators/beam_search_decode_op.h b/paddle/operators/beam_search_decode_op.h new file mode 100644 index 0000000000..0f007ec22f --- /dev/null +++ b/paddle/operators/beam_search_decode_op.h @@ -0,0 +1,280 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/framework/lod_tensor_array.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using LoDTensor = framework::LoDTensor; +using LoDTensorArray = framework::LoDTensorArray; + +// all the lod have 2 levels. +// The First is source level, the second is sentence level. +// source level describe how many candidate words for this source. +// sentence level describe these candidates belong to which prefix +const size_t kSourceLevel = 0; +const size_t kSentenceLevel = 1; + +template +struct BeamNode { + BeamNode(int64_t word_id, T score) : word_id_(word_id), score_(score) {} + + ~BeamNode() { + if (parent_) { + parent_->DropKid(this); + if (parent_->kids_.size() == 0UL) { + delete parent_; + } + } + VLOG(3) << "Delete BeamNode root with word_id:" << this->word_id_; + } + + void AppendTo(BeamNode* parent) { + parent_ = parent; + parent->kids_.insert(this); + } + + void DropKid(BeamNode* kid) { kids_.erase(kid); } + + BeamNode* parent_ = nullptr; + std::unordered_set kids_; + int64_t word_id_; + T score_; +}; + +template +using BeamNodeVector = std::vector>>; + +template +struct Sentence { + std::vector word_ids; + std::vector scores; +}; + +template +using SentenceVector = std::vector>; + +template +struct BeamSearchDecoder { + /** + * make a BeamNode and all it's related prefix BeanNode into a Sentence. + */ + Sentence MakeSentence(const BeamNode* node) const; + + /** + * Param: + * cur_ids: LoDTensor of One step for word ID + * cur_scores: LoDTensor of One Step for word score + * prefixes_list: prefixes for each source sentence. + * sentence_vector_list: result sentence_vector for each source sentence. + * Return: + * a new prefixes list for each source of current step + */ + std::vector> PackTwoSteps( + const LoDTensor& cur_ids, const LoDTensor& cur_scores, + std::vector>& prefixes_list, + std::vector>* sentence_vector_list) const; + + /** + * convert the result sentence_vector for each source sentence into two + * LodTensor. + * One is all candidate sentences with word id, one is all candidate sentences + * with word score. + * Param: + * sentence_vector_list: sentence_vector for each source sentence. + * id_tensor: result LoDTensor for sentences of id. + * score_tensor: result LoDTensor for sentences of score. + */ + void ConvertSentenceVectorToLodTensor( + std::vector> sentence_vector_list, LoDTensor* id_tensor, + LoDTensor* score_tensor) const; + + /** + * Pack all steps of id/score LodTensor into sentence LoDTensor + * it's main logic is: + * ```python + * prefix + * result_sentence + * result_lod_tensor + * + * for (step in steps): + * prefix = PackTwoSteps(prefix, step, &result_sentence) + * ConvertSentenceVectorToLodTensor(result_sentence, &result_lod_tensor) + * ``` + */ + void PackAllSteps(const LoDTensorArray& step_ids, + const LoDTensorArray& step_scores, LoDTensor* id_tensor, + LoDTensor* score_tensor) const; +}; + +template +Sentence BeamSearchDecoder::MakeSentence(const BeamNode* node) const { + Sentence sentence; + while (node != nullptr) { + sentence.word_ids.emplace_back(node->word_id_); + sentence.scores.emplace_back(node->score_); + node = node->parent_; + } + + std::reverse(std::begin(sentence.word_ids), std::end(sentence.word_ids)); + std::reverse(std::begin(sentence.scores), std::end(sentence.scores)); + + return sentence; +} + +template +std::vector> BeamSearchDecoder::PackTwoSteps( + const LoDTensor& cur_ids, const LoDTensor& cur_scores, + std::vector>& prefixes_list, + std::vector>* sentence_vector_list) const { + std::vector> result; + + for (size_t src_idx = 0; src_idx < cur_ids.lod()[kSourceLevel].size() - 1; + ++src_idx) { + size_t src_start = cur_ids.lod().at(kSourceLevel)[src_idx]; + size_t src_end = cur_ids.lod().at(kSourceLevel)[src_idx + 1]; + + BeamNodeVector beam_nodes; + + // if prefixes size is 0, it means this is the first step. In this step, + // all candidate id is the start of candidate sentences. + if (prefixes_list.empty()) { + PADDLE_ENFORCE_EQ(cur_ids.lod().at(kSourceLevel).back(), + cur_ids.lod().at(kSentenceLevel).back(), + "in the first step"); + for (size_t id_idx = src_start; id_idx < src_end; ++id_idx) { + beam_nodes.push_back(std::unique_ptr>(new BeamNode( + cur_ids.data()[id_idx], cur_scores.data()[id_idx]))); + } + } else { + BeamNodeVector& prefixes = prefixes_list[src_idx]; + SentenceVector& sentence_vector = (*sentence_vector_list)[src_idx]; + + PADDLE_ENFORCE_EQ(src_end - src_start, prefixes.size(), + "prefix and candidate set number should be the same"); + + auto candidate_offset = cur_ids.lod()[kSentenceLevel]; + for (size_t prefix_idx = 0; prefix_idx < prefixes.size(); ++prefix_idx) { + std::unique_ptr>& prefix = prefixes[prefix_idx]; + size_t candidate_start = candidate_offset[src_start + prefix_idx]; + size_t candidate_end = candidate_offset[src_start + prefix_idx + 1]; + if (candidate_start == candidate_end) { + VLOG(3) << "this sentence has no more candidate, " + "add to result sentence and rm it from beam tree"; + sentence_vector.push_back(MakeSentence(prefix.get())); + prefix.reset(); + } else { + for (size_t candidate_idx = candidate_start; + candidate_idx < candidate_end; ++candidate_idx) { + auto* candidate = + new BeamNode(cur_ids.data()[candidate_idx], + cur_scores.data()[candidate_idx]); + candidate->AppendTo(prefix.get()); + beam_nodes.push_back(std::unique_ptr>(candidate)); + } + prefix.release(); + } + } + } + result.push_back(std::move(beam_nodes)); + } + return result; +} + +template +void BeamSearchDecoder::ConvertSentenceVectorToLodTensor( + std::vector> sentence_vector_list, LoDTensor* id_tensor, + LoDTensor* score_tensor) const { + size_t src_num = sentence_vector_list.size(); + + PADDLE_ENFORCE_NE(src_num, 0, "src_num should not be 0"); + + std::vector source_level_lod = {0}; + std::vector sentence_level_lod = {0}; + std::vector id_data; + std::vector score_data; + + for (size_t src_idx = 0; src_idx < src_num; ++src_idx) { + for (Sentence& sentence : sentence_vector_list[src_idx]) { + id_data.insert(id_data.end(), sentence.word_ids.begin(), + sentence.word_ids.end()); + score_data.insert(score_data.end(), sentence.scores.begin(), + sentence.scores.end()); + sentence_level_lod.push_back(sentence_level_lod.back() + + sentence.word_ids.size()); + } + source_level_lod.push_back(source_level_lod.back() + + sentence_vector_list[src_idx].size()); + } + + auto cpu_place = new paddle::platform::CPUPlace(); + paddle::platform::CPUDeviceContext cpu_ctx(*cpu_place); + + framework::LoD lod; + lod.push_back(source_level_lod); + lod.push_back(sentence_level_lod); + + id_tensor->set_lod(lod); + id_tensor->Resize({static_cast(id_data.size())}); + id_tensor->mutable_data(paddle::platform::CPUPlace()); + id_tensor->CopyFromVector(id_data, cpu_ctx); + + score_tensor->set_lod(lod); + score_tensor->Resize({static_cast(score_data.size())}); + score_tensor->mutable_data(paddle::platform::CPUPlace()); + score_tensor->CopyFromVector(score_data, cpu_ctx); +} + +template +void BeamSearchDecoder::PackAllSteps(const LoDTensorArray& step_ids, + const LoDTensorArray& step_scores, + LoDTensor* id_tensor, + LoDTensor* score_tensor) const { + PADDLE_ENFORCE(!step_ids.empty(), "step num should be larger than 0"); + PADDLE_ENFORCE_EQ(step_ids.size(), step_scores.size(), + "step_ids and step_scores should be the same"); + const size_t step_num = step_ids.size(); + const size_t src_num = step_ids.at(0).lod().at(kSourceLevel).size() - 1; + + PADDLE_ENFORCE_GT(src_num, 0UL, "source num should be larger than 0"); + + // previous prefixes for each step, + // the init length is 0, means this is the first step. + std::vector> beamnode_vector_list(0); + std::vector> sentence_vector_list(src_num); + + // pack all steps for one batch first, then another batch + for (size_t step_id = 0; step_id < step_num; ++step_id) { + beamnode_vector_list = + PackTwoSteps(step_ids.at(step_id), step_scores.at(step_id), + beamnode_vector_list, &sentence_vector_list); + } + // append last beam_node to result + for (size_t src_idx = 0; src_idx < src_num; ++src_idx) { + for (auto& beam_node : beamnode_vector_list.at(src_idx)) { + sentence_vector_list[src_idx].push_back(MakeSentence(beam_node.get())); + beam_node.reset(); + } + } + + ConvertSentenceVectorToLodTensor(sentence_vector_list, id_tensor, + score_tensor); +} + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/beam_search_decode_op_test.cc b/paddle/operators/beam_search_decode_op_test.cc new file mode 100644 index 0000000000..5ac23991f3 --- /dev/null +++ b/paddle/operators/beam_search_decode_op_test.cc @@ -0,0 +1,221 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/beam_search_decode_op.h" +#include "gtest/gtest.h" + +using CPUPlace = paddle::platform::CPUPlace; +using LoD = paddle::framework::LoD; +using LoDTensor = paddle::framework::LoDTensor; +using LoDTensorArray = paddle::framework::LoDTensorArray; + +template +using BeamNode = paddle::operators::BeamNode; +template +using BeamSearchDecoder = paddle::operators::BeamSearchDecoder; +template +using Sentence = paddle::operators::Sentence; +template +using BeamNodeVector = paddle::operators::BeamNodeVector; +template +using SentenceVector = paddle::operators::SentenceVector; + +namespace paddle { +namespace test { + +void GenerateExample(const std::vector& level_0, + const std::vector& level_1, + const std::vector& data, LoDTensorArray* ids, + LoDTensorArray* scores) { + PADDLE_ENFORCE_EQ(level_0.back(), level_1.size() - 1, + "source level is used to describe candidate set"); + PADDLE_ENFORCE_EQ(level_1.back(), data.size(), + "the lowest level is used to describe data" + ", so it's last element should be data length"); + + CPUPlace place; + + LoD lod; + lod.push_back(level_0); + lod.push_back(level_1); + + // Ids + LoDTensor tensor_id; + tensor_id.set_lod(lod); + tensor_id.Resize({static_cast(data.size())}); + // malloc memory + int64_t* id_ptr = tensor_id.mutable_data(place); + for (size_t i = 0; i < data.size(); ++i) { + id_ptr[i] = static_cast(data.at(i)); + } + + // Scores + LoDTensor tensor_score; + tensor_score.set_lod(lod); + tensor_score.Resize({static_cast(data.size())}); + // malloc memory + float* score_ptr = tensor_score.mutable_data(place); + for (size_t i = 0; i < data.size(); ++i) { + score_ptr[i] = static_cast(data.at(i)); + } + + ids->push_back(tensor_id); + scores->push_back(tensor_score); +} + +} // namespace test +} // namespace paddle + +TEST(BeamSearchDecodeOp, DeleteBeamNode) { + auto* root = new BeamNode(0, 0); + auto* b1 = new BeamNode(1, 1); + auto* b2 = new BeamNode(2, 2); + auto* b3 = new BeamNode(3, 3); + + b1->AppendTo(root); + b2->AppendTo(root); + b3->AppendTo(b1); + + delete b3; + delete b2; +} + +TEST(BeamSearchDecodeOp, MakeSentence) { + auto* root = new BeamNode(0, 0); + auto* b1 = new BeamNode(1, 1); + auto* end = new BeamNode(2, 2); + b1->AppendTo(root); + end->AppendTo(b1); + + BeamSearchDecoder helper; + Sentence sentence = helper.MakeSentence(end); + delete end; + + std::vector expect_ids = {0, 1, 2}; + ASSERT_EQ(sentence.word_ids, expect_ids); + + std::vector expect_scores = {0, 1, 2}; + ASSERT_EQ(sentence.scores, expect_scores); +} + +TEST(BeamSearchDecodeOp, PackTwoStepsFistStep) { + CPUPlace place; + + LoDTensorArray ids; + LoDTensorArray scores; + + paddle::test::GenerateExample( + std::vector{0, 2, 6}, std::vector{0, 1, 2, 3, 4, 5, 6}, + std::vector{1, 2, 3, 4, 5, 6}, &ids, &scores); + + std::vector> beamnode_vector_list; + std::vector> sentence_vector_list( + 2, SentenceVector()); + + BeamSearchDecoder helper; + beamnode_vector_list = helper.PackTwoSteps( + ids[0], scores[0], beamnode_vector_list, &sentence_vector_list); + ASSERT_EQ(beamnode_vector_list.size(), 2UL); + ASSERT_EQ(beamnode_vector_list[0].size(), 2UL); + ASSERT_EQ(beamnode_vector_list[1].size(), 4UL); +} + +TEST(BeamSearchDecodeOp, PackTwoSteps) { + CPUPlace place; + + // first source has three prefix + BeamNodeVector source0_prefixes; + source0_prefixes.push_back( + std::unique_ptr>(new BeamNode(1, 1))); + source0_prefixes.push_back( + std::unique_ptr>(new BeamNode(0, 0))); + source0_prefixes.push_back( + std::unique_ptr>(new BeamNode(3, 3))); + + // second source has two prefix + BeamNodeVector source1_prefixes; + source1_prefixes.push_back( + std::unique_ptr>(new BeamNode(4, 4))); + source1_prefixes.push_back( + std::unique_ptr>(new BeamNode(5, 5))); + + std::vector> beamnode_vector_list; + std::vector> sentence_vector_list( + 2, SentenceVector()); + + beamnode_vector_list.push_back(std::move(source0_prefixes)); + beamnode_vector_list.push_back(std::move(source1_prefixes)); + + // generate data for one step + LoDTensorArray ids; + LoDTensorArray scores; + + paddle::test::GenerateExample(std::vector{0, 3, 5}, + std::vector{0, 1, 1, 3, 4, 5}, + std::vector{0, 1, 2, 3, 4}, &ids, &scores); + + BeamSearchDecoder helper1; + beamnode_vector_list = helper1.PackTwoSteps( + ids[0], scores[0], beamnode_vector_list, &sentence_vector_list); + + ASSERT_EQ(sentence_vector_list[0].size(), 1UL); + ASSERT_EQ(sentence_vector_list[1].size(), 0UL); + ASSERT_EQ(beamnode_vector_list[0].size(), 3UL); + ASSERT_EQ(beamnode_vector_list[1].size(), 2UL); +} + +TEST(BeamSearchDecodeOp, PackAllSteps) { + CPUPlace place; + + // we will constuct a sample data with 3 steps and 2 source sentences + LoDTensorArray ids; + LoDTensorArray scores; + + paddle::test::GenerateExample( + std::vector{0, 3, 6}, std::vector{0, 1, 2, 3, 4, 5, 6}, + std::vector{1, 2, 3, 4, 5, 6}, &ids, &scores); + paddle::test::GenerateExample( + std::vector{0, 3, 6}, std::vector{0, 1, 1, 3, 5, 5, 6}, + std::vector{0, 1, 2, 3, 4, 5}, &ids, &scores); + paddle::test::GenerateExample(std::vector{0, 3, 6}, + std::vector{0, 0, 1, 2, 3, 4, 5}, + std::vector{0, 1, 2, 3, 4}, &ids, &scores); + + ASSERT_EQ(ids.size(), 3UL); + ASSERT_EQ(scores.size(), 3UL); + + BeamSearchDecoder helper; + + LoDTensor id_tensor; + LoDTensor score_tensor; + helper.PackAllSteps(ids, scores, &id_tensor, &score_tensor); + + LoD lod = id_tensor.lod(); + std::vector expect_source_lod = {0, 4, 8}; + EXPECT_EQ(lod[0], expect_source_lod); + std::vector expect_sentence_lod = {0, 1, 3, 6, 9, 10, 13, 16, 19}; + EXPECT_EQ(lod[1], expect_sentence_lod); + // 2| 1, 0| 3, 1, 0| 3, 2, 1| 5| 4, 3, 2| 4, 4, 3| 6, 5, 4 + std::vector expect_data = {2, 1, 0, 3, 1, 0, 3, 2, 1, 5, + 4, 3, 2, 4, 4, 3, 6, 5, 4}; + ASSERT_EQ(id_tensor.dims()[0], static_cast(expect_data.size())); + for (size_t i = 0; i < expect_data.size(); ++i) { + ASSERT_EQ(id_tensor.data()[i], + static_cast(expect_data[i])); + } + for (int64_t i = 0; i < id_tensor.dims()[0]; ++i) { + ASSERT_EQ(score_tensor.data()[i], + static_cast(id_tensor.data()[i])); + } +} diff --git a/paddle/operators/sequence_concat_op.cc b/paddle/operators/sequence_concat_op.cc index db737bed7a..d1de0b4447 100644 --- a/paddle/operators/sequence_concat_op.cc +++ b/paddle/operators/sequence_concat_op.cc @@ -47,7 +47,7 @@ class SequenceConcatOpMaker : public framework::OpProtoAndCheckerMaker { framework::OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", - "(vector) Input is a vector of LoDTensor, " + "(LodTensorArray) Input is a vector of LoDTensor, " "each of which is a variable-length sequence or nested sequence.") .AsDuplicable(); AddOutput("Out", From 0a6262d550c784548ee78719a46b748d89adc0bd Mon Sep 17 00:00:00 2001 From: peterzhang2029 Date: Mon, 13 Nov 2017 18:45:43 +0800 Subject: [PATCH 31/37] fix warning --- paddle/operators/bilinear_tensor_product_op.h | 52 ++++++++++--------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/paddle/operators/bilinear_tensor_product_op.h b/paddle/operators/bilinear_tensor_product_op.h index 29da5f4d2a..984e7abdfb 100644 --- a/paddle/operators/bilinear_tensor_product_op.h +++ b/paddle/operators/bilinear_tensor_product_op.h @@ -43,24 +43,26 @@ class BilinearTensorProductKernel : public framework::OpKernel { auto batch_size = x->dims()[0]; auto weight_dims = weight->dims(); + int Out_dim = weight_dims[0]; + int X_dim = weight_dims[1]; + int Y_dim = weight_dims[2]; auto place = ctx.GetEigenDevice(); // Create the intermediate variable to caculate the result of // Input(X) multiplied by Input(Weight_i), the formula is: // left_mul = X Weight_i. Tensor left_mul; - left_mul.mutable_data(framework::make_ddim({batch_size, weight_dims[2]}), + left_mul.mutable_data(framework::make_ddim({batch_size, Y_dim}), ctx.GetPlace()); auto left_mul_mat = EigenMatrix::From(left_mul); - for (size_t i = 0; i < weight_dims[0]; ++i) { + for (int i = 0; i < Out_dim; ++i) { auto output_col_vec = output_mat.chip(i, 1); - Tensor weight_mat = weight->Slice(i, i + 1).Resize( - framework::make_ddim({weight_dims[1], weight_dims[2]})); + Tensor weight_mat = + weight->Slice(i, i + 1).Resize(framework::make_ddim({X_dim, Y_dim})); math::gemm(ctx.device_context(), CblasNoTrans, CblasNoTrans, - batch_size, weight_dims[2], weight_dims[1], 1, - x->data(), weight_mat.data(), 0, - left_mul.data()); + batch_size, Y_dim, X_dim, 1, x->data(), + weight_mat.data(), 0, left_mul.data()); output_col_vec.device(place) = (left_mul_mat * y_mat).sum(Eigen::DSizes(1)); } @@ -87,6 +89,9 @@ class BilinearTensorProductGradKernel : public framework::OpKernel { auto batch_size = x->dims()[0]; auto weight_dims = weight->dims(); + int Out_dim = weight_dims[0]; + int X_dim = weight_dims[1]; + int Y_dim = weight_dims[2]; auto x_mat = EigenMatrix::From(*x); auto y_mat = EigenMatrix::From(*y); @@ -95,13 +100,13 @@ class BilinearTensorProductGradKernel : public framework::OpKernel { // Create the intermediate variable to caculate the Output(Y@Grad). Tensor x_scale; - x_scale.mutable_data(framework::make_ddim({batch_size, weight_dims[1]}), + x_scale.mutable_data(framework::make_ddim({batch_size, X_dim}), ctx.GetPlace()); auto x_scale_mat = EigenMatrix::From(x_scale); // Create the intermediate variable to caculate the Output(X@Grad). Tensor y_scale; - y_scale.mutable_data(framework::make_ddim({batch_size, weight_dims[2]}), + y_scale.mutable_data(framework::make_ddim({batch_size, Y_dim}), ctx.GetPlace()); auto y_scale_mat = EigenMatrix::From(y_scale); @@ -121,11 +126,11 @@ class BilinearTensorProductGradKernel : public framework::OpKernel { // Caculate the Output(X@Grad) and Output(Y@Grad). if (d_x || d_y) { - Eigen::DSizes bcast_for_x(1, weight_dims[2]); - Eigen::DSizes bcast_for_y(1, weight_dims[1]); - for (int i = 0; i < weight_dims[0]; ++i) { + Eigen::DSizes bcast_for_x(1, Y_dim); + Eigen::DSizes bcast_for_y(1, X_dim); + for (int i = 0; i < Out_dim; ++i) { Tensor weight_i = weight->Slice(i, i + 1).Resize( - framework::make_ddim({weight_dims[1], weight_dims[2]})); + framework::make_ddim({X_dim, Y_dim})); auto output_vec = d_out_mat.chip(i, 1); if (d_x) { y_scale_mat.device(place) = @@ -133,9 +138,8 @@ class BilinearTensorProductGradKernel : public framework::OpKernel { .broadcast(bcast_for_x) * y_mat; math::gemm(ctx.device_context(), CblasNoTrans, CblasTrans, - batch_size, weight_dims[1], weight_dims[2], 1, - y_scale.data(), weight_i.data(), 1, - d_x->data()); + batch_size, X_dim, Y_dim, 1, y_scale.data(), + weight_i.data(), 1, d_x->data()); } if (d_y) { x_scale_mat.device(place) = @@ -143,9 +147,8 @@ class BilinearTensorProductGradKernel : public framework::OpKernel { .broadcast(bcast_for_y) * x_mat; math::gemm(ctx.device_context(), CblasNoTrans, CblasNoTrans, - batch_size, weight_dims[2], weight_dims[1], 1, - x_scale.data(), weight_i.data(), 1, - d_y->data()); + batch_size, Y_dim, X_dim, 1, x_scale.data(), + weight_i.data(), 1, d_y->data()); } } } @@ -153,19 +156,18 @@ class BilinearTensorProductGradKernel : public framework::OpKernel { // Caculate the gradient of Input(Weight). if (d_weight) { d_weight->mutable_data(ctx.GetPlace()); - Eigen::DSizes bcast_for_weight(1, weight_dims[1]); - for (int i = 0; i < weight_dims[0]; ++i) { + Eigen::DSizes bcast_for_weight(1, X_dim); + for (int i = 0; i < Out_dim; ++i) { Tensor d_weight_i = d_weight->Slice(i, i + 1).Resize( - framework::make_ddim({weight_dims[1], weight_dims[2]})); + framework::make_ddim({X_dim, Y_dim})); auto output_vec = d_out_mat.chip(i, 1); x_scale_mat.device(place) = output_vec.reshape(Eigen::DSizes(batch_size, 1)) .broadcast(bcast_for_weight) * x_mat; math::gemm(ctx.device_context(), CblasTrans, CblasNoTrans, - weight_dims[1], weight_dims[2], batch_size, 1, - x_scale.data(), y->data(), 0, - d_weight_i.data()); + X_dim, Y_dim, batch_size, 1, x_scale.data(), + y->data(), 0, d_weight_i.data()); } } From c5d7107767a1a42f46e7d0bf42ef26279fd562db Mon Sep 17 00:00:00 2001 From: peterzhang2029 Date: Mon, 13 Nov 2017 20:17:38 +0800 Subject: [PATCH 32/37] refine var name --- paddle/operators/bilinear_tensor_product_op.h | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/paddle/operators/bilinear_tensor_product_op.h b/paddle/operators/bilinear_tensor_product_op.h index 984e7abdfb..ffa4f43a32 100644 --- a/paddle/operators/bilinear_tensor_product_op.h +++ b/paddle/operators/bilinear_tensor_product_op.h @@ -43,25 +43,25 @@ class BilinearTensorProductKernel : public framework::OpKernel { auto batch_size = x->dims()[0]; auto weight_dims = weight->dims(); - int Out_dim = weight_dims[0]; - int X_dim = weight_dims[1]; - int Y_dim = weight_dims[2]; + int out_dim = weight_dims[0]; + auto x_dim = weight_dims[1]; + auto y_dim = weight_dims[2]; auto place = ctx.GetEigenDevice(); // Create the intermediate variable to caculate the result of // Input(X) multiplied by Input(Weight_i), the formula is: // left_mul = X Weight_i. Tensor left_mul; - left_mul.mutable_data(framework::make_ddim({batch_size, Y_dim}), + left_mul.mutable_data(framework::make_ddim({batch_size, y_dim}), ctx.GetPlace()); auto left_mul_mat = EigenMatrix::From(left_mul); - for (int i = 0; i < Out_dim; ++i) { + for (int i = 0; i < out_dim; ++i) { auto output_col_vec = output_mat.chip(i, 1); Tensor weight_mat = - weight->Slice(i, i + 1).Resize(framework::make_ddim({X_dim, Y_dim})); + weight->Slice(i, i + 1).Resize(framework::make_ddim({x_dim, y_dim})); math::gemm(ctx.device_context(), CblasNoTrans, CblasNoTrans, - batch_size, Y_dim, X_dim, 1, x->data(), + batch_size, y_dim, x_dim, 1, x->data(), weight_mat.data(), 0, left_mul.data()); output_col_vec.device(place) = (left_mul_mat * y_mat).sum(Eigen::DSizes(1)); @@ -89,9 +89,9 @@ class BilinearTensorProductGradKernel : public framework::OpKernel { auto batch_size = x->dims()[0]; auto weight_dims = weight->dims(); - int Out_dim = weight_dims[0]; - int X_dim = weight_dims[1]; - int Y_dim = weight_dims[2]; + int out_dim = weight_dims[0]; + auto x_dim = weight_dims[1]; + auto y_dim = weight_dims[2]; auto x_mat = EigenMatrix::From(*x); auto y_mat = EigenMatrix::From(*y); @@ -100,13 +100,13 @@ class BilinearTensorProductGradKernel : public framework::OpKernel { // Create the intermediate variable to caculate the Output(Y@Grad). Tensor x_scale; - x_scale.mutable_data(framework::make_ddim({batch_size, X_dim}), + x_scale.mutable_data(framework::make_ddim({batch_size, x_dim}), ctx.GetPlace()); auto x_scale_mat = EigenMatrix::From(x_scale); // Create the intermediate variable to caculate the Output(X@Grad). Tensor y_scale; - y_scale.mutable_data(framework::make_ddim({batch_size, Y_dim}), + y_scale.mutable_data(framework::make_ddim({batch_size, y_dim}), ctx.GetPlace()); auto y_scale_mat = EigenMatrix::From(y_scale); @@ -126,11 +126,11 @@ class BilinearTensorProductGradKernel : public framework::OpKernel { // Caculate the Output(X@Grad) and Output(Y@Grad). if (d_x || d_y) { - Eigen::DSizes bcast_for_x(1, Y_dim); - Eigen::DSizes bcast_for_y(1, X_dim); - for (int i = 0; i < Out_dim; ++i) { + Eigen::DSizes bcast_for_x(1, y_dim); + Eigen::DSizes bcast_for_y(1, x_dim); + for (int i = 0; i < out_dim; ++i) { Tensor weight_i = weight->Slice(i, i + 1).Resize( - framework::make_ddim({X_dim, Y_dim})); + framework::make_ddim({x_dim, y_dim})); auto output_vec = d_out_mat.chip(i, 1); if (d_x) { y_scale_mat.device(place) = @@ -138,7 +138,7 @@ class BilinearTensorProductGradKernel : public framework::OpKernel { .broadcast(bcast_for_x) * y_mat; math::gemm(ctx.device_context(), CblasNoTrans, CblasTrans, - batch_size, X_dim, Y_dim, 1, y_scale.data(), + batch_size, x_dim, y_dim, 1, y_scale.data(), weight_i.data(), 1, d_x->data()); } if (d_y) { @@ -147,7 +147,7 @@ class BilinearTensorProductGradKernel : public framework::OpKernel { .broadcast(bcast_for_y) * x_mat; math::gemm(ctx.device_context(), CblasNoTrans, CblasNoTrans, - batch_size, Y_dim, X_dim, 1, x_scale.data(), + batch_size, y_dim, x_dim, 1, x_scale.data(), weight_i.data(), 1, d_y->data()); } } @@ -156,17 +156,17 @@ class BilinearTensorProductGradKernel : public framework::OpKernel { // Caculate the gradient of Input(Weight). if (d_weight) { d_weight->mutable_data(ctx.GetPlace()); - Eigen::DSizes bcast_for_weight(1, X_dim); - for (int i = 0; i < Out_dim; ++i) { + Eigen::DSizes bcast_for_weight(1, x_dim); + for (int i = 0; i < out_dim; ++i) { Tensor d_weight_i = d_weight->Slice(i, i + 1).Resize( - framework::make_ddim({X_dim, Y_dim})); + framework::make_ddim({x_dim, y_dim})); auto output_vec = d_out_mat.chip(i, 1); x_scale_mat.device(place) = output_vec.reshape(Eigen::DSizes(batch_size, 1)) .broadcast(bcast_for_weight) * x_mat; math::gemm(ctx.device_context(), CblasTrans, CblasNoTrans, - X_dim, Y_dim, batch_size, 1, x_scale.data(), + x_dim, y_dim, batch_size, 1, x_scale.data(), y->data(), 0, d_weight_i.data()); } } From 6a6e4d8d0f3957bca58a0a7f26ffaf3e2c4dee11 Mon Sep 17 00:00:00 2001 From: xuwei06 Date: Fri, 10 Nov 2017 17:59:57 -0800 Subject: [PATCH 33/37] Fix matmal_op for debug mode The dimension is not set correctly and is not being checked in release mode because eigen_assert is not enabled. --- paddle/operators/matmul_op.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/paddle/operators/matmul_op.h b/paddle/operators/matmul_op.h index 5ce30740c9..4f565946d5 100644 --- a/paddle/operators/matmul_op.h +++ b/paddle/operators/matmul_op.h @@ -74,11 +74,10 @@ Tensor CombineBatchAndN(const framework::ExecutionContext& context, Tensor output; auto in_dims = input.dims(); if (in_dims.size() == 3) { - output.Resize(in_dims); + output.Resize({in_dims[1], in_dims[0], in_dims[2]}); output.mutable_data(context.GetPlace()); EigenTranspose(context, input, output, {1, 0, 2}); - std::vector out_dims = {in_dims[1], in_dims[0] * in_dims[2]}; - output.Resize(make_ddim(out_dims)); + output.Resize({in_dims[1], in_dims[0] * in_dims[2]}); } else { output.ShareDataWith(input); } From 983502d2e93e64a3e0b758054db38cbe34d52e8b Mon Sep 17 00:00:00 2001 From: xuwei06 Date: Fri, 10 Nov 2017 13:56:03 -0800 Subject: [PATCH 34/37] Fix sequence_pool_op in debug mode The rank of the tensor from the chip() function is changed. In release mode, eigen_assert is not enabled and the dimenstion mismatch is not detected. --- paddle/operators/sequence_pool_op.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/paddle/operators/sequence_pool_op.h b/paddle/operators/sequence_pool_op.h index 2b8a25c241..7f136d8cf0 100644 --- a/paddle/operators/sequence_pool_op.h +++ b/paddle/operators/sequence_pool_op.h @@ -126,6 +126,7 @@ class SequencePoolGradKernel : public framework::OpKernel { int64_t h = static_cast(lod[i + 1] - lod[i]); auto in_g_e = EigenMatrix::From(in_g_t, {h, w}); auto out_g_e = EigenMatrix::From(out_g_t, {1, w}); + auto out_g_e_v = EigenVector::Flatten(out_g_t); Eigen::DSizes bcast(h, 1); if (pooltype == "AVERAGE") { @@ -136,9 +137,9 @@ class SequencePoolGradKernel : public framework::OpKernel { in_g_e.device(place) = (out_g_e / std::sqrt(static_cast(h))).broadcast(bcast); } else if (pooltype == "LAST") { - in_g_e.chip(h - 1, 0).device(place) = out_g_e; + in_g_e.chip(h - 1, 0).device(place) = out_g_e_v; } else if (pooltype == "FIRST") { - in_g_e.chip(0, 0).device(place) = out_g_e; + in_g_e.chip(0, 0).device(place) = out_g_e_v; } else { PADDLE_THROW("unsupported pooling pooltype"); } From 1dcec992ea036f56dd03a57e1dfc34977e6e8bda Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Mon, 13 Nov 2017 15:35:12 -0800 Subject: [PATCH 35/37] Move the tests of book chapters from test/ into tests/book --- python/paddle/v2/framework/tests/CMakeLists.txt | 2 ++ python/paddle/v2/framework/tests/book/CMakeLists.txt | 5 +++++ .../paddle/v2/framework/tests/{ => book}/test_fit_a_line.py | 0 .../tests/{ => book}/test_image_classification_train.py | 0 .../framework/tests/{ => book}/test_recognize_digits_conv.py | 0 .../framework/tests/{ => book}/test_recognize_digits_mlp.py | 0 .../v2/framework/tests/{ => book}/test_recommender_system.py | 0 .../tests/{ => book}/test_understand_sentiment_conv.py | 0 .../{ => book}/test_understand_sentiment_dynamic_lstm.py | 0 .../tests/{ => book}/test_understand_sentiment_lstm.py | 0 python/paddle/v2/framework/tests/{ => book}/test_word2vec.py | 0 11 files changed, 7 insertions(+) create mode 100644 python/paddle/v2/framework/tests/book/CMakeLists.txt rename python/paddle/v2/framework/tests/{ => book}/test_fit_a_line.py (100%) rename python/paddle/v2/framework/tests/{ => book}/test_image_classification_train.py (100%) rename python/paddle/v2/framework/tests/{ => book}/test_recognize_digits_conv.py (100%) rename python/paddle/v2/framework/tests/{ => book}/test_recognize_digits_mlp.py (100%) rename python/paddle/v2/framework/tests/{ => book}/test_recommender_system.py (100%) rename python/paddle/v2/framework/tests/{ => book}/test_understand_sentiment_conv.py (100%) rename python/paddle/v2/framework/tests/{ => book}/test_understand_sentiment_dynamic_lstm.py (100%) rename python/paddle/v2/framework/tests/{ => book}/test_understand_sentiment_lstm.py (100%) rename python/paddle/v2/framework/tests/{ => book}/test_word2vec.py (100%) diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index 4d7664469e..e795627bfe 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -3,3 +3,5 @@ string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") foreach(src ${TEST_OPS}) py_test(${src} SRCS ${src}.py) endforeach() + +add_subdirectory(book) diff --git a/python/paddle/v2/framework/tests/book/CMakeLists.txt b/python/paddle/v2/framework/tests/book/CMakeLists.txt new file mode 100644 index 0000000000..4d7664469e --- /dev/null +++ b/python/paddle/v2/framework/tests/book/CMakeLists.txt @@ -0,0 +1,5 @@ +file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") +foreach(src ${TEST_OPS}) + py_test(${src} SRCS ${src}.py) +endforeach() diff --git a/python/paddle/v2/framework/tests/test_fit_a_line.py b/python/paddle/v2/framework/tests/book/test_fit_a_line.py similarity index 100% rename from python/paddle/v2/framework/tests/test_fit_a_line.py rename to python/paddle/v2/framework/tests/book/test_fit_a_line.py diff --git a/python/paddle/v2/framework/tests/test_image_classification_train.py b/python/paddle/v2/framework/tests/book/test_image_classification_train.py similarity index 100% rename from python/paddle/v2/framework/tests/test_image_classification_train.py rename to python/paddle/v2/framework/tests/book/test_image_classification_train.py diff --git a/python/paddle/v2/framework/tests/test_recognize_digits_conv.py b/python/paddle/v2/framework/tests/book/test_recognize_digits_conv.py similarity index 100% rename from python/paddle/v2/framework/tests/test_recognize_digits_conv.py rename to python/paddle/v2/framework/tests/book/test_recognize_digits_conv.py diff --git a/python/paddle/v2/framework/tests/test_recognize_digits_mlp.py b/python/paddle/v2/framework/tests/book/test_recognize_digits_mlp.py similarity index 100% rename from python/paddle/v2/framework/tests/test_recognize_digits_mlp.py rename to python/paddle/v2/framework/tests/book/test_recognize_digits_mlp.py diff --git a/python/paddle/v2/framework/tests/test_recommender_system.py b/python/paddle/v2/framework/tests/book/test_recommender_system.py similarity index 100% rename from python/paddle/v2/framework/tests/test_recommender_system.py rename to python/paddle/v2/framework/tests/book/test_recommender_system.py diff --git a/python/paddle/v2/framework/tests/test_understand_sentiment_conv.py b/python/paddle/v2/framework/tests/book/test_understand_sentiment_conv.py similarity index 100% rename from python/paddle/v2/framework/tests/test_understand_sentiment_conv.py rename to python/paddle/v2/framework/tests/book/test_understand_sentiment_conv.py diff --git a/python/paddle/v2/framework/tests/test_understand_sentiment_dynamic_lstm.py b/python/paddle/v2/framework/tests/book/test_understand_sentiment_dynamic_lstm.py similarity index 100% rename from python/paddle/v2/framework/tests/test_understand_sentiment_dynamic_lstm.py rename to python/paddle/v2/framework/tests/book/test_understand_sentiment_dynamic_lstm.py diff --git a/python/paddle/v2/framework/tests/test_understand_sentiment_lstm.py b/python/paddle/v2/framework/tests/book/test_understand_sentiment_lstm.py similarity index 100% rename from python/paddle/v2/framework/tests/test_understand_sentiment_lstm.py rename to python/paddle/v2/framework/tests/book/test_understand_sentiment_lstm.py diff --git a/python/paddle/v2/framework/tests/test_word2vec.py b/python/paddle/v2/framework/tests/book/test_word2vec.py similarity index 100% rename from python/paddle/v2/framework/tests/test_word2vec.py rename to python/paddle/v2/framework/tests/book/test_word2vec.py From 7c1755d93f7f046432b596aac6c271edc676b8ae Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 13 Nov 2017 18:31:22 -0800 Subject: [PATCH 36/37] Assign Operator. (#5531) * Assign Operator. Out=X, when type in [LoDTensor/SelectedRows/LoDTensorArray] * Follow comments --- paddle/framework/var_type.h | 22 +++ paddle/operators/assign_op.cc | 138 ++++++++++++++++++ .../v2/framework/tests/test_assign_op.py | 21 +++ 3 files changed, 181 insertions(+) create mode 100644 paddle/operators/assign_op.cc create mode 100644 python/paddle/v2/framework/tests/test_assign_op.py diff --git a/paddle/framework/var_type.h b/paddle/framework/var_type.h index d060196bb2..0f19870bec 100644 --- a/paddle/framework/var_type.h +++ b/paddle/framework/var_type.h @@ -27,10 +27,32 @@ inline VarDesc::VarType ToVarType(std::type_index type) { return VarDesc_VarType_LOD_RANK_TABLE; } else if (type.hash_code() == typeid(LoDTensorArray).hash_code()) { return VarDesc_VarType_LOD_TENSOR_ARRAY; + } else if (type.hash_code() == typeid(SelectedRows).hash_code()) { + return VarDesc_VarType_SELECTED_ROWS; } else { PADDLE_THROW("ToVarType:Unsupported type %s", type.name()); } } +template +inline void VisitVarType(const Variable& var, Visitor visitor) { + switch (ToVarType(var.Type())) { + case VarDesc_VarType_LOD_TENSOR: + visitor(var.Get()); + return; + case VarDesc_VarType_LOD_RANK_TABLE: + visitor(var.Get()); + return; + case VarDesc_VarType_LOD_TENSOR_ARRAY: + visitor(var.Get()); + return; + case VarDesc_VarType_SELECTED_ROWS: + visitor(var.Get()); + return; + default: + PADDLE_THROW("Not supported visit type, %d", ToVarType(var.Type())); + } +} + } // namespace framework } // namespace paddle diff --git a/paddle/operators/assign_op.cc b/paddle/operators/assign_op.cc new file mode 100644 index 0000000000..609e915b93 --- /dev/null +++ b/paddle/operators/assign_op.cc @@ -0,0 +1,138 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/framework/data_type.h" +#include "paddle/framework/op_registry.h" +#include "paddle/framework/var_type.h" + +namespace paddle { +namespace operators { +class AssignFunctor { + public: + AssignFunctor(framework::Variable *out, + const platform::DeviceContext &dev_ctx) + : out_(out), dev_ctx_(dev_ctx) {} + + void operator()(const framework::LoDTensor &lod_tensor) const { + auto &out_tensor = *out_->GetMutable(); + copy_tensor(lod_tensor, &out_tensor); + } + + void operator()(const framework::LoDTensorArray &array) const { + auto &out_array = *out_->GetMutable(); + out_array.resize(array.size()); + for (size_t i = 0; i < array.size(); ++i) { + copy_tensor(array[i], &out_array[i]); + } + } + + void operator()(const framework::SelectedRows &rows) const { + framework::SelectedRows &out_rows = + *out_->GetMutable(); + out_rows.set_rows(rows.rows()); + out_rows.set_height(rows.height()); + auto &t = rows.value(); + out_rows.mutable_value()->CopyFrom(t, t.place(), dev_ctx_); + } + + template + void operator()(const T &v) const { + PADDLE_THROW("Not support type for assign op %s", typeid(T).name()); + } + + private: + void copy_tensor(const framework::LoDTensor &lod_tensor, + framework::LoDTensor *out) const { + auto &out_tensor = *out; + out_tensor.CopyFrom(lod_tensor, lod_tensor.place(), dev_ctx_); + out_tensor.set_lod(lod_tensor.lod()); + } + + framework::Variable *out_; + const platform::DeviceContext &dev_ctx_; +}; + +class AssignOp : public framework::OperatorBase { + public: + AssignOp(const std::string &type, const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + void Run(const framework::Scope &scope, + const platform::DeviceContext &dev_ctx) const override { + auto *x = scope.FindVar(Input("X")); + if (x == nullptr) { + return; + } + auto *out = scope.FindVar(Output("Out")); + PADDLE_ENFORCE( + out != nullptr, + "The Output(Out) should not be null if the Input(X) is set."); + framework::VisitVarType(*x, AssignFunctor(out, dev_ctx)); + } +}; + +class AssignOpProtoMaker : public framework::OpProtoAndCheckerMaker { + public: + AssignOpProtoMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", + "(LoDTensor, SelectedRows or LoDTensorArray) The input variable " + "could be LoDTensor, SelectedRows or LoDTensorArray.") + .AsDispensable(); + AddOutput("Out", + "(LoDTensor, SelectedRows or LoDTensorArray) The type of output " + "is the same as input X."); + AddComment(R"DOC(Assign Operator + +Out = X, when type in [LoDTensor/SelectedRows/LoDTensorArray] +raise error if the type is not listed above. +)DOC"); + } +}; + +class AssignInferShape : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext *context) const override { + if (context->HasInput("X")) { + auto type = context->GetInputsVarType("X")[0]; + if (type == framework::VarDesc_VarType_SELECTED_ROWS || + type == framework::VarDesc_VarType_LOD_TENSOR) { + context->SetOutputDim("Out", context->GetInputDim("X")); + } + } + } +}; + +class AssignGradMaker : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + protected: + std::unique_ptr Apply() const override { + auto *op = new framework::OpDescBind(); + op->SetType("assign"); + op->SetInput("X", OutputGrad("Out")); + op->SetOutput("Out", InputGrad("X")); + return std::unique_ptr(op); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(assign, ops::AssignOp, ops::AssignGradMaker, + ops::AssignInferShape, ops::AssignOpProtoMaker); diff --git a/python/paddle/v2/framework/tests/test_assign_op.py b/python/paddle/v2/framework/tests/test_assign_op.py new file mode 100644 index 0000000000..1b0c145f1a --- /dev/null +++ b/python/paddle/v2/framework/tests/test_assign_op.py @@ -0,0 +1,21 @@ +import op_test +import numpy +import unittest + + +class TestAssignOp(op_test.OpTest): + def setUp(self): + self.op_type = "assign" + x = numpy.random.random(size=(100, 10)) + self.inputs = {'X': x} + self.outputs = {'Out': x} + + def test_forward(self): + self.check_output() + + def test_backward(self): + self.check_grad(['X'], 'Out') + + +if __name__ == '__main__': + unittest.main() From f07a226a4f205b72418795ac83b467e29ccbc91a Mon Sep 17 00:00:00 2001 From: QI JUN Date: Tue, 14 Nov 2017 10:57:14 +0800 Subject: [PATCH 37/37] add split and merge lod tensor operator (#5537) * add split lod tensor operator * add more test cast * clean code * add merge lod tensor operator * fix bug * clean code * add grad operator * make mask support GPU * add comments --- paddle/operators/merge_lod_tensor_op.cc | 182 +++++++++++++++++ paddle/operators/split_lod_tensor_op.cc | 186 ++++++++++++++++++ python/paddle/v2/framework/layers.py | 42 +++- .../test_split_and_merge_lod_tensor_op.py | 181 +++++++++++++++++ 4 files changed, 590 insertions(+), 1 deletion(-) create mode 100644 paddle/operators/merge_lod_tensor_op.cc create mode 100644 paddle/operators/split_lod_tensor_op.cc create mode 100644 python/paddle/v2/framework/tests/test_split_and_merge_lod_tensor_op.py diff --git a/paddle/operators/merge_lod_tensor_op.cc b/paddle/operators/merge_lod_tensor_op.cc new file mode 100644 index 0000000000..80460c4769 --- /dev/null +++ b/paddle/operators/merge_lod_tensor_op.cc @@ -0,0 +1,182 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/framework/op_registry.h" +#include "paddle/memory/memcpy.h" + +namespace paddle { +namespace operators { + +using LoD = framework::LoD; + +class MergeLoDTensorOp : public framework::OperatorBase { + public: + MergeLoDTensorOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + void Run(const framework::Scope &scope, + const platform::DeviceContext &dev_ctx) const override { + auto &x = scope.FindVar(Input("X"))->Get(); + auto &mask = scope.FindVar(Input("Mask"))->Get(); + auto &in_true = scope.FindVar(Input("InTrue"))->Get(); + auto &in_false = + scope.FindVar(Input("InFalse"))->Get(); + auto *out = + scope.FindVar(Output("Out"))->GetMutable(); + auto level = static_cast(Attr("level")); + + auto &mask_dim = mask.dims(); + + std::unique_ptr cpu_mask{new framework::LoDTensor()}; + if (platform::is_cpu_place(mask.place())) { + cpu_mask->ShareDataWith(mask); + } else if (platform::is_gpu_place(mask.place())) { +#ifdef PADDLE_WITH_CUDA + cpu_mask->CopyFrom(mask, platform::CPUPlace(), dev_ctx); +#else + PADDLE_THROW("Not supported GPU, Please compile WITH_GPU option"); +#endif + } + auto *mask_data = cpu_mask->data(); + + int rank = in_true.dims().size(); + platform::Place place = in_true.place(); + std::type_index data_type = in_true.type(); + framework::DDim in_true_dims = + framework::slice_ddim(in_true.dims(), 1, rank); + + int64_t batch_size = in_true.dims()[0] + in_false.dims()[0]; + + auto in_true_dim_vec = framework::vectorize(in_true_dims); + in_true_dim_vec.insert(in_true_dim_vec.begin(), batch_size); + + framework::DDim out_dims = framework::make_ddim(in_true_dim_vec); + out->Resize(out_dims); + out->mutable_data(place, data_type); + + auto *out_lod = out->mutable_lod(); + out_lod->clear(); + size_t out_offset = 0; + + // Build LoDTensor `out` + + size_t in_true_idx = 0; + size_t in_false_idx = 0; + for (size_t i = 0; i < static_cast(mask_dim[0]); i++) { + const framework::LoDTensor *input = nullptr; + size_t *in_idx = nullptr; + if (static_cast(mask_data[i]) == 0) { + input = &in_false; + in_idx = &in_false_idx; + } else { + input = &in_true; + in_idx = &in_true_idx; + } + auto lod_and_offset = framework::GetSubLoDAndAbsoluteOffset( + input->lod(), *in_idx, (*in_idx) + 1, 0); + auto &lod_length = lod_and_offset.first; + + framework::AppendLoD(out_lod, lod_length); + + size_t start_offset = lod_and_offset.second.first; + size_t end_offset = lod_and_offset.second.second; + + PADDLE_ENFORCE_GE(end_offset, start_offset); + size_t len = end_offset - start_offset; + if (len == 0) { + continue; + } + out->Slice(out_offset, out_offset + len) + .CopyFrom(input->Slice(start_offset, end_offset), place, dev_ctx); + out_offset += len; + (*in_idx) += 1; + } + + for (size_t i = 0; i < level; i++) { + out_lod->insert(out_lod->begin(), x.lod()[i]); + } + } +}; + +class MergeLoDTensorOpProtoMaker : public framework::OpProtoAndCheckerMaker { + public: + MergeLoDTensorOpProtoMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", + "The input LoDTensor, contains complete lod information to " + "construct the output"); + AddInput("Mask", "A bool column vector which mask the input"); + AddInput("InTrue", "The True branch to be merged"); + AddInput("InFalse", "The False branch to be merged"); + AddOutput("Out", "The merged output LoDTensor"); + AddAttr("level", "(int) the specific lod level to rank.") + .SetDefault(0) + .EqualGreaterThan(0); + AddComment( + R"DOC( + Merge True and False branches of LoDTensor into a single Output, + with a mask at certain lod level. X is used to obtain complete + lod information. Please refer to SplitLoDTensorOp.)DOC"); + } +}; + +class MergeLoDTensorInferShape : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext *context) const override { + PADDLE_ENFORCE(context->HasInput("X"), + "MergeLoDTensorOp must has input X."); + PADDLE_ENFORCE(context->HasInput("Mask"), + "MergeLoDTensorOp must has input Mask."); + PADDLE_ENFORCE(context->HasInput("InTrue"), + "MergeLoDTensorOp must has input InTrue."); + PADDLE_ENFORCE(context->HasInput("InFalse"), + "MergeLoDTensorOp must has input InFalse."); + PADDLE_ENFORCE(context->HasOutput("Out"), + "MergeLoDTensorOp must has output Out"); + + auto mask_dim = context->GetInputDim("Mask"); + PADDLE_ENFORCE_EQ(mask_dim.size(), 2); + PADDLE_ENFORCE_EQ(mask_dim[1], 1); + + context->SetOutputDim("Out", context->GetInputDim("InTrue")); + } +}; + +class MergeLoDTensorGradMaker : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + protected: + std::unique_ptr Apply() const override { + auto *grad_op = new framework::OpDescBind(); + grad_op->SetType("split_lod_tensor"); + grad_op->SetInput("X", OutputGrad("Out")); + grad_op->SetInput("Mask", Input("Mask")); + grad_op->SetOutput("OutTrue", InputGrad("InTrue")); + grad_op->SetOutput("OutFalse", InputGrad("InFalse")); + grad_op->SetAttrMap(Attrs()); + return std::unique_ptr(grad_op); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(merge_lod_tensor, ops::MergeLoDTensorOp, + ops::MergeLoDTensorOpProtoMaker, + ops::MergeLoDTensorInferShape, ops::MergeLoDTensorGradMaker); diff --git a/paddle/operators/split_lod_tensor_op.cc b/paddle/operators/split_lod_tensor_op.cc new file mode 100644 index 0000000000..db635f2ba0 --- /dev/null +++ b/paddle/operators/split_lod_tensor_op.cc @@ -0,0 +1,186 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/framework/op_registry.h" +#include "paddle/memory/memcpy.h" + +namespace paddle { +namespace operators { + +struct CopyRange { + size_t begin; + size_t end; +}; + +using LoD = framework::LoD; + +class SplitLoDTensorOp : public framework::OperatorBase { + public: + SplitLoDTensorOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + void Run(const framework::Scope &scope, + const platform::DeviceContext &dev_ctx) const override { + auto &x = scope.FindVar(Input("X"))->Get(); + auto &mask = scope.FindVar(Input("Mask"))->Get(); + auto *out_true = + scope.FindVar(Output("OutTrue"))->GetMutable(); + auto *out_false = + scope.FindVar(Output("OutFalse"))->GetMutable(); + auto level = static_cast(Attr("level")); + auto &x_lod = x.lod(); + auto &mask_dim = mask.dims(); + + std::unique_ptr cpu_mask{new framework::LoDTensor()}; + if (platform::is_cpu_place(mask.place())) { + cpu_mask->ShareDataWith(mask); + } else if (platform::is_gpu_place(mask.place())) { +#ifdef PADDLE_WITH_CUDA + cpu_mask->CopyFrom(mask, platform::CPUPlace(), dev_ctx); +#else + PADDLE_THROW("Not supported GPU, Please compile WITH_GPU option"); +#endif + } + auto *mask_data = cpu_mask->data(); + + std::vector> copy_ranges(mask_dim[0]); + + // set out_true/out_false lod + for (size_t t = 0; t < 2; t++) { + LoD *lod = nullptr; + if (t == 0) { + lod = out_false->mutable_lod(); + } else { + lod = out_true->mutable_lod(); + } + lod->clear(); + for (size_t i = 0; i < static_cast(mask_dim[0]); i++) { + if (static_cast(mask_data[i]) == t) { + size_t start_idx = i; + auto lod_and_offset = framework::GetSubLoDAndAbsoluteOffset( + x_lod, start_idx, start_idx + 1, level); + + auto &lod_length = lod_and_offset.first; + framework::AppendLoD(lod, lod_length); + + size_t start_offset = lod_and_offset.second.first; + size_t end_offset = lod_and_offset.second.second; + copy_ranges[t].emplace_back(CopyRange{start_offset, end_offset}); + } + } + } + + for (size_t t = 0; t < 2; ++t) { + framework::LoDTensor *out; + if (t == 0) { + out = out_false; + } else { + out = out_true; + } + auto &ranges = copy_ranges[t]; + size_t height = std::accumulate( + ranges.begin(), ranges.end(), 0UL, + [](size_t a, const CopyRange &b) { return a + b.end - b.begin; }); + auto x_dim = x.dims(); + x_dim[0] = static_cast(height); + out->Resize(x_dim); + out->mutable_data(x.place(), x.type()); + size_t offset = 0; + for (auto &each_range : ranges) { + size_t len = each_range.end - each_range.begin; + if (len == 0) { + continue; + } + // out[offset: offset+len] = x[each_range.begin: each_range.end] + out->Slice(static_cast(offset), static_cast(offset + len)) + .CopyFrom(x.Slice(static_cast(each_range.begin), + static_cast(each_range.end)), + x.place(), dev_ctx); + offset += len; + } + } + } +}; + +class SplitLoDTensorOpProtoMaker : public framework::OpProtoAndCheckerMaker { + public: + SplitLoDTensorOpProtoMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "The input LoDTensor"); + AddInput("Mask", "A bool column vector which mask the input"); + AddOutput("OutTrue", "True branch of input LoDTensor"); + AddOutput("OutFalse", "False branch of input LoDTensor"); + AddAttr("level", "(int) the specific lod level to split.") + .SetDefault(0) + .EqualGreaterThan(0); + AddComment( + R"DOC( + Split a LoDTensor with a Mask at certain level. The input LoDTensor + has 3 sequence at certain lod level. The Mask is a bool column vector, + such as [0, 1, 0] at the same level. The first and third sequence will + be send to False Output LoDTensor; whereas the second sequence will + be send to True Output LoDTensor. Please refer to MergeLoDTensorOp.)DOC"); + } +}; + +class SplitLoDTensorInferShape : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext *context) const override { + PADDLE_ENFORCE(context->HasInput("X"), + "SplitLoDTensorOp must has input X."); + PADDLE_ENFORCE(context->HasInput("Mask"), + "SplitLoDTensorOp must has input Mask."); + PADDLE_ENFORCE(context->HasOutput("OutTrue"), + "SplitLoDTensorOp must has output OutTrue."); + PADDLE_ENFORCE(context->HasOutput("OutFalse"), + "SplitLoDTensorOp must has output OutFalse."); + + auto mask_dim = context->GetInputDim("Mask"); + PADDLE_ENFORCE_EQ(mask_dim.size(), 2); + PADDLE_ENFORCE_EQ(mask_dim[1], 1); + + context->SetOutputDim("OutTrue", context->GetInputDim("X")); + context->SetOutputDim("OutFalse", context->GetInputDim("X")); + } +}; + +class SplitLoDTensorArrayGradMaker : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + protected: + std::unique_ptr Apply() const override { + auto *grad_op = new framework::OpDescBind(); + grad_op->SetType("merge_lod_tensor"); + grad_op->SetInput("InTrue", OutputGrad("OutTrue")); + grad_op->SetInput("InFalse", OutputGrad("OutFalse")); + grad_op->SetInput("Mask", Input("Mask")); + grad_op->SetInput("X", Input("X")); + grad_op->SetOutput("Out", InputGrad("X")); + grad_op->SetAttrMap(Attrs()); + return std::unique_ptr(grad_op); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(split_lod_tensor, ops::SplitLoDTensorOp, + ops::SplitLoDTensorOpProtoMaker, + ops::SplitLoDTensorInferShape, + ops::SplitLoDTensorArrayGradMaker); diff --git a/python/paddle/v2/framework/layers.py b/python/paddle/v2/framework/layers.py index fe3c86febb..a2219465b7 100644 --- a/python/paddle/v2/framework/layers.py +++ b/python/paddle/v2/framework/layers.py @@ -11,7 +11,7 @@ import cStringIO __all__ = [ 'fc', 'data', 'cross_entropy', 'conv2d', 'pool2d', 'embedding', 'concat', 'StaticRNN', 'cast', 'sequence_conv', 'sequence_pool', 'sums', 'cos_sim', - 'batch_norm', 'accuracy' + 'batch_norm', 'accuracy', 'split_lod_tensor' ] @@ -451,6 +451,46 @@ def sums(input, main_program=None, startup_program=None): return out +def split_lod_tensor(input, + mask, + level, + main_program=None, + startup_program=None): + helper = LayerHelper('split_lod_tensor', **locals()) + out_true = helper.create_tmp_variable(dtype=input.data_type) + out_false = helper.create_tmp_variable(dtype=input.data_type) + helper.append_op( + type='split_lod_tensor', + inputs={ + 'X': input, + 'Mask': mask, + }, + outputs={'OutTrue': out_true, + 'OutFalse': out_false}, + attrs={'level': level}) + return out_true, out_false + + +def merge_lod_tensor(in_true, + in_false, + x, + mask, + level, + main_program=None, + startup_program=None): + helper = LayerHelper('merge_lod_tensor', **locals()) + out = helper.create_tmp_variable(dtype=x.data_type) + helper.append_op( + type='merge_lod_tensor', + inputs={'X': x, + 'Mask': mask, + 'InTrue': in_true, + 'InFalse': in_false}, + outputs={'Out': out}, + attrs={'level': level}) + return out + + def cos_sim(X, Y, **kwargs): """ This function performs the cosine similarity between two tensors diff --git a/python/paddle/v2/framework/tests/test_split_and_merge_lod_tensor_op.py b/python/paddle/v2/framework/tests/test_split_and_merge_lod_tensor_op.py new file mode 100644 index 0000000000..6ba1e56824 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_split_and_merge_lod_tensor_op.py @@ -0,0 +1,181 @@ +import unittest +import paddle.v2.framework.core as core +import numpy as np +import paddle.v2.framework.layers as layers +from paddle.v2.framework.framework import Program +from paddle.v2.framework.executor import Executor +from paddle.v2.framework.backward import append_backward_ops + + +class TestCPULoDTensorArrayOps(unittest.TestCase): + def place(self): + return core.CPUPlace() + + def test_split_and_merge_lod_tensor_no_lod(self): + tensor = core.LoDTensor() + tensor.set(np.arange(10).reshape(10, 1).astype('int32'), self.place()) + + mask_np = np.array([0, 0, 1, 1, 1, 1, 0, 0, 0, 0]).astype('bool') + mask_np = np.expand_dims(mask_np, axis=1) + + mask = core.LoDTensor() + mask.set(mask_np, self.place()) + + expect_true_tensor = np.array([2, 3, 4, 5]).astype('int32') + expect_true_tensor = np.expand_dims(expect_true_tensor, axis=1) + expect_true = core.LoDTensor() + expect_true.set(expect_true_tensor, self.place()) + + expect_false_tensor = np.array([0, 1, 6, 7, 8, 9]).astype('int32') + expect_false_tensor = np.expand_dims(expect_false_tensor, axis=1) + + expect_false = core.LoDTensor() + expect_false.set(expect_false_tensor, self.place()) + + self.main( + tensor=tensor, + mask=mask, + expect_true=expect_true, + expect_false=expect_false, + expect_out=tensor) + + def test_split_and_merge_lod_tensor_level_0(self): + tensor = core.LoDTensor() + tensor.set(np.arange(10).reshape(10, 1).astype('int32'), self.place()) + tensor.set_lod([[0, 3, 9, 10]]) + + mask_np = np.array([0, 1, 0]).astype('bool') + mask_np = np.expand_dims(mask_np, axis=1) + + mask = core.LoDTensor() + mask.set(mask_np, self.place()) + + expect_true_tensor = np.array([3, 4, 5, 6, 7, 8]).astype('int32') + expect_true_tensor = np.expand_dims(expect_true_tensor, axis=1) + expect_true = core.LoDTensor() + expect_true.set(expect_true_tensor, self.place()) + expect_true.set_lod([[0, 6]]) + + expect_false_tensor = np.array([0, 1, 2, 9]).astype('int32') + expect_false_tensor = np.expand_dims(expect_false_tensor, axis=1) + expect_false_lod = [[0, 3, 4]] + + expect_false = core.LoDTensor() + expect_false.set(expect_false_tensor, self.place()) + expect_false.set_lod(expect_false_lod) + + self.main( + tensor=tensor, + mask=mask, + expect_true=expect_true, + expect_false=expect_false, + expect_out=tensor) + + def main(self, tensor, mask, expect_true, expect_false, expect_out, + level=0): + place = self.place() + program = Program() + x = layers.data(name='x', shape=[1], main_program=program) + x.persistable = True + + y = layers.data(name='y', shape=[1], main_program=program) + y.persistable = True + + out_true, out_false = layers.split_lod_tensor( + input=x, mask=y, level=level, main_program=program) + out_true.persistable = True + out_false.persistable = True + + out = layers.merge_lod_tensor( + in_true=out_true, + in_false=out_false, + mask=y, + x=x, + level=level, + main_program=program) + + out.persistable = True + + exe = Executor(place) + scope = core.Scope() + exe.run(program, feed={'x': tensor, 'y': mask}, scope=scope) + + var_true = scope.find_var(out_true.name).get_tensor() + + var_false = scope.find_var(out_false.name).get_tensor() + + var_out = scope.find_var(out.name).get_tensor() + + self.check_tensor_same(var_true, expect_true) + self.check_tensor_same(var_false, expect_false) + self.check_tensor_same(var_out, expect_out) + + def check_tensor_same(self, actual, expect): + self.assertTrue(np.allclose(np.array(actual), np.array(expect))) + self.assertEqual(actual.lod(), expect.lod()) + + +class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase): + def test_grad(self): + place = core.CPUPlace() + program = Program() + + x = layers.data( + name='x', + shape=[1], + data_type='float32', + main_program=program, + stop_gradient=False) + y = layers.data( + name='y', + shape=[1], + data_type='bool', + main_program=program, + stop_gradient=False) + + level = 0 + + out_true, out_false = layers.split_lod_tensor( + input=x, mask=y, level=level, main_program=program) + out = layers.merge_lod_tensor( + in_true=out_true, + in_false=out_false, + mask=y, + x=x, + level=level, + main_program=program) + mean = layers.mean(x=out, main_program=program) + + append_backward_ops(mean) + + tensor = core.LoDTensor() + tensor.set(np.arange(10).reshape(10, 1).astype('float32'), place) + tensor.set_lod([[0, 3, 9, 10]]) + + mask_np = np.array([0, 1, 0]).astype('bool') + mask_np = np.expand_dims(mask_np, axis=1) + + mask = core.LoDTensor() + mask.set(mask_np, place) + + exe = Executor(place) + scope = core.Scope() + + g_vars = program.global_block().var(x.name + "@GRAD") + g_out = [ + item.sum() + for item in map(np.array, + exe.run(program, + feed={'x': tensor, + 'y': mask}, + fetch_list=[g_vars], + scope=scope)) + ] + + g_out_sum = np.array(g_out).sum() + + self.assertAlmostEqual(1.0, g_out_sum, delta=0.1) + + +if __name__ == '__main__': + unittest.main()