Merge branch 'develop' of https://github.com/PaddlePaddle/paddle into fix-H0-GRUOp

release/0.11.0
guosheng 7 years ago
commit 2bed961211

@ -82,6 +82,11 @@ maxout
.. autoclass:: paddle.v2.layer.maxout
:noindex:
roi_pool
--------
.. autoclass:: paddle.v2.layer.roi_pool
:noindex:
Norm Layer
==========

@ -98,5 +98,23 @@ void Scope::DeleteScope(Scope* scope) {
delete scope;
}
void Scope::Rename(const std::string& origin_name,
const std::string& new_name) const {
auto origin_it = vars_.find(origin_name);
PADDLE_ENFORCE(origin_it != vars_.end(),
"Cannot find original variable with name %s", origin_name);
auto new_it = vars_.find(new_name);
PADDLE_ENFORCE(new_it == vars_.end(),
"The variable with name %s is already in the scope", new_name);
vars_[new_name] = origin_it->second;
vars_.erase(origin_it);
}
std::string Scope::Rename(const std::string& origin_name) const {
auto var_name = string::Sprintf("%p.%d", this, vars_.size());
Rename(origin_name, var_name);
return var_name;
}
} // namespace framework
} // namespace paddle

@ -68,11 +68,18 @@ class Scope {
// enumerate all the variables current contains.
std::vector<std::string> GetAllNames(bool recursive = false) const;
// Rename variable to a new name
void Rename(const std::string& origin_name,
const std::string& new_name) const;
// Rename variable to a new name and return the new name
std::string Rename(const std::string& origin_name) const;
private:
// Call Scope::NewScope for a sub-scope.
explicit Scope(Scope const* parent) : parent_(parent) {}
std::unordered_map<std::string, Variable*> vars_;
mutable std::unordered_map<std::string, Variable*> vars_;
mutable std::list<Scope*> kids_;
Scope const* parent_{nullptr};

@ -27,10 +27,32 @@ inline VarDesc::VarType ToVarType(std::type_index type) {
return VarDesc_VarType_LOD_RANK_TABLE;
} else if (type.hash_code() == typeid(LoDTensorArray).hash_code()) {
return VarDesc_VarType_LOD_TENSOR_ARRAY;
} else if (type.hash_code() == typeid(SelectedRows).hash_code()) {
return VarDesc_VarType_SELECTED_ROWS;
} else {
PADDLE_THROW("ToVarType:Unsupported type %s", type.name());
}
}
template <typename Visitor>
inline void VisitVarType(const Variable& var, Visitor visitor) {
switch (ToVarType(var.Type())) {
case VarDesc_VarType_LOD_TENSOR:
visitor(var.Get<framework::LoDTensor>());
return;
case VarDesc_VarType_LOD_RANK_TABLE:
visitor(var.Get<LoDRankTable>());
return;
case VarDesc_VarType_LOD_TENSOR_ARRAY:
visitor(var.Get<LoDTensorArray>());
return;
case VarDesc_VarType_SELECTED_ROWS:
visitor(var.Get<SelectedRows>());
return;
default:
PADDLE_THROW("Not supported visit type, %d", ToVarType(var.Type()));
}
}
} // namespace framework
} // namespace paddle

@ -54,7 +54,6 @@ void MKLDNNAddtoLayer::reshape(
ow = iw;
reshapeOutput(oh, ow);
resizeOutput(bs, oc * oh * ow);
printSizeInfo();
}
void MKLDNNAddtoLayer::resetFwd(std::vector<primitive>& pipeline,

@ -125,7 +125,6 @@ void MKLDNNBatchNormLayer::reshape(
<< "Input channel can not be changed";
reshapeOutput(oh, ow);
resizeOutput(bs, oc * oh * ow);
printSizeInfo();
}
void MKLDNNBatchNormLayer::resetFwd(std::vector<primitive>& pipeline,

@ -102,8 +102,6 @@ void MKLDNNConvLayer::reshape(
reshapeOutput(oh, ow);
resizeOutput(bs, oc * oh * ow);
printSizeInfo();
}
void MKLDNNConvLayer::resetFwd(std::vector<primitive>& pipeline,

@ -92,7 +92,7 @@ public:
void printSizeInfo() override {
MKLDNNLayer::printSizeInfo();
VLOG(MKLDNN_SIZES) << getName() << ": fh: " << fh_ << ", fw: " << fw_
<< ": ph: " << ph_ << ", pw: " << pw_ << ", sh: " << sh_
<< ", ph: " << ph_ << ", pw: " << pw_ << ", sh: " << sh_
<< ", sw: " << sw_ << ", dh: " << dh_ << ", dw: " << dw_;
}

@ -84,8 +84,6 @@ void MKLDNNFcLayer::reshape(
reshapeOutput(oh, ow);
resizeOutput(bs, oc);
printSizeInfo();
}
void MKLDNNFcLayer::resetFwd(std::vector<primitive>& pipeline,

@ -71,8 +71,6 @@ void MKLDNNPoolLayer::reshape(
reshapeOutput(oh, ow);
resizeOutput(bs, oc * oh * ow);
printSizeInfo();
}
void MKLDNNPoolLayer::resetFwd(std::vector<primitive>& pipeline,

@ -0,0 +1,220 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "ROIPoolLayer.h"
namespace paddle {
REGISTER_LAYER(roi_pool, ROIPoolLayer);
bool ROIPoolLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
Layer::init(layerMap, parameterMap);
const ROIPoolConfig& layerConf = config_.inputs(0).roi_pool_conf();
pooledWidth_ = layerConf.pooled_width();
pooledHeight_ = layerConf.pooled_height();
spatialScale_ = layerConf.spatial_scale();
return true;
}
void ROIPoolLayer::forward(PassType passType) {
Layer::forward(passType);
const ROIPoolConfig& layerConf = config_.inputs(0).roi_pool_conf();
height_ = getInput(0).getFrameHeight();
if (!height_) height_ = layerConf.height();
width_ = getInput(0).getFrameWidth();
if (!width_) width_ = layerConf.width();
channels_ = getInputValue(0)->getWidth() / width_ / height_;
size_t batchSize = getInput(0).getBatchSize();
size_t numROIs = getInput(1).getBatchSize();
MatrixPtr dataValue = getInputValue(0);
MatrixPtr roiValue = getInputValue(1);
resetOutput(numROIs, channels_ * pooledHeight_ * pooledWidth_);
MatrixPtr outputValue = getOutputValue();
if (useGpu_) { // TODO(guosheng): implement on GPU later
MatrixPtr dataCpuBuffer;
Matrix::resizeOrCreate(dataCpuBuffer,
dataValue->getHeight(),
dataValue->getWidth(),
false,
false);
MatrixPtr roiCpuBuffer;
Matrix::resizeOrCreate(roiCpuBuffer,
roiValue->getHeight(),
roiValue->getWidth(),
false,
false);
dataCpuBuffer->copyFrom(*dataValue);
roiCpuBuffer->copyFrom(*roiValue);
dataValue = dataCpuBuffer;
roiValue = roiCpuBuffer;
MatrixPtr outputCpuBuffer;
Matrix::resizeOrCreate(outputCpuBuffer,
outputValue->getHeight(),
outputValue->getWidth(),
false,
false);
outputCpuBuffer->copyFrom(*outputValue);
outputValue = outputCpuBuffer;
}
real* bottomData = dataValue->getData();
size_t batchOffset = dataValue->getWidth();
size_t channelOffset = height_ * width_;
real* bottomROIs = roiValue->getData();
size_t roiOffset = roiValue->getWidth();
size_t poolChannelOffset = pooledHeight_ * pooledWidth_;
real* outputData = outputValue->getData();
Matrix::resizeOrCreate(maxIdxs_,
numROIs,
channels_ * pooledHeight_ * pooledWidth_,
false,
false);
real* argmaxData = maxIdxs_->getData();
for (size_t n = 0; n < numROIs; ++n) {
// the first five elememts of each RoI should be:
// batch_idx, roi_x_start, roi_y_start, roi_x_end, roi_y_end
size_t roiBatchIdx = bottomROIs[0];
size_t roiStartW = round(bottomROIs[1] * spatialScale_);
size_t roiStartH = round(bottomROIs[2] * spatialScale_);
size_t roiEndW = round(bottomROIs[3] * spatialScale_);
size_t roiEndH = round(bottomROIs[4] * spatialScale_);
CHECK_GE(roiBatchIdx, 0UL);
CHECK_LT(roiBatchIdx, batchSize);
size_t roiHeight = std::max(roiEndH - roiStartH + 1, 1UL);
size_t roiWidth = std::max(roiEndW - roiStartW + 1, 1UL);
real binSizeH =
static_cast<real>(roiHeight) / static_cast<real>(pooledHeight_);
real binSizeW =
static_cast<real>(roiWidth) / static_cast<real>(pooledWidth_);
real* batchData = bottomData + batchOffset * roiBatchIdx;
for (size_t c = 0; c < channels_; ++c) {
for (size_t ph = 0; ph < pooledHeight_; ++ph) {
for (size_t pw = 0; pw < pooledWidth_; ++pw) {
size_t hstart = static_cast<size_t>(std::floor(ph * binSizeH));
size_t wstart = static_cast<size_t>(std::floor(pw * binSizeW));
size_t hend = static_cast<size_t>(std::ceil((ph + 1) * binSizeH));
size_t wend = static_cast<size_t>(std::ceil((pw + 1) * binSizeW));
hstart = std::min(std::max(hstart + roiStartH, 0UL), height_);
wstart = std::min(std::max(wstart + roiStartW, 0UL), width_);
hend = std::min(std::max(hend + roiStartH, 0UL), height_);
wend = std::min(std::max(wend + roiStartW, 0UL), width_);
bool isEmpty = (hend <= hstart) || (wend <= wstart);
size_t poolIndex = ph * pooledWidth_ + pw;
if (isEmpty) {
outputData[poolIndex] = 0;
argmaxData[poolIndex] = -1;
}
for (size_t h = hstart; h < hend; ++h) {
for (size_t w = wstart; w < wend; ++w) {
size_t index = h * width_ + w;
if (batchData[index] > outputData[poolIndex]) {
outputData[poolIndex] = batchData[index];
argmaxData[poolIndex] = index;
}
}
}
}
}
batchData += channelOffset;
outputData += poolChannelOffset;
argmaxData += poolChannelOffset;
}
bottomROIs += roiOffset;
}
if (useGpu_) {
getOutputValue()->copyFrom(*outputValue);
}
}
void ROIPoolLayer::backward(const UpdateCallback& callback) {
MatrixPtr inGradValue = getInputGrad(0);
MatrixPtr outGradValue = getOutputGrad();
MatrixPtr roiValue = getInputValue(1);
if (useGpu_) {
MatrixPtr inGradCpuBuffer;
Matrix::resizeOrCreate(inGradCpuBuffer,
inGradValue->getHeight(),
inGradValue->getWidth(),
false,
false);
MatrixPtr outGradCpuBuffer;
Matrix::resizeOrCreate(outGradCpuBuffer,
outGradValue->getHeight(),
outGradValue->getWidth(),
false,
false);
MatrixPtr roiCpuBuffer;
Matrix::resizeOrCreate(roiCpuBuffer,
roiValue->getHeight(),
roiValue->getWidth(),
false,
false);
inGradCpuBuffer->copyFrom(*inGradValue);
outGradCpuBuffer->copyFrom(*outGradValue);
roiCpuBuffer->copyFrom(*roiValue);
inGradValue = inGradCpuBuffer;
outGradValue = outGradCpuBuffer;
roiValue = roiCpuBuffer;
}
real* bottomROIs = roiValue->getData();
size_t numROIs = getInput(1).getBatchSize();
size_t roiOffset = getInputValue(1)->getWidth();
real* inDiffData = inGradValue->getData();
size_t batchOffset = getInputValue(0)->getWidth();
size_t channelOffset = height_ * width_;
real* outDiffData = outGradValue->getData();
size_t poolChannelOffset = pooledHeight_ * pooledWidth_;
real* argmaxData = maxIdxs_->getData();
for (size_t n = 0; n < numROIs; ++n) {
size_t roiBatchIdx = bottomROIs[0];
real* batchDiffData = inDiffData + batchOffset * roiBatchIdx;
for (size_t c = 0; c < channels_; ++c) {
for (size_t ph = 0; ph < pooledHeight_; ++ph) {
for (size_t pw = 0; pw < pooledWidth_; ++pw) {
size_t poolIndex = ph * pooledWidth_ + pw;
if (argmaxData[poolIndex] > 0) {
size_t index = static_cast<size_t>(argmaxData[poolIndex]);
batchDiffData[index] += outDiffData[poolIndex];
}
}
}
batchDiffData += channelOffset;
outDiffData += poolChannelOffset;
argmaxData += poolChannelOffset;
}
bottomROIs += roiOffset;
}
if (useGpu_) {
getInputGrad(0)->copyFrom(*inGradValue);
}
}
} // namespace paddle

@ -0,0 +1,56 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Layer.h"
namespace paddle {
/**
* A layer used by Fast R-CNN to extract feature maps of ROIs from the last
* feature map.
* - Input: This layer needs two input layers: The first input layer is a
* convolution layer; The second input layer contains the ROI data
* which is the output of ProposalLayer in Faster R-CNN. layers for
* generating bbox location offset and the classification confidence.
* - Output: The ROIs' feature map.
* Reference:
* Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun.
* Faster R-CNN: Towards Real-Time Object Detection with Region Proposal
* Networks
*/
class ROIPoolLayer : public Layer {
protected:
size_t channels_;
size_t width_;
size_t height_;
size_t pooledWidth_;
size_t pooledHeight_;
real spatialScale_;
// Since there is no int matrix, use real maxtrix instead.
MatrixPtr maxIdxs_;
public:
explicit ROIPoolLayer(const LayerConfig& config) : Layer(config) {}
bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;
void forward(PassType passType) override;
void backward(const UpdateCallback& callback = nullptr) override;
};
} // namespace paddle

@ -2056,6 +2056,43 @@ TEST(Layer, CropLayer) {
}
}
TEST(Layer, roi_pool) {
TestConfig config;
config.layerConfig.set_type("roi_pool");
config.biasSize = 0;
LayerInputConfig* input = config.layerConfig.add_inputs();
ROIPoolConfig* roiPoolConf = input->mutable_roi_pool_conf();
roiPoolConf->set_pooled_width(7);
roiPoolConf->set_pooled_height(7);
roiPoolConf->set_spatial_scale(1. / 16);
roiPoolConf->set_width(14);
roiPoolConf->set_height(14);
const size_t roiNum = 10;
const size_t roiDim = 10;
const size_t batchSize = 5;
MatrixPtr roiValue = Matrix::create(roiNum, roiDim, false, false);
roiValue->zeroMem();
real* roiData = roiValue->getData();
for (size_t i = 0; i < roiNum; ++i) {
roiData[i * roiDim + 0] = std::rand() % batchSize;
roiData[i * roiDim + 1] = std::rand() % 224; // xMin
roiData[i * roiDim + 2] = std::rand() % 224; // yMin
size_t xMin = static_cast<size_t>(roiData[i * roiDim + 1]);
size_t yMin = static_cast<size_t>(roiData[i * roiDim + 2]);
roiData[i * roiDim + 3] = xMin + std::rand() % (224 - xMin); // xMax
roiData[i * roiDim + 4] = yMin + std::rand() % (224 - yMin); // yMax
}
config.inputDefs.push_back({INPUT_DATA, "input", 3 * 14 * 14, {}});
config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, "rois", roiValue, {}});
config.layerConfig.add_inputs();
for (auto useGpu : {false, true}) {
testLayerGrad(config, "roi_pool", batchSize, false, useGpu, false);
}
}
TEST(Layer, SwitchOrderLayer) {
TestConfig config;
// config input_0

@ -297,7 +297,7 @@ static void getAddtoConfig(TestConfig& cfg,
}
void testAddtoLayer(const testImageDesc& pm, const size_t nInputs) {
CHECK_GE(nInputs, 1);
CHECK_GE(nInputs, 1UL);
TestConfig dnnConfig;
getAddtoConfig(dnnConfig, pm, nInputs);
dnnConfig.layerConfig.set_type("mkldnn_addto");

@ -152,12 +152,7 @@ void MKLDNNMatrix::downSpatial() {
}
memory::desc md = memory::desc(dstDims, getDtype(), dstFmt);
memory::primitive_desc pd = memory::primitive_desc(md, getEngine());
mkldnn_primitive_t result;
mkldnn::error::wrap_c_api(
mkldnn_primitive_create(&result, pd.get(), nullptr, nullptr),
"could not create a memory primitive");
reset(result);
set_data_handle(data_);
resetMKLDNNMemory(pd, data_);
}
} // namespace paddle

@ -145,6 +145,27 @@ public:
m_.reset();
}
/**
* override the CpuMatrix::resize
*/
void resize(size_t newHeight, size_t newWidth) override {
m_->resize(newHeight, newWidth);
if (data_ == m_->getData() && elementCnt_ == newHeight * newWidth) {
return;
}
CpuMatrix::setData(data_);
height_ = newHeight;
width_ = newWidth;
elementCnt_ = newHeight * newWidth;
stride_ = width_;
auto pd = mkldnn::memory::primitive_desc(
mkldnn::memory::desc({(int)newHeight, (int)newWidth},
getDtype(),
mkldnn::memory::format::nc),
getEngine());
resetMKLDNNMemory(pd, data_);
}
/**
* override Matrix::getData
* check data before return
@ -215,6 +236,17 @@ protected:
memory::format srcFmt,
memory::format dstFmt,
memory::dims dm);
/**
* reset this MKLDNN Memory from primitve desc
*/
void resetMKLDNNMemory(memory::primitive_desc pd, real* data) {
mkldnn_primitive_t result;
mkldnn::error::wrap_c_api(
mkldnn_primitive_create(&result, pd.get(), nullptr, nullptr),
"could not create a memory primitive");
reset(result);
set_data_handle(data);
}
private:
// save the CpuMatrixPtr in case the buffer released outside

@ -214,6 +214,7 @@ set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library")
cc_test(gather_test SRCS gather_test.cc DEPS tensor)
cc_test(net_op_test SRCS net_op_test.cc DEPS net_op)
cc_test(scatter_test SRCS scatter_test.cc DEPS tensor)
cc_test(beam_search_decode_op_test SRCS beam_search_decode_op_test.cc DEPS lod_tensor)
cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor paddle_memory)
cc_test(dynamic_recurrent_op_test SRCS dynamic_recurrent_op_test.cc
rnn/recurrent_op_utils.cc

@ -0,0 +1,138 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/data_type.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/var_type.h"
namespace paddle {
namespace operators {
class AssignFunctor {
public:
AssignFunctor(framework::Variable *out,
const platform::DeviceContext &dev_ctx)
: out_(out), dev_ctx_(dev_ctx) {}
void operator()(const framework::LoDTensor &lod_tensor) const {
auto &out_tensor = *out_->GetMutable<framework::LoDTensor>();
copy_tensor(lod_tensor, &out_tensor);
}
void operator()(const framework::LoDTensorArray &array) const {
auto &out_array = *out_->GetMutable<framework::LoDTensorArray>();
out_array.resize(array.size());
for (size_t i = 0; i < array.size(); ++i) {
copy_tensor(array[i], &out_array[i]);
}
}
void operator()(const framework::SelectedRows &rows) const {
framework::SelectedRows &out_rows =
*out_->GetMutable<framework::SelectedRows>();
out_rows.set_rows(rows.rows());
out_rows.set_height(rows.height());
auto &t = rows.value();
out_rows.mutable_value()->CopyFrom(t, t.place(), dev_ctx_);
}
template <typename T>
void operator()(const T &v) const {
PADDLE_THROW("Not support type for assign op %s", typeid(T).name());
}
private:
void copy_tensor(const framework::LoDTensor &lod_tensor,
framework::LoDTensor *out) const {
auto &out_tensor = *out;
out_tensor.CopyFrom(lod_tensor, lod_tensor.place(), dev_ctx_);
out_tensor.set_lod(lod_tensor.lod());
}
framework::Variable *out_;
const platform::DeviceContext &dev_ctx_;
};
class AssignOp : public framework::OperatorBase {
public:
AssignOp(const std::string &type, const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: OperatorBase(type, inputs, outputs, attrs) {}
void Run(const framework::Scope &scope,
const platform::DeviceContext &dev_ctx) const override {
auto *x = scope.FindVar(Input("X"));
if (x == nullptr) {
return;
}
auto *out = scope.FindVar(Output("Out"));
PADDLE_ENFORCE(
out != nullptr,
"The Output(Out) should not be null if the Input(X) is set.");
framework::VisitVarType(*x, AssignFunctor(out, dev_ctx));
}
};
class AssignOpProtoMaker : public framework::OpProtoAndCheckerMaker {
public:
AssignOpProtoMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"(LoDTensor, SelectedRows or LoDTensorArray) The input variable "
"could be LoDTensor, SelectedRows or LoDTensorArray.")
.AsDispensable();
AddOutput("Out",
"(LoDTensor, SelectedRows or LoDTensorArray) The type of output "
"is the same as input X.");
AddComment(R"DOC(Assign Operator
Out = X, when type in [LoDTensor/SelectedRows/LoDTensorArray]
raise error if the type is not listed above.
)DOC");
}
};
class AssignInferShape : public framework::InferShapeBase {
public:
void operator()(framework::InferShapeContext *context) const override {
if (context->HasInput("X")) {
auto type = context->GetInputsVarType("X")[0];
if (type == framework::VarDesc_VarType_SELECTED_ROWS ||
type == framework::VarDesc_VarType_LOD_TENSOR) {
context->SetOutputDim("Out", context->GetInputDim("X"));
}
}
}
};
class AssignGradMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDescBind> Apply() const override {
auto *op = new framework::OpDescBind();
op->SetType("assign");
op->SetInput("X", OutputGrad("Out"));
op->SetOutput("Out", InputGrad("X"));
return std::unique_ptr<framework::OpDescBind>(op);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(assign, ops::AssignOp, ops::AssignGradMaker,
ops::AssignInferShape, ops::AssignOpProtoMaker);

@ -0,0 +1,110 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/beam_search_decode_op.h"
namespace paddle {
namespace operators {
class BeamSearchDecodeOp : public framework::OperatorBase {
public:
BeamSearchDecodeOp(const std::string& type,
const framework::VariableNameMap& inputs,
const framework::VariableNameMap& outputs,
const framework::AttributeMap& attrs)
: OperatorBase(type, inputs, outputs, attrs) {}
void Run(const framework::Scope& scope,
const platform::DeviceContext& dev_ctx) const override {
framework::ExecutionContext ctx(*this, scope, dev_ctx);
const LoDTensorArray* ids = ctx.Input<LoDTensorArray>("Ids");
const LoDTensorArray* scores = ctx.Input<LoDTensorArray>("Scores");
const size_t step_num = ids->size();
PADDLE_ENFORCE_GT(step_num, 0UL,
"beam search steps should be larger than 0");
const size_t source_num = ids->at(0).lod().at(0).size() - 1;
PADDLE_ENFORCE_GT(source_num, 0UL, "source num should be larger than 0");
for (size_t i = 0; i < step_num; ++i) {
PADDLE_ENFORCE_EQ(ids->at(i).lod().size(), 2UL,
"Level of LodTensor should be 2");
}
// prepare output
LoDTensor* sentenceIds = ctx.Output<LoDTensor>("SentenceIds");
LoDTensor* sentenceScores = ctx.Output<LoDTensor>("SentenceScores");
BeamSearchDecoder<float> beam_search_decoder;
beam_search_decoder.PackAllSteps(*ids, *scores, sentenceIds,
sentenceScores);
}
};
class BeamSearchDecodeOpProtoMaker : public framework::OpProtoAndCheckerMaker {
public:
BeamSearchDecodeOpProtoMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Ids",
"(LodTensorArray)"
"score of the candidate words in each step");
AddInput("Scores",
"(LodTensorArray)"
"score of the candidate words in each step");
AddOutput("SentenceIds",
"(LodTensor)"
"All possible result sentences of word ids");
AddOutput("SentenceScores",
"(LodTensor)"
"All possible result sentences of word scores");
AddComment(R"DOC(
Pack the result of Beam search op into SentenceIds and SentenceScores.
)DOC");
}
};
class BeamSearchDecodeInferShape : public framework::InferShapeBase {
public:
void operator()(framework::InferShapeContext* context) const override {
PADDLE_ENFORCE(context->HasInput("Ids"),
"BeamSearchDecodeOp must has input Ids");
PADDLE_ENFORCE(context->HasInput("Scores"),
"BeamSearchDecodeOp must has input Scores");
PADDLE_ENFORCE(context->HasOutput("SentenceIds"),
"BeamSearchDecodeOp must has output SentenceIds");
PADDLE_ENFORCE(context->HasOutput("SentenceScores"),
"BeamSearchDecodeOp must has output SentenceScores");
}
};
class BeamSearchDecodeInferVarType : public framework::VarTypeInference {
public:
void operator()(const framework::OpDescBind& op_desc,
framework::BlockDescBind* block) const override {
for (auto& o : op_desc.Output("SentenceIds")) {
block->Var(o)->SetType(framework::VarDesc::LOD_TENSOR);
}
for (auto& o : op_desc.Output("SentenceScores")) {
block->Var(o)->SetType(framework::VarDesc::LOD_TENSOR);
}
}
};
} // namespace operators
} // namespace paddle
REGISTER_OPERATOR(beam_search_decode, paddle::operators::BeamSearchDecodeOp,
paddle::operators::BeamSearchDecodeOpProtoMaker,
paddle::operators::BeamSearchDecodeInferShape,
paddle::operators::BeamSearchDecodeInferVarType,
paddle::framework::EmptyGradOpMaker);

File diff suppressed because it is too large Load Diff

@ -0,0 +1,221 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/beam_search_decode_op.h"
#include "gtest/gtest.h"
using CPUPlace = paddle::platform::CPUPlace;
using LoD = paddle::framework::LoD;
using LoDTensor = paddle::framework::LoDTensor;
using LoDTensorArray = paddle::framework::LoDTensorArray;
template <typename T>
using BeamNode = paddle::operators::BeamNode<T>;
template <typename T>
using BeamSearchDecoder = paddle::operators::BeamSearchDecoder<T>;
template <typename T>
using Sentence = paddle::operators::Sentence<T>;
template <typename T>
using BeamNodeVector = paddle::operators::BeamNodeVector<T>;
template <typename T>
using SentenceVector = paddle::operators::SentenceVector<T>;
namespace paddle {
namespace test {
void GenerateExample(const std::vector<size_t>& level_0,
const std::vector<size_t>& level_1,
const std::vector<int>& data, LoDTensorArray* ids,
LoDTensorArray* scores) {
PADDLE_ENFORCE_EQ(level_0.back(), level_1.size() - 1,
"source level is used to describe candidate set");
PADDLE_ENFORCE_EQ(level_1.back(), data.size(),
"the lowest level is used to describe data"
", so it's last element should be data length");
CPUPlace place;
LoD lod;
lod.push_back(level_0);
lod.push_back(level_1);
// Ids
LoDTensor tensor_id;
tensor_id.set_lod(lod);
tensor_id.Resize({static_cast<int64_t>(data.size())});
// malloc memory
int64_t* id_ptr = tensor_id.mutable_data<int64_t>(place);
for (size_t i = 0; i < data.size(); ++i) {
id_ptr[i] = static_cast<int64_t>(data.at(i));
}
// Scores
LoDTensor tensor_score;
tensor_score.set_lod(lod);
tensor_score.Resize({static_cast<int64_t>(data.size())});
// malloc memory
float* score_ptr = tensor_score.mutable_data<float>(place);
for (size_t i = 0; i < data.size(); ++i) {
score_ptr[i] = static_cast<float>(data.at(i));
}
ids->push_back(tensor_id);
scores->push_back(tensor_score);
}
} // namespace test
} // namespace paddle
TEST(BeamSearchDecodeOp, DeleteBeamNode) {
auto* root = new BeamNode<float>(0, 0);
auto* b1 = new BeamNode<float>(1, 1);
auto* b2 = new BeamNode<float>(2, 2);
auto* b3 = new BeamNode<float>(3, 3);
b1->AppendTo(root);
b2->AppendTo(root);
b3->AppendTo(b1);
delete b3;
delete b2;
}
TEST(BeamSearchDecodeOp, MakeSentence) {
auto* root = new BeamNode<float>(0, 0);
auto* b1 = new BeamNode<float>(1, 1);
auto* end = new BeamNode<float>(2, 2);
b1->AppendTo(root);
end->AppendTo(b1);
BeamSearchDecoder<float> helper;
Sentence<float> sentence = helper.MakeSentence(end);
delete end;
std::vector<int64_t> expect_ids = {0, 1, 2};
ASSERT_EQ(sentence.word_ids, expect_ids);
std::vector<float> expect_scores = {0, 1, 2};
ASSERT_EQ(sentence.scores, expect_scores);
}
TEST(BeamSearchDecodeOp, PackTwoStepsFistStep) {
CPUPlace place;
LoDTensorArray ids;
LoDTensorArray scores;
paddle::test::GenerateExample(
std::vector<size_t>{0, 2, 6}, std::vector<size_t>{0, 1, 2, 3, 4, 5, 6},
std::vector<int>{1, 2, 3, 4, 5, 6}, &ids, &scores);
std::vector<BeamNodeVector<float>> beamnode_vector_list;
std::vector<SentenceVector<float>> sentence_vector_list(
2, SentenceVector<float>());
BeamSearchDecoder<float> helper;
beamnode_vector_list = helper.PackTwoSteps(
ids[0], scores[0], beamnode_vector_list, &sentence_vector_list);
ASSERT_EQ(beamnode_vector_list.size(), 2UL);
ASSERT_EQ(beamnode_vector_list[0].size(), 2UL);
ASSERT_EQ(beamnode_vector_list[1].size(), 4UL);
}
TEST(BeamSearchDecodeOp, PackTwoSteps) {
CPUPlace place;
// first source has three prefix
BeamNodeVector<float> source0_prefixes;
source0_prefixes.push_back(
std::unique_ptr<BeamNode<float>>(new BeamNode<float>(1, 1)));
source0_prefixes.push_back(
std::unique_ptr<BeamNode<float>>(new BeamNode<float>(0, 0)));
source0_prefixes.push_back(
std::unique_ptr<BeamNode<float>>(new BeamNode<float>(3, 3)));
// second source has two prefix
BeamNodeVector<float> source1_prefixes;
source1_prefixes.push_back(
std::unique_ptr<BeamNode<float>>(new BeamNode<float>(4, 4)));
source1_prefixes.push_back(
std::unique_ptr<BeamNode<float>>(new BeamNode<float>(5, 5)));
std::vector<BeamNodeVector<float>> beamnode_vector_list;
std::vector<SentenceVector<float>> sentence_vector_list(
2, SentenceVector<float>());
beamnode_vector_list.push_back(std::move(source0_prefixes));
beamnode_vector_list.push_back(std::move(source1_prefixes));
// generate data for one step
LoDTensorArray ids;
LoDTensorArray scores;
paddle::test::GenerateExample(std::vector<size_t>{0, 3, 5},
std::vector<size_t>{0, 1, 1, 3, 4, 5},
std::vector<int>{0, 1, 2, 3, 4}, &ids, &scores);
BeamSearchDecoder<float> helper1;
beamnode_vector_list = helper1.PackTwoSteps(
ids[0], scores[0], beamnode_vector_list, &sentence_vector_list);
ASSERT_EQ(sentence_vector_list[0].size(), 1UL);
ASSERT_EQ(sentence_vector_list[1].size(), 0UL);
ASSERT_EQ(beamnode_vector_list[0].size(), 3UL);
ASSERT_EQ(beamnode_vector_list[1].size(), 2UL);
}
TEST(BeamSearchDecodeOp, PackAllSteps) {
CPUPlace place;
// we will constuct a sample data with 3 steps and 2 source sentences
LoDTensorArray ids;
LoDTensorArray scores;
paddle::test::GenerateExample(
std::vector<size_t>{0, 3, 6}, std::vector<size_t>{0, 1, 2, 3, 4, 5, 6},
std::vector<int>{1, 2, 3, 4, 5, 6}, &ids, &scores);
paddle::test::GenerateExample(
std::vector<size_t>{0, 3, 6}, std::vector<size_t>{0, 1, 1, 3, 5, 5, 6},
std::vector<int>{0, 1, 2, 3, 4, 5}, &ids, &scores);
paddle::test::GenerateExample(std::vector<size_t>{0, 3, 6},
std::vector<size_t>{0, 0, 1, 2, 3, 4, 5},
std::vector<int>{0, 1, 2, 3, 4}, &ids, &scores);
ASSERT_EQ(ids.size(), 3UL);
ASSERT_EQ(scores.size(), 3UL);
BeamSearchDecoder<float> helper;
LoDTensor id_tensor;
LoDTensor score_tensor;
helper.PackAllSteps(ids, scores, &id_tensor, &score_tensor);
LoD lod = id_tensor.lod();
std::vector<size_t> expect_source_lod = {0, 4, 8};
EXPECT_EQ(lod[0], expect_source_lod);
std::vector<size_t> expect_sentence_lod = {0, 1, 3, 6, 9, 10, 13, 16, 19};
EXPECT_EQ(lod[1], expect_sentence_lod);
// 2| 1, 0| 3, 1, 0| 3, 2, 1| 5| 4, 3, 2| 4, 4, 3| 6, 5, 4
std::vector<int> expect_data = {2, 1, 0, 3, 1, 0, 3, 2, 1, 5,
4, 3, 2, 4, 4, 3, 6, 5, 4};
ASSERT_EQ(id_tensor.dims()[0], static_cast<int64_t>(expect_data.size()));
for (size_t i = 0; i < expect_data.size(); ++i) {
ASSERT_EQ(id_tensor.data<int64_t>()[i],
static_cast<int64_t>(expect_data[i]));
}
for (int64_t i = 0; i < id_tensor.dims()[0]; ++i) {
ASSERT_EQ(score_tensor.data<float>()[i],
static_cast<float>(id_tensor.data<int64_t>()[i]));
}
}

@ -0,0 +1,159 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/bilinear_tensor_product_op.h"
namespace paddle {
namespace operators {
using framework::Tensor;
class BilinearTensorProductOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Weight"),
"Input(Weight) should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should not be null.");
auto x_dims = ctx->GetInputDim("X");
auto y_dims = ctx->GetInputDim("Y");
auto weight_dims = ctx->GetInputDim("Weight");
PADDLE_ENFORCE_EQ(x_dims.size(), 2UL, "The input(X) must be a 2D Tensor.");
PADDLE_ENFORCE_EQ(y_dims.size(), 2UL, "The input(Y) must be a 2D Tensor.");
PADDLE_ENFORCE_EQ(weight_dims.size(), 3UL,
"The input(Weight) must be a 3D tensor.");
PADDLE_ENFORCE_EQ(x_dims[0], y_dims[0],
"The first dimension(batch_size) of input(X) must be "
"equal to the first dimension of the input(Y).");
PADDLE_ENFORCE_EQ(x_dims[1], weight_dims[1],
"The second dimension of input(X) must be equal to "
"the second dimension of the input(Weight).");
PADDLE_ENFORCE_EQ(y_dims[1], weight_dims[2],
"The second dimension of input(Y) must be equal to "
"the third dimension of the input(Weight).");
if (ctx->HasInput("Bias")) {
auto bias_dims = ctx->GetInputDim("Bias");
PADDLE_ENFORCE(bias_dims.size() == 2UL && bias_dims[0] == 1UL,
"The Input(Bias) must be a 2-D tensor with "
"the 2nd dimension fixed to 1 (a row vector).");
PADDLE_ENFORCE_EQ(bias_dims[1], weight_dims[0],
"The second dimension of input(Bias) must be equal "
"to the first dimension of the input(Weight).");
}
ctx->SetOutputDim("Out", {x_dims[0], weight_dims[0]});
ctx->ShareLoD("X", /*->*/ "Out");
}
};
class BilinearTensorProductOpMaker : public framework::OpProtoAndCheckerMaker {
public:
BilinearTensorProductOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The first input of bilinear_tensor_product operator.");
AddInput("Y", "The second input of bilinear_tensor_product operator.");
AddInput("Weight",
"The learnable parameters of bilinear_tensor_product operator.");
AddInput("Bias", "The learnable bias of bilinear_tensor_product operator.")
.AsDispensable();
AddOutput("Out", "The output of bilinear_tensor_product operator.");
AddComment(R"DOC(
Bilinear Tensor Product operator.
Given input X and Y, a 3D tensor weight, and bias. Each column of the
output is computed by one slice i = 1, . . . , k of the tensor:
M = (X W_i) \cdot Y
Out_i = \sum_i {M_i} + Bias_i
)DOC");
}
};
class BilinearTensorProductOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Weight"),
"Input(Weight) should not be null.");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
"Input(Out@GRAD) should not be null.");
auto x_dims = ctx->GetInputDim("X");
auto y_dims = ctx->GetInputDim("Y");
auto weight_dims = ctx->GetInputDim("Weight");
auto out_dims = ctx->GetInputDim(framework::GradVarName("Out"));
PADDLE_ENFORCE_EQ(out_dims.size(), 2UL,
"The input(Out@GRAD) must be a 2D Tensor.");
PADDLE_ENFORCE_EQ(
x_dims[0], out_dims[0],
"The first dimension(batch_size) of input(Out@GRAD) must be "
"equal to the first dimension of the Input(X).");
PADDLE_ENFORCE_EQ(
weight_dims[0], out_dims[1],
"The second dimension of input(Out@GRAD) must be equal to "
"the third dimension of the Input(Weight).");
if (ctx->HasInput("Bias")) {
auto bias_dims = ctx->GetInputDim("Bias");
PADDLE_ENFORCE_EQ(
bias_dims[1], out_dims[1],
"The second dimension of input(Out@GRAD) must be equal to "
"the second dimension of the Input(Bias).");
auto bias_grad_name = framework::GradVarName("Bias");
if (ctx->HasOutput(bias_grad_name))
ctx->SetOutputDim(bias_grad_name, bias_dims);
}
auto x_grad_name = framework::GradVarName("X");
auto y_grad_name = framework::GradVarName("Y");
auto weight_grad_name = framework::GradVarName("Weight");
if (ctx->HasOutput(x_grad_name)) {
ctx->SetOutputDim(x_grad_name, x_dims);
}
if (ctx->HasOutput(y_grad_name)) {
ctx->SetOutputDim(y_grad_name, y_dims);
}
if (ctx->HasOutput(weight_grad_name)) {
ctx->SetOutputDim(weight_grad_name, weight_dims);
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(bilinear_tensor_product, ops::BilinearTensorProductOp,
ops::BilinearTensorProductOpMaker, bilinear_tensor_product_grad,
ops::BilinearTensorProductOpGrad);
REGISTER_OP_CPU_KERNEL(
bilinear_tensor_product,
ops::BilinearTensorProductKernel<paddle::platform::CPUPlace, float>,
ops::BilinearTensorProductKernel<paddle::platform::CPUPlace, double>);
REGISTER_OP_CPU_KERNEL(
bilinear_tensor_product_grad,
ops::BilinearTensorProductGradKernel<paddle::platform::CPUPlace, float>,
ops::BilinearTensorProductGradKernel<paddle::platform::CPUPlace, double>);

@ -0,0 +1,26 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/bilinear_tensor_product_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(
bilinear_tensor_product,
ops::BilinearTensorProductKernel<paddle::platform::GPUPlace, float>,
ops::BilinearTensorProductKernel<paddle::platform::GPUPlace, double>);
REGISTER_OP_GPU_KERNEL(
bilinear_tensor_product_grad,
ops::BilinearTensorProductGradKernel<paddle::platform::GPUPlace, float>,
ops::BilinearTensorProductGradKernel<paddle::platform::GPUPlace, double>);

@ -0,0 +1,184 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/math/math_function.h"
namespace paddle {
namespace operators {
using framework::Tensor;
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
template <typename Place, typename T>
class BilinearTensorProductKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* x = ctx.Input<Tensor>("X");
auto* y = ctx.Input<Tensor>("Y");
auto* weight = ctx.Input<Tensor>("Weight");
auto* bias = ctx.Input<Tensor>("Bias");
auto* out = ctx.Output<Tensor>("Out");
out->mutable_data<T>(ctx.GetPlace());
auto y_mat = EigenMatrix<T>::From(*y);
auto output_mat = EigenMatrix<T>::From(*out);
auto batch_size = x->dims()[0];
auto weight_dims = weight->dims();
int out_dim = weight_dims[0];
auto x_dim = weight_dims[1];
auto y_dim = weight_dims[2];
auto place = ctx.GetEigenDevice<Place>();
// Create the intermediate variable to caculate the result of
// Input(X) multiplied by Input(Weight_i), the formula is:
// left_mul = X Weight_i.
Tensor left_mul;
left_mul.mutable_data<T>(framework::make_ddim({batch_size, y_dim}),
ctx.GetPlace());
auto left_mul_mat = EigenMatrix<T>::From(left_mul);
for (int i = 0; i < out_dim; ++i) {
auto output_col_vec = output_mat.chip(i, 1);
Tensor weight_mat =
weight->Slice(i, i + 1).Resize(framework::make_ddim({x_dim, y_dim}));
math::gemm<Place, T>(ctx.device_context(), CblasNoTrans, CblasNoTrans,
batch_size, y_dim, x_dim, 1, x->data<T>(),
weight_mat.data<T>(), 0, left_mul.data<T>());
output_col_vec.device(place) =
(left_mul_mat * y_mat).sum(Eigen::DSizes<int, 1>(1));
}
if (bias) {
auto bias_vec = EigenMatrix<T>::From(*bias);
Eigen::DSizes<int, 2> bcast(batch_size, 1);
output_mat.device(place) = bias_vec.broadcast(bcast) + output_mat;
}
}
};
template <typename Place, typename T>
class BilinearTensorProductGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
const Tensor* x = ctx.Input<Tensor>("X");
const Tensor* y = ctx.Input<Tensor>("Y");
const Tensor* weight = ctx.Input<Tensor>("Weight");
Tensor* d_x = ctx.Output<Tensor>(framework::GradVarName("X"));
Tensor* d_y = ctx.Output<Tensor>(framework::GradVarName("Y"));
Tensor* d_weight = ctx.Output<Tensor>(framework::GradVarName("Weight"));
Tensor* d_bias = ctx.Output<Tensor>(framework::GradVarName("Bias"));
const Tensor* d_out = ctx.Input<Tensor>(framework::GradVarName("Out"));
auto batch_size = x->dims()[0];
auto weight_dims = weight->dims();
int out_dim = weight_dims[0];
auto x_dim = weight_dims[1];
auto y_dim = weight_dims[2];
auto x_mat = EigenMatrix<T>::From(*x);
auto y_mat = EigenMatrix<T>::From(*y);
auto d_out_mat = EigenMatrix<T>::From(*d_out);
auto place = ctx.GetEigenDevice<Place>();
// Create the intermediate variable to caculate the Output(Y@Grad).
Tensor x_scale;
x_scale.mutable_data<T>(framework::make_ddim({batch_size, x_dim}),
ctx.GetPlace());
auto x_scale_mat = EigenMatrix<T>::From(x_scale);
// Create the intermediate variable to caculate the Output(X@Grad).
Tensor y_scale;
y_scale.mutable_data<T>(framework::make_ddim({batch_size, y_dim}),
ctx.GetPlace());
auto y_scale_mat = EigenMatrix<T>::From(y_scale);
math::SetConstant<Place, T> set_zero;
// Set Output(X@Grad) be zero.
if (d_x) {
d_x->mutable_data<T>(ctx.GetPlace());
set_zero(ctx.device_context(), d_x, static_cast<T>(0));
}
// Set Output(Y@Grad) be zero.
if (d_y) {
d_y->mutable_data<T>(ctx.GetPlace());
set_zero(ctx.device_context(), d_y, static_cast<T>(0));
}
// Caculate the Output(X@Grad) and Output(Y@Grad).
if (d_x || d_y) {
Eigen::DSizes<int, 2> bcast_for_x(1, y_dim);
Eigen::DSizes<int, 2> bcast_for_y(1, x_dim);
for (int i = 0; i < out_dim; ++i) {
Tensor weight_i = weight->Slice(i, i + 1).Resize(
framework::make_ddim({x_dim, y_dim}));
auto output_vec = d_out_mat.chip(i, 1);
if (d_x) {
y_scale_mat.device(place) =
output_vec.reshape(Eigen::DSizes<int, 2>(batch_size, 1))
.broadcast(bcast_for_x) *
y_mat;
math::gemm<Place, T>(ctx.device_context(), CblasNoTrans, CblasTrans,
batch_size, x_dim, y_dim, 1, y_scale.data<T>(),
weight_i.data<T>(), 1, d_x->data<T>());
}
if (d_y) {
x_scale_mat.device(place) =
output_vec.reshape(Eigen::DSizes<int, 2>(batch_size, 1))
.broadcast(bcast_for_y) *
x_mat;
math::gemm<Place, T>(ctx.device_context(), CblasNoTrans, CblasNoTrans,
batch_size, y_dim, x_dim, 1, x_scale.data<T>(),
weight_i.data<T>(), 1, d_y->data<T>());
}
}
}
// Caculate the gradient of Input(Weight).
if (d_weight) {
d_weight->mutable_data<T>(ctx.GetPlace());
Eigen::DSizes<int, 2> bcast_for_weight(1, x_dim);
for (int i = 0; i < out_dim; ++i) {
Tensor d_weight_i = d_weight->Slice(i, i + 1).Resize(
framework::make_ddim({x_dim, y_dim}));
auto output_vec = d_out_mat.chip(i, 1);
x_scale_mat.device(place) =
output_vec.reshape(Eigen::DSizes<int, 2>(batch_size, 1))
.broadcast(bcast_for_weight) *
x_mat;
math::gemm<Place, T>(ctx.device_context(), CblasTrans, CblasNoTrans,
x_dim, y_dim, batch_size, 1, x_scale.data<T>(),
y->data<T>(), 0, d_weight_i.data<T>());
}
}
// Caculate the gradient of Input(Bias).
if (d_bias) {
d_bias->mutable_data<T>(ctx.GetPlace());
auto d_bias_mat = EigenMatrix<T>::From(*d_bias);
d_bias_mat.device(place) = d_out_mat.sum(Eigen::DSizes<int, 1>(0));
}
}
};
} // namespace operators
} // namespace paddle

@ -0,0 +1,136 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/expand_op.h"
namespace paddle {
namespace operators {
using framework::Tensor;
class ExpandOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should not be null.");
std::vector<int> expand_times =
ctx->Attrs().Get<std::vector<int>>("expand_times");
auto x_dims = ctx->GetInputDim("X");
PADDLE_ENFORCE_EQ(static_cast<size_t>(x_dims.size()), expand_times.size(),
"The number of Attr(expand_times)'s value must be equal "
"to the rank of Input(X).");
PADDLE_ENFORCE_LE(x_dims.size(), 6,
"The rank of Input(X) must not be greater than 6.");
std::vector<int64_t> out_shape(x_dims.size());
for (size_t i = 0; i < expand_times.size(); ++i) {
PADDLE_ENFORCE_GE(expand_times[i], 1,
"Each value of Attr(expand_times) should not be "
"less than 1.");
out_shape[i] = x_dims[i] * expand_times[i];
}
ctx->SetOutputDim("Out", framework::make_ddim(out_shape));
if (out_shape[0] == x_dims[0]) {
ctx->ShareLoD("X", "Out");
}
}
};
class ExpandOpMaker : public framework::OpProtoAndCheckerMaker {
public:
ExpandOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"(Tensor, default Tensor<float>) A tensor with rank in [1, 6]."
"X is the input tensor to be expanded.");
AddOutput("Out",
"(Tensor, default Tensor<float>) A tensor with rank in [1, 6]."
"The rank of Output(Out) is same as Input(X) except that each "
"dimension size of Output(Out) is equal to corresponding "
"dimension size of Input(X) multiplying corresponding value of "
"Attr(expand_times).");
AddAttr<std::vector<int>>("expand_times",
"Expand times number for each dimension.");
AddComment(R"DOC(
Expand operator tiles the input by given times number. You should set times
number for each dimension by providing attribute 'expand_times'. The rank of X
should be in [1, 6]. Please notice that size of 'expand_times' must be same with
X's rank. Following is a using case:
Input(X) is a 3-D tensor with shape [2, 3, 1]:
[
[[1], [2], [3]],
[[4], [5], [6]]
]
Attr(expand_times): [1, 2, 2]
Output(Out) is a 3-D tensor with shape [2, 6, 2]:
[
[[1, 1], [2, 2], [3, 3], [1, 1], [2, 2], [3, 3]],
[[4, 4], [5, 5], [6, 6], [4, 4], [5, 5], [6, 6]]
]
)DOC");
}
};
class ExpandGradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null.");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
"Input(Out@GRAD) should not be null.");
auto x_dims = ctx->GetInputDim("X");
std::vector<int> expand_times =
ctx->Attrs().Get<std::vector<int>>("expand_times");
auto out_dims = ctx->GetInputDim(framework::GradVarName("Out"));
for (size_t i = 0; i < expand_times.size(); ++i) {
PADDLE_ENFORCE_EQ(x_dims[i] * expand_times[i], out_dims[i],
"Each dimension size of Input(Out@GRAD) should be "
"equal to multiplication of crroresponding dimension "
"size of Input(X) and Attr(expand_times) value.");
}
auto x_grad_name = framework::GradVarName("X");
if (ctx->HasOutput(x_grad_name)) {
ctx->SetOutputDim(x_grad_name, x_dims);
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(expand, ops::ExpandOp, ops::ExpandOpMaker, expand_grad,
ops::ExpandGradOp);
REGISTER_OP_CPU_KERNEL(expand,
ops::ExpandKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
expand_grad, ops::ExpandGradKernel<paddle::platform::CPUPlace, float>);

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save