Merge branch 'develop' into cross_entropy_over_beam

revert-3824-remove_grad_op_type
caoying03 8 years ago
commit 74d3ca8bab

@ -257,6 +257,11 @@ seq_concat
.. autoclass:: paddle.v2.layer.seq_concat
:noindex:
seq_slice
---------
.. autoclass:: paddle.v2.layer.seq_slice
:noindex:
kmax_sequence_score
-------------------
.. autoclass:: paddle.v2.layer.kmax_sequence_score

@ -15,6 +15,7 @@ if(Boost_FOUND)
add_subdirectory(platform)
add_subdirectory(framework)
add_subdirectory(operators)
add_subdirectory(pybind)
endif()
if(WITH_C_API)

@ -18,8 +18,8 @@ cc_test(scope_test SRCS scope_test.cc DEPS scope)
proto_library(framework_proto SRCS framework.proto)
cc_library(attribute SRCS attribute.cc DEPS framework_proto)
cc_library(operator SRCS operator.cc DEPS framework_proto device_context tensor scope attribute)
cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto)
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope)
cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry)
cc_library(grad_op_builder SRCS grad_op_builder.cc DEPS operator)
@ -39,21 +39,3 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
cc_library(backward SRCS backward.cc DEPS net_op)
cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context)
if(WITH_PYTHON)
cc_library(paddle_pybind SHARED
SRCS pybind.cc
DEPS pybind python backward
sgd_op
add_op
mul_op
rowwise_add_op
sigmoid_op
softmax_op
mean_op
cross_entropy_op
recurrent_op
uniform_random_op
gaussian_random_op
fill_zeros_like_op)
endif(WITH_PYTHON)

@ -72,8 +72,8 @@ class NoGradOpMaker : public OpProtoAndCheckerMaker {
class FcOp : public operators::NetOp {
public:
FcOp(const std::string &type, const VarNameMap &inputs,
const VarNameMap &outputs, const AttributeMap &attrs)
FcOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs)
: NetOp(type, inputs, outputs, attrs) {
AppendOp(OpRegistry::CreateOp("mul",
{{"X", {Input("X")}}, {"Y", {Input("W")}}},

@ -20,13 +20,13 @@ namespace framework {
enum class OpArgType { IN, OUT };
static void TransOpArg(const OperatorBase* src_op, const OpArgType& src_type,
bool is_grad, OperatorBase::VarNameMap* vars) {
bool is_grad, VariableNameMap* vars) {
const auto& src_inout =
src_type == OpArgType::IN ? src_op->Inputs() : src_op->Outputs();
auto& dst_inout = *vars;
const OpProto* proto = OpRegistry::op_info_map().at(src_op->Type()).proto_;
auto& proto = OpInfoMap::Instance().Get(src_op->Type()).Proto();
const auto& src_arg_list =
src_type == OpArgType::IN ? proto->inputs() : proto->outputs();
src_type == OpArgType::IN ? proto.inputs() : proto.outputs();
for (const auto& arg : src_arg_list) {
if (arg.not_in_gradient() && !is_grad) continue;
const std::string src_name = arg.name();
@ -40,26 +40,18 @@ static void TransOpArg(const OperatorBase* src_op, const OpArgType& src_type,
}
OperatorBase* BuildGradOp(const OperatorBase* op) {
auto it = OpRegistry::op_info_map().find(op->Type());
PADDLE_ENFORCE(it != OpRegistry::op_info_map().end(),
"'%s' has not been registered.", op->Type());
PADDLE_ENFORCE(it->second.proto_ != nullptr, "'%s' has no OpProto.",
op->Type());
std::string grad_op_type = it->second.grad_op_type_;
PADDLE_ENFORCE(!grad_op_type.empty(), "'%s' has no gradient operator.",
op->Type());
auto& info = OpInfoMap::Instance().Get(op->Type());
PADDLE_ENFORCE(info.HasGradientOp());
OperatorBase::VarNameMap inputs;
OperatorBase::VarNameMap outputs;
VariableNameMap inputs;
VariableNameMap outputs;
TransOpArg(op, OpArgType::IN, false, &inputs); // I
TransOpArg(op, OpArgType::OUT, false, &inputs); // O
TransOpArg(op, OpArgType::OUT, true, &inputs); // OG
TransOpArg(op, OpArgType::IN, true, &outputs); // IG
it = OpRegistry::op_info_map().find(grad_op_type);
PADDLE_ENFORCE(it != OpRegistry::op_info_map().end(),
"'%s' has not been registered.", grad_op_type);
return it->second.creator_(grad_op_type, inputs, outputs, op->Attrs());
auto& grad_info = OpInfoMap::Instance().Get(info.grad_op_type_);
return grad_info.Creator()(info.grad_op_type_, inputs, outputs, op->Attrs());
}
} // namespace framework

@ -0,0 +1,29 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/op_info.h"
namespace paddle {
namespace framework {
static OpInfoMap* g_op_info_map = nullptr;
OpInfoMap& OpInfoMap::Instance() {
if (g_op_info_map == nullptr) {
g_op_info_map = new OpInfoMap();
}
return *g_op_info_map;
}
} // namespace framework
} // namespace paddle

@ -0,0 +1,101 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <functional>
#include <map>
#include <string>
#include <unordered_map>
#include "paddle/framework/attribute.h"
namespace paddle {
namespace framework {
class OperatorBase;
using VariableNameMap = std::map<std::string, std::vector<std::string>>;
using OpCreator = std::function<OperatorBase*(
const std::string& /*type*/, const VariableNameMap& /*inputs*/,
const VariableNameMap& /*outputs*/, const AttributeMap& /*attrs*/)>;
struct OpInfo {
OpCreator creator_;
std::string grad_op_type_;
OpProto* proto_;
OpAttrChecker* checker_;
bool HasOpProtoAndChecker() const {
return proto_ != nullptr && checker_ != nullptr;
}
const OpProto& Proto() const {
PADDLE_ENFORCE_NOT_NULL(proto_, "Operator Proto has not been registered");
PADDLE_ENFORCE(proto_->IsInitialized(),
"Operator Proto must be initialized in op info");
return *proto_;
}
const OpAttrChecker& Checker() const {
PADDLE_ENFORCE_NOT_NULL(checker_,
"Operator Checker has not been registered");
return *checker_;
}
const OpCreator& Creator() const {
PADDLE_ENFORCE_NOT_NULL(creator_,
"Operator Creator has not been registered");
return creator_;
}
bool HasGradientOp() const { return !grad_op_type_.empty(); }
};
class OpInfoMap {
public:
static OpInfoMap& Instance();
OpInfoMap(const OpInfoMap& o) = delete;
OpInfoMap(OpInfoMap&& o) = delete;
OpInfoMap& operator=(const OpInfoMap& o) = delete;
OpInfoMap& operator=(OpInfoMap&& o) = delete;
bool Has(const std::string& op_type) const {
return map_.find(op_type) != map_.end();
}
void Insert(const std::string& type, const OpInfo& info) {
PADDLE_ENFORCE(!Has(type), "Operator %s has been registered", type);
map_.insert({type, info});
}
const OpInfo& Get(const std::string& type) const {
auto it = map_.find(type);
PADDLE_ENFORCE(it != map_.end(), "Operator %s are not found", type);
return it->second;
}
template <typename Callback>
void IterAllInfo(Callback callback) {
for (auto& it : map_) {
callback(it.first, it.second);
}
}
private:
OpInfoMap() = default;
std::unordered_map<std::string, const OpInfo> map_;
};
} // namespace framework
} // namespace paddle

@ -19,32 +19,18 @@ limitations under the License. */
namespace paddle {
namespace framework {
std::unique_ptr<OperatorBase> OpRegistry::CreateOp(const std::string& type,
const VarNameMap& inputs,
const VarNameMap& outputs,
AttributeMap attrs) {
auto it = op_info_map().find(type);
PADDLE_ENFORCE(it != op_info_map().end(),
"Operator '%s' has not been registered.", type);
it->second.checker_->Check(attrs);
auto op = it->second.creator_(type, inputs, outputs, attrs);
std::unique_ptr<OperatorBase> OpRegistry::CreateOp(
const std::string& type, const VariableNameMap& inputs,
const VariableNameMap& outputs, AttributeMap attrs) {
auto& info = OpInfoMap::Instance().Get(type);
info.Checker().Check(attrs);
auto op = info.Creator()(type, inputs, outputs, attrs);
return std::unique_ptr<OperatorBase>(op);
}
std::unique_ptr<OperatorBase> OpRegistry::CreateOp(const OpDesc& op_desc) {
VarNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs());
VarNameMap outputs = ConvertOpDescVarsToVarNameMap(op_desc.outputs());
AttributeMap attrs;
for (auto& attr : op_desc.attrs()) {
attrs[attr.name()] = GetAttrValue(attr);
}
return CreateOp(op_desc.type(), inputs, outputs, attrs);
}
OperatorBase::VarNameMap OpRegistry::ConvertOpDescVarsToVarNameMap(
static VariableNameMap ConvertOpDescVarsToVarNameMap(
const google::protobuf::RepeatedPtrField<OpDesc::Var>& op_desc_vars) {
VarNameMap ret_val;
VariableNameMap ret_val;
for (auto& var : op_desc_vars) {
auto& var_names = ret_val[var.parameter()];
auto& var_names_in_proto = var.arguments();
@ -55,6 +41,17 @@ OperatorBase::VarNameMap OpRegistry::ConvertOpDescVarsToVarNameMap(
return ret_val;
}
std::unique_ptr<OperatorBase> OpRegistry::CreateOp(const OpDesc& op_desc) {
VariableNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs());
VariableNameMap outputs = ConvertOpDescVarsToVarNameMap(op_desc.outputs());
AttributeMap attrs;
for (auto& attr : op_desc.attrs()) {
attrs[attr.name()] = GetAttrValue(attr);
}
return CreateOp(op_desc.type(), inputs, outputs, attrs);
}
std::unique_ptr<OperatorBase> OpRegistry::CreateGradOp(const OperatorBase& op) {
PADDLE_ENFORCE(!op.IsNetOp(), "Use framework::Backward to get backward ops");
return std::unique_ptr<OperatorBase>(BuildGradOp(&op));

@ -23,6 +23,7 @@ limitations under the License. */
#include "paddle/framework/attribute.h"
#include "paddle/framework/framework.pb.h"
#include "paddle/framework/grad_op_builder.h"
#include "paddle/framework/op_info.h"
#include "paddle/framework/operator.h"
#include "paddle/framework/scope.h"
@ -30,28 +31,16 @@ namespace paddle {
namespace framework {
class OpRegistry {
using VarNameMap = OperatorBase::VarNameMap;
using OpCreator = std::function<OperatorBase*(
const std::string& /*type*/, const VarNameMap& /*inputs*/,
const VarNameMap& /*outputs*/, const AttributeMap& /*attrs*/)>;
public:
struct OpInfo {
OpCreator creator_;
std::string grad_op_type_;
OpProto* proto_;
OpAttrChecker* checker_;
};
template <typename OpType, typename ProtoMakerType, typename GradOpType>
static void RegisterOp(const std::string& op_type,
const std::string& grad_op_type) {
PADDLE_ENFORCE(op_info_map().count(op_type) == 0,
PADDLE_ENFORCE(!OpInfoMap::Instance().Has(op_type),
"'%s' is registered more than once.", op_type);
OpInfo op_info;
op_info.creator_ = [](const std::string& type, const VarNameMap& inputs,
const VarNameMap& outputs,
const AttributeMap& attrs) {
op_info.creator_ = [](
const std::string& type, const VariableNameMap& inputs,
const VariableNameMap& outputs, const AttributeMap& attrs) {
return new OpType(type, inputs, outputs, attrs);
};
op_info.grad_op_type_ = grad_op_type;
@ -70,7 +59,7 @@ class OpRegistry {
op_info.proto_ = nullptr;
op_info.checker_ = nullptr;
}
op_info_map().insert(std::make_pair(op_type, op_info));
OpInfoMap::Instance().Insert(op_type, op_info);
// register gradient op
if (!grad_op_type.empty()) {
RegisterOp<GradOpType, NOPMaker, NOP>(grad_op_type, "");
@ -78,21 +67,13 @@ class OpRegistry {
}
static std::unique_ptr<OperatorBase> CreateOp(const std::string& type,
const VarNameMap& inputs,
const VarNameMap& outputs,
const VariableNameMap& inputs,
const VariableNameMap& outputs,
AttributeMap attrs);
static std::unique_ptr<OperatorBase> CreateOp(const OpDesc& op_desc);
static VarNameMap ConvertOpDescVarsToVarNameMap(
const google::protobuf::RepeatedPtrField<OpDesc::Var>& op_desc_vars);
static std::unique_ptr<OperatorBase> CreateGradOp(const OperatorBase& op);
static std::unordered_map<std::string, const OpInfo>& op_info_map() {
static std::unordered_map<std::string, const OpInfo> op_info_map_;
return op_info_map_;
}
};
class Registrar {

@ -115,8 +115,8 @@ void OperatorBase::Rename(const std::string& old_name,
}
OperatorBase::OperatorBase(const std::string& type,
const OperatorBase::VarNameMap& inputs,
const OperatorBase::VarNameMap& outputs,
const VariableNameMap& inputs,
const VariableNameMap& outputs,
const AttributeMap& attrs)
: type_(type), inputs_(inputs), outputs_(outputs), attrs_(attrs) {
static std::atomic<size_t> gUniqId(0UL);
@ -141,18 +141,10 @@ std::vector<std::string> OperatorBase::OutputVars(bool has_intermediate) const {
}
return ret_val;
}
auto it = OpRegistry::op_info_map().find(type_);
PADDLE_ENFORCE(
it != OpRegistry::op_info_map().end(),
"Operator %s not registered, cannot figure out intermediate outputs",
type_);
PADDLE_ENFORCE(
it->second.proto_ != nullptr,
"Operator %s has no OpProto, cannot figure out intermediate outputs",
type_);
auto& info = OpInfoMap::Instance().Get(Type());
// get all OpProto::Var for outputs
for (auto& o : it->second.proto_->outputs()) {
for (auto& o : info.Proto().outputs()) {
// ignore all intermediate output
if (o.intermediate()) continue;
auto out = outputs_.find(o.name());

@ -19,6 +19,7 @@ limitations under the License. */
#include <unordered_map>
#include <vector>
#include "op_info.h"
#include "paddle/framework/attribute.h"
#include "paddle/framework/framework.pb.h"
#include "paddle/framework/scope.h"
@ -62,10 +63,8 @@ class ExecutionContext;
*/
class OperatorBase {
public:
using VarNameMap = std::map<std::string, std::vector<std::string>>;
OperatorBase(const std::string& type, const VarNameMap& inputs,
const VarNameMap& outputs, const AttributeMap& attrs);
OperatorBase(const std::string& type, const VariableNameMap& inputs,
const VariableNameMap& outputs, const AttributeMap& attrs);
virtual ~OperatorBase() {}
@ -93,8 +92,8 @@ class OperatorBase {
/// rename inputs outputs name
void Rename(const std::string& old_name, const std::string& new_name);
const VarNameMap& Inputs() const { return inputs_; }
const VarNameMap& Outputs() const { return outputs_; }
const VariableNameMap& Inputs() const { return inputs_; }
const VariableNameMap& Outputs() const { return outputs_; }
//! Get a input with argument's name described in `op_proto`
const std::string& Input(const std::string& name) const;
//! Get a input which has multiple variables.
@ -122,30 +121,32 @@ class OperatorBase {
// I (Inputs)opear
// O (Outputs)
// OG (Output Gradients)
VarNameMap inputs_;
VariableNameMap inputs_;
// NOTE: in case of OpGrad, outputs_ contains
// IG (Inputs Gradients)
VarNameMap outputs_;
VariableNameMap outputs_;
AttributeMap attrs_;
};
// Macro for define a clone method.
// If you are writing an kernel operator, `Clone` will be defined when you
// register it. i.e. `Clone` method is not needed to define by yourself.
#define DEFINE_OP_CLONE_METHOD(CLS) \
#define DEFINE_OP_CLONE_METHOD(cls) \
std::unique_ptr<OperatorBase> Clone() const final { \
return std::unique_ptr<OperatorBase>(new CLS(*this)); \
return std::unique_ptr<OperatorBase>(new cls(*this)); \
}
// Macro for define a default constructor for Operator.
// You can also use
// using PARENT_CLASS::PARENT_CLASS;
// to use parent's constructor.
#define DEFINE_OP_CONSTRUCTOR(CLS, PARENT_CLS) \
CLS(const std::string& type, const VarNameMap& inputs, \
const VarNameMap& outputs, const paddle::framework::AttributeMap& attrs) \
: PARENT_CLS(type, inputs, outputs, attrs) {}
#define DEFINE_OP_CONSTRUCTOR(cls, parent_cls) \
cls(const std::string& type, \
const ::paddle::framework::VariableNameMap& inputs, \
const ::paddle::framework::VariableNameMap& outputs, \
const paddle::framework::AttributeMap& attrs) \
: parent_cls(type, inputs, outputs, attrs) {}
class NOP : public OperatorBase {
public:
@ -389,8 +390,8 @@ class OperatorWithKernel : public OperatorBase {
using OpKernelMap =
std::unordered_map<OpKernelKey, std::unique_ptr<OpKernel>, OpKernelHash>;
OperatorWithKernel(const std::string& type, const VarNameMap& inputs,
const VarNameMap& outputs, const AttributeMap& attrs)
OperatorWithKernel(const std::string& type, const VariableNameMap& inputs,
const VariableNameMap& outputs, const AttributeMap& attrs)
: OperatorBase(type, inputs, outputs, attrs) {}
void InferShape(const Scope& scope) const override {

@ -23,8 +23,8 @@ static int op_run_num = 0;
class OpWithoutKernelTest : public OperatorBase {
public:
OpWithoutKernelTest(const std::string& type, const VarNameMap& inputs,
const VarNameMap& outputs, const AttributeMap& attrs)
OpWithoutKernelTest(const std::string& type, const VariableNameMap& inputs,
const VariableNameMap& outputs, const AttributeMap& attrs)
: OperatorBase(type, inputs, outputs, attrs), x(1) {}
void InferShape(const Scope& scope) const override {}
void Run(const Scope& scope,
@ -249,8 +249,9 @@ TEST(OpKernel, multi_inputs) {
class OperatorClone : public paddle::framework::OperatorBase {
public:
DEFINE_OP_CLONE_METHOD(OperatorClone);
OperatorClone(const std::string& type, const VarNameMap& inputs,
const VarNameMap& outputs,
OperatorClone(const std::string& type,
const paddle::framework::VariableNameMap& inputs,
const paddle::framework::VariableNameMap& outputs,
const paddle::framework::AttributeMap& attrs)
: OperatorBase(type, inputs, outputs, attrs) {}
void InferShape(const paddle::framework::Scope& scope) const override {}

@ -105,7 +105,10 @@ class Tensor {
template <typename T>
inline Tensor Slice(const int& begin_idx, const int& end_idx) const;
platform::Place place() const { return holder_->place(); }
platform::Place place() const {
PADDLE_ENFORCE_NOT_NULL(holder_, "Tensor get place() must contains holder");
return holder_->place();
}
private:
template <typename T>

@ -1012,11 +1012,6 @@ void RecurrentGradientMachine::generateSequence() {
/* width */ resultNum,
false,
/* useGpu */ false);
Matrix::resizeOrCreate(generator_.outArg.value,
/* height */ maxGenWordCount,
/* width */ 1,
false,
/* useGpu */ false);
}
ICpuGpuVector::resizeOrCreate(generator_.outArg.sequenceStartPositions,
numSequences + 1,
@ -1026,7 +1021,7 @@ void RecurrentGradientMachine::generateSequence() {
} else {
oneWaySearch(numSequences);
}
if (dataArgsSize_) createDataOutlink(batchMachineIdVec_);
if (dataArgsSize_) createDataOutlink();
size_t size = generator_.ids.size();
generator_.outArg.ids->resize(size);
@ -1106,6 +1101,7 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) {
}
batchMachineIdVec_.clear();
batchMachineStartPos_.clear();
int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false);
starts[0] = 0;
generator_.ids.clear();
@ -1312,13 +1308,20 @@ void RecurrentGradientMachine::fillGenOutputs() {
finalPaths_[i].resize(minFinalPathsSize);
}
batchMachineIdVec_.clear();
generator_.ids.clear();
int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false);
starts[0] = 0;
if (numResults > 1) {
real* probs = generator_.outArg.in->getData();
int idsProbSaveSize = 0;
for (auto inSeq : finalPaths_) {
for (auto path : inSeq) idsProbSaveSize += path.ids.size();
idsProbSaveSize += inSeq.size();
}
Matrix::resizeOrCreate(
generator_.outArg.value, idsProbSaveSize, 1, false, false);
real* idsProb = generator_.outArg.value->getData();
real* probs = generator_.outArg.in->getData();
size_t curPos = 0;
for (size_t i = 0; i < finalPaths_.size(); ++i) {
for (size_t j = 0; j < finalPaths_[i].size(); ++j) {
@ -1333,24 +1336,16 @@ void RecurrentGradientMachine::fillGenOutputs() {
curPos += genLen;
idsProb[curPos++] = -1.0;
probs[i * numResults + j] = path.logProb;
if (!j && dataArgsSize_) {
// in beam search, here only reserved the top 1 generated result
// for out_links that are not the generated word indices.
batchMachineIdVec_.insert(batchMachineIdVec_.end(),
path.machineIdVec.begin(),
path.machineIdVec.end());
}
}
starts[i + 1] = generator_.ids.size();
}
} else {
for (size_t i = 0; i < finalPaths_.size(); ++i) {
CHECK(!finalPaths_[i].empty());
generator_.ids.insert(generator_.ids.begin(),
finalPaths_[i][0].ids.begin(),
finalPaths_[i][0].ids.end());
starts[i + 1] = starts[i] + finalPaths_[i][0].ids.size();
Path& path = finalPaths_[i][0];
generator_.ids.insert(
generator_.ids.begin(), path.ids.begin(), path.ids.end());
starts[i + 1] = starts[i] + path.ids.size();
}
}
}
@ -1364,25 +1359,76 @@ void RecurrentGradientMachine::copyDataOutlinkFrame(size_t machineCur) {
}
}
void RecurrentGradientMachine::createDataOutlink(
std::vector<int>& machineIdVec) {
size_t seqNum =
getBeamSize() > 1UL ? finalPaths_.size() : finalPaths_[0].size();
std::vector<int> starts(seqNum + 1, 0);
for (size_t i = 0; i < seqNum; ++i) {
size_t seqLen = getBeamSize() > 1UL ? finalPaths_[i][0].ids.size()
: finalPaths_[0][i].ids.size();
starts[i + 1] = starts[i] + seqLen;
void RecurrentGradientMachine::createDataOutlinkSelRowsInfo(
bool isSeq, std::vector<Argument>& outArgs) {
batchMachineIdVec_.clear();
size_t seqIdx = 0;
for (size_t i = 0; i < finalPaths_.size(); ++i) {
for (size_t j = 0; j < finalPaths_[i].size(); ++j) {
std::vector<int>& machineIdVec = finalPaths_[i][j].machineIdVec;
if (isSeq) {
for (size_t i = 0; i < machineIdVec.size(); ++i) {
size_t rowId = machineIdVec[i];
int* seqPos =
outArgs[i].sequenceStartPositions->getMutableData(false);
batchMachineIdVec_.push_back(seqPos[rowId]);
}
} else {
batchMachineIdVec_.insert(
batchMachineIdVec_.end(), machineIdVec.begin(), machineIdVec.end());
}
seqIdx++;
}
}
}
void RecurrentGradientMachine::createDataOutlinkCopySizeInfo(
bool isSeq, std::vector<Argument>& outArgs, std::vector<int>& copySize) {
size_t totalSeqNum = std::accumulate(
finalPaths_.begin(),
finalPaths_.end(),
0UL,
[](size_t a, const std::vector<Path>& b) { return a + b.size(); });
copySize.resize(totalSeqNum, 1);
batchMachineStartPos_.resize(totalSeqNum + 1, 0);
if (isSeq) {
ICpuGpuVectorPtr inputSeqStartPos = outArgs[0].sequenceStartPositions;
CHECK_EQ(static_cast<size_t>(inputSeqStartPos->getSize() - 1),
getBeamSize() > 1 ? finalPaths_.size() : finalPaths_[0].size());
int* starts = inputSeqStartPos->getMutableData(false);
int seqId = 0;
for (int i = 0; i < finalPaths_.size(); ++i) {
for (int j = 0; j < finalPaths_[i].size(); ++j) {
copySize[seqId] = getBeamSize() > 1 ? starts[i + 1] - starts[i]
: starts[j + 1] - starts[j];
batchMachineStartPos_[seqId + 1] =
batchMachineStartPos_[seqId] + finalPaths_[i][j].ids.size();
seqId++;
}
}
} else {
for (size_t i = 0; i < finalPaths_[0].size(); ++i)
batchMachineStartPos_[i + 1] =
batchMachineStartPos_[i] + finalPaths_[0][i].ids.size();
}
}
void RecurrentGradientMachine::createDataOutlink() {
for (size_t i = 0; i < dataArgsSize_; i++) {
bool isSeq = dataArgsFrame_[i][0].hasSeq();
std::vector<int> copySize;
createDataOutlinkCopySizeInfo(isSeq, dataArgsFrame_[i], copySize);
createDataOutlinkSelRowsInfo(isSeq, dataArgsFrame_[i]);
dataArgs_[i].concat(dataArgsFrame_[i],
machineIdVec,
starts,
batchMachineIdVec_,
batchMachineStartPos_,
copySize,
useGpu_,
HPPL_STREAM_1,
PASS_TEST);
auto dataAgent =
dynamic_cast<DataLayer*>(outFrameLines_[i + 1].agentLayer.get());
CHECK_NOTNULL(dataAgent);

@ -190,7 +190,7 @@ public:
std::vector<int> ids;
/**
* @brief idsProb, log probability of each generated words.
* @brief idsProb, log probability of each generated word.
*/
std::vector<real> idsProb;
@ -472,15 +472,43 @@ private:
void copyDataOutlinkFrame(size_t machineCur);
/*
* @brief In generation, if the layer group has more than 1 outlink, outlinks
* except the first one are data outlinks. This function creates the data
* outlinks.
* @note In beam search, only one generated sequence with the hightest log
* probabilites are retained.
* @param machineIdVec : select a row of output matrix in each frame
* that the generation process expanded.
* @brief In generation, if the layer group has more than 1 outlink, outlink
* except the first one is a data outlink. In RecurrentLayerGroup, each time
* step is a separate Network, outputs of a layer inside the
* RecurrentLayerGroup are stored in separate Arguments. If one layer is
* specified as an outlink of RecurrentLayerGroup. This function will
* collect outputs in each time step of each generated sequence which are
* dispersed in separate Arguments to form a new single Argument as output of
* RecurrentLayerGroup.
*/
void createDataOutlink(std::vector<int>& machineIdVec);
void createDataOutlink();
/*
* @brief decide to select how many rows from the Matrix stored the forward
* pass results from a start position.
*
* @param isSeq: a flag indicating whetehr the layer to be output of the
* RecurrentGradientMachine is a sequence or not
* @param outArgs: all of the the returned Arguments of the forward pass
* during the generation process.
* @param copySize: the returned result, number of rows to select from the
* Matrix stored the forward pass results from a start position.
*/
void createDataOutlinkCopySizeInfo(bool isSeq,
std::vector<Argument>& outArgs,
std::vector<int>& copySize);
/*
* @brief decide index of the start row for each time step of a generated
* sequence in Matrix stored the entire beam search batch's forward pass
* results.
*
* @param isSeq: a flag indicating whether the layer to be output of the
* RecurrentGradientMachine is a sequence or not
* @param outArgs: all of the returned Arguments of the forward pass
* during the generation process.
*/
void createDataOutlinkSelRowsInfo(bool isSeq, std::vector<Argument>& outArgs);
/*
* @brief used in beam search, connect previous frame to form recurrent link
@ -543,6 +571,7 @@ private:
std::vector<int> topIds_;
std::vector<int> seqIds_;
std::vector<int> batchMachineIdVec_;
std::vector<int> batchMachineStartPos_;
std::vector<std::vector<Path>> finalPaths_;
std::vector<real> minFinalPathLogProb_;
BeamSearchControlCallbacks* beamSearchCtrlCallbacks_;

@ -80,13 +80,14 @@ void KmaxSeqScoreLayer::forward(PassType passType) {
<< "input of " << getName()
<< " must be a sequence or a nested sequence.";
CHECK_EQ(input.value->getWidth(), 1UL)
<< "input of " << getName()
<< " is score over a sequence or a nested sequence, so its width "
<< " must be 1.";
<< "input of " << getName() << " are scores over a sequence or "
<< "a nested sequence, so its width must be 1.";
if (useGpu_) {
// this Layer runs only in CPU, if the model is runing on GPU,
// then copy the input to this layer from GPU to CPU.
/*
* currently, this Layer only runs in CPU, if the other part of the model is
* runing on GPU, then copy the input to this layer from GPU to CPU.
*/
Matrix::resizeOrCreate(scores_,
inputScore->getHeight(),
1,
@ -97,6 +98,14 @@ void KmaxSeqScoreLayer::forward(PassType passType) {
scores_ = inputScore;
}
/*
* TODO(caoying)
* In PaddePaddle, currently all matrices are real number types,
* but output of this layer which is some selected indices of the give
* sequence are actually filled with int types so that storing int types
* information in a real number matrix is dangerous, since real numbers will
* be convered to int types.
*/
Matrix::resizeOrCreate(
output_.value,
input.hasSubseq() ? input.getNumSubSequences() : input.getNumSequences(),

@ -0,0 +1,220 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "Layer.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/Vector.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
namespace paddle {
class SequenceSliceLayer : public Layer {
public:
explicit SequenceSliceLayer(const LayerConfig& config) : Layer(config) {}
bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;
void forward(PassType passType) override;
void backward(const UpdateCallback& callback = nullptr) override;
private:
/*
* TODO(caoying)
* In PaddePaddle, currently all matrices are real number types,
* but the second and the (optional) third input which are some
* selected indices of the give sequence to trim the sequence, are actually
* filled with int types so that storing int types information in real number
* matrices is very dangerous, since real numbers will be convered to int
* types. If a user fills this matrix himself, invalid data may occor.
*/
MatrixPtr startIdsOnCpu_;
MatrixPtr endIdsOnCpu_;
std::vector<int> selectedRows_;
IVectorPtr rowIndice_;
std::vector<std::vector<int>> inputSeqInfoVec_;
std::vector<int> outSubSeqStartPos_;
std::vector<int> outSeqStartPos_;
void checkInputs();
void copySliceIdsToCpu();
void calSelectedRows(const MatrixPtr starts, const MatrixPtr ends);
};
REGISTER_LAYER(seq_slice, SequenceSliceLayer);
bool SequenceSliceLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
/* Initialize the basic parent class */
Layer::init(layerMap, parameterMap);
CHECK_GE(inputLayers_.size(), 2U);
CHECK_LE(inputLayers_.size(), 3U);
setNeedSequenceInfo(false);
return true;
}
void SequenceSliceLayer::checkInputs() {
const Argument& inputSeq = getInput(0);
CHECK(inputSeq.hasSeq()) << "The first input of sequence slice layer "
<< "must be a sequence.";
const MatrixPtr indices1 = getInputValue(1);
CHECK_EQ(static_cast<size_t>(indices1->getHeight()),
inputSeq.hasSubseq() ? inputSeq.getNumSubSequences()
: inputSeq.getNumSequences())
<< "Height of the second input should be equal to number of sequence "
<< "in the first input.";
if (inputLayers_.size() == 3) {
const MatrixPtr indices2 = getInputValue(2);
CHECK_EQ(indices2->getHeight(), indices1->getHeight())
<< "start indices and end indices should have the same height.";
CHECK_EQ(indices2->getWidth(), indices1->getWidth())
<< "start indices and end indices should have the same Width.";
}
}
void SequenceSliceLayer::copySliceIdsToCpu() {
const MatrixPtr indices1 = getInputValue(1);
if (inputLayers_.size() == 2U) {
if (config_.select_first()) {
Matrix::resizeOrCreate(startIdsOnCpu_,
indices1->getHeight(),
indices1->getWidth(),
false /* trans */,
false /* useGpu */);
startIdsOnCpu_->copyFrom(*indices1);
endIdsOnCpu_ = nullptr;
} else {
Matrix::resizeOrCreate(endIdsOnCpu_,
indices1->getHeight(),
indices1->getWidth(),
false /* trans */,
false /* useGpu */);
endIdsOnCpu_->copyFrom(*indices1);
startIdsOnCpu_ = nullptr;
}
} else if (inputLayers_.size() == 3U) {
Matrix::resizeOrCreate(startIdsOnCpu_,
indices1->getHeight(),
indices1->getWidth(),
false /* trans */,
false /* useGpu */);
startIdsOnCpu_->copyFrom(*indices1);
const MatrixPtr indices2 = getInputValue(2);
Matrix::resizeOrCreate(endIdsOnCpu_,
indices2->getHeight(),
indices2->getWidth(),
false /* trans */,
false /* useGpu */);
endIdsOnCpu_->copyFrom(*indices2);
}
}
void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts,
const MatrixPtr ends) {
CHECK(starts || ends) << "At least one of the start or end indices "
<< "should be given.";
outSeqStartPos_.resize(1, 0);
outSubSeqStartPos_.resize(1, 0);
selectedRows_.clear();
size_t beamSize = starts ? starts->getWidth() : ends->getWidth();
size_t rowIdx = 0;
for (size_t i = 0; i < inputSeqInfoVec_.size(); ++i) {
for (size_t j = 0; j < inputSeqInfoVec_[i].size() - 1; ++j) {
for (size_t k = 0; k < beamSize; ++k) {
if (starts && starts->getElement(rowIdx, k) == -1.) break;
if (ends && ends->getElement(rowIdx, k) == -1.) break;
int begPos = inputSeqInfoVec_[i][j];
if (starts) begPos += starts->getElement(rowIdx, k);
int endPos = inputSeqInfoVec_[i][j + 1] - 1;
if (ends) endPos = inputSeqInfoVec_[i][j] + ends->getElement(rowIdx, k);
int seqLen = endPos - begPos + 1;
CHECK_GT(seqLen, 0U);
for (int m = begPos; m <= endPos; ++m) selectedRows_.push_back(m);
inputSeqInfoVec_.size() > 1
? outSubSeqStartPos_.push_back(outSubSeqStartPos_.back() + seqLen)
: outSeqStartPos_.push_back(outSeqStartPos_.back() + seqLen);
}
rowIdx++;
}
if (inputSeqInfoVec_.size() > 1)
outSeqStartPos_.push_back(outSubSeqStartPos_.back());
}
if (useGpu_) {
rowIndice_ = IVector::create(selectedRows_.size(), useGpu_);
rowIndice_->copyFrom(selectedRows_.data(), selectedRows_.size());
} else {
rowIndice_ =
IVector::create(selectedRows_.data(), selectedRows_.size(), useGpu_);
}
// create the sequence information for the output.
ICpuGpuVector::resizeOrCreate(
output_.sequenceStartPositions, outSeqStartPos_.size(), false);
output_.sequenceStartPositions->copyFrom(
outSeqStartPos_.data(), outSeqStartPos_.size(), false);
if (inputSeqInfoVec_.size() > 1) {
ICpuGpuVector::resizeOrCreate(
output_.subSequenceStartPositions, outSubSeqStartPos_.size(), false);
output_.subSequenceStartPositions->copyFrom(
outSubSeqStartPos_.data(), outSubSeqStartPos_.size(), false);
}
}
void SequenceSliceLayer::forward(PassType passType) {
Layer::forward(passType);
checkInputs();
const Argument& inputSeq = getInput(0);
inputSeqInfoVec_.clear();
Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions,
inputSeq.subSequenceStartPositions,
inputSeqInfoVec_);
if (!useGpu_) {
if (inputLayers_.size() == 2U) {
startIdsOnCpu_ = config_.select_first() ? getInputValue(1) : nullptr;
endIdsOnCpu_ = config_.select_first() ? nullptr : getInputValue(1);
} else if (inputLayers_.size() == 3U) {
startIdsOnCpu_ = getInputValue(1);
endIdsOnCpu_ = getInputValue(2);
}
} else
copySliceIdsToCpu();
// calculate the selected row indices in a batch,
// and build the output sequence information.
calSelectedRows(startIdsOnCpu_ ? startIdsOnCpu_ : nullptr,
endIdsOnCpu_ ? endIdsOnCpu_ : nullptr);
resetOutput(selectedRows_.size(), getSize());
getOutputValue()->selectRows(*getInputValue(0), *rowIndice_);
}
void SequenceSliceLayer::backward(const UpdateCallback& callback) {
getOutputGrad()->addToRows(*getInputGrad(0), *rowIndice_);
}
} // namespace paddle

@ -52,23 +52,34 @@ private:
* ]
*
* ths output is saved to private member rowIndice_;
* [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
* 16,17,18,19,20,21,22,23,24,25,26,27]
* [0,1,2,3,4,5,6,7,8,9,15,16,17,18,19,20,21,23,24,25,26,27]
*/
void calSelectedCols(const MatrixPtr selectedIndices,
void calSelectedRows(const MatrixPtr selectedIndices,
const std::vector<std::vector<int>>& inputSeqInfo);
// if the second input of this layer is on GPU memory, copy it to CPU memory.
/*
* TODO(caoying)
* In PaddePaddle, currently all matrices are real number types,
* but the second is some selected indices of the give sequence to trim
* the nested sequence, are actually filled with int types so that storing
* int types information in real number matrices is very dangerous, since
* real numbers will be convered to int types. If a user fills this matrix
* himself, invalid data may occor.
*
* if the second input of this layer is on GPU memory, copy it to CPU memory.
*/
MatrixPtr selIdsCpu_;
// reorganized sequenceStartPositions and subSequenceStartPositions
// into a 2d vector to facilitate the sequence selection process.
/*
* reorganize sequenceStartPositions and subSequenceStartPositions
* into a 2d vector to facilitate the sequence selection process.
*/
std::vector<std::vector<int>> inputSeqInfoVec_;
// the final selected row indices in a batch,
// rowIdx_ and selectedRows_ actually share a same memory.
/* store the final selected row indices in a batch */
IVectorPtr rowIndice_;
/* rowIndice_ and selectedRows_ actually share a same memory. */
std::vector<int> selectedRows_;
};
@ -83,7 +94,7 @@ bool SubNestedSequenceLayer::init(const LayerMap& layerMap,
return true;
}
void SubNestedSequenceLayer::calSelectedCols(
void SubNestedSequenceLayer::calSelectedRows(
const MatrixPtr selectedIndices,
const std::vector<std::vector<int>>& inputSeqInfo) {
selectedRows_.clear();
@ -160,7 +171,7 @@ void SubNestedSequenceLayer::forward(PassType passType) {
Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions,
inputSeq.subSequenceStartPositions,
inputSeqInfoVec_);
calSelectedCols(selIdsCpu_, inputSeqInfoVec_);
calSelectedRows(selIdsCpu_, inputSeqInfoVec_);
resetOutput(selectedRows_.size(), getSize());
getOutputValue()->selectRows(*getInputValue(0), *rowIndice_);

@ -41,6 +41,13 @@ add_unittest_without_exec(test_CrossEntropyOverBeam
add_test(NAME test_CrossEntropyOverBeam
COMMAND test_CrossEntropyOverBeam)
################ test_SeqSliceLayerGrad ####################
add_unittest_without_exec(test_SeqSliceLayerGrad
test_SeqSliceLayerGrad.cpp
LayerGradUtil.cpp)
add_test(NAME test_SeqSliceLayerGrad
COMMAND test_SeqSliceLayerGrad)
add_unittest_without_exec(test_ActivationGrad
test_ActivationGrad.cpp
LayerGradUtil.cpp)

@ -0,0 +1,223 @@
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "ModelConfig.pb.h"
#include "paddle/gserver/layers/DataLayer.h"
#include "paddle/trainer/Trainer.h"
#include "LayerGradUtil.h"
#include "paddle/testing/TestUtil.h"
using namespace paddle; // NOLINT
using namespace std; // NOLINT
DECLARE_int32(gpu_id);
DECLARE_bool(thread_local_rand_use_global_seed);
const int MAX_SEQ_NUM = 17;
const int MAX_SEQ_LEN = 23;
const int MAX_BEAM_SIZE = 13;
vector<real> randSampling(real range, int n) {
CHECK_GE(range, n);
vector<real> num(range);
iota(begin(num), end(num), 0.);
if (range == n) return num;
random_shuffle(begin(num), end(num));
num.resize(n);
sort(begin(num), end(num));
return num;
}
void genSeqInfo(vector<int>& seqStartPos, vector<int>& subSeqStartPos) {
seqStartPos.resize(1, 0);
subSeqStartPos.resize(1, 0);
srand((size_t)(time(NULL)));
int seqNum = 1 + (rand() % MAX_SEQ_NUM);
for (int i = 0; i < seqNum; ++i) {
int subSeqNum = 1 + (rand() % MAX_SEQ_NUM);
for (int j = 0; j < subSeqNum; ++j)
subSeqStartPos.push_back(subSeqStartPos.back() +
(1 + (rand() % MAX_SEQ_LEN)));
seqStartPos.push_back(subSeqStartPos.back());
}
}
/*
generate start indices according to sequence start positions.
*/
void genStarts(vector<int>& seqStartPos,
vector<vector<real>>& starts,
size_t beamSize) {
starts.clear();
starts.resize(seqStartPos.size() - 1, vector<real>(beamSize, -1.));
for (size_t i = 0; i < seqStartPos.size() - 1; ++i) {
int seqLen = seqStartPos[i + 1] - seqStartPos[i];
vector<real> randStarts =
randSampling(seqLen, min(seqLen, static_cast<int>(beamSize)));
copy(begin(randStarts), end(randStarts), begin(starts[i]));
}
}
/*
generate end indices according to sequence start positions and start indices.
*/
void genEnds(vector<int>& seqStartPos,
vector<vector<real>>& starts,
vector<vector<real>>& ends,
size_t beamSize) {
CHECK_EQ(seqStartPos.size() - 1, starts.size());
ends.clear();
ends.resize(seqStartPos.size() - 1, vector<real>(beamSize, -1.));
for (size_t i = 0; i < starts.size(); ++i) {
for (size_t j = 0; j < starts[i].size(); ++j) {
int seqLen = seqStartPos[i + 1] - seqStartPos[i];
CHECK_GE(seqLen - 1, starts[i][j]);
if (starts[i][j] == -1.) break;
if (starts[i][j] == (seqLen - 1)) {
ends[i][j] = starts[i][j];
} else {
ends[i][j] = starts[i][j] + randSampling(seqLen - starts[i][j], 1)[0];
}
}
}
}
void genTestData(vector<int>& seqStartPos,
vector<int>& subSeqStartPos,
vector<vector<real>>& starts,
vector<vector<real>>& ends,
bool hasSubseq) {
size_t beamSize = 1 + (rand() % MAX_BEAM_SIZE);
genSeqInfo(seqStartPos, subSeqStartPos);
genStarts(hasSubseq ? subSeqStartPos : seqStartPos, starts, beamSize);
genEnds(hasSubseq ? subSeqStartPos : seqStartPos, starts, ends, beamSize);
}
template <typename T>
void flatten2dVector(vector<vector<T>>& inVec, vector<T>& outVec) {
size_t totalSize{0};
for (auto const& items : inVec) totalSize += items.size();
outVec.reserve(totalSize);
for (auto& items : inVec)
move(items.begin(), items.end(), back_inserter(outVec));
}
void testSeqSliceLayer(bool hasSubseq,
bool useGpu,
vector<int>& seqStartPos,
vector<int>& subSeqStartPos,
vector<vector<real>>& starts,
vector<vector<real>>& ends) {
// layer size is not crutial for this layer,
// so here use a small layer size in the unittest.
const size_t layerSize{4};
TestConfig config;
config.layerConfig.set_type("seq_slice");
config.layerConfig.set_size(layerSize);
// add the first input
MatrixPtr seqInputPtr =
Matrix::create(hasSubseq ? subSeqStartPos.back() : seqStartPos.back(),
layerSize,
false,
false);
seqInputPtr->randomizeUniform();
if (hasSubseq) {
config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA,
"seq_input",
seqInputPtr,
seqStartPos,
subSeqStartPos});
} else {
config.inputDefs.push_back(
{INPUT_SELF_DEFINE_DATA, "seq_input", seqInputPtr, seqStartPos});
}
config.layerConfig.add_inputs();
// add start indices
if (starts.size()) {
vector<real> startsToVec;
flatten2dVector(starts, startsToVec);
MatrixPtr startMatrixPtr =
Matrix::create(starts.size(), starts[0].size(), false, false);
startMatrixPtr->copyFrom(startsToVec.data(), startsToVec.size());
config.inputDefs.push_back(
{INPUT_SELF_DEFINE_DATA, "starts", startMatrixPtr});
config.layerConfig.add_inputs();
config.layerConfig.set_select_first(true);
}
// add end indices
if (ends.size()) {
vector<real> endsToVec;
flatten2dVector(ends, endsToVec);
MatrixPtr endMatrixPtr =
Matrix::create(ends.size(), ends[0].size(), false, false);
endMatrixPtr->copyFrom(endsToVec.data(), endsToVec.size());
config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, "ends", endMatrixPtr});
config.layerConfig.add_inputs();
config.layerConfig.set_select_first(false);
}
testLayerGrad(config, "seq_slice", /*batchSize*/ 100, false, useGpu, false);
}
TEST(Layer, SeqSliceLayer) {
vector<int> seqStartPos;
vector<int> subSeqStartPos;
vector<vector<real>> starts;
vector<vector<real>> ends;
std::vector<bool> mode = {false};
#ifndef PADDLE_ONLY_CPU
mode.push_back(true);
#endif
genSeqInfo(seqStartPos, subSeqStartPos);
for (bool hasSubseq : {true, false}) {
LOG(INFO) << "hasSubSeq : " << hasSubseq;
genTestData(seqStartPos, subSeqStartPos, starts, ends, hasSubseq);
for (bool useGpu : mode) {
vector<vector<real>> tmp;
testSeqSliceLayer(
hasSubseq, useGpu, seqStartPos, subSeqStartPos, tmp, ends);
testSeqSliceLayer(
hasSubseq, useGpu, seqStartPos, subSeqStartPos, starts, tmp);
testSeqSliceLayer(
hasSubseq, useGpu, seqStartPos, subSeqStartPos, starts, ends);
}
}
}
int main(int argc, char** argv) {
initMain(argc, argv);
hl_start();
hl_init(FLAGS_gpu_id);
FLAGS_thread_local_rand_use_global_seed = true;
srand(1);
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

@ -43,6 +43,7 @@ endfunction()
add_subdirectory(math)
cc_test(gather_test SRCS gather_test.cc DEPS tensor)
op_library(gather_op SRCS gather_op.cc gather_op.cu)
cc_test(scatter_test SRCS scatter_test.cc DEPS tensor)
@ -68,3 +69,4 @@ op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc
DEPS framework_proto tensor op_registry operator net_op)
op_library(uniform_random_op
SRCS uniform_random_op.cc uniform_random_op.cu)
op_library(scale_op SRCS scale_op.cc scale_op.cu DEPS net_op)

@ -17,6 +17,7 @@ limitations under the License. */
#include <cstring>
#include "paddle/framework/ddim.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/tensor.h"
#include "paddle/platform/place.h"
@ -25,13 +26,13 @@ namespace operators {
// Implementation of CPU copy
template <typename T>
void CPUGather(const T* params, const int* indices, const int slice_size,
void CPUGather(const T* src, const int* indices, const int slice_size,
const int index_size, T* output) {
const size_t slice_bytes = slice_size * sizeof(T);
for (int i = 0; i < index_size; ++i) {
int index_ = indices[i];
memcpy(output + i * slice_size, params + index_ * slice_size, slice_bytes);
memcpy(output + i * slice_size, src + index_ * slice_size, slice_bytes);
}
}
@ -55,7 +56,7 @@ void Gather(const platform::Place& place, const paddle::framework::Tensor* src,
int index_size = index->dims()[0];
auto src_dims = src->dims();
paddle::framework::DDim output_dims(src_dims);
framework::DDim output_dims(src_dims);
output_dims[0] = index_size;
// slice size

@ -0,0 +1,72 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/gather_op.h"
#include "paddle/framework/ddim.h"
namespace paddle {
namespace operators {
class GatherOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(const framework::InferShapeContext &ctx) const override {
int batch_size = ctx.Input<Tensor>("Index")->dims()[0];
PADDLE_ENFORCE_GE(batch_size, 0, "Batch size must be >0");
framework::DDim output_dims(ctx.Input<Tensor>("X")->dims());
output_dims[0] = batch_size;
ctx.Output<Tensor>("Out")->Resize(output_dims);
}
};
class GatherGradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(const framework::InferShapeContext &ctx) const override {
auto X_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
auto X = ctx.Input<Tensor>("X");
X_grad->Resize(X->dims());
}
};
class GatherOpMaker : public framework::OpProtoAndCheckerMaker {
public:
GatherOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The source input of gather op");
AddInput("Index", "The index input of gather op");
AddOutput("Out", "The output of add op");
AddComment(R"DOC(
Gather Operator by selecting from the first axis,
Out = X[Index]
)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(gather, ops::GatherOp, ops::GatherOpMaker, gather_grad,
ops::GatherGradOp);
REGISTER_OP_CPU_KERNEL(gather,
ops::GatherOpKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
gather_grad,
ops::GatherGradientOpKernel<paddle::platform::CPUPlace, float>);

@ -0,0 +1,20 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/gather_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(gather,
ops::GatherOpKernel<paddle::platform::GPUPlace, float>);

@ -0,0 +1,53 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "gather.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "scatter.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
template <typename Place, typename T>
class GatherOpKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
auto *X = ctx.Input<Tensor>("X");
auto *Index = ctx.Input<Tensor>("Index");
auto *Y = ctx.Output<Tensor>("Out");
Y->mutable_data<T>(ctx.GetPlace());
Gather<T>(ctx.GetPlace(), X, Index, Y);
}
};
template <typename Place, typename T>
class GatherGradientOpKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
auto *Index = ctx.Input<Tensor>("Index");
auto *dX = ctx.Output<Tensor>(framework::GradVarName("X"));
auto *dO = ctx.Input<Tensor>(framework::GradVarName("Out"));
dX->mutable_data<T>(ctx.GetPlace());
ScatterUpdate<T>(ctx.GetPlace(), dO, Index, dX);
}
};
} // namespace operators
} // namespace paddle

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save