Merge branch 'develop' of github.com:baidu/Paddle into feature/identity_op

revert-3824-remove_grad_op_type
Yu Yang 8 years ago
commit b53f4e0be7

@ -55,6 +55,7 @@ option(WITH_C_API "Compile PaddlePaddle with C-API(Prediction)" OFF)
option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF) option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF)
option(GLIDE_INSTALL "Download and install go dependencies " ON) option(GLIDE_INSTALL "Download and install go dependencies " ON)
option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF) option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF)
option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF)
# CMAKE_BUILD_TYPE # CMAKE_BUILD_TYPE
if(NOT CMAKE_BUILD_TYPE) if(NOT CMAKE_BUILD_TYPE)

@ -28,6 +28,10 @@ if(NOT WITH_TIMER)
add_definitions(-DPADDLE_DISABLE_TIMER) add_definitions(-DPADDLE_DISABLE_TIMER)
endif(NOT WITH_TIMER) endif(NOT WITH_TIMER)
if(USE_EIGEN_FOR_BLAS)
add_definitions(-DPADDLE_USE_EIGEN_FOR_BLAS)
endif(USE_EIGEN_FOR_BLAS)
if(NOT WITH_PROFILER) if(NOT WITH_PROFILER)
add_definitions(-DPADDLE_DISABLE_PROFILER) add_definitions(-DPADDLE_DISABLE_PROFILER)
endif(NOT WITH_PROFILER) endif(NOT WITH_PROFILER)

@ -362,6 +362,11 @@ trans
.. autoclass:: paddle.v2.layer.trans .. autoclass:: paddle.v2.layer.trans
:noindex: :noindex:
scale_shift
-----------
.. autoclass:: paddle.v2.layer.scale_shift
:noindex:
Sampling Layers Sampling Layers
=============== ===============

@ -18,8 +18,8 @@ cc_test(scope_test SRCS scope_test.cc DEPS scope)
proto_library(framework_proto SRCS framework.proto) proto_library(framework_proto SRCS framework.proto)
cc_library(attribute SRCS attribute.cc DEPS framework_proto) cc_library(attribute SRCS attribute.cc DEPS framework_proto)
cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto)
cc_library(operator SRCS operator.cc DEPS framework_proto device_context tensor scope attribute) cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope)
cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry) cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry)
cc_library(grad_op_builder SRCS grad_op_builder.cc DEPS operator) cc_library(grad_op_builder SRCS grad_op_builder.cc DEPS operator)
@ -45,6 +45,7 @@ cc_library(paddle_pybind SHARED
SRCS pybind.cc SRCS pybind.cc
DEPS pybind python backward DEPS pybind python backward
sgd_op sgd_op
gather_op
add_op add_op
mul_op mul_op
rowwise_add_op rowwise_add_op

@ -110,7 +110,7 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
dup_output_ops[out].emplace_back(local_op_id); dup_output_ops[out].emplace_back(local_op_id);
return false; return false;
}); });
net->AddOp(std::move(bwd)); net->AppendOp(std::move(bwd));
} }
// Get unique ID for this method. // Get unique ID for this method.
auto uid = uniq_id++; auto uid = uniq_id++;
@ -163,8 +163,9 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
// If part of input gradient of that operator is not calculated, fill // If part of input gradient of that operator is not calculated, fill
// zero variables to that input gradient. // zero variables to that input gradient.
net->AddOp(OpRegistry::CreateOp("fill_zeros_like", {{"Src", {prefix}}}, net->AppendOp(OpRegistry::CreateOp("fill_zeros_like",
{{"Dst", {grad_input}}}, {})); {{"Src", {prefix}}},
{{"Dst", {grad_input}}}, {}));
} }
return false; return false;
}); });
@ -195,7 +196,7 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
if (net->ops_.empty()) { // Current no aux op is added to network if (net->ops_.empty()) { // Current no aux op is added to network
return grad_op; return grad_op;
} }
net->AddOp(std::move(grad_op)); net->AppendOp(std::move(grad_op));
} }
net->SetType("@GENERATED_BACKWARD@"); net->SetType("@GENERATED_BACKWARD@");
net->CompleteAddOp(); net->CompleteAddOp();

@ -72,16 +72,16 @@ class NoGradOpMaker : public OpProtoAndCheckerMaker {
class FcOp : public operators::NetOp { class FcOp : public operators::NetOp {
public: public:
FcOp(const std::string &type, const VarNameMap &inputs, FcOp(const std::string &type, const VariableNameMap &inputs,
const VarNameMap &outputs, const AttributeMap &attrs) const VariableNameMap &outputs, const AttributeMap &attrs)
: NetOp(type, inputs, outputs, attrs) { : NetOp(type, inputs, outputs, attrs) {
AddOp(OpRegistry::CreateOp("mul", AppendOp(OpRegistry::CreateOp("mul",
{{"X", {Input("X")}}, {"Y", {Input("W")}}}, {{"X", {Input("X")}}, {"Y", {Input("W")}}},
{{"Out", {Output("mul_result")}}}, {})); {{"Out", {Output("mul_result")}}}, {}));
auto input_b = Inputs("b"); auto input_b = Inputs("b");
std::string before_act = "mul_result"; std::string before_act = "mul_result";
if (input_b.size() != 0) { if (input_b.size() != 0) {
AddOp(OpRegistry::CreateOp( AppendOp(OpRegistry::CreateOp(
"rowwise_add", {{"X", {Output("mul_result")}}, {"b", {input_b[0]}}}, "rowwise_add", {{"X", {Output("mul_result")}}, {"b", {input_b[0]}}},
{{"Out", {Output("add_result")}}}, {})); {{"Out", {Output("add_result")}}}, {}));
before_act = "add_result"; before_act = "add_result";
@ -92,8 +92,8 @@ class FcOp : public operators::NetOp {
} }
} }
AddOp(OpRegistry::CreateOp("sigmoid", {{"X", {Output(before_act)}}}, AppendOp(OpRegistry::CreateOp("sigmoid", {{"X", {Output(before_act)}}},
{{"Out", {Output("Out")}}}, {})); {{"Out", {Output("Out")}}}, {}));
CompleteAddOp(false); CompleteAddOp(false);
} }
}; };
@ -234,13 +234,13 @@ TEST(Backward, net_fc_backward_not_have_b) {
TEST(Backward, net_input_of_network_not_need_grad) { TEST(Backward, net_input_of_network_not_need_grad) {
ops::NetOp net; ops::NetOp net;
net.AddOp(f::OpRegistry::CreateOp( net.AppendOp(f::OpRegistry::CreateOp(
"fc", {{"X", {"x"}}, {"W", {"W1"}}, {"b", {"b1"}}}, "fc", {{"X", {"x"}}, {"W", {"W1"}}, {"b", {"b1"}}},
{{"mul_result", {"mul_tmp_0"}}, {{"mul_result", {"mul_tmp_0"}},
{"add_result", {"add_tmp_0"}}, {"add_result", {"add_tmp_0"}},
{"Out", {"hidden0"}}}, {"Out", {"hidden0"}}},
{})); {}));
net.AddOp(f::OpRegistry::CreateOp( net.AppendOp(f::OpRegistry::CreateOp(
"fc", {{"X", {"hidden0"}}, {"W", {"W2"}}, {"b", {"b2"}}}, "fc", {{"X", {"hidden0"}}, {"W", {"W2"}}, {"b", {"b2"}}},
{{"mul_result", {"mul_tmp_1"}}, {{"mul_result", {"mul_tmp_1"}},
{"add_result", {"add_tmp_1"}}, {"add_result", {"add_tmp_1"}},
@ -273,10 +273,10 @@ TEST(Backward, net_input_of_network_not_need_grad) {
TEST(Backward, net_shared_weight) { TEST(Backward, net_shared_weight) {
ops::NetOp net; ops::NetOp net;
net.AddOp(f::OpRegistry::CreateOp("mul", {{"X", {"x"}}, {"Y", {"w"}}}, net.AppendOp(f::OpRegistry::CreateOp("mul", {{"X", {"x"}}, {"Y", {"w"}}},
{{"Out", {"out"}}}, {})); {{"Out", {"out"}}}, {}));
net.AddOp(f::OpRegistry::CreateOp("mul", {{"X", {"out"}}, {"Y", {"w"}}}, net.AppendOp(f::OpRegistry::CreateOp("mul", {{"X", {"out"}}, {"Y", {"w"}}},
{{"Out", {"FinalOut"}}}, {})); {{"Out", {"FinalOut"}}}, {}));
net.CompleteAddOp(); net.CompleteAddOp();
auto bwd = f::Backward(net, {}); auto bwd = f::Backward(net, {});
@ -357,19 +357,19 @@ TEST(Backward, op_part_of_input_are_not_need) {
TEST(Backward, linear_net_intermediate_variable_has_no_grad) { TEST(Backward, linear_net_intermediate_variable_has_no_grad) {
ops::NetOp net; ops::NetOp net;
net.AddOp(f::OpRegistry::CreateOp( net.AppendOp(f::OpRegistry::CreateOp(
"fc", {{"X", {"x1"}}, {"W", {"w1"}}, {"b", {"b1"}}}, "fc", {{"X", {"x1"}}, {"W", {"w1"}}, {"b", {"b1"}}},
{{"mul_result", {"mul_out1"}}, {{"mul_result", {"mul_out1"}},
{"add_result", {"add_out1"}}, {"add_result", {"add_out1"}},
{"Out", {"out1"}}}, {"Out", {"out1"}}},
{})); {}));
net.AddOp(f::OpRegistry::CreateOp( net.AppendOp(f::OpRegistry::CreateOp(
"fc", {{"X", {"out1"}}, {"W", {"w2"}}, {"b", {"b2"}}}, "fc", {{"X", {"out1"}}, {"W", {"w2"}}, {"b", {"b2"}}},
{{"mul_result", {"mul_out2"}}, {{"mul_result", {"mul_out2"}},
{"add_result", {"tmp_out2"}}, {"add_result", {"tmp_out2"}},
{"Out", {"out2"}}}, {"Out", {"out2"}}},
{})); {}));
net.AddOp(f::OpRegistry::CreateOp( net.AppendOp(f::OpRegistry::CreateOp(
"fc", {{"X", {"out2"}}, {"W", {"w3"}}, {"b", {"b3"}}}, "fc", {{"X", {"out2"}}, {"W", {"w3"}}, {"b", {"b3"}}},
{{"mul_result", {"mul_out3"}}, {{"mul_result", {"mul_out3"}},
{"add_result", {"tmp_out3"}}, {"add_result", {"tmp_out3"}},

@ -20,13 +20,13 @@ namespace framework {
enum class OpArgType { IN, OUT }; enum class OpArgType { IN, OUT };
static void TransOpArg(const OperatorBase* src_op, const OpArgType& src_type, static void TransOpArg(const OperatorBase* src_op, const OpArgType& src_type,
bool is_grad, OperatorBase::VarNameMap* vars) { bool is_grad, VariableNameMap* vars) {
const auto& src_inout = const auto& src_inout =
src_type == OpArgType::IN ? src_op->Inputs() : src_op->Outputs(); src_type == OpArgType::IN ? src_op->Inputs() : src_op->Outputs();
auto& dst_inout = *vars; auto& dst_inout = *vars;
const OpProto* proto = OpRegistry::op_info_map().at(src_op->Type()).proto_; auto& proto = OpInfoMap::Instance().Get(src_op->Type()).Proto();
const auto& src_arg_list = const auto& src_arg_list =
src_type == OpArgType::IN ? proto->inputs() : proto->outputs(); src_type == OpArgType::IN ? proto.inputs() : proto.outputs();
for (const auto& arg : src_arg_list) { for (const auto& arg : src_arg_list) {
if (arg.not_in_gradient() && !is_grad) continue; if (arg.not_in_gradient() && !is_grad) continue;
const std::string src_name = arg.name(); const std::string src_name = arg.name();
@ -40,26 +40,18 @@ static void TransOpArg(const OperatorBase* src_op, const OpArgType& src_type,
} }
OperatorBase* BuildGradOp(const OperatorBase* op) { OperatorBase* BuildGradOp(const OperatorBase* op) {
auto it = OpRegistry::op_info_map().find(op->Type()); auto& info = OpInfoMap::Instance().Get(op->Type());
PADDLE_ENFORCE(it != OpRegistry::op_info_map().end(), PADDLE_ENFORCE(info.HasGradientOp());
"'%s' has not been registered.", op->Type());
PADDLE_ENFORCE(it->second.proto_ != nullptr, "'%s' has no OpProto.",
op->Type());
std::string grad_op_type = it->second.grad_op_type_;
PADDLE_ENFORCE(!grad_op_type.empty(), "'%s' has no gradient operator.",
op->Type());
OperatorBase::VarNameMap inputs; VariableNameMap inputs;
OperatorBase::VarNameMap outputs; VariableNameMap outputs;
TransOpArg(op, OpArgType::IN, false, &inputs); // I TransOpArg(op, OpArgType::IN, false, &inputs); // I
TransOpArg(op, OpArgType::OUT, false, &inputs); // O TransOpArg(op, OpArgType::OUT, false, &inputs); // O
TransOpArg(op, OpArgType::OUT, true, &inputs); // OG TransOpArg(op, OpArgType::OUT, true, &inputs); // OG
TransOpArg(op, OpArgType::IN, true, &outputs); // IG TransOpArg(op, OpArgType::IN, true, &outputs); // IG
it = OpRegistry::op_info_map().find(grad_op_type); auto& grad_info = OpInfoMap::Instance().Get(info.grad_op_type_);
PADDLE_ENFORCE(it != OpRegistry::op_info_map().end(), return grad_info.Creator()(info.grad_op_type_, inputs, outputs, op->Attrs());
"'%s' has not been registered.", grad_op_type);
return it->second.creator_(grad_op_type, inputs, outputs, op->Attrs());
} }
} // namespace framework } // namespace framework

@ -0,0 +1,29 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/op_info.h"
namespace paddle {
namespace framework {
// Returns the process-wide operator registry, creating it on first use.
//
// The registry lives behind a function-local static pointer. C++11 guarantees
// that the initialization of a function-local static runs exactly once even
// when several threads reach it concurrently, which the previous
// "check a global pointer, then allocate" sequence did not.
//
// As before, the OpInfoMap object is allocated with `new` and never deleted,
// so references handed out here are not invalidated by static destruction.
OpInfoMap& OpInfoMap::Instance() {
  static OpInfoMap* const instance = new OpInfoMap();
  return *instance;
}
} // namespace framework
} // namespace paddle

@ -0,0 +1,101 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <functional>
#include <map>
#include <string>
#include <unordered_map>
#include "paddle/framework/attribute.h"
namespace paddle {
namespace framework {
// Forward declaration; this header only refers to OperatorBase by pointer.
class OperatorBase;
// Maps a parameter name to the list of variable names bound to that
// parameter.
using VariableNameMap = std::map<std::string, std::vector<std::string>>;
// Signature of an operator factory: receives the operator type, its input and
// output name maps and its attributes, and returns a raw pointer to an
// operator object.
using OpCreator = std::function<OperatorBase*(
const std::string& /*type*/, const VariableNameMap& /*inputs*/,
const VariableNameMap& /*outputs*/, const AttributeMap& /*attrs*/)>;
// Registration record describing one operator type.
//
// The raw pointers are non-owning from the point of view of this struct: it
// never deletes them. They default to nullptr so that a default-constructed
// OpInfo can be queried safely (HasOpProtoAndChecker() returns false and the
// checked accessors fail their enforce) instead of reading indeterminate
// pointer values.
struct OpInfo {
  // Factory used to build an operator instance of this type.
  OpCreator creator_;
  // Type name of the corresponding gradient operator; empty when there is
  // none.
  std::string grad_op_type_;
  // Protobuf description of the operator; may be null.
  OpProto* proto_{nullptr};
  // Attribute checker of the operator; may be null.
  OpAttrChecker* checker_{nullptr};

  // True only when both the proto and the attribute checker are present.
  bool HasOpProtoAndChecker() const {
    return proto_ != nullptr && checker_ != nullptr;
  }

  // Returns the operator proto. Enforces that it is registered (non-null)
  // and fully initialized.
  const OpProto& Proto() const {
    PADDLE_ENFORCE_NOT_NULL(proto_, "Operator Proto has not been registered");
    PADDLE_ENFORCE(proto_->IsInitialized(),
                   "Operator Proto must be initialized in op info");
    return *proto_;
  }

  // Returns the attribute checker. Enforces that it is registered (non-null).
  const OpAttrChecker& Checker() const {
    PADDLE_ENFORCE_NOT_NULL(checker_,
                            "Operator Checker has not been registered");
    return *checker_;
  }

  // Returns the operator factory. Enforces that one has been set.
  const OpCreator& Creator() const {
    PADDLE_ENFORCE_NOT_NULL(creator_,
                            "Operator Creator has not been registered");
    return creator_;
  }

  // True when a gradient operator type name has been recorded.
  bool HasGradientOp() const { return !grad_op_type_.empty(); }
};
// Registry mapping an operator type name to its OpInfo record.
//
// Instances cannot be copied or moved and the constructor is private; the
// only way to obtain one is through Instance().
class OpInfoMap {
 public:
  // Returns the shared registry instance.
  static OpInfoMap& Instance();

  OpInfoMap(const OpInfoMap& o) = delete;
  OpInfoMap(OpInfoMap&& o) = delete;
  OpInfoMap& operator=(const OpInfoMap& o) = delete;
  OpInfoMap& operator=(OpInfoMap&& o) = delete;

  // True when an operator named `op_type` has been inserted.
  bool Has(const std::string& op_type) const {
    return map_.find(op_type) != map_.end();
  }

  // Stores a copy of `info` under `type`. Enforces that `type` has not been
  // inserted before.
  void Insert(const std::string& type, const OpInfo& info) {
    PADDLE_ENFORCE(!Has(type), "Operator %s has been registered", type);
    map_.insert({type, info});
  }

  // Returns the record stored under `type`. Enforces that it exists.
  const OpInfo& Get(const std::string& type) const {
    auto it = map_.find(type);
    PADDLE_ENFORCE(it != map_.end(), "Operator %s are not found", type);
    return it->second;
  }

  // Invokes `callback(type_name, op_info)` once for every stored record, in
  // the container's (unordered) iteration order. The traversal is read-only,
  // so the method is const-qualified and usable through a const reference;
  // the arguments passed to the callback are const in either case because
  // both the key and the mapped OpInfo are stored as const.
  template <typename Callback>
  void IterAllInfo(Callback callback) const {
    for (const auto& entry : map_) {
      callback(entry.first, entry.second);
    }
  }

 private:
  OpInfoMap() = default;

  std::unordered_map<std::string, const OpInfo> map_;
};
} // namespace framework
} // namespace paddle

@ -19,32 +19,18 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
std::unique_ptr<OperatorBase> OpRegistry::CreateOp(const std::string& type, std::unique_ptr<OperatorBase> OpRegistry::CreateOp(
const VarNameMap& inputs, const std::string& type, const VariableNameMap& inputs,
const VarNameMap& outputs, const VariableNameMap& outputs, AttributeMap attrs) {
AttributeMap attrs) { auto& info = OpInfoMap::Instance().Get(type);
auto it = op_info_map().find(type); info.Checker().Check(attrs);
PADDLE_ENFORCE(it != op_info_map().end(), auto op = info.Creator()(type, inputs, outputs, attrs);
"Operator '%s' has not been registered.", type);
it->second.checker_->Check(attrs);
auto op = it->second.creator_(type, inputs, outputs, attrs);
return std::unique_ptr<OperatorBase>(op); return std::unique_ptr<OperatorBase>(op);
} }
std::unique_ptr<OperatorBase> OpRegistry::CreateOp(const OpDesc& op_desc) { static VariableNameMap ConvertOpDescVarsToVarNameMap(
VarNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs());
VarNameMap outputs = ConvertOpDescVarsToVarNameMap(op_desc.outputs());
AttributeMap attrs;
for (auto& attr : op_desc.attrs()) {
attrs[attr.name()] = GetAttrValue(attr);
}
return CreateOp(op_desc.type(), inputs, outputs, attrs);
}
OperatorBase::VarNameMap OpRegistry::ConvertOpDescVarsToVarNameMap(
const google::protobuf::RepeatedPtrField<OpDesc::Var>& op_desc_vars) { const google::protobuf::RepeatedPtrField<OpDesc::Var>& op_desc_vars) {
VarNameMap ret_val; VariableNameMap ret_val;
for (auto& var : op_desc_vars) { for (auto& var : op_desc_vars) {
auto& var_names = ret_val[var.parameter()]; auto& var_names = ret_val[var.parameter()];
auto& var_names_in_proto = var.arguments(); auto& var_names_in_proto = var.arguments();
@ -55,6 +41,17 @@ OperatorBase::VarNameMap OpRegistry::ConvertOpDescVarsToVarNameMap(
return ret_val; return ret_val;
} }
std::unique_ptr<OperatorBase> OpRegistry::CreateOp(const OpDesc& op_desc) {
VariableNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs());
VariableNameMap outputs = ConvertOpDescVarsToVarNameMap(op_desc.outputs());
AttributeMap attrs;
for (auto& attr : op_desc.attrs()) {
attrs[attr.name()] = GetAttrValue(attr);
}
return CreateOp(op_desc.type(), inputs, outputs, attrs);
}
std::unique_ptr<OperatorBase> OpRegistry::CreateGradOp(const OperatorBase& op) { std::unique_ptr<OperatorBase> OpRegistry::CreateGradOp(const OperatorBase& op) {
PADDLE_ENFORCE(!op.IsNetOp(), "Use framework::Backward to get backward ops"); PADDLE_ENFORCE(!op.IsNetOp(), "Use framework::Backward to get backward ops");
return std::unique_ptr<OperatorBase>(BuildGradOp(&op)); return std::unique_ptr<OperatorBase>(BuildGradOp(&op));

@ -23,6 +23,7 @@ limitations under the License. */
#include "paddle/framework/attribute.h" #include "paddle/framework/attribute.h"
#include "paddle/framework/framework.pb.h" #include "paddle/framework/framework.pb.h"
#include "paddle/framework/grad_op_builder.h" #include "paddle/framework/grad_op_builder.h"
#include "paddle/framework/op_info.h"
#include "paddle/framework/operator.h" #include "paddle/framework/operator.h"
#include "paddle/framework/scope.h" #include "paddle/framework/scope.h"
@ -30,28 +31,16 @@ namespace paddle {
namespace framework { namespace framework {
class OpRegistry { class OpRegistry {
using VarNameMap = OperatorBase::VarNameMap;
using OpCreator = std::function<OperatorBase*(
const std::string& /*type*/, const VarNameMap& /*inputs*/,
const VarNameMap& /*outputs*/, const AttributeMap& /*attrs*/)>;
public: public:
struct OpInfo {
OpCreator creator_;
std::string grad_op_type_;
OpProto* proto_;
OpAttrChecker* checker_;
};
template <typename OpType, typename ProtoMakerType, typename GradOpType> template <typename OpType, typename ProtoMakerType, typename GradOpType>
static void RegisterOp(const std::string& op_type, static void RegisterOp(const std::string& op_type,
const std::string& grad_op_type) { const std::string& grad_op_type) {
PADDLE_ENFORCE(op_info_map().count(op_type) == 0, PADDLE_ENFORCE(!OpInfoMap::Instance().Has(op_type),
"'%s' is registered more than once.", op_type); "'%s' is registered more than once.", op_type);
OpInfo op_info; OpInfo op_info;
op_info.creator_ = [](const std::string& type, const VarNameMap& inputs, op_info.creator_ = [](
const VarNameMap& outputs, const std::string& type, const VariableNameMap& inputs,
const AttributeMap& attrs) { const VariableNameMap& outputs, const AttributeMap& attrs) {
return new OpType(type, inputs, outputs, attrs); return new OpType(type, inputs, outputs, attrs);
}; };
op_info.grad_op_type_ = grad_op_type; op_info.grad_op_type_ = grad_op_type;
@ -70,7 +59,7 @@ class OpRegistry {
op_info.proto_ = nullptr; op_info.proto_ = nullptr;
op_info.checker_ = nullptr; op_info.checker_ = nullptr;
} }
op_info_map().insert(std::make_pair(op_type, op_info)); OpInfoMap::Instance().Insert(op_type, op_info);
// register gradient op // register gradient op
if (!grad_op_type.empty()) { if (!grad_op_type.empty()) {
RegisterOp<GradOpType, NOPMaker, NOP>(grad_op_type, ""); RegisterOp<GradOpType, NOPMaker, NOP>(grad_op_type, "");
@ -78,21 +67,13 @@ class OpRegistry {
} }
static std::unique_ptr<OperatorBase> CreateOp(const std::string& type, static std::unique_ptr<OperatorBase> CreateOp(const std::string& type,
const VarNameMap& inputs, const VariableNameMap& inputs,
const VarNameMap& outputs, const VariableNameMap& outputs,
AttributeMap attrs); AttributeMap attrs);
static std::unique_ptr<OperatorBase> CreateOp(const OpDesc& op_desc); static std::unique_ptr<OperatorBase> CreateOp(const OpDesc& op_desc);
static VarNameMap ConvertOpDescVarsToVarNameMap(
const google::protobuf::RepeatedPtrField<OpDesc::Var>& op_desc_vars);
static std::unique_ptr<OperatorBase> CreateGradOp(const OperatorBase& op); static std::unique_ptr<OperatorBase> CreateGradOp(const OperatorBase& op);
static std::unordered_map<std::string, const OpInfo>& op_info_map() {
static std::unordered_map<std::string, const OpInfo> op_info_map_;
return op_info_map_;
}
}; };
class Registrar { class Registrar {

@ -115,8 +115,8 @@ void OperatorBase::Rename(const std::string& old_name,
} }
OperatorBase::OperatorBase(const std::string& type, OperatorBase::OperatorBase(const std::string& type,
const OperatorBase::VarNameMap& inputs, const VariableNameMap& inputs,
const OperatorBase::VarNameMap& outputs, const VariableNameMap& outputs,
const AttributeMap& attrs) const AttributeMap& attrs)
: type_(type), inputs_(inputs), outputs_(outputs), attrs_(attrs) { : type_(type), inputs_(inputs), outputs_(outputs), attrs_(attrs) {
static std::atomic<size_t> gUniqId(0UL); static std::atomic<size_t> gUniqId(0UL);
@ -141,18 +141,10 @@ std::vector<std::string> OperatorBase::OutputVars(bool has_intermediate) const {
} }
return ret_val; return ret_val;
} }
auto it = OpRegistry::op_info_map().find(type_); auto& info = OpInfoMap::Instance().Get(Type());
PADDLE_ENFORCE(
it != OpRegistry::op_info_map().end(),
"Operator %s not registered, cannot figure out intermediate outputs",
type_);
PADDLE_ENFORCE(
it->second.proto_ != nullptr,
"Operator %s has no OpProto, cannot figure out intermediate outputs",
type_);
// get all OpProto::Var for outputs // get all OpProto::Var for outputs
for (auto& o : it->second.proto_->outputs()) { for (auto& o : info.Proto().outputs()) {
// ignore all intermediate output // ignore all intermediate output
if (o.intermediate()) continue; if (o.intermediate()) continue;
auto out = outputs_.find(o.name()); auto out = outputs_.find(o.name());

@ -19,6 +19,7 @@ limitations under the License. */
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
#include "op_info.h"
#include "paddle/framework/attribute.h" #include "paddle/framework/attribute.h"
#include "paddle/framework/framework.pb.h" #include "paddle/framework/framework.pb.h"
#include "paddle/framework/scope.h" #include "paddle/framework/scope.h"
@ -62,10 +63,8 @@ class ExecutionContext;
*/ */
class OperatorBase { class OperatorBase {
public: public:
using VarNameMap = std::map<std::string, std::vector<std::string>>; OperatorBase(const std::string& type, const VariableNameMap& inputs,
const VariableNameMap& outputs, const AttributeMap& attrs);
OperatorBase(const std::string& type, const VarNameMap& inputs,
const VarNameMap& outputs, const AttributeMap& attrs);
virtual ~OperatorBase() {} virtual ~OperatorBase() {}
@ -93,8 +92,8 @@ class OperatorBase {
/// rename inputs outputs name /// rename inputs outputs name
void Rename(const std::string& old_name, const std::string& new_name); void Rename(const std::string& old_name, const std::string& new_name);
const VarNameMap& Inputs() const { return inputs_; } const VariableNameMap& Inputs() const { return inputs_; }
const VarNameMap& Outputs() const { return outputs_; } const VariableNameMap& Outputs() const { return outputs_; }
//! Get a input with argument's name described in `op_proto` //! Get a input with argument's name described in `op_proto`
const std::string& Input(const std::string& name) const; const std::string& Input(const std::string& name) const;
//! Get a input which has multiple variables. //! Get a input which has multiple variables.
@ -122,30 +121,32 @@ class OperatorBase {
// I (Inputs)opear // I (Inputs)opear
// O (Outputs) // O (Outputs)
// OG (Output Gradients) // OG (Output Gradients)
VarNameMap inputs_; VariableNameMap inputs_;
// NOTE: in case of OpGrad, outputs_ contains // NOTE: in case of OpGrad, outputs_ contains
// IG (Inputs Gradients) // IG (Inputs Gradients)
VarNameMap outputs_; VariableNameMap outputs_;
AttributeMap attrs_; AttributeMap attrs_;
}; };
// Macro for define a clone method. // Macro for define a clone method.
// If you are writing an kernel operator, `Clone` will be defined when you // If you are writing an kernel operator, `Clone` will be defined when you
// register it. i.e. `Clone` method is not needed to define by yourself. // register it. i.e. `Clone` method is not needed to define by yourself.
#define DEFINE_OP_CLONE_METHOD(CLS) \ #define DEFINE_OP_CLONE_METHOD(cls) \
std::unique_ptr<OperatorBase> Clone() const final { \ std::unique_ptr<OperatorBase> Clone() const final { \
return std::unique_ptr<OperatorBase>(new CLS(*this)); \ return std::unique_ptr<OperatorBase>(new cls(*this)); \
} }
// Macro for define a default constructor for Operator. // Macro for define a default constructor for Operator.
// You can also use // You can also use
// using PARENT_CLASS::PARENT_CLASS; // using PARENT_CLASS::PARENT_CLASS;
// to use parent's constructor. // to use parent's constructor.
#define DEFINE_OP_CONSTRUCTOR(CLS, PARENT_CLS) \ #define DEFINE_OP_CONSTRUCTOR(cls, parent_cls) \
CLS(const std::string& type, const VarNameMap& inputs, \ cls(const std::string& type, \
const VarNameMap& outputs, const paddle::framework::AttributeMap& attrs) \ const ::paddle::framework::VariableNameMap& inputs, \
: PARENT_CLS(type, inputs, outputs, attrs) {} const ::paddle::framework::VariableNameMap& outputs, \
const paddle::framework::AttributeMap& attrs) \
: parent_cls(type, inputs, outputs, attrs) {}
class NOP : public OperatorBase { class NOP : public OperatorBase {
public: public:
@ -389,8 +390,8 @@ class OperatorWithKernel : public OperatorBase {
using OpKernelMap = using OpKernelMap =
std::unordered_map<OpKernelKey, std::unique_ptr<OpKernel>, OpKernelHash>; std::unordered_map<OpKernelKey, std::unique_ptr<OpKernel>, OpKernelHash>;
OperatorWithKernel(const std::string& type, const VarNameMap& inputs, OperatorWithKernel(const std::string& type, const VariableNameMap& inputs,
const VarNameMap& outputs, const AttributeMap& attrs) const VariableNameMap& outputs, const AttributeMap& attrs)
: OperatorBase(type, inputs, outputs, attrs) {} : OperatorBase(type, inputs, outputs, attrs) {}
void InferShape(const Scope& scope) const override { void InferShape(const Scope& scope) const override {

@ -23,8 +23,8 @@ static int op_run_num = 0;
class OpWithoutKernelTest : public OperatorBase { class OpWithoutKernelTest : public OperatorBase {
public: public:
OpWithoutKernelTest(const std::string& type, const VarNameMap& inputs, OpWithoutKernelTest(const std::string& type, const VariableNameMap& inputs,
const VarNameMap& outputs, const AttributeMap& attrs) const VariableNameMap& outputs, const AttributeMap& attrs)
: OperatorBase(type, inputs, outputs, attrs), x(1) {} : OperatorBase(type, inputs, outputs, attrs), x(1) {}
void InferShape(const Scope& scope) const override {} void InferShape(const Scope& scope) const override {}
void Run(const Scope& scope, void Run(const Scope& scope,
@ -249,8 +249,9 @@ TEST(OpKernel, multi_inputs) {
class OperatorClone : public paddle::framework::OperatorBase { class OperatorClone : public paddle::framework::OperatorBase {
public: public:
DEFINE_OP_CLONE_METHOD(OperatorClone); DEFINE_OP_CLONE_METHOD(OperatorClone);
OperatorClone(const std::string& type, const VarNameMap& inputs, OperatorClone(const std::string& type,
const VarNameMap& outputs, const paddle::framework::VariableNameMap& inputs,
const paddle::framework::VariableNameMap& outputs,
const paddle::framework::AttributeMap& attrs) const paddle::framework::AttributeMap& attrs)
: OperatorBase(type, inputs, outputs, attrs) {} : OperatorBase(type, inputs, outputs, attrs) {}
void InferShape(const paddle::framework::Scope& scope) const override {} void InferShape(const paddle::framework::Scope& scope) const override {}

@ -31,7 +31,7 @@ limitations under the License. */
namespace py = pybind11; namespace py = pybind11;
USE_OP(add_two); USE_OP(add_two);
USE_CPU_ONLY_OP(onehot_cross_entropy); USE_OP(onehot_cross_entropy);
USE_OP(sgd); USE_OP(sgd);
USE_OP(mul); USE_OP(mul);
USE_OP(mean); USE_OP(mean);
@ -44,6 +44,7 @@ USE_OP(gaussian_random);
USE_OP(uniform_random); USE_OP(uniform_random);
USE_OP(scale); USE_OP(scale);
USE_OP_ITSELF(identity); USE_OP_ITSELF(identity);
USE_CPU_ONLY_OP(gather);
namespace paddle { namespace paddle {
namespace framework { namespace framework {
@ -140,19 +141,16 @@ All parameter, weight, gradient are variables in Paddle.
//! @note: Be careful! PyBind will return std::string as an unicode, not //! @note: Be careful! PyBind will return std::string as an unicode, not
//! Python str. If you want a str object, you should cast them in Python. //! Python str. If you want a str object, you should cast them in Python.
m.def("get_all_op_protos", []() -> std::vector<py::bytes> { m.def("get_all_op_protos", []() -> std::vector<py::bytes> {
auto &op_info_map = OpRegistry::op_info_map();
std::vector<py::bytes> ret_values; std::vector<py::bytes> ret_values;
for (auto it = op_info_map.begin(); it != op_info_map.end(); ++it) {
const OpProto *proto = it->second.proto_; OpInfoMap::Instance().IterAllInfo([&ret_values](const std::string &type,
if (proto == nullptr) { const OpInfo &info) {
continue; if (!info.HasOpProtoAndChecker()) return;
}
PADDLE_ENFORCE(proto->IsInitialized(), "OpProto must all be initialized");
std::string str; std::string str;
PADDLE_ENFORCE(proto->SerializeToString(&str), PADDLE_ENFORCE(info.Proto().SerializeToString(&str),
"Serialize OpProto Error. This could be a bug of Paddle."); "Serialize OpProto Error. This could be a bug of Paddle.");
ret_values.push_back(py::bytes(str)); ret_values.emplace_back(str);
} });
return ret_values; return ret_values;
}); });
m.def_submodule( m.def_submodule(
@ -224,8 +222,8 @@ All parameter, weight, gradient are variables in Paddle.
retv->SetType("plain_net"); retv->SetType("plain_net");
return retv; return retv;
}) })
.def("add_op", [](operators::NetOp &self, .def("append_op", [](operators::NetOp &self,
const OperatorBase &op) { self.AddOp(op); }) const OperatorBase &op) { self.AppendOp(op); })
.def("complete_add_op", &operators::NetOp::CompleteAddOp) .def("complete_add_op", &operators::NetOp::CompleteAddOp)
.def("complete_add_op", [](std::shared_ptr<operators::NetOp> &self) { .def("complete_add_op", [](std::shared_ptr<operators::NetOp> &self) {
self->CompleteAddOp(); self->CompleteAddOp();

@ -4,6 +4,10 @@ file(GLOB cpp_files . *Op.cpp)
list(APPEND h_files Function.h) list(APPEND h_files Function.h)
list(APPEND cpp_files Function.cpp) list(APPEND cpp_files Function.cpp)
list(APPEND cpp_files BufferArg.cpp) list(APPEND cpp_files BufferArg.cpp)
list(APPEND cpp_files GemmFunctor.cpp)
if(USE_EIGEN_FOR_BLAS)
list(APPEND cpp_files EigenGemm.cpp)
endif(USE_EIGEN_FOR_BLAS)
if(WITH_GPU) if(WITH_GPU)
file(GLOB cu_files . *OpGpu.cu) file(GLOB cu_files . *OpGpu.cu)

@ -14,7 +14,6 @@ limitations under the License. */
#include "DepthwiseConvOp.h" #include "DepthwiseConvOp.h"
#include "ConvOp.h" #include "ConvOp.h"
#include "GemmFunctor.h"
namespace paddle { namespace paddle {

@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "DepthwiseConvOp.h" #include "DepthwiseConvOp.h"
#include "GemmFunctor.h"
#include "paddle/math/BaseMatrix.h" #include "paddle/math/BaseMatrix.h"
namespace paddle { namespace paddle {

@ -0,0 +1,91 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <glog/logging.h>
#include "unsupported/Eigen/CXX11/Tensor"
namespace paddle {
/**
 * @brief Row-major GEMM, C = alpha * op(A) * op(B) + beta * C, implemented
 * with an Eigen tensor contraction.
 *
 * op(X) is X when the matching trans flag is false and X^T when it is true,
 * so op(A) is M x K, op(B) is K x N and C is M x N.
 *
 * Only densely packed matrices are supported: every leading dimension is
 * CHECKed to be equal to the stored row length of its matrix.
 *
 * NOTE(review): the maps are declared Eigen::Aligned, so A, B and C are
 * presumably expected to meet Eigen's alignment requirement -- confirm for
 * callers that pass pointers with an offset.
 */
template <class T>
struct EigenBlasGemm {
  typedef Eigen::TensorMap<Eigen::Tensor<T, 2, Eigen::RowMajor, int>,
                           Eigen::Aligned>
      Matrix;

  /**
   * @param transA  whether A is stored transposed (as K x M).
   * @param transB  whether B is stored transposed (as N x K).
   * @param M       rows of op(A) and of C.
   * @param N       columns of op(B) and of C.
   * @param K       columns of op(A) and rows of op(B).
   * @param alpha   scale applied to op(A) * op(B).
   * @param A       input matrix A (read only).
   * @param lda     leading dimension of A; must equal its stored row length.
   * @param B       input matrix B (read only).
   * @param ldb     leading dimension of B; must equal its stored row length.
   * @param beta    scale applied to the previous content of C.
   * @param C       output matrix, M x N.
   * @param ldc     leading dimension of C; must equal N.
   */
  static void compute(const bool transA,
                      const bool transB,
                      const int M,
                      const int N,
                      const int K,
                      const T alpha,
                      const T* A,
                      const int lda,
                      const T* B,
                      const int ldb,
                      const T beta,
                      T* C,
                      const int ldc) {
    // Stored shape of A: K x M when transposed, M x K otherwise.
    Eigen::array<int, 2> sizeA;
    if (transA) {
      sizeA[0] = K;
      sizeA[1] = M;
      CHECK_EQ(M, lda);
    } else {
      sizeA[0] = M;
      sizeA[1] = K;
      CHECK_EQ(K, lda);
    }

    // Stored shape of B: N x K when transposed, K x N otherwise.
    Eigen::array<int, 2> sizeB;
    if (transB) {
      sizeB[0] = N;
      sizeB[1] = K;
      CHECK_EQ(K, ldb);
    } else {
      sizeB[0] = K;
      sizeB[1] = N;
      CHECK_EQ(N, ldb);
    }

    Eigen::array<int, 2> sizeC;
    sizeC[0] = M;
    sizeC[1] = N;
    CHECK_EQ(N, ldc);

    // TensorMap needs a mutable pointer; a and b are only ever read.
    const Matrix a(const_cast<T*>(A), sizeA);
    const Matrix b(const_cast<T*>(B), sizeB);
    Matrix c(C, sizeC);

    // Contract over the K axis of each operand; which axis that is depends
    // on whether the operand is stored transposed.
    typedef typename Eigen::Tensor<T, 2>::DimensionPair DimPair;
    Eigen::array<DimPair, 1> dims;
    dims[0] = DimPair(transA ? 0 : 1, transB ? 1 : 0);

    Eigen::DefaultDevice device;
    if (beta == T(0)) {
      // With beta == 0 the previous content of C must not be read: it may be
      // uninitialized, and 0 * NaN (or 0 * Inf) would poison the result.
      if (alpha == T(1)) {
        c.device(device) = a.contract(b, dims);
      } else {
        c.device(device) = alpha * a.contract(b, dims);
      }
    } else if (alpha == T(1) && beta == T(1)) {
      c.device(device) += a.contract(b, dims);
    } else {
      c.device(device) = alpha * a.contract(b, dims) + beta * c;
    }
  }
};
// Explicitly instantiate EigenBlasGemm for the single element type selected
// at build time: double when PADDLE_TYPE_DOUBLE is defined, float otherwise.
#ifdef PADDLE_TYPE_DOUBLE
template class EigenBlasGemm<double>;
#else
template class EigenBlasGemm<float>;
#endif
} // namespace paddle

@ -85,7 +85,6 @@ public:
} }
Im2ColFunctor<kCFO, Device, real> im2col; Im2ColFunctor<kCFO, Device, real> im2col;
GemmFunctor<Device, real> gemm;
size_t inputOffset = imShape.getElements(); size_t inputOffset = imShape.getElements();
size_t outputOffset = size_t outputOffset =
(outputChannels / groups_) * outputHeight * outputWidth; (outputChannels / groups_) * outputHeight * outputWidth;
@ -108,19 +107,19 @@ public:
int M = outputChannels / groups_; int M = outputChannels / groups_;
int N = outputHeight * outputWidth; int N = outputHeight * outputWidth;
int K = inputChannels / groups_ * filterHeight * filterWidth; int K = inputChannels / groups_ * filterHeight * filterWidth;
gemm(CblasNoTrans, BlasGemm<Device, real>::compute(false,
CblasNoTrans, false,
M, M,
N, N,
K, K,
1.0f, 1.0f,
filterData + g * filterOffset, filterData + g * filterOffset,
K, K,
colData, colData,
N, N,
beta, beta,
outputData + g * outputOffset, outputData + g * outputOffset,
N); N);
} }
inputData += inputChannels * inputHeight * inputWidth; inputData += inputChannels * inputHeight * inputWidth;
outputData += outputChannels * outputHeight * outputWidth; outputData += outputChannels * outputHeight * outputWidth;
@ -188,8 +187,6 @@ public:
} }
Col2ImFunctor<kCFO, Device, real> col2im; Col2ImFunctor<kCFO, Device, real> col2im;
GemmFunctor<Device, real> gemm;
size_t inputOffset = imShape.getElements(); size_t inputOffset = imShape.getElements();
size_t outputOffset = size_t outputOffset =
(outputChannels / groups_) * outputHeight * outputWidth; (outputChannels / groups_) * outputHeight * outputWidth;
@ -205,19 +202,19 @@ public:
colData = inputGrad + g * inputOffset; colData = inputGrad + g * inputOffset;
scale = 1.0f; scale = 1.0f;
} }
gemm(CblasTrans, BlasGemm<Device, real>::compute(true,
CblasNoTrans, false,
M, M,
N, N,
K, K,
1.0f, 1.0f,
filterData + g * filterOffset, filterData + g * filterOffset,
M, M,
outputGrad + g * outputOffset, outputGrad + g * outputOffset,
N, N,
scale, scale,
colData, colData,
N); N);
if (needIm2col) { if (needIm2col) {
col2im(inputGrad + g * inputOffset, col2im(inputGrad + g * inputOffset,
imShape, imShape,
@ -299,7 +296,6 @@ public:
} }
Im2ColFunctor<kCFO, Device, real> im2col; Im2ColFunctor<kCFO, Device, real> im2col;
GemmFunctor<Device, real> gemm;
size_t inputOffset = imShape.getElements(); size_t inputOffset = imShape.getElements();
size_t outputOffset = size_t outputOffset =
(outputChannels / groups_) * outputHeight * outputWidth; (outputChannels / groups_) * outputHeight * outputWidth;
@ -321,19 +317,19 @@ public:
int M = outputChannels / groups_; int M = outputChannels / groups_;
int K = outputHeight * outputWidth; int K = outputHeight * outputWidth;
int N = inputChannels / groups_ * filterHeight * filterWidth; int N = inputChannels / groups_ * filterHeight * filterWidth;
gemm(CblasNoTrans, BlasGemm<Device, real>::compute(false,
CblasTrans, true,
M, M,
N, N,
K, K,
1.0f, 1.0f,
outputGrad + g * outputOffset, outputGrad + g * outputOffset,
K, K,
colData, colData,
K, K,
i == 0 ? beta : 1.0f, i == 0 ? beta : 1.0f,
filterGrad + g * filterOffset, filterGrad + g * filterOffset,
N); N);
} }
inputData += inputChannels * inputHeight * inputWidth; inputData += inputChannels * inputHeight * inputWidth;
outputGrad += outputChannels * outputHeight * outputWidth; outputGrad += outputChannels * outputHeight * outputWidth;

@ -0,0 +1,90 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "GemmFunctor.h"
#include "paddle/math/MathFunctions.h"
namespace paddle {
/**
 * @brief CPU specialization of BlasGemm.
 *
 * Forwards to EigenBlasGemm when PADDLE_USE_EIGEN_FOR_BLAS is defined and to
 * the gemm<T>() wrapper otherwise. The boolean transpose flags are mapped to
 * CblasTrans / CblasNoTrans for the latter.
 */
template <class T>
struct BlasGemm<DEVICE_TYPE_CPU, T> {
  static void compute(const bool transA,
                      const bool transB,
                      const int M,
                      const int N,
                      const int K,
                      const T alpha,
                      const T* A,
                      const int lda,
                      const T* B,
                      const int ldb,
                      const T beta,
                      T* C,
                      const int ldc) {
#ifdef PADDLE_USE_EIGEN_FOR_BLAS
    EigenBlasGemm<T>::compute(
        transA, transB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
#else
    // Translate the bool flags into the CBLAS transpose enumerators.
    const auto opA = transA ? CblasTrans : CblasNoTrans;
    const auto opB = transB ? CblasTrans : CblasNoTrans;
    gemm<T>(opA, opB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
#endif
  }
};
/**
 * @brief GPU specialization of BlasGemm.
 *
 * Forwards to hl_matrix_mul, mapping the boolean transpose flags to
 * HPPL_OP_T / HPPL_OP_N. Note the argument order of hl_matrix_mul differs
 * from BLAS: C and the sizes come before alpha/beta and the leading
 * dimensions.
 */
template <class T>
struct BlasGemm<DEVICE_TYPE_GPU, T> {
  static void compute(const bool transA,
                      const bool transB,
                      const int M,
                      const int N,
                      const int K,
                      const T alpha,
                      const T* A,
                      const int lda,
                      const T* B,
                      const int ldb,
                      const T beta,
                      T* C,
                      const int ldc) {
    // hl_matrix_mul is called with non-const input pointers, so constness is
    // removed explicitly with const_cast rather than an unchecked C-style
    // cast.
    hl_matrix_mul(const_cast<T*>(A),
                  transA ? HPPL_OP_T : HPPL_OP_N,
                  const_cast<T*>(B),
                  transB ? HPPL_OP_T : HPPL_OP_N,
                  C,
                  M,
                  N,
                  K,
                  alpha,
                  beta,
                  lda,
                  ldb,
                  ldc);
  }
};
// Explicitly instantiate the CPU and GPU specializations for the `real`
// element type, since their definitions live in this source file.
template struct BlasGemm<DEVICE_TYPE_CPU, real>;
template struct BlasGemm<DEVICE_TYPE_GPU, real>;
} // namespace paddle

@ -14,7 +14,7 @@ limitations under the License. */
#pragma once #pragma once
#include "paddle/math/MathFunctions.h" #include "TensorType.h"
namespace paddle { namespace paddle {
@ -24,73 +24,42 @@ namespace paddle {
// of MatMulFunction, we need to consider the reconstruction of hl_matrix_mul // of MatMulFunction, we need to consider the reconstruction of hl_matrix_mul
// interface. // interface.
template <DeviceType Device, class T> template <DeviceType Device, class T>
class GemmFunctor { struct BlasGemm {
public: static void compute(const bool transA,
void operator()(const CBLAS_TRANSPOSE transA, const bool transB,
const CBLAS_TRANSPOSE TransB, const int M,
const int M, const int N,
const int N, const int K,
const int K, const T alpha,
const T alpha, const T* A,
const T* A, const int lda,
const int lda, const T* B,
const T* B, const int ldb,
const int ldb, const T beta,
const T beta, T* C,
T* C, const int ldc);
const int ldc);
}; };
// TODO(hedaoyuan): The definition of the `real` type in Paddle conflicts
// with the Eigen library, so code that includes Eigen cannot also include
// the Paddle header files. For now this requires a separate EigenBlasGemm
// template class that does not take the DeviceType parameter.
// I will fix this problem and merge BlasGemm and EigenBlasGemm into one.
template <class T> template <class T>
class GemmFunctor<DEVICE_TYPE_CPU, T> { struct EigenBlasGemm {
public: static void compute(const bool transA,
void operator()(const CBLAS_TRANSPOSE transA, const bool transB,
const CBLAS_TRANSPOSE TransB, const int M,
const int M, const int N,
const int N, const int K,
const int K, const T alpha,
const T alpha, const T* A,
const T* A, const int lda,
const int lda, const T* B,
const T* B, const int ldb,
const int ldb, const T beta,
const T beta, T* C,
T* C, const int ldc);
const int ldc) {
gemm<T>(transA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
}
};
template <class T>
class GemmFunctor<DEVICE_TYPE_GPU, T> {
public:
void operator()(const CBLAS_TRANSPOSE transA,
const CBLAS_TRANSPOSE TransB,
const int M,
const int N,
const int K,
const T alpha,
const T* A,
const int lda,
const T* B,
const int ldb,
const T beta,
T* C,
const int ldc) {
hl_matrix_mul((T*)A,
transA == CblasNoTrans ? HPPL_OP_N : HPPL_OP_T,
(T*)B,
TransB == CblasNoTrans ? HPPL_OP_N : HPPL_OP_T,
C,
M,
N,
K,
alpha,
beta,
lda,
ldb,
ldc);
}
}; };
} // namespace paddle } // namespace paddle

@ -202,7 +202,7 @@ void NeuralNetwork::prefetch(const std::vector<Argument>& inArgs) {
auto mat = dynamic_cast<SparsePrefetchRowCpuMatrix*>( auto mat = dynamic_cast<SparsePrefetchRowCpuMatrix*>(
para->getMat(PARAMETER_VALUE).get()); para->getMat(PARAMETER_VALUE).get());
para->clearGradient(); para->clearGradient();
mat->clearIndices(); if (mat) mat->clearIndices();
} }
} }
} }

@ -184,7 +184,7 @@ public:
} }
void backward(const UpdateCallback& callback) override { void backward(const UpdateCallback& callback) override {
if (biases_) { if (biases_ && biases_->getWGrad()) {
backwardActivation(); backwardActivation();
biases_->getWGrad()->collectBias(*getOutputGrad(), 1); biases_->getWGrad()->collectBias(*getOutputGrad(), 1);
biases_->getParameterPtr()->incUpdate(callback); biases_->getParameterPtr()->incUpdate(callback);
@ -1012,11 +1012,6 @@ void RecurrentGradientMachine::generateSequence() {
/* width */ resultNum, /* width */ resultNum,
false, false,
/* useGpu */ false); /* useGpu */ false);
Matrix::resizeOrCreate(generator_.outArg.value,
/* height */ maxGenWordCount,
/* width */ 1,
false,
/* useGpu */ false);
} }
ICpuGpuVector::resizeOrCreate(generator_.outArg.sequenceStartPositions, ICpuGpuVector::resizeOrCreate(generator_.outArg.sequenceStartPositions,
numSequences + 1, numSequences + 1,
@ -1026,7 +1021,7 @@ void RecurrentGradientMachine::generateSequence() {
} else { } else {
oneWaySearch(numSequences); oneWaySearch(numSequences);
} }
if (dataArgsSize_) createDataOutlink(batchMachineIdVec_); if (dataArgsSize_) createDataOutlink();
size_t size = generator_.ids.size(); size_t size = generator_.ids.size();
generator_.outArg.ids->resize(size); generator_.outArg.ids->resize(size);
@ -1106,6 +1101,7 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) {
} }
batchMachineIdVec_.clear(); batchMachineIdVec_.clear();
batchMachineStartPos_.clear();
int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false); int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false);
starts[0] = 0; starts[0] = 0;
generator_.ids.clear(); generator_.ids.clear();
@ -1312,13 +1308,20 @@ void RecurrentGradientMachine::fillGenOutputs() {
finalPaths_[i].resize(minFinalPathsSize); finalPaths_[i].resize(minFinalPathsSize);
} }
batchMachineIdVec_.clear();
generator_.ids.clear(); generator_.ids.clear();
int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false); int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false);
starts[0] = 0; starts[0] = 0;
if (numResults > 1) { if (numResults > 1) {
real* probs = generator_.outArg.in->getData(); int idsProbSaveSize = 0;
for (auto inSeq : finalPaths_) {
for (auto path : inSeq) idsProbSaveSize += path.ids.size();
idsProbSaveSize += inSeq.size();
}
Matrix::resizeOrCreate(
generator_.outArg.value, idsProbSaveSize, 1, false, false);
real* idsProb = generator_.outArg.value->getData(); real* idsProb = generator_.outArg.value->getData();
real* probs = generator_.outArg.in->getData();
size_t curPos = 0; size_t curPos = 0;
for (size_t i = 0; i < finalPaths_.size(); ++i) { for (size_t i = 0; i < finalPaths_.size(); ++i) {
for (size_t j = 0; j < finalPaths_[i].size(); ++j) { for (size_t j = 0; j < finalPaths_[i].size(); ++j) {
@ -1333,24 +1336,16 @@ void RecurrentGradientMachine::fillGenOutputs() {
curPos += genLen; curPos += genLen;
idsProb[curPos++] = -1.0; idsProb[curPos++] = -1.0;
probs[i * numResults + j] = path.logProb; probs[i * numResults + j] = path.logProb;
if (!j && dataArgsSize_) {
// in beam search, here only reserved the top 1 generated result
// for out_links that are not the generated word indices.
batchMachineIdVec_.insert(batchMachineIdVec_.end(),
path.machineIdVec.begin(),
path.machineIdVec.end());
}
} }
starts[i + 1] = generator_.ids.size(); starts[i + 1] = generator_.ids.size();
} }
} else { } else {
for (size_t i = 0; i < finalPaths_.size(); ++i) { for (size_t i = 0; i < finalPaths_.size(); ++i) {
CHECK(!finalPaths_[i].empty()); CHECK(!finalPaths_[i].empty());
generator_.ids.insert(generator_.ids.begin(), Path& path = finalPaths_[i][0];
finalPaths_[i][0].ids.begin(), generator_.ids.insert(
finalPaths_[i][0].ids.end()); generator_.ids.begin(), path.ids.begin(), path.ids.end());
starts[i + 1] = starts[i] + finalPaths_[i][0].ids.size(); starts[i + 1] = starts[i] + path.ids.size();
} }
} }
} }
@ -1364,25 +1359,76 @@ void RecurrentGradientMachine::copyDataOutlinkFrame(size_t machineCur) {
} }
} }
void RecurrentGradientMachine::createDataOutlink( void RecurrentGradientMachine::createDataOutlinkSelRowsInfo(
std::vector<int>& machineIdVec) { bool isSeq, std::vector<Argument>& outArgs) {
size_t seqNum = batchMachineIdVec_.clear();
getBeamSize() > 1UL ? finalPaths_.size() : finalPaths_[0].size();
std::vector<int> starts(seqNum + 1, 0); size_t seqIdx = 0;
for (size_t i = 0; i < seqNum; ++i) { for (size_t i = 0; i < finalPaths_.size(); ++i) {
size_t seqLen = getBeamSize() > 1UL ? finalPaths_[i][0].ids.size() for (size_t j = 0; j < finalPaths_[i].size(); ++j) {
: finalPaths_[0][i].ids.size(); std::vector<int>& machineIdVec = finalPaths_[i][j].machineIdVec;
starts[i + 1] = starts[i] + seqLen; if (isSeq) {
for (size_t i = 0; i < machineIdVec.size(); ++i) {
size_t rowId = machineIdVec[i];
int* seqPos =
outArgs[i].sequenceStartPositions->getMutableData(false);
batchMachineIdVec_.push_back(seqPos[rowId]);
}
} else {
batchMachineIdVec_.insert(
batchMachineIdVec_.end(), machineIdVec.begin(), machineIdVec.end());
}
seqIdx++;
}
} }
}
/*
 * Fills copySize and batchMachineStartPos_ for the data outlinks.
 *
 * copySize gets one entry per path in finalPaths_, defaulting to 1. When
 * isSeq is true each entry is set to the length of the matching sequence in
 * outArgs[0].sequenceStartPositions (indexed by the outer index when
 * getBeamSize() > 1, by the inner index otherwise).
 *
 * batchMachineStartPos_ becomes the running sum of the number of generated
 * ids of each path, i.e. the start offset of every generated sequence.
 */
void RecurrentGradientMachine::createDataOutlinkCopySizeInfo(
    bool isSeq, std::vector<Argument>& outArgs, std::vector<int>& copySize) {
  // Total number of generated sequences over all entries of finalPaths_.
  size_t totalSeqNum = std::accumulate(
      finalPaths_.begin(),
      finalPaths_.end(),
      0UL,
      [](size_t a, const std::vector<Path>& b) { return a + b.size(); });
  copySize.resize(totalSeqNum, 1);

  // NOTE(review): resize() only zero-fills newly added elements; elements
  // that already exist keep their previous values -- confirm this is fine
  // for the entries the non-sequence branch below does not overwrite.
  batchMachineStartPos_.resize(totalSeqNum + 1, 0);
  if (isSeq) {
    ICpuGpuVectorPtr inputSeqStartPos = outArgs[0].sequenceStartPositions;
    CHECK_EQ(static_cast<size_t>(inputSeqStartPos->getSize() - 1),
             getBeamSize() > 1 ? finalPaths_.size() : finalPaths_[0].size());
    int* starts = inputSeqStartPos->getMutableData(false);
    size_t seqId = 0;
    // Indices are size_t to match the container sizes they are compared to.
    for (size_t i = 0; i < finalPaths_.size(); ++i) {
      for (size_t j = 0; j < finalPaths_[i].size(); ++j) {
        copySize[seqId] = getBeamSize() > 1 ? starts[i + 1] - starts[i]
                                            : starts[j + 1] - starts[j];
        batchMachineStartPos_[seqId + 1] =
            batchMachineStartPos_[seqId] + finalPaths_[i][j].ids.size();
        seqId++;
      }
    }
  } else {
    // Only the paths in finalPaths_[0] are walked here.
    for (size_t i = 0; i < finalPaths_[0].size(); ++i)
      batchMachineStartPos_[i + 1] =
          batchMachineStartPos_[i] + finalPaths_[0][i].ids.size();
  }
}
void RecurrentGradientMachine::createDataOutlink() {
for (size_t i = 0; i < dataArgsSize_; i++) { for (size_t i = 0; i < dataArgsSize_; i++) {
bool isSeq = dataArgsFrame_[i][0].hasSeq();
std::vector<int> copySize;
createDataOutlinkCopySizeInfo(isSeq, dataArgsFrame_[i], copySize);
createDataOutlinkSelRowsInfo(isSeq, dataArgsFrame_[i]);
dataArgs_[i].concat(dataArgsFrame_[i], dataArgs_[i].concat(dataArgsFrame_[i],
machineIdVec, batchMachineIdVec_,
starts, batchMachineStartPos_,
copySize,
useGpu_, useGpu_,
HPPL_STREAM_1, HPPL_STREAM_1,
PASS_TEST); PASS_TEST);
auto dataAgent = auto dataAgent =
dynamic_cast<DataLayer*>(outFrameLines_[i + 1].agentLayer.get()); dynamic_cast<DataLayer*>(outFrameLines_[i + 1].agentLayer.get());
CHECK_NOTNULL(dataAgent); CHECK_NOTNULL(dataAgent);

@ -190,7 +190,7 @@ public:
std::vector<int> ids; std::vector<int> ids;
/** /**
* @brief idsProb, log probability of each generated words. * @brief idsProb, log probability of each generated word.
*/ */
std::vector<real> idsProb; std::vector<real> idsProb;
@ -472,15 +472,43 @@ private:
void copyDataOutlinkFrame(size_t machineCur); void copyDataOutlinkFrame(size_t machineCur);
/* /*
* @brief In generation, if the layer group has more than 1 outlink, outlinks * @brief In generation, if the layer group has more than 1 outlink, outlink
* except the first one are data outlinks. This function creates the data * except the first one is a data outlink. In RecurrentLayerGroup, each time
* outlinks. * step is a separate Network, outputs of a layer inside the
* @note In beam search, only one generated sequence with the hightest log * RecurrentLayerGroup are stored in separate Arguments. If one layer is
* probabilites are retained. * specified as an outlink of RecurrentLayerGroup. This function will
* @param machineIdVec : select a row of output matrix in each frame * collect outputs in each time step of each generated sequence which are
* that the generation process expanded. * dispersed in separate Arguments to form a new single Argument as output of
* RecurrentLayerGroup.
*/ */
void createDataOutlink(std::vector<int>& machineIdVec); void createDataOutlink();
/*
* @brief decide how many rows to select, beginning at a start position, from
* the Matrix that stores the forward pass results.
*
* @param isSeq: a flag indicating whether the layer to be output of the
* RecurrentGradientMachine is a sequence or not
* @param outArgs: all of the returned Arguments of the forward pass
* during the generation process.
* @param copySize: the returned result, number of rows to select from the
* Matrix that stores the forward pass results, beginning at a start position.
*/
void createDataOutlinkCopySizeInfo(bool isSeq,
std::vector<Argument>& outArgs,
std::vector<int>& copySize);
/*
* @brief decide the index of the start row for each time step of a generated
* sequence in the Matrix that stores the entire beam search batch's forward
* pass results.
*
* @param isSeq: a flag indicating whether the layer to be output of the
* RecurrentGradientMachine is a sequence or not
* @param outArgs: all of the returned Arguments of the forward pass
* during the generation process.
*/
void createDataOutlinkSelRowsInfo(bool isSeq, std::vector<Argument>& outArgs);
/* /*
* @brief used in beam search, connect previous frame to form recurrent link * @brief used in beam search, connect previous frame to form recurrent link
@ -543,6 +571,7 @@ private:
std::vector<int> topIds_; std::vector<int> topIds_;
std::vector<int> seqIds_; std::vector<int> seqIds_;
std::vector<int> batchMachineIdVec_; std::vector<int> batchMachineIdVec_;
std::vector<int> batchMachineStartPos_;
std::vector<std::vector<Path>> finalPaths_; std::vector<std::vector<Path>> finalPaths_;
std::vector<real> minFinalPathLogProb_; std::vector<real> minFinalPathLogProb_;
BeamSearchControlCallbacks* beamSearchCtrlCallbacks_; BeamSearchControlCallbacks* beamSearchCtrlCallbacks_;

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save