Merge branch 'develop' into update-doc-pybind

update-doc-pybind
Yang Yang(Tony) 8 years ago committed by GitHub
commit 62b56e76ed

File diff suppressed because it is too large Load Diff

@ -285,41 +285,27 @@ class TestMulGradOp(GradientChecker):
'Y': np.random.random((84, 100)).astype("float32")
}
def test_cpu_gpu_compare(self):
self.compare_grad(self.op, self.inputs)
def test_normal(self):
def test_check_grad_normal(self):
# mul op will enlarge the relative error
self.check_grad(
self.op, self.inputs, ["X", "Y"], "Out", max_relative_error=0.5)
self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.5)
def test_ignore_x(self):
def test_check_grad_ingore_x(self):
self.check_grad(
self.op,
self.inputs, ["Y"],
"Out",
max_relative_error=0.5,
no_grad_set={"X"})
['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X"))
def test_ignore_y(self):
def test_check_grad_ingore_y(self):
self.check_grad(
self.op,
self.inputs, ["X"],
"Out",
max_relative_error=0.5,
no_grad_set={"Y"})
['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y'))
```
下面解释代码中一些关键的地方:
- 调用`create_op("mul")`创建反向Op对应的前向Op。
- 调用`compare_grad`函数对比CPU、GPU计算结果。
- `test_normal`中调用`check_grad`使用数值法检测梯度正确性和稳定性。
- 第一个参数`self.op` : 前向Op。
- 第二个参数`self.inputs` : 输入词典词典的Key和`ProtoMaker`定义保持一致。
- 第三个参数`["X", "Y"]` : 指定对输入变量`X`、`Y`做梯度检测。
- 第四个参数`"Out"` : 指定前向网络最终的输出目标变量`Out`
- `test_ignore_x`和`test_ignore_y`分支用来测试只需要计算一个输入梯度的情况。
- `test_check_grad_normal`中调用`check_grad`使用数值法检测梯度正确性和稳定性。
- 第一个参数`["X", "Y"]` : 指定对输入变量`X`、`Y`做梯度检测。
- 第二个参数`"Out"` : 指定前向网络最终的输出目标变量`Out`。
- 第三个参数`max_relative_error`:指定检测梯度时能容忍的最大错误值。
- `test_check_grad_ingore_x`和`test_check_grad_ingore_y`分支用来测试只需要计算一个输入梯度的情况。
### 编译和执行单元测试

@ -293,41 +293,27 @@ class TestMulGradOp(GradientChecker):
'Y': np.random.random((84, 100)).astype("float32")
}
def test_cpu_gpu_compare(self):
self.compare_grad(self.op, self.inputs)
def test_normal(self):
def test_check_grad_normal(self):
# mul op will enlarge the relative error
self.check_grad(
self.op, self.inputs, ["X", "Y"], "Out", max_relative_error=0.5)
self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.5)
def test_ignore_x(self):
def test_check_grad_ingore_x(self):
self.check_grad(
self.op,
self.inputs, ["Y"],
"Out",
max_relative_error=0.5,
no_grad_set={"X"})
['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X"))
def test_ignore_y(self):
def test_check_grad_ingore_y(self):
self.check_grad(
self.op,
self.inputs, ["X"],
"Out",
max_relative_error=0.5,
no_grad_set={"Y"})
['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y'))
```
Some key points in the code above include:
- `create_op("mul")` creates the backward operator's corresponding forward operator.
- `compare_grad` compares results between utilizing the CPU and the GPU.
- `test_normal` calls `check_grad` to validate scaling tests' correctness and stability through numeric methods.
- The first variable `self.op` denotes the forward operator.
- The second variable `self.inputs` denotes the input dictionary, which has its key value identical to its `ProtoMaker` definitions.
- The third variable `["X", "Y"]` appoints `X` and `Y` to be scale tested.
- The fourth variable `"Out"` points to the network's final output target `Out`.
- `test_ignore_x` and `test_ignore_y`branches test the cases where there is only one scaling input.
- The first variable `["X", "Y"]` appoints `X` and `Y` to be scale tested.
- The second variable `"Out"` points to the network's final output target `Out`.
- The third variable `max_relative_error` points to the maximum relative tolerance error during scaling tests.
- `test_check_grad_ingore_x` and `test_check_grad_ingore_y`branches test the cases where there is only one scaling input.
### Compiling and Running

@ -26,7 +26,7 @@ cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto)
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope)
cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry)
cc_library(grad_op_builder SRCS grad_op_builder.cc DEPS operator)
cc_library(grad_op_builder SRCS grad_op_builder.cc DEPS operator proto_desc)
cc_library(op_registry SRCS op_registry.cc DEPS grad_op_builder op_proto_maker op_info)
cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry)
cc_test(grad_op_builder_test SRCS grad_op_builder_test.cc DEPS grad_op_builder op_registry add_op)

@ -12,12 +12,25 @@
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/rowwise_add_op.h"
#pragma once
#include <typeindex>
#include "paddle/framework/framework.pb.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(
rowwise_add, ops::RowwiseAddKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(
rowwise_add_grad,
ops::RowwiseAddGradKernel<paddle::platform::GPUPlace, float>);
namespace paddle {
namespace framework {
inline DataType ToDataType(std::type_index type) {
if (typeid(float).hash_code() == type.hash_code()) {
return DataType::FP32;
} else if (typeid(double).hash_code() == type.hash_code()) {
return DataType::FP64;
} else if (typeid(int).hash_code() == type.hash_code()) {
return DataType::INT32;
} else {
PADDLE_THROW("Not supported");
return static_cast<DataType>(-1);
}
}
} // namespace framework
} // namespace paddle

@ -54,5 +54,44 @@ OperatorBase* BuildGradOp(const OperatorBase* op) {
return grad_info.Creator()(info.grad_op_type_, inputs, outputs, op->Attrs());
}
static void TransOpDescArg(const OpDescBind* src_op, const OpArgType& src_type,
bool is_grad, OpDescBind* dst_op,
const OpArgType& dst_type) {
PADDLE_ENFORCE(dst_op != nullptr,
"Protobuf desc of gradient op must be initialized first.");
const auto& proto = OpInfoMap::Instance().Get(src_op->Type()).Proto();
const auto& src_arg_list =
src_type == OpArgType::IN ? proto.inputs() : proto.outputs();
for (const auto& arg : src_arg_list) {
if (arg.not_in_gradient() && !is_grad) continue;
const std::string src_name = arg.name();
std::vector<std::string> vars = src_type == OpArgType::IN
? src_op->Input(src_name)
: src_op->Output(src_name);
if (is_grad) {
for (std::string& var : vars) {
var = GradVarName(var);
}
}
std::string dst_name = is_grad ? GradVarName(src_name) : src_name;
dst_type == OpArgType::IN ? dst_op->SetInput(dst_name, vars)
: dst_op->SetOutput(dst_name, vars);
}
}
void CompleteGradOpDesc(const OpDescBind* forw_op, OpDescBind* grad_op) {
auto& info = OpInfoMap::Instance().Get(forw_op->Type());
PADDLE_ENFORCE(info.HasGradientOp());
grad_op->SetType(info.grad_op_type_);
TransOpDescArg(forw_op, OpArgType::IN, false, grad_op, OpArgType::IN);
TransOpDescArg(forw_op, OpArgType::OUT, false, grad_op, OpArgType::IN);
TransOpDescArg(forw_op, OpArgType::OUT, true, grad_op, OpArgType::IN);
TransOpDescArg(forw_op, OpArgType::IN, true, grad_op, OpArgType::OUT);
grad_op->SetAttrMap(forw_op->GetAttrMap());
}
} // namespace framework
} // namespace paddle

@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#include "paddle/framework/op_desc.h"
#include "paddle/framework/operator.h"
namespace paddle {
@ -21,5 +22,7 @@ namespace framework {
OperatorBase* BuildGradOp(const OperatorBase* op);
void CompleteGradOpDesc(const OpDescBind* forw_op, OpDescBind* grad_op);
} // namespace framework
} // namespace paddle

@ -120,3 +120,82 @@ TEST(GradOpBuilder, IOIgnoredInGradient) {
std::vector<std::string>(
{f::GradVarName("in3_1"), f::GradVarName("in3_2")}));
}
TEST(GradOpDescBuilder, MutiInOut) {
f::OpDescBind *forw_op = new f::OpDescBind();
forw_op->SetType("mult_io");
forw_op->SetInput("In1", {"in1"});
forw_op->SetInput("In2_mult", {"in2_1", "in2_2", "in2_3"});
forw_op->SetInput("In3", {"in3"});
forw_op->SetOutput("Out1", {"out1"});
forw_op->SetOutput("Out2_mult", {"out2_1", "out2_2"});
f::OpDescBind *grad_op = new f::OpDescBind();
f::CompleteGradOpDesc(forw_op, grad_op);
EXPECT_EQ(grad_op->Type(), "mult_io_grad");
ASSERT_EQ(grad_op->InputNames().size(), 3UL + 2UL + 2UL);
EXPECT_EQ(grad_op->Input("In1"), std::vector<std::string>({"in1"}));
EXPECT_EQ(grad_op->Input("In2_mult"),
std::vector<std::string>({"in2_1", "in2_2", "in2_3"}));
EXPECT_EQ(grad_op->Input("In3"), std::vector<std::string>({"in3"}));
EXPECT_EQ(grad_op->Input("Out1"), std::vector<std::string>({"out1"}));
EXPECT_EQ(grad_op->Input("Out2_mult"),
std::vector<std::string>({"out2_1", "out2_2"}));
EXPECT_EQ(grad_op->Input(f::GradVarName("Out1")),
std::vector<std::string>({f::GradVarName("out1")}));
EXPECT_EQ(grad_op->Input(f::GradVarName("Out2_mult")),
std::vector<std::string>(
{f::GradVarName("out2_1"), f::GradVarName("out2_2")}));
ASSERT_EQ(grad_op->OutputNames().size(), 3UL);
EXPECT_EQ(grad_op->Output(f::GradVarName("In1")),
std::vector<std::string>({f::GradVarName("in1")}));
EXPECT_EQ(grad_op->Output(f::GradVarName("In2_mult")),
std::vector<std::string>({f::GradVarName("in2_1"),
f::GradVarName("in2_2"),
f::GradVarName("in2_3")}));
EXPECT_EQ(grad_op->Output(f::GradVarName("In3")),
std::vector<std::string>({f::GradVarName("in3")}));
delete forw_op;
delete grad_op;
}
TEST(GradOpDescBuilder, IOIgnoredInGradient) {
f::OpDescBind *forw_op = new f::OpDescBind();
forw_op->SetType("io_ignored");
forw_op->SetInput("In1", {"in1"});
forw_op->SetInput("In2_mult", {"in2_1", "in2_2"});
forw_op->SetInput("In3_mult", {"in3_1", "in3_2"});
forw_op->SetOutput("Out1_mult", {"out1_1", "out1_2"});
forw_op->SetOutput("Out2", {"out2"});
f::OpDescBind *grad_op = new f::OpDescBind();
f::CompleteGradOpDesc(forw_op, grad_op);
EXPECT_EQ(grad_op->Type(), "io_ignored_grad");
// 'In2' and 'Out2' are ignored in gradient calculating
ASSERT_EQ(grad_op->InputNames().size(), 2UL + 1UL + 2UL);
EXPECT_EQ(grad_op->Input("In1"), std::vector<std::string>({"in1"}));
EXPECT_EQ(grad_op->Input("In3_mult"),
std::vector<std::string>({"in3_1", "in3_2"}));
EXPECT_EQ(grad_op->Input("Out1_mult"),
std::vector<std::string>({"out1_1", "out1_2"}));
EXPECT_EQ(grad_op->Input(f::GradVarName("Out1_mult")),
std::vector<std::string>(
{f::GradVarName("out1_1"), f::GradVarName("out1_2")}));
EXPECT_EQ(grad_op->Input(f::GradVarName("Out2")),
std::vector<std::string>({f::GradVarName("out2")}));
ASSERT_EQ(grad_op->OutputNames().size(), 3UL);
EXPECT_EQ(grad_op->Output(f::GradVarName("In1")),
std::vector<std::string>({f::GradVarName("in1")}));
EXPECT_EQ(grad_op->Output(f::GradVarName("In2_mult")),
std::vector<std::string>(
{f::GradVarName("in2_1"), f::GradVarName("in2_2")}));
EXPECT_EQ(grad_op->Output(f::GradVarName("In3_mult")),
std::vector<std::string>(
{f::GradVarName("in3_1"), f::GradVarName("in3_2")}));
delete forw_op;
delete grad_op;
}

@ -89,6 +89,12 @@ void OpDescBind::SetAttr(const std::string &name, const Attribute &v) {
need_update_ = true;
}
void OpDescBind::SetAttrMap(
const std::unordered_map<std::string, Attribute> &attr_map) {
attrs_ = attr_map;
need_update_ = true;
}
Attribute OpDescBind::GetAttr(const std::string &name) const {
auto it = attrs_.find(name);
PADDLE_ENFORCE(it != attrs_.end(), "Attribute %s is not found", name);
@ -101,6 +107,11 @@ int OpDescBind::GetBlockAttr(const std::string &name) const {
return boost::get<BlockDesc *>(it->second)->idx();
}
const std::unordered_map<std::string, Attribute> &OpDescBind::GetAttrMap()
const {
return attrs_;
}
void OpDescBind::Sync() {
if (need_update_) {
this->op_desc_.mutable_inputs()->Clear();

@ -60,10 +60,16 @@ class OpDescBind {
void SetBlockAttr(const std::string &name, BlockDescBind &block);
// Only be used in C++
void SetAttrMap(const std::unordered_map<std::string, Attribute> &attr_map);
Attribute GetAttr(const std::string &name) const;
int GetBlockAttr(const std::string &name) const;
// Only be used in C++
const std::unordered_map<std::string, Attribute> &GetAttrMap() const;
private:
struct SetAttrDescVisitor : public boost::static_visitor<void> {
explicit SetAttrDescVisitor(OpDesc::Attr *attr) : attr_(attr) {}

@ -100,13 +100,39 @@ class OpRegistrar : public Registrar {
}
};
template <typename PlaceType, typename KernelType>
template <typename PlaceType, bool at_end, size_t I, typename... KernelType>
struct OpKernelRegistrarFunctor;
template <typename PlaceType, size_t I, typename... KernelTypes>
struct OpKernelRegistrarFunctor<PlaceType, false, I, KernelTypes...> {
using KERNEL_TYPE =
typename std::tuple_element<I, std::tuple<KernelTypes...>>::type;
void operator()(const char* op_type) const {
using T = typename KERNEL_TYPE::ELEMENT_TYPE;
OperatorWithKernel::OpKernelKey key(ToDataType(std::type_index(typeid(T))),
PlaceType());
OperatorWithKernel::AllOpKernels()[op_type][key].reset(new KERNEL_TYPE);
constexpr auto size = std::tuple_size<std::tuple<KernelTypes...>>::value;
OpKernelRegistrarFunctor<PlaceType, I + 1 == size, I + 1, KernelTypes...>
func;
func(op_type);
}
};
template <typename PlaceType, size_t I, typename... KernelType>
struct OpKernelRegistrarFunctor<PlaceType, true, I, KernelType...> {
void operator()(const char* op_type) const {}
};
// User can register many kernel in one place. The data type could be different.
template <typename PlaceType, typename... KernelType>
class OpKernelRegistrar : public Registrar {
public:
explicit OpKernelRegistrar(const char* op_type) {
OperatorWithKernel::OpKernelKey key;
key.place_ = PlaceType();
OperatorWithKernel::AllOpKernels()[op_type][key].reset(new KernelType);
OpKernelRegistrarFunctor<PlaceType, false, 0, KernelType...> func;
func(op_type);
}
};

@ -22,14 +22,14 @@ namespace framework {
template <>
Eigen::DefaultDevice& ExecutionContext::GetEigenDevice<
platform::CPUPlace, Eigen::DefaultDevice>() const {
return *device_context_.get_eigen_device<Eigen::DefaultDevice>();
return *device_context_.GetEigenDevice<platform::CPUPlace>();
}
#ifndef PADDLE_ONLY_CPU
template <>
Eigen::GpuDevice&
ExecutionContext::GetEigenDevice<platform::GPUPlace, Eigen::GpuDevice>() const {
return *device_context_.get_eigen_device<Eigen::GpuDevice>();
return *device_context_.GetEigenDevice<platform::GPUPlace>();
}
#endif

@ -22,6 +22,7 @@ limitations under the License. */
#include "op_info.h"
#include "paddle/framework/attribute.h"
#include "paddle/framework/data_type.h"
#include "paddle/framework/framework.pb.h"
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/scope.h"
@ -295,21 +296,6 @@ template <>
std::vector<Tensor*> InferShapeContext::MultiOutput<Tensor>(
const std::string& name) const;
template <typename T>
struct EigenDeviceConverter;
template <>
struct EigenDeviceConverter<platform::CPUPlace> {
using EigenDeviceType = Eigen::DefaultDevice;
};
#ifndef PADDLE_ONLY_CPU
template <>
struct EigenDeviceConverter<platform::GPUPlace> {
using EigenDeviceType = Eigen::GpuDevice;
};
#endif
class ExecutionContext : public InferShapeContext {
public:
ExecutionContext(const OperatorBase& op, const Scope& scope,
@ -317,8 +303,8 @@ class ExecutionContext : public InferShapeContext {
: InferShapeContext(op, scope), device_context_(device_context) {}
template <typename PlaceType,
typename DeviceType =
typename EigenDeviceConverter<PlaceType>::EigenDeviceType>
typename DeviceType = typename platform::EigenDeviceConverter<
PlaceType>::EigenDeviceType>
DeviceType& GetEigenDevice() const;
platform::Place GetPlace() const { return device_context_.GetPlace(); }
@ -403,7 +389,7 @@ class RuntimeInferShapeContext : public InferShapeContextBase {
const Scope& scope_;
};
class OpKernel {
class OpKernelBase {
public:
/**
* ExecutionContext is the only parameter of Kernel Run function.
@ -414,33 +400,47 @@ class OpKernel {
virtual void Compute(const ExecutionContext& context) const = 0;
virtual ~OpKernel() {}
virtual ~OpKernelBase() = default;
};
template <typename T>
class OpKernel : public OpKernelBase {
public:
using ELEMENT_TYPE = T;
};
class OperatorWithKernel : public OperatorBase {
public:
struct OpKernelKey {
platform::Place place_;
DataType data_type_;
OpKernelKey() = default;
explicit OpKernelKey(const platform::DeviceContext& dev_ctx) {
place_ = dev_ctx.GetPlace();
}
OpKernelKey(DataType data_type, platform::Place place)
: place_(place), data_type_(data_type) {}
OpKernelKey(DataType data_type, const platform::DeviceContext& dev_ctx)
: place_(dev_ctx.GetPlace()), data_type_(data_type) {}
bool operator==(const OpKernelKey& o) const {
return platform::places_are_same_class(place_, o.place_);
return platform::places_are_same_class(place_, o.place_) &&
data_type_ == o.data_type_;
}
};
struct OpKernelHash {
std::hash<bool> hash_;
std::hash<int> hash_;
size_t operator()(const OpKernelKey& key) const {
return hash_(platform::is_gpu_place(key.place_));
int place = key.place_.which();
int data_type = static_cast<int>(key.data_type_);
int pre_hash = data_type << NUM_PLACE_TYPE_LIMIT_IN_BIT |
(place & ((1 << NUM_PLACE_TYPE_LIMIT_IN_BIT) - 1));
return hash_(pre_hash);
}
};
using OpKernelMap =
std::unordered_map<OpKernelKey, std::unique_ptr<OpKernel>, OpKernelHash>;
std::unordered_map<OpKernelKey, std::unique_ptr<OpKernelBase>,
OpKernelHash>;
OperatorWithKernel(const std::string& type, const VariableNameMap& inputs,
const VariableNameMap& outputs, const AttributeMap& attrs)
@ -451,8 +451,10 @@ class OperatorWithKernel : public OperatorBase {
RuntimeInferShapeContext infer_shape_ctx(*this, scope);
this->InferShape(&infer_shape_ctx);
auto& opKernel = AllOpKernels().at(type_).at(OpKernelKey(dev_ctx));
opKernel->Compute(ExecutionContext(*this, scope, dev_ctx));
ExecutionContext ctx(*this, scope, dev_ctx);
auto& opKernel = AllOpKernels().at(type_).at(
OpKernelKey(IndicateDataType(ctx), dev_ctx));
opKernel->Compute(ctx);
}
static std::unordered_map<std::string /* op_type */, OpKernelMap>&
@ -462,13 +464,43 @@ class OperatorWithKernel : public OperatorBase {
}
bool SupportGPU() const override {
OperatorWithKernel::OpKernelKey key;
key.place_ = platform::GPUPlace();
return OperatorWithKernel::AllOpKernels().at(type_).count(key) != 0;
auto& op_kernels = OperatorWithKernel::AllOpKernels().at(type_);
return std::any_of(op_kernels.begin(), op_kernels.end(),
[](OpKernelMap::const_reference kern_pair) {
return platform::is_gpu_place(kern_pair.first.place_);
});
}
protected:
virtual void InferShape(InferShapeContextBase* ctx) const = 0;
// indicate kernel DataType by input data. Defaultly all input data must be
// same.
virtual DataType IndicateDataType(const ExecutionContext& ctx) const {
auto& scope = ctx.scope();
int data_type = -1;
for (auto& input : this->inputs_) {
for (auto& ipt_name : input.second) {
auto* var = scope.FindVar(ipt_name);
if (var != nullptr) {
const Tensor* t = nullptr;
if (var->IsType<Tensor>()) {
t = &var->Get<Tensor>();
} else if (var->IsType<LoDTensor>()) {
t = &var->Get<LoDTensor>();
}
if (t != nullptr) {
int tmp = static_cast<int>(ToDataType(t->type()));
PADDLE_ENFORCE(tmp == data_type || data_type == -1,
"DataType of Paddle Op must be same.");
data_type = tmp;
}
}
}
}
PADDLE_ENFORCE(data_type != -1, "DataType should be indicated by input");
return static_cast<DataType>(data_type);
}
};
} // namespace framework

@ -114,10 +114,13 @@ class OpWithKernelTest : public OperatorWithKernel {
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {}
DataType IndicateDataType(const ExecutionContext& ctx) const override {
return DataType::FP32;
}
};
template <typename T1, typename T2>
class CPUKernelTest : public OpKernel {
class CPUKernelTest : public OpKernel<float> {
public:
void Compute(const ExecutionContext& ctx) const {
std::cout << "this is cpu kernel" << std::endl;
@ -144,7 +147,7 @@ class OpKernelTestMultiInputsProtoAndCheckerMaker
}
};
class CPUKernalMultiInputsTest : public OpKernel {
class CPUKernalMultiInputsTest : public OpKernel<float> {
public:
void Compute(const ExecutionContext& ctx) const {
auto xs = ctx.op().Inputs("xs");

@ -29,20 +29,10 @@ limitations under the License. */
namespace paddle {
namespace pybind {
namespace details {
template <bool less, size_t i, typename... args>
struct CastToPyBufferImpl;
}
} // namespace pybind
namespace framework {
class Tensor {
public:
template <bool less, size_t i, typename... args>
friend struct pybind::details::CastToPyBufferImpl;
template <typename T, size_t D, int MajorType, typename IndexType>
friend struct EigenTensor;
@ -119,6 +109,8 @@ class Tensor {
return holder_->place();
}
std::type_index type() const { return holder_->type(); }
private:
template <typename T>
inline void check_memory_size() const;

@ -215,13 +215,13 @@ struct testActDesc {
static void getAddtoConfig(TestConfig& cfg, const testActDesc& pm) {
cfg.biasSize = 0;
cfg.layerConfig.set_type("addto");
size_t layerSize = pm.ih * pm.ih * pm.iw;
size_t layerSize = pm.ic * pm.ih * pm.iw;
cfg.layerConfig.set_size(layerSize);
cfg.inputDefs.push_back({INPUT_DATA, "layer_0", layerSize, 0});
cfg.layerConfig.add_inputs();
}
void testActivation(std::string& actType, const testActDesc& pm) {
void testActivation(std::string actType, const testActDesc& pm) {
// TODO(TJ): remove me when paddle support elu activation
if (actType == "mkldnn_elu") {
return;
@ -240,6 +240,7 @@ TEST(MKLDNNActivation, Activations) {
for (auto type : types) {
/* bs, c, h, w*/
testActivation(type, {16, 64, 32, 32});
testActivation(type, {2, 8, 1, 1});
}
}

@ -99,7 +99,11 @@ public:
/**
* @brief clear local buffer. It only affect auto-growth buffer.
*/
inline void clear() { rowStore_.clear(); }
inline void clear() {
// swap an empty vector to it to free the memory.
std::vector<real, AlignedAllocator<real, 32>> empty;
rowStore_.swap(empty);
}
/**
* @brief get current number of rows.

@ -101,8 +101,8 @@ set(DEPS_OPS
op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc
DEPS framework_proto tensor net_op)
op_library(cond_op SRCS cond_op.cc DEPS framework_proto tensor operator net_op)
op_library(cross_entropy_op DEPS cross_entropy_function)
op_library(softmax_with_cross_entropy_op DEPS cross_entropy_function softmax_function)
op_library(cross_entropy_op DEPS cross_entropy)
op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax)
list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS})
foreach(src ${GENERAL_OPS})

@ -47,7 +47,7 @@ __global__ void AccuracyCudaKernel(const int N, const int D, const int* Xdata,
}
template <typename T>
class AccuracyOpCUDAKernel : public framework::OpKernel {
class AccuracyOpCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),

@ -35,7 +35,7 @@ template <typename T, int MajorType = Eigen::RowMajor,
using EigenScalar = framework::EigenScalar<T, MajorType, IndexType>;
template <typename Place, typename T>
class AccuracyKernel : public framework::OpKernel {
class AccuracyKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* inference = ctx.Input<Tensor>("Inference");

@ -132,6 +132,17 @@ class SquareOpMaker : public framework::OpProtoAndCheckerMaker {
}
};
class SoftsignOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SoftsignOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Softsign operator");
AddOutput("Y", "Output of Softsign operator");
AddComment("Softsign activation operator, softsign(x) = x / (1 + |x|)");
}
};
template <typename AttrType>
class BReluOpMaker : public framework::OpProtoAndCheckerMaker {
public:
@ -195,111 +206,57 @@ class STanhOpMaker : public framework::OpProtoAndCheckerMaker {
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(sigmoid, ops::ActivationOp, ops::SigmoidOpMaker, sigmoid_grad,
ops::ActivationOpGrad);
REGISTER_OP_CPU_KERNEL(sigmoid,
ops::ActivationKernel<paddle::platform::CPUPlace, float,
ops::SigmoidFunctor<float>>);
REGISTER_OP_CPU_KERNEL(
sigmoid_grad, ops::ActivationGradKernel<paddle::platform::CPUPlace, float,
ops::SigmoidGradFunctor<float>>);
REGISTER_OP(exp, ops::ActivationOp, ops::ExpOpMaker, exp_grad,
ops::ActivationOpGrad);
REGISTER_OP_CPU_KERNEL(
exp,
ops::ActivationKernel<paddle::platform::CPUPlace, float, ops::ExpFunctor>);
REGISTER_OP_CPU_KERNEL(exp_grad,
ops::ActivationGradKernel<paddle::platform::CPUPlace,
float, ops::ExpGradFunctor>);
REGISTER_OP(relu, ops::ActivationOp, ops::ReluOpMaker, relu_grad,
ops::ActivationOpGrad);
REGISTER_OP_CPU_KERNEL(relu,
ops::ActivationKernel<paddle::platform::CPUPlace, float,
ops::ReluFunctor<float>>);
REGISTER_OP_CPU_KERNEL(
relu_grad, ops::ActivationGradKernel<paddle::platform::CPUPlace, float,
ops::ReluGradFunctor<float>>);
REGISTER_OP(tanh, ops::ActivationOp, ops::TanhOpMaker, tanh_grad,
ops::ActivationOpGrad);
REGISTER_OP_CPU_KERNEL(
tanh,
ops::ActivationKernel<paddle::platform::CPUPlace, float, ops::TanhFunctor>);
REGISTER_OP_CPU_KERNEL(
tanh_grad, ops::ActivationGradKernel<paddle::platform::CPUPlace, float,
ops::TanhGradFunctor<float>>);
REGISTER_OP(sqrt, ops::ActivationOp, ops::SqrtOpMaker, sqrt_grad,
ops::ActivationOpGrad);
REGISTER_OP_CPU_KERNEL(
sqrt,
ops::ActivationKernel<paddle::platform::CPUPlace, float, ops::SqrtFunctor>);
REGISTER_OP_CPU_KERNEL(
sqrt_grad, ops::ActivationGradKernel<paddle::platform::CPUPlace, float,
ops::SqrtGradFunctor<float>>);
REGISTER_OP(abs, ops::ActivationOp, ops::AbsOpMaker, abs_grad,
ops::ActivationOpGrad);
REGISTER_OP_CPU_KERNEL(
abs,
ops::ActivationKernel<paddle::platform::CPUPlace, float, ops::AbsFunctor>);
REGISTER_OP_CPU_KERNEL(abs_grad,
ops::ActivationGradKernel<paddle::platform::CPUPlace,
float, ops::AbsGradFunctor>);
REGISTER_OP(reciprocal, ops::ActivationOp, ops::ReciprocalOpMaker,
reciprocal_grad, ops::ActivationOpGrad);
REGISTER_OP_CPU_KERNEL(reciprocal,
ops::ActivationKernel<paddle::platform::CPUPlace, float,
ops::ReciprocalFunctor<float>>);
REGISTER_OP_CPU_KERNEL(
reciprocal_grad,
ops::ActivationGradKernel<paddle::platform::CPUPlace, float,
ops::ReciprocalGradFunctor<float>>);
REGISTER_OP(log, ops::ActivationOp, ops::LogOpMaker, log_grad,
ops::ActivationOpGrad);
REGISTER_OP_CPU_KERNEL(
log,
ops::ActivationKernel<paddle::platform::CPUPlace, float, ops::LogFunctor>);
REGISTER_OP_CPU_KERNEL(
log_grad, ops::ActivationGradKernel<paddle::platform::CPUPlace, float,
ops::LogGradFunctor<float>>);
REGISTER_OP(square, ops::ActivationOp, ops::SquareOpMaker, square_grad,
ops::ActivationOpGrad);
REGISTER_OP_CPU_KERNEL(square,
ops::ActivationKernel<paddle::platform::CPUPlace, float,
ops::SquareFunctor>);
REGISTER_OP_CPU_KERNEL(
square_grad, ops::ActivationGradKernel<paddle::platform::CPUPlace, float,
ops::SquareGradFunctor<float>>);
REGISTER_OP(softsign, ops::ActivationOp, ops::SoftsignOpMaker, softsign_grad,
ops::ActivationOpGrad);
REGISTER_OP(brelu, ops::ActivationOp, ops::BReluOpMaker<float>, brelu_grad,
ops::ActivationOpGrad);
REGISTER_OP_CPU_KERNEL(brelu,
ops::BReluKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(brelu_grad,
ops::BReluGradKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP(soft_relu, ops::ActivationOp, ops::SoftReluOpMaker<float>,
soft_relu_grad, ops::ActivationOpGrad);
REGISTER_OP_CPU_KERNEL(soft_relu,
ops::SoftReluKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
soft_relu_grad, ops::SoftReluGradKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP(pow, ops::ActivationOp, ops::PowOpMaker<float>, pow_grad,
ops::ActivationOpGrad);
REGISTER_OP_CPU_KERNEL(pow, ops::PowKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(pow_grad,
ops::PowGradKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP(stanh, ops::ActivationOp, ops::STanhOpMaker<float>, stanh_grad,
ops::ActivationOpGrad);
REGISTER_OP_CPU_KERNEL(stanh,
ops::STanhKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(stanh_grad,
ops::STanhGradKernel<paddle::platform::CPUPlace, float>);
#define REGISTER_ACTIVATION_CPU_KERNEL(act_type, functor, grad_functor) \
REGISTER_OP_CPU_KERNEL( \
act_type, \
paddle::operators::ActivationKernel<paddle::platform::CPUPlace, \
paddle::operators::functor<float>>); \
REGISTER_OP_CPU_KERNEL(act_type##_grad, \
paddle::operators::ActivationGradKernel< \
paddle::platform::CPUPlace, \
paddle::operators::grad_functor<float>>);
FOR_EACH_KERNEL_FUNCTOR(REGISTER_ACTIVATION_CPU_KERNEL);

@ -15,86 +15,14 @@
#define EIGEN_USE_GPU
#include "paddle/operators/activation_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(sigmoid,
ops::ActivationKernel<paddle::platform::GPUPlace, float,
ops::SigmoidFunctor<float>>);
REGISTER_OP_GPU_KERNEL(
sigmoid_grad, ops::ActivationGradKernel<paddle::platform::GPUPlace, float,
ops::SigmoidGradFunctor<float>>);
REGISTER_OP_GPU_KERNEL(
exp,
ops::ActivationKernel<paddle::platform::GPUPlace, float, ops::ExpFunctor>);
REGISTER_OP_GPU_KERNEL(exp_grad,
ops::ActivationGradKernel<paddle::platform::GPUPlace,
float, ops::ExpGradFunctor>);
REGISTER_OP_GPU_KERNEL(relu,
ops::ActivationKernel<paddle::platform::GPUPlace, float,
ops::ReluFunctor<float>>);
REGISTER_OP_GPU_KERNEL(
relu_grad, ops::ActivationGradKernel<paddle::platform::GPUPlace, float,
ops::ReluGradFunctor<float>>);
REGISTER_OP_GPU_KERNEL(
tanh,
ops::ActivationKernel<paddle::platform::GPUPlace, float, ops::TanhFunctor>);
REGISTER_OP_GPU_KERNEL(
tanh_grad, ops::ActivationGradKernel<paddle::platform::GPUPlace, float,
ops::TanhGradFunctor<float>>);
REGISTER_OP_GPU_KERNEL(
sqrt,
ops::ActivationKernel<paddle::platform::GPUPlace, float, ops::SqrtFunctor>);
REGISTER_OP_GPU_KERNEL(
sqrt_grad, ops::ActivationGradKernel<paddle::platform::GPUPlace, float,
ops::SqrtGradFunctor<float>>);
REGISTER_OP_GPU_KERNEL(
abs,
ops::ActivationKernel<paddle::platform::GPUPlace, float, ops::AbsFunctor>);
REGISTER_OP_GPU_KERNEL(abs_grad,
ops::ActivationGradKernel<paddle::platform::GPUPlace,
float, ops::AbsGradFunctor>);
REGISTER_OP_GPU_KERNEL(reciprocal,
ops::ActivationKernel<paddle::platform::GPUPlace, float,
ops::ReciprocalFunctor<float>>);
REGISTER_OP_GPU_KERNEL(
reciprocal_grad,
ops::ActivationGradKernel<paddle::platform::GPUPlace, float,
ops::ReciprocalGradFunctor<float>>);
REGISTER_OP_GPU_KERNEL(
log,
ops::ActivationKernel<paddle::platform::GPUPlace, float, ops::LogFunctor>);
REGISTER_OP_GPU_KERNEL(
log_grad, ops::ActivationGradKernel<paddle::platform::GPUPlace, float,
ops::LogGradFunctor<float>>);
REGISTER_OP_GPU_KERNEL(square,
ops::ActivationKernel<paddle::platform::GPUPlace, float,
ops::SquareFunctor>);
REGISTER_OP_GPU_KERNEL(
square_grad, ops::ActivationGradKernel<paddle::platform::GPUPlace, float,
ops::SquareGradFunctor<float>>);
REGISTER_OP_GPU_KERNEL(brelu,
ops::BReluKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(brelu_grad,
ops::BReluGradKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(soft_relu,
ops::SoftReluKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(
soft_relu_grad, ops::SoftReluGradKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(pow, ops::PowKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(pow_grad,
ops::PowGradKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(stanh,
ops::STanhKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(stanh_grad,
ops::STanhGradKernel<paddle::platform::GPUPlace, float>);
#define REGISTER_ACTIVATION_GPU_KERNEL(act_type, functor, grad_functor) \
REGISTER_OP_GPU_KERNEL( \
act_type, \
paddle::operators::ActivationKernel<paddle::platform::GPUPlace, \
paddle::operators::functor<float>>); \
REGISTER_OP_GPU_KERNEL(act_type##_grad, \
paddle::operators::ActivationGradKernel< \
paddle::platform::GPUPlace, \
paddle::operators::grad_functor<float>>);
FOR_EACH_KERNEL_FUNCTOR(REGISTER_ACTIVATION_GPU_KERNEL);

File diff suppressed because it is too large Load Diff

@ -25,7 +25,7 @@ template <typename T, int MajorType = Eigen::RowMajor,
using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
template <typename Place, typename T>
class AddKernel : public framework::OpKernel {
class AddKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* input0 = context.Input<Tensor>("X");

@ -56,7 +56,7 @@ class ClipGradFunctor {
};
template <typename Place, typename T>
class ClipKernel : public framework::OpKernel {
class ClipKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto max = context.Attr<T>("max");
@ -73,7 +73,7 @@ class ClipKernel : public framework::OpKernel {
};
template <typename Place, typename T>
class ClipGradKernel : public framework::OpKernel {
class ClipGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto max = context.Attr<T>("max");

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save