commit be8bef9bdd
@ -0,0 +1,67 @@

# Design Doc: Gradient Operators Registration

## The Problem Posed

Under the current operator registration mechanism, the programmer registers a *gradient operator creator* function for each operator. This function takes a C++ operator instance and returns the corresponding gradient operator instance.

However, since we decided to separate the *compilation* and *execution* of DL models, we need to reshape the creator so that it takes a protobuf `OpDesc` message and returns the corresponding messages.

Moreover, the new registration mechanism needs to support the fact that an operator's gradient computation might be a composition of operators.

## Current Implementation

`OpInfo` objects are stored in an associative map whose key is the operator type. The `grad_op_type_` field indicates the associated gradient operator type, and an operator creates its gradient operator through the gradient type's `OpInfo::creator_`. The pseudo code is

```cpp
struct OpInfo {
  std::function<OperatorBase*(...)> creator_;
  std::string grad_op_type_;
  ...
};

map<string, OpInfo> OpInfoMap;

OperatorBase* CreateGradientOperator(const OperatorBase& op) {
  return OpInfoMap.at(op.Type()).creator_(...);
}
```
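
For example, creating the gradient of an `add` operator under this scheme is a two-step lookup: first the forward op's metadata, then the gradient type it names. The snippet below is a sketch of that flow, not the actual framework code:

```cpp
// Sketch: how the current mechanism resolves a gradient operator.
const OpInfo& info = OpInfoMap.at("add");            // forward op's metadata
const std::string& grad_type = info.grad_op_type_;   // e.g. "add_grad"
OperatorBase* grad_op = OpInfoMap.at(grad_type).creator_(/*...*/);
```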

## Proposed Solution

The mapping relationship between an operator and its gradient operators is a function. The interface of that function is:

```cpp
// (OpDesc) --> vector<OpDesc>
using GradOpDescMaker = std::function<std::vector<OpDesc>(const OpDesc&)>;
```

The function takes the `OpDesc` of the forward operator and returns one or more gradient operator descriptions.
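
To make the composition case concrete, here is a minimal sketch of such a maker for `minus` (`Out = X - Y`), whose gradient can be composed from two `scale` operators (`dX = 1 * dOut`, `dY = -1 * dOut`). The `OpDesc` below is a simplified stand-in struct, not the real protobuf message; the `@GRAD` suffix and the field names are illustrative assumptions:

```cpp
#include <string>
#include <vector>

// Simplified stand-in for the protobuf OpDesc message (illustration only).
struct OpDesc {
  std::string type;
  std::vector<std::string> inputs;
  std::vector<std::string> outputs;
  float scale;  // stands in for the attribute map
};

// The gradient of minus is a composition of two scale operators.
std::vector<OpDesc> MinusOpGradMaker(const OpDesc& fwd) {
  OpDesc dx{"scale", {fwd.outputs[0] + "@GRAD"}, {fwd.inputs[0] + "@GRAD"}, 1.0f};
  OpDesc dy{"scale", {fwd.outputs[0] + "@GRAD"}, {fwd.inputs[1] + "@GRAD"}, -1.0f};
  return {dx, dy};
}
```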

The `GradOpDescMaker` will be registered in `OpInfo`, replacing the `grad_op_type_` field. The `OpInfo` should be

```cpp
struct OpInfo {
  GradOpDescMaker grad_op_maker_;
  ...
};
```

The `grad_op_maker_` is `nullptr` if the operator does not have any associated gradient operators.

We should change the registration macros at the same time. In the current solution there is no difference between forward operators and backward operators, so `REGISTER_OP` just registers one operator. If `REGISTER_OPERATOR` needs to take an `OpProtoAndCheckerMaker` and a `GradOpDescMaker` as well, we just list them in the same macro. This can be done with a variadic macro that uses `__VA_ARGS__`, as sketched below.
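
A minimal sketch of how such a variadic macro can forward its type arguments to a registrar template. The registrar name is an assumption; `OperatorRegistrarRecursive` is taken from the new `op_registry.h` in this commit, and the map update reuses the pseudo code above:

```cpp
// Each type in ARGS (operator class, proto maker, grad maker, ...) fills the
// OpInfo field selected by its base class via the OpInfoFiller machinery.
template <typename... ARGS>
struct OperatorRegistrar {
  explicit OperatorRegistrar(const char* op_type) {
    OpInfo info;
    details::OperatorRegistrarRecursive<0, false, ARGS...> reg(op_type, &info);
    OpInfoMap[op_type] = info;  // the associative map from the pseudo code
  }
};

#define REGISTER_OPERATOR(op_type, ...)                       \
  static ::paddle::framework::OperatorRegistrar<__VA_ARGS__>  \
      __op_registrar_##op_type##__(#op_type)
```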

The user interface should be

```cpp
vector<OpDesc> MinusOpGradMaker(OpDesc) {...}

REGISTER_OPERATOR(minus, MinusOp, MinusOpProtoAndCheckerMaker, MinusOpGradMaker);

// Developers can still manually implement the gradient operator.
REGISTER_OPERATOR(minus_grad, MinusGradOp);
```

The interface of the current `REGISTER_OP` macro must stay unchanged. Internally, `REGISTER_OP` will invoke `REGISTER_OPERATOR` twice and generate a `GradOpDescMaker` on its own.

```cpp
REGISTER_OP(minus, MinusOp, MinusOpProtoAndCheckerMaker, minus_grad, MinusGradOp);
```
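
One possible shape of the maker that `REGISTER_OP` generates, sketched here as an assumption about the implementation rather than the final code: it emits a single `OpDesc` whose type is the registered `grad_op_type`, reusing the simplified `OpDesc` stand-in from above:

```cpp
// Hypothetical default maker generated inside REGISTER_OP: it produces one
// gradient OpDesc whose type is the registered grad_op_type.
inline GradOpDescMaker DefaultGradOpDescMaker(const std::string& grad_op_type) {
  return [grad_op_type](const OpDesc& fwd) {
    OpDesc grad = fwd;          // start from the forward description
    grad.type = grad_op_type;   // e.g. "minus" -> "minus_grad"
    return std::vector<OpDesc>{grad};
  };
}
```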
@ -0,0 +1,105 @@

/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <tuple>        // std::tuple, std::tuple_element
#include <type_traits>  // std::is_base_of

#include "paddle/framework/op_info.h"
#include "paddle/framework/op_proto_maker.h"
#include "paddle/framework/operator.h"

namespace paddle {
namespace framework {
namespace details {

// Which field of OpInfo a registered class fills in.
enum OpInfoFillType {
  kOperator = 0,
  kOpProtoAndCheckerMaker = 1,
  kGradOpDescMaker = 2
};

// Deduces at compile time which OpInfo field a class T fills,
// based on the framework base class it derives from.
template <typename T>
struct OpInfoFillTypeID {
  static constexpr OpInfoFillType ID() {
    return std::is_base_of<OperatorBase, T>::value
               ? kOperator
               : (std::is_base_of<OpProtoAndCheckerMaker, T>::value
                      ? kOpProtoAndCheckerMaker
                      : (std::is_base_of<GradOpDescMakerBase, T>::value
                             ? kGradOpDescMaker
                             : static_cast<OpInfoFillType>(-1)));
  }
};

// Primary template; each OpInfoFillType gets a specialization below.
template <typename T, OpInfoFillType = OpInfoFillTypeID<T>::ID()>
struct OpInfoFiller;

// Walks the ARGS type pack at compile time; the I-th type fills its field of
// *info, then the recursion continues until the end of the pack is reached.
template <size_t I, bool at_end, typename... ARGS>
class OperatorRegistrarRecursive;

template <size_t I, typename... ARGS>
class OperatorRegistrarRecursive<I, false, ARGS...> {
 public:
  using T = typename std::tuple_element<I, std::tuple<ARGS...>>::type;
  OperatorRegistrarRecursive(const char* op_type, OpInfo* info) {
    OpInfoFiller<T> fill;
    fill(op_type, info);
    constexpr auto size = sizeof...(ARGS);
    OperatorRegistrarRecursive<I + 1, I + 1 == size, ARGS...> reg(op_type,
                                                                  info);
    (void)(reg);
  }
};

// Recursion terminator: the whole type pack has been consumed.
template <size_t I, typename... ARGS>
class OperatorRegistrarRecursive<I, true, ARGS...> {
 public:
  OperatorRegistrarRecursive(const char* op_type, OpInfo* info) {}
};

// Fills OpInfo::creator_ with a factory for the operator class T.
template <typename T>
struct OpInfoFiller<T, kOperator> {
  void operator()(const char* op_type, OpInfo* info) const {
    info->creator_ = [](const std::string& type, const VariableNameMap& inputs,
                        const VariableNameMap& outputs,
                        const AttributeMap& attrs) {
      return new T(type, inputs, outputs, attrs);
    };
  }
};

// Builds and validates the OpProto and attribute checker declared by T.
template <typename T>
struct OpInfoFiller<T, kOpProtoAndCheckerMaker> {
  void operator()(const char* op_type, OpInfo* info) const {
    info->proto_ = new OpProto;
    info->checker_ = new OpAttrChecker();
    auto maker = T(info->proto_, info->checker_);
    maker.Validate();
    info->proto_->set_type(op_type);
    PADDLE_ENFORCE(
        info->proto_->IsInitialized(),
        "Failed to initialize %s's OpProto, because %s is not initialized",
        op_type, info->proto_->InitializationErrorString());
  }
};

// Registers the gradient-op-descriptor maker of type T.
template <typename T>
struct OpInfoFiller<T, kGradOpDescMaker> {
  void operator()(const char* op_type, OpInfo* info) const {
    info->grad_op_maker_ = new T();
  }
};

}  // namespace details

}  // namespace framework
}  // namespace paddle
@ -1,68 +0,0 @@

/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/operators/add_op.h"

namespace paddle {
namespace operators {

class AddOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  void InferShape(framework::InferShapeContextBase* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) of AddOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) of AddOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "Output(Out) of AddOp should not be null.");

    auto x_dims = ctx->GetInputDim("X");
    auto y_dims = ctx->GetInputDim("Y");
    PADDLE_ENFORCE_EQ(x_dims, y_dims,
                      "Two inputs of the Add Op must have the same dimension.");
    ctx->SetOutputDim("Out", x_dims);
  }
};

class AddOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  AddOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "The first input of add op");
    AddInput("Y", "The second input of add op");
    AddOutput("Out", "The output of add op");
    AddComment(R"DOC(
Two Element Add Operator.

The equation is: Out = X + Y
)DOC");
  }
};

class AddOpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  void InferShape(framework::InferShapeContextBase* ctx) const override {}
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP(add, ops::AddOp, ops::AddOpMaker, add_grad, ops::AddOpGrad);

REGISTER_OP_CPU_KERNEL(add, ops::AddKernel<paddle::platform::CPUPlace, float>);
@ -1,18 +0,0 @@

/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/operators/add_op.h"

namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(add, ops::AddKernel<paddle::platform::GPUPlace, float>);
@ -1,48 +0,0 @@

/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;
template <typename T, int MajorType = Eigen::RowMajor,
          typename IndexType = Eigen::DenseIndex>
using EigenVector = framework::EigenVector<T, MajorType, IndexType>;

template <typename Place, typename T>
class AddKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* input0 = context.Input<Tensor>("X");
    auto* input1 = context.Input<Tensor>("Y");
    auto* output = context.Output<Tensor>("Out");

    output->mutable_data<T>(context.GetPlace());

    // Flatten the tensors into 1-D Eigen vectors and add element-wise.
    auto X = EigenVector<T>::Flatten(*input0);
    auto Y = EigenVector<T>::Flatten(*input1);
    auto Z = EigenVector<T>::Flatten(*output);

    auto place = context.GetEigenDevice<Place>();

    Z.device(place) = X + Y;
  }
};

}  // namespace operators
}  // namespace paddle
@ -1,20 +0,0 @@

import unittest
import numpy as np
from op_test import OpTest


class TestAddOp(OpTest):
    def setUp(self):
        self.op_type = "add"
        self.inputs = {
            'X': np.random.random((102, 105)).astype("float32"),
            'Y': np.random.random((102, 105)).astype("float32")
        }
        self.outputs = {'Out': self.inputs['X'] + self.inputs['Y']}

    def test_check_output(self):
        self.check_output()


if __name__ == "__main__":
    unittest.main()
@ -1,46 +0,0 @@

import unittest
import numpy as np
import paddle.v2.framework.core as core
from op_test import get_numeric_gradient
from op_test import create_op


class GetNumericGradientTest(unittest.TestCase):
    def test_add_op(self):
        x = np.random.random((10, 1)).astype("float32")
        y = np.random.random((10, 1)).astype("float32")
        z = x + y
        scope = core.Scope()
        add_op = create_op(scope, "add", {'X': x, 'Y': y}, {'Out': z}, dict())
        arr = get_numeric_gradient(scope, add_op, {'X': x,
                                                   'Y': y}, 'X', ['Out'])
        self.assertAlmostEqual(arr.mean(), 1.0, delta=1e-4)

    def test_softmax_op(self):
        def stable_softmax(x):
            """Compute the softmax of vector x in a numerically stable way."""
            shiftx = x - np.max(x)
            exps = np.exp(shiftx)
            return exps / np.sum(exps)

        def label_softmax_grad(Y, dY):
            dX = Y * 0.0
            for i in range(Y.shape[0]):
                d = np.dot(Y[i, :], dY[i, :])
                dX[i, :] = Y[i, :] * (dY[i, :] - d)
            return dX

        X = np.random.random((2, 2)).astype("float32")
        Y = np.apply_along_axis(stable_softmax, 1, X)
        dY = np.ones(Y.shape)
        dX = label_softmax_grad(Y, dY)

        scope = core.Scope()
        softmax_op = create_op(scope, "softmax", {"X": X}, {"Y": Y}, dict())

        arr = get_numeric_gradient(scope, softmax_op, {"X": X}, "X", "Y")
        # `decimal` must be an int; the original `decimal=1e-2` silently asked
        # for ~0 decimal places. Two decimal places matches the intended 1e-2
        # tolerance.
        np.testing.assert_almost_equal(arr, dX, decimal=2)


if __name__ == "__main__":
    unittest.main()