refine softmax operator.

Adaptive_data_structure_for_SwitchOrderLayer
caoying03 8 years ago
parent b3afe30dc8
commit 7d16fe87a3

@@ -44,11 +44,12 @@ class ScaleOpMaker : public framework::OpProtoAndCheckerMaker {
 The equation is: Out = scale*X
 )DOC");
-    AddAttr<AttrType>("scale", "scale of scale operator.").SetDefault(1.0);
+    AddAttr<AttrType>("scale", "The scaling factor of the scale operator.")
+        .SetDefault(1.0);
   }
 };
 
-// Identity Op's gradient is identity op, too.
+// IdentityOp's gradient is IdentityOp, too.
 // Grad(Out=scale(X)) => Grad(X) = scale(Grad(Out))
 template <typename AttrType>
 class ScaleGradOp : public NetOp {
@@ -65,17 +66,20 @@ class ScaleGradOp : public NetOp {
   }
 };
 
-// identity is a alias of scale op. This is also a example for creating a alias
-// operator.
+// IdentityOp is an alias of the ScaleOp. This is also an example for creating
+// an alias of an existing operator.
 template <typename AttrType>
 class IdentityOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   IdentityOpMaker(framework::OpProto *proto,
                   framework::OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "input tensor of identity op");
-    AddOutput("Out", "output tensor of identity op");
-    AddComment("identity operator. Just a alias of scale op which scale = 1.0");
+    AddInput("X", "The input tensor of identity op.");
+    AddOutput("Out", "The output tensor of identity op.");
+    AddComment(R"DOC(
+The identity operator is just an alias of the scale operator with the
+attribute scale is fixed to 1.0.
+)DOC");
   }
 };
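
For readers following the ScaleOp/IdentityOp comments above, the semantics they describe (Out = scale*X, Grad(X) = scale*Grad(Out), and identity being scale with the scale attribute fixed to 1.0) can be sketched in NumPy. This is only an illustration of the math; the helper names below are made up for this sketch and are not part of the operator implementation.

import numpy as np

def scale_forward(x, scale=1.0):
    # Out = scale * X
    return scale * x

def scale_backward(dout, scale=1.0):
    # Grad(Out=scale(X)) => Grad(X) = scale * Grad(Out)
    return scale * dout

x = np.random.random((2, 3)).astype("float32")
# IdentityOp is ScaleOp with scale fixed to 1.0, so it returns its input unchanged.
assert np.allclose(scale_forward(x, 1.0), x)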

@@ -23,9 +23,9 @@ class SoftmaxOp : public framework::OperatorWithKernel {
  protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
-    PADDLE_ENFORCE(ctx.Input<Tensor>("X")->dims().size() == 2UL,
+    PADDLE_ENFORCE(ctx.Input<Tensor>("logits")->dims().size() == 2UL,
                    "The input of softmax op must be a matrix.");
-    ctx.Output<Tensor>("Y")->Resize(ctx.Input<Tensor>("X")->dims());
+    ctx.Output<Tensor>("softmax")->Resize(ctx.Input<Tensor>("logits")->dims());
   }
 };
@@ -34,10 +34,10 @@ class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
   SoftmaxOpMaker(framework::OpProto *proto,
                  framework::OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X",
+    AddInput("logits",
              "The input tensor of softmax. "
              "2-D with shape [batch_size, input_feature_dimensions].");
-    AddOutput("Y", "The normalized values with the same shape as X.");
+    AddOutput("softmax", "The normalized values with the same shape as X.");
     AddComment(R"DOC(
 The input of softmax operator is a 2-D tensor with shape N x K (N is the
 batch_size, K is the dimension of input feature). The output tensor has the
@@ -64,14 +64,17 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel {
  protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
-    PADDLE_ENFORCE(ctx.InputVar("Y") != nullptr, "Input(Y) should not be null");
-    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Y")),
-                            "Input(Y@GRAD) should not be null");
-    PADDLE_ENFORCE(ctx.Input<Tensor>("Y")->dims() ==
-                       ctx.Input<Tensor>(framework::GradVarName("Y"))->dims(),
-                   "the shape of Input(0) and Input(1) should be the same");
-    ctx.Output<Tensor>(framework::GradVarName("X"))
-        ->Resize(ctx.Input<Tensor>("Y")->dims());
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("softmax"),
+                            "Input(softmax) should be not null.");
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("softmax")),
+                            "Input(softmax@GRAD) should be not null.");
+    PADDLE_ENFORCE_EQ(
+        ctx.Input<Tensor>("softmax")->dims(),
+        ctx.Input<Tensor>(framework::GradVarName("softmax"))->dims(),
+        "Input(softmax) and its gradients should have a same shape.");
+    ctx.Output<Tensor>(framework::GradVarName("logits"))
+        ->Resize(ctx.Input<Tensor>("logits")->dims());
   }
 };
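
The DOC comment above defines the softmax as a row-wise normalization: for a 2-D input of shape [batch_size, input_feature_dimensions], softmax[i, j] = exp(logits[i, j]) / sum_k exp(logits[i, k]). Below is a minimal NumPy sketch of that contract, with the usual max-subtraction for numerical stability (which the stable_softmax reference in the test further down presumably uses as well). It is an illustration only, not the Eigen kernel.

import numpy as np

def softmax_rows(logits):
    # Normalize each row independently; subtracting the row max keeps exp() from overflowing.
    shifted = logits - logits.max(axis=1, keepdims=True)
    exps = np.exp(shifted)
    return exps / exps.sum(axis=1, keepdims=True)

logits = np.random.random((10, 10)).astype("float32")
softmax = softmax_rows(logits)
assert np.allclose(softmax.sum(axis=1), 1.0)  # every row sums to one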

@@ -28,12 +28,12 @@ template <typename Place, typename T>
 class SoftmaxKernel : public framework::OpKernel {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    auto input = context.Input<Tensor>("X");
-    auto output = context.Output<Tensor>("Y");
-    output->mutable_data<T>(context.GetPlace());
+    auto X = context.Input<Tensor>("logits");
+    auto Y = context.Output<Tensor>("softmax");
+    Y->mutable_data<T>(context.GetPlace());
 
-    auto logits = EigenMatrix<T>::From(*input);
-    auto softmax = EigenMatrix<T>::From(*output);
+    auto logits = EigenMatrix<T>::From(*X);
+    auto softmax = EigenMatrix<T>::From(*Y);
 
     const int kBatchDim = 0;
     const int kClassDim = 1;
@@ -69,9 +69,9 @@ class SoftmaxGradKernel : public framework::OpKernel {
   void Compute(const framework::ExecutionContext& context) const override {
     std::shared_ptr<Tensor> scale_ = std::make_shared<Tensor>();
 
-    auto Y = context.Input<Tensor>("Y");
-    auto dY = context.Input<Tensor>(framework::GradVarName("Y"));
-    auto dX = context.Output<Tensor>(framework::GradVarName("X"));
+    auto Y = context.Input<Tensor>("softmax");
+    auto dY = context.Input<Tensor>(framework::GradVarName("softmax"));
+    auto dX = context.Output<Tensor>(framework::GradVarName("logits"));
     dX->mutable_data<T>(context.GetPlace());
 
     const int batch_size = Y->dims()[0];
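
The SoftmaxGradKernel above reads the saved forward output (the softmax input bound to Y) and its incoming gradient dY, and writes the gradient with respect to logits into dX. The standard softmax backward rule is dX[i, j] = (dY[i, j] - sum_k dY[i, k] * Y[i, k]) * Y[i, j]; the per-row dot product is presumably what the scale_ tensor holds, though the hunk shown here only declares it. A short NumPy sketch of that rule, for illustration only (the function name is made up):

import numpy as np

def softmax_backward(Y, dY):
    # Row-wise Jacobian-vector product of softmax:
    # dX = (dY - rowsum(dY * Y)) * Y
    dot = (dY * Y).sum(axis=1, keepdims=True)
    return (dY - dot) * Y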

@@ -18,18 +18,23 @@ class TestSoftmaxOp(unittest.TestCase):
     def setUp(self):
         self.type = "softmax"
-        self.inputs = {'X': np.random.random((32, 100)).astype("float32")}
+        self.inputs = {"logits": np.random.random((10, 10)).astype("float32")}
         self.outputs = {
-            'Y': np.apply_along_axis(stable_softmax, 1, self.inputs['X'])
+            "softmax":
+            np.apply_along_axis(stable_softmax, 1, self.inputs["logits"])
         }
 
-class SoftmaxGradOpTest(GradientChecker):
-    def test_softmax(self):
-        op = create_op("softmax")
-        inputs = {"X": np.random.uniform(0.1, 1, [10, 10]).astype("float32")}
-        self.check_grad(op, inputs, set("X"), "Y")
+class TestSoftmaxGradOp(GradientChecker):
+    def setUp(self):
+        self.op = create_op("softmax")
+        self.inputs = {
+            "logits": np.random.uniform(0.1, 1, [10, 10]).astype("float32")
+        }
+
+    def test_softmax_grad(self):
+        self.check_grad(self.op, self.inputs, ["logits"], "softmax")
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()
