|
|
@ -57,25 +57,30 @@ class MomentumOpMaker : public framework::OpProtoAndCheckerMaker {
|
|
|
|
MomentumOpMaker(framework::OpProto *proto,
|
|
|
|
MomentumOpMaker(framework::OpProto *proto,
|
|
|
|
framework::OpAttrChecker *op_checker)
|
|
|
|
framework::OpAttrChecker *op_checker)
|
|
|
|
: OpProtoAndCheckerMaker(proto, op_checker) {
|
|
|
|
: OpProtoAndCheckerMaker(proto, op_checker) {
|
|
|
|
AddInput("Param", "Input parameter");
|
|
|
|
AddInput("Param",
|
|
|
|
AddInput("Grad", "Input gradient");
|
|
|
|
"(Tensor, default Tensor<float>) "
|
|
|
|
AddInput("Velocity", "Input velocity");
|
|
|
|
"Input parameter that has to be updated");
|
|
|
|
AddInput("LearningRate", "Input learning rate");
|
|
|
|
AddInput("Grad",
|
|
|
|
|
|
|
|
"(Tensor, default Tensor<float>) "
|
|
|
|
AddOutput("ParamOut", "Output parameter");
|
|
|
|
"Input gradient of the parameter");
|
|
|
|
AddOutput("VelocityOut", "Output velocity");
|
|
|
|
AddInput("Velocity",
|
|
|
|
|
|
|
|
"(Tensor, default Tensor<float>) "
|
|
|
|
AddAttr<float>("mu", "Momentum coefficient");
|
|
|
|
"Input velocity (corresponding to the parameter) "
|
|
|
|
|
|
|
|
"that has to be updated");
|
|
|
|
|
|
|
|
AddInput("LearningRate",
|
|
|
|
|
|
|
|
"(Tensor, default Tensor<float>) "
|
|
|
|
|
|
|
|
"Input learning rate");
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
AddOutput("ParamOut", "(Tensor) Output updated parameter");
|
|
|
|
|
|
|
|
AddOutput("VelocityOut", "(Tensor) Output updated velocity");
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
AddAttr<float>("mu", "(float) Momentum coefficient");
|
|
|
|
AddComment(R"DOC(
|
|
|
|
AddComment(R"DOC(
|
|
|
|
|
|
|
|
|
|
|
|
Momentum Algorithm (momentum).
|
|
|
|
Momentum Algorithm (momentum).
|
|
|
|
|
|
|
|
|
|
|
|
velocity_out = mu * velocity - learning_rate * grad
|
|
|
|
velocity = mu * velocity + gradient
|
|
|
|
param_out = param + velocity_out
|
|
|
|
param = param - learning_rate * velocity
|
|
|
|
|
|
|
|
|
|
|
|
Ref: Sutskever, Ilya, et al. "On the importance of initialization
|
|
|
|
|
|
|
|
and momentum in deep learning." ICML 2013;
|
|
|
|
|
|
|
|
http://jmlr.org/proceedings/papers/v28/sutskever13.pdf
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
)DOC");
|
|
|
|
)DOC");
|
|
|
|
}
|
|
|
|
}
|
|
|
|