Add optimizer operators for VM.

pull/2683/head
liuxiao 5 years ago
parent efecaf43df
commit 2097a0e90a

@@ -77,6 +77,8 @@ static std::map<string, string> tbe_func_adapter_map = {
{"sparse_apply_adagrad", "sparse_apply_adagrad_d"},
{"apply_proximal_adagrad", "apply_proximal_adagrad_d"},
{"sparse_apply_proximal_adagrad", "sparse_apply_proximal_adagrad_d"},
{"apply_add_sign", "apply_add_sign_d"},
{"apply_power_sign", "apply_power_sign_d"},
{"transpose", "transpose_d"},
{"fill", "fill_d"},
{"unsorted_segment_sum", "unsorted_segment_sum_d"},

@@ -34,6 +34,10 @@ from .apply_ada_max import _apply_ada_max_tbe
from .apply_adadelta import _apply_adadelta_tbe
from .apply_adagrad import _apply_adagrad_tbe
from .apply_adagrad_v2 import _apply_adagrad_v2_tbe
from .apply_add_sign import _apply_add_sign_tbe
from .apply_power_sign import _apply_power_sign_tbe
from .apply_gradient_descent import _apply_gradient_descent_tbe
from .apply_proximal_gradient_descent import _apply_proximal_gradient_descent_tbe
from .approximate_equal import _approximate_equal_tbe
from .adam_apply_one import _adam_apply_one_tbe
from .assign import _assign_tbe

@@ -0,0 +1,65 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""ApplyAddSignD op"""
from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType

apply_add_sign_d_op_info = TBERegOp("ApplyAddSign") \
.fusion_type("OPAQUE") \
.async_flag(False) \
.binfile_name("apply_add_sign_d.so") \
.compute_cost(10) \
.kernel_name("apply_add_sign_d") \
.partial_flag(True) \
.input(0, "var", False, "required", "all") \
.input(1, "m", False, "required", "all") \
.input(2, "lr", False, "required", "all") \
.input(3, "alpha", False, "required", "all") \
.input(4, "sign_decay", False, "required", "all") \
.input(5, "beta", False, "required", "all") \
.input(6, "grad", False, "required", "all") \
.output(0, "var", False, "required", "all") \
.output(1, "m", False, "required", "all") \
.dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_Default, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default, DataType.F16_5HD, DataType.F16_5HD,
DataType.F16_5HD) \
.dtype_format(DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_Default, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0,
DataType.F16_C1HWNCoC0) \
.dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
DataType.F16_Default) \
.dtype_format(DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_Default, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default, DataType.F16_FracZ, DataType.F16_FracZ,
DataType.F16_FracZ) \
.dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_Default, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default, DataType.F32_5HD, DataType.F32_5HD,
DataType.F32_5HD) \
.dtype_format(DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_Default, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0,
DataType.F32_C1HWNCoC0) \
.dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
DataType.F32_Default) \
.dtype_format(DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_Default, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default, DataType.F32_FracZ, DataType.F32_FracZ,
DataType.F32_FracZ) \
.get_op_info()


@op_info_register(apply_add_sign_d_op_info)
def _apply_add_sign_tbe():
"""ApplyAddSignD TBE register"""
return
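
This file only registers the kernel's inputs, outputs, and dtype/format combinations; the update itself runs inside the TBE binary. As a reference for what ApplyAddSign is expected to compute, here is a NumPy sketch of the AddSign rule (mirroring the TensorFlow op of the same name; the helper name is ours):

import numpy as np


def add_sign_reference(var, m, lr, alpha, sign_decay, beta, grad):
    """NumPy reference for the assumed ApplyAddSign semantics."""
    m_new = beta * m + (1.0 - beta) * grad                                  # moving average of the gradient
    update = (alpha + sign_decay * np.sign(grad) * np.sign(m_new)) * grad   # AddSign scaling of the gradient
    var_new = var - lr * update
    return var_new, m_new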

@@ -0,0 +1,44 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""ApplyGradientDescent op"""
from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType

apply_gradient_descent_op_info = TBERegOp("ApplyGradientDescent") \
.fusion_type("OPAQUE") \
.async_flag(False) \
.binfile_name("apply_gradient_descent.so") \
.compute_cost(10) \
.kernel_name("apply_gradient_descent") \
.partial_flag(True) \
.input(0, "var", False, "required", "all") \
.input(1, "alpha", False, "required", "all") \
.input(2, "delta", False, "required", "all") \
.output(0, "var", False, "required", "all") \
.dtype_format(DataType.F16_5HD, DataType.F16_Default, DataType.F16_5HD, DataType.F16_5HD) \
.dtype_format(DataType.F16_FracZ, DataType.F16_Default, DataType.F16_FracZ, DataType.F16_FracZ) \
.dtype_format(DataType.F16_C1HWNCoC0, DataType.F16_Default, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0) \
.dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \
.dtype_format(DataType.F32_5HD, DataType.F32_Default, DataType.F32_5HD, DataType.F32_5HD) \
.dtype_format(DataType.F32_FracZ, DataType.F32_Default, DataType.F32_FracZ, DataType.F32_FracZ) \
.dtype_format(DataType.F32_C1HWNCoC0, DataType.F32_Default, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0) \
.dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \
.get_op_info()


@op_info_register(apply_gradient_descent_op_info)
def _apply_gradient_descent_tbe():
"""ApplyGradientDescent TBE register"""
return
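
ApplyGradientDescent is the plain SGD step; the kernel is expected to compute var <- var - alpha * delta. A one-line NumPy sketch of the assumed semantics:

import numpy as np


def gradient_descent_reference(var, alpha, delta):
    """NumPy reference for the assumed ApplyGradientDescent semantics: var <- var - alpha * delta."""
    return var - alpha * np.asarray(delta)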

@@ -0,0 +1,65 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""ApplyPowerSignD op"""
from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType

apply_power_sign_d_op_info = TBERegOp("ApplyPowerSign") \
.fusion_type("OPAQUE") \
.async_flag(False) \
.binfile_name("apply_power_sign_d.so") \
.compute_cost(10) \
.kernel_name("apply_power_sign_d") \
.partial_flag(True) \
.input(0, "var", False, "required", "all") \
.input(1, "m", False, "required", "all") \
.input(2, "lr", False, "required", "all") \
.input(3, "logbase", False, "required", "all") \
.input(4, "sign_decay", False, "required", "all") \
.input(5, "beta", False, "required", "all") \
.input(6, "grad", False, "required", "all") \
.output(0, "var", False, "required", "all") \
.output(1, "m", False, "required", "all") \
.dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_Default, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default, DataType.F16_5HD, DataType.F16_5HD,
DataType.F16_5HD) \
.dtype_format(DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_Default, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0,
DataType.F16_C1HWNCoC0) \
.dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
DataType.F16_Default) \
.dtype_format(DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_Default, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default, DataType.F16_FracZ, DataType.F16_FracZ,
DataType.F16_FracZ) \
.dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_Default, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default, DataType.F32_5HD, DataType.F32_5HD,
DataType.F32_5HD) \
.dtype_format(DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_Default, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0,
DataType.F32_C1HWNCoC0) \
.dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
DataType.F32_Default) \
.dtype_format(DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_Default, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default, DataType.F32_FracZ, DataType.F32_FracZ,
DataType.F32_FracZ) \
.get_op_info()


@op_info_register(apply_power_sign_d_op_info)
def _apply_power_sign_tbe():
"""ApplyPowerSignD TBE register"""
return
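
ApplyPowerSign differs from ApplyAddSign only in how the sign agreement scales the gradient: exponentially through logbase rather than additively. A NumPy sketch of the expected update (mirroring the TensorFlow op of the same name; the helper name is ours):

import numpy as np


def power_sign_reference(var, m, lr, logbase, sign_decay, beta, grad):
    """NumPy reference for the assumed ApplyPowerSign semantics."""
    m_new = beta * m + (1.0 - beta) * grad                                          # moving average of the gradient
    update = np.exp(logbase * sign_decay * np.sign(grad) * np.sign(m_new)) * grad   # PowerSign scaling of the gradient
    var_new = var - lr * update
    return var_new, m_new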

@@ -0,0 +1,54 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""ApplyProximalGradientDescent op"""
from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType

apply_proximal_gradient_descent_op_info = TBERegOp("ApplyProximalGradientDescent") \
.fusion_type("OPAQUE") \
.async_flag(False) \
.binfile_name("apply_proximal_gradient_descent.so") \
.compute_cost(10) \
.kernel_name("apply_proximal_gradient_descent") \
.partial_flag(True) \
.input(0, "var", False, "required", "all") \
.input(1, "alpha", False, "required", "all") \
.input(2, "l1", False, "required", "all") \
.input(3, "l2", False, "required", "all") \
.input(4, "delta", False, "required", "all") \
.output(0, "var", False, "required", "all") \
.dtype_format(DataType.F16_5HD, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
DataType.F16_5HD, DataType.F16_5HD) \
.dtype_format(DataType.F16_FracZ, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
DataType.F16_FracZ, DataType.F16_FracZ) \
.dtype_format(DataType.F16_C1HWNCoC0, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0) \
.dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default) \
.dtype_format(DataType.F32_5HD, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
DataType.F32_5HD, DataType.F32_5HD) \
.dtype_format(DataType.F32_FracZ, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
DataType.F32_FracZ, DataType.F32_FracZ) \
.dtype_format(DataType.F32_C1HWNCoC0, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0) \
.dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default) \
.get_op_info()


@op_info_register(apply_proximal_gradient_descent_op_info)
def _apply_proximal_gradient_descent_tbe():
"""ApplyProximalGradientDescent TBE register"""
return
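
ApplyProximalGradientDescent takes a plain descent step and then applies the proximal operator for the l1/l2 penalties (soft-thresholding plus shrinkage). A NumPy sketch of the expected update (mirroring the TensorFlow op of the same name; the helper name is ours):

import numpy as np


def proximal_gradient_descent_reference(var, alpha, l1, l2, delta):
    """NumPy reference for the assumed ApplyProximalGradientDescent semantics."""
    prox_v = var - alpha * delta                        # plain gradient step
    if l1 > 0:
        # soft-threshold by alpha * l1, then shrink by the l2 term
        return np.sign(prox_v) * np.maximum(np.abs(prox_v) - alpha * l1, 0.0) / (1.0 + alpha * l2)
    return prox_v / (1.0 + alpha * l2)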

@@ -74,6 +74,7 @@ from .nn_ops import (LSTM, SGD, Adam, SparseApplyAdam, SparseApplyLazyAdam, Appl
TopK, BinaryCrossEntropy, SparseApplyAdagrad, LARSUpdate, ApplyFtrl, SparseApplyFtrl,
ApplyProximalAdagrad, SparseApplyProximalAdagrad,
ApplyAdaMax, ApplyAdadelta, ApplyAdagrad, ApplyAdagradV2,
ApplyAddSign, ApplyPowerSign, ApplyGradientDescent, ApplyProximalGradientDescent,
ApplyRMSProp, ApplyCenteredRMSProp, BasicLSTMCell, InTopK)
from .other_ops import (Assign, IOU, BoundingBoxDecode, BoundingBoxEncode,
CheckValid, MakeRefKey, Partial, Depend, CheckBprop)
@@ -295,6 +296,10 @@ __all__ = [
"ApplyAdadelta",
"ApplyAdagrad",
"ApplyAdagradV2",
"ApplyAddSign",
"ApplyPowerSign",
"ApplyGradientDescent",
"ApplyProximalGradientDescent",
"BatchToSpace",
"Atan2",
"ApplyRMSProp",

File diff suppressed because it is too large

@@ -351,6 +351,64 @@ class ApplyAdagradV2Net(nn.Cell):
return out


class ApplyAddSignNet(nn.Cell):
def __init__(self):
super(ApplyAddSignNet, self).__init__()
self.apply_add_sign = P.ApplyAddSign()
self.lr = 0.001
self.alpha = 1.0
self.sign_decay = 0.99
self.beta = 0.99
self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")
self.m = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="m")

def construct(self, grad):
out = self.apply_add_sign(self.var, self.m, self.lr, self.alpha, self.sign_decay, self.beta, grad)
return out


class ApplyPowerSignNet(nn.Cell):
def __init__(self):
super(ApplyPowerSignNet, self).__init__()
self.apply_power_sign = P.ApplyPowerSign()
self.lr = 0.001
self.logbase = np.e
self.sign_decay = 0.99
self.beta = 0.99
self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")
self.m = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="m")

def construct(self, grad):
out = self.apply_power_sign(self.var, self.m, self.lr, self.logbase, self.sign_decay, self.beta, grad)
return out


class ApplyGradientDescentNet(nn.Cell):
def __init__(self):
super(ApplyGradientDescentNet, self).__init__()
self.apply_gradient_descent = P.ApplyGradientDescent()
self.alpha = 0.001
self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")

def construct(self, delta):
out = self.apply_gradient_descent(self.var, self.alpha, delta)
return out


class ApplyProximalGradientDescentNet(nn.Cell):
def __init__(self):
super(ApplyProximalGradientDescentNet, self).__init__()
self.apply_proximal_gradient_descent = P.ApplyProximalGradientDescent()
self.alpha = 0.001
self.l1 = 0.0
self.l2 = 0.0
self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")

def construct(self, delta):
out = self.apply_proximal_gradient_descent(self.var, self.alpha, self.l1, self.l2, delta)
return out


class SparseApplyAdagradNet(nn.Cell):
def __init__(self):
super(SparseApplyAdagradNet, self).__init__()
@@ -1241,6 +1299,22 @@ test_case_nn_ops = [
'block': ApplyAdagradV2Net(),
'desc_inputs': [[3, 3]],
'skip': ['backward']}),
('ApplyAddSign', {
'block': ApplyAddSignNet(),
'desc_inputs': [[3, 3]],
'skip': ['backward']}),
('ApplyPowerSign', {
'block': ApplyPowerSignNet(),
'desc_inputs': [[3, 3]],
'skip': ['backward']}),
('ApplyGradientDescent', {
'block': ApplyGradientDescentNet(),
'desc_inputs': [[3, 3]],
'skip': ['backward']}),
('ApplyProximalGradientDescent', {
'block': ApplyProximalGradientDescentNet(),
'desc_inputs': [[3, 3]],
'skip': ['backward']}),
('Flatten_1', {
'block': NetForFlatten(),
'desc_inputs': [Tensor(np.ones([2, 3, 4]).astype(np.int32)), Tensor(np.ones([2, 12]).astype(np.int32))],
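
The entries above only run forward shape and dtype checks ('skip': ['backward']). For a quick manual smoke test, one of the new nets can also be exercised directly; a sketch assuming an Ascend target, since the TBE kernels registered in this commit are Ascend-only:

import numpy as np
from mindspore import Tensor, context

context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")   # assumed Ascend device

net = ApplyGradientDescentNet()
delta = Tensor(np.ones((3, 3)).astype(np.float32))
out = net(delta)    # expected: var <- var - alpha * delta, returned as out
print(out)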
