add tbe some ops about optimizer for vm

pull/1924/head
liuxiao 5 years ago
parent cc0add562b
commit cda547e92f

@ -69,6 +69,10 @@ static std::map<string, string> tbe_func_adapter_map = {
{"strided_slice", "strided_slice_d"},
{"strided_slice_grad", "strided_slice_grad_d"},
{"sparse_apply_ftrl", "sparse_apply_ftrl_d"},
{"apply_ada_max", "apply_ada_max_d"},
{"apply_adadelta", "apply_adadelta_d"},
{"apply_adagrad", "apply_adagrad_d"},
{"apply_adagrad_v2", "apply_adagradv2_d"},
{"transpose", "transpose_d"},
{"fill", "fill_d"},
{"unsorted_segment_sum", "unsorted_segment_sum_d"},

@ -27,6 +27,10 @@ from .add_n import _add_n_tbe
from .apply_ftrl import _apply_ftrl_tbe
from .apply_momentum import _apply_momentum_tbe
from .apply_adam import _apply_adam_tbe
from .apply_ada_max import _apply_ada_max_tbe
from .apply_adadelta import _apply_adadelta_tbe
from .apply_adagrad import _apply_adagrad_tbe
from .apply_adagrad_v2 import _apply_adagrad_v2_tbe
from .adam_apply_one import _adam_apply_one_tbe
from .assign import _assign_tbe
from .assign_add import _assign_add_tbe

@ -0,0 +1,68 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""ApplyAdaMaxD op"""
from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
apply_ada_max_d_op_info = TBERegOp("ApplyAdaMax") \
.fusion_type("OPAQUE") \
.async_flag(False) \
.binfile_name("apply_ada_max_d.so") \
.compute_cost(10) \
.kernel_name("apply_ada_max_d") \
.partial_flag(True) \
.input(0, "var", False, "required", "all") \
.input(1, "m", False, "required", "all") \
.input(2, "v", False, "required", "all") \
.input(3, "beta1_power", False, "required", "all") \
.input(4, "lr", False, "required", "all") \
.input(5, "beta1", False, "required", "all") \
.input(6, "beta2", False, "required", "all") \
.input(7, "epsilon", False, "required", "all") \
.input(8, "grad", False, "required", "all") \
.output(0, "var", False, "required", "all") \
.output(1, "m", False, "required", "all") \
.output(2, "v", False, "required", "all") \
.dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD) \
.dtype_format(DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_FracZ) \
.dtype_format(DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0) \
.dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \
.dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD) \
.dtype_format(DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ) \
.dtype_format(DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0) \
.dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \
.get_op_info()
@op_info_register(apply_ada_max_d_op_info)
def _apply_ada_max_tbe():
"""ApplyAdaMaxD TBE register"""
return

@ -0,0 +1,66 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""ApplyAdadeltaD op"""
from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
apply_adadelta_d_op_info = TBERegOp("ApplyAdadelta") \
.fusion_type("OPAQUE") \
.async_flag(False) \
.binfile_name("apply_adadelta_d.so") \
.compute_cost(10) \
.kernel_name("apply_adadelta_d") \
.partial_flag(True) \
.input(0, "var", False, "required", "all") \
.input(1, "accum", False, "required", "all") \
.input(2, "accum_update", False, "required", "all") \
.input(3, "lr", False, "required", "all") \
.input(4, "rho", False, "required", "all") \
.input(5, "epsilon", False, "required", "all") \
.input(6, "grad", False, "required", "all") \
.output(0, "var", False, "required", "all") \
.output(1, "accum", False, "required", "all") \
.output(2, "accum_update", False, "required", "all") \
.dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default, DataType.F16_5HD, DataType.F16_5HD,
DataType.F16_5HD, DataType.F16_5HD) \
.dtype_format(DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default, DataType.F16_FracZ, DataType.F16_FracZ,
DataType.F16_FracZ, DataType.F16_FracZ) \
.dtype_format(DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0,
DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0) \
.dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default) \
.dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default, DataType.F32_5HD, DataType.F32_5HD,
DataType.F32_5HD, DataType.F32_5HD) \
.dtype_format(DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default, DataType.F32_FracZ, DataType.F32_FracZ,
DataType.F32_FracZ, DataType.F32_FracZ) \
.dtype_format(DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0,
DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0) \
.dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default) \
.get_op_info()
@op_info_register(apply_adadelta_d_op_info)
def _apply_adadelta_tbe():
"""ApplyAdadeltaD TBE register"""
return

@ -0,0 +1,55 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""ApplyAdagradD op"""
from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
apply_adagrad_d_op_info = TBERegOp("ApplyAdagrad") \
.fusion_type("OPAQUE") \
.async_flag(False) \
.binfile_name("apply_adagrad_d.so") \
.compute_cost(10) \
.kernel_name("apply_adagrad_d") \
.partial_flag(True) \
.attr("update_slots", "optional", "bool", "true,false", "false") \
.input(0, "var", False, "required", "all") \
.input(1, "accum", False, "required", "all") \
.input(2, "lr", False, "required", "all") \
.input(3, "grad", False, "required", "all") \
.output(0, "var", False, "required", "all") \
.output(1, "accum", False, "required", "all") \
.dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_Default, DataType.F16_5HD,
DataType.F16_5HD, DataType.F16_5HD) \
.dtype_format(DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_Default, DataType.F16_FracZ,
DataType.F16_FracZ, DataType.F16_FracZ) \
.dtype_format(DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_Default, DataType.F16_C1HWNCoC0,
DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0) \
.dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default) \
.dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_Default, DataType.F32_5HD,
DataType.F32_5HD, DataType.F32_5HD) \
.dtype_format(DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_Default, DataType.F32_FracZ,
DataType.F32_FracZ, DataType.F32_FracZ) \
.dtype_format(DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_Default, DataType.F32_C1HWNCoC0,
DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0) \
.dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default) \
.get_op_info()
@op_info_register(apply_adagrad_d_op_info)
def _apply_adagrad_tbe():
"""ApplyAdagradD TBE register"""
return

@ -0,0 +1,56 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""ApplyAdagradV2D op"""
from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
apply_adagrad_v2_d_op_info = TBERegOp("ApplyAdagradV2") \
.fusion_type("OPAQUE") \
.async_flag(False) \
.binfile_name("apply_adagradv2_d.so") \
.compute_cost(10) \
.kernel_name("apply_adagradv2_d") \
.partial_flag(True) \
.attr("epsilon", "required", "float", "all") \
.attr("update_slots", "optional", "bool", "true,false", "false") \
.input(0, "var", False, "required", "all") \
.input(1, "accum", False, "required", "all") \
.input(2, "lr", False, "required", "all") \
.input(3, "grad", False, "required", "all") \
.output(0, "var", False, "required", "all") \
.output(1, "accum", False, "required", "all") \
.dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_Default, DataType.F16_5HD,
DataType.F16_5HD, DataType.F16_5HD) \
.dtype_format(DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_Default, DataType.F16_FracZ,
DataType.F16_FracZ, DataType.F16_FracZ) \
.dtype_format(DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_Default, DataType.F16_C1HWNCoC0,
DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0) \
.dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default) \
.dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_Default, DataType.F32_5HD,
DataType.F32_5HD, DataType.F32_5HD) \
.dtype_format(DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_Default, DataType.F32_FracZ,
DataType.F32_FracZ, DataType.F32_FracZ) \
.dtype_format(DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_Default, DataType.F32_C1HWNCoC0,
DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0) \
.dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default) \
.get_op_info()
@op_info_register(apply_adagrad_v2_d_op_info)
def _apply_adagrad_v2_tbe():
"""ApplyAdagradV2D TBE register"""
return

@ -72,6 +72,7 @@ from .nn_ops import (LSTM, SGD, Adam, ApplyMomentum, BatchNorm,
SparseSoftmaxCrossEntropyWithLogits, Tanh,
TopK, BinaryCrossEntropy, SparseApplyAdagrad, LARSUpdate, ApplyFtrl, SparseApplyFtrl,
ApplyProximalAdagrad, SparseApplyProximalAdagrad,
ApplyAdaMax, ApplyAdadelta, ApplyAdagrad, ApplyAdagradV2,
ApplyRMSProp, ApplyCenteredRMSProp, BasicLSTMCell)
from .other_ops import (Assign, IOU, BoundingBoxDecode, BoundingBoxEncode,
CheckValid, MakeRefKey, Partial, Depend, CheckBprop, ConfusionMatrix)
@ -278,6 +279,10 @@ __all__ = [
"SparseApplyFtrl",
"ApplyProximalAdagrad",
"SparseApplyProximalAdagrad",
"ApplyAdaMax",
"ApplyAdadelta",
"ApplyAdagrad",
"ApplyAdagradV2",
"BatchToSpace",
"Atan2",
"ApplyRMSProp",

File diff suppressed because it is too large Load Diff

@ -270,6 +270,67 @@ class ApplyProximalAdagradNet(nn.Cell):
return out
class ApplyAdaMaxNet(nn.Cell):
def __init__(self):
super(ApplyAdaMaxNet, self).__init__()
self.apply_ada_max = P.ApplyAdaMax()
self.beta1_power = 0.9
self.lr = 0.001
self.beta1 = 0.9
self.beta2 = 0.99
self.epsilon = 1e-10
self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")
self.m = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="m")
self.v = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="v")
def construct(self, grad):
out = self.apply_ada_max(self.var, self.m, self.v, self.beta1_power, self.lr,
self.beta1, self.beta2, self.epsilon, grad)
return out
class ApplyAdadeltaNet(nn.Cell):
def __init__(self):
super(ApplyAdadeltaNet, self).__init__()
self.apply_adadelta = P.ApplyAdadelta()
self.lr = 0.001
self.rho = 0.0
self.epsilon = 1e-6
self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")
self.accum = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum")
self.accum_update = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum_update")
def construct(self, grad):
out = self.apply_adadelta(self.var, self.accum, self.accum_update, self.lr, self.rho, self.epsilon, grad)
return out
class ApplyAdagradNet(nn.Cell):
def __init__(self):
super(ApplyAdagradNet, self).__init__()
self.apply_adagrad = P.ApplyAdagrad()
self.lr = 0.001
self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")
self.accum = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum")
def construct(self, grad):
out = self.apply_adagrad(self.var, self.accum, self.lr, grad)
return out
class ApplyAdagradV2Net(nn.Cell):
def __init__(self):
super(ApplyAdagradV2Net, self).__init__()
self.apply_adagrad_v2 = P.ApplyAdagradV2(epsilon=1e-6)
self.lr = 0.001
self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")
self.accum = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum")
def construct(self, grad):
out = self.apply_adagrad_v2(self.var, self.accum, self.lr, grad)
return out
class ApplyRMSNet(nn.Cell):
def __init__(self):
super(ApplyRMSNet, self).__init__()
@ -1072,6 +1133,22 @@ test_case_nn_ops = [
'block': SparseApplyProximalAdagradNet(),
'desc_inputs': [[3, 3], Tensor(np.ones((3,), np.int32))],
'skip': ['backward']}),
('ApplyAdaMax', {
'block': ApplyAdaMaxNet(),
'desc_inputs': [[3, 3]],
'skip': ['backward']}),
('ApplyAdadelta', {
'block': ApplyAdadeltaNet(),
'desc_inputs': [[3, 3]],
'skip': ['backward']}),
('ApplyAdagrad', {
'block': ApplyAdagradNet(),
'desc_inputs': [[3, 3]],
'skip': ['backward']}),
('ApplyAdagradV2', {
'block': ApplyAdagradV2Net(),
'desc_inputs': [[3, 3]],
'skip': ['backward']}),
('Flatten_1', {
'block': NetForFlatten(),
'desc_inputs': [Tensor(np.ones([2, 3, 4]).astype(np.int32)), Tensor(np.ones([2, 12]).astype(np.int32))],

Loading…
Cancel
Save