Add TBE registrations for several optimizer ops for VM

pull/1924/head
liuxiao 5 years ago
parent cc0add562b
commit cda547e92f

@ -69,6 +69,10 @@ static std::map<string, string> tbe_func_adapter_map = {
{"strided_slice", "strided_slice_d"}, {"strided_slice", "strided_slice_d"},
{"strided_slice_grad", "strided_slice_grad_d"}, {"strided_slice_grad", "strided_slice_grad_d"},
{"sparse_apply_ftrl", "sparse_apply_ftrl_d"}, {"sparse_apply_ftrl", "sparse_apply_ftrl_d"},
{"apply_ada_max", "apply_ada_max_d"},
{"apply_adadelta", "apply_adadelta_d"},
{"apply_adagrad", "apply_adagrad_d"},
{"apply_adagrad_v2", "apply_adagradv2_d"},
{"transpose", "transpose_d"}, {"transpose", "transpose_d"},
{"fill", "fill_d"}, {"fill", "fill_d"},
{"unsorted_segment_sum", "unsorted_segment_sum_d"}, {"unsorted_segment_sum", "unsorted_segment_sum_d"},

@ -27,6 +27,10 @@ from .add_n import _add_n_tbe
from .apply_ftrl import _apply_ftrl_tbe from .apply_ftrl import _apply_ftrl_tbe
from .apply_momentum import _apply_momentum_tbe from .apply_momentum import _apply_momentum_tbe
from .apply_adam import _apply_adam_tbe from .apply_adam import _apply_adam_tbe
from .apply_ada_max import _apply_ada_max_tbe
from .apply_adadelta import _apply_adadelta_tbe
from .apply_adagrad import _apply_adagrad_tbe
from .apply_adagrad_v2 import _apply_adagrad_v2_tbe
from .adam_apply_one import _adam_apply_one_tbe from .adam_apply_one import _adam_apply_one_tbe
from .assign import _assign_tbe from .assign import _assign_tbe
from .assign_add import _assign_add_tbe from .assign_add import _assign_add_tbe

@ -0,0 +1,68 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""ApplyAdaMaxD op"""
from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
# Op info for the "ApplyAdaMax" primitive, bound to the TBE kernel
# "apply_ada_max_d" (binary "apply_ada_max_d.so").
# NOTE(review): every dtype_format row carries 12 entries, presumably one per
# input (9) followed by one per output (3) in declaration order -- confirm
# against the TBERegOp documentation.
apply_ada_max_d_op_info = (
    TBERegOp("ApplyAdaMax")
    .fusion_type("OPAQUE")
    .async_flag(False)
    .binfile_name("apply_ada_max_d.so")
    .compute_cost(10)
    .kernel_name("apply_ada_max_d")
    .partial_flag(True)
    .input(0, "var", False, "required", "all")
    .input(1, "m", False, "required", "all")
    .input(2, "v", False, "required", "all")
    .input(3, "beta1_power", False, "required", "all")
    .input(4, "lr", False, "required", "all")
    .input(5, "beta1", False, "required", "all")
    .input(6, "beta2", False, "required", "all")
    .input(7, "epsilon", False, "required", "all")
    .input(8, "grad", False, "required", "all")
    .output(0, "var", False, "required", "all")
    .output(1, "m", False, "required", "all")
    .output(2, "v", False, "required", "all")
    .dtype_format(
        DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD,
        DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
        DataType.F16_Default, DataType.F16_Default,
        DataType.F16_5HD,
        DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD,
    )
    .dtype_format(
        DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_FracZ,
        DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
        DataType.F16_Default, DataType.F16_Default,
        DataType.F16_FracZ,
        DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_FracZ,
    )
    .dtype_format(
        DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0,
        DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
        DataType.F16_Default, DataType.F16_Default,
        DataType.F16_C1HWNCoC0,
        DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0,
    )
    .dtype_format(
        DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
        DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
        DataType.F16_Default, DataType.F16_Default,
        DataType.F16_Default,
        DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
    )
    .dtype_format(
        DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD,
        DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
        DataType.F32_Default, DataType.F32_Default,
        DataType.F32_5HD,
        DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD,
    )
    .dtype_format(
        DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ,
        DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
        DataType.F32_Default, DataType.F32_Default,
        DataType.F32_FracZ,
        DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ,
    )
    .dtype_format(
        DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0,
        DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
        DataType.F32_Default, DataType.F32_Default,
        DataType.F32_C1HWNCoC0,
        DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0,
    )
    .dtype_format(
        DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
        DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
        DataType.F32_Default, DataType.F32_Default,
        DataType.F32_Default,
        DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
    )
    .get_op_info()
)
@op_info_register(apply_ada_max_d_op_info)
def _apply_ada_max_tbe():
    """Placeholder passed to op_info_register for the ApplyAdaMaxD TBE op; returns None."""

@ -0,0 +1,66 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""ApplyAdadeltaD op"""
from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
# Op info for the "ApplyAdadelta" primitive, bound to the TBE kernel
# "apply_adadelta_d" (binary "apply_adadelta_d.so").
# NOTE(review): every dtype_format row carries 10 entries, presumably one per
# input (7) followed by one per output (3) in declaration order -- confirm
# against the TBERegOp documentation.
apply_adadelta_d_op_info = (
    TBERegOp("ApplyAdadelta")
    .fusion_type("OPAQUE")
    .async_flag(False)
    .binfile_name("apply_adadelta_d.so")
    .compute_cost(10)
    .kernel_name("apply_adadelta_d")
    .partial_flag(True)
    .input(0, "var", False, "required", "all")
    .input(1, "accum", False, "required", "all")
    .input(2, "accum_update", False, "required", "all")
    .input(3, "lr", False, "required", "all")
    .input(4, "rho", False, "required", "all")
    .input(5, "epsilon", False, "required", "all")
    .input(6, "grad", False, "required", "all")
    .output(0, "var", False, "required", "all")
    .output(1, "accum", False, "required", "all")
    .output(2, "accum_update", False, "required", "all")
    .dtype_format(
        DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD,
        DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
        DataType.F16_5HD,
        DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD,
    )
    .dtype_format(
        DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_FracZ,
        DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
        DataType.F16_FracZ,
        DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_FracZ,
    )
    .dtype_format(
        DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0,
        DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
        DataType.F16_C1HWNCoC0,
        DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0,
    )
    .dtype_format(
        DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
        DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
        DataType.F16_Default,
        DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
    )
    .dtype_format(
        DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD,
        DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
        DataType.F32_5HD,
        DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD,
    )
    .dtype_format(
        DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ,
        DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
        DataType.F32_FracZ,
        DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ,
    )
    .dtype_format(
        DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0,
        DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
        DataType.F32_C1HWNCoC0,
        DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0,
    )
    .dtype_format(
        DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
        DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
        DataType.F32_Default,
        DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
    )
    .get_op_info()
)
@op_info_register(apply_adadelta_d_op_info)
def _apply_adadelta_tbe():
    """Placeholder passed to op_info_register for the ApplyAdadeltaD TBE op; returns None."""

@ -0,0 +1,55 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""ApplyAdagradD op"""
from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
# Op info for the "ApplyAdagrad" primitive, bound to the TBE kernel
# "apply_adagrad_d" (binary "apply_adagrad_d.so").
# NOTE(review): every dtype_format row carries 6 entries, presumably one per
# input (4) followed by one per output (2) in declaration order -- confirm
# against the TBERegOp documentation.
apply_adagrad_d_op_info = (
    TBERegOp("ApplyAdagrad")
    .fusion_type("OPAQUE")
    .async_flag(False)
    .binfile_name("apply_adagrad_d.so")
    .compute_cost(10)
    .kernel_name("apply_adagrad_d")
    .partial_flag(True)
    .attr("update_slots", "optional", "bool", "true,false", "false")
    .input(0, "var", False, "required", "all")
    .input(1, "accum", False, "required", "all")
    .input(2, "lr", False, "required", "all")
    .input(3, "grad", False, "required", "all")
    .output(0, "var", False, "required", "all")
    .output(1, "accum", False, "required", "all")
    .dtype_format(
        DataType.F16_5HD, DataType.F16_5HD,
        DataType.F16_Default, DataType.F16_5HD,
        DataType.F16_5HD, DataType.F16_5HD,
    )
    .dtype_format(
        DataType.F16_FracZ, DataType.F16_FracZ,
        DataType.F16_Default, DataType.F16_FracZ,
        DataType.F16_FracZ, DataType.F16_FracZ,
    )
    .dtype_format(
        DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0,
        DataType.F16_Default, DataType.F16_C1HWNCoC0,
        DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0,
    )
    .dtype_format(
        DataType.F16_Default, DataType.F16_Default,
        DataType.F16_Default, DataType.F16_Default,
        DataType.F16_Default, DataType.F16_Default,
    )
    .dtype_format(
        DataType.F32_5HD, DataType.F32_5HD,
        DataType.F32_Default, DataType.F32_5HD,
        DataType.F32_5HD, DataType.F32_5HD,
    )
    .dtype_format(
        DataType.F32_FracZ, DataType.F32_FracZ,
        DataType.F32_Default, DataType.F32_FracZ,
        DataType.F32_FracZ, DataType.F32_FracZ,
    )
    .dtype_format(
        DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0,
        DataType.F32_Default, DataType.F32_C1HWNCoC0,
        DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0,
    )
    .dtype_format(
        DataType.F32_Default, DataType.F32_Default,
        DataType.F32_Default, DataType.F32_Default,
        DataType.F32_Default, DataType.F32_Default,
    )
    .get_op_info()
)
@op_info_register(apply_adagrad_d_op_info)
def _apply_adagrad_tbe():
    """Placeholder passed to op_info_register for the ApplyAdagradD TBE op; returns None."""

@ -0,0 +1,56 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""ApplyAdagradV2D op"""
from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
# Op info for the "ApplyAdagradV2" primitive, bound to the TBE kernel
# "apply_adagradv2_d" (binary "apply_adagradv2_d.so").
# NOTE(review): every dtype_format row carries 6 entries, presumably one per
# input (4) followed by one per output (2) in declaration order -- confirm
# against the TBERegOp documentation.
apply_adagrad_v2_d_op_info = (
    TBERegOp("ApplyAdagradV2")
    .fusion_type("OPAQUE")
    .async_flag(False)
    .binfile_name("apply_adagradv2_d.so")
    .compute_cost(10)
    .kernel_name("apply_adagradv2_d")
    .partial_flag(True)
    .attr("epsilon", "required", "float", "all")
    .attr("update_slots", "optional", "bool", "true,false", "false")
    .input(0, "var", False, "required", "all")
    .input(1, "accum", False, "required", "all")
    .input(2, "lr", False, "required", "all")
    .input(3, "grad", False, "required", "all")
    .output(0, "var", False, "required", "all")
    .output(1, "accum", False, "required", "all")
    .dtype_format(
        DataType.F16_5HD, DataType.F16_5HD,
        DataType.F16_Default, DataType.F16_5HD,
        DataType.F16_5HD, DataType.F16_5HD,
    )
    .dtype_format(
        DataType.F16_FracZ, DataType.F16_FracZ,
        DataType.F16_Default, DataType.F16_FracZ,
        DataType.F16_FracZ, DataType.F16_FracZ,
    )
    .dtype_format(
        DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0,
        DataType.F16_Default, DataType.F16_C1HWNCoC0,
        DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0,
    )
    .dtype_format(
        DataType.F16_Default, DataType.F16_Default,
        DataType.F16_Default, DataType.F16_Default,
        DataType.F16_Default, DataType.F16_Default,
    )
    .dtype_format(
        DataType.F32_5HD, DataType.F32_5HD,
        DataType.F32_Default, DataType.F32_5HD,
        DataType.F32_5HD, DataType.F32_5HD,
    )
    .dtype_format(
        DataType.F32_FracZ, DataType.F32_FracZ,
        DataType.F32_Default, DataType.F32_FracZ,
        DataType.F32_FracZ, DataType.F32_FracZ,
    )
    .dtype_format(
        DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0,
        DataType.F32_Default, DataType.F32_C1HWNCoC0,
        DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0,
    )
    .dtype_format(
        DataType.F32_Default, DataType.F32_Default,
        DataType.F32_Default, DataType.F32_Default,
        DataType.F32_Default, DataType.F32_Default,
    )
    .get_op_info()
)
@op_info_register(apply_adagrad_v2_d_op_info)
def _apply_adagrad_v2_tbe():
    """Placeholder passed to op_info_register for the ApplyAdagradV2D TBE op; returns None."""

@ -72,6 +72,7 @@ from .nn_ops import (LSTM, SGD, Adam, ApplyMomentum, BatchNorm,
SparseSoftmaxCrossEntropyWithLogits, Tanh, SparseSoftmaxCrossEntropyWithLogits, Tanh,
TopK, BinaryCrossEntropy, SparseApplyAdagrad, LARSUpdate, ApplyFtrl, SparseApplyFtrl, TopK, BinaryCrossEntropy, SparseApplyAdagrad, LARSUpdate, ApplyFtrl, SparseApplyFtrl,
ApplyProximalAdagrad, SparseApplyProximalAdagrad, ApplyProximalAdagrad, SparseApplyProximalAdagrad,
ApplyAdaMax, ApplyAdadelta, ApplyAdagrad, ApplyAdagradV2,
ApplyRMSProp, ApplyCenteredRMSProp, BasicLSTMCell) ApplyRMSProp, ApplyCenteredRMSProp, BasicLSTMCell)
from .other_ops import (Assign, IOU, BoundingBoxDecode, BoundingBoxEncode, from .other_ops import (Assign, IOU, BoundingBoxDecode, BoundingBoxEncode,
CheckValid, MakeRefKey, Partial, Depend, CheckBprop, ConfusionMatrix) CheckValid, MakeRefKey, Partial, Depend, CheckBprop, ConfusionMatrix)
@ -278,6 +279,10 @@ __all__ = [
"SparseApplyFtrl", "SparseApplyFtrl",
"ApplyProximalAdagrad", "ApplyProximalAdagrad",
"SparseApplyProximalAdagrad", "SparseApplyProximalAdagrad",
"ApplyAdaMax",
"ApplyAdadelta",
"ApplyAdagrad",
"ApplyAdagradV2",
"BatchToSpace", "BatchToSpace",
"Atan2", "Atan2",
"ApplyRMSProp", "ApplyRMSProp",

File diff suppressed because it is too large Load Diff

@ -270,6 +270,67 @@ class ApplyProximalAdagradNet(nn.Cell):
return out return out
class ApplyAdaMaxNet(nn.Cell):
    """Test cell that feeds its own var/m/v parameters and fixed scalars to P.ApplyAdaMax."""

    def __init__(self):
        super(ApplyAdaMaxNet, self).__init__()
        self.apply_ada_max = P.ApplyAdaMax()

        # Random 3x3 float32 state; created in var, m, v order so the sequence
        # of random draws is unchanged.
        shape = (3, 3)
        self.var = Parameter(Tensor(np.random.rand(*shape).astype(np.float32)), name="var")
        self.m = Parameter(Tensor(np.random.rand(*shape).astype(np.float32)), name="m")
        self.v = Parameter(Tensor(np.random.rand(*shape).astype(np.float32)), name="v")

        # Scalar hyperparameters passed to the op on every call.
        self.beta1_power = 0.9
        self.lr = 0.001
        self.beta1 = 0.9
        self.beta2 = 0.99
        self.epsilon = 1e-10

    def construct(self, grad):
        """Apply the op to the stored state with `grad` and return its result."""
        return self.apply_ada_max(self.var, self.m, self.v,
                                  self.beta1_power, self.lr, self.beta1, self.beta2, self.epsilon,
                                  grad)
class ApplyAdadeltaNet(nn.Cell):
    """Test cell that feeds its own var/accum/accum_update parameters and fixed scalars to P.ApplyAdadelta."""

    def __init__(self):
        super(ApplyAdadeltaNet, self).__init__()
        self.apply_adadelta = P.ApplyAdadelta()

        # Random 3x3 float32 state; created in var, accum, accum_update order so
        # the sequence of random draws is unchanged.
        shape = (3, 3)
        self.var = Parameter(Tensor(np.random.rand(*shape).astype(np.float32)), name="var")
        self.accum = Parameter(Tensor(np.random.rand(*shape).astype(np.float32)), name="accum")
        self.accum_update = Parameter(Tensor(np.random.rand(*shape).astype(np.float32)), name="accum_update")

        # Scalar hyperparameters passed to the op on every call.
        self.lr = 0.001
        self.rho = 0.0
        self.epsilon = 1e-6

    def construct(self, grad):
        """Apply the op to the stored state with `grad` and return its result."""
        return self.apply_adadelta(self.var, self.accum, self.accum_update,
                                   self.lr, self.rho, self.epsilon,
                                   grad)
class ApplyAdagradNet(nn.Cell):
    """Test cell that feeds its own var/accum parameters and a fixed learning rate to P.ApplyAdagrad."""

    def __init__(self):
        super(ApplyAdagradNet, self).__init__()
        self.apply_adagrad = P.ApplyAdagrad()

        # Random 3x3 float32 state; created in var, accum order so the sequence
        # of random draws is unchanged.
        shape = (3, 3)
        self.var = Parameter(Tensor(np.random.rand(*shape).astype(np.float32)), name="var")
        self.accum = Parameter(Tensor(np.random.rand(*shape).astype(np.float32)), name="accum")

        self.lr = 0.001

    def construct(self, grad):
        """Apply the op to the stored state with `grad` and return its result."""
        return self.apply_adagrad(self.var, self.accum, self.lr, grad)
class ApplyAdagradV2Net(nn.Cell):
    """Test cell that feeds its own var/accum parameters and a fixed learning rate to P.ApplyAdagradV2."""

    def __init__(self):
        super(ApplyAdagradV2Net, self).__init__()
        # epsilon is supplied at construction time rather than per call.
        self.apply_adagrad_v2 = P.ApplyAdagradV2(epsilon=1e-6)

        # Random 3x3 float32 state; created in var, accum order so the sequence
        # of random draws is unchanged.
        shape = (3, 3)
        self.var = Parameter(Tensor(np.random.rand(*shape).astype(np.float32)), name="var")
        self.accum = Parameter(Tensor(np.random.rand(*shape).astype(np.float32)), name="accum")

        self.lr = 0.001

    def construct(self, grad):
        """Apply the op to the stored state with `grad` and return its result."""
        return self.apply_adagrad_v2(self.var, self.accum, self.lr, grad)
class ApplyRMSNet(nn.Cell): class ApplyRMSNet(nn.Cell):
def __init__(self): def __init__(self):
super(ApplyRMSNet, self).__init__() super(ApplyRMSNet, self).__init__()
@ -1072,6 +1133,22 @@ test_case_nn_ops = [
'block': SparseApplyProximalAdagradNet(), 'block': SparseApplyProximalAdagradNet(),
'desc_inputs': [[3, 3], Tensor(np.ones((3,), np.int32))], 'desc_inputs': [[3, 3], Tensor(np.ones((3,), np.int32))],
'skip': ['backward']}), 'skip': ['backward']}),
('ApplyAdaMax', {
'block': ApplyAdaMaxNet(),
'desc_inputs': [[3, 3]],
'skip': ['backward']}),
('ApplyAdadelta', {
'block': ApplyAdadeltaNet(),
'desc_inputs': [[3, 3]],
'skip': ['backward']}),
('ApplyAdagrad', {
'block': ApplyAdagradNet(),
'desc_inputs': [[3, 3]],
'skip': ['backward']}),
('ApplyAdagradV2', {
'block': ApplyAdagradV2Net(),
'desc_inputs': [[3, 3]],
'skip': ['backward']}),
('Flatten_1', { ('Flatten_1', {
'block': NetForFlatten(), 'block': NetForFlatten(),
'desc_inputs': [Tensor(np.ones([2, 3, 4]).astype(np.int32)), Tensor(np.ones([2, 12]).astype(np.int32))], 'desc_inputs': [Tensor(np.ones([2, 3, 4]).astype(np.int32)), Tensor(np.ones([2, 12]).astype(np.int32))],

Loading…
Cancel
Save