for pylint 4th

pull/1442/head
z00478463 6 years ago
parent c59e6667f1
commit f34d06cd0f

@@ -33,5 +33,5 @@ config = ed({
     "save_checkpoint_path": "./",
     "label_smooth": 1,
     "label_smooth_factor": 0.1,
-    "frequency": 278
+    "frequency": 834
 })

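For context, the config above is an EasyDict-style dictionary (assuming the usual `from easydict import EasyDict as ed` alias behind the `ed(...)` call), and `frequency` is the step interval at which the THOR example refreshes its second-order statistics. A minimal sketch, not part of the diff, of how such a config is built and read:

from easydict import EasyDict as ed  # assumed alias for the ed(...) seen above

config = ed({
    "save_checkpoint_path": "./",
    "label_smooth": 1,
    "label_smooth_factor": 0.1,
    "frequency": 834,
})

# EasyDict exposes keys as attributes as well as items.
assert config.frequency == config["frequency"] == 834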
@@ -22,12 +22,6 @@ from mindspore.nn.optim.optimizer import Optimizer
 from mindspore.ops import functional as F, composite as C, operations as P
 from mindspore.parallel._utils import _get_device_num, _get_mirror_mean
-from cus_ops.cus_matmul_cube_dense_right import CusMatMulCubeDenseRight
-from cus_ops.cus_matmul_cube_fracz_left_cast import CusMatMulCubeFraczLeftCast
-from cus_ops.cus_matmul_cube_dense_left import CusMatMulCubeDenseLeft
-from cus_ops.cus_matmul_cube_fracz_right_mul import CusMatMulCubeFraczRightMul
-from model.grad_reducer_thor import DistributedGradReducerThor
 momentum_opt = C.MultitypeFuncGraph("momentum_opt")
@@ -68,10 +62,10 @@ class THOR(Optimizer):
         self.matrix_G = ParameterTuple(matrix_G)
         self.A_inv_max = ParameterTuple(A_inv_max)
         self.G_inv_max = ParameterTuple(G_inv_max)
-        self.cube_matmul_left = CusMatMulCubeFraczLeftCast()
-        self.cube_matmul_left_fc = CusMatMulCubeDenseLeft()
-        self.cube_matmul_right_fc = CusMatMulCubeDenseRight()
-        self.cube_matmul_right_mul = CusMatMulCubeFraczRightMul()
+        self.cube_matmul_left = P.CusMatMulCubeFraczLeftCast()
+        self.cube_matmul_left_fc = P.CusMatMulCubeDenseLeft()
+        self.cube_matmul_right_fc = P.CusMatMulCubeDenseRight()
+        self.cube_matmul_right_mul = P.CusMatMulCubeFraczRightMul()
         self.transpose = P.Transpose()
         self.shape = P.Shape()
         self.reshape = P.Reshape()

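The optimizer hunks replace the local cus_ops imports with the custom cube primitives exposed through mindspore.ops.operations (already imported as P). A minimal sketch of the new construction pattern, assuming these Cus* primitives are registered in that namespace on this branch:

from mindspore.ops import operations as P

# The four cube matmul primitives used by THOR, now resolved via P.*
# instead of the removed cus_ops.* imports.
cube_matmul_left = P.CusMatMulCubeFraczLeftCast()
cube_matmul_left_fc = P.CusMatMulCubeDenseLeft()
cube_matmul_right_fc = P.CusMatMulCubeDenseRight()
cube_matmul_right_mul = P.CusMatMulCubeFraczRightMul()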
@@ -23,19 +23,9 @@ from mindspore.common.tensor import Tensor
 from mindspore.nn.cell import Cell
 from mindspore.nn.layer.activation import get_activation
 from mindspore.ops import operations as P
-from cus_ops.cus_batch_matmul import CusBatchMatMul
-from cus_ops.cus_cholesky_trsm import CusCholeskyTrsm
-from cus_ops.cus_fused_abs_max1 import CusFusedAbsMax1
-from cus_ops.cus_img2col import CusImg2Col
-from cus_ops.cus_matmul_cube import CusMatMulCube
-from cus_ops.cus_matrix_combine import CusMatrixCombine
-from cus_ops.cus_transpose02314 import CusTranspose02314
 import numpy as np
 C0 = 16
 def caculate_device_shape(matrix_dim, channel, is_A):
     ll = (0)
     if is_A:
@@ -153,11 +143,11 @@ class Conv2d_Thor(_Conv):
             group=self.group
         )
-        self.img2col = CusImg2Col(ksizes=ksizes, strides=strides)
-        self.cube_matmul = CusMatMulCube(transpose_a=True)
-        self.matrix_combine = CusMatrixCombine()
-        self.cholesky = CusCholeskyTrsm()
-        self.transpose02314 = CusTranspose02314()
+        self.img2col = P.CusImg2Col(ksizes=ksizes, strides=strides)
+        self.cube_matmul = P.CusMatMulCube(transpose_a=True)
+        self.matrix_combine = P.CusMatrixCombine()
+        self.cholesky = P.CusCholeskyTrsm()
+        self.transpose02314 = P.CusTranspose02314()
         self.matrix_A_dim = self.in_channels * self.kernel_size[0] * self.kernel_size[1]
         self.matrix_G_dim = self.out_channels
         self.matrix_A_device_shape, self.matrix_A_device_dim = caculate_device_shape(self.matrix_A_dim,
@@ -190,7 +180,7 @@ class Conv2d_Thor(_Conv):
         self.mul = P.Mul()
         self.cast = P.Cast()
         self.damping = Tensor(damping)
-        self.vector_matmul = CusBatchMatMul()
+        self.vector_matmul = P.CusBatchMatMul()
         self.diag_block_dim = 128
         self.channels_slice_flag = False
         if self.in_channels % C0 != 0:
@@ -221,8 +211,8 @@ class Conv2d_Thor(_Conv):
         self.dampingA = Tensor(np.identity(dampingA_dim), mstype.float32)
         self.dampingG = Tensor(np.identity(dampingG_dim), mstype.float32)
-        self.fused_abs_max1 = CusFusedAbsMax1([self.matrix_A_dim, self.matrix_A_dim])
-        self.fused_abs_max2 = CusFusedAbsMax1()
+        self.fused_abs_max1 = P.CusFusedAbsMax1([self.matrix_A_dim, self.matrix_A_dim])
+        self.fused_abs_max2 = P.CusFusedAbsMax1()
         self.log = P.Log()
         self.exp = P.Exp()
         self.sqrt = P.Sqrt()
@@ -375,9 +365,9 @@ class Dense_Thor(Cell):
         self.fake_G = Tensor(np.zeros([63, 63, 16, 16]).astype(np.float16))
         self.matmul = P.MatMul(transpose_b=True)
-        self.cube_matmul = CusMatMulCube(transpose_a=True)
-        self.matrix_combine = CusMatrixCombine()
-        self.cholesky = CusCholeskyTrsm()
+        self.cube_matmul = P.CusMatMulCube(transpose_a=True)
+        self.matrix_combine = P.CusMatrixCombine()
+        self.cholesky = P.CusCholeskyTrsm()
         self.shape = P.Shape()
         self.reshape = P.Reshape()
         self.transpose = P.Transpose()
@@ -386,7 +376,7 @@ class Dense_Thor(Cell):
         self.cast = P.Cast()
         self.damping = Tensor(damping)
         self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
-        self.vector_matmul = CusBatchMatMul()
+        self.vector_matmul = P.CusBatchMatMul()
         self.pad = P.Pad(((0, 24), (0, 24)))
         self.pad1 = P.Pad(((0, 8), (0, 8)))
         self.slice = P.Slice()
@@ -396,8 +386,8 @@ class Dense_Thor(Cell):
         self.axis = 0
         self.A_inv_max = Parameter(initializer(0, [1], mstype.float32), name="A_inv_max", requires_grad=False)
         self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), name="G_inv_max", requires_grad=False)
-        self.fused_abs_max1 = CusFusedAbsMax1([1000, 1000])
-        self.fused_abs_max2 = CusFusedAbsMax1()
+        self.fused_abs_max1 = P.CusFusedAbsMax1([1000, 1000])
+        self.fused_abs_max2 = P.CusFusedAbsMax1()
         self.log = P.Log()
         self.exp = P.Exp()
         self.dampingA = Tensor(np.identity(2048), mstype.float32)

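Conv2d_Thor keeps the module-level constant C0 = 16 because the Ascend fractal formats tile the channel axis in blocks of 16; the channels_slice_flag seen above marks layers whose input channels are not 16-aligned. A small standalone sketch of that check (the helper name is illustrative, not from the diff):

C0 = 16  # channel block size of the fractal layout

def needs_channel_slice(in_channels):
    # Mirrors the Conv2d_Thor check: a channel count that is not a
    # multiple of C0 means the padded matrix_A must be sliced back.
    return in_channels % C0 != 0

print(needs_channel_slice(3))   # True, e.g. the first RGB convolution
print(needs_channel_slice(64))  # False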
@@ -33,6 +33,7 @@ cus_batchmatmul_op_info = TBERegOp("CusBatchMatMul") \
 def _get_flattern_shape(shape):
+    """_get_flattern_shape"""
     flattern_shape = 1
     for dim in shape:
         flattern_shape *= dim
@@ -40,6 +41,7 @@ def _get_flattern_shape(shape):
 def _inner_matmul_new(tik_instance, dtype, input1, input1_index, input2, input2_index, res, res_index):
+    """_inner_matmul_new"""
     input_1_local_UB = tik_instance.Tensor(dtype, [128], name="input_1_local_UB", scope=tik.scope_ubuf)
     t_1_0_local_UB = tik_instance.Tensor(dtype, [64 * 128], name="t_1_0_local_UB", scope=tik.scope_ubuf)
     tik_instance.data_move(input_1_local_UB, input1[input1_index], 0, 1, 16, 0, 0)
@@ -71,6 +73,7 @@ def _inner_matmul_new(tik_instance, dtype, input1, input1_index, input2, input2_
 def _inner_matmul_new_1_64_32_64(tik_instance, dtype, input1, input1_index, input2, input2_index, res, res_index):
+    """_inner_matmul_new_1_64_32_64"""
     input_1_local_UB = tik_instance.Tensor(dtype, [64], name="input_1_local_UB", scope=tik.scope_ubuf)
     tik_instance.data_move(input_1_local_UB, input1[input1_index], 0, 1, 8, 0, 0)
     with tik_instance.for_range(0, 2, thread_num=2) as thread_idx2:
@@ -90,6 +93,7 @@ def _inner_matmul_new_1_64_32_64(tik_instance, dtype, input1, input1_index, inpu
 @op_info_register(cus_batchmatmul_op_info)
 def CusBatchMatMul(input_x1, input_x2, output, transpose_a=False, transpose_b=True, kernel_name="batchmatmul"):
+    """CusBatchMatMul"""
     if util.get_product_version() == util.VERSION_MINI:
         tik_instance = tik.Tik(tik.Dprofile("v100", "mini"))
     else:
@@ -116,7 +120,6 @@ def CusBatchMatMul(input_x1, input_x2, output, transpose_a=False, transpose_b=Tr
     # if not transpose_a and transpose_b:
     batch, m, k = x1_shape
-    _, n, _ = x2_shape
     input1_shape = _get_flattern_shape(x1_shape)
     input1 = tik_instance.Tensor(dtype, input1_shape, name="input1", scope=tik.scope_gm)

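_get_flattern_shape, documented in the first hunk of this file, simply collapses a multi-dimensional shape into a flat element count for the global-memory tensors. A hedged reconstruction (the exact return form, a one-element tuple here, is assumed):

def _get_flattern_shape(shape):
    """_get_flattern_shape"""
    flattern_shape = 1
    for dim in shape:
        flattern_shape *= dim
    return (flattern_shape,)  # assumed 1-D shape for the tik GM tensors

print(_get_flattern_shape((128, 128, 16, 16)))  # (4194304,)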
@@ -32,6 +32,7 @@ cus_cholesky_trsm_op_info = TBERegOp("CusCholeskyTrsm") \
 @op_info_register(cus_cholesky_trsm_op_info)
 def CusCholeskyTrsm(input_x, output, kernel_name):
+    """CusCholeskyTrsm"""
     input_x_shape = input_x.get("shape")
     output_shape = output.get("shape")
     split_dim = 128

@@ -33,6 +33,7 @@ cus_fused_abs_max1_op_info = TBERegOp("CusFusedAbsMax1") \
 @op_info_register(cus_fused_abs_max1_op_info)
 def CusFusedAbsMax1(input_x, output, origin_shape=None, kernel_name="fused_abs_max1"):
+    """CusFusedAbsMax1"""
     input_x_shape = input_x.get("shape")
     output_shape = output.get("shape")

@@ -36,6 +36,7 @@ cus_img2col_info = TBERegOp("CusImg2Col") \
 @op_info_register(cus_img2col_info)
 def CusImg2Col(input_x, output, ksizes, strides, dilates, mode, kernel_name="img2col"):
+    """CusImg2Col"""
     input_x_shape = input_x.get("shape")
     input_x_dtype = input_x.get("dtype")
     N, C1, H, W, C0 = input_x_shape

@@ -17,10 +17,9 @@ limitations under the License.
 matmul
 """
 from __future__ import absolute_import
-from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
 import te.lang.cce
 import te.platform.cce_params as cce
+from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
 from te import tik
 from te import tvm
 from topi import generic
@@ -128,7 +127,7 @@ def _shape_check(shape_a, shape_b, shape_bias, src_dtype, trans_a, trans_b):
     if n_shape % cce.BLOCK_IN != 0 and n_shape != 1:
         raise RuntimeError("input shape N should be 1 or multiple of %d" % cce.BLOCK_IN)
-    if len(shape_bias):
+    if len(shape_bias) != 0:
         if len(shape_bias) == 1:
             if is_gevm or is_gemv:
                 if shape_bias[0] != m_shape * n_shape:
@@ -145,16 +144,19 @@ def _shape_check(shape_a, shape_b, shape_bias, src_dtype, trans_a, trans_b):
 def _get_bias(shape_bias):
     bias_length = shape_bias[0]
+    shb = []
     if bias_length % 16 == 0:
-        return shape_bias
+        shb = shape_bias
     else:
         bias_length = (bias_length // 16) * 16 + 16
         shape_bias = []
         shape_bias.append(bias_length)
-        return shape_bias
+        shb = shape_bias
+    return shb
 def _get_input_shape(shape_x):
+    """_get_input_shape"""
     dim_a = shape_x[0]
     dim_b = shape_x[1]
     res = []
@@ -173,6 +175,7 @@ def _get_input_shape(shape_x):
 def check_supported(input_x1, input_x2, bias=None, output_y={}, trans_a=False, trans_b=False, kernel_name="matmulcube"):
+    """check_supported"""
     shape_a = input_x1.get("shape")
     shape_b = input_x2.get("shape")
     print("shape_a: ", shape_a)
@@ -183,8 +186,6 @@ def check_supported(input_x1, input_x2, bias=None, output_y={}, trans_a=False, t
     util.check_shape_rule(shape_b)
     util.check_shape_size(shape_a, SHAPE_SIZE_LIMIT)
     util.check_shape_size(shape_b, SHAPE_SIZE_LIMIT)
-    if bias is not None and bool(bias):
-        shape_bias = bias.get("shape")
     try:
         trans_a_f = bool(1 - trans_a)
         if src_dtype == "float32" or src_dtype == "int32":

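The _get_bias rewrite above is a single-return refactor for pylint; the arithmetic is unchanged: a bias length already divisible by 16 is kept, otherwise it is rounded up to the next multiple of 16 (the cube block size). A standalone worked example of the same rounding (the helper name is illustrative):

def round_up_bias(shape_bias):
    # Same arithmetic as _get_bias in the hunk above.
    bias_length = shape_bias[0]
    if bias_length % 16 == 0:
        return shape_bias
    return [(bias_length // 16) * 16 + 16]

print(round_up_bias([1000]))  # [1008]
print(round_up_bias([2048]))  # [2048]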
@@ -43,6 +43,7 @@ cus_matmul_cube_dense_right_op_info = TBERegOp("CusMatMulCubeDenseRight") \
 @op_info_register(matmul_cube_dense_right_op_info)
 def CusMatMulCubeDenseRight(input_x1, input_x2, input_x3, bias=None, output_y={}, trans_a=False, trans_b=False,
                             kernel_name="matmulcube"):
+    """CusMatMulCubeDenseRight"""
     shape_a_temp = (128, 63, 16, 16)
     shape_b_temp = (128, 128, 16, 16)
     shape_output = output_y.get("shape")

@@ -17,9 +17,8 @@ limitations under the License.
 matmul
 """
 from __future__ import absolute_import
-import te.platform.cce_params as cce
 from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
+import te.platform.cce_params as cce
 from te import tik
 from topi.cce import util
@@ -141,6 +140,7 @@ src_dtype: str
 def _get_bias(shape_bias):
+    """_get_bias"""
     bias_length = shape_bias[0]
     if bias_length % 16 == 0:
         return shape_bias
@@ -152,6 +152,7 @@ def _get_bias(shape_bias):
 def _get_input_shape(shape_x):
+    """_get_input_shape"""
     dim_a = shape_x[0]
     dim_b = shape_x[1]
     res = []
@@ -170,6 +171,7 @@ def _get_input_shape(shape_x):
 def check_supported(input_x1, input_x2, bias=None, output_y={}, trans_a=False, trans_b=False, kernel_name="matmulcube"):
+    """check_supported"""
     shape_a = input_x1.get("shape")
     shape_b = input_x2.get("shape")
     print("shape_a: ", shape_a)
@@ -180,8 +182,6 @@ def check_supported(input_x1, input_x2, bias=None, output_y={}, trans_a=False, t
     util.check_shape_rule(shape_b)
     util.check_shape_size(shape_a, SHAPE_SIZE_LIMIT)
     util.check_shape_size(shape_b, SHAPE_SIZE_LIMIT)
-    if bias is not None and bool(bias):
-        shape_bias = bias.get("shape")
     try:
         trans_a_f = bool(1 - trans_a)
         if src_dtype == "float32" or src_dtype == "int32":
@@ -381,6 +381,7 @@ def CusMatMulCubeFraczLeftCast(input_x1, input_x2, bias=None, output_y={}, trans
 def get_cus_tile_info(input_x1, input_x2, diag_size):
+    """get_cus_tile_info"""
     tile_map = {
         ((32, 32, 16, 16), (128, 32, 16, 16)): (8, 8, 16),
         ((8, 8, 16, 16), (72, 8, 16, 16)): (8, 8, 4),
@@ -415,8 +416,9 @@ def get_cus_tile_info(input_x1, input_x2, diag_size):
 def cus_cube_matmul_cast(tik_instance, input_x1, trans_a, input_x2, trans_b,
                          res, mo_tile, ko_tile, no_tile, diag_opt=False, diag_size=128):
-    ko, mo, mi, ki = input_x1.shape
-    no, ko, ki, ni = input_x2.shape
+    """cus_cube_matmul_cast"""
+    ko, mo, _, _ = input_x1.shape
+    no, ko, ki, _ = input_x2.shape
     c0 = input_x1.shape[-1]
     diag_outer = diag_size // c0
     maxblocknum = 32

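get_cus_tile_info in the hunks above draws its tiling factors from a static table keyed by the operand shapes. The lookup itself is not part of this diff, so the sketch below only illustrates the assumed use of that table, with keys being the fractal shapes of input_x1 and input_x2 and values the (mo_tile, ko_tile, no_tile) factors handed to cus_cube_matmul_cast:

# Assumed illustration; entries copied from the tile_map shown in the hunk.
tile_map = {
    ((32, 32, 16, 16), (128, 32, 16, 16)): (8, 8, 16),
    ((8, 8, 16, 16), (72, 8, 16, 16)): (8, 8, 4),
}

key = ((32, 32, 16, 16), (128, 32, 16, 16))
mo_tile, ko_tile, no_tile = tile_map[key]
print(mo_tile, ko_tile, no_tile)  # 8 8 16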
@@ -47,6 +47,7 @@ cus_matmul_cube_fracz_right_mul_op_info = TBERegOp("CusMatMulCubeFraczRightMul")
 @op_info_register(cus_matmul_cube_fracz_right_mul_op_info)
 def CusMatMulCubeFraczRightMul(input_x1, input_x2, input_x3, bias=None, output_y={}, trans_a=False, trans_b=False,
                                kernel_name="matmulcube"):
+    """CusMatMulCubeFraczRightMul"""
     if util.get_product_version() == util.VERSION_MINI:
         tik_instance = tik.Tik(tik.Dprofile("v100", "mini"))
     else:
@@ -95,15 +96,17 @@ def CusMatMulCubeFraczRightMul(input_x1, input_x2, input_x3, bias=None, output_y
 def cus_cube_matmul_right_mul(tik_instance, input_x1, input_x2, input_x3,
                               res):
+    """cus_cube_matmul_right_mul"""
     diag_size = 128
-    ko, mo, mi, ki = input_x1.shape
-    no, ko, ki, ni = input_x2.shape
+    ko, mo, _, _ = input_x1.shape
+    no, ko, ki, _ = input_x2.shape
     c0 = input_x1.shape[-1]
     diag_outer = diag_size // c0
     if [input_x1.shape[-1], input_x1.shape[-2], input_x2.shape[-1], input_x2.shape[-2]] != [c0, c0, c0, c0]:
         raise ValueError("shape of input_x1 or input_x2 is not supported!")
     def get_cus_tile_info(input_x1, input_x2, input_x3):
+        """get_cus_tile_info"""
         input_shape = (tuple(input_x1.shape), input_x1.dtype, tuple(input_x2.shape), input_x2.dtype,
                        tuple(input_x3.shape), input_x3.dtype)
         tile_map = {

@@ -18,11 +18,10 @@ limitations under the License.
 matmul
 """
 from __future__ import absolute_import
-import te.lang.cce
-import te.platform.cce_params as cce
 from impl.matmul_vector import matmul_vector_cce
 from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
+import te.lang.cce
+import te.platform.cce_params as cce
 from te import tvm
 from topi import generic
 from topi.cce import util
@@ -146,6 +145,7 @@ def _shape_check(shape_a, shape_b, shape_bias, src_dtype, trans_a, trans_b):
 def _get_bias(shape_bias):
+    """_get_bias"""
     bias_length = shape_bias[0]
     if bias_length % 16 == 0:
         return shape_bias
@@ -157,6 +157,7 @@ def _get_bias(shape_bias):
 def _get_input_shape(shape_x):
+    """_get_input_shape"""
     dim_a = shape_x[0]
     dim_b = shape_x[1]
     res = []
@@ -175,6 +176,7 @@ def _get_input_shape(shape_x):
 def check_supported(input_x1, input_x2, bias=None, output_y={}, trans_a=False, trans_b=False, kernel_name="matmulcube"):
+    """check_supported"""
     shape_a = input_x1.get("shape")
     shape_b = input_x2.get("shape")
     print("shape_a: ", shape_a)
@@ -185,8 +187,6 @@ def check_supported(input_x1, input_x2, bias=None, output_y={}, trans_a=False, t
     util.check_shape_rule(shape_b)
     util.check_shape_size(shape_a, SHAPE_SIZE_LIMIT)
     util.check_shape_size(shape_b, SHAPE_SIZE_LIMIT)
-    if bias is not None and bool(bias):
-        shape_bias = bias.get("shape")
     try:
         trans_a_f = bool(1 - trans_a)
         if src_dtype == "float32" or src_dtype == "int32":

@@ -32,6 +32,7 @@ cus_transpose02314_op_info = TBERegOp("CusTranspose02314") \
 @op_info_register(cus_transpose02314_op_info)
 def CusTranspose02314(input_x, output, kernel_name="transpose021354"):
+    """CusTranspose02314"""
     input_x_shape = input_x.get("shape")
     output_shape = output.get("shape")
     perm = (0, 2, 3, 1, 4)
@@ -263,7 +264,7 @@ def CusTranspose02314(input_x, output, kernel_name="transpose021354"):
         with tik_instance.for_range(0, 32, block_num=32) as block_idx:
             with tik_instance.for_range(0, 6, thread_num=2) as cc1:
-                _inner_ + compute(cc1)
+                _inner_compute(cc1)
             _inner_compute(6)
     elif tuple(input_x_shape) == (32, 64, 14, 14, 16) and tuple(perm) == (0, 2, 3, 1, 4) and dtype == "float16":
         def _inner_compute(split_index, block_idx):
