diff --git a/mindspore/ops/_op_impl/akg/gpu/tile.py b/mindspore/ops/_op_impl/akg/gpu/tile.py
index 32b993f239..3ba6fced60 100644
--- a/mindspore/ops/_op_impl/akg/gpu/tile.py
+++ b/mindspore/ops/_op_impl/akg/gpu/tile.py
@@ -22,8 +22,9 @@ tile_op_info = AkgGpuRegOp("Tile") \
     .attr("multiples", "required", "listInt") \
     .dtype_format(DataType.F16_Default, DataType.F16_Default) \
     .dtype_format(DataType.F32_Default, DataType.F32_Default) \
-    .dtype_format(DataType.I32_Default, DataType.I32_Default) \
     .dtype_format(DataType.I16_Default, DataType.I16_Default) \
+    .dtype_format(DataType.I32_Default, DataType.I32_Default) \
+    .dtype_format(DataType.I64_Default, DataType.I64_Default) \
     .get_op_info()
 
 
diff --git a/tests/st/ops/gpu/test_tile_op.py b/tests/st/ops/gpu/test_tile_op.py
index 3d65fea24d..5738c7d639 100644
--- a/tests/st/ops/gpu/test_tile_op.py
+++ b/tests/st/ops/gpu/test_tile_op.py
@@ -24,170 +24,70 @@ from mindspore.common.tensor import Tensor
 from mindspore.nn import Cell
 from mindspore.ops.operations import Tile
 
-context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
-
-input_x0 = np.arange(2).reshape((2, 1, 1)).astype(np.float32)
-mul0 = (8, 1, 1)
-input_x1 = np.arange(32).reshape((2, 4, 4)).astype(np.float32)
-mul1 = (2, 2, 2)
-input_x2 = np.arange(1).reshape((1, 1, 1)).astype(np.float32)
-mul2 = (1, 1, 1)
-
-input_32_x0 = np.arange(2).reshape((2, 1, 1)).astype(np.int32)
-mul_32_0 = (8, 1, 1)
-input_32_x1 = np.arange(32).reshape((2, 4, 4)).astype(np.int32)
-mul_32_1 = (2, 2, 2)
-input_32_x2 = np.arange(1).reshape((1, 1, 1)).astype(np.int32)
-mul_32_2 = (1, 1, 1)
-
-input_16_x0 = np.arange(2).reshape((2, 1, 1)).astype(np.int16)
-mul_16_0 = (8, 1, 1)
-input_16_x1 = np.arange(32).reshape((2, 4, 4)).astype(np.int16)
-mul_16_1 = (2, 2, 2)
-input_16_x2 = np.arange(1).reshape((1, 1, 1)).astype(np.int16)
-mul_16_2 = (1, 1, 1)
-
-input_8_x0 = np.arange(2).reshape((2, 1, 1)).astype(np.uint8)
-mul_8_0 = (8, 1, 1)
-input_8_x1 = np.arange(32).reshape((2, 4, 4)).astype(np.int8)
-mul_8_1 = (2, 2, 2)
-input_8_x2 = np.arange(1).reshape((1, 1, 1)).astype(np.uint8)
-mul_8_2 = (1, 1, 1)
-
-
-class Net(Cell):
-    def __init__(self):
-        super(Net, self).__init__()
-        self.Tile = Tile()
-
-        self.input_x0 = Parameter(initializer(Tensor(input_x0), input_x0.shape), name='x0')
-        self.mul0 = mul0
-        self.input_x1 = Parameter(initializer(Tensor(input_x1), input_x1.shape), name='x1')
-        self.mul1 = mul1
-        self.input_x2 = Parameter(initializer(Tensor(input_x2), input_x2.shape), name='x2')
-        self.mul2 = mul2
-
-    @ms_function
-    def construct(self):
-        output = (self.Tile(self.input_x0, self.mul0),
-                  self.Tile(self.input_x1, self.mul1),
-                  self.Tile(self.input_x2, self.mul2))
-        return output
-
 
-class Net32(Cell):
-    def __init__(self):
-        super(Net32, self).__init__()
+class TileNet(Cell):  # Cell that tiles one stored input; multiples are supplied per call
+    def __init__(self, numpy_input):  # numpy_input: numpy array wrapped into the Parameter below
+        super(TileNet, self).__init__()
         self.Tile = Tile()
 
-        self.input_32_x0 = Parameter(initializer(Tensor(input_32_x0), input_32_x0.shape), name='x0')
-        self.mul_32_0 = mul_32_0
-        self.input_32_x1 = Parameter(initializer(Tensor(input_32_x1), input_32_x1.shape), name='x1')
-        self.mul_32_1 = mul_32_1
-        self.input_32_x2 = Parameter(initializer(Tensor(input_32_x2), input_32_x2.shape), name='x2')
-        self.mul_32_2 = mul_32_2
+        self.input_parameter = Parameter(initializer(Tensor(numpy_input), numpy_input.shape), name='x')
 
     @ms_function
-    def construct(self):
-        output = (self.Tile(self.input_32_x0, self.mul_32_0),
-                  self.Tile(self.input_32_x1, self.mul_32_1),
-                  self.Tile(self.input_32_x2, self.mul_32_2))
-        return output
+    def construct(self, mul):  # mul: multiples forwarded unchanged to Tile
+        return self.Tile(self.input_parameter, mul)
 
 
-class Net16(Cell):
-    def __init__(self):
-        super(Net16, self).__init__()
-        self.Tile = Tile()
+def ms_tile(nptype):
+    """Compare GPU Tile against np.tile for inputs of dtype `nptype`.
 
-        self.input_16_x0 = Parameter(initializer(Tensor(input_16_x0), input_16_x0.shape), name='x0')
-        self.mul_16_0 = mul_16_0
-        self.input_16_x1 = Parameter(initializer(Tensor(input_16_x1), input_16_x1.shape), name='x1')
-        self.mul_16_1 = mul_16_1
-        self.input_16_x2 = Parameter(initializer(Tensor(input_16_x2), input_16_x2.shape), name='x2')
-        self.mul_16_2 = mul_16_2
+    Runs in graph mode on the GPU target. A fresh TileNet is built for each
+    (shape, multiples) case and its output must match the NumPy result in
+    value, shape and dtype.
+    """
+    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
 
-    @ms_function
-    def construct(self):
-        output = (self.Tile(self.input_16_x0, self.mul_16_0),
-                  self.Tile(self.input_16_x1, self.mul_16_1),
-                  self.Tile(self.input_16_x2, self.mul_16_2))
-        return output
+    # (input shape, multiples) pairs covered for every dtype.
+    cases = (
+        ((2, 1, 1), (8, 1, 1)),
+        ((2, 4, 4), (2, 2, 2)),
+        ((1, 1, 1), (1, 1, 1)),
+    )
 
+    for shape, mul in cases:
+        # Distinct values 0..N-1, cast to the dtype under test.
+        np_input = np.arange(np.prod(shape)).reshape(shape).astype(nptype)
+        np_expected = np.tile(np_input, mul)
+        ms_output = TileNet(np_input)(mul).asnumpy()
+        np.testing.assert_array_equal(ms_output, np_expected)
+        # assert_array_equal does not compare dtypes, so check metadata explicitly.
+        assert (ms_output.shape, ms_output.dtype) == (np_expected.shape, np_expected.dtype)
 
 @pytest.mark.level0
 @pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
-def test_tile():
-    net = Net()
-    output = net()
-
-    expect0 = np.tile(input_x0, mul0)
-    diff0 = output[0].asnumpy() - expect0
-    error0 = np.ones(shape=expect0.shape) * 1.0e-5
-    assert np.all(diff0 < error0)
-    assert output[0].shape == expect0.shape
-
-    expect1 = np.tile(input_x1, mul1)
-    diff1 = output[1].asnumpy() - expect1
-    error1 = np.ones(shape=expect1.shape) * 1.0e-5
-    assert np.all(diff1 < error1)
-    assert output[1].shape == expect1.shape
-
-    expect2 = np.tile(input_x2, mul2)
-    diff2 = output[2].asnumpy() - expect2
-    error2 = np.ones(shape=expect2.shape) * 1.0e-5
-    assert np.all(diff2 < error2)
-    assert output[2].shape == expect2.shape
-
+def test_tile_float16():
+    ms_tile(np.float16)  # float16 inputs; ms_tile compares the Tile output with np.tile
 
 @pytest.mark.level0
 @pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
-def test_tile_32():
-    net = Net32()
-    output = net()
-
-    expect0 = np.tile(input_32_x0, mul_32_0)
-    diff0 = output[0].asnumpy() - expect0
-    error0 = np.ones(shape=expect0.shape) * 1.0e-5
-    assert np.all(diff0 < error0)
-    assert output[0].shape == expect0.shape
+def test_tile_float32():
+    ms_tile(np.float32)  # float32 inputs; ms_tile compares the Tile output with np.tile
 
-    expect1 = np.tile(input_32_x1, mul_32_1)
-    diff1 = output[1].asnumpy() - expect1
-    error1 = np.ones(shape=expect1.shape) * 1.0e-5
-    assert np.all(diff1 < error1)
-    assert output[1].shape == expect1.shape
-
-    expect2 = np.tile(input_32_x2, mul_32_2)
-    diff2 = output[2].asnumpy() - expect2
-    error2 = np.ones(shape=expect2.shape) * 1.0e-5
-    assert np.all(diff2 < error2)
-    assert output[2].shape == expect2.shape
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_tile_int16():
+    ms_tile(np.int16)  # int16 inputs; ms_tile compares the Tile output with np.tile
 
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_tile_int32():
+    ms_tile(np.int32)  # int32 inputs; ms_tile compares the Tile output with np.tile
 
 @pytest.mark.level0
 @pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
-def test_tile_16():
-    net = Net16()
-    output = net()
-
-    expect0 = np.tile(input_16_x0, mul_16_0)
-    diff0 = output[0].asnumpy() - expect0
-    error0 = np.ones(shape=expect0.shape) * 1.0e-5
-    assert np.all(diff0 < error0)
-    assert output[0].shape == expect0.shape
-
-    expect1 = np.tile(input_16_x1, mul_16_1)
-    diff1 = output[1].asnumpy() - expect1
-    error1 = np.ones(shape=expect1.shape) * 1.0e-5
-    assert np.all(diff1 < error1)
-    assert output[1].shape == expect1.shape
-
-    expect2 = np.tile(input_16_x2, mul_16_2)
-    diff2 = output[2].asnumpy() - expect2
-    error2 = np.ones(shape=expect2.shape) * 1.0e-5
-    assert np.all(diff2 < error2)
-    assert output[2].shape == expect2.shape
+def test_tile_int64():
+    ms_tile(np.int64)  # int64 inputs; ms_tile compares the Tile output with np.tile