Add int64 -> fp16 cast support and switch conv GPU kernels from scalar "pad" to "pad_list" (with asymmetric-padding handling)

pull/3240/head
VectorSL 5 years ago
parent 8e4c0a9d93
commit 90f15df037

@ -20,6 +20,8 @@ from _akg.topi.generic import schedule_elemwise
def Cast(x, dst_type):
    """Cast tensor ``x`` to ``dst_type``.

    int64 -> float16 is routed through an intermediate float32 cast —
    presumably the backend lacks a direct int64->float16 conversion
    (NOTE(review): confirm against the AKG/topi cast kernel).  Every
    other dtype pair is a single direct cast.
    """
    needs_two_step = x.dtype == "int64" and dst_type == "float16"
    src = cast.cast(x, "float32") if needs_two_step else x
    return cast.cast(src, dst_type)

@ -109,12 +109,14 @@ class Conv2dGpuFwdKernel : public GpuKernel {
Set4DDesc(in_shape, filter_shape, output_shape); Set4DDesc(in_shape, filter_shape, output_shape);
group_ = GetAttr<int>(kernel_node, "group"); group_ = GetAttr<int>(kernel_node, "group");
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionGroupCount(conv_desc_, group_), "cudnnSetConvGroupCount failed"); CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionGroupCount(conv_desc_, group_), "cudnnSetConvGroupCount failed");
pad_height_ = GetAttr<int>(kernel_node, "pad"); auto pad_list = GetAttr<std::vector<int>>(kernel_node, "pad_list");
pad_width_ = pad_height_; pad_height_ = pad_list[0];
pad_width_ = pad_list[2];
auto symmetry_pad = (pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]);
pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode"); pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
SetStrideAndDilation(kernel_node); SetStrideAndDilation(kernel_node);
cudnnTensorDescriptor_t input_descriptor_real = nullptr; cudnnTensorDescriptor_t input_descriptor_real = nullptr;
if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) { if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase || !symmetry_pad) {
SetPad(in_shape, kernel_node); SetPad(in_shape, kernel_node);
input_descriptor_real = use_pad_ ? padded_desc_ : input_desc_; input_descriptor_real = use_pad_ ? padded_desc_ : input_desc_;
} else { } else {

@ -113,12 +113,14 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
group_ = GetAttr<int>(kernel_node, "group"); group_ = GetAttr<int>(kernel_node, "group");
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionGroupCount(conv_desc_, group_), "cudnnSetConvGroupCount failed"); CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionGroupCount(conv_desc_, group_), "cudnnSetConvGroupCount failed");
pad_height_ = GetAttr<int>(kernel_node, "pad"); auto pad_list = GetAttr<std::vector<int>>(kernel_node, "pad_list");
pad_width_ = pad_height_; pad_height_ = pad_list[0];
pad_width_ = pad_list[2];
auto symmetry_pad = (pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]);
pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode"); pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
SetStrideAndDilation(kernel_node); SetStrideAndDilation(kernel_node);
cudnnTensorDescriptor_t x_desc_real = nullptr; cudnnTensorDescriptor_t x_desc_real = nullptr;
if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) { if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase || !symmetry_pad) {
SetPad(in_shape, kernel_node); SetPad(in_shape, kernel_node);
x_desc_real = use_pad_ ? padded_descriptor_ : x_desc_; x_desc_real = use_pad_ ? padded_descriptor_ : x_desc_;
} else { } else {

@ -114,12 +114,14 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
group_ = GetAttr<int>(kernel_node, "group"); group_ = GetAttr<int>(kernel_node, "group");
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionGroupCount(conv_desc_, group_), "cudnnSetConvGroupCount failed"); CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionGroupCount(conv_desc_, group_), "cudnnSetConvGroupCount failed");
pad_height_ = GetAttr<int>(kernel_node, "pad"); auto pad_list = GetAttr<std::vector<int>>(kernel_node, "pad_list");
pad_width_ = pad_height_; pad_height_ = pad_list[0];
pad_width_ = pad_list[2];
auto symmetry_pad = (pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]);
pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode"); pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
SetStrideAndDilation(kernel_node); SetStrideAndDilation(kernel_node);
cudnnTensorDescriptor_t dx_desc_real = nullptr; cudnnTensorDescriptor_t dx_desc_real = nullptr;
if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) { if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase || !symmetry_pad) {
SetPad(input_shape, kernel_node); SetPad(input_shape, kernel_node);
dx_desc_real = use_pad_ ? padded_descriptor_ : dx_desc_; dx_desc_real = use_pad_ ? padded_descriptor_ : dx_desc_;
} else { } else {

@ -50,6 +50,9 @@ cast_op_info = AkgGpuRegOp("Cast") \
.dtype_format(DataType.I16_Default, DataType.I32_Default) \ .dtype_format(DataType.I16_Default, DataType.I32_Default) \
.dtype_format(DataType.I16_Default, DataType.I64_Default) \ .dtype_format(DataType.I16_Default, DataType.I64_Default) \
.dtype_format(DataType.I64_Default, DataType.F64_Default) \ .dtype_format(DataType.I64_Default, DataType.F64_Default) \
.dtype_format(DataType.I64_Default, DataType.F32_Default) \
.dtype_format(DataType.I64_Default, DataType.F16_Default) \
.dtype_format(DataType.I64_Default, DataType.I32_Default) \
.dtype_format(DataType.I16_Default, DataType.F32_Default) \ .dtype_format(DataType.I16_Default, DataType.F32_Default) \
.dtype_format(DataType.I16_Default, DataType.F16_Default) \ .dtype_format(DataType.I16_Default, DataType.F16_Default) \
.dtype_format(DataType.F32_Default, DataType.I32_Default) \ .dtype_format(DataType.F32_Default, DataType.I32_Default) \

@ -92,7 +92,7 @@ def test_cast2():
@pytest.mark.platform_x86_gpu_training @pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard @pytest.mark.env_onecard
def test_cast3(): def test_cast3():
x0 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.float16)) x0 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int64))
t0 = mstype.int32 t0 = mstype.int32
x1 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.float32)) x1 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.float32))
t1 = mstype.int32 t1 = mstype.int32
@ -342,3 +342,20 @@ def test_cast17():
assert type0 == 'float32' assert type0 == 'float32'
type1 = output[1].asnumpy().dtype type1 = output[1].asnumpy().dtype
assert type1 == 'float16' assert type1 == 'float16'
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_cast18():
    """Check that the GPU Cast op accepts int64 inputs.

    Exercises the two dtype pairs this change registers:
    int64 -> float32 and int64 -> float16 (the latter is implemented as
    int64 -> float32 -> float16 in the AKG Cast op).
    """
    # Same integer payload for both inputs; only the target dtype differs.
    x0 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int64))
    t0 = mstype.float32
    x1 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int64))
    t1 = mstype.float16
    context.set_context(mode=context.GRAPH_MODE, device_target='GPU')
    net = Net(t0, t1)
    output = net(x0, x1)
    # Only the result dtypes are asserted, not the values.
    type0 = output[0].asnumpy().dtype
    assert type0 == 'float32'
    type1 = output[1].asnumpy().dtype
    assert type1 == 'float16'

Loading…
Cancel
Save