add float64 support for ops for the end of version 1.1

fix typo
pull/13649/head
Peilin Wang 4 years ago
parent 1965ecb9a1
commit 57cb024e03

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -18,6 +18,10 @@
namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(
ArgMaxWithValue,
KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat64),
ArgmaxWithValueGpuKernel, double, int)
MS_REG_GPU_KERNEL_TWO(
ArgMaxWithValue,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32),

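The float64 registration above pairs ArgMaxWithValue with ArgmaxWithValueGpuKernel<double, int>: a double input producing an int32 index and a double value. A minimal sketch of that computation, for orientation only (the kernel name and the flattened outer/bound layout are assumptions, not the MindSpore source):

// Illustrative sketch: one grid-stride thread scans each reduced slice of
// length `bound` and writes the winning index (int32) and value (double),
// matching the dtype pairing declared by the registration above.
__global__ void ArgMaxWithValueSketch(const double *input, int *index, double *value,
                                      size_t outer, size_t bound) {
  for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < outer;
       pos += blockDim.x * gridDim.x) {
    const double *row = input + pos * bound;
    int best_i = 0;
    double best_v = row[0];
    for (size_t i = 1; i < bound; ++i) {
      if (row[i] > best_v) {
        best_v = row[i];
        best_i = static_cast<int>(i);
      }
    }
    index[pos] = best_i;
    value[pos] = best_v;
  }
}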
@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -18,6 +18,8 @@
namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_ONE(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
BroadcastToGpuKernel, double)
MS_REG_GPU_KERNEL_ONE(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
BroadcastToGpuKernel, float)
MS_REG_GPU_KERNEL_ONE(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),

@@ -34,6 +34,14 @@ MS_REG_GPU_KERNEL_TWO(TensorScatterUpdate,
.AddOutputAttr(kNumberTypeFloat32),
TensorScatterUpdateGpuFwdKernel, float, int)
MS_REG_GPU_KERNEL_TWO(TensorScatterUpdate,
KernelAttr()
.AddInputAttr(kNumberTypeFloat64)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat64)
.AddOutputAttr(kNumberTypeFloat64),
TensorScatterUpdateGpuFwdKernel, double, int)
MS_REG_GPU_KERNEL_TWO(TensorScatterUpdate,
KernelAttr()
.AddInputAttr(kNumberTypeInt8)

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -113,6 +113,9 @@ void NoBroadcastGrad(const int &nums, const bool &grad_x1, const bool &grad_x2,
NoBroadcastGradKernel<<<GET_BLOCKS(nums), GET_THREADS, 0, stream>>>(nums, grad_x1, grad_x2, op, x1, x2, dy, dx1, dx2);
}
template void NoBroadcastGrad(const int &nums, const bool &grad_x1, const bool &grad_x2, enum BroadcastGradOpType op,
const double *x1, const double *x2, const double *dy, double *dx1, double *dx2,
cudaStream_t stream);
template void NoBroadcastGrad(const int &nums, const bool &grad_x1, const bool &grad_x2, enum BroadcastGradOpType op,
const float *x1, const float *x2, const float *dy, float *dx1, float *dx2,
cudaStream_t stream);
@@ -124,6 +127,10 @@ template void NoBroadcastGrad(const int &nums, const bool &grad_x1, const bool &
template void NoBroadcastGrad(const int &nums, const bool &grad_x1, const bool &grad_x2, enum BroadcastGradOpType op,
const int64_t *x1, const int64_t *x2, const int64_t *dy, int64_t *dx1, int64_t *dx2,
cudaStream_t stream);
template void BroadcastGrad(const int &l0, const int &l1, const int &l2, const int &l3, const int &r0, const int &r1,
const int &r2, const int &r3, const int &d0, const int &d1, const int &d2, const int &d3,
const bool &grad_x1, const bool &grad_x2, enum BroadcastGradOpType op, const double *x1,
const double *x2, const double *dy, double *dx1, double *dx2, cudaStream_t stream);
template void BroadcastGrad(const int &l0, const int &l1, const int &l2, const int &l3, const int &r0, const int &r1,
const int &r2, const int &r3, const int &d0, const int &d1, const int &d2, const int &d3,
const bool &grad_x1, const bool &grad_x2, enum BroadcastGradOpType op, const float *x1,

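These double instantiations of NoBroadcastGrad and BroadcastGrad give the min/max grad path a float64 variant. For the no-broadcast case the idea is roughly the following sketch (illustrative only; the kernel name, the <= tie rule, and the plain stores are assumptions, and the real kernel's accumulation details are not reproduced):

// Illustrative sketch: route dy to whichever input held the smaller value.
// Without broadcasting each thread owns its own output element, so a plain
// store suffices here.
__global__ void MinimumGradNoBroadcastSketch(int nums, bool grad_x1, bool grad_x2,
                                             const double *x1, const double *x2,
                                             const double *dy, double *dx1, double *dx2) {
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < nums; i += blockDim.x * gridDim.x) {
    const bool x1_is_min = x1[i] <= x2[i];  // tie-breaking rule assumed for illustration
    if (grad_x1) dx1[i] = x1_is_min ? dy[i] : 0.0;
    if (grad_x2) dx2[i] = x1_is_min ? 0.0 : dy[i];
  }
}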
@@ -555,6 +555,9 @@ void BroadcastTo(const size_t &i0, const size_t &i1, const size_t &i2, const siz
output_addr);
}
template void BroadcastTo(const size_t &i0, const size_t &i1, const size_t &i2, const size_t &i3, const size_t &o0,
const size_t &o1, const size_t &o2, const size_t &o3, const double *input_addr,
double *output_addr, cudaStream_t stream);
template void BroadcastTo(const size_t &i0, const size_t &i1, const size_t &i2, const size_t &i3, const size_t &o0,
const size_t &o1, const size_t &o2, const size_t &o3, const float *input_addr,
float *output_addr, cudaStream_t stream);

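The new double instantiation of BroadcastTo covers the rank-4 copy-with-repeat that the kernel already implements for the other float types. The indexing amounts to roughly this sketch (illustrative only; the kernel name is an assumption and the real template is generic over the element type):

// Illustrative sketch: each output coordinate reads the input coordinate
// with size-1 axes clamped to index 0, i.e. repeated along that axis.
__global__ void BroadcastTo4DSketch(size_t i0, size_t i1, size_t i2, size_t i3,
                                    size_t o0, size_t o1, size_t o2, size_t o3,
                                    const double *input, double *output) {
  size_t total = o0 * o1 * o2 * o3;
  for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < total;
       pos += blockDim.x * gridDim.x) {
    size_t d3 = pos % o3;
    size_t d2 = pos / o3 % o2;
    size_t d1 = pos / (o3 * o2) % o1;
    size_t d0 = pos / (o3 * o2 * o1);
    size_t s0 = (i0 == 1) ? 0 : d0;
    size_t s1 = (i1 == 1) ? 0 : d1;
    size_t s2 = (i2 == 1) ? 0 : d2;
    size_t s3 = (i3 == 1) ? 0 : d3;
    output[pos] = input[((s0 * i1 + s1) * i2 + s2) * i3 + s3];
  }
}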
@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -327,6 +327,8 @@ void CalGeneralReduction(bool small, const T *input, const size_t bound, const s
return;
}
template void CalGeneralReduction(bool small, const double *input, const size_t bound_, const size_t outerSize_,
const size_t innerSize_, int *index, double *output, cudaStream_t cuda_stream);
template void CalGeneralReduction(bool small, const float *input, const size_t bound_, const size_t outerSize_,
const size_t innerSize_, int *index, float *output, cudaStream_t cuda_stream);
template void CalGeneralReduction(bool small, const half *input, const size_t bound_, const size_t outerSize_,

@@ -67,6 +67,11 @@ template void TensorScatterUpdate<float, int>(float *input, int *indices, float
const size_t &output_size, const size_t &indices_dim_0,
const size_t &indices_dim_1, int *indices_stride, int *work_shape,
cudaStream_t stream);
template void TensorScatterUpdate<double, int>(double *input, int *indices, double *update, double *output,
const size_t &block_size, const size_t &input_size,
const size_t &output_size, const size_t &indices_dim_0,
const size_t &indices_dim_1, int *indices_stride, int *work_shape,
cudaStream_t stream);
template void TensorScatterUpdate<char, int>(char *input, int *indices, char *update, char *output,
const size_t &block_size, const size_t &input_size,
const size_t &output_size, const size_t &indices_dim_0,

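The TensorScatterUpdate<double, int> instantiation writes double update blocks at positions selected by int32 index tuples. Roughly, as a sketch (not the MindSpore kernel; it assumes output already holds a copy of input, treats indices_stride[k] as the element stride of output axis k, and omits bounds checking):

// Illustrative sketch: each of the indices_dim_0 index tuples (each of
// length indices_dim_1) selects one block_size-element slice of `output`
// to overwrite with the corresponding slice of `update`.
__global__ void TensorScatterUpdateSketch(const int *indices, const double *update,
                                          double *output, size_t block_size,
                                          size_t indices_dim_0, size_t indices_dim_1,
                                          const int *indices_stride) {
  size_t total = indices_dim_0 * block_size;
  for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < total;
       pos += blockDim.x * gridDim.x) {
    size_t row = pos / block_size;     // which index tuple
    size_t offset = pos % block_size;  // position inside the updated slice
    size_t base = 0;
    for (size_t k = 0; k < indices_dim_1; ++k) {
      base += static_cast<size_t>(indices[row * indices_dim_1 + k]) *
              static_cast<size_t>(indices_stride[k]);
    }
    output[base + offset] = update[pos];
  }
}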
@@ -23,6 +23,10 @@ MS_REG_GPU_KERNEL_ONE(
Greater,
KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeBool),
BroadcastOpGpuKernel, double)
MS_REG_GPU_KERNEL_ONE(
Minimum,
KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
BroadcastOpGpuKernel, double)
MS_REG_GPU_KERNEL_ONE(
Less, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeBool),
BroadcastOpGpuKernel, double)
@@ -46,6 +50,9 @@ MS_REG_GPU_KERNEL_ONE(
RealDiv,
KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
BroadcastOpGpuKernel, double)
MS_REG_GPU_KERNEL_ONE(
Pow, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
BroadcastOpGpuKernel, double)
// fp32
MS_REG_GPU_KERNEL_ONE(

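The registrations above route float64 Minimum and Pow to BroadcastOpGpuKernel<double>. Ignoring broadcasting, the per-element double path is essentially the following sketch (illustrative only; the kernel name and signature are assumptions):

// Illustrative sketch: elementwise double Pow on inputs already expanded to
// a common shape; Minimum would use fmin(x[i], y[i]) in the same loop.
__global__ void ElementwisePowDoubleSketch(size_t n, const double *x, const double *y,
                                           double *out) {
  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n;
       i += blockDim.x * gridDim.x) {
    out[i] = pow(x[i], y[i]);  // double-precision device pow from the CUDA math library
  }
}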
@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -18,6 +18,14 @@
namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_ONE(MinimumGrad,
KernelAttr()
.AddInputAttr(kNumberTypeFloat64)
.AddInputAttr(kNumberTypeFloat64)
.AddInputAttr(kNumberTypeFloat64)
.AddOutputAttr(kNumberTypeFloat64)
.AddOutputAttr(kNumberTypeFloat64),
BroadcastOpGradGpuKernel, double)
MS_REG_GPU_KERNEL_ONE(MinimumGrad,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
