Support fp16 in GPU impl of fused_elemwise_activation_op. (#20636)

* Support fp16 in fused_elemwise_activation_op.
* Fix unit testing in CPU-only mode.
qingqing01 committed 6 years ago (commit 01eddc1a04, parent db9fbcbc7e)

@@ -20,11 +20,15 @@ REGISTER_OP_CUDA_KERNEL(
     ops::FusedElemwiseActivationKernel<paddle::platform::CUDADeviceContext,
                                        float>,
     ops::FusedElemwiseActivationKernel<paddle::platform::CUDADeviceContext,
-                                       double>);
+                                       double>,
+    ops::FusedElemwiseActivationKernel<paddle::platform::CUDADeviceContext,
+                                       paddle::platform::float16>);
 
 REGISTER_OP_CUDA_KERNEL(
     fused_elemwise_activation_grad,
     ops::FusedElemwiseActivationGradKernel<paddle::platform::CUDADeviceContext,
                                            float>,
     ops::FusedElemwiseActivationGradKernel<paddle::platform::CUDADeviceContext,
-                                           double>);
+                                           double>,
+    ops::FusedElemwiseActivationGradKernel<paddle::platform::CUDADeviceContext,
+                                           paddle::platform::float16>);
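
Registering the kernels for paddle::platform::float16 instantiates the same templated kernel classes with T = float16, which in turn re-instantiates every functor they use with that type; that is what drives the functors.h changes below. A minimal sketch of that instantiation chain in plain C++, with hypothetical names (FusedElemwiseActivationKernelSketch, CUDAContextTag) and float/double standing in for platform::float16, which exists only inside Paddle; it is not Paddle's real registration machinery:

#include <iostream>

template <typename T>
struct ScaleFunctorSketch {
  explicit ScaleFunctorSketch(T s) : scale_(s) {}
  T operator()(T x) const { return x * scale_; }
  T scale_;
};

template <typename DeviceContext, typename T>
struct FusedElemwiseActivationKernelSketch {
  void Compute(T x) const {
    // Every functor the kernel touches is instantiated with the kernel's T,
    // so each registered dtype must compile through all of them.
    ScaleFunctorSketch<T> scale(static_cast<T>(2));
    std::cout << static_cast<double>(scale(x)) << "\n";
  }
};

struct CUDAContextTag {};  // stand-in for platform::CUDADeviceContext

int main() {
  // float and double instantiations existed before this commit;
  // the new float16 registration simply adds one more.
  FusedElemwiseActivationKernelSketch<CUDAContextTag, float>{}.Compute(1.5f);
  FusedElemwiseActivationKernelSketch<CUDAContextTag, double>{}.Compute(1.5);
  return 0;
}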

@@ -14,6 +14,8 @@ limitations under the License. */
 
 #pragma once
 
+#include "paddle/fluid/operators/math.h"
+
 namespace paddle {
 namespace operators {
 namespace math {
@@ -40,8 +42,8 @@ struct AddFunctor {
 
 template <typename T>
 struct AddGradFunctor {
-  inline HOSTDEVICE T Dx(T x, T y) { return 1; }
-  inline HOSTDEVICE T Dy(T x, T y) { return 1; }
+  inline HOSTDEVICE T Dx(T x, T y) { return static_cast<T>(1.); }
+  inline HOSTDEVICE T Dy(T x, T y) { return static_cast<T>(1.); }
 };
 
 template <typename T>
@@ -68,14 +70,22 @@ struct ScaleGradFunctor {
 
 template <typename T>
 struct ReluFunctor {
-  inline HOSTDEVICE T operator()(T x) { return x * (x > 0); }
+  inline HOSTDEVICE T operator()(T x) {
+    return x * (x > static_cast<T>(0) ? static_cast<T>(1) : static_cast<T>(0));
+  }
 };
 
 template <typename T>
 struct ReluGradFunctor {
-  inline HOSTDEVICE T UseX(T x) { return x > 0 ? 1 : 0; }
-  inline HOSTDEVICE T UseOut(T out) { return out > 0 ? 1 : 0; }
-  inline HOSTDEVICE T UseXAndOut(T x, T out) { return out > 0 ? 1 : 0; }
+  inline HOSTDEVICE T UseX(T x) {
+    return x > static_cast<T>(0) ? static_cast<T>(1) : static_cast<T>(0);
+  }
+  inline HOSTDEVICE T UseOut(T out) {
+    return out > static_cast<T>(0) ? static_cast<T>(1) : static_cast<T>(0);
+  }
+  inline HOSTDEVICE T UseXAndOut(T x, T out) {
+    return out > static_cast<T>(0) ? static_cast<T>(1) : static_cast<T>(0);
+  }
 };
 
 template <typename T>
@@ -84,9 +94,9 @@ struct TanhFunctor {
   const T kMax = static_cast<T>(13);
   inline HOSTDEVICE T operator()(T x) {
     // y = 2 / (1 + e^-2x) - 1
-    T t0 = 2 * x;
+    T t0 = static_cast<T>(2) * x;
     T t1 = (t0 < kMin) ? kMin : ((t0 > kMax) ? kMax : t0);
-    return static_cast<T>(2) / (static_cast<T>(1) + std::exp(-t1)) -
+    return static_cast<T>(2) / (static_cast<T>(1) + real_exp(-t1)) -
            static_cast<T>(1);
   }
 };
@@ -107,7 +117,7 @@ struct SigmoidFunctor {
   inline HOSTDEVICE T operator()(T x) {
     // y = 1 / (1 + e^-x)
     T tmp = (x < kMin) ? kMin : ((x > kMax) ? kMax : x);
-    return static_cast<T>(1) / (static_cast<T>(1) + std::exp(-tmp));
+    return static_cast<T>(1) / (static_cast<T>(1) + real_exp(-tmp));
   }
 };
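
Two patterns recur in the functors.h changes above: bare numeric literals are wrapped in static_cast<T>(...) because a half-precision type generally has no implicit conversion from int or double, and std::exp is replaced by real_exp, which (by assumption here) is a dtype-aware exponential made available through the newly included paddle/fluid/operators/math.h. A self-contained sketch with a toy half-like type (Half, toy_real_exp, and AddGradFunctorSketch are illustrative stand-ins, not Paddle code) shows both points:

#include <cmath>
#include <iostream>

// Toy half-like type: conversions are explicit, mirroring platform::float16's
// lack of implicit construction from integer literals.
struct Half {
  float v;  // toy: stores a float; a real fp16 type stores 16 bits
  Half() : v(0.f) {}
  explicit Half(float f) : v(f) {}
  explicit operator float() const { return v; }
};

// Assumed analogue of real_exp(): compute in float, convert back to T.
inline float toy_real_exp(float x) { return std::exp(x); }
inline Half toy_real_exp(Half x) { return Half(std::exp(static_cast<float>(x))); }

template <typename T>
struct AddGradFunctorSketch {
  // `return 1;` would not compile for T = Half (no implicit int -> Half);
  // static_cast<T>(1.) works for float, double, and Half alike.
  T Dx(T /*x*/, T /*y*/) const { return static_cast<T>(1.); }
};

int main() {
  AddGradFunctorSketch<float> f32;
  AddGradFunctorSketch<Half> f16;
  std::cout << f32.Dx(0.f, 0.f) << " "
            << static_cast<float>(f16.Dx(Half(0.f), Half(0.f))) << " "
            << static_cast<float>(toy_real_exp(Half(1.f))) << "\n";  // 1 1 2.71828
  return 0;
}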

@@ -33,17 +33,24 @@ from op_test import OpTest
 # TestFusedElementwiseActivationOp_channelwise_add
 
 
-def create_test_class(test_case, callback, attrs):
+def create_test_class(test_case,
+                      callback,
+                      attrs,
+                      dtype=np.float32,
+                      grad_chek=True):
     class TestFusedElementwiseActivationOp_base(OpTest):
         def setUp(self):
             self.op_type = "fused_elemwise_activation"
-            self.dtype = np.float32
+            self.dtype = dtype
             self.axis = -1
 
             self.init_input()
             self.init_output()
             self.init_attr()
 
+            self.out = self.out.astype(self.dtype)
+            self.intermediate_out = self.intermediate_out.astype(self.dtype)
+
             self.inputs = {
                 'X': OpTest.np_dtype_to_fluid_dtype(self.x),
                 'Y': OpTest.np_dtype_to_fluid_dtype(self.y)
@@ -71,16 +78,25 @@ def create_test_class(test_case, callback, attrs):
                 self.attrs[key] = attrs[key]
 
         def test_check_output(self):
-            self.check_output()
+            if self.dtype == np.float16 and core.is_compiled_with_cuda():
+                place = core.CUDAPlace(0)
+                if core.is_float16_supported(place):
+                    self.check_output_with_place(place, atol=1e-3)
+            else:
+                self.check_output()
 
         # FIXME(zcd): the intermediate_out_grad is not checked.
         def test_check_grad_normal(self):
+            if not grad_chek:
+                return
             if self.attrs["save_intermediate_out"]:
                 self.check_grad(['X', 'Y'], ['Out'], max_relative_error=0.005)
             else:
                 self.check_grad(['X', 'Y'], ['Out'], max_relative_error=0.005)
 
         def test_check_grad_ingore_x(self):
+            if not grad_chek:
+                return
             if self.attrs["save_intermediate_out"]:
                 self.check_grad(
                     ['Y'], ['Out'],
@@ -93,6 +109,8 @@ def create_test_class(test_case, callback, attrs):
                     no_grad_set=set("X"))
 
         def test_check_grad_ingore_y(self):
+            if not grad_chek:
+                return
             if self.attrs["save_intermediate_out"]:
                 self.check_grad(
                     ['X'], ['Out'],
@@ -307,11 +325,29 @@ for mode in {0, 1}:
             'functor_list': ["scale", "elementwise_add"],
             'save_intermediate_out': save_intermediate_out,
         })
+        create_test_class(
+            'scale_add_fp16' + suffix,
+            scale_add_func, {
+                'scale': scale,
+                'functor_list': ["scale", "elementwise_add"],
+                'save_intermediate_out': save_intermediate_out,
+            },
+            dtype=np.float16,
+            grad_chek=False)
         create_test_class('add_scale' + suffix, add_scale_func, {
             'scale': scale,
             'functor_list': ["elementwise_add", "scale"],
             'save_intermediate_out': save_intermediate_out,
         })
+        create_test_class(
+            'add_scale_fp16' + suffix,
+            add_scale_func, {
+                'scale': scale,
+                'functor_list': ["elementwise_add", "scale"],
+                'save_intermediate_out': save_intermediate_out,
+            },
+            dtype=np.float16,
+            grad_chek=False)
         create_test_class('add_relu' + suffix, add_relu_func, {
             'functor_list': ["elementwise_add", "relu"],
             'save_intermediate_out': save_intermediate_out,
@@ -320,11 +356,36 @@ for mode in {0, 1}:
             'functor_list': ["relu", "elementwise_add"],
             'save_intermediate_out': save_intermediate_out,
         })
+        create_test_class(
+            'add_relu_fp16' + suffix,
+            add_relu_func, {
+                'functor_list': ["elementwise_add", "relu"],
+                'save_intermediate_out': save_intermediate_out,
+            },
+            dtype=np.float16,
+            grad_chek=False)
+        create_test_class(
+            'relu_add_fp16' + suffix,
+            relu_add_func, {
+                'functor_list': ["relu", "elementwise_add"],
+                'save_intermediate_out': save_intermediate_out,
+            },
+            dtype=np.float16,
+            grad_chek=False)
         create_test_class('mul_scale' + suffix, mul_scale_func, {
             'scale': scale,
             'functor_list': ["elementwise_mul", "scale"],
             'save_intermediate_out': save_intermediate_out,
         })
+        create_test_class(
+            'mul_scale_fp16' + suffix,
+            mul_scale_func, {
+                'scale': scale,
+                'functor_list': ["elementwise_mul", "scale"],
+                'save_intermediate_out': save_intermediate_out,
+            },
+            dtype=np.float16,
+            grad_chek=False)
 
 if __name__ == '__main__':
     unittest.main()
