Paddle/paddle/fluid/operators/activation_op.cu
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/platform/float16.h"
namespace ops = paddle::operators;
namespace plat = paddle::platform;
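// Registers the forward and backward GPU kernels of one activation op for
// float, double, and float16. For example,
//   REGISTER_ACTIVATION_CUDA_KERNEL(relu, Relu, ReluFunctor, ReluGradFunctor)
// expands to REGISTER_OP_CUDA_KERNEL(relu, ...) and
// REGISTER_OP_CUDA_KERNEL(relu_grad, ...), with one kernel instantiation per
// type.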
#define REGISTER_ACTIVATION_CUDA_KERNEL(act_type, op_name, functor,          \
                                        grad_functor)                        \
  REGISTER_OP_CUDA_KERNEL(                                                   \
      act_type,                                                              \
      ops::ActivationKernel<plat::CUDADeviceContext, ops::functor<float>>,   \
      ops::ActivationKernel<plat::CUDADeviceContext, ops::functor<double>>,  \
      ops::ActivationKernel<plat::CUDADeviceContext,                         \
                            ops::functor<plat::float16>>);                   \
  REGISTER_OP_CUDA_KERNEL(                                                   \
      act_type##_grad, ops::ActivationGradKernel<plat::CUDADeviceContext,    \
                                                 ops::grad_functor<float>>,  \
      ops::ActivationGradKernel<plat::CUDADeviceContext,                     \
                                ops::grad_functor<double>>,                  \
      ops::ActivationGradKernel<plat::CUDADeviceContext,                     \
                                ops::grad_functor<plat::float16>>);
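// Instantiate the macro once for every activation listed in
// FOR_EACH_ACTIVATION_OP (defined in activation_op.h). Ops that need extra
// registrations (second-order gradient kernels or integer types) are
// registered individually below.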
FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_CUDA_KERNEL);
/* ======================== leaky relu register ============================ */
REGISTER_ACTIVATION_CUDA_KERNEL(leaky_relu, LeakyRelu, LeakyReluFunctor,
                                LeakyReluGradFunctor);
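// leaky_relu_grad_grad is the second-order (double) gradient kernel: it is
// invoked when the backward pass of LeakyRelu is itself differentiated,
// e.g. for gradient-penalty style losses.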
REGISTER_OP_CUDA_KERNEL(
    leaky_relu_grad_grad,
    ops::ActivationDoubleGradKernel<plat::CUDADeviceContext,
                                    ops::LeakyReluGradGradFunctor<float>>,
    ops::ActivationDoubleGradKernel<plat::CUDADeviceContext,
                                    ops::LeakyReluGradGradFunctor<double>>,
    ops::ActivationDoubleGradKernel<
        plat::CUDADeviceContext, ops::LeakyReluGradGradFunctor<plat::float16>>);
/* ========================================================================== */
/* ======================== elu register ============================ */
REGISTER_ACTIVATION_CUDA_KERNEL(elu, ELU, ELUFunctor, ELUGradFunctor);
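// ELU uses a dedicated ELUDoubleGradKernel rather than the generic
// ActivationDoubleGradKernel; its second-order formula also depends on the
// forward input X.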
REGISTER_OP_CUDA_KERNEL(
    elu_grad_grad, ops::ELUDoubleGradKernel<plat::CUDADeviceContext,
                                            ops::ELUGradGradFunctor<float>>,
    ops::ELUDoubleGradKernel<plat::CUDADeviceContext,
                             ops::ELUGradGradFunctor<double>>,
    ops::ELUDoubleGradKernel<plat::CUDADeviceContext,
                             ops::ELUGradGradFunctor<plat::float16>>);
/* ========================================================================== */
/* =========================== relu register ============================ */
REGISTER_ACTIVATION_CUDA_KERNEL(relu, Relu, ReluFunctor, ReluGradFunctor);
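// ReLU's second-order gradient only needs the sign mask of the forward
// output, so the generic ActivationDoubleGradKernel suffices.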
REGISTER_OP_CUDA_KERNEL(
    relu_grad_grad,
    ops::ActivationDoubleGradKernel<plat::CUDADeviceContext,
                                    ops::ReluGradGradFunctor<float>>,
    ops::ActivationDoubleGradKernel<plat::CUDADeviceContext,
                                    ops::ReluGradGradFunctor<double>>,
    ops::ActivationDoubleGradKernel<plat::CUDADeviceContext,
                                    ops::ReluGradGradFunctor<plat::float16>>);
/* ========================================================================== */
/* =========================== sqrt register ============================= */
REGISTER_ACTIVATION_CUDA_KERNEL(sqrt, Sqrt, SqrtFunctor, SqrtGradFunctor);
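// sqrt has its own SqrtDoubleGradKernel: with out = sqrt(x) the first-order
// gradient is dx = dout * 0.5 / out, and the second-order formula is likewise
// expressed in terms of the forward output rather than the input.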
REGISTER_OP_CUDA_KERNEL(
    sqrt_grad_grad,
    ops::SqrtDoubleGradKernel<plat::CUDADeviceContext,
                              ops::SqrtGradGradFunctor<float>>,
    ops::SqrtDoubleGradKernel<plat::CUDADeviceContext,
                              ops::SqrtGradGradFunctor<double>>,
    ops::SqrtDoubleGradKernel<plat::CUDADeviceContext,
                              ops::SqrtGradGradFunctor<plat::float16>>);
/* ========================================================================== */
/* =========================== rsqrt register ============================= */
REGISTER_ACTIVATION_CUDA_KERNEL(rsqrt, Rsqrt, RsqrtFunctor, RsqrtGradFunctor);
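// rsqrt(x) = 1 / sqrt(x); like sqrt, it gets a dedicated double-grad kernel
// formulated in terms of the forward output.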
REGISTER_OP_CUDA_KERNEL(
    rsqrt_grad_grad,
    ops::RsqrtDoubleGradKernel<plat::CUDADeviceContext,
                               ops::RsqrtGradGradFunctor<float>>,
    ops::RsqrtDoubleGradKernel<plat::CUDADeviceContext,
                               ops::RsqrtGradGradFunctor<double>>,
    ops::RsqrtDoubleGradKernel<plat::CUDADeviceContext,
                               ops::RsqrtGradGradFunctor<plat::float16>>);
/* ========================================================================== */
/* =========================== square register ============================ */
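// square below, along with pow, exp, and abs, is registered for int and
// int64_t in addition to the floating-point types, so these ops also work on
// integer tensors.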
REGISTER_OP_CUDA_KERNEL(
    square,
    ops::ActivationKernel<plat::CUDADeviceContext, ops::SquareFunctor<float>>,
    ops::ActivationKernel<plat::CUDADeviceContext, ops::SquareFunctor<double>>,
    ops::ActivationKernel<plat::CUDADeviceContext, ops::SquareFunctor<int>>,
    ops::ActivationKernel<plat::CUDADeviceContext, ops::SquareFunctor<int64_t>>,
    ops::ActivationKernel<plat::CUDADeviceContext,
                          ops::SquareFunctor<plat::float16>>);

REGISTER_OP_CUDA_KERNEL(
    square_grad, ops::ActivationGradKernel<plat::CUDADeviceContext,
                                           ops::SquareGradFunctor<float>>,
    ops::ActivationGradKernel<plat::CUDADeviceContext,
                              ops::SquareGradFunctor<double>>,
    ops::ActivationGradKernel<plat::CUDADeviceContext,
                              ops::SquareGradFunctor<int>>,
    ops::ActivationGradKernel<plat::CUDADeviceContext,
                              ops::SquareGradFunctor<int64_t>>,
    ops::ActivationGradKernel<plat::CUDADeviceContext,
                              ops::SquareGradFunctor<plat::float16>>);

REGISTER_OP_CUDA_KERNEL(
    square_grad_grad,
    ops::SquareDoubleGradKernel<plat::CUDADeviceContext,
                                ops::SquareGradGradFunctor<float>>,
    ops::SquareDoubleGradKernel<plat::CUDADeviceContext,
                                ops::SquareGradGradFunctor<double>>,
    ops::SquareDoubleGradKernel<plat::CUDADeviceContext,
                                ops::SquareGradGradFunctor<plat::float16>>,
    ops::SquareDoubleGradKernel<plat::CUDADeviceContext,
                                ops::SquareGradGradFunctor<int>>,
    ops::SquareDoubleGradKernel<plat::CUDADeviceContext,
                                ops::SquareGradGradFunctor<int64_t>>);
/* ========================================================================== */
/* ========================== pow register ============================ */
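// pow uses dedicated PowKernel/PowGradKernel instead of the generic
// activation kernels: the exponent factor can be supplied either as an
// attribute or as an extra input tensor, which the generic kernels do not
// handle.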
REGISTER_OP_CUDA_KERNEL(
    pow, ops::PowKernel<plat::CUDADeviceContext, ops::PowFunctor<float>>,
    ops::PowKernel<plat::CUDADeviceContext, ops::PowFunctor<double>>,
    ops::PowKernel<plat::CUDADeviceContext, ops::PowFunctor<int>>,
    ops::PowKernel<plat::CUDADeviceContext, ops::PowFunctor<int64_t>>,
    ops::PowKernel<plat::CUDADeviceContext, ops::PowFunctor<plat::float16>>);

REGISTER_OP_CUDA_KERNEL(
    pow_grad,
    ops::PowGradKernel<plat::CUDADeviceContext, ops::PowGradFunctor<float>>,
    ops::PowGradKernel<plat::CUDADeviceContext, ops::PowGradFunctor<double>>,
    ops::PowGradKernel<plat::CUDADeviceContext, ops::PowGradFunctor<int>>,
    ops::PowGradKernel<plat::CUDADeviceContext, ops::PowGradFunctor<int64_t>>,
    ops::PowGradKernel<plat::CUDADeviceContext,
                       ops::PowGradFunctor<plat::float16>>);
/* ========================================================================== */
/* ========================== exp register ============================ */
REGISTER_OP_CUDA_KERNEL(
    exp, ops::ActivationKernel<plat::CUDADeviceContext, ops::ExpFunctor<float>>,
    ops::ActivationKernel<plat::CUDADeviceContext, ops::ExpFunctor<double>>,
    ops::ActivationKernel<plat::CUDADeviceContext, ops::ExpFunctor<int>>,
    ops::ActivationKernel<plat::CUDADeviceContext, ops::ExpFunctor<int64_t>>,
    ops::ActivationKernel<plat::CUDADeviceContext,
                          ops::ExpFunctor<plat::float16>>);

REGISTER_OP_CUDA_KERNEL(
    exp_grad, ops::ActivationGradKernel<plat::CUDADeviceContext,
                                        ops::ExpGradFunctor<float>>,
    ops::ActivationGradKernel<plat::CUDADeviceContext,
                              ops::ExpGradFunctor<double>>,
    ops::ActivationGradKernel<plat::CUDADeviceContext,
                              ops::ExpGradFunctor<int>>,
    ops::ActivationGradKernel<plat::CUDADeviceContext,
                              ops::ExpGradFunctor<int64_t>>,
    ops::ActivationGradKernel<plat::CUDADeviceContext,
                              ops::ExpGradFunctor<plat::float16>>);
/* ========================================================================== */
/* ========================== abs register ============================ */
REGISTER_OP_CUDA_KERNEL(
    abs, ops::ActivationKernel<plat::CUDADeviceContext, ops::AbsFunctor<float>>,
    ops::ActivationKernel<plat::CUDADeviceContext, ops::AbsFunctor<double>>,
    ops::ActivationKernel<plat::CUDADeviceContext, ops::AbsFunctor<int>>,
    ops::ActivationKernel<plat::CUDADeviceContext, ops::AbsFunctor<int64_t>>,
    ops::ActivationKernel<plat::CUDADeviceContext,
                          ops::AbsFunctor<plat::float16>>);

REGISTER_OP_CUDA_KERNEL(
    abs_grad, ops::ActivationGradKernel<plat::CUDADeviceContext,
                                        ops::AbsGradFunctor<float>>,
    ops::ActivationGradKernel<plat::CUDADeviceContext,
                              ops::AbsGradFunctor<double>>,
    ops::ActivationGradKernel<plat::CUDADeviceContext,
                              ops::AbsGradFunctor<int>>,
    ops::ActivationGradKernel<plat::CUDADeviceContext,
                              ops::AbsGradFunctor<int64_t>>,
    ops::ActivationGradKernel<plat::CUDADeviceContext,
                              ops::AbsGradFunctor<plat::float16>>);

REGISTER_OP_CUDA_KERNEL(
    abs_grad_grad,
    ops::ActivationDoubleGradKernel<plat::CUDADeviceContext,
                                    ops::AbsGradGradFunctor<float>>,
    ops::ActivationDoubleGradKernel<plat::CUDADeviceContext,
                                    ops::AbsGradGradFunctor<double>>,
    ops::ActivationDoubleGradKernel<plat::CUDADeviceContext,
                                    ops::AbsGradGradFunctor<plat::float16>>,
    ops::ActivationDoubleGradKernel<plat::CUDADeviceContext,
                                    ops::AbsGradGradFunctor<int>>,
    ops::ActivationDoubleGradKernel<plat::CUDADeviceContext,
                                    ops::AbsGradGradFunctor<int64_t>>);
/* ========================================================================== */
/* ========================== Log register ==================================*/
REGISTER_ACTIVATION_CUDA_KERNEL(log, Log, LogFunctor, LogGradFunctor);
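// log also has a specialized LogDoubleGradKernel for its second-order
// gradient (with out = log(x), the first-order gradient is dx = dout / x).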
REGISTER_OP_CUDA_KERNEL(
    log_grad_grad, ops::LogDoubleGradKernel<plat::CUDADeviceContext,
                                            ops::LogGradGradFunctor<float>>,
    ops::LogDoubleGradKernel<plat::CUDADeviceContext,
                             ops::LogGradGradFunctor<double>>,
    ops::LogDoubleGradKernel<plat::CUDADeviceContext,
                             ops::LogGradGradFunctor<plat::float16>>);
/* ========================================================================== */
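// Registering a new activation follows the same pattern. A minimal sketch,
// assuming hypothetical MishFunctor / MishGradFunctor were defined in
// activation_op.h alongside the existing functors:
//
//   REGISTER_ACTIVATION_CUDA_KERNEL(mish, Mish, MishFunctor,
//                                   MishGradFunctor);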