Add float16 support to non-cudnn softmax op on GPU (#9686)

* initial commit

* fix error

* fix typo and order
Branch: fea/docker_cudnn7
Kexin Zhao, 7 years ago; committed by Yi Wang
parent 797a7184ac
commit b2a1c9e8b7

@@ -14,6 +14,8 @@ limitations under the License. */
#define EIGEN_USE_GPU
#include <vector>
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/softmax.h"
#include "paddle/fluid/operators/math/softmax_impl.h"
@@ -95,6 +97,7 @@ template class SoftmaxCUDNNFunctor<double>;
template class SoftmaxGradCUDNNFunctor<float>;
template class SoftmaxGradCUDNNFunctor<double>;
+template class SoftmaxFunctor<platform::CUDADeviceContext, platform::float16>;
template class SoftmaxFunctor<platform::CUDADeviceContext, float>;
template class SoftmaxFunctor<platform::CUDADeviceContext, double>;
template class SoftmaxGradFunctor<platform::CUDADeviceContext, float>;

@@ -27,7 +27,7 @@ using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
template <typename T>
struct ValueClip {
  HOSTDEVICE T operator()(const T& x) const {
-    const T kThreshold = -64.;
+    const T kThreshold = static_cast<T>(-64.);
    return x < kThreshold ? kThreshold : x;
  }
};
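ValueClip clamps the shifted logits from below, apparently to keep exp(x - max) in the softmax from underflowing too hard; the new static_cast<T> lets the same functor instantiate when T is platform::float16, presumably because float16 cannot be initialized from the bare double literal -64. the way float and double can. Below is a minimal standalone sketch of the same clipping behavior, plain C++ with float standing in for float16 and ValueClipSketch as an invented name, not code from this patch:

// Standalone sketch, not Paddle code: same clamp as the ValueClip functor above.
#include <cmath>
#include <cstdio>

template <typename T>
struct ValueClipSketch {
  T operator()(const T& x) const {
    const T kThreshold = static_cast<T>(-64.);  // explicit cast, as in the patch
    return x < kThreshold ? kThreshold : x;
  }
};

int main() {
  ValueClipSketch<float> clip;
  float shifted = -1000.0f;  // a logit after subtracting the row max
  // Clamped to -64 before exponentiation; in double, exp(-64) is about 1.6e-28.
  std::printf("clip(%g) = %g, exp = %g\n", shifted, clip(shifted),
              std::exp(static_cast<double>(clip(shifted))));
  return 0;
}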

@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/softmax_op.h"
#include <string>
+#ifdef PADDLE_WITH_CUDA
+#include "paddle/fluid/platform/cudnn_helper.h"
+#endif
@@ -20,6 +23,7 @@ limitations under the License. */
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
namespace paddle {
namespace operators {
@@ -60,8 +64,8 @@ class SoftmaxOp : public framework::OperatorWithKernel {
    auto input_data_type =
        framework::ToDataType(ctx.Input<Tensor>("X")->type());
    if (input_data_type == framework::proto::VarType::FP16) {
-      PADDLE_ENFORCE_EQ(library_, framework::LibraryType::kCUDNN,
-                        "float16 can only be used when CUDNN is used");
+      PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
+                     "float16 can only be used on GPU place");
    }

    std::string data_format = ctx.Attr<std::string>("data_format");
@@ -70,6 +74,7 @@ class SoftmaxOp : public framework::OperatorWithKernel {
                                   library_);
  }
};

class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  SoftmaxOpMaker(OpProto* proto, OpAttrChecker* op_checker)

@@ -13,11 +13,12 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/softmax_op.h"
+#include "paddle/fluid/platform/float16.h"

namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(
-    softmax, ops::SoftmaxKernel<paddle::platform::CUDADeviceContext, float>);
-REGISTER_OP_CUDA_KERNEL(
-    softmax_grad,
-    ops::SoftmaxGradKernel<paddle::platform::CUDADeviceContext, float>);
+namespace plat = paddle::platform;
+REGISTER_OP_CUDA_KERNEL(
+    softmax, ops::SoftmaxKernel<plat::CUDADeviceContext, float>,
+    ops::SoftmaxKernel<plat::CUDADeviceContext, plat::float16>);
+REGISTER_OP_CUDA_KERNEL(softmax_grad,
+                        ops::SoftmaxGradKernel<plat::CUDADeviceContext, float>);
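With this registration, the plain (non-cuDNN) softmax CUDA op has kernels for both float and float16, while softmax_grad keeps only the float kernel, so the float16 path added here is forward-only. A rough sketch of the dispatch idea, in generic C++ rather than Paddle's actual registry, with DataType, KernelTable and the lambdas all invented for illustration: the data type chosen by GetExpectedKernelType above selects which registered kernel runs.

// Invented stand-ins for the op registry; Paddle's real machinery differs.
#include <cstdio>
#include <functional>
#include <map>
#include <string>

enum class DataType { FP16, FP32 };
using KernelFn = std::function<void()>;
using KernelTable = std::map<DataType, KernelFn>;

int main() {
  std::map<std::string, KernelTable> cuda_kernels;
  cuda_kernels["softmax"][DataType::FP32] = [] { std::puts("SoftmaxKernel<float>"); };
  cuda_kernels["softmax"][DataType::FP16] = [] { std::puts("SoftmaxKernel<float16>"); };
  cuda_kernels["softmax_grad"][DataType::FP32] = [] { std::puts("SoftmaxGradKernel<float>"); };

  // After the relaxed enforce in SoftmaxOp::GetExpectedKernelType, an fp16
  // input on a GPU place looks up an fp16 kernel here instead of requiring cuDNN.
  cuda_kernels["softmax"].at(DataType::FP16)();  // prints SoftmaxKernel<float16>
  return 0;
}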

File diff suppressed because it is too large

@@ -68,6 +68,17 @@ class TestSoftmaxCUDNNOp(TestSoftmaxOp):
        self.use_cudnn = True

+class TestSoftmaxFP16Op(TestSoftmaxOp):
+    def init_kernel_type(self):
+        self.dtype = np.float16
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            place = core.CUDAPlace(0)
+            if core.is_float16_supported(place):
+                self.check_output_with_place(place, atol=1e-3)
+
class TestSoftmaxFP16CUDNNOp(TestSoftmaxOp):
    def init_kernel_type(self):
        self.use_cudnn = True
