polish cudnn related code and fix bug. (#15164)
* staged.
* polish code
* polish code. test=develop
* polish code. test=develop
* api change. test=develop
* fix default value. test=develop
* fix default value. test=develop

revert-15774-anakin_subgraph_engine
parent 6e3624442e
commit 225c11a91f

@@ -0,0 +1,40 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/platform/cudnn_desc.h"

namespace paddle {
namespace operators {
using framework::Tensor;
using platform::ActivationDescriptor;
using platform::TensorDescriptor;

template <typename Functor>
class CudnnActivationKernel
    : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    const framework::Tensor* X = nullptr;
    framework::Tensor* Out = nullptr;
    ExtractActivationTensor(context, &X, &Out);
    ActivationDescriptor act_desc;
    TensorDescriptor x_desc, out_desc;
    x_desc.set(detail::Ref(X));
    out_desc.set(detail::Ref(Out));
  }
};

}  // namespace operators
}  // namespace paddle
@@ -0,0 +1,175 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/platform/cudnn_desc.h"

namespace paddle {
namespace operators {
using framework::Tensor;
using platform::ActivationDescriptor;
using platform::TensorDescriptor;
using platform::CUDADeviceContext;

template <typename T>
struct CudnnActivationFunctor {
  using ELEMENT_TYPE = T;
  CudnnActivationFunctor(const CUDADeviceContext& ctx, const T& c,
                         const cudnnActivationMode_t& m)
      : ctx_(ctx), coef_(c), mode_(m) {}
  void operator()(const Tensor& x, Tensor* out) {
    ActivationDescriptor act_desc;
    act_desc.set(mode_, coef_);
    TensorDescriptor x_desc, out_desc;
    x_desc.set(x);
    out_desc.set(detail::Ref(out));
    PADDLE_ENFORCE(platform::dynload::cudnnActivationForward(
        ctx_.cudnn_handle(), act_desc.desc(),
        platform::CudnnDataType<T>::kOne(), x_desc.desc(), x.data<T>(),
        platform::CudnnDataType<T>::kZero(), out_desc.desc(),
        out->mutable_data<T>(ctx_.GetPlace())));
  }
  const CUDADeviceContext& ctx_;
  const T coef_;
  const cudnnActivationMode_t mode_;
};

template <typename T>
struct CudnnActivationGradFunctor {
  using ELEMENT_TYPE = T;
  CudnnActivationGradFunctor(const CUDADeviceContext& ctx, const T& c,
                             const cudnnActivationMode_t& m)
      : ctx_(ctx), coef_(c), mode_(m) {}
  void operator()(const Tensor& x, const Tensor& out, const Tensor& dout,
                  Tensor* dx) {
    ActivationDescriptor act_desc;
    act_desc.set(mode_, coef_);
    TensorDescriptor x_desc, out_desc, dout_desc, dx_desc;
    x_desc.set(x);
    out_desc.set(out);
    dout_desc.set(dout);
    dx_desc.set(detail::Ref(dx));
    PADDLE_ENFORCE(platform::dynload::cudnnActivationBackward(
        ctx_.cudnn_handle(), act_desc.desc(),
        platform::CudnnDataType<T>::kOne(), out_desc.desc(), out.data<T>(),
        dout_desc.desc(), dout.data<T>(), x_desc.desc(), x.data<T>(),
        platform::CudnnDataType<T>::kZero(), dx_desc.desc(),
        dx->mutable_data<T>(ctx_.GetPlace())));
  }
  const CUDADeviceContext& ctx_;
  const T coef_;
  const cudnnActivationMode_t mode_;
};

template <typename T>
struct CudnnReluFunctor : public CudnnActivationFunctor<T> {
  explicit CudnnReluFunctor(const CUDADeviceContext& ctx)
      : CudnnActivationFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_RELU) {}
};
template <typename T>
struct CudnnReluGradFunctor : public CudnnActivationGradFunctor<T> {
  explicit CudnnReluGradFunctor(const CUDADeviceContext& ctx)
      : CudnnActivationGradFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_RELU) {}
};

template <typename T>
struct CudnnRelu6Functor : public CudnnActivationFunctor<T> {
  explicit CudnnRelu6Functor(const CUDADeviceContext& ctx)
      : CudnnActivationFunctor<T>(ctx, 6.0, CUDNN_ACTIVATION_CLIPPED_RELU) {}
};
template <typename T>
struct CudnnRelu6GradFunctor : public CudnnActivationGradFunctor<T> {
  explicit CudnnRelu6GradFunctor(const CUDADeviceContext& ctx)
      : CudnnActivationGradFunctor<T>(ctx, 6.0, CUDNN_ACTIVATION_CLIPPED_RELU) {
  }
};

template <typename T>
struct CudnnSigmoidFunctor : public CudnnActivationFunctor<T> {
  explicit CudnnSigmoidFunctor(const CUDADeviceContext& ctx)
      : CudnnActivationFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_SIGMOID) {}
};
template <typename T>
struct CudnnSigmoidGradFunctor : public CudnnActivationGradFunctor<T> {
  explicit CudnnSigmoidGradFunctor(const CUDADeviceContext& ctx)
      : CudnnActivationGradFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_SIGMOID) {}
};

template <typename T>
struct CudnnTanhFunctor : public CudnnActivationFunctor<T> {
  explicit CudnnTanhFunctor(const CUDADeviceContext& ctx)
      : CudnnActivationFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_TANH) {}
};
template <typename T>
struct CudnnTanhGradFunctor : public CudnnActivationGradFunctor<T> {
  explicit CudnnTanhGradFunctor(const CUDADeviceContext& ctx)
      : CudnnActivationGradFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_TANH) {}
};

template <typename Functor>
class CudnnActivationKernel
    : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
 public:
  using T = typename Functor::ELEMENT_TYPE;
  void Compute(const framework::ExecutionContext& context) const override {
    const framework::Tensor* X = nullptr;
    framework::Tensor* Out = nullptr;
    ExtractActivationTensor(context, &X, &Out);
    Out->mutable_data<T>(context.GetPlace());
    auto& dev_ctx = context.template device_context<CUDADeviceContext>();
    Functor functor(dev_ctx);
    functor(detail::Ref(X), Out);
  }
};

template <typename Functor>
class CudnnActivationGradKernel
    : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
 public:
  using T = typename Functor::ELEMENT_TYPE;
  void Compute(const framework::ExecutionContext& context) const override {
    const framework::Tensor *X, *Out, *dOut;
    X = Out = dOut = nullptr;
    framework::Tensor* dX = nullptr;
    ExtractActivationGradTensor(context, &X, &Out, &dOut, &dX);
    dX->mutable_data<T>(context.GetPlace());
    auto& dev_ctx = context.template device_context<CUDADeviceContext>();
    Functor functor(dev_ctx);
    functor(detail::Ref(X), detail::Ref(Out), detail::Ref(dOut), dX);
  }
};

}  // namespace operators
}  // namespace paddle

namespace plat = paddle::platform;
namespace ops = paddle::operators;

#define FOR_EACH_CUDNN_OP_FUNCTOR(__macro)                        \
  __macro(relu, CudnnReluFunctor, CudnnReluGradFunctor);          \
  __macro(relu6, CudnnRelu6Functor, CudnnRelu6GradFunctor);       \
  __macro(sigmoid, CudnnSigmoidFunctor, CudnnSigmoidGradFunctor); \
  __macro(tanh, CudnnTanhFunctor, CudnnTanhGradFunctor)

#define REGISTER_ACTIVATION_CUDNN_KERNEL(act_type, functor, grad_functor) \
  REGISTER_OP_KERNEL(act_type, CUDNN, plat::CUDAPlace,                    \
                     ops::CudnnActivationKernel<ops::functor<float>>,     \
                     ops::CudnnActivationKernel<ops::functor<double>>);   \
  REGISTER_OP_KERNEL(                                                     \
      act_type##_grad, CUDNN, plat::CUDAPlace,                            \
      ops::CudnnActivationGradKernel<ops::grad_functor<float>>,           \
      ops::CudnnActivationGradKernel<ops::grad_functor<double>>);

FOR_EACH_CUDNN_OP_FUNCTOR(REGISTER_ACTIVATION_CUDNN_KERNEL);
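For reference, expanding the relu entry of FOR_EACH_CUDNN_OP_FUNCTOR through REGISTER_ACTIVATION_CUDNN_KERNEL produces registrations roughly equivalent to the following hand-expanded sketch (illustration only, not part of the diff):

REGISTER_OP_KERNEL(relu, CUDNN, plat::CUDAPlace,
                   ops::CudnnActivationKernel<ops::CudnnReluFunctor<float>>,
                   ops::CudnnActivationKernel<ops::CudnnReluFunctor<double>>);
REGISTER_OP_KERNEL(relu_grad, CUDNN, plat::CUDAPlace,
                   ops::CudnnActivationGradKernel<ops::CudnnReluGradFunctor<float>>,
                   ops::CudnnActivationGradKernel<ops::CudnnReluGradFunctor<double>>);

That is, each activation listed in FOR_EACH_CUDNN_OP_FUNCTOR gets float and double CUDNN-library kernels for both the forward op and its _grad op.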
File diff suppressed because it is too large.
@@ -0,0 +1,124 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <algorithm>
#include <functional>
#include <iostream>
#include <iterator>
#include <memory>
#include <numeric>
#include <string>
#include <vector>
#include "paddle/fluid/platform/cudnn_helper.h"

namespace paddle {
namespace platform {
using framework::Tensor;

template <typename T>
cudnnDataType_t ToCudnnDataType(const T& t) {
  auto type = framework::ToDataType(t);
  return ToCudnnDataType(type);
}
template <>
inline cudnnDataType_t ToCudnnDataType(
    const framework::proto::VarType::Type& t) {
  cudnnDataType_t type = CUDNN_DATA_FLOAT;
  switch (t) {
    case framework::proto::VarType::FP16:
      type = CUDNN_DATA_HALF;
      break;
    case framework::proto::VarType::FP32:
      type = CUDNN_DATA_FLOAT;
      break;
    case framework::proto::VarType::FP64:
      type = CUDNN_DATA_DOUBLE;
      break;
    default:
      break;
  }
  return type;
}

class ActivationDescriptor {
 public:
  using T = cudnnActivationStruct;
  struct Deleter {
    void operator()(T* t) {
      if (t != nullptr) {
        PADDLE_ENFORCE(dynload::cudnnDestroyActivationDescriptor(t));
        t = nullptr;
      }
    }
  };
  ActivationDescriptor() {
    T* raw_ptr;
    PADDLE_ENFORCE(dynload::cudnnCreateActivationDescriptor(&raw_ptr));
    desc_.reset(raw_ptr);
  }
  template <typename T>
  void set(cudnnActivationMode_t mode, const T& coef) {
    CUDNN_ENFORCE(dynload::cudnnSetActivationDescriptor(
        desc_.get(), mode, CUDNN_NOT_PROPAGATE_NAN, static_cast<double>(coef)));
  }

  T* desc() { return desc_.get(); }
  T* desc() const { return desc_.get(); }

 private:
  std::unique_ptr<T, Deleter> desc_;
};

class TensorDescriptor {
 public:
  using T = cudnnTensorStruct;
  struct Deleter {
    void operator()(T* t) {
      if (t != nullptr) {
        PADDLE_ENFORCE(dynload::cudnnDestroyTensorDescriptor(t));
        t = nullptr;
      }
    }
  };
  TensorDescriptor() {
    T* raw_ptr;
    PADDLE_ENFORCE(dynload::cudnnCreateTensorDescriptor(&raw_ptr));
    desc_.reset(raw_ptr);
  }
  T* desc() { return desc_.get(); }
  T* desc() const { return desc_.get(); }
  void set(const Tensor& tensor, const int groups = 1) {
    auto dims = framework::vectorize2int(tensor.dims());
    std::vector<int> strides(dims.size());
    strides[dims.size() - 1] = 1;
    for (int i = dims.size() - 2; i >= 0; i--) {
      strides[i] = dims[i + 1] * strides[i + 1];
    }
    std::vector<int> dims_with_group(dims.begin(), dims.end());
    if (groups > 1) {
      dims_with_group[1] = dims_with_group[1] / groups;
    }
    PADDLE_ENFORCE(dynload::cudnnSetTensorNdDescriptor(
        desc_.get(), ToCudnnDataType(tensor.type()), dims_with_group.size(),
        dims_with_group.data(), strides.data()));
  }

 private:
  std::unique_ptr<T, Deleter> desc_;
};

}  // namespace platform
}  // namespace paddle
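TensorDescriptor::set above derives packed (fully contiguous) strides from the tensor dims before calling cudnnSetTensorNdDescriptor: the last dimension gets stride 1 and each earlier stride is the product of all later dims. A minimal standalone sketch of that stride computation follows; the PackedStrides helper and the sample dims are for illustration only and are not part of the diff.

#include <iostream>
#include <vector>

// Packed strides for a dims vector, mirroring the loop in TensorDescriptor::set.
std::vector<int> PackedStrides(const std::vector<int>& dims) {
  std::vector<int> strides(dims.size());
  strides[dims.size() - 1] = 1;
  for (int i = static_cast<int>(dims.size()) - 2; i >= 0; i--) {
    strides[i] = dims[i + 1] * strides[i + 1];
  }
  return strides;
}

int main() {
  // For dims {2, 3, 4} (as in the TensorDescriptor.Normal test in the file
  // below) this prints: 12 4 1
  for (int s : PackedStrides({2, 3, 4})) std::cout << s << " ";
  std::cout << "\n";
  return 0;
}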
@@ -0,0 +1,41 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/platform/cudnn_desc.h"
#include <gtest/gtest.h>

namespace paddle {
namespace platform {

TEST(TensorDescriptor, Empty) {
  ActivationDescriptor a;
  TensorDescriptor t;
  TensorDescriptor t1;
  TensorDescriptor *t11 = new TensorDescriptor();
  delete t11;
  std::unique_ptr<TensorDescriptor> tt(new TensorDescriptor());
}

TEST(TensorDescriptor, Normal) {
  framework::Tensor tt;
  tt.Resize({2, 3, 4});
  tt.mutable_data<float>(platform::CPUPlace());

  TensorDescriptor desc;
  desc.set(tt);
  EXPECT_TRUE(desc.desc() != nullptr);
}

}  // namespace platform
}  // namespace paddle