[Custom OP]Remove old custom OP and reduce whl package volume (#31813)
* Remove old custom OP to reduce whl package volume
* [Custom OP] Remove old custom OP to reduce whl package volume
parent fe2848686b
commit 04a49b097e
@@ -1,53 +0,0 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/c/c_api.h"

#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"

extern "C" {

paddle::framework::OpInfoMap &PD_GetOpInfoMap() {
  return paddle::framework::OpInfoMap::Instance();
}

void PD_InitDevicesPool(paddle::platform::DeviceContextPool *pool) {
  paddle::platform::DeviceContextPool::SetPool(pool);
}

std::vector<std::string> PD_GetGradOpDescStrs(
    const paddle::framework::OpDesc &op_desc,
    const std::unordered_set<std::string> &no_grad_set,
    std::unordered_map<std::string, std::string> *grad_to_var,
    const std::vector<paddle::framework::BlockDesc *> &grad_block) {
  auto &op_info = PD_GetOpInfoMap().Get(op_desc.Type());
  std::vector<std::string> ret;
  if (op_info.grad_op_maker_) {
    auto grad_op_descs =
        op_info.grad_op_maker_(op_desc, no_grad_set, grad_to_var, grad_block);
    size_t op_num = grad_op_descs.size();
    ret.resize(op_num);
    for (size_t i = 0; i < op_num; ++i) {
      PADDLE_ENFORCE_EQ(
          grad_op_descs[i]->Proto()->SerializePartialToString(&ret[i]), true,
          paddle::platform::errors::Unavailable(
              "Cannot serialize operator desc message."));
    }
  }
  return ret;
}

}  // end extern "C"
@@ -1,55 +0,0 @@
/* copyright (c) 2019 paddlepaddle authors. all rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/platform/device_context.h"

namespace paddle {
namespace framework {
class OpInfoMap;
}  // namespace framework
namespace platform {
class DeviceContextPool;
}  // namespace platform
}  // namespace paddle

#ifdef __cplusplus
extern "C" {
#endif

// C-API to get global OpInfo map.
paddle::framework::OpInfoMap &PD_GetOpInfoMap();

// C-API to init global DeviceContextPool from outside.
void PD_InitDevicesPool(paddle::platform::DeviceContextPool *pool);

// C-API to serialize the grad op protocol message to a binary string.
std::vector<std::string> PD_GetGradOpDescStrs(
    const paddle::framework::OpDesc &op_desc,
    const std::unordered_set<std::string> &no_grad_set,
    std::unordered_map<std::string, std::string> *grad_to_var,
    const std::vector<paddle::framework::BlockDesc *> &grad_block);

#ifdef __cplusplus
}
#endif
@@ -1,120 +0,0 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h"

namespace paddle {
namespace framework {

template <typename T>
T *DynLoad(void *handle, std::string name) {
  T *func = reinterpret_cast<T *>(dlsym(handle, name.c_str()));
#if !defined(_WIN32)
  auto errorno = dlerror();
#else
  auto errorno = GetLastError();
#endif  // !_WIN32
  PADDLE_ENFORCE_NOT_NULL(
      func,
      platform::errors::NotFound(
          "Failed to load dynamic operator library, error code(%s).", errorno));
  return func;
}

void LoadOpLib(const std::string &dso_name) {
  void *handle = paddle::platform::dynload::GetOpDsoHandle(dso_name);

  typedef OpInfoMap &get_op_info_t();
  get_op_info_t *get_op_info =
      DynLoad<get_op_info_t>(handle, "PD_GetOpInfoMap");
  auto &op_info = get_op_info();
  auto *dyn_info_map = op_info.mutable_map();

  typedef std::vector<std::string> grad_op_desc_maker_t(
      const OpDesc &, const std::unordered_set<std::string> &,
      std::unordered_map<std::string, std::string> *,
      const std::vector<BlockDesc *> &);

  grad_op_desc_maker_t *grad_op_desc_maker =
      DynLoad<grad_op_desc_maker_t>(handle, "PD_GetGradOpDescStrs");

  auto &info_map = OpInfoMap::Instance();
  for (const auto &n : *(dyn_info_map)) {
    auto type = n.first;
    if (type == "recurrent" || type == "recurrent_grad" ||
        type == "conditional_block" || type == "conditional_block_grad") {
      continue;
    }
    PADDLE_ENFORCE_NE(info_map.Has(n.first), true,
                      platform::errors::AlreadyExists(
                          "Operator (%s) has been registered.", type));
    OpInfo info;
    info.creator_ = n.second.creator_;

    // If get the protocol buffer from dynamic library directly, there
    // will be deconstruction error
    // ** Error in `python`: free(): invalid pointer:
    //     ... paddle::framework::proto::OpDesc::SharedDtor()
    // It seems a bug in protobuf, see
    // https://github.com/protocolbuffers/protobuf/issues/435
    // So, get the serialized binary string from dynamic library,
    // then deserialize to protocol buffer.
    info.grad_op_maker_ = [grad_op_desc_maker](
        const OpDesc &op_desc,
        const std::unordered_set<std::string> &no_grad_set,
        std::unordered_map<std::string, std::string> *grad_to_var,
        const std::vector<BlockDesc *> &grad_block) {
      std::vector<std::string> strs =
          grad_op_desc_maker(op_desc, no_grad_set, grad_to_var, grad_block);
      std::vector<std::unique_ptr<OpDesc>> ret;
      for (auto &str : strs) {
        proto::OpDesc proto_desc;
        PADDLE_ENFORCE_EQ(proto_desc.ParseFromString(str), true,
                          platform::errors::InvalidArgument(
                              "Failed to parse OpDesc from string."));
        ret.emplace_back(new OpDesc(proto_desc, nullptr));
      }
      return ret;
    };
    info.proto_ = n.second.proto_;
    info.checker_ = n.second.checker_;
    info.infer_var_type_ = n.second.infer_var_type_;
    info.infer_shape_ = n.second.infer_shape_;
    info.infer_inplace_ = n.second.infer_inplace_;
    info.infer_no_need_buffer_vars_ = n.second.infer_no_need_buffer_vars_;
    info.use_default_grad_op_desc_maker_ =
        n.second.use_default_grad_op_desc_maker_;
    info.use_empty_grad_op_desc_maker_ = n.second.use_empty_grad_op_desc_maker_;

    info_map.Insert(type, info);
  }

  typedef void init_device_t(platform::DeviceContextPool *);
  init_device_t *init_dev =
      DynLoad<init_device_t>(handle, "PD_InitDevicesPool");
  init_dev(&(platform::DeviceContextPool::Instance()));
}

}  // namespace framework
}  // namespace paddle
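For context, a minimal sketch of how this removed loader was driven from the Python side. It is not part of the diff itself; it only uses calls that appear in the test files removed further down (fluid.load_op_library, LayerHelper), and the .so name is the one those tests use.

import os

import paddle
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper

paddle.enable_static()

# Register the compiled ops: load_op_library dlopens the .so and merges its
# OpInfoMap entries into the running process (the LoadOpLib path above).
so_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'librelu2_op.so')
fluid.load_op_library(so_path)

# Expose a Python API by hand: the loaded op only exists in OpInfoMap, so a
# thin LayerHelper wrapper appends it to the program.
def relu2(x, name=None):
    helper = LayerHelper("relu2", **locals())
    out = helper.create_variable(
        type=x.type, name=name, dtype=x.dtype, persistable=False)
    helper.append_op(type="relu2", inputs={"X": x}, outputs={"Y": out})
    return out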
@@ -1,115 +0,0 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace operators {

class Relu2Op : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    auto in_dims = ctx->GetInputDim("X");
    ctx->SetOutputDim("Y", in_dims);
  }
};

class Relu2OpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X", "The input tensor.");
    AddOutput("Y", "Output of relu_op");
    AddComment(R"DOC(
Relu2 Operator.
)DOC");
  }
};

class Relu2GradOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    auto in_dims = ctx->GetInputDim(framework::GradVarName("Y"));
    ctx->SetOutputDim(framework::GradVarName("X"), in_dims);
  }
};

template <typename T>
class Relu2GradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType("relu2_grad");
    op->SetInput("Y", this->Output("Y"));
    op->SetInput(framework::GradVarName("Y"), this->OutputGrad("Y"));
    op->SetAttrMap(this->Attrs());
    op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
  }
};

using Tensor = framework::Tensor;

template <typename DeviceContext, typename T>
class Relu2Kernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* in_t = ctx.Input<Tensor>("X");
    auto* out_t = ctx.Output<Tensor>("Y");
    auto x = in_t->data<T>();
    auto y = out_t->mutable_data<T>(ctx.GetPlace());
    for (int i = 0; i < in_t->numel(); ++i) {
      y[i] = std::max(static_cast<T>(0.), x[i]);
    }
  }
};

template <typename DeviceContext, typename T>
class Relu2GradKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* dy_t = ctx.Input<Tensor>(framework::GradVarName("Y"));
    auto* y_t = ctx.Input<Tensor>("Y");
    auto* dx_t = ctx.Output<Tensor>(framework::GradVarName("X"));

    auto dy = dy_t->data<T>();
    auto y = y_t->data<T>();
    auto dx = dx_t->mutable_data<T>(ctx.GetPlace());

    for (int i = 0; i < y_t->numel(); ++i) {
      dx[i] = dy[i] * (y[i] > static_cast<T>(0) ? 1. : 0.);
    }
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext;
REGISTER_OPERATOR(relu2,
                  ops::Relu2Op,
                  ops::Relu2OpMaker,
                  ops::Relu2GradMaker<paddle::framework::OpDesc>,
                  ops::Relu2GradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(relu2_grad, ops::Relu2GradOp);
REGISTER_OP_CPU_KERNEL(relu2,
                       ops::Relu2Kernel<CPU, float>,
                       ops::Relu2Kernel<CPU, double>);
REGISTER_OP_CPU_KERNEL(relu2_grad,
                       ops::Relu2GradKernel<CPU, float>,
                       ops::Relu2GradKernel<CPU, double>);
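The two CPU kernels above are an element-wise ReLU and its gradient. A small NumPy sketch of the same arithmetic (the variable names are illustrative; the input matches the raw_data/gt_data arrays used in the removed tests):

import numpy as np

x = np.array([[-1., 1., 0.], [1., -1., -1.]], dtype=np.float32)
y = np.maximum(x, 0.)                 # forward: y[i] = max(0, x[i])

dy = np.ones_like(y)                  # stand-in upstream gradient
dx = dy * (y > 0).astype(np.float32)  # backward: dx[i] = dy[i] * (y[i] > 0 ? 1 : 0)

print(y)   # [[0. 1. 0.] [1. 0. 0.]] -- the gt_data checked by the removed tests
print(dx)  # gradient passes through only where the activation is positive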
@@ -1,87 +0,0 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;

template <typename T>
__global__ void KeRelu2(const T* x, const int num, T* y) {
  int gid = blockIdx.x * blockDim.x + threadIdx.x;
  for (int i = gid; i < num; i += blockDim.x * gridDim.x) {
    y[i] = max(x[i], static_cast<T>(0.));
  }
}

template <typename DeviceContext, typename T>
class Relu2CUDAKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* in_t = ctx.Input<Tensor>("X");
    auto* out_t = ctx.Output<Tensor>("Y");
    auto x = in_t->data<T>();
    auto y = out_t->mutable_data<T>(ctx.GetPlace());

    auto& dev_ctx = ctx.template device_context<DeviceContext>();

    int num = in_t->numel();
    int block = 512;
    int grid = (num + block - 1) / block;
    KeRelu2<T><<<grid, block, 0, dev_ctx.stream()>>>(x, num, y);
  }
};

template <typename T>
__global__ void KeRelu2Grad(const T* y, const T* dy, const int num, T* dx) {
  int gid = blockIdx.x * blockDim.x + threadIdx.x;
  for (int i = gid; i < num; i += blockDim.x * gridDim.x) {
    dx[i] = dy[i] * (y[i] > 0 ? 1. : 0.);
  }
}

template <typename DeviceContext, typename T>
class Relu2GradCUDAKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* dy_t = ctx.Input<Tensor>(framework::GradVarName("Y"));
    auto* y_t = ctx.Input<Tensor>("Y");
    auto* dx_t = ctx.Output<Tensor>(framework::GradVarName("X"));

    auto dy = dy_t->data<T>();
    auto y = y_t->data<T>();
    auto dx = dx_t->mutable_data<T>(ctx.GetPlace());

    auto& dev_ctx = ctx.template device_context<DeviceContext>();

    int num = dy_t->numel();
    int block = 512;
    int grid = (num + block - 1) / block;
    KeRelu2Grad<T><<<grid, block, 0, dev_ctx.stream()>>>(y, dy, num, dx);
  }
};

}  // namespace operators
}  // namespace paddle

using CUDA = paddle::platform::CUDADeviceContext;
REGISTER_OP_CUDA_KERNEL(relu2,
                        paddle::operators::Relu2CUDAKernel<CUDA, float>,
                        paddle::operators::Relu2CUDAKernel<CUDA, double>);

REGISTER_OP_CUDA_KERNEL(relu2_grad,
                        paddle::operators::Relu2GradCUDAKernel<CUDA, float>,
                        paddle::operators::Relu2GradCUDAKernel<CUDA, double>);
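The CUDA kernels above launch with a fixed block size of 512 and a grid sized by ceiling division, then stride over the whole tensor. A quick check of that launch arithmetic (the element count here is just an example):

num = 28 * 28                      # e.g. one MNIST image, as fed by the removed tests
block = 512                        # threads per block, as in the .cu file above
grid = (num + block - 1) // block  # ceil(num / block)
print(grid)                        # 2 -> 2 blocks x 512 threads cover 784 elements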
@@ -1,115 +0,0 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace operators {

class Relu3Op : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    auto in_dims = ctx->GetInputDim("X");
    ctx->SetOutputDim("Y", in_dims);
  }
};

class Relu3OpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X", "The input tensor.");
    AddOutput("Y", "Output of relu_op");
    AddComment(R"DOC(
Relu3 Operator.
)DOC");
  }
};

class Relu3GradOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    auto in_dims = ctx->GetInputDim(framework::GradVarName("Y"));
    ctx->SetOutputDim(framework::GradVarName("X"), in_dims);
  }
};

template <typename T>
class Relu3GradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType("relu3_grad");
    op->SetInput("Y", this->Output("Y"));
    op->SetInput(framework::GradVarName("Y"), this->OutputGrad("Y"));
    op->SetAttrMap(this->Attrs());
    op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
  }
};

using Tensor = framework::Tensor;

template <typename DeviceContext, typename T>
class Relu3Kernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* in_t = ctx.Input<Tensor>("X");
    auto* out_t = ctx.Output<Tensor>("Y");
    auto x = in_t->data<T>();
    auto y = out_t->mutable_data<T>(ctx.GetPlace());
    for (int i = 0; i < in_t->numel(); ++i) {
      y[i] = std::max(static_cast<T>(0.), x[i]);
    }
  }
};

template <typename DeviceContext, typename T>
class Relu3GradKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* dy_t = ctx.Input<Tensor>(framework::GradVarName("Y"));
    auto* y_t = ctx.Input<Tensor>("Y");
    auto* dx_t = ctx.Output<Tensor>(framework::GradVarName("X"));

    auto dy = dy_t->data<T>();
    auto y = y_t->data<T>();
    auto dx = dx_t->mutable_data<T>(ctx.GetPlace());

    for (int i = 0; i < y_t->numel(); ++i) {
      dx[i] = dy[i] * (y[i] > static_cast<T>(0) ? 1. : 0.);
    }
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext;
REGISTER_OPERATOR(relu3,
                  ops::Relu3Op,
                  ops::Relu3OpMaker,
                  ops::Relu3GradMaker<paddle::framework::OpDesc>,
                  ops::Relu3GradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(relu3_grad, ops::Relu3GradOp);
REGISTER_OP_CPU_KERNEL(relu3,
                       ops::Relu3Kernel<CPU, float>,
                       ops::Relu3Kernel<CPU, double>);
REGISTER_OP_CPU_KERNEL(relu3_grad,
                       ops::Relu3GradKernel<CPU, float>,
                       ops::Relu3GradKernel<CPU, double>);
@@ -1,87 +0,0 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;

template <typename T>
__global__ void KeRelu3(const T* x, const int num, T* y) {
  int gid = blockIdx.x * blockDim.x + threadIdx.x;
  for (int i = gid; i < num; i += blockDim.x * gridDim.x) {
    y[i] = max(x[i], static_cast<T>(0.));
  }
}

template <typename DeviceContext, typename T>
class Relu3CUDAKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* in_t = ctx.Input<Tensor>("X");
    auto* out_t = ctx.Output<Tensor>("Y");
    auto x = in_t->data<T>();
    auto y = out_t->mutable_data<T>(ctx.GetPlace());

    auto& dev_ctx = ctx.template device_context<DeviceContext>();

    int num = in_t->numel();
    int block = 512;
    int grid = (num + block - 1) / block;
    KeRelu3<T><<<grid, block, 0, dev_ctx.stream()>>>(x, num, y);
  }
};

template <typename T>
__global__ void KeRelu3Grad(const T* y, const T* dy, const int num, T* dx) {
  int gid = blockIdx.x * blockDim.x + threadIdx.x;
  for (int i = gid; i < num; i += blockDim.x * gridDim.x) {
    dx[i] = dy[i] * (y[i] > 0 ? 1. : 0.);
  }
}

template <typename DeviceContext, typename T>
class Relu3GradCUDAKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* dy_t = ctx.Input<Tensor>(framework::GradVarName("Y"));
    auto* y_t = ctx.Input<Tensor>("Y");
    auto* dx_t = ctx.Output<Tensor>(framework::GradVarName("X"));

    auto dy = dy_t->data<T>();
    auto y = y_t->data<T>();
    auto dx = dx_t->mutable_data<T>(ctx.GetPlace());

    auto& dev_ctx = ctx.template device_context<DeviceContext>();

    int num = dy_t->numel();
    int block = 512;
    int grid = (num + block - 1) / block;
    KeRelu3Grad<T><<<grid, block, 0, dev_ctx.stream()>>>(y, dy, num, dx);
  }
};

}  // namespace operators
}  // namespace paddle

using CUDA = paddle::platform::CUDADeviceContext;
REGISTER_OP_CUDA_KERNEL(relu3,
                        paddle::operators::Relu3CUDAKernel<CUDA, float>,
                        paddle::operators::Relu3CUDAKernel<CUDA, double>);

REGISTER_OP_CUDA_KERNEL(relu3_grad,
                        paddle::operators::Relu3GradCUDAKernel<CUDA, float>,
                        paddle::operators::Relu3GradCUDAKernel<CUDA, double>);
@@ -1,37 +0,0 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

from utils import paddle_includes, extra_compile_args
from paddle.utils.cpp_extension import CppExtension, CUDAExtension, BuildExtension, setup
from paddle.utils.cpp_extension.extension_utils import use_new_custom_op_load_method

# switch to old custom op method
use_new_custom_op_load_method(False)

file_dir = os.path.dirname(os.path.abspath(__file__))

setup(
    name='librelu2_op_from_setup',
    ext_modules=[
        CUDAExtension(
            sources=['relu_op3.cc', 'relu_op3.cu', 'relu_op.cc',
                     'relu_op.cu'],  # test for multi ops
            include_dirs=paddle_includes,
            extra_compile_args=extra_compile_args)
    ],
    cmdclass={
        'build_ext': BuildExtension.with_options(
            no_python_abi_suffix=True, output_dir=file_dir)  # for unittest
    })
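A sketch of how this build script was consumed by the setup-based test removed further down: build the .so with `python setup_build.py build`, then register it with fluid.load_op_library (a hypothetical inline version of that test's compile_so/load_so helpers):

import os

import paddle.fluid as fluid
from paddle.utils.cpp_extension.extension_utils import run_cmd

file_dir = os.path.dirname(os.path.abspath(__file__))

# `python setup_build.py build` emits librelu2_op_from_setup.so into file_dir
run_cmd('cd {} && python setup_build.py build'.format(file_dir))

# register the freshly built operators with the running Paddle process
fluid.load_op_library(os.path.join(file_dir, 'librelu2_op_from_setup.so'))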
@@ -1,29 +0,0 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

from utils import paddle_includes, extra_compile_args
from paddle.utils.cpp_extension import CUDAExtension, setup
from paddle.utils.cpp_extension.extension_utils import use_new_custom_op_load_method

# switch to old custom op method
use_new_custom_op_load_method(False)

setup(
    name='custom_relu2',
    ext_modules=CUDAExtension(  # test for not specific name here.
        sources=['relu_op.cc', 'relu_op.cu', 'relu_op3.cc',
                 'relu_op3.cu'],  # test for multi ops
        include_dirs=paddle_includes,
        extra_compile_args=extra_compile_args))
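Unlike the build-only variant above, this script installs an importable custom_relu2 egg. A minimal usage sketch based on the install test removed below (the input and expected output mirror its raw_data/gt_data arrays):

# after running: python setup_install.py install
import numpy as np
import paddle
import custom_relu2  # the installed egg exposes relu2 and relu3 directly

x = paddle.to_tensor(np.array([[-1, 1, 0], [1, -1, -1]], dtype='float32'))
print(custom_relu2.relu2(x).numpy())  # [[0. 1. 0.] [1. 0. 0.]]
print(custom_relu2.relu3(x).numpy())  # same result for relu3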
@@ -1,120 +0,0 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import sys
import numpy as np
import unittest
import contextlib

import paddle
import paddle.fluid as fluid
paddle.enable_static()


def load_so(so_name):
    """
    Load .so file and parse custom op into OpInfoMap.
    """
    file_dir = os.path.dirname(os.path.abspath(__file__))
    fluid.load_op_library(os.path.join(file_dir, so_name))


from paddle.fluid.layer_helper import LayerHelper


def relu2(x, name=None):
    helper = LayerHelper("relu2", **locals())
    out = helper.create_variable(
        type=x.type, name=name, dtype=x.dtype, persistable=False)
    helper.append_op(type="relu2", inputs={"X": x}, outputs={"Y": out})
    return out


@contextlib.contextmanager
def scope_prog_guard():
    prog = fluid.Program()
    startup_prog = fluid.Program()
    scope = fluid.core.Scope()
    with fluid.scope_guard(scope):
        with fluid.program_guard(prog, startup_prog):
            yield


def linear_fc(data, label, use_custom_relu):
    hidden = fluid.layers.fc(data, size=128)
    hidden = relu2(hidden) if use_custom_relu else fluid.layers.relu(hidden)
    hidden = fluid.layers.fc(hidden, size=128)
    hidden = fluid.layers.fc(hidden, size=10, act='softmax')
    loss = fluid.layers.cross_entropy(input=hidden, label=label)
    loss = fluid.layers.mean(loss)
    return loss


def custom_op_test(use_gpu=True, use_custom_relu=True):
    with scope_prog_guard():
        np.random.seed(0)
        fluid.default_startup_program().random_seed = 10
        fluid.default_main_program().random_seed = 10

        data = fluid.layers.data(
            name='data', shape=[1, 28, 28], dtype='float32')
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        loss = linear_fc(data, label, use_custom_relu)

        optimizer = fluid.optimizer.Momentum(learning_rate=0.1, momentum=0.9)
        optimizer.minimize(loss)

        place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())

        compile_program = fluid.compiler.CompiledProgram(
            fluid.default_main_program()).with_data_parallel(
                loss_name=loss.name)

        reader = paddle.batch(paddle.dataset.mnist.train(), batch_size=32)
        feeder = fluid.DataFeeder(feed_list=[data, label], place=place)

        num = 4
        for i, data in enumerate(reader()):
            outs, = exe.run(compile_program,
                            feed=feeder.feed(data),
                            fetch_list=[loss])
            if i == num:
                break
        return outs


class CustomOpTest(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        os.environ['CPU_NUM'] = str(2)

    def test_cpu(self):
        actual = custom_op_test(False, True)
        expect = custom_op_test(False, False)
        self.assertEqual(actual.all(), expect.all())

    def test_gpu(self):
        if not fluid.core.is_compiled_with_cuda():
            return
        actual = custom_op_test(True, True)
        expect = custom_op_test(True, False)
        self.assertEqual(actual.all(), expect.all())


if __name__ == '__main__':
    load_so(so_name='librelu2_op.so')
    unittest.main()
@@ -1,51 +0,0 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import unittest
import paddle
import numpy as np
from paddle.utils.cpp_extension import load
from utils import paddle_includes, extra_cc_args, extra_nvcc_args
from paddle.utils.cpp_extension.extension_utils import use_new_custom_op_load_method

# switch to old custom op method
use_new_custom_op_load_method(False)

# Compile and load custom op Just-In-Time.
custom_module = load(
    name='custom_relu2',
    sources=['relu_op.cc', 'relu_op.cu', 'relu_op3.cc', 'relu_op3.cu'],
    extra_include_paths=paddle_includes,  # add for Coverage CI
    extra_cxx_cflags=extra_cc_args,  # test for cc flags
    extra_cuda_cflags=extra_nvcc_args,  # test for nvcc flags
    verbose=True  # add for unittest
)


class TestJITLoad(unittest.TestCase):
    def test_api(self):
        raw_data = np.array([[-1, 1, 0], [1, -1, -1]]).astype('float32')
        gt_data = np.array([[0, 1, 0], [1, 0, 0]]).astype('float32')
        x = paddle.to_tensor(raw_data, dtype='float32')
        # use custom api
        out = custom_module.relu2(x)
        out3 = custom_module.relu3(x)

        self.assertTrue(np.array_equal(out.numpy(), gt_data))
        self.assertTrue(np.array_equal(out3.numpy(), gt_data))


if __name__ == '__main__':
    unittest.main()
@@ -1,69 +0,0 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import unittest
import numpy as np
from test_custom_op import CustomOpTest, load_so
import paddle
from paddle.utils.cpp_extension.extension_utils import run_cmd
from paddle.fluid.layer_helper import LayerHelper
from paddle.utils.cpp_extension.extension_utils import use_new_custom_op_load_method

# switch to old custom op method
use_new_custom_op_load_method(False)


def compile_so():
    """
    Compile .so file by running setup.py config.
    """
    # build .so with setup.py
    file_dir = os.path.dirname(os.path.abspath(__file__))
    cmd = 'cd {} && python setup_build.py build'.format(file_dir)
    run_cmd(cmd)


# `setup.py build` only produce .so file containing multi operators.
# Python Interface should be added manually. `relu2` api is in `test_custom_op.py`
def relu3(x, name=None):
    helper = LayerHelper("relu3", **locals())
    out = helper.create_variable(
        type=x.type, name=name, dtype=x.dtype, persistable=False)
    helper.append_op(type="relu3", inputs={"X": x}, outputs={"Y": out})
    return out


class TestCompileMultiOp(unittest.TestCase):
    def setUp(self):
        paddle.disable_static()

    def test_relu3(self):
        raw_data = np.array([[-1, 1, 0], [1, -1, -1]]).astype('float32')
        x = paddle.to_tensor(raw_data, dtype='float32')
        # use custom api
        out = relu3(x)

        self.assertTrue(
            np.array_equal(out.numpy(),
                           np.array([[0, 1, 0], [1, 0, 0]]).astype('float32')))

    def tearDown(self):
        paddle.enable_static()


if __name__ == '__main__':
    compile_so()
    load_so(so_name='librelu2_op_from_setup.so')
    unittest.main()
@@ -1,65 +0,0 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import sys
import site
import unittest
import paddle
import subprocess
import numpy as np
from paddle.utils.cpp_extension.extension_utils import run_cmd
from paddle.utils.cpp_extension.extension_utils import use_new_custom_op_load_method

# switch to old custom op method
use_new_custom_op_load_method(False)


class TestSetUpInstall(unittest.TestCase):
    def setUp(self):
        cur_dir = os.path.dirname(os.path.abspath(__file__))
        # compile, install the custom op egg into site-packages under background
        cmd = 'cd {} && python setup_install.py install'.format(cur_dir)
        run_cmd(cmd)

        # NOTE(Aurelius84): Normally, it's no need to add following codes for users.
        # But we simulate to pip install in current process, so interpreter don't snap
        # sys.path has been updated. So we update it manually.

        # See: https://stackoverflow.com/questions/56974185/import-runtime-installed-module-using-pip-in-python-3
        site_dir = site.getsitepackages()[0]
        custom_egg_path = [
            x for x in os.listdir(site_dir) if 'custom_relu2' in x
        ]
        assert len(custom_egg_path) == 1, "Matched egg number is %d." % len(
            custom_egg_path)
        sys.path.append(os.path.join(site_dir, custom_egg_path[0]))

    def test_api(self):
        # usage: import the package directly
        import custom_relu2

        raw_data = np.array([[-1, 1, 0], [1, -1, -1]]).astype('float32')
        gt_data = np.array([[0, 1, 0], [1, 0, 0]]).astype('float32')
        x = paddle.to_tensor(raw_data, dtype='float32')
        # use custom api
        out = custom_relu2.relu2(x)
        out3 = custom_relu2.relu3(x)

        self.assertTrue(np.array_equal(out.numpy(), gt_data))
        self.assertTrue(np.array_equal(out3.numpy(), gt_data))


if __name__ == '__main__':
    unittest.main()