@@ -27,10 +27,6 @@ class GemmConvXPUKernel : public framework::OpKernel<T> {
     // that avoids modifying the variable in the Scope.
     Tensor filter = *context.Input<Tensor>("Filter");
     Tensor* output = context.Output<Tensor>("Output");
-    // Tensor* max_input = context.Output<Tensor>("MaxInput");
-    // Tensor* max_filter = context.Output<Tensor>("MaxFilter");
-    // max_input->mutable_data<T>(context.GetPlace());
-    // max_filter->mutable_data<T>(context.GetPlace());
     output->mutable_data<T>(context.GetPlace());
     int groups = context.Attr<int>("groups");
     std::vector<int> strides = context.Attr<std::vector<int>>("strides");
@@ -43,52 +39,18 @@ class GemmConvXPUKernel : public framework::OpKernel<T> {
     const int f = static_cast<int>(filter.dims()[0]);
     const int win_h = static_cast<int>(filter.dims()[2]);
     const int win_w = static_cast<int>(filter.dims()[3]);
-    PADDLE_ENFORCE_EQ(
-        dilations[0] == 1 && dilations[1] == 1, true,
-        platform::errors::InvalidArgument("XPU only support dilation == 1."));
     auto& dev_ctx = context.template device_context<DeviceContext>();
-    // PADDLE_ENFORCE_EQ(
-    //     xpu::findmax(dev_ctx.x_context(), input->data<T>(), input->numel(),
-    //                  max_input->data<T>()) == xpu::Error_t::SUCCESS,
-    //     true, platform::errors::InvalidArgument(
-    //               "XPU conv kernel error,can not finde max_input,please "
-    //               "check whether Baidu Kunlun "
-    //               "Card is properly installed."));
-    // PADDLE_ENFORCE_EQ(
-    //     xpu::findmax(dev_ctx.x_context(), filter.data<T>(), filter.numel(),
-    //                  max_filter->data<T>()) == xpu::Error_t::SUCCESS,
-    //     true, platform::errors::InvalidArgument(
-    //               "XPU conv kernel error,can not find max_filter,please "
-    //               "check whether Baidu Kunlun "
-    //               "Card is properly installed."));
-    if (groups == 1) {
-      int r = xpu::conv2d_forward_int16<float, float, float, float>(
-          dev_ctx.x_context(), batch_size, img_c, img_h, img_w, f, win_h, win_w,
-          strides[0], strides[1], paddings[0], paddings[1], dilations[0],
-          dilations[1], groups, input->data<float>(), filter.data<float>(),
-          output->data<float>(), nullptr, nullptr, xpu::Activation_t::LINEAR,
-          nullptr, nullptr);
-      // max_input->data<float>(), max_filter->data<float>());
-      PADDLE_ENFORCE_EQ(
-          r, XPU_SUCCESS,
-          platform::errors::External("XPU conv kernel return wrong value[%d], "
-                                     "please check whether Baidu Kunlun Card "
-                                     "is properly installed.",
-                                     r));
-    } else {
-      int r = xpu::conv2d_int16_with_group<float, float, float>(
-          dev_ctx.x_context(), input->data<float>(), filter.data<float>(),
-          output->data<float>(), batch_size, img_c, img_h, img_w, f, win_h,
-          win_w, groups, strides[0], strides[1], paddings[0], paddings[1],
-          nullptr, nullptr);
-      // max_input->data<float>(), max_filter->data<float>());
-      PADDLE_ENFORCE_EQ(
-          r, XPU_SUCCESS,
-          platform::errors::External("XPU conv kernel return wrong value[%d], "
-                                     "please check whether Baidu Kunlun Card "
-                                     "is properly installed.",
-                                     r));
-    }
+    std::vector<int> k_size;
+    k_size.push_back(win_h);
+    k_size.push_back(win_w);
+    int r = xpu::conv2d<float, float, float, int16_t>(
+        dev_ctx.x_context(), input->data<float>(), filter.data<float>(),
+        output->data<float>(), batch_size, img_c, img_h, img_w, f, k_size,
+        strides, paddings, dilations, groups, nullptr, nullptr, nullptr, true);
+    PADDLE_ENFORCE_EQ(
+        r, XPU_SUCCESS,
+        platform::errors::External("XPU conv kernel return wrong value[%d %s]",
+                                   r, XPUAPIErrorMsg[r]));
   }
 };
 template <typename DeviceContext, typename T>
@@ -96,9 +58,6 @@ class GemmConvGradXPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
     const Tensor* input = context.Input<Tensor>("Input");
-    // const Tensor* max_input = context.Input<Tensor>("MaxInput");
-    // const Tensor* max_filter = context.Input<Tensor>("MaxFilter");
-    // Tensor* max_output_grad = context.Output<Tensor>("MaxOutputGrad");
     const Tensor* output_grad =
         context.Input<Tensor>(framework::GradVarName("Output"));
     Tensor* input_grad =
@@ -115,11 +74,6 @@ class GemmConvGradXPUKernel : public framework::OpKernel<T> {
     std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
     std::vector<int> dilations = context.Attr<std::vector<int>>("dilations");
     const int batch_size = static_cast<int>(input->dims()[0]);
-    PADDLE_ENFORCE_EQ(groups == 1, true, platform::errors::InvalidArgument(
-                                             "XPU only support groups == 1."));
-    PADDLE_ENFORCE_EQ(
-        dilations[0] == 1 && dilations[1] == 1, true,
-        platform::errors::InvalidArgument("XPU only support dilation == 1."));
     const int img_c = static_cast<int>(input->dims()[1]);
     const int img_h = static_cast<int>(input->dims()[2]);
     const int img_w = static_cast<int>(input->dims()[3]);
@@ -133,52 +87,24 @@ class GemmConvGradXPUKernel : public framework::OpKernel<T> {
       filter_grad->mutable_data<T>(context.GetPlace());
     }
     auto& dev_ctx = context.template device_context<DeviceContext>();
-    // max_output_grad->Resize({4});
-    // max_output_grad->mutable_data<T>(context.GetPlace());
-    // PADDLE_ENFORCE_EQ(
-    //     xpu::findmax(dev_ctx.x_context(), output_grad->data<T>(),
-    //                  output_grad->numel(),
-    //                  max_output_grad->data<T>()) == xpu::Error_t::SUCCESS,
-    //     true,
-    //     platform::errors::External(
-    //         "XPU conv kernel error, can not find max_output_grad, please
-    //         check "
-    //         "whether Baidu Kunlun Card is "
-    //         "properly installed."));
-    if (input_grad) {
-      int r = xpu::conv2d_backward_int16(
-          dev_ctx.x_context(), batch_size, img_c, img_h, img_w, f, win_h, win_w,
-          strides[0], strides[1], paddings[0], paddings[1], dilations[0],
-          dilations[1], groups, output_grad->data<float>(),
-          filter.data<float>(), input_grad->data<float>(), nullptr, nullptr);
-      // max_output_grad->data<float>(), max_filter->data<float>());
-      PADDLE_ENFORCE_EQ(
-          r, XPU_SUCCESS,
-          platform::errors::External("XPU conv kernel return wrong value[%d], "
-                                     "please check whether Baidu Kunlun Card "
-                                     "is properly installed.",
-                                     r));
-    }
-    if (filter_grad) {
-      int r = xpu::conv2d_backward_weight_int16(
-          dev_ctx.x_context(), batch_size, img_c, img_h, img_w, f, win_h, win_w,
-          strides[0], strides[1], paddings[0], paddings[1], dilations[0],
-          dilations[1], groups, output_grad->data<float>(),
-          input->data<float>(), filter_grad->data<float>(), nullptr, nullptr);
-      // max_output_grad->data<float>(), max_input->data<float>());
-      PADDLE_ENFORCE_EQ(
-          r, XPU_SUCCESS,
-          platform::errors::External("XPU conv kernel return wrong value[%d], "
-                                     "please check whether Baidu Kunlun Card "
-                                     "is properly installed.",
-                                     r));
-    }
+    std::vector<int> k_size;
+    k_size.push_back(win_h);
+    k_size.push_back(win_w);
+    int r = xpu::conv2d_grad<float, float, float, int16_t>(
+        dev_ctx.x_context(), input->data<T>(), filter.data<T>(),
+        output_grad->data<T>(), input_grad ? input_grad->data<T>() : nullptr,
+        filter_grad ? filter_grad->data<T>() : nullptr, batch_size, img_c,
+        img_h, img_w, f, k_size, strides, paddings, dilations, groups, nullptr,
+        nullptr, nullptr, nullptr, nullptr, true);
+    PADDLE_ENFORCE_EQ(
+        r, XPU_SUCCESS,
+        platform::errors::External("XPU conv kernel return wrong value[%d %s]",
+                                   r, XPUAPIErrorMsg[r]));
   }
 };
 }  // namespace operators
 }  // namespace paddle
 namespace ops = paddle::operators;
-// TODO(xingzhaolong): neon kernel for mobile
 REGISTER_OP_XPU_KERNEL(
     depthwise_conv2d,
     ops::GemmConvXPUKernel<paddle::platform::XPUDeviceContext, float>);
@@ -187,4 +113,7 @@ REGISTER_OP_XPU_KERNEL(
 REGISTER_OP_XPU_KERNEL(
     conv2d_grad,
     ops::GemmConvGradXPUKernel<paddle::platform::XPUDeviceContext, float>);
+REGISTER_OP_XPU_KERNEL(
+    depthwise_conv2d_grad,
+    ops::GemmConvGradXPUKernel<paddle::platform::XPUDeviceContext, float>);
 #endif