add xpu slice op (#27349)
* add xpu slice op test=xpu
* add slice xpu op test=xpu
* code style test=kunlun
* style test=kunlun
* format test=kunlun
parent 8d2cb14f98
commit 04be37c57f
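As context (not part of this commit's diff): a minimal sketch of how the new kernel could be reached from the Python API, assuming an XPU build of Paddle around the 2.0rc timeframe; the variable names and random input below are illustrative only.

    import numpy as np
    import paddle
    import paddle.fluid as fluid

    paddle.enable_static()
    x = fluid.data(name='x', shape=[3, 4, 5, 6], dtype='float64')
    y = fluid.layers.slice(x, axes=[0, 1, 2], starts=[1, 0, 2], ends=[3, 3, 4])

    # paddle.XPUPlace selects the XPU kernels registered in this commit
    exe = fluid.Executor(paddle.XPUPlace(0))
    out, = exe.run(feed={'x': np.random.random([3, 4, 5, 6]).astype('float64')},
                   fetch_list=[y])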
@@ -0,0 +1,203 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#ifdef PADDLE_WITH_XPU

#include <algorithm>
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/operators/slice_op.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;

template <typename DeviceContext, typename T>
class SliceXPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto in = ctx.Input<framework::Tensor>("Input");
    auto out = ctx.Output<framework::Tensor>("Out");
    auto axes = ctx.Attr<std::vector<int>>("axes");
    auto starts = ctx.Attr<std::vector<int>>("starts");
    auto ends = ctx.Attr<std::vector<int>>("ends");
    auto in_dims = in->dims();

    // prepare starts, ends on XPU
    int dim_value = 0, start = 0, end = 0;
    // If a negative value is passed for any of the start or end indices,
    // it represents the number of elements before the end of that dimension.
    // If the value passed for start or end is larger than n
    // (the number of elements in this dimension), it represents n.
    for (size_t i = 0; i < axes.size(); ++i) {
      dim_value = in_dims[axes[i]];
      start = starts[i];
      end = ends[i];
      start = start < 0 ? (start + dim_value) : start;
      end = end < 0 ? (end + dim_value) : end;
      start = std::max(start, 0);
      end = std::max(end, 0);
      end = std::min(end, dim_value);
      PADDLE_ENFORCE_GT(end, start,
                        platform::errors::InvalidArgument(
                            "end should be greater than start"));
      starts[i] = start;
      ends[i] = end;
    }
    size_t shape_size = in_dims.size();
    // The slice XPU kernel requires that the length of `starts` and `ends`
    // equal the number of dimensions of the input tensor; therefore, if
    // shape_size > axes.size(), `starts_extension` and `ends_extension`
    // are necessary.
    std::vector<int> starts_extension(shape_size, 0);
    std::vector<int> ends_extension(shape_size, 0);
    if (shape_size > axes.size()) {
      for (size_t i = 0; i < shape_size; ++i) {
        ends_extension[i] = in_dims[i];
      }
      for (size_t i = 0; i < axes.size(); ++i) {
        starts_extension[axes[i]] = starts[i];
        ends_extension[axes[i]] = ends[i];
      }
    } else {
      starts_extension = std::move(starts);
      ends_extension = std::move(ends);
    }

    // prepare shape on XPU
    std::vector<int> shape(shape_size, 0);
    for (size_t i = 0; i < shape_size; ++i) {
      shape[i] = in_dims[i];
    }

    auto& dev_ctx = ctx.template device_context<DeviceContext>();
    auto* in_data = in->data<T>();
    auto* out_data = out->mutable_data<T>(ctx.GetPlace());
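
    // launch the XPU slice kernel: copy the [starts, ends) region of every
    // dimension of in_data (whose full dimensions are `shape`) into out_data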
    int r = xpu::slice_forward(dev_ctx.x_context(), shape.data(),
                               starts_extension.data(), ends_extension.data(),
                               shape_size, in_data, out_data);
    PADDLE_ENFORCE_EQ(r, XPU_SUCCESS,
                      platform::errors::External("XPU slice kernel error!"));
  }
};

template <typename DeviceContext, typename T>
class SliceGradXPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* d_out = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
    auto* d_in = ctx.Output<framework::Tensor>(framework::GradVarName("Input"));
    d_in->mutable_data<T>(ctx.GetPlace());

    auto in_dims = d_in->dims();
    auto axes = ctx.Attr<std::vector<int>>("axes");
    auto starts = ctx.Attr<std::vector<int>>("starts");
    auto ends = ctx.Attr<std::vector<int>>("ends");

    // prepare starts, ends on XPU
    int dim_value = 0, start = 0, end = 0;
    // If a negative value is passed for any of the start or end indices,
    // it represents the number of elements before the end of that dimension.
    // If the value passed for start or end is larger than n
    // (the number of elements in this dimension), it represents n.
    for (size_t i = 0; i < axes.size(); ++i) {
      dim_value = in_dims[axes[i]];
      start = starts[i];
      end = ends[i];
      start = start < 0 ? (start + dim_value) : start;
      end = end < 0 ? (end + dim_value) : end;
      start = std::max(start, 0);
      end = std::max(end, 0);
      end = std::min(end, dim_value);
      PADDLE_ENFORCE_GT(end, start,
                        platform::errors::InvalidArgument(
                            "end should be greater than start"));
      starts[i] = start;
      ends[i] = end;
    }
    size_t shape_size = in_dims.size();
    // The slice XPU kernel requires that the length of `starts` and `ends`
    // equal the number of dimensions of the input tensor; therefore, if
    // shape_size > axes.size(), `starts_extension` and `ends_extension`
    // are necessary.
    std::vector<int> starts_extension(shape_size, 0);
    std::vector<int> ends_extension(shape_size, 0);
    if (shape_size > axes.size()) {
      for (size_t i = 0; i < shape_size; ++i) {
        ends_extension[i] = in_dims[i];
      }
      for (size_t i = 0; i < axes.size(); ++i) {
        starts_extension[axes[i]] = starts[i];
        ends_extension[axes[i]] = ends[i];
      }
    }
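    // xpu::slice_backward expects device-resident shape/starts/ends arrays,
    // so stage the host-side values into freshly allocated XPU memory first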
    int* starts_device = nullptr;
    int* ends_device = nullptr;
    int* starts_host =
        shape_size > axes.size() ? starts_extension.data() : starts.data();
    int* ends_host =
        shape_size > axes.size() ? ends_extension.data() : ends.data();
    PADDLE_ENFORCE_EQ(
        xpu_malloc((void**)(&starts_device), shape_size * sizeof(int)),
        XPU_SUCCESS,
        platform::errors::External("XPU does not have enough memory"));
    PADDLE_ENFORCE_EQ(
        xpu_malloc((void**)(&ends_device), shape_size * sizeof(int)),
        XPU_SUCCESS,
        platform::errors::External("XPU does not have enough memory"));
    memory::Copy(BOOST_GET_CONST(platform::XPUPlace, ctx.GetPlace()),
                 starts_device, platform::CPUPlace(), starts_host,
                 shape_size * sizeof(int));
    memory::Copy(BOOST_GET_CONST(platform::XPUPlace, ctx.GetPlace()),
                 ends_device, platform::CPUPlace(), ends_host,
                 shape_size * sizeof(int));

    // prepare shape on XPU
    std::vector<int> shape(shape_size, 0);
    for (size_t i = 0; i < shape_size; ++i) {
      shape[i] = in_dims[i];
    }
    int* shape_device = nullptr;
    PADDLE_ENFORCE_EQ(
        xpu_malloc((void**)(&shape_device), shape_size * sizeof(int)),
        XPU_SUCCESS,
        platform::errors::External("XPU does not have enough memory"));
    memory::Copy(BOOST_GET_CONST(platform::XPUPlace, ctx.GetPlace()),
                 shape_device, platform::CPUPlace(), shape.data(),
                 shape_size * sizeof(int));

    auto& dev_ctx = ctx.template device_context<DeviceContext>();
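    // launch the backward kernel: scatter d_out (the gradient of the slice)
    // back into the matching region of d_in (the gradient of the full input)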
    int r =
        xpu::slice_backward(dev_ctx.x_context(), shape_device, starts_device,
                            ends_device, shape_size, d_out->data<T>(),
                            d_in->data<T>(), d_in->numel(), d_out->numel());
    PADDLE_ENFORCE_EQ(r, XPU_SUCCESS,
                      platform::errors::External("xpu slice kernel error"));
    dev_ctx.Wait();
    // free device data
    xpu_free(shape_device);
    xpu_free(starts_device);
    xpu_free(ends_device);
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

REGISTER_OP_XPU_KERNEL(
    slice, ops::SliceXPUKernel<paddle::platform::XPUDeviceContext, float>);
REGISTER_OP_XPU_KERNEL(
    slice_grad,
    ops::SliceGradXPUKernel<paddle::platform::XPUDeviceContext, float>);
#endif
@@ -0,0 +1,171 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest
import numpy as np
import sys
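# the XPU tests live one directory below the main unittests folder, so the
# parent directory is added to the path to make the shared op_test helper
# importable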
sys.path.append("..")
import paddle
import paddle.fluid.core as core
from op_test import OpTest
import paddle.fluid as fluid
import paddle.fluid.layers as layers


# Situation 1: starts(list, no tensor), ends(list, no tensor)
# 1.1 without attr(decrease)
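# the cases below follow the structure of the generic slice op tests, but
# check outputs and gradients on paddle.XPUPlace(0) so the XPU kernels run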
class TestSliceOp(OpTest):
    def setUp(self):
        self.op_type = "slice"
        self.config()
        self.inputs = {'Input': self.input}
        self.outputs = {'Out': self.out}
        self.attrs = {
            'axes': self.axes,
            'starts': self.starts,
            'ends': self.ends,
            'infer_flags': self.infer_flags,
            "use_xpu": True
        }

    def config(self):
        self.input = np.random.random([3, 4, 5, 6]).astype("float64")
        self.starts = [1, 0, 2]
        self.ends = [3, 3, 4]
        self.axes = [0, 1, 2]
        self.infer_flags = [1, 1, 1]
        self.out = self.input[1:3, 0:3, 2:4, :]

    def test_check_output(self):
        place = paddle.XPUPlace(0)
        self.check_output_with_place(place)

    def test_check_grad_normal(self):
        place = paddle.XPUPlace(0)
        self.check_grad_with_place(place, ['Input'], 'Out')


class TestCase1(TestSliceOp):
    def config(self):
        self.input = np.random.random([3, 4, 5, 6]).astype("float64")
        self.starts = [-3, 0, 2]
        self.ends = [3, 100, -1]
        self.axes = [0, 1, 2]
        self.infer_flags = [1, 1, 1]
        self.out = self.input[-3:3, 0:100, 2:-1, :]


class TestCase2(TestSliceOp):
    def config(self):
        self.input = np.random.random([3, 4, 5, 6]).astype("float64")
        self.starts = [-3, 0, 2]
        self.ends = [3, 100, -1]
        self.axes = [0, 1, 3]
        self.infer_flags = [1, 1, 1]
        self.out = self.input[-3:3, 0:100, :, 2:-1]


# 1.2 with attr(decrease)
class TestSliceOp_decs_dim(OpTest):
    def setUp(self):
        self.op_type = "slice"
        self.config()
        self.inputs = {'Input': self.input}
        self.outputs = {'Out': self.out}
        self.attrs = {
            'axes': self.axes,
            'starts': self.starts,
            'ends': self.ends,
            'infer_flags': self.infer_flags,
            'decrease_axis': self.decrease_axis,
            "use_xpu": True
        }

    def config(self):
        self.input = np.random.random([3, 4, 5, 6]).astype("float64")
        self.starts = [1, 0, 2]
        self.ends = [2, 3, 4]
        self.axes = [0, 1, 2]
        self.decrease_axis = [0]
        self.infer_flags = [1, 1, 1]
        self.out = self.input[1, 0:3, 2:4, :]

    def test_check_output(self):
        place = paddle.XPUPlace(0)
        self.check_output_with_place(place)

    def test_check_grad_normal(self):
        place = paddle.XPUPlace(0)
        self.check_grad_with_place(place, ['Input'], 'Out')


class TestSliceOp_decs_dim_2(TestSliceOp_decs_dim):
    def config(self):
        self.input = np.random.random([3, 4, 5, 6]).astype("float64")
        self.starts = [1, 0, 2]
        self.ends = [2, 1, 4]
        self.axes = [0, 1, 2]
        self.decrease_axis = [0, 1]
        self.infer_flags = [1, 1, 1]
        self.out = self.input[1, 0, 2:4, :]


class TestSliceOp_decs_dim_3(TestSliceOp_decs_dim):
    def config(self):
        self.input = np.random.random([3, 4, 5, 6]).astype("float64")
        self.starts = [-1, 0, 2]
        self.ends = [1000000, 1, 4]
        self.axes = [0, 1, 2]
        self.decrease_axis = [0, 1]
        self.infer_flags = [1, 1, 1]
        self.out = self.input[-1, 0, 2:4, :]


class TestSliceOp_decs_dim_4(TestSliceOp_decs_dim):
    def config(self):
        self.input = np.random.random([3, 4, 5, 7]).astype("float64")
        self.starts = [0, 1, 2, 3]
        self.ends = [1, 2, 3, 4]
        self.axes = [0, 1, 2, 3]
        self.decrease_axis = [0, 1, 2, 3]
        self.infer_flags = [1, 1, 1]
        self.out = self.input[0, 1, 2, 3:4]


class TestSliceOp_decs_dim_5(TestSliceOp_decs_dim):
    def config(self):
        self.input = np.random.random([3, 4, 5, 6]).astype("float64")
        self.starts = [-1]
        self.ends = [1000000]
        self.axes = [3]
        self.decrease_axis = [3]
        self.infer_flags = [1, 1, 1]
        self.out = self.input[:, :, :, -1]


class TestSliceOp_decs_dim_6(TestSliceOp_decs_dim):
    def config(self):
        self.input = np.random.random([3, 4, 5, 6]).astype("float64")
        self.starts = [0, 1, 2, 3]
        self.ends = [1, 2, 3, 4]
        self.axes = [0, 1, 2, 3]
        self.decrease_axis = [0, 1, 2, 3]
        self.infer_flags = [1, 1, 1]
        self.out = self.input[0, 1, 2, 3:4]


if __name__ == '__main__':
    unittest.main()