// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/stack_op.h"
|
|
#include <string>
|
|
#ifdef PADDLE_WITH_XPU
|
|
|
|

namespace paddle {
namespace operators {

using framework::Tensor;

template <typename DeviceContext, typename T>
class StackXPUKernel : public framework::OpKernel<T> {
 public:
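  // Compute stacks the n input tensors "X" along `axis` into the single
  // output tensor "Y" on the XPU device.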
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto x = ctx.MultiInput<Tensor>("X");
    auto* y = ctx.Output<Tensor>("Y");
    int axis = ctx.Attr<int>("axis");
    // Stacking inserts a new dimension, so a negative axis is normalized
    // against rank + 1 rather than rank.
    if (axis < 0) {
      axis += (x[0]->dims().size() + 1);
    }
    int n = static_cast<int>(x.size());
    PADDLE_ENFORCE_LE(n, 24,
                      platform::errors::InvalidArgument(
                          "XPU only supports at most 24 tensors for now"));
    auto* y_data = y->mutable_data<T>(ctx.GetPlace());
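    // Collapse each input to a (pre, post) matrix around the stack axis;
    // pre, post, and n are the shape arguments passed to xpu::stack_forward
    // below.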
    int pre = 1, post = 1;
    auto& dim = x[0]->dims();
    for (auto i = 0; i < axis; ++i) {
      pre *= dim[i];
    }
    for (auto i = axis; i < dim.size(); ++i) {
      post *= dim[i];
    }
    auto& dev_ctx = ctx.template device_context<DeviceContext>();
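    // The XPU kernel takes the input pointers as an array resident in device
    // memory: assemble the array on the host, then copy it to the device.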
    void* x_datas_host = std::malloc(n * sizeof(void*));
    void* x_datas_device = nullptr;
    PADDLE_ENFORCE_EQ(xpu_malloc(reinterpret_cast<void**>(&x_datas_device),
                                 n * sizeof(void*)),
                      XPU_SUCCESS,
                      platform::errors::ResourceExhausted(
                          "\n\nOut of memory error on XPU. Cannot "
                          "allocate %s memory on XPU.\n\nPlease "
                          "check whether there is any other process "
                          "using XPU.\n",
                          string::HumanReadableSize(n * sizeof(void*))));
    for (auto i = 0; i < n; ++i) {
      reinterpret_cast<const void**>(x_datas_host)[i] = x[i]->data<T>();
    }
    memory::Copy(BOOST_GET_CONST(platform::XPUPlace, ctx.GetPlace()),
                 x_datas_device, platform::CPUPlace(), x_datas_host,
                 n * sizeof(void*));
    int r = xpu::stack_forward<float>(dev_ctx.x_context(), pre, post, n,
                                      x_datas_device, y_data);
    PADDLE_ENFORCE_EQ(
        r, xpu::Error_t::SUCCESS,
        platform::errors::External(
            "The stack XPU API returned the wrong value[%d], please check "
            "whether the Baidu Kunlun card is properly installed.",
            r));
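    // Wait for the (potentially asynchronous) kernel to finish before the
    // pointer buffers are released.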
    dev_ctx.Wait();
    std::free(x_datas_host);
    xpu_free(x_datas_device);
  }
};

}  // namespace operators
}  // namespace paddle

namespace plat = paddle::platform;
namespace ops = paddle::operators;

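// Only the float kernel is registered, matching the hard-coded
// xpu::stack_forward<float> call above.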
REGISTER_OP_XPU_KERNEL(stack,
                       ops::StackXPUKernel<plat::XPUDeviceContext, float>);
#endif