@@ -30,6 +30,9 @@
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/float16.h"
 #include "paddle/fluid/platform/profiler.h"
+#ifdef PADDLE_WITH_XPU
+#include "xpu/refactor/math.h"
+#endif
 
 namespace paddle {
 namespace imperative {
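The new include sits behind the same PADDLE_WITH_XPU guard as the code that uses it, so CPU/GPU builds never touch the XPU toolchain headers. A minimal sketch of that guarded-include pattern, with hypothetical names (MY_LIB_WITH_ACCEL, accel/math.h, and accel::add are illustrations, not real Paddle or XPU identifiers):

// Hypothetical illustration of the guarded-include pattern used above:
// the accelerator header exists only in accelerator builds, so the
// include and every call site hide behind the same feature macro.
#ifdef MY_LIB_WITH_ACCEL            // hypothetical build flag
#include "accel/math.h"             // hypothetical accelerator header
#endif

void add_inplace(const float* x, float* y, int n) {
#ifdef MY_LIB_WITH_ACCEL
  accel::add(x, y, n);              // hypothetical device kernel
#else
  for (int i = 0; i < n; ++i) y[i] += x[i];  // portable CPU fallback
#endif
}

int main() {
  float x[3] = {1, 2, 3}, y[3] = {1, 1, 1};
  add_inplace(x, y, 3);             // y becomes {2, 3, 4}
  return 0;
}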
@@ -81,12 +84,20 @@ class TensorAddFunctor : public boost::static_visitor<> {
     blas.AXPY(numel_, 1., x_, y_);
   }
 
+#ifdef PADDLE_WITH_XPU
+  void operator()(const platform::XPUPlace& place) {
+    platform::XPUDeviceContext* ctx = dynamic_cast<platform::XPUDeviceContext*>(
+        platform::DeviceContextPool::Instance().Get(place));
+    xpu::add<T>(ctx->x_context(), x_, y_, y_, static_cast<int>(numel_));
+  }
+#else
   void operator()(const platform::XPUPlace& place) {
     PADDLE_THROW(platform::errors::PermissionDenied(
         "Gradient accumulation on place (%s) "
         "is not supported in imperative mode",
         place));
   }
+#endif
 
 #ifdef PADDLE_WITH_CUDA
   void operator()(const platform::CUDAPlace& place) {
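TensorAddFunctor derives from boost::static_visitor<>, so each per-place operator() overload above is selected at runtime by applying the functor to a place variant. A minimal sketch of that dispatch pattern, using stand-in place types rather than Paddle's real ones from paddle/fluid/platform/place.h:

#include <iostream>
#include <boost/variant.hpp>

// Stand-in place types; Paddle's real definitions live in
// paddle/fluid/platform/place.h, where Place is a variant over them.
struct CPUPlace {};
struct XPUPlace {};
using Place = boost::variant<CPUPlace, XPUPlace>;

// Shaped like TensorAddFunctor: one operator() per place type; the
// variant picks the matching overload at runtime.
struct AddVisitor : public boost::static_visitor<> {
  void operator()(const CPUPlace&) const { std::cout << "CPU accumulate\n"; }
  void operator()(const XPUPlace&) const { std::cout << "XPU accumulate\n"; }
};

int main() {
  Place place = XPUPlace{};
  AddVisitor visitor;
  boost::apply_visitor(visitor, place);  // prints "XPU accumulate"
  return 0;
}

Note that the diff keeps the overload set identical in both build modes: with PADDLE_WITH_XPU defined, the XPUPlace overload accumulates in place via xpu::add (y = x + y); without it, the overload still exists but throws PermissionDenied, so visitation stays exhaustive either way.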
@@ -162,11 +173,14 @@ void TensorAdd(const framework::Variable& src, framework::Variable* dst) {
   }
 
   PADDLE_TENSOR_ADD(float);
+#ifndef PADDLE_WITH_XPU
+  // NOTE(phlrain): xpu only support float
   PADDLE_TENSOR_ADD(double);
   // NOTE(chenweihang): only support complex grad tensor accumulated,
   // support selected rows if needed in the future
   PADDLE_TENSOR_ADD(platform::complex64);
   PADDLE_TENSOR_ADD(platform::complex128);
+#endif
 
 #undef PADDLE_TENSOR_ADD
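PADDLE_TENSOR_ADD itself is defined earlier in the file and is not part of this diff. To make the guard's effect concrete, here is a self-contained miniature of the same dispatch-macro pattern (all names hypothetical, not Paddle's): compare the runtime dtype against each supported C++ type, run the typed kernel, and return; the #ifndef mirrors how the XPU build drops every instantiation except float.

#include <iostream>
#include <typeindex>

// Miniature of the dtype-dispatch-macro pattern: each expansion checks
// one supported type, runs the typed kernel on the raw buffers, and
// returns on a match.
template <typename T>
void typed_add(const void* src, void* dst, int n) {
  auto* x = static_cast<const T*>(src);
  auto* y = static_cast<T*>(dst);
  for (int i = 0; i < n; ++i) y[i] += x[i];
}

#define TENSOR_ADD(cpp_type)                         \
  if (dtype == std::type_index(typeid(cpp_type))) {  \
    typed_add<cpp_type>(src, dst, n);                \
    return;                                          \
  }

void tensor_add(std::type_index dtype, const void* src, void* dst, int n) {
  TENSOR_ADD(float);
#ifndef BUILD_FLOAT_ONLY  // mirrors #ifndef PADDLE_WITH_XPU above
  TENSOR_ADD(double);
#endif
  std::cerr << "unsupported dtype\n";  // fell through every check
}
#undef TENSOR_ADD

int main() {
  float x[2] = {1.f, 2.f}, y[2] = {10.f, 20.f};
  tensor_add(std::type_index(typeid(float)), x, y, 2);
  std::cout << y[0] << " " << y[1] << "\n";  // prints "11 22"
  return 0;
}

In an XPU build (analogous to defining BUILD_FLOAT_ONLY here), a request for double or complex falls through to the unsupported branch instead of silently miscomputing, which matches the NOTE that xpu only supports float.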