|
|
|
@ -105,11 +105,10 @@ typename std::enable_if<
|
|
|
|
|
std::is_floating_point<T>::value &&
|
|
|
|
|
std::is_same<DeviceContext, platform::CPUDeviceContext>::value>::type
|
|
|
|
|
elementwise_add_grad(const framework::ExecutionContext& ctx,
|
|
|
|
|
const framework::Tensor* x,
|
|
|
|
|
const framework::Tensor* y,
|
|
|
|
|
const framework::Tensor* x, const framework::Tensor* y,
|
|
|
|
|
const framework::Tensor* out,
|
|
|
|
|
const framework::Tensor* dout,
|
|
|
|
|
framework::Tensor* dx, framework::Tensor* dy) {
|
|
|
|
|
const framework::Tensor* dout, framework::Tensor* dx,
|
|
|
|
|
framework::Tensor* dy) {
|
|
|
|
|
auto blas = math::GetBlas<DeviceContext, T>(ctx);
|
|
|
|
|
|
|
|
|
|
if (dx) {
|
|
|
|
@ -128,11 +127,10 @@ typename std::enable_if<
|
|
|
|
|
!std::is_floating_point<T>::value ||
|
|
|
|
|
!std::is_same<DeviceContext, platform::CPUDeviceContext>::value>::type
|
|
|
|
|
elementwise_add_grad(const framework::ExecutionContext& ctx,
|
|
|
|
|
const framework::Tensor* x,
|
|
|
|
|
const framework::Tensor* y,
|
|
|
|
|
const framework::Tensor* x, const framework::Tensor* y,
|
|
|
|
|
const framework::Tensor* out,
|
|
|
|
|
const framework::Tensor* dout,
|
|
|
|
|
framework::Tensor* dx, framework::Tensor* dy) {
|
|
|
|
|
const framework::Tensor* dout, framework::Tensor* dx,
|
|
|
|
|
framework::Tensor* dy) {
|
|
|
|
|
default_elementwise_add_grad<DeviceContext, T>(ctx, x, y, out, dout, dx, dy);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -152,8 +150,8 @@ class ElementwiseAddGradKernel : public framework::OpKernel<T> {
|
|
|
|
|
if (platform::is_cpu_place(ctx.GetPlace()) && (x->dims() == y->dims())) {
|
|
|
|
|
elementwise_add_grad<DeviceContext, T>(ctx, x, y, out, dout, dx, dy);
|
|
|
|
|
} else {
|
|
|
|
|
default_elementwise_add_grad<DeviceContext, T>(
|
|
|
|
|
ctx, x, y, out, dout, dx, dy);
|
|
|
|
|
default_elementwise_add_grad<DeviceContext, T>(ctx, x, y, out, dout, dx,
|
|
|
|
|
dy);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|