|
|
|
@@ -43,22 +43,24 @@ class ClipGradientOpCUDAKernel : public framework::OpKernel {
|
|
|
|
|
auto min = context.Attr<float>("min");
|
|
|
|
|
auto* d_out = context.Input<LoDTensor>(framework::GradVarName("Out"));
|
|
|
|
|
auto* d_x = context.Output<LoDTensor>(framework::GradVarName("X"));
|
|
|
|
|
auto* x = context.Input<LoDTensor>("X");
|
|
|
|
|
auto dims = d_x->dims();
|
|
|
|
|
int64_t count = d_out->numel();
|
|
|
|
|
auto d_x_data = d_x->mutable_data<T>(context.GetPlace());
|
|
|
|
|
auto d_out_data = d_out->data<T>();
|
|
|
|
|
auto x_data = x->data<T>();
|
|
|
|
|
if (d_x != nullptr) {
|
|
|
|
|
auto* x = context.Input<LoDTensor>("X");
|
|
|
|
|
auto dims = d_x->dims();
|
|
|
|
|
int64_t count = d_out->numel();
|
|
|
|
|
auto d_x_data = d_x->mutable_data<T>(context.GetPlace());
|
|
|
|
|
auto d_out_data = d_out->data<T>();
|
|
|
|
|
auto x_data = x->data<T>();
|
|
|
|
|
|
|
|
|
|
int N = d_x->dims()[0];
|
|
|
|
|
int D = d_x->dims()[1];
|
|
|
|
|
int block = 512;
|
|
|
|
|
int grid = (N * D + block - 1) / block;
|
|
|
|
|
ClipGradientKernel<T><<<
|
|
|
|
|
grid, block, 0, reinterpret_cast<const platform::CUDADeviceContext&>(
|
|
|
|
|
context.device_context())
|
|
|
|
|
.stream()>>>(count, min, max, x_data, d_out_data,
|
|
|
|
|
d_x_data);
|
|
|
|
|
int N = d_x->dims()[0];
|
|
|
|
|
int D = d_x->dims()[1];
|
|
|
|
|
int block = 512;
|
|
|
|
|
int grid = (N * D + block - 1) / block;
|
|
|
|
|
ClipGradientKernel<T><<<
|
|
|
|
|
grid, block, 0, reinterpret_cast<const platform::CUDADeviceContext&>(
|
|
|
|
|
context.device_context())
|
|
|
|
|
.stream()>>>(count, min, max, x_data, d_out_data,
|
|
|
|
|
d_x_data);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|