|
|
|
@ -316,9 +316,11 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
|
|
|
|
|
dst_memory_p = handler.AcquireDstMemoryFromResidualDataMemory(
|
|
|
|
|
user_residual_memory_p, to_void_cast<T>(output_data), pipeline);
|
|
|
|
|
} else {
|
|
|
|
|
auto output_data = output->mutable_data<T>(
|
|
|
|
|
ctx.GetPlace(), residual_param->memory_size());
|
|
|
|
|
framework::TensorCopy(*residual_param, residual_param->place(), output);
|
|
|
|
|
// Changing ShareDataWith to TensorCopy results in a performance drop
|
|
|
|
|
// on ResNet architectures
|
|
|
|
|
// (https://github.com/PaddlePaddle/Paddle/issues/22964)
|
|
|
|
|
output->ShareDataWith(*residual_param);
|
|
|
|
|
auto output_data = output->mutable_data<T>(ctx.GetPlace());
|
|
|
|
|
dst_memory_p =
|
|
|
|
|
handler.AcquireDstMemoryFromPrimitive(to_void_cast<T>(output_data));
|
|
|
|
|
}
|
|
|
|
@ -611,8 +613,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
|
|
|
|
|
ctx, output, residual_param, user_residual_md, handler,
|
|
|
|
|
&pipeline);
|
|
|
|
|
} else {
|
|
|
|
|
framework::TensorCopy(*residual_param, residual_param->place(),
|
|
|
|
|
output);
|
|
|
|
|
output->ShareDataWith(*residual_param);
|
|
|
|
|
dst_memory_p = platform::SetDstMemory<T_out>(ctx, output, handler);
|
|
|
|
|
}
|
|
|
|
|
need_s8_to_u8 =
|
|
|
|
@ -683,7 +684,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
|
|
|
|
|
|
|
|
|
|
if (fuse_residual_conn) {
|
|
|
|
|
auto residual_param = ctx.Input<Tensor>("ResidualData");
|
|
|
|
|
framework::TensorCopy(*residual_param, residual_param->place(), output);
|
|
|
|
|
output->ShareDataWith(*residual_param);
|
|
|
|
|
need_s8_to_u8 =
|
|
|
|
|
(platform::MKLDNNGetDataType<T_out>() == memory::data_type::s8) &&
|
|
|
|
|
unsigned_output;
|
|
|
|
|