|
|
|
@ -50,11 +50,13 @@ void FusedBatchNormCPUKernel::InitKernel(const CNodePtr &kernel_node) {
|
|
|
|
|
dnnl::memory::desc scale_bias_desc = GetDefaultMemDesc({2, channel});
|
|
|
|
|
auto epsilon = AnfAlgo::GetNodeAttr<float>(kernel_node, "epsilon");
|
|
|
|
|
auto prop_kind = dnnl::prop_kind::forward_inference;
|
|
|
|
|
auto normalization_flags = dnnl::normalization_flags::use_scale_shift | dnnl::normalization_flags::use_global_stats;
|
|
|
|
|
if (is_train) {
|
|
|
|
|
prop_kind = dnnl::prop_kind::forward_training;
|
|
|
|
|
normalization_flags = dnnl::normalization_flags::use_scale_shift;
|
|
|
|
|
}
|
|
|
|
|
dnnl::batch_normalization_forward::desc desc =
|
|
|
|
|
dnnl::batch_normalization_forward::desc(prop_kind, x_desc, epsilon, dnnl::normalization_flags::use_scale_shift);
|
|
|
|
|
dnnl::batch_normalization_forward::desc(prop_kind, x_desc, epsilon, normalization_flags);
|
|
|
|
|
auto prim_desc = dnnl::batch_normalization_forward::primitive_desc(desc, MKLKernelEngine::Get().engine());
|
|
|
|
|
primitive_ = std::make_shared<dnnl::batch_normalization_forward>(prim_desc);
|
|
|
|
|
AddArgument(DNNL_ARG_SRC, x_desc);
|
|
|
|
@ -74,14 +76,14 @@ bool FusedBatchNormCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inpu
|
|
|
|
|
auto wksp = reinterpret_cast<float *>(workspace[0]->addr);
|
|
|
|
|
memcpy_s(wksp, workspace[0]->size, inputs[1]->addr, inputs[1]->size);
|
|
|
|
|
memcpy_s(wksp + (inputs[1]->size / sizeof(float)), inputs[2]->size, inputs[2]->addr, inputs[2]->size);
|
|
|
|
|
|
|
|
|
|
SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr);
|
|
|
|
|
SetArgumentHandle(DNNL_ARG_MEAN, outputs[3]->addr);
|
|
|
|
|
SetArgumentHandle(DNNL_ARG_VARIANCE, outputs[4]->addr);
|
|
|
|
|
SetArgumentHandle(DNNL_ARG_SCALE_SHIFT, workspace[0]->addr);
|
|
|
|
|
SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr);
|
|
|
|
|
ExecutePrimitive();
|
|
|
|
|
if (is_train) {
|
|
|
|
|
SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr);
|
|
|
|
|
SetArgumentHandle(DNNL_ARG_MEAN, outputs[3]->addr);
|
|
|
|
|
SetArgumentHandle(DNNL_ARG_VARIANCE, outputs[4]->addr);
|
|
|
|
|
SetArgumentHandle(DNNL_ARG_SCALE_SHIFT, workspace[0]->addr);
|
|
|
|
|
SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr);
|
|
|
|
|
ExecutePrimitive();
|
|
|
|
|
|
|
|
|
|
auto moving_mean = reinterpret_cast<float *>(inputs[3]->addr);
|
|
|
|
|
auto moving_variance = reinterpret_cast<float *>(inputs[4]->addr);
|
|
|
|
|
auto mean = reinterpret_cast<float *>(outputs[3]->addr);
|
|
|
|
@ -90,6 +92,13 @@ bool FusedBatchNormCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inpu
|
|
|
|
|
moving_mean[i] = moving_mean[i] * (1 - momentum) + mean[i] * momentum;
|
|
|
|
|
moving_variance[i] = moving_variance[i] * (1 - momentum) + variance[i] * momentum;
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr);
|
|
|
|
|
SetArgumentHandle(DNNL_ARG_MEAN, inputs[3]->addr);
|
|
|
|
|
SetArgumentHandle(DNNL_ARG_VARIANCE, inputs[4]->addr);
|
|
|
|
|
SetArgumentHandle(DNNL_ARG_SCALE_SHIFT, workspace[0]->addr);
|
|
|
|
|
SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr);
|
|
|
|
|
ExecutePrimitive();
|
|
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|