@@ -39,36 +39,6 @@ class TestHWLayerNormGradSplit : public BackendCommon {
  UT::PyFuncGraphFetcher get_py_fun_;
};
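
// Mock kernel selector for the split pass: instead of querying real kernel
// info, it pins fixed NC1HWC0/float16 build info on the two ops the pass
// emits (LayerNormXBackprop and LayerNormBetaGammaBackprop).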
class MockLayerNormGradSplitKernelSelect : public KernelSelect {
 public:
  MockLayerNormGradSplitKernelSelect() = default;
  ~MockLayerNormGradSplitKernelSelect() override = default;
  void SelectKernel(const CNodePtr &cnode) override {
    auto name = AnfAlgo::GetCNodeName(cnode);
    if (name == kLayerNormXBackpropOpName) {
      kernel::KernelBuildInfo::KernelBuildInfoBuilder builder;
      builder.SetInputsFormat(
        {kOpFormat_NC1HWC0, kOpFormat_NC1HWC0, kOpFormat_NC1HWC0, kOpFormat_NC1HWC0, kOpFormat_NC1HWC0});
      builder.SetInputsDeviceType(
        {kNumberTypeFloat16, kNumberTypeFloat16, kNumberTypeFloat16, kNumberTypeFloat16, kNumberTypeFloat16});
      builder.SetOutputsFormat({kOpFormat_NC1HWC0});
      builder.SetOutputsDeviceType({kNumberTypeFloat16});
      AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), cnode.get());
      return;
    }
    if (name == kLayerNormBetaGammaBackpropOpName) {
      kernel::KernelBuildInfo::KernelBuildInfoBuilder builder;
      builder.SetInputsFormat({kOpFormat_NC1HWC0, kOpFormat_NC1HWC0, kOpFormat_NC1HWC0, kOpFormat_NC1HWC0});
      builder.SetInputsDeviceType({kNumberTypeFloat16, kNumberTypeFloat16, kNumberTypeFloat16, kNumberTypeFloat16});
      builder.SetOutputsFormat({kOpFormat_NC1HWC0, kOpFormat_NC1HWC0});
      builder.SetOutputsDeviceType({kNumberTypeFloat16, kNumberTypeFloat16});
      AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), cnode.get());
      return;
    }
  }
};
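
// End-to-end check of LayerNormGradSplit: parse the "before" graph from its
// Python definition, hand-select kernel info for LayerNormGrad, then run the
// pass through a GraphOptimizer.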
TEST_F(TestHWLayerNormGradSplit, test_layer_norm_grad_split) {
  get_py_fun_.SetDoResolve(true);
  FuncGraphPtr g = get_py_fun_.CallAndParseRet("test_layer_norm_grad_split", "before");
@@ -81,49 +51,9 @@ TEST_F(TestHWLayerNormGradSplit, test_layer_norm_grad_split) {
  auto kernel_graph = GetKernelGraph(g, args_spec_list);
  EXPECT_NE(kernel_graph, nullptr);
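
  // The graph output is wrapped in nested make_tuple/tuple_getitem nodes, so
  // walk input(1) at each level to reach the LayerNormGrad CNode.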
  // get LayerNormGrad
  CNodePtr ret = kernel_graph->get_return();
  EXPECT_NE(ret, nullptr);
  EXPECT_NE(ret->input(1), nullptr);
  EXPECT_TRUE(ret->input(1)->isa<CNode>());
  auto make_tuple1 = ret->input(1)->cast<CNodePtr>();
  EXPECT_NE(make_tuple1->input(1), nullptr);
  EXPECT_TRUE(make_tuple1->input(1)->isa<CNode>());
  auto make_tuple2 = make_tuple1->input(1)->cast<CNodePtr>();
  EXPECT_NE(make_tuple2->input(1), nullptr);
  EXPECT_TRUE(make_tuple2->input(1)->isa<CNode>());
  auto tuple_getitem = make_tuple2->input(1)->cast<CNodePtr>();
  EXPECT_NE(tuple_getitem->input(1), nullptr);
  EXPECT_TRUE(tuple_getitem->input(1)->isa<CNode>());
  auto layer_norm_grad = tuple_getitem->input(1)->cast<CNodePtr>();

  // set kernel for LayerNormGrad
  kernel::KernelBuildInfo::KernelBuildInfoBuilder builder1;
  builder1.SetInputsFormat(
    {kOpFormat_NC1HWC0, kOpFormat_NC1HWC0, kOpFormat_NC1HWC0, kOpFormat_NC1HWC0, kOpFormat_NC1HWC0});
  builder1.SetOutputsFormat({kOpFormat_NC1HWC0, kOpFormat_NC1HWC0, kOpFormat_NC1HWC0});
  builder1.SetInputsDeviceType(
    {kNumberTypeFloat16, kNumberTypeFloat16, kNumberTypeFloat16, kNumberTypeFloat16, kNumberTypeFloat16});
  builder1.SetOutputsDeviceType({kNumberTypeFloat16, kNumberTypeFloat16, kNumberTypeFloat16});
  builder1.SetKernelType(TBE_KERNEL);
  AnfAlgo::SetSelectKernelBuildInfo(builder1.Build(), layer_norm_grad.get());

  // get param5
  EXPECT_NE(layer_norm_grad->input(5), nullptr);
  auto param = layer_norm_grad->input(5);

  // set kernel for param5
  kernel::KernelBuildInfo::KernelBuildInfoBuilder builder2;
  builder2.SetOutputsFormat({kOpFormat_NC1HWC0});
  builder2.SetOutputsDeviceType({kNumberTypeFloat16});
  AnfAlgo::SetSelectKernelBuildInfo(builder2.Build(), param.get());
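
  // Inject the mock selector so the pass can assign build info to the nodes
  // it creates without querying real kernel info.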
  // do layer_norm_grad_split pass
  auto optimizer = std::make_shared<opt::GraphOptimizer>();
  auto pm = std::make_shared<opt::PassManager>();
  auto pass = std::make_shared<opt::LayerNormGradSplit>();
  auto kernel_select = std::make_shared<MockLayerNormGradSplitKernelSelect>();
  pass->kernel_select_ = kernel_select;
  pm->AddPass(pass);
  optimizer->AddPassManager(pm);
  auto new_graph = optimizer->Optimize(kernel_graph);