!9796 Add SpaceToDepth fission pass to fix a bug when the data type is float16.

From: @liu_xiao_93
Reviewed-by: @liangchenghui,@wuxuejian
Signed-off-by: @liangchenghui
pull/9796/MERGE
Committed by mindspore-ci-bot via Gitee
commit d8a64b4ac4
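
At a glance, the pass rewrites SpaceToDepth(x) into SpaceToDepth(x, assist) whenever x is float16, where assist is a constant float16 tensor of shape (C * block_size * block_size, C, block_size, block_size). Below is a minimal standalone sketch of that shape computation; the input shape (1, 3, 4, 4) and block_size = 2 are illustrative values, not taken from the patch.

// Sketch only: shape of the assist input added by the pass, for an
// illustrative NCHW input of (1, 3, 4, 4) and block_size = 2.
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  std::vector<int64_t> x_shape = {1, 3, 4, 4};  // NCHW; example values
  int64_t block_size = 2;
  int64_t input_channel = x_shape[1];
  int64_t assist_input_channel = input_channel * block_size * block_size;
  // Same shape the pass logs: (C*b*b, C, b, b) -> (12, 3, 2, 2).
  std::printf("(%lld, %lld, %lld, %lld)\n", static_cast<long long>(assist_input_channel),
              static_cast<long long>(input_channel), static_cast<long long>(block_size),
              static_cast<long long>(block_size));
  return 0;
}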

@@ -53,6 +53,7 @@
#include "backend/optimizer/ascend/ir_fission/transdata_split.h"
#include "backend/optimizer/ascend/ir_fission/topk_split.h"
#include "backend/optimizer/ascend/ir_fission/lin_space_fission.h"
#include "backend/optimizer/ascend/ir_fission/space_to_depth_split.h"
#include "backend/optimizer/ascend/ir_fusion/momentum_lossscale_fusion.h"
#include "backend/optimizer/ascend/ir_fusion/mul_add_fusion.h"
#include "backend/optimizer/ascend/ir_fusion/mul_addn_fusion.h"
@@ -180,6 +181,7 @@ void AddAscendIRFusionPass(PassManager *ir_fusion_pm) {
ir_fusion_pm->AddPass(std::make_shared<DynamicRnnGradFissionV2>());
ir_fusion_pm->AddPass(std::make_shared<SplitFission>());
ir_fusion_pm->AddPass(std::make_shared<SplitVFission>());
ir_fusion_pm->AddPass(std::make_shared<SpaceToDepthSplit>());
ir_fusion_pm->AddPass(std::make_shared<TensorScatterUpdateFission>());
ir_fusion_pm->AddPass(std::make_shared<GetitemTuple>());
ir_fusion_pm->AddPass(std::make_shared<PackFission>());
@@ -324,6 +326,7 @@ void RunOpAscendBackendIRFusionOptimization(const std::shared_ptr<session::Kerne
ir_fusion_pm->AddPass(std::make_shared<LayerNormGradSplit>());
ir_fusion_pm->AddPass(std::make_shared<TopKSplit>());
ir_fusion_pm->AddPass(std::make_shared<LinSpaceFission>());
ir_fusion_pm->AddPass(std::make_shared<SpaceToDepthSplit>());
ir_fusion_pm->AddPass(std::make_shared<AddnFission>());
ir_fusion_pm->AddPass(std::make_shared<InsertPadForNMSWithMask>());
ir_fusion_pm->AddPass(std::make_shared<TensorScatterUpdateFission>());

@@ -0,0 +1,127 @@ (new file: backend/optimizer/ascend/ir_fission/space_to_depth_split.cc)
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/optimizer/ascend/ir_fission/space_to_depth_split.h"
#include <vector>
#include <memory>
#include <string>
#include "backend/session/anf_runtime_algorithm.h"
#include "frontend/optimizer/opt.h"
#include "backend/optimizer/common/helper.h"
namespace mindspore {
namespace opt {
namespace {
constexpr size_t kFloat16Len = 2;
constexpr size_t kSpaceToDepthInputNum = 1;
tensor::TensorPtr CreateTensor(const AnfNodePtr &node) {
  // 1. Create the float16 assist tensor.
  auto cnode = node->cast<CNodePtr>();
  MS_EXCEPTION_IF_NULL(cnode);
  auto input_x = cnode->input(kSpaceToDepthInputNum);
  int64_t block_size = AnfAlgo::GetNodeAttr<int64_t>(cnode, "block_size");
  std::vector<size_t> x_shape = AnfAlgo::GetOutputInferShape(input_x, 0);
  int64_t input_channel = SizeToLong(x_shape[1]);
  int64_t assist_input_channel = SizeToLong(x_shape[1]) * block_size * block_size;
  std::vector<int64_t> assist_input_shape = {assist_input_channel, input_channel, block_size, block_size};
  int64_t dest_size = assist_input_channel * input_channel * block_size * block_size;
  MS_LOG(INFO) << "For SpaceToDepth op, assist input shape is: (" << assist_input_channel << ", " << input_channel
               << ", " << block_size << ", " << block_size << ")";
  TensorTypePtr tensor_type = std::make_shared<TensorType>(kFloat16);
  tensor::DeviceInfo device_info{kOpFormat_NC1HWC0, tensor_type};
  tensor::TensorPtr assist_tensor = std::make_shared<tensor::Tensor>(kFloat16->type_id(), assist_input_shape);
  assist_tensor->set_device_info(device_info);
  // 2. Set the values of the assist tensor.
  int64_t window_size = assist_input_shape[2] * assist_input_shape[3];
  int64_t channel_size = input_channel;
  auto data_ptr = assist_tensor->data_c();
  MS_EXCEPTION_IF_NULL(data_ptr);
  std::vector<float16> half_data(dest_size);
  float16 assist_value = float16(static_cast<float>(1));
  // Each iteration writes a single 1 so that output channel
  // (window_idx * channel_size + channel_idx) picks input channel channel_idx
  // at intra-block position window_idx; every other element stays 0.
  for (int64_t window_idx = 0; window_idx < window_size; window_idx++) {
    for (int64_t channel_idx = 0; channel_idx < channel_size; channel_idx++) {
      int64_t stride_idx = channel_idx * (channel_size + 1) * window_size;
      int64_t fill_assist_idx = stride_idx + (window_idx * (channel_size * channel_size * window_size + 1));
      half_data[fill_assist_idx] = assist_value;
    }
  }
  // dest_size float16 elements -> dest_size * 2 bytes.
  auto elem_num = dest_size * kFloat16Len;
  auto ret_code = memcpy_s(data_ptr, static_cast<size_t>(assist_tensor->data().nbytes()), half_data.data(), elem_num);
  if (ret_code != 0) {
    MS_LOG(ERROR) << "Failed to copy data into Tensor.";
    return nullptr;
  }
  return assist_tensor;
}
ValueNodePtr CreateValueNode(const AnfNodePtr &node) {
  tensor::TensorPtr assist_tensor = CreateTensor(node);
  MS_EXCEPTION_IF_NULL(assist_tensor);
  auto assist_const = std::make_shared<ValueNode>(assist_tensor);
  auto assist_abstract = assist_tensor->ToAbstract();
  assist_const->set_abstract(assist_abstract);
  auto assist_kernel_info = std::make_shared<device::KernelInfo>();
  assist_const->set_kernel_info(assist_kernel_info);
  kernel::KernelBuildInfo::KernelBuildInfoBuilder op_builder;
  op_builder.SetOutputsFormat({kOpFormat_NC1HWC0});
  op_builder.SetOutputsDeviceType({kNumberTypeFloat16});
  AnfAlgo::SetSelectKernelBuildInfo(op_builder.Build(), assist_const.get());
  return assist_const;
}
} // namespace
const BaseRef SpaceToDepthSplit::DefinePattern() const {
  VarPtr Xs = std::make_shared<SeqVar>();
  auto space_to_depth_prim = std::make_shared<Primitive>(kSpaceToDepthOpName);
  return VectorRef({space_to_depth_prim, Xs});
}
const AnfNodePtr SpaceToDepthSplit::Process(const FuncGraphPtr &graph, const AnfNodePtr &node, const EquivPtr &) const {
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(node);
  auto kernel_graph = graph->cast<KernelGraphPtr>();
  auto cnode = node->cast<CNodePtr>();
  MS_EXCEPTION_IF_NULL(cnode);
  if (cnode->size() != kSpaceToDepthInputNum + 1) {
    MS_LOG(INFO) << "The node " << cnode->DebugString() << " does not have " << kSpaceToDepthInputNum << " inputs";
    return nullptr;
  }
  const auto &ori_inputs = cnode->inputs();
  TypeId x_dtype = AnfAlgo::GetOutputInferDataType(ori_inputs[1], 0);
  if (x_dtype != kNumberTypeFloat16) {
    MS_LOG(INFO) << "Node " << cnode->DebugString() << ": the data type of its first input is " << x_dtype
                 << ", not float16, so the fission is skipped.";
    return nullptr;
  }
  // Rebuild the SpaceToDepth node with the original inputs plus the float16 assist constant appended.
  std::vector<AnfNodePtr> new_inputs{NewValueNode(std::make_shared<Primitive>(kSpaceToDepthOpName))};
  auto assist_const = CreateValueNode(cnode);
  new_inputs.insert(new_inputs.end(), cnode->inputs().begin() + 1, cnode->inputs().end());
  new_inputs.push_back(assist_const);
  CNodePtr new_cnode = graph->NewCNode(new_inputs);
  MS_EXCEPTION_IF_NULL(new_cnode);
  new_cnode->set_abstract(cnode->abstract());
  new_cnode->set_scope(cnode->scope());
  AnfAlgo::CopyNodeAttrs(cnode, new_cnode);
  if (kernel_graph != nullptr) {
    kernel_graph->AddValueNodeToGraph(assist_const);
    MS_LOG(INFO) << "Split SpaceToDepth op successfully.";
  }
  return new_cnode;
}
} // namespace opt
} // namespace mindspore
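
The fill loop in CreateTensor is easier to see in isolation. The sketch below reproduces the same indexing with plain float instead of MindSpore's float16 and prints the positions that receive a 1, interpreted as (out_channel, in_channel, h, w) coordinates of the (C*b*b, C, b, b) assist tensor; C = 2 and b = 2 are illustrative values only.

// Standalone sketch of the assist-tensor fill pattern above (float instead of float16).
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  const int64_t C = 2;           // input_channel (illustrative)
  const int64_t b = 2;           // block_size (illustrative)
  const int64_t window = b * b;  // assist_input_shape[2] * assist_input_shape[3]
  // Assist shape is (C*b*b, C, b, b); stored flattened, row-major.
  std::vector<float> assist(C * b * b * C * b * b, 0.0f);
  for (int64_t window_idx = 0; window_idx < window; ++window_idx) {
    for (int64_t channel_idx = 0; channel_idx < C; ++channel_idx) {
      // Same index formula as CreateTensor.
      int64_t stride_idx = channel_idx * (C + 1) * window;
      int64_t fill_idx = stride_idx + window_idx * (C * C * window + 1);
      assist[fill_idx] = 1.0f;
    }
  }
  // Report the nonzero entries as (out_channel, in_channel, h, w) coordinates:
  // each one is assist[w*C + c][c][w / b][w % b], i.e. output channel w*C + c
  // copies input channel c at intra-block offset w.
  for (int64_t o = 0; o < C * b * b; ++o) {
    for (int64_t i = 0; i < C; ++i) {
      for (int64_t p = 0; p < window; ++p) {
        if (assist[(o * C + i) * window + p] != 0.0f) {
          std::printf("assist[%lld][%lld][%lld][%lld] = 1\n", static_cast<long long>(o),
                      static_cast<long long>(i), static_cast<long long>(p / b),
                      static_cast<long long>(p % b));
        }
      }
    }
  }
  return 0;
}

With C = 2 and b = 2 this prints eight entries, one per (window position, input channel) pair, matching the two nested loops in CreateTensor.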

@@ -0,0 +1,34 @@ (new file: backend/optimizer/ascend/ir_fission/space_to_depth_split.h)
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_IR_FISSION_SPACE_TO_DEPTH_SPLIT_H_
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_IR_FISSION_SPACE_TO_DEPTH_SPLIT_H_
#include <memory>
#include "backend/optimizer/common/optimizer.h"
#include "backend/optimizer/ascend/ascend_helper.h"
namespace mindspore {
namespace opt {
class SpaceToDepthSplit : public PatternProcessPass {
 public:
  explicit SpaceToDepthSplit(bool multigraph = true) : PatternProcessPass("space_to_depth_split", multigraph) {}
  ~SpaceToDepthSplit() override = default;
  const BaseRef DefinePattern() const override;
  const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
};
} // namespace opt
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_IR_FISSION_SPACE_TO_DEPTH_SPLIT_H_

@@ -160,6 +160,7 @@ constexpr auto kReduceProdOpName = "ReduceProd";
constexpr auto kCumprodOpName = "Cumprod";
constexpr auto kSpaceToBatchOpName = "SpaceToBatch";
constexpr auto kBatchToSpaceOpName = "BatchToSpace";
constexpr auto kSpaceToDepthOpName = "SpaceToDepth";
constexpr auto kPadOpName = "Pad";
constexpr auto kConv2DBackpropInputOpName = "Conv2DBackpropInput";
constexpr auto kConv2DBackpropFilterOpName = "Conv2DBackpropFilter";
