diff --git a/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc b/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc
index 7219d4f62a..71bb99d2c5 100644
--- a/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc
+++ b/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc
@@ -31,6 +31,7 @@
 #include "backend/optimizer/ascend/ir_fusion/fused_batch_norm_fusion.h"
 #include "backend/optimizer/ascend/ir_fission/layer_norm_grad_split.h"
 #include "backend/optimizer/ascend/ir_fission/unsorted_segment_sum_fission.h"
+#include "backend/optimizer/ascend/ir_fission/gather_v2_ds_fission.h"
 #include "backend/optimizer/pass/communication_op_fusion.h"
 #include "backend/optimizer/ascend/ir_fusion/square_sum_fusion.h"
 #include "backend/optimizer/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h"
@@ -181,6 +182,7 @@ void AddAscendIRFusionPass(PassManager *ir_fusion_pm) {
   ir_fusion_pm->AddPass(std::make_shared());
   ir_fusion_pm->AddPass(std::make_shared());
   ir_fusion_pm->AddPass(std::make_shared());
+  ir_fusion_pm->AddPass(std::make_shared<GatherV2DsFission>());
 }
 }  // namespace
diff --git a/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/gather_v2_ds_fission.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/gather_v2_ds_fission.cc
new file mode 100644
index 0000000000..7a3172efbe
--- /dev/null
+++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/gather_v2_ds_fission.cc
@@ -0,0 +1,177 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "backend/optimizer/ascend/ir_fission/gather_v2_ds_fission.h"
+#include <vector>
+#include <memory>
+#include <string>
+#include "backend/session/anf_runtime_algorithm.h"
+#include "ir/primitive.h"
+#include "utils/utils.h"
+
+namespace mindspore {
+namespace opt {
+namespace {
+// Only the Pad operator can run in dynamic shape.
+CNodePtr CreatePad(const FuncGraphPtr &graph, const CNodePtr &origin_node, const size_t &pad_dim_size) {
+  MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(origin_node);
+  std::vector<AnfNodePtr> pad_inputs = {NewValueNode(std::make_shared<Primitive>(kPadOpName)), origin_node->input(1)};
+  auto pad = graph->NewCNode(pad_inputs);
+  MS_EXCEPTION_IF_NULL(pad);
+  pad->set_scope(origin_node->scope());
+
+  auto param_abstract_shape = origin_node->input(1)->Shape();
+  MS_EXCEPTION_IF_NULL(param_abstract_shape);
+  if (!param_abstract_shape->isa<abstract::Shape>()) {
+    MS_LOG(EXCEPTION) << "GatherV2's first input has a wrong shape type";
+  }
+  auto param_dyn_shape = param_abstract_shape->cast<abstract::ShapePtr>();
+  ShapeVector shape(param_dyn_shape->shape());
+  if (shape.empty()) {
+    MS_LOG(EXCEPTION) << "GatherV2's shape is empty";
+  }
+  if (shape[shape.size() - 1] == -1) {
+    MS_LOG(EXCEPTION) << "The dim to be padded should not be dynamic";
+  }
+  shape[shape.size() - 1] = pad_dim_size;
+  auto type_id = AnfAlgo::GetPrevNodeOutputInferDataType(origin_node, 0);
+  auto abstract = std::make_shared<abstract::AbstractTensor>(TypeIdToType(type_id), shape);
+  if (param_dyn_shape->max_shape().size() == param_dyn_shape->shape().size() &&
+      param_dyn_shape->min_shape().size() == param_dyn_shape->shape().size()) {
+    ShapeVector max_shape(param_dyn_shape->max_shape());
+    ShapeVector min_shape(param_dyn_shape->min_shape());
+    ShapeVector new_shape(shape);
+    max_shape[max_shape.size() - 1] = pad_dim_size;
+    min_shape[min_shape.size() - 1] = pad_dim_size;
+    abstract->set_shape(std::make_shared<abstract::Shape>(new_shape, min_shape, max_shape));
+  }
+  pad->set_abstract(abstract);
+
+  std::vector<ValuePtr> elements;
+  for (size_t i = 0; i < shape.size() - 1; ++i) {
+    ShapeVector padding_vector(2);
+    auto padding_value = MakeValue(padding_vector);
+    elements.push_back(padding_value);
+  }
+  ShapeVector last_padding_vector = {0, SizeToLong(pad_dim_size - 1)};
+  auto last_padding_value = MakeValue(last_padding_vector);
+  elements.push_back(last_padding_value);
+  ValueTuplePtr paddings = std::make_shared<ValueTuple>(elements);
+  AnfAlgo::SetNodeAttr(kAttrPaddings, paddings, pad);
+  AnfAlgo::SetNodeAttr(kAttrIsDynamicShape, MakeValue(true), pad);
+  AnfAlgo::SetNodeAttr(kAttrInputIsDynamicShape, MakeValue(true), pad);
+  AnfAlgo::SetNodeAttr(kAttrOutputIsDynamicShape, MakeValue(true), pad);
+  return pad;
+}
+
+CNodePtr CreateGatherV2Ds(const FuncGraphPtr &graph, const CNodePtr &origin_node, const CNodePtr &pad,
+                          const size_t &pad_dim_size) {
+  MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(origin_node);
+  MS_EXCEPTION_IF_NULL(pad);
+  if (origin_node->size() != 4) {
+    MS_LOG(EXCEPTION) << "In the dynamic shape scene, GatherV2 should have 3 inputs";
+  }
+  std::vector<AnfNodePtr> gatherv2_inputs = {NewValueNode(std::make_shared<Primitive>(prim::kPrimGatherV2->name())),
+                                             pad, origin_node->input(2), origin_node->input(3)};
+  auto gather_v2 = graph->NewCNode(gatherv2_inputs);
+  MS_EXCEPTION_IF_NULL(gather_v2);
+  gather_v2->set_scope(origin_node->scope());
+
+  auto shape = AnfAlgo::GetOutputInferShape(origin_node, 0);
+  shape[shape.size() - 1] = pad_dim_size;
+  AnfAlgo::SetOutputInferTypeAndShape({AnfAlgo::GetOutputInferDataType(origin_node, 0)}, {shape}, gather_v2.get());
+  AnfAlgo::SetNodeAttr(kAttrIsDynamicShape, MakeValue(true), gather_v2);
+  AnfAlgo::SetNodeAttr(kAttrInputIsDynamicShape, MakeValue(true), gather_v2);
+  auto depends_list_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(origin_node, kAttrDynamicShapeDepends);
+  AnfAlgo::SetNodeAttr(kAttrDynamicShapeDepends, MakeValue(depends_list_me), gather_v2);
+  auto input_names = AnfAlgo::GetNodeAttr<std::vector<std::string>>(origin_node, kAttrInputNames);
+  AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(input_names), gather_v2);
+  auto output_names = AnfAlgo::GetNodeAttr<std::vector<std::string>>(origin_node, kAttrOutputNames);
+  AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(output_names), gather_v2);
+  return gather_v2;
+}
+
+CNodePtr CreateSlice(const FuncGraphPtr &graph, const CNodePtr &gather_v2, const CNodePtr &gather_v2_padding_8) {
+  MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(gather_v2);
+  MS_EXCEPTION_IF_NULL(gather_v2_padding_8);
+  std::vector<AnfNodePtr> slice_inputs = {NewValueNode(std::make_shared<Primitive>(kSliceOpName)), gather_v2_padding_8};
+  auto slice = graph->NewCNode(slice_inputs);
+  MS_EXCEPTION_IF_NULL(slice);
+  slice->set_scope(gather_v2->scope());
+  slice->set_abstract(gather_v2->abstract());
+  auto gather_v2_shape = AnfAlgo::GetOutputInferShape(gather_v2, 0);
+  std::vector<size_t> offsets(gather_v2_shape.size(), 0);
+  AnfAlgo::SetNodeAttr(kAttrBegin, MakeValue(Convert2Long(offsets)), slice);
+  AnfAlgo::SetNodeAttr(kAttrSize, MakeValue(Convert2Long(gather_v2_shape)), slice);
+  return slice;
+}
+
+bool CheckInputs(const CNodePtr &origin_node) {
+  MS_EXCEPTION_IF_NULL(origin_node);
+  if (origin_node->size() != kGatherV2DynInputNum + 1) {
+    MS_LOG(DEBUG) << "GatherV2 in dynamic shape has a wrong number of inputs, not equal to " << kGatherV2DynInputNum
+                  << ". CNode= " << origin_node->DebugString();
+    return false;
+  }
+  auto param_shape = AnfAlgo::GetPrevNodeOutputInferShape(origin_node, 0);
+  auto indice_shape = AnfAlgo::GetPrevNodeOutputInferShape(origin_node, 1);
+
+  // This optimizer only supports the case where embedding_table has a dynamic shape.
+  if (param_shape.empty() || indice_shape.empty() || AnfAlgo::IsDynamicShape(origin_node->input(2))) {
+    return false;
+  }
+  if (param_shape[param_shape.size() - 1] != 1) {
+    MS_LOG(DEBUG) << "GatherV2 in dynamic shape does not need fission. The last value of input0's shape is "
+                  << param_shape[param_shape.size() - 1];
+    return false;
+  }
+  return true;
+}
+}  // namespace
+
+const BaseRef GatherV2DsFission::DefinePattern() const {
+  VarPtr Xs = std::make_shared<SeqVar>();
+  VectorRef pattern({prim::kPrimGatherV2, Xs});
+  return pattern;
+}
+
+const AnfNodePtr GatherV2DsFission::Process(const FuncGraphPtr &graph, const AnfNodePtr &node, const EquivPtr &) const {
+  MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(node);
+  auto origin_node = node->cast<CNodePtr>();
+  MS_EXCEPTION_IF_NULL(origin_node);
+  if (!CheckInputs(origin_node)) {
+    return nullptr;
+  }
+  size_t pad_dim_size;
+  auto input_dtype = AnfAlgo::GetPrevNodeOutputInferDataType(origin_node, 0);
+  if (input_dtype == kNumberTypeFloat32) {
+    pad_dim_size = 8;
+  } else if (input_dtype == kNumberTypeFloat16) {
+    pad_dim_size = 16;
+  } else {
+    MS_LOG(DEBUG) << "GatherV2's data type is not in (float32, float16), no need to change";
+    return nullptr;
+  }
+  CNodePtr gather_v2_8;
+  auto pad = CreatePad(graph, origin_node, pad_dim_size);
+  gather_v2_8 = CreateGatherV2Ds(graph, origin_node, pad, pad_dim_size);
+  return CreateSlice(graph, origin_node, gather_v2_8);
+}
+}  // namespace opt
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/gather_v2_ds_fission.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/gather_v2_ds_fission.h
new file mode 100644
index 0000000000..868fcaf73a
--- /dev/null
+++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/gather_v2_ds_fission.h
@@ -0,0 +1,36 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_IR_FISSION_GATHER_V2_DS_FISSION_H_
+#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_IR_FISSION_GATHER_V2_DS_FISSION_H_
+
+#include <memory>
+#include <string>
+#include "backend/optimizer/common/optimizer.h"
+#include "backend/optimizer/common/helper.h"
+#include "backend/optimizer/ascend/ascend_helper.h"
+
+namespace mindspore {
+namespace opt {
+class GatherV2DsFission : public PatternProcessPass {
+ public:
+  explicit GatherV2DsFission(bool multigraph = true) : PatternProcessPass("gather_v2_ds_fission", multigraph) {}
+  ~GatherV2DsFission() override = default;
+  const BaseRef DefinePattern() const override;
+  const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
+};
+}  // namespace opt
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_IR_FISSION_GATHER_V2_DS_FISSION_H_
diff --git a/mindspore/ccsrc/backend/optimizer/common/helper.h b/mindspore/ccsrc/backend/optimizer/common/helper.h
index d496bb42ab..c3075d000a 100644
--- a/mindspore/ccsrc/backend/optimizer/common/helper.h
+++ b/mindspore/ccsrc/backend/optimizer/common/helper.h
@@ -98,6 +98,7 @@ constexpr size_t kTopkInputNum = 3;
 constexpr size_t kLarsV2InputNum = 5;
 constexpr size_t kFusedMulApplyMomentumOutputNum = 2;
 constexpr size_t kSplitInputNum = 2;
+constexpr size_t kGatherV2DynInputNum = 3;
 constexpr size_t kUnsortedSegmentSumInputNum = 2;
 
 enum FusedBatchNormInput {
diff --git a/mindspore/ccsrc/runtime/device/ascend/executor/tiling/op_tiling_calculater.cc b/mindspore/ccsrc/runtime/device/ascend/executor/tiling/op_tiling_calculater.cc
index 9019abd451..e0bd51ac1b 100644
--- a/mindspore/ccsrc/runtime/device/ascend/executor/tiling/op_tiling_calculater.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/executor/tiling/op_tiling_calculater.cc
@@ -148,6 +148,7 @@ std::string GetRealOpType(const std::string &op_type) {
     {"SparseApplyFtrl", "SparseApplyFtrlD"},
     {"SparseApplyProximalAdagrad", "SparseApplyProximalAdagradD"},
     {"SparseGatherV2", "GatherV2"},
+    {"Pad", "PadD"},
   };
   auto iter = kOpTypeMap.find(op_type);
   if (iter == kOpTypeMap.end()) {
diff --git a/mindspore/ccsrc/utils/utils.h b/mindspore/ccsrc/utils/utils.h
index 3d37befbf2..6a8df52fa7 100644
--- a/mindspore/ccsrc/utils/utils.h
+++ b/mindspore/ccsrc/utils/utils.h
@@ -323,12 +323,14 @@ constexpr auto kAttrT = "T";
 constexpr auto kAttrNum = "num";
 constexpr auto kAttrRankSize = "rank_size";
 constexpr auto kAttrPadDimSize = "pad_dim_size";
+constexpr auto kAttrPaddings = "paddings";
 constexpr auto kAttrNumSegments = "num_segments";
 constexpr auto kAttrBegin = "begin";
 constexpr auto kAttrSize = "size";
 constexpr auto kAttrIsDynamicShape = "is_dynamic_shape";
 constexpr auto kAttrInputIsDynamicShape = "input_is_dynamic_shape";
 constexpr auto kAttrOutputIsDynamicShape = "output_is_dynamic_shape";
+constexpr auto kAttrDynamicShapeDepends = "dynamic_shape_depends";
 constexpr auto kAttrPynativeNextOpName = "next_op";
 constexpr auto kAttrPynativeNextIndex = "next_index";
 constexpr auto kAttrCompileInfo = "compile_info";
diff --git a/mindspore/core/abstract/infer_functions.h b/mindspore/core/abstract/infer_functions.h
index 39db3ef917..48c6bbc16e 100644
--- a/mindspore/core/abstract/infer_functions.h
+++ b/mindspore/core/abstract/infer_functions.h
@@ -251,7 +251,8 @@ AbstractBasePtr InferImplExpandDims(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
                                     const AbstractBasePtrList &args_spec_list);
 AbstractBasePtr InferImplGpuConvertToDynamicShape(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
                                                   const AbstractBasePtrList &args_spec_list);
-
+AbstractBasePtr InferImplPad(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
+                             const AbstractBasePtrList &args_spec_list);
 template <typename T>
 AbstractBasePtr InferTupleOrListOrDictLen(const std::string &op_name, const AbstractBasePtrList &args_spec_list) {
   // Inputs: a tuple or list or dict.
diff --git a/mindspore/core/abstract/prim_nn.cc b/mindspore/core/abstract/prim_nn.cc
index 745e890f50..692814628a 100644
--- a/mindspore/core/abstract/prim_nn.cc
+++ b/mindspore/core/abstract/prim_nn.cc
@@ -470,5 +470,39 @@ AbstractBasePtr InferImplSGD(const AnalysisEnginePtr &, const PrimitivePtr &prim
   elements.push_back(args_spec_list[0]->Clone()->Broaden());
   return std::make_shared<AbstractTuple>(elements);
 }
+
+AbstractBasePtr InferImplPad(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
+                             const AbstractBasePtrList &args_spec_list) {
+  MS_EXCEPTION_IF_NULL(primitive);
+  const std::string op_name = primitive->name();
+  CheckArgsSize(op_name, args_spec_list, 1);
+  auto arg = CheckArg<AbstractTensor>(op_name, args_spec_list, 0);
+  auto input_shp = arg->shape()->shape();
+  auto padding_attr = primitive->GetAttr("paddings");
+  MS_EXCEPTION_IF_NULL(padding_attr);
+  if (!padding_attr->isa<ValueTuple>()) {
+    MS_LOG(EXCEPTION) << "paddings is not a ValueTuple";
+  }
+  std::vector<ValuePtr> paddings = padding_attr->cast<ValueTuplePtr>()->value();
+  std::vector<std::vector<int64_t>> paddings_vec;
+  for (ValuePtr paddings_elements : paddings) {
+    std::vector<ValuePtr> paddings_elements_tuple = paddings_elements->cast<ValueTuplePtr>()->value();
+    std::vector<int64_t> paddings_vec_item;
+    (void)std::transform(std::begin(paddings_elements_tuple), std::end(paddings_elements_tuple),
+                         std::back_inserter(paddings_vec_item),
+                         [](const ValuePtr &e) -> int64_t { return GetValue<int64_t>(e); });
+    paddings_vec.push_back(paddings_vec_item);
+  }
+
+  ShapeVector result_shp;
+  size_t length = paddings_vec.size();
+  for (size_t i = 0; i < length; ++i) {
+    if (paddings_vec[i].size() != 2) {
+      MS_LOG(EXCEPTION) << "The size of each element of paddings is not 2";
+    }
+    result_shp.push_back(input_shp[i] + paddings_vec[i][0] + paddings_vec[i][1]);
+  }
+  return std::make_shared<AbstractTensor>(arg->element(), std::make_shared<Shape>(result_shp));
+}
 }  // namespace abstract
 }  // namespace mindspore
diff --git a/mindspore/core/abstract/primitive_infer_map.cc b/mindspore/core/abstract/primitive_infer_map.cc
index db7767ef25..f85b6c89ef 100644
--- a/mindspore/core/abstract/primitive_infer_map.cc
+++ b/mindspore/core/abstract/primitive_infer_map.cc
@@ -50,6 +50,7 @@ PrimitiveEvalImplMap &GetPrimitiveToEvalImplMap() {
     {prim::kPrimArrayToScalar, {InferImplArrayToScalar, true}},
     {prim::kPrimBroadcastShape, {InferImplBroadCastShape, true}},
     {prim::kPrimPack, {InferImplPack, true}},
+    {prim::kPrimPad, {InferImplPad, true}},
     {prim::kPrimUnique, {InferImplUnique, true}},
     {prim::kPrimUniqueGrad, {InferImplUniqueGrad, true}},
     {prim::kPrimGatherV2, {InferImplGatherV2, true}},
diff --git a/mindspore/core/base/core_ops.h b/mindspore/core/base/core_ops.h
index 630e1bb7a7..0d507cc0ba 100644
--- a/mindspore/core/base/core_ops.h
+++ b/mindspore/core/base/core_ops.h
@@ -101,6 +101,7 @@ inline const PrimitivePtr kPrimReshape = std::make_shared<Primitive>("Reshape");
 inline const PrimitivePtr kPrimMapCacheIdx = std::make_shared<Primitive>("MapCacheIdx");
 inline const PrimitivePtr kPrimUpdateCache = std::make_shared<Primitive>("UpdateCache");
 inline const PrimitivePtr kPrimCacheSwapTable = std::make_shared<Primitive>("CacheSwapTable");
+inline const PrimitivePtr kPrimSlice = std::make_shared<Primitive>("Slice");
 inline const PrimitivePtr kPrimTile = std::make_shared<Primitive>("Tile");
 inline const PrimitivePtr kPrimAddN = std::make_shared<Primitive>("AddN");
 inline const PrimitivePtr kPrimAccumulateNV2 = std::make_shared<Primitive>("AccumulateNV2");
diff --git a/mindspore/ops/_op_impl/tbe/__init__.py b/mindspore/ops/_op_impl/tbe/__init__.py
index f1415f21cd..b85c192bb2 100644
--- a/mindspore/ops/_op_impl/tbe/__init__.py
+++ b/mindspore/ops/_op_impl/tbe/__init__.py
@@ -193,6 +193,7 @@ from .sigmoid_grad import _sigmoid_grad_tbe
 from .resize_nearest_neighbor import _resize_nearest_neighbor_tbe
 from .resize_nearest_neighbor_grad import _resize_nearest_neighbor_grad_tbe
 from .pad_d import _pad_d_tbe
+from .pad_d_ds import _pad_d_ds_tbe
 from .arg_max_with_value import _arg_max_with_value_tbe
 from .arg_min_with_value import _arg_min_with_value_tbe
 from .smooth_l1_loss import _smooth_l1_loss_tbe
diff --git a/mindspore/ops/_op_impl/tbe/pad_d_ds.py b/mindspore/ops/_op_impl/tbe/pad_d_ds.py
new file mode 100644
index 0000000000..8ee507377d
--- /dev/null
+++ b/mindspore/ops/_op_impl/tbe/pad_d_ds.py
@@ -0,0 +1,41 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Pad op"""
+from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
+
+pad_d_op_info = TBERegOp("Pad") \
+    .fusion_type("OPAQUE") \
+    .async_flag(False) \
+    .binfile_name("pad_d.so") \
+    .compute_cost(10) \
+    .kernel_name("pad_d") \
+    .partial_flag(True) \
+    .attr("paddings", "optional", "listListInt", "all") \
+    .dynamic_shape(True) \
+    .input(0, "x", False, "required", "all") \
+    .output(0, "y", False, "required", "all") \
+    .dtype_format(DataType.I8_Default, DataType.I8_Default) \
+    .dtype_format(DataType.U8_Default, DataType.U8_Default) \
+    .dtype_format(DataType.I32_Default, DataType.I32_Default) \
+    .dtype_format(DataType.F16_Default, DataType.F16_Default) \
+    .dtype_format(DataType.F32_Default, DataType.F32_Default) \
+    .get_op_info()
+
+
+@op_info_register(pad_d_op_info)
+def _pad_d_ds_tbe():
+    """Pad TBE register"""
+    return
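
Taken together, the patch rewrites GatherV2(param, indices, axis) -- where param is a dynamic-shape embedding table whose last dimension is 1 -- into Slice(GatherV2(Pad(param), indices, axis)): CreatePad widens the last dimension to 8 (float32) or 16 (float16), the gather runs on the padded table, and CreateSlice trims the padding away. The comment at the top of gather_v2_ds_fission.cc suggests the motivation: only the Pad operator can run in dynamic shape here, which also explains the Pad -> PadD mapping added to GetRealOpType. The following is a minimal NumPy sketch of that equivalence, not MindSpore API; the function name and pad_dim_size default are illustrative only.

import numpy as np

def gather_v2_fissioned(param, indices, axis=0, pad_dim_size=8):
    # Pad only the last axis: [[0, 0], ..., [0, pad_dim_size - 1]],
    # mirroring the paddings attribute built in CreatePad.
    paddings = [(0, 0)] * (param.ndim - 1) + [(0, pad_dim_size - 1)]
    padded = np.pad(param, paddings, mode="constant")
    # Gather on the padded table (the rewritten GatherV2 node).
    gathered = np.take(padded, indices, axis=axis)
    # Slice back to the original last-dim size of 1 (begin = 0 on every
    # axis, size = the original GatherV2 output shape), as CreateSlice does.
    return gathered[..., :1]

table = np.random.randn(5, 1).astype(np.float32)  # last dim == 1, as CheckInputs requires
indices = np.array([0, 2, 4])
assert np.array_equal(gather_v2_fissioned(table, indices), np.take(table, indices, axis=0))

The padded columns never reach the output: the gather copies them along unchanged and the final slice discards them, so the fissioned form is value-for-value identical to the original GatherV2.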