diff --git a/mindspore/ccsrc/frontend/parallel/dynamic_creator.h b/mindspore/ccsrc/frontend/parallel/dynamic_creator.h
index dd9ae230d8..a1764d98e9 100644
--- a/mindspore/ccsrc/frontend/parallel/dynamic_creator.h
+++ b/mindspore/ccsrc/frontend/parallel/dynamic_creator.h
@@ -133,6 +133,8 @@ REGISTER(LogicalAndInfo);
 REGISTER(LogicalOrInfo);
 REGISTER(EluInfo);
 REGISTER(ReLUInfo);
+REGISTER(RepeatElementsInfo);
+REGISTER(TensorDotInfo);
 REGISTER(ReLU6Info);
 REGISTER(ReLUV2Info);
 REGISTER(SoftplusInfo);
diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/activation_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/activation_info.cc
index 79ade08268..9eb4a4ea90 100644
--- a/mindspore/ccsrc/frontend/parallel/ops_info/activation_info.cc
+++ b/mindspore/ccsrc/frontend/parallel/ops_info/activation_info.cc
@@ -307,27 +307,18 @@ Status ActivationBase::InferTensorMap() {
 }
 
 Status ActivationBase::InferTensorInfo() {
-  // infer tensor shape
-  Shape input_shape = inputs_shape_.at(0);
-
-  // infer slice shape
-  Shapes inputs_slice_shape, outputs_slice_shape;
-  Strategys inputs_strategy = strategy_->GetInputDim();
-  Strategys outputs_strategy = {inputs_strategy.at(0)};
-  if (InferSliceShape(inputs_strategy, outputs_strategy, &inputs_slice_shape, &outputs_slice_shape) != SUCCESS) {
-    return FAILED;
-  }
-  Shape input_slice_shape = inputs_slice_shape.at(0);
-
-  TensorLayout input_tensor_layout;
-  if (input_tensor_layout.InitFromVector(dev_matrix_shape_, inputs_tensor_map_[0], input_shape) != SUCCESS) {
+  TensorLayout input_tensor_layout, output_tensor_layout;
+  if ((input_tensor_layout.InitFromVector(dev_matrix_shape_, inputs_tensor_map_[0], inputs_shape_[0]) != SUCCESS) ||
+      (output_tensor_layout.InitFromVector(dev_matrix_shape_, outputs_tensor_map_[0], outputs_shape_[0]) != SUCCESS)) {
+    MS_LOG(ERROR) << name_ << ": init tensor layout failed";
     return FAILED;
   }
 
-  TensorInfo input_tensor_info(input_tensor_layout, input_shape, input_slice_shape);
+  TensorInfo input_tensor_info(input_tensor_layout);
+  TensorInfo output_tensor_info(output_tensor_layout);
 
   inputs_tensor_info_.push_back(input_tensor_info);
-  outputs_tensor_info_.push_back(input_tensor_info);  // the same as input
+  outputs_tensor_info_.push_back(output_tensor_info);
 
   return SUCCESS;
 }
diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/activation_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/activation_info.h
index c7e4ccb647..647c564002 100644
--- a/mindspore/ccsrc/frontend/parallel/ops_info/activation_info.h
+++ b/mindspore/ccsrc/frontend/parallel/ops_info/activation_info.h
@@ -146,6 +146,14 @@ class ReLUInfo : public ActivationOther {
   ~ReLUInfo() override = default;
 };
 
+class RepeatElementsInfo : public ActivationOther {
+ public:
+  RepeatElementsInfo(const std::string &name, const Shapes &inputs_shape, const Shapes &outputs_shape,
+                     const PrimitiveAttrs &attrs)
+      : ActivationOther(name, inputs_shape, outputs_shape, attrs) {}
+  ~RepeatElementsInfo() override = default;
+};
+
 class ReLU6Info : public ActivationOther {
  public:
   ReLU6Info(const std::string &name, const Shapes &inputs_shape, const Shapes &outputs_shape,
diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/ops_info_head_files.h b/mindspore/ccsrc/frontend/parallel/ops_info/ops_info_head_files.h
index 30abd841ac..16b92a63ce 100644
--- a/mindspore/ccsrc/frontend/parallel/ops_info/ops_info_head_files.h
+++ b/mindspore/ccsrc/frontend/parallel/ops_info/ops_info_head_files.h
@@ -42,6 +42,7 @@
 #include "frontend/parallel/ops_info/strided_slice_info.h"
 #include "frontend/parallel/ops_info/concat_info.h"
"frontend/parallel/ops_info/concat_info.h" #include "frontend/parallel/ops_info/split_info.h" +#include "frontend/parallel/ops_info/tensordot_info.h" #include "frontend/parallel/ops_info/pack_info.h" #include "frontend/parallel/ops_info/broadcast_to_info.h" #include "frontend/parallel/ops_info/unique_info.h" diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h b/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h index 4ff6c5b2bc..6338b08384 100644 --- a/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h @@ -103,6 +103,7 @@ constexpr char STRIDES[] = "strides"; constexpr char GROUP[] = "group"; constexpr char FUSION[] = "fusion"; constexpr char AXIS[] = "axis"; +constexpr char AXES[] = "axes"; constexpr char OUTPUT_NUM[] = "output_num"; constexpr char SPLIT_COUNT[] = "split_count"; constexpr char SPLIT_DIM[] = "split_dim"; @@ -190,6 +191,8 @@ constexpr char VIRTUAL_DATA_SET[] = "_VirtualDataset"; constexpr char VIRTUAL_DATA_SET_INFO[] = "VirtualDatasetInfo"; constexpr char SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS[] = "SparseSoftmaxCrossEntropyWithLogits"; constexpr char RELU[] = "ReLU"; +constexpr char REPEAT_ELEMENTS[] = "RepeatElements"; +constexpr char TENSOR_DOT[] = "TensorDot"; constexpr char ONEHOT[] = "OneHot"; constexpr char DROPOUT_DO_MASK[] = "DropoutDoMask"; constexpr char DROPOUT_GEN_MASK[] = "DropoutGenMask"; diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/tensordot_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/tensordot_info.cc new file mode 100644 index 0000000000..3bdf626cf8 --- /dev/null +++ b/mindspore/ccsrc/frontend/parallel/ops_info/tensordot_info.cc @@ -0,0 +1,451 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "frontend/parallel/ops_info/tensordot_info.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "ir/value.h"
+#include "frontend/parallel/auto_parallel/graph_costmodel.h"
+#include "frontend/parallel/device_manager.h"
+#include "frontend/parallel/device_matrix.h"
+#include "frontend/parallel/tensor_layout/tensor_redistribution.h"
+
+namespace mindspore {
+namespace parallel {
+static std::string AxesToString(const std::vector<int32_t> &shape) {
+  std::string str = "[";
+  for (size_t i = 0; i < shape.size(); ++i) {
+    str += std::to_string(shape[i]);
+    if (i < shape.size() - 1) {
+      str += ", ";
+    }
+  }
+  return str + "]";
+}
+
+static std::vector<ValuePtr> GetValueSequeue(ValuePtr sequeue) {
+  MS_EXCEPTION_IF_NULL(sequeue);
+  std::vector<ValuePtr> ret;
+  if (!sequeue->isa<ValueTuple>() && !sequeue->isa<ValueList>()) {
+    MS_LOG(ERROR) << "The arg is not value tuple or value list";
+    return ret;
+  }
+
+  if (sequeue->isa<ValueTuple>()) {
+    auto val = sequeue->cast<ValueTuplePtr>();
+    return val->value();
+  }
+  auto val = sequeue->cast<ValueListPtr>();
+  return val->value();
+}
+
+void TensorDotInfo::ShowAxes() {
+  if (axes_tuple_.size()) {
+    MS_LOG(INFO) << name_ << ": The axes tuple is " << AxesToString(axes_tuple_);
+  } else if (axes_tuple_tuple_.size()) {
+    MS_LOG(INFO) << name_ << ": The axes tuple tuple is " << AxesToString(axes_tuple_tuple_[0]) << " and "
+                 << AxesToString(axes_tuple_tuple_[1]);
+  }
+}
+
+Status TensorDotInfo::GetAttrs() {
+  auto axes_iter = attrs_.find(AXES);
+  if (axes_iter == attrs_.end()) {
+    MS_LOG(ERROR) << name_ << ": Can not find the axes attr";
+    return FAILED;
+  }
+
+  MS_EXCEPTION_IF_NULL(axes_iter->second);
+  if (axes_iter->second->isa<Int32Imm>()) {
+    axes_int_ = axes_iter->second->cast<Int32ImmPtr>()->value();
+    if ((axes_int_ < 0) || (IntToSize(axes_int_) > inputs_shape_[0].size()) ||
+        (IntToSize(axes_int_) > inputs_shape_[1].size())) {
+      MS_LOG(ERROR) << name_ << ": The value of axes int (" << axes_int_ << ") is out of range";
+      return FAILED;
+    }
+    axes_type_ = INT_TYPE;
+  } else if (axes_iter->second->isa<ValueTuple>() || axes_iter->second->isa<ValueList>()) {
+    std::vector<ValuePtr> var_tuple = GetValueSequeue(axes_iter->second);
+    if (var_tuple.size() != 2) {
+      MS_LOG(ERROR) << name_ << ": The length of axes tuple must be 2, but got " << var_tuple.size();
+      return FAILED;
+    }
+
+    for (size_t i = 0; i < var_tuple.size(); ++i) {
+      if (var_tuple[i]->isa<Int32Imm>()) {
+        int32_t ele_var = var_tuple[i]->cast<Int32ImmPtr>()->value();
+        if (ele_var < 0) {
+          ele_var += inputs_shape_[i].size();
+        }
+        axes_tuple_.push_back(ele_var);
+      } else {
+        std::vector<int32_t> var_ele = GetValue<std::vector<int32_t>>(var_tuple[i]);
+        for (auto &ele : var_ele) {
+          if (ele < 0) {
+            MS_LOG(DEBUG) << name_ << ": The element of axes is " << ele;
+            ele += inputs_shape_[i].size();
+          }
+        }
+        axes_tuple_tuple_.push_back(var_ele);
+      }
+    }
+
+    if (!axes_tuple_.empty()) {
+      axes_type_ = TUPLE_TYPE;
+      MS_LOG(ERROR) << name_ << ": The axes type TUPLE_TYPE is not supported now";
+      return FAILED;
+    } else if (!axes_tuple_tuple_.empty()) {
+      axes_type_ = TUPLE_TUPLE_TYPE;
+    }
+  } else {
+    MS_LOG(ERROR) << name_ << ": The axes is not int or tuple or list";
+    return FAILED;
+  }
+
+  ShowAxes();
+  return SUCCESS;
+}
+
+Status TensorDotInfo::CheckStrategy(const StrategyPtr &strategy) {
+  MS_EXCEPTION_IF_NULL(strategy);
+  if (CheckStrategyValue(strategy, inputs_shape_) != SUCCESS) {
+    MS_LOG(ERROR) << name_ << ": Invalid strategy";
+    return FAILED;
+  }
+
+  Strategys stra = strategy->GetInputDim();
+  if (stra.size() != 2) {
+    MS_LOG(ERROR) << name_ << ": Invalid strategy size " << stra.size();
+    return FAILED;
+  }
+  Dimensions input_a_strategy = stra[0];
+  Dimensions input_b_strategy = stra[1];
+
+  if (axes_type_ == INT_TYPE) {  // for example: axes = 3, [a, b, c, d] and [b, c, d, e]
+    for (int32_t i = 0; i < axes_int_; ++i) {
+      if (input_a_strategy[input_a_strategy.size() - axes_int_ + i] != input_b_strategy[i]) {
+        MS_LOG(ERROR) << name_ << ": The strategies of the relevant dimensions are not equal";
+        return FAILED;
+      }
+    }
+  } else if (axes_type_ == TUPLE_TUPLE_TYPE) {
+    for (size_t i = 0; i < axes_tuple_tuple_[0].size(); ++i) {
+      if (input_a_strategy[axes_tuple_tuple_[0][i]] != input_b_strategy[axes_tuple_tuple_[1][i]]) {
+        MS_LOG(ERROR) << name_ << ": The strategies of the relevant dimensions are not equal";
+        return FAILED;
+      }
+    }
+  } else {
+    MS_LOG(ERROR) << name_ << ": The axes type TUPLE_TYPE is not supported now";
+    return FAILED;
+  }
+  return SUCCESS;
+}
+
+Status TensorDotInfo::InferDevMatrixShape() {
+  Strategys stra = strategy_->GetInputDim();
+  Dimensions input_a_strategy = stra.at(0);
+  Dimensions input_b_strategy = stra.at(1);
+
+  if (axes_type_ == INT_TYPE) {
+    dev_matrix_shape_ = input_a_strategy;
+    for (size_t i = axes_int_; i < input_b_strategy.size(); i++) {
+      dev_matrix_shape_.push_back(input_b_strategy[i]);
+    }
+  } else if (axes_type_ == TUPLE_TUPLE_TYPE) {
+    dev_matrix_shape_ = input_a_strategy;
+    for (size_t i = 0; i < input_b_strategy.size(); ++i) {
+      bool found = false;
+      for (auto &ele : axes_tuple_tuple_[1]) {
+        if (i == IntToSize(ele)) {
+          found = true;
+          break;
+        }
+      }
+
+      if (!found) {
+        dev_matrix_shape_.push_back(input_b_strategy[i]);
+      }
+    }
+  } else {
+    MS_LOG(ERROR) << name_ << ": The axes type TUPLE_TYPE is not supported now";
+    return FAILED;
+  }
+
+  MS_LOG(INFO) << name_ << ": The dev matrix is " << ShapeToString(dev_matrix_shape_);
+  return SUCCESS;
+}
+
+Status TensorDotInfo::InferMirrorOps() {
+  mirror_ops_.clear();
+
+  Shape input_a_tensor_map = inputs_tensor_map_[0];
+  Shape input_b_tensor_map = inputs_tensor_map_[1];
+  std::vector<Group> input_a_group, input_b_group;
+  if ((CreateGroupByTensorMap(input_a_tensor_map, &input_a_group) != SUCCESS) ||
+      (CreateGroupByTensorMap(input_b_tensor_map, &input_b_group) != SUCCESS)) {
+    MS_LOG(ERROR) << name_ << ": Create group by tensor map failed";
+    return FAILED;
+  }
+
+  if (input_a_group.empty() && input_b_group.empty()) {
+    MS_LOG(INFO) << name_ << ": The mirror ops is empty";
+    return SUCCESS;
+  }
+
+  OperatorVector op_for_input_a, op_for_input_b;
+  if (!input_a_group.empty()) {
+    op_for_input_a = CreateMirrorOps(input_a_group[0].name(), input_a_group[0].GetDevNum());
+    MS_LOG(INFO) << name_ << ": Create the mirror ops for input_a success, group is " << input_a_group[0].name();
+  }
+  if (!input_b_group.empty()) {
+    op_for_input_b = CreateMirrorOps(input_b_group[0].name(), input_b_group[0].GetDevNum());
+    MS_LOG(INFO) << name_ << ": Create the mirror ops for input_b success, group is " << input_b_group[0].name();
+  }
+
+  mirror_ops_.push_back(op_for_input_a);
+  mirror_ops_.push_back(op_for_input_b);
+  return SUCCESS;
+}
+
+Status TensorDotInfo::InferForwardCommunication() {
+  forward_op_.clear();
+  Shape forward_group_map = outputs_tensor_map_[0];
+  // handle the repeated calculation: the group for forward communication can not include the dimension of
+  // repeated calculation
+  if (repeated_calc_num_ > 1) {
+    if (repeated_num_in_dev_matrix_right_) {
+      forward_group_map.push_back(0);
+    } else {
+      forward_group_map.push_back(dev_matrix_shape_.size() - 1);
+    }
+  }
+
+  std::vector<Group> forward_group;
+  if (CreateGroupByTensorMap(forward_group_map, &forward_group) != SUCCESS) {
+    MS_LOG(ERROR) << name_ << ": Create group by tensor map failed";
+    return FAILED;
+  }
+
+  if (forward_group.empty()) {
+    MS_LOG(INFO) << name_ << ": No need to create forward op";
+    return SUCCESS;
+  }
+
+  Operator op = CreateAllReduceOp(REDUCE_OP_SUM, forward_group[0].name());
+  forward_op_.push_back(op);
+  MS_LOG(INFO) << name_ << ": The group name of forward communication is " << forward_group[0].name();
+  return SUCCESS;
+}
+
+void TensorDotInfo::InferTensorMapAxesInt(const TensorMap &tensor_map_index) {
+  // infer input_b tensor map
+  // for example: the dimension of input_b is 4, the tensor map is [3, 2, 1, 0]
+  TensorMap input_a_tensor_map, input_b_tensor_map, output_tensor_map;
+  for (size_t i = 0; i < inputs_shape_[1].size(); i++) {
+    input_b_tensor_map.push_back((int64_t)(inputs_shape_[1].size() - i - 1));
+  }
+
+  // infer output tensor map
+  output_tensor_map = tensor_map_index;
+  (void)output_tensor_map.erase(
+    output_tensor_map.begin() + static_cast(inputs_shape_[0].size() - IntToSize(axes_int_)),
+    output_tensor_map.begin() + static_cast(inputs_shape_[0].size()));
+
+  inputs_tensor_map_.push_back(input_b_tensor_map);
+  outputs_tensor_map_.push_back(output_tensor_map);
+}
+
+void TensorDotInfo::InferTensorMapAxesTuple(size_t size, const TensorMap &input_a_tensor_map,
+                                            const TensorMap &tensor_map_index) {
+  // for example: [a, b, c, d] + [e, f, b, c, d] -> [a, e, f], axes is ((1, 2, 3), (2, 3, 4))
+  // the tensor map of inputs: [5, 4, 3, 2], [1, 0, 4, 3, 2], and the tensor map of output: [5, 1, 0]
+  // infer input_b tensor map
+  TensorMap input_b_tensor_map, output_tensor_map, tmp_b_map_index;
+  for (size_t i = 0; i < size - inputs_shape_[0].size(); ++i) {
+    tmp_b_map_index.push_back((int64_t)(size - inputs_shape_[0].size() - i - 1));  // [1, 0]
+  }
+  for (size_t i = 0; i < inputs_shape_[1].size(); ++i) {
+    bool found = false;
+    size_t relevant_a_index = 0;
+    for (size_t j = 0; j < axes_tuple_tuple_[1].size(); ++j) {
+      if (i == IntToSize(axes_tuple_tuple_[1][j])) {
+        found = true;
+        relevant_a_index = axes_tuple_tuple_[0][j];
+        break;
+      }
+    }
+
+    if (!found) {
+      input_b_tensor_map.push_back(tmp_b_map_index.front());
+      tmp_b_map_index.erase(tmp_b_map_index.begin());
+    } else {
+      input_b_tensor_map.push_back(input_a_tensor_map[relevant_a_index]);
+    }
+  }
+
+  // infer output tensor map
+  for (size_t i = 0; i < size; ++i) {
+    bool found = false;
+    for (size_t j = 0; j < axes_tuple_tuple_[0].size(); ++j) {
+      if (i == IntToSize(axes_tuple_tuple_[0][j])) {
+        found = true;
+        break;
+      }
+    }
+    if (!found) {
+      output_tensor_map.push_back(tensor_map_index[i]);
+    }
+  }
+  inputs_tensor_map_.push_back(input_b_tensor_map);
+  outputs_tensor_map_.push_back(output_tensor_map);
+}
+
+Status TensorDotInfo::InferTensorMap() {
+  size_t size = dev_matrix_shape_.size();
+  if (repeated_calc_num_ > 1) {
+    // move the repeat calculation dimension, just for the convenience of tensor-map's calculation
+    size = dev_matrix_shape_.size() - 1;
+  }
+
+  TensorMap tensor_map_index, input_a_tensor_map;
+  // such as 5: tensor_map_index [4, 3, 2, 1, 0]
+  for (size_t i = 0; i < size; ++i) {
+    tensor_map_index.push_back((int64_t)(LAST_INDEX(size) - i));
+  }
+
+  // infer input_a tensor map
+  // for example: the dimension of input_a is 4, the tensor map is [4, 3, 2, 1]
+  for (size_t i = 0; i < inputs_shape_[0].size(); i++) {
+    input_a_tensor_map.push_back(tensor_map_index[i]);
+  }
+  inputs_tensor_map_.push_back(input_a_tensor_map);
+
+  if (axes_type_ == INT_TYPE) {
+    InferTensorMapAxesInt(tensor_map_index);
+  } else if (axes_type_ == TUPLE_TUPLE_TYPE) {
+    InferTensorMapAxesTuple(size, input_a_tensor_map, tensor_map_index);
+  } else {
+    MS_LOG(ERROR) << name_ << ": The axes type TUPLE_TYPE is not supported now";
+    return FAILED;
+  }
+
+  return SUCCESS;
+}
+
+Status TensorDotInfo::InferTensorInfo() {
+  if (inputs_shape_.empty() || outputs_shape_.empty() || inputs_tensor_map_.empty() || outputs_tensor_map_.empty()) {
+    MS_LOG(ERROR) << name_ << ": Invalid args";
+    return FAILED;
+  }
+
+  TensorLayout input_layout, output_layout;
+  for (size_t i = 0; i < inputs_shape_.size(); ++i) {
+    // infer tensor layout
+    if (input_layout.InitFromVector(dev_matrix_shape_, inputs_tensor_map_[i], inputs_shape_[i]) != SUCCESS) {
+      MS_LOG(ERROR) << name_ << ": Infer input tensor layout failed.";
+      return FAILED;
+    }
+    TensorInfo input_tensor_info(input_layout);
+    inputs_tensor_info_.push_back(input_tensor_info);
+  }
+
+  if (output_layout.InitFromVector(dev_matrix_shape_, outputs_tensor_map_[0], outputs_shape_[0]) != SUCCESS) {
+    MS_LOG(ERROR) << name_ << ": Infer output tensor layout failed.";
+    return FAILED;
+  }
+  TensorInfo output_tensor_info(output_layout);
+  outputs_tensor_info_.push_back(output_tensor_info);
+
+  for (size_t i = 0; i < inputs_tensor_info_.size(); i++) {
+    MS_LOG(INFO) << name_ << ": The input " << i << " layout: " << inputs_tensor_info_[i].tensor_layout().ToString();
+  }
+  MS_LOG(INFO) << name_ << ": The output layout: " << outputs_tensor_info_[0].tensor_layout().ToString();
+  return SUCCESS;
+}
+
+Status TensorDotInfo::Init(const StrategyPtr &strategy) {
+  if (InitWithAutoRepeatCalc(strategy) != SUCCESS) {
+    MS_LOG(ERROR) << name_ << ": Init failed";
+    return FAILED;
+  }
+
+  MS_LOG(INFO) << name_ << ": Init success";
+  return SUCCESS;
+}
+
+Status TensorDotInfo::InitForCostModel(const StrategyPtr &strategy) {
+  if (InitForCostModelWithAutoRepeatCalc(strategy) != SUCCESS) {
+    MS_LOG(ERROR) << name_ << ": Init for cost model failed";
+    return FAILED;
+  }
+
+  MS_LOG(INFO) << name_ << ": Init for cost model success";
+  return SUCCESS;
+}
+
+std::shared_ptr<Strategys> TensorDotInfo::GenerateBatchStrategies() {
+  if (GetAttrs() != SUCCESS) {
+    MS_LOG(EXCEPTION) << name_ << ": Get attr failed";
+  }
+
+  CheckGlobalDeviceManager();
+  size_t dev_num = g_device_manager->GetDeviceListByStageId(0).size();
+  Dimensions input_a_strategy(inputs_shape_[0].size(), 1);
+  Dimensions input_b_strategy(inputs_shape_[1].size(), 1);
+
+  input_a_strategy[0] = SizeToInt(dev_num);
+
+  if (axes_type_ == INT_TYPE) {
+    if (IntToSize(axes_int_) == inputs_shape_[0].size()) {
+      input_b_strategy[0] = SizeToInt(dev_num);  // find the relevant dimension for input_b
+    }
+  } else if (axes_type_ == TUPLE_TUPLE_TYPE) {
+    // if input_a's axes contain 0, input_b has the dimension relevant to the batch dimension
+    bool found = false;
+    size_t relevant_index = 0;
+    for (size_t i = 0; i < axes_tuple_tuple_[0].size(); ++i) {
+      if (axes_tuple_tuple_[0][i] == 0) {
+        found = true;
+        relevant_index = i;
+        break;
+      }
+    }
+
+    if (found) {
+      // split the relevant dimension of input_b
+      input_b_strategy[axes_tuple_tuple_[1][relevant_index]] = dev_num;
+    }
+  } else {
+    MS_LOG(EXCEPTION) << name_ << ": The axes type TUPLE_TYPE is not supported now";
+  }
+
+  Strategys strategy = {input_a_strategy, input_b_strategy};
+  return std::make_shared<Strategys>(strategy);
+}
+
+Status TensorDotInfo::GenerateStrategies(int64_t stage_id) { return SUCCESS; }
+
+Status TensorDotInfo::SetCostUnderStrategy(const mindspore::parallel::StrategyPtr &strategy) { return SUCCESS; }
+}  // namespace parallel
+}  // namespace mindspore
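Note (not part of the patch): the CheckStrategy/InferDevMatrixShape logic above requires that every contracted dimension of input_a is sharded exactly like the matching dimension of input_b, and the device matrix is input_a's strategy followed by the non-contracted dimensions of input_b's strategy. The following is a minimal user-level sketch of a strategy that satisfies this rule; the shapes, shard values and 8-device setup are illustrative assumptions, not taken from the patch.

# Sketch only: axes=((1, 2), (0, 1)) contracts dims 1, 2 of x with dims 0, 1 of y,
# so those dimension pairs must carry the same shard values ((2, 2) below).
import numpy as np
import mindspore as ms
from mindspore import context, Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P


class TensorDotNet(Cell):
    def __init__(self, strategy=None):
        super().__init__()
        self.tensor_dot = P.TensorDot(axes=((1, 2), (0, 1))).shard(strategy)

    def construct(self, x, y):
        return self.tensor_dot(x, y)


context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0)
# x: [a, b, c] sharded (2, 2, 2); y: [b, c, d] sharded (2, 2, 1): the contracted b and c
# dimensions match, and the device matrix becomes (2, 2, 2, 1), whose product is the 8 devices.
net = TensorDotNet(strategy=((2, 2, 2), (2, 2, 1)))
x = Tensor(np.ones([32, 16, 16]), dtype=ms.float32)
y = Tensor(np.ones([16, 16, 8]), dtype=ms.float32)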
diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/tensordot_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/tensordot_info.h
new file mode 100644
index 0000000000..4ca1374a0c
--- /dev/null
+++ b/mindspore/ccsrc/frontend/parallel/ops_info/tensordot_info.h
@@ -0,0 +1,76 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_FRONTEND_PARALLEL_OPS_INFO_TENSORDOT_INFO_H_
+#define MINDSPORE_CCSRC_FRONTEND_PARALLEL_OPS_INFO_TENSORDOT_INFO_H_
+
+#include
+#include
+#include
+#include
+
+#include "utils/ms_utils.h"
+#include "ir/value.h"
+#include "frontend/parallel/auto_parallel/operator_costmodel.h"
+#include "frontend/parallel/ops_info/operator_info.h"
+#include "frontend/parallel/strategy.h"
+#include "frontend/parallel/tensor_layout/tensor_redistribution.h"
+
+namespace mindspore {
+namespace parallel {
+enum AxesType {
+  INT_TYPE = 0,
+  TUPLE_TYPE,
+  TUPLE_TUPLE_TYPE,
+};
+
+class TensorDotInfo : public OperatorInfo {
+ public:
+  TensorDotInfo(const std::string &name, const Shapes &inputs_shape, const Shapes &outputs_shape,
+                const PrimitiveAttrs &attrs)
+      : OperatorInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared(true)) {}
+  ~TensorDotInfo() override = default;
+
+  Status Init(const StrategyPtr &strategy) override;
+  Status InitForCostModel(const StrategyPtr &strategy) override;
+
+  Status GenerateStrategies(int64_t stage_id) override;
+  Status SetCostUnderStrategy(const StrategyPtr &strategy) override;
+  Status PrepareStrategy(int32_t stage_id, size_t dev_num, Dimensions combined_partitions, size_t input0_shape_size,
+                         size_t input1_shape_size, StrategyPtr *sp);
+
+ protected:
+  Status CheckStrategy(const StrategyPtr &strategy) override;
+  Status InferMirrorOps() override;
+  Status InferForwardCommunication() override;
+  Status InferTensorInfo() override;
+  Status InferDevMatrixShape() override;
+  Status InferTensorMap() override;
+  Status GetAttrs() override;
+  std::shared_ptr<Strategys> GenerateBatchStrategies() override;
+  void InferTensorMapAxesInt(const TensorMap &tensor_map_index);
+  void InferTensorMapAxesTuple(size_t size, const TensorMap &input_a_tensor_map, const TensorMap &tensor_map_index);
+  void ShowAxes();
+  Shape origin_dev_matrix_shape_;
+
+  AxesType axes_type_ = INT_TYPE;
+  int32_t axes_int_ = 1;
+  std::vector<int32_t> axes_tuple_;
+  std::vector<std::vector<int32_t>> axes_tuple_tuple_;
+};
+}  // namespace parallel
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_FRONTEND_PARALLEL_OPS_INFO_TENSORDOT_INFO_H_
diff --git a/mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc b/mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc
index 82f11dbc20..26ce63c4fc 100644
--- a/mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc
+++ b/mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc
@@ -276,6 +276,7 @@ std::vector<TypePtr> ExtractOutputTypeByNode(const CNodePtr &node) {
 }
 
 bool IsElementWiseOperator(const std::string &op_name) {
+  // clang-format off
   static const std::set<std::string> elementwise_op = {ACTIVATION,
                                                        GELU, TANH, SOFTMAX, LOG_SOFTMAX, RELU,
                                                        SQRT, CAST, POW,
@@ -294,7 +295,9 @@ bool IsElementWiseOperator(const std::string &op_name) {
                                                        DIVNONAN, LOGICALAND, ELU,
                                                        LOGICALOR, RELU6, SOFTPLUS,
                                                        SOFTSIGN, LESS, LESSEQUAL,
-                                                       BESSELI1E, GREATEREQUAL, APPROXIMATEEQUAL};
+                                                       BESSELI1E, GREATEREQUAL, APPROXIMATEEQUAL,
+                                                       REPEAT_ELEMENTS};
+  // clang-format on
   auto iter = elementwise_op.find(op_name);
   return (iter != elementwise_op.end());
 }
@@ -313,7 +316,7 @@ bool IsSplittableOperator(const std::string &op_name) {
      EXPM1, LOG1P, SIN, SINH, TAN, RSQRT, INV, RECIPROCAL, ROUND, FLOOR, SIGN, ERF, ERFC, ZEROSLIKE, ONESLIKE,
      BESSELI0E, BESSELI1E, FLOORMOD, ASSIGN, ASSIGN_ADD, ATAN2, DIVNONAN, LOGICALAND, LOGICALOR, ELU, RELU6,
      RELUV2, SOFTPLUS, SOFTSIGN, GREATEREQUAL, LESSEQUAL, LESS, APPROXIMATEEQUAL, MOD, UNIQUE, UNSORTED_SEGMENT_SUM,
-     UNSORTED_SEGMENT_MIN};
+     UNSORTED_SEGMENT_MIN, REPEAT_ELEMENTS, TENSOR_DOT};
   // clang-format on
 
   auto iter = splittable_op.find(op_name);
diff --git a/mindspore/ccsrc/frontend/parallel/step_parallel.cc b/mindspore/ccsrc/frontend/parallel/step_parallel.cc
index f07c332818..28a7790e03 100644
--- a/mindspore/ccsrc/frontend/parallel/step_parallel.cc
+++ b/mindspore/ccsrc/frontend/parallel/step_parallel.cc
@@ -3174,6 +3174,7 @@ bool StepParallel(const FuncGraphPtr &root, const opt::OptimizerPtr &optimizer)
   (void)gettimeofday(&end_time, nullptr);
   uint64_t time = kUSecondInSecond * static_cast<uint64_t>(end_time.tv_sec - start_time.tv_sec);
   time += static_cast<uint64_t>(end_time.tv_usec - start_time.tv_usec);
+  MS_LOG(INFO) << "Now leaving step parallel, used time: " << time << " us";
 
   return changes;
 }
diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py
index 2d2cb74a5e..31579e74b4 100644
--- a/mindspore/ops/operations/math_ops.py
+++ b/mindspore/ops/operations/math_ops.py
@@ -814,6 +814,7 @@ class TensorDot(PrimitiveWithInfer):
             raise ValueError("Axes have to be the same size/length")
         if len(self.axes[0]) != len(set(self.axes[0])) or len(self.axes[1]) != len(set(self.axes[1])):
             raise ValueError("Axes cannot have duplicating values")
+        self.add_prim_attr("axes", self.axes)
 
     def int_to_tuple_conv(self):
         """
diff --git a/tests/ut/python/parallel/test_repeat_elements.py b/tests/ut/python/parallel/test_repeat_elements.py
new file mode 100644
index 0000000000..aff2fb3f56
--- /dev/null
+++ b/tests/ut/python/parallel/test_repeat_elements.py
@@ -0,0 +1,86 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+
+import mindspore as ms
+from mindspore import context, Tensor, Parameter
+from mindspore.common.api import _executor
+from mindspore.nn import Cell, TrainOneStepCell, Momentum
+from mindspore.ops import operations as P
+
+
+class Net(Cell):
+    def __init__(self, mul_weight, strategy1=None, strategy2=None):
+        super().__init__()
+        self.mul = P.Mul().shard(strategy1)
+        self.repeat = P.RepeatElements(rep=2, axis=1).shard(strategy2)
+        self.mul_weight = Parameter(mul_weight, "w1")
+
+    def construct(self, x, b):
+        out = self.mul(x, self.mul_weight)
+        out = self.repeat(out)
+        return out
+
+
+_x = Tensor(np.ones([128, 64, 32]), dtype=ms.float32)
+_w1 = Tensor(np.ones([128, 64, 32]), dtype=ms.float32)
+_b = Tensor(np.ones([128, 64, 32]), dtype=ms.float32)
+
+
+def compile_net(net):
+    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
+    train_net = TrainOneStepCell(net, optimizer)
+    train_net.set_auto_parallel()
+    train_net.set_train()
+    _executor.compile(train_net, _x, _b)
+    context.reset_auto_parallel_context()
+
+
+def test_repeat_elements_data_parallel():
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0)
+    strategy1 = ((16, 1, 1), (16, 1, 1))
+    strategy2 = ((16, 1, 1),)
+    net = Net(_w1, strategy1, strategy2)
+    compile_net(net)
+
+
+def test_repeat_elements_model_parallel():
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0)
+    strategy1 = ((1, 1, 16), (1, 1, 16))
+    strategy2 = ((1, 1, 16),)
+    net = Net(_w1, strategy1, strategy2)
+    compile_net(net)
+
+
+def test_repeat_elements_hybrid_parallel():
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0)
+    strategy1 = ((2, 2, 4), (2, 2, 4))
+    strategy2 = ((2, 2, 4),)
+    net = Net(_w1, strategy1, strategy2)
+    compile_net(net)
+
+
+def test_repeat_elements_auto_parallel():
+    context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0)
+    net = Net(_w1)
+    compile_net(net)
+
+
+def test_repeat_elements_repeat_calc():
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0)
+    strategy1 = ((2, 2, 4), (2, 2, 4))
+    strategy2 = ((1, 2, 2),)
+    net = Net(_w1, strategy1, strategy2)
+    compile_net(net)
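Note (not part of the patch): this section adds TensorDotInfo on the C++ side but only shows a RepeatElements unit test. A companion parallel test for TensorDot could mirror test_repeat_elements.py above; the sketch below is a hypothetical illustration of that pattern, and its file name, shapes and strategy values are assumptions rather than content from the patch.

# Hypothetical companion test sketch for TensorDot sharding (not part of this patch);
# it mirrors the structure and compile_net() helper of test_repeat_elements.py above.
import numpy as np

import mindspore as ms
from mindspore import context, Tensor, Parameter
from mindspore.common.api import _executor
from mindspore.nn import Cell, TrainOneStepCell, Momentum
from mindspore.ops import operations as P


class TensorDotNet(Cell):
    def __init__(self, mul_weight, strategy1=None, strategy2=None):
        super().__init__()
        self.mul = P.Mul().shard(strategy1)
        # axes=1 contracts the last dim of the first input with the first dim of the second input.
        self.tensor_dot = P.TensorDot(axes=1).shard(strategy2)
        self.mul_weight = Parameter(mul_weight, "w1")

    def construct(self, x, b):
        out = self.mul(x, self.mul_weight)
        out = self.tensor_dot(out, b)
        return out


_x = Tensor(np.ones([128, 32]), dtype=ms.float32)
_w1 = Tensor(np.ones([128, 32]), dtype=ms.float32)
_b = Tensor(np.ones([32, 64]), dtype=ms.float32)


def compile_net(net):
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    _executor.compile(train_net, _x, _b)
    context.reset_auto_parallel_context()


def test_tensor_dot_model_parallel():
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0)
    strategy1 = ((4, 2), (4, 2))
    # the contracted dims (last of input_a, first of input_b) share the same shard value 2,
    # so the device matrix is (4, 2, 2), whose product matches the 16 devices.
    strategy2 = ((4, 2), (2, 2))
    net = TensorDotNet(_w1, strategy1, strategy2)
    compile_net(net)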