You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
mindspore/mindspore/ccsrc/device/ascend/tasksink/runtime_utils.cc

108 lines
4.8 KiB

/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "device/ascend/tasksink/runtime_utils.h"
#include <string>
#include "hccl/hcom.h"
#include "utils/log_adapter.h"
#include "utils/utils.h"
// Tag prefixes, one per collective issued by RuntimeUtils::HcomDistribute.
// Each tag is formed as <prefix><task counter><kUnderline>0.
constexpr char kHcomBroadcast[] = "hcom_broadcast_";
constexpr char kHcomAllGather[] = "hcom_all_gather_";
constexpr char kHcomAllReduce[] = "hcom_all_reduce_";
constexpr char kHcomReduceScatter[] = "hcom_reduce_scatter_";
// Separator placed between the numeric fields of a tag.
constexpr char kUnderline[] = "_";
namespace mindspore {
namespace device {
namespace ascend {
namespace tasksink {
// Calls hcom_bind_model(model, stream) and reports whether it succeeded.
//
// Returns true when the call yields HCCL_SUCCESS; otherwise logs the return
// code at ERROR level and returns false.
bool RuntimeUtils::HcomBindModel(rtModel_t model, rtStream_t stream) {
  const hcclResult_t ret = hcom_bind_model(model, stream);
  if (ret != HCCL_SUCCESS) {
    // The code is streamed as a decimal integer, so it must not carry a "0x"
    // prefix (a decimal 16 would otherwise be read as hexadecimal 0x16).
    MS_LOG(ERROR) << "Call hcom_bind_model failed, ret: " << static_cast<int>(ret);
    return false;
  }
  return true;
}
// Calls hcom_unbind_model(model) and reports whether it succeeded.
//
// Returns true when the call yields HCCL_SUCCESS; otherwise logs the return
// code at ERROR level and returns false.
bool RuntimeUtils::HcomUnbindModel(rtModel_t model) {
  const hcclResult_t ret = hcom_unbind_model(model);
  if (ret != HCCL_SUCCESS) {
    // The code is streamed as a decimal integer, so it must not carry a "0x"
    // prefix (a decimal 16 would otherwise be read as hexadecimal 0x16).
    MS_LOG(ERROR) << "Call hcom_unbind_model failed, ret: " << static_cast<int>(ret);
    return false;
  }
  return true;
}
bool RuntimeUtils::HcomDistribute(const std::shared_ptr<HcclTaskInfo> &task_info, rtStream_t stream) {
MS_LOG(INFO) << "hccl distribute start";
MS_EXCEPTION_IF_NULL(task_info);
hcclResult_t ret;
static uint32_t task_counter = 0;
if (task_info->hccl_type() == kBroadcastOpName) {
// call hcom broadcast interface to run op
const string tag_broadcast = kHcomBroadcast + std::to_string(task_counter++) + kUnderline + std::to_string(0);
ret = hcom_broadcast(tag_broadcast.c_str(), reinterpret_cast<void *>(task_info->input_data_addr()),
static_cast<u64>(task_info->count()), static_cast<hcclDataType_t>(task_info->data_type()),
static_cast<u32>(task_info->root_id()), task_info->group().c_str(), stream);
if (ret != HCCL_SUCCESS) {
MS_LOG(ERROR) << "hcom_broadcast fail, return ret: " << static_cast<int>(ret);
return false;
}
} else if (task_info->hccl_type() == kAllGatherOpName) {
// call hcom allgather interface to run op
const string tag_all_gather = kHcomAllGather + std::to_string(task_counter++) + kUnderline + std::to_string(0);
ret = hcom_all_gather(tag_all_gather.c_str(), reinterpret_cast<void *>(task_info->input_data_addr()),
reinterpret_cast<void *>(task_info->output_data_addr()), static_cast<u64>(task_info->count()),
static_cast<hcclDataType_t>(task_info->data_type()), task_info->group().c_str(), stream);
if (ret != HCCL_SUCCESS) {
MS_LOG(ERROR) << "hcom_all_gather fail, return ret: " << ret;
return false;
}
} else if (task_info->hccl_type() == kAllReduceOpName) {
// call hcom allreduce interface to run op
const string tag_all_reduce = kHcomAllReduce + std::to_string(task_counter++) + kUnderline + std::to_string(0);
ret = hcom_all_reduce(tag_all_reduce.c_str(), reinterpret_cast<void *>(task_info->input_data_addr()),
reinterpret_cast<void *>(task_info->output_data_addr()), static_cast<u64>(task_info->count()),
static_cast<hcclDataType_t>(task_info->data_type()),
static_cast<hcclRedOp_t>(task_info->op_type()), task_info->group().c_str(), stream);
if (ret != HCCL_SUCCESS) {
MS_LOG(ERROR) << "hcom_all_reduce fail, return ret: " << ret;
return false;
}
} else if (task_info->hccl_type() == kReduceScatterOpName) {
// call hcom reducescatter interface to run op
const string tag_reduce_scatter =
kHcomReduceScatter + std::to_string(task_counter++) + kUnderline + std::to_string(0);
ret = hcom_reduce_scatter(tag_reduce_scatter.c_str(), reinterpret_cast<void *>(task_info->input_data_addr()),
reinterpret_cast<void *>(task_info->output_data_addr()),
static_cast<u64>(task_info->count()), static_cast<hcclDataType_t>(task_info->data_type()),
static_cast<hcclRedOp_t>(task_info->op_type()), task_info->group().c_str(), stream);
if (ret != HCCL_SUCCESS) {
MS_LOG(ERROR) << "hcom_reduce_scatter fail, return ret: " << ret;
return false;
}
}
return true;
}
} // namespace tasksink
} // namespace ascend
} // namespace device
} // namespace mindspore