/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*!
 * \file hcom_ops.h
 * \brief Huawei Collective Communication Library (HCCL) ops.
 */
#ifndef OPS_BUILT_IN_OP_PROTO_INC_HCOM_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_HCOM_OPS_H_

#include "graph/operator_reg.h"

namespace ge {

/**
 * @brief Outputs a tensor gathering all input tensors.
 * @par Inputs:
 * x: A tensor. Must be one of the following types: int8, int16, int32,
 * int64, uint64, float16, float32.
 * @par Attributes:
 * @li rank_size: A required integer identifying the number of ranks
 * participating in the op.
 * @li group: A required string identifying the group name of ranks
 * participating in the op.
 * @par Outputs:
 * y: A Tensor. Has the same type as "x".
 * @attention Constraints:
 * "group" is limited to 128 characters. Use "hccl_world_group"
 * as the name of a world group.
 */
REG_OP(HcomAllGather)
    .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64}))
    .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64}))
    .REQUIRED_ATTR(rank_size, Int)
    .REQUIRED_ATTR(group, String)
    .ATTR(alpha, Float, 1.0)
    .ATTR(beta, Float, 0.0)
    .OP_END_FACTORY_REG(HcomAllGather)
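
/*
 * A minimal usage sketch (an illustration, not part of the registry):
 * assuming the class generated by REG_OP exposes set_input_x and set_attr_*
 * setters, as in the Ascend GE graph IR, an all-gather over 8 ranks of the
 * world group could be wired as follows; the upstream node and attribute
 * values are hypothetical.
 *
 *   ge::op::Data x("x");                          // hypothetical upstream node
 *   auto gather = ge::op::HcomAllGather("all_gather")
 *       .set_input_x(x)
 *       .set_attr_rank_size(8)                    // number of participating ranks
 *       .set_attr_group("hccl_world_group");      // group name, <= 128 characters
 */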

/**
 * @brief Outputs a tensor containing the reduction across all input tensors
 * passed to the op.
 * @par Inputs:
 * x: A tensor. Must be one of the following types: int8, int16, int32,
 * float16, float32.
 * @par Attributes:
 * @li reduction: A required string identifying the reduction operation to
 * perform. The supported operations are: "sum", "max", "min", "prod".
 * @li group: A required string identifying the group name of ranks
 * participating in the op.
 * @li fusion: An optional integer identifying the fusion flag of the op.
 * 0: no fusion; 1 (default): fusion; 2: fuse the ops by fusion id.
 * @li fusion_id: An optional integer identifying the fusion id of the op.
 * The HcomAllReduce ops with the same fusion id will be fused.
 * @par Outputs:
 * y: A Tensor. Has the same type as "x".
 * @attention Constraints:
 * "group" is limited to 128 characters. Use "hccl_world_group"
 * as the name of a world group.
 */
REG_OP(HcomAllReduce)
    .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16}))
    .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16}))
    .REQUIRED_ATTR(reduction, String)
    .REQUIRED_ATTR(group, String)
    .ATTR(fusion, Int, 1)
    .ATTR(fusion_id, Int, -1)
    .ATTR(alpha, Float, 1.0)
    .ATTR(beta, Float, 0.0)
    .OP_END_FACTORY_REG(HcomAllReduce)
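
/*
 * A minimal usage sketch (illustrative; input node and attribute values are
 * assumptions): a summing all-reduce whose result has the same shape and
 * type as "x". Per the fusion attributes documented above, fusion = 2 with a
 * shared fusion_id would fuse this node with other HcomAllReduce nodes
 * carrying the same id.
 *
 *   auto reduce = ge::op::HcomAllReduce("all_reduce")
 *       .set_input_x(grad)                        // hypothetical gradient tensor
 *       .set_attr_reduction("sum")                // "sum", "max", "min" or "prod"
 *       .set_attr_group("hccl_world_group")
 *       .set_attr_fusion(2)                       // fuse by fusion id
 *       .set_attr_fusion_id(1);
 */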

/**
 * @brief Broadcasts the input tensor of the root rank to all ranks.
 * @par Inputs:
 * x: A list of dynamic input tensors. Must be one of the following types:
 * int8, int16, int32, int64, uint64, float16, float32.
 * @par Attributes:
 * @li root_rank: A required integer identifying the root rank in the op.
 * The input of this rank will be broadcast to other ranks.
 * @li group: A required string identifying the group name of ranks
 * participating in the op.
 * @par Outputs:
 * y: A list of dynamic output tensors. Has the same type and length as "x".
 * @attention Constraints:
 * "group" is limited to 128 characters. Use "hccl_world_group"
 * as the name of a world group.
 */
REG_OP(HcomBroadcast)
    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64}))
    .DYNAMIC_OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64}))
    .REQUIRED_ATTR(root_rank, Int)
    .REQUIRED_ATTR(group, String)
    .ATTR(alpha, Float, 1.0)
    .ATTR(beta, Float, 0.0)
    .OP_END_FACTORY_REG(HcomBroadcast)
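
/*
 * A minimal usage sketch (illustrative): HcomBroadcast takes a dynamic list
 * of inputs, so this assumes the usual GE IR pattern in which DYNAMIC_INPUT
 * generates create_dynamic_input_x / set_dynamic_input_x accessors; "w0"
 * and "w1" are hypothetical upstream nodes.
 *
 *   auto bcast = ge::op::HcomBroadcast("broadcast")
 *       .set_attr_root_rank(0)                    // rank 0 provides the data
 *       .set_attr_group("hccl_world_group");
 *   bcast.create_dynamic_input_x(2);              // two tensors to broadcast
 *   bcast.set_dynamic_input_x(0, w0);
 *   bcast.set_dynamic_input_x(1, w1);
 */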

/**
 * @brief Performs a reduction across all input tensors, scattering the
 * result in equal blocks among ranks, each rank getting a chunk of data
 * based on its rank index.
 * @par Inputs:
 * x: A tensor. Must be one of the following types: int8, int16, int32,
 * float16, float32.
 * @par Attributes:
 * @li reduction: A required string identifying the reduction operation to
 * perform. The supported operations are: "sum", "max", "min", "prod".
 * @li group: A required string identifying the group name of ranks
 * participating in the op.
 * @li rank_size: A required integer identifying the number of ranks
 * participating in the op.
 * @par Outputs:
 * y: A Tensor. Has the same type as "x".
 * @attention Constraints:
 * "group" is limited to 128 characters. Use "hccl_world_group"
 * as the name of a world group.
 */
REG_OP(HcomReduceScatter)
    .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16}))
    .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16}))
    .REQUIRED_ATTR(reduction, String)
    .REQUIRED_ATTR(group, String)
    .REQUIRED_ATTR(rank_size, Int)
    .ATTR(alpha, Float, 1.0)
    .ATTR(beta, Float, 0.0)
    .OP_END_FACTORY_REG(HcomReduceScatter)
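
/*
 * A minimal usage sketch (illustrative; input node and values are
 * assumptions): with rank_size = 8, each rank receives one eighth of the
 * reduced result, selected by its rank index, so the data in "x" is assumed
 * to divide evenly into 8 blocks.
 *
 *   auto scatter = ge::op::HcomReduceScatter("reduce_scatter")
 *       .set_input_x(x)                           // hypothetical input node
 *       .set_attr_reduction("sum")
 *       .set_attr_group("hccl_world_group")
 *       .set_attr_rank_size(8);
 */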

/**
 * @brief Sends the input tensor to the destination rank.
 * @par Inputs:
 * x: A tensor. Must be one of the following types: int8, int16, int32,
 * int64, uint64, float16, float32.
 * @par Attributes:
 * @li sr_tag: A required integer identifying the send/recv message tag. The
 * message will be received by the HcomReceive op with the same "sr_tag".
 * @li dest_rank: A required integer identifying the destination rank.
 * @li group: A required string identifying the group name of ranks
 * participating in the op.
 * @par Outputs:
 * None.
 * @attention Constraints:
 * @li "group" is limited to 128 characters. Use
 * "hccl_world_group" as the name of a world group.
 * @li Operators HcomSend and HcomReceive have the same "sr_tag".
 * @see HcomReceive
 */
REG_OP(HcomSend)
    .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64}))
    .REQUIRED_ATTR(group, String)
    .REQUIRED_ATTR(sr_tag, Int)
    .REQUIRED_ATTR(dest_rank, Int)
    .ATTR(alpha, Float, 1.0)
    .ATTR(beta, Float, 0.0)
    .OP_END_FACTORY_REG(HcomSend)
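
/*
 * A minimal usage sketch (illustrative): a point-to-point send to rank 1,
 * paired with the HcomReceive sketch further below through the shared
 * sr_tag; "x" and the tag value are hypothetical.
 *
 *   auto send = ge::op::HcomSend("send")
 *       .set_input_x(x)                           // hypothetical tensor to send
 *       .set_attr_group("hccl_world_group")
 *       .set_attr_sr_tag(100)                     // must match the receiver's tag
 *       .set_attr_dest_rank(1);
 */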

/**
 * @brief Receives the tensor from the source rank.
 * @par Inputs:
 * None.
 * @par Attributes:
 * @li sr_tag: A required integer identifying the send/recv message tag. The
 * message will be sent by the HcomSend op with the same "sr_tag".
 * @li src_rank: A required integer identifying the source rank.
 * @li group: A required string identifying the group name of ranks
 * participating in the op.
 * @li shape: A required list identifying the shape of the tensor to be
 * received.
 * @li dtype: A required data type identifying the type of the tensor to be
 * received. The supported types are: int8, int16, int32, int64, uint64,
 * float16, float32.
 * @par Outputs:
 * y: A tensor with the type identified in "dtype".
 * @attention Constraints:
 * @li "group" is limited to 128 characters. Use
 * "hccl_world_group" as the name of a world group.
 * @li Operators HcomSend and HcomReceive have the same "sr_tag".
 * @li "shape" should be the same as that of the input tensor of HcomSend.
 * @li "dtype" should be the same as that of the input tensor of HcomSend.
 * @see HcomSend
 */
REG_OP(HcomReceive)
    .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64}))
    .REQUIRED_ATTR(group, String)
    .REQUIRED_ATTR(sr_tag, Int)
    .REQUIRED_ATTR(src_rank, Int)
    .REQUIRED_ATTR(shape, ListInt)
    .REQUIRED_ATTR(dtype, Type)
    .ATTR(alpha, Float, 1.0)
    .ATTR(beta, Float, 0.0)
    .OP_END_FACTORY_REG(HcomReceive)
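
/*
 * A minimal usage sketch (illustrative): the receiving side of the HcomSend
 * sketch above. "shape" and "dtype" must match the sent tensor, and sr_tag
 * must equal the sender's tag; the concrete values shown are hypothetical.
 *
 *   auto recv = ge::op::HcomReceive("receive")
 *       .set_attr_group("hccl_world_group")
 *       .set_attr_sr_tag(100)                     // pairs with the HcomSend above
 *       .set_attr_src_rank(0)
 *       .set_attr_shape({64, 128})                // shape of the incoming tensor
 *       .set_attr_dtype(DT_FLOAT);
 */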

/**
 * @brief Performs a remote read of input tensors.
 * @par Inputs:
 * remote: A tensor describing the remote memory to read, laid out as
 * (u64 remoteId, u64 addrRemote, u64 length).
 * @par Attributes:
 * dtype: A required data type identifying the type of the tensor to be read.
 * @par Outputs:
 * local: A tensor holding the data read; its element count is
 * length / size_of(dtype).
 */
REG_OP(HcomRemoteRead)
    .INPUT(remote, TensorType({DT_INT64, DT_UINT64}))
    .OUTPUT(local, TensorType::ALL())
    .REQUIRED_ATTR(dtype, Type)
    .OP_END_FACTORY_REG(HcomRemoteRead)
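
/*
 * A minimal usage sketch (illustrative): "remote" carries the
 * (remoteId, addrRemote, length) descriptor as 64-bit integers, and the op
 * reads the described remote memory into "local" as elements of the
 * requested type; "remote_desc" is a hypothetical upstream node.
 *
 *   auto read = ge::op::HcomRemoteRead("remote_read")
 *       .set_input_remote(remote_desc)            // hypothetical descriptor tensor
 *       .set_attr_dtype(DT_FLOAT);                // element type of "local"
 */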

/**
 * @brief Performs a remote write of input tensors.
 * @par Inputs:
 * @li remote: A tensor describing the remote memory to write, laid out as
 * (u64 remoteId, u64 addrRemote, u64 length).
 * @li local: A tensor holding the data to write; its element count is
 * length / size_of(type).
 */
REG_OP(HcomRemoteWrite)
    .INPUT(remote, TensorType({DT_INT64, DT_UINT64}))
    .INPUT(local, TensorType::ALL())
    .OP_END_FACTORY_REG(HcomRemoteWrite)
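
/*
 * A minimal usage sketch (illustrative): the inverse of HcomRemoteRead;
 * "local" supplies the data and "remote" describes where to write it. Both
 * upstream nodes are hypothetical.
 *
 *   auto write = ge::op::HcomRemoteWrite("remote_write")
 *       .set_input_remote(remote_desc)            // hypothetical descriptor tensor
 *       .set_input_local(local_data);             // hypothetical data tensor
 */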

}  // namespace ge

#endif  // OPS_BUILT_IN_OP_PROTO_INC_HCOM_OPS_H_