graphengine/tests/depends/cce/src/cce_stub.cc

/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <vector>
#include <cce/cce.h>
#include <cce/dnn.h>
#include <cce/compiler_stub.h>
#include <cce/taskdown_api.h>
#include "cce/optimizer/fusion_engine.h"
#include "common/op/attr_value_util.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/graph_utils.h"
using namespace cce;
using namespace std;
using namespace ge;
using namespace fusion;
uint64_t global_mem_base = 0;
namespace cce {
#define DIM_MAX_SIZE 8
static const uint32_t C0 = 16;
struct tagCcPad {};
struct tagCcConvolution {};
struct tagCcLRN {};
struct tagCcFasterRcnnProposal {};
struct tagCcRoiAlign {};
struct tagCcBatchNorm {};
struct tagCcDetectpostprocess {};
struct tagCcSsdDetectionOutput {};
struct tagCcRefinedetDetectionOutput {};
struct tagCcMsrGenerateRpnProposals {};
struct tagCcFilter {
vector<uint32_t> dims;
};
struct tagCcTensor {
ccTensorFormat_t format;
ccDataType_t data_type;
uint32_t dim_cnt;
int32_t real_dim_cnt;
uint32_t data_size;
int32_t dim_buf[DIM_MAX_SIZE];
int32_t stride_buf[DIM_MAX_SIZE];
};
typedef struct tagCcPooling {
ccPoolingMode_t mode;
ccPaddingMode_t pad_mode;
ccNanPropagation_t max_pooling_nan_opt;
uint32_t dim_cnt;
int32_t window_dim[6];
int32_t padding[6];
int32_t stride[6];
} ccPooling_t;
struct tagCcActivation {};
struct tagCcFasterRcnnDetectionOutput {};
struct tagCcSpatialTransformer {};
struct tagCcPower {};
struct tagCcResizeBilinear {};
struct tagCcSsdNormalize {};
struct tagCcSsdPostProcessor {};
struct tagCcSsdPriorBox {};
struct tagCcPsRoiPooling {};
struct tagMsrFastRcnnPredictions {};
struct tagCcPRelu {};
struct tagCcStridedSlice {};
struct tagCcStridedSliceAttrs {};
struct tagCcRnn {};
struct tagCcArgmaxmin {};
typedef struct tagCcLog {
ccDataType_t data_type;
uint32_t param_cnt;
} ccLog_t;
typedef struct tagCcLog *ccLogDescriptor_t;
struct tagCcPadV2 {};
ccStatus_t ccGetPadV2OutputDim(const ccTensorDescriptor_t x_desc, const ccPadV2Descriptor_t pad_desc, int32_t *dim_cnt,
int32_t dim[], int32_t dim_len) {
// Stub: always report a fixed 4-d output shape of {1, 2, 2, 3}.
*dim_cnt = 4;
dim[0] = 1;
dim[1] = 2;
dim[2] = 2;
dim[3] = 3;
return CC_STATUS_SUCCESS;
}
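// Illustrative only (hypothetical helper, not part of the original stub): the
// GetOutputDim-style stubs fill a caller-provided dim array; this is a sketch
// of how a test might consume the fixed {1, 2, 2, 3} shape reported above.
static int32_t ExamplePadV2OutputElementCount(const ccTensorDescriptor_t x_desc, const ccPadV2Descriptor_t pad_desc) {
  int32_t dim_cnt = 0;
  int32_t dim[DIM_MAX_SIZE] = {0};
  ccGetPadV2OutputDim(x_desc, pad_desc, &dim_cnt, dim, DIM_MAX_SIZE);
  int32_t count = 1;
  for (int32_t i = 0; i < dim_cnt; ++i) {
    count *= dim[i];
  }
  return count;  // 1 * 2 * 2 * 3 == 12 with this stub
}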
ccStatus_t ccPadV2Forward(ccHandle_t handle, const ccPadV2Descriptor_t pad_desc, const void *alpha,
const ccTensorDescriptor_t x_desc, const void *x, const void *beta,
const ccTensorDescriptor_t output_desc, void *output) {
return CC_STATUS_SUCCESS;
}
ccStatus_t ccCreatePadV2Descriptor(ccPadV2Descriptor_t *pad_desc) { return CC_STATUS_SUCCESS; }
ccStatus_t ccDestroyPadV2Descriptor(ccPadV2Descriptor_t *pad_desc) { return CC_STATUS_SUCCESS; }
ccStatus_t ccSetKernelOpMap(ccHandle_t handle) { return CC_STATUS_SUCCESS; }
ccStatus_t ccDataDumpForward(ccHandle_t handle, const void *buffer, const uint64_t buf_len, const uint32_t task_index) {
return CC_STATUS_SUCCESS;
}
ccStatus_t ccSetPadV2Descriptor(ccPadV2Descriptor_t pad_desc, const int32_t pad_shape_cnt,
const int32_t pad_shape_low[], const int32_t pad_shape_high[],
const ccPadMode_t pad_mode, const void *pad_value, const ccDataType_t pad_value_type) {
return CC_STATUS_SUCCESS;
}
struct tagCcYoloDetectionOutput {
ccYoloVersion_t yolo_version;
uint32_t net_h;
uint32_t net_w;
uint32_t post_top_k;
uint32_t classes;
float nms_threshold;
float iou_thre_decay;
float coor_scale_factor;
bool relative;
float obj_threshold;
float cls_threshold;
uint32_t bias_num;
float *bias;
};
struct tagCcYoloRegion {};
struct tagCcEltwise {};
struct tagCcHashTableLookup {};
struct tagCcEmbeddingAttnDecoder {};
struct tagNonMaxSuppression {};
struct tagCcArcSinCos {};
struct tagCcPow {};
struct tagCcConcatFive2Four_t {};
struct tagCcConcatFour2Five_t {};
ccStatus_t ccCreatePowDescriptor(ccPowDescriptor_t *pow_desc) {
*pow_desc = new tagCcPow();
return CC_STATUS_SUCCESS;
}
ccStatus_t ccSetPowDescriptor(ccPowDescriptor_t pow_desc, ccDataType_t data_type, uint32_t param_cnt) {
return CC_STATUS_SUCCESS;
}
ccStatus_t ccDestroyPowDescriptor(ccPowDescriptor_t *pow_desc) {
if (nullptr == pow_desc) {
return CC_STATUS_BAD_PARAM;
}
delete *pow_desc;
*pow_desc = nullptr;
return CC_STATUS_SUCCESS;
}
ccStatus_t ccPowForward(ccHandle_t handle, const ccPowDescriptor_t pow_desc, const void *pow_param, const void *alpha,
const ccTensorDescriptor_t x_desc, const void *x, const ccTensorDescriptor_t y_desc,
const void *y, const void *beta, const ccTensorDescriptor_t z_desc, void *z) {
return CC_STATUS_SUCCESS;
}
ccStatus_t ccLogicalOrForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t x_desc, const void *x,
const ccTensorDescriptor_t y_desc, const void *y, const void *beta,
const ccTensorDescriptor_t output_desc, void *output) {
return CC_STATUS_SUCCESS;
}
ccStatus_t ccCompareForward(ccHandle_t handle, ccCompareType_t compare_type, const void *alpha,
const ccTensorDescriptor_t x_desc, const void *x, const ccTensorDescriptor_t y_desc,
const void *y, const void *beta, const ccTensorDescriptor_t output_desc, void *output) {
return CC_STATUS_SUCCESS;
}
ccStatus_t ccGetCompareOutputDim(const ccTensorDescriptor_t x_desc, const ccTensorDescriptor_t y_desc, int32_t *dim_cnt,
int32_t *dim, int32_t dim_len) {
*dim_cnt = 4;
dim[0] = 1;
dim[1] = 1;
dim[2] = 1;
dim[3] = 1;
return CC_STATUS_SUCCESS;
}
ccStatus_t ccArcTanForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t x_desc, const void *x,
const void *beta, const ccTensorDescriptor_t y_desc, void *y) {
return CC_STATUS_SUCCESS;
}
ccStatus_t ccAtanhForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t x_desc, const void *x,
const void *beta, const ccTensorDescriptor_t y_desc, void *y) {
return CC_STATUS_SUCCESS;
}
ccStatus_t ccIsDepthwiseHighPerformance(int32_t input_n, int32_t input_c, int32_t input_h, int32_t input_w,
int32_t filter_n, int32_t filter_c, int32_t filter_h, int32_t filter_w,
int32_t dilation_h, int32_t dilation_w, int32_t pad_h_head, int32_t pad_h_tail,
int32_t pad_w_head, int32_t pad_w_tail, int32_t stride_h, int32_t stride_w,
int32_t group_num, bool &is_high_performance, bool is_quant,
ccDataType_t input_data_type, ccDataType_t output_data_type) {
is_high_performance = true;
return CC_STATUS_SUCCESS;
}
struct tagCcSpaceToBatch {};
struct tagCcBatchToSpace {};
struct tagCcResizeNearestNeighbor {};
ccStatus_t ccGetStream(ccHandle_t handle, rtStream_t *stream_id) { return CC_STATUS_SUCCESS; }
ccStatus_t ccGetRtVersion(uint32_t *count) { return CC_STATUS_SUCCESS; }
ccStatus_t ccDestroyTensorDescriptor(ccTensorDescriptor_t *tensor_desc) {
if (nullptr == tensor_desc) {
return CC_STATUS_BAD_PARAM;
}
delete *tensor_desc;
*tensor_desc = nullptr;
return CC_STATUS_SUCCESS;
}
ccStatus_t ccDestroyFilterDescriptor(ccFilterDescriptor_t *filter_desc) {
if (nullptr == filter_desc) {
return CC_STATUS_BAD_PARAM;
}
delete *filter_desc;
*filter_desc = nullptr;
return CC_STATUS_SUCCESS;
}
ccStatus_t ccGetFilterSizeInBytes(const ccFilterDescriptor_t filter_desc, uint32_t *size) {
// Assumes a 4-d filter descriptor was already set, so dims holds {k, c, h, w}.
*size = filter_desc->dims[0] * filter_desc->dims[1] * filter_desc->dims[2] * filter_desc->dims[3] * sizeof(float);
return CC_STATUS_SUCCESS;
}
ccStatus_t ccTransFilter(const ccFilterDescriptor_t w_desc, const void *w, ccFilterDescriptor_t y_desc, void *y,
uint32_t y_size_in_bytes) {
// Stub: no filter transformation is performed; the output buffer is left untouched.
return CC_STATUS_SUCCESS;
}
ccStatus_t ccCreateTensorDescriptor(ccTensorDescriptor_t *tensor_desc) {
*tensor_desc = new tagCcTensor();
return CC_STATUS_SUCCESS;
}
ccStatus_t ccSetTensor4dDescriptor(ccTensorDescriptor_t tensor_desc, ccTensorFormat_t format, ccDataType_t data_type,
int32_t n, int32_t c, int32_t h, int32_t w) {
if (CC_TENSOR_NHWC == format) {
tensor_desc->dim_buf[0] = n;
tensor_desc->dim_buf[1] = h;
tensor_desc->dim_buf[2] = w;
tensor_desc->dim_buf[3] = c;
} else {
tensor_desc->dim_buf[0] = n;
tensor_desc->dim_buf[1] = c;
tensor_desc->dim_buf[2] = h;
tensor_desc->dim_buf[3] = w;
}
tensor_desc->dim_cnt = 4;
tensor_desc->data_type = data_type;
tensor_desc->format = format;
// Note: sizeof(data_type) is the size of the enum value itself (typically 4
// bytes), which this stub uses as a stand-in for the element size.
tensor_desc->data_size = n * c * h * w * sizeof(data_type);
return CC_STATUS_SUCCESS;
}
ccStatus_t ccGetTensorSizeInBytes(const ccTensorDescriptor_t tensor_desc, uint32_t *size) {
if ((nullptr == tensor_desc) || (nullptr == size)) {
return CC_STATUS_BAD_PARAM;
}
*size = tensor_desc->data_size;
return CC_STATUS_SUCCESS;
}
ccStatus_t ccGetTensorMemorySizeInBytes(const ccTensorDescriptor_t tensor_desc, uint32_t *size) {
*size = tensor_desc->data_size;
return CC_STATUS_SUCCESS;
}
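// Illustrative only (hypothetical helper, not part of the original stub): a
// minimal sketch of how a test might drive the tensor-descriptor stubs above.
// The data type is taken as a parameter so the sketch does not assume any
// particular ccDataType_t enumerator.
static ccStatus_t ExampleTensorDescRoundTrip(ccDataType_t data_type) {
  ccTensorDescriptor_t desc = nullptr;
  if (ccCreateTensorDescriptor(&desc) != CC_STATUS_SUCCESS) {
    return CC_STATUS_BAD_PARAM;
  }
  // For CC_TENSOR_NHWC the stub stores dims as {n, h, w, c}; any other format
  // is stored as {n, c, h, w}.
  ccSetTensor4dDescriptor(desc, CC_TENSOR_NHWC, data_type, 1, 3, 5, 5);
  uint32_t size = 0;
  ccGetTensorSizeInBytes(desc, &size);  // yields the data_size computed above
  return ccDestroyTensorDescriptor(&desc);  // frees and nulls the descriptor
}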
ccStatus_t ccCreateFilterDescriptor(ccFilterDescriptor_t *filter_desc) {
*filter_desc = new tagCcFilter();
return CC_STATUS_SUCCESS;
}
ccStatus_t ccSetFilter4dDescriptor(ccFilterDescriptor_t filter_desc, ccTensorFormat_t format, ccDataType_t data_type,
int32_t k, int32_t c, int32_t h, int32_t w) {
filter_desc->dims.push_back(k);
filter_desc->dims.push_back(c);
filter_desc->dims.push_back(h);
filter_desc->dims.push_back(w);
return CC_STATUS_SUCCESS;
}
ccStatus_t ccSetFilterFractalDescriptor(ccFilterDescriptor_t filter_desc, ccTensorFormat_t format,
ccDataType_t data_type, int32_t k, int32_t c, int32_t h, int32_t w) {
filter_desc->dims.push_back(k);
filter_desc->dims.push_back(c);
filter_desc->dims.push_back(h);
filter_desc->dims.push_back(w);
return CC_STATUS_SUCCESS;
}
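// Illustrative only (hypothetical helper): the filter stubs are order-dependent,
// since ccGetFilterSizeInBytes reads dims[0..3] and therefore assumes
// ccSetFilter4dDescriptor has already pushed {k, c, h, w}.
static ccStatus_t ExampleFilterSize(ccTensorFormat_t format, ccDataType_t data_type, uint32_t *size) {
  ccFilterDescriptor_t filter = nullptr;
  if (ccCreateFilterDescriptor(&filter) != CC_STATUS_SUCCESS) {
    return CC_STATUS_BAD_PARAM;
  }
  ccSetFilter4dDescriptor(filter, format, data_type, 16, 3, 3, 3);  // k, c, h, w
  ccGetFilterSizeInBytes(filter, size);  // 16 * 3 * 3 * 3 * sizeof(float) bytes
  return ccDestroyFilterDescriptor(&filter);
}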
ccStatus_t ccSetStream(ccHandle_t handle, rtStream_t stream_id) { return CC_STATUS_SUCCESS; }
ccStatus_t ccCreatePoolingMaskDescriptor(ccTensorDescriptor_t *pooling_mask_desc) {
*pooling_mask_desc = new tagCcTensor();
return CC_STATUS_SUCCESS;
}
ccStatus_t ccSetPoolingMaskTensorDescriptor(ccTensorDescriptor_t tensor_desc, ccTensorFormat_t format,
ccDataType_t data_type, int32_t n, int32_t c, int32_t h, int32_t w,
int32_t window_h, int32_t window_w) {
return CC_STATUS_SUCCESS;
}
ccStatus_t ccSetFilter6dDescriptor(ccTensorDescriptor_t filter_desc, ccTensorFormat_t format, ccDataType_t data_type,
int32_t c1, int32_t h, int32_t w, int32_t n, int32_t co, int32_t c0) {
return CC_STATUS_SUCCESS;
}
/// @ingroup dnn
/// @brief get the format of a tensor descriptor
/// @param [in] tensor_desc descriptor of tensor
/// @param [out] format pointer that receives the format
/// @return ccStatus_t
ccStatus_t ccGetTensorFormat(const ccTensorDescriptor_t tensor_desc, ccTensorFormat_t *format) {
*format = tensor_desc->format;
return CC_STATUS_SUCCESS;
}
ccStatus_t ccTransTensor(const ccTensorDescriptor_t x_desc, const void *x, const ccTensorDescriptor_t y_desc, void *y,
uint32_t y_size_in_bytes) {
return CC_STATUS_SUCCESS;
}
void cceSysInit() {}
bool compilerStubFree() { return true; }
bool compilerStubInit() { return true; }
ccStatus_t ccSetInt8Filter4dDescriptor(ccFilterDescriptor_t filter_desc, ccTensorFormat_t format,
ccDataType_t data_type, int32_t k, int32_t c, int32_t h, int32_t w,
ccDataType_t output_data_type) {
filter_desc->dims.push_back(k);
filter_desc->dims.push_back(c);
filter_desc->dims.push_back(h);
filter_desc->dims.push_back(w);
return CC_STATUS_SUCCESS;
}
ccStatus_t ccSetTensorNdDescriptor(ccTensorDescriptor_t tensor_desc, ccDataType_t data_type, int32_t dim_cnt,
int32_t dimA[]) {
tensor_desc->data_type = data_type;
// As in ccSetTensor4dDescriptor, sizeof(data_type) is the size of the enum,
// not of the element type; data_size ends up as that value times the product
// of the dimensions.
tensor_desc->data_size = sizeof(data_type);
for (int32_t i = 0; i < dim_cnt; i++) {
tensor_desc->data_size = tensor_desc->data_size * dimA[i];
}
tensor_desc->format = CC_TENSOR_ND;
return CC_STATUS_SUCCESS;
}
ccStatus_t CceProfilingConfig(const char *target, const char *job_ctx, uint32_t flag) { return CC_STATUS_SUCCESS; }
ccStatus_t ccSetTensorRealDimCnt(ccTensorDescriptor_t tensor_desc, int32_t real_dim_cnt) {
if (tensor_desc != nullptr) {
tensor_desc->real_dim_cnt = real_dim_cnt;
}
return CC_STATUS_SUCCESS;
}
ccStatus_t ccGetTensorRealDimCnt(ccTensorDescriptor_t tensor_desc, int32_t *real_dim_cnt) {
*real_dim_cnt = tensor_desc->real_dim_cnt;
return CC_STATUS_SUCCESS;
}
ccStatus_t ccSetQuantizeFactors(ccQuantizeDescriptor_t quantize_info, ccScaleValueMode_t scale_val_mode,
const uint16_t *scale, const uint16_t *offset, const uint8_t *offset_pad) {
return CC_STATUS_SUCCESS;
}
ccStatus_t ccSetReQuantizeFactors(ccQuantizeDescriptor_t quantize_info, ccScaleValueMode_t scale_val_mode,
const uint16_t *scale_rq, const uint16_t *next_layer_offset,
const int32_t *offset_w) {
return CC_STATUS_SUCCESS;
}
ccStatus_t ccSetDeQuantizeFactors(ccQuantizeDescriptor_t quantize_info, ccScaleValueMode_t scale_val_mode,
const uint16_t *scale_dq, const int32_t *offset_w) {
return CC_STATUS_SUCCESS;
}
ccStatus_t ccSetQuantizeAlgoAndScaleType(ccQuantizeDescriptor_t quantize_info, ccQuantizeAlgo_t quant_algo,
ccScaleType_t scale_type, bool relu_flag) {
return CC_STATUS_SUCCESS;
}
ccStatus_t ccPrintTimeStat() { return CC_STATUS_SUCCESS; }
ccStatus_t ccSetModelId(ccHandle_t handle, uint32_t model_id) { return CC_STATUS_SUCCESS; }
ccStatus_t ccGetKernelContext(rtStream_t stream_id, ccOpContext &op_context) {
// A null stream is reported as a TE kernel; any other stream is reported as a
// CCE_AI_CORE kernel with fixed ids for the test.
if (stream_id == nullptr) {
op_context.kernelType = ccKernelType::TE;
} else {
op_context.kernelType = ccKernelType::CCE_AI_CORE;
op_context.opId = 1;
op_context.kernelFuncId = 1;
op_context.isFlowtable = true;
op_context.opCount = 1;
op_context.opIndex2[0] = 0;
}
return CC_STATUS_SUCCESS;
}
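// Illustrative only (hypothetical helper): how a test might branch on the
// stubbed kernel context populated above.
static bool ExampleIsTeKernel(rtStream_t stream_id) {
  ccOpContext op_context{};
  ccGetKernelContext(stream_id, op_context);
  return op_context.kernelType == ccKernelType::TE;
}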
ccStatus_t ccUpdateKernelArgs(ccOpContext &op_context, uint64_t data_base_addr, uint64_t weight_base_addr,
uint64_t variable_base_addr, void *args_addr, uint64_t args_size, void *l2ctrl_addr) {
return CC_STATUS_SUCCESS;
}
ccStatus_t ccGetKernelArgsAddrs(ccOpContext &op_context, void *args_addr, uint64_t args_size, void *l2ctrl_addr,
std::vector<ccOpAddrsInfo> &op_addrs_info) {
// Fabricate one input and one output address entry for the test. Note that
// addrPos points at a stack local, so it must not be dereferenced after this
// call returns; callers are expected to consume addrData only.
ccOpAddrsInfo tmp_op_addrs_info;
uint64_t tmp_input = global_mem_base;
tmp_op_addrs_info.addrPos = &tmp_input;
tmp_op_addrs_info.addrData = tmp_input;
op_addrs_info.push_back(tmp_op_addrs_info);
// The output entry sits at a fixed offset from the input base.
uint64_t tmp_output = global_mem_base + 5476352;
tmp_op_addrs_info.addrPos = &tmp_output;
tmp_op_addrs_info.addrData = tmp_output;
op_addrs_info.push_back(tmp_op_addrs_info);
return CC_STATUS_SUCCESS;
}
ccStatus_t ccSetKernelArgs(std::vector<ccOpAddrsInfo> &data_info) { return CC_STATUS_SUCCESS; }
} // namespace cce
// The ccFusion APIs below are defined outside namespace cce.
ccStatus_t ccFusionStart(ccHandle_t handle, uint32_t graph_id, uint32_t init_flag, CceFusionMemCfg_t mem_cfg) {
return CC_STATUS_SUCCESS;
}
// Why is this ccFusion overload not placed inside namespace cce?
ccStatus_t ccFusionStart(ccHandle_t handle, uint32_t graph_id, uint32_t init_flag, uint32_t addr_change_flag) {
return CC_STATUS_SUCCESS;
}
ccStatus_t ccFusionEnd(ccHandle_t handle, uint32_t graph_id) { return CC_STATUS_SUCCESS; }
ccStatus_t ccFusionTaskEnd(ccHandle_t handle, uint32_t graph_id) { return CC_STATUS_SUCCESS; }
ccStatus_t ccKernelLaunchRepeat(ccHandle_t handle) { return CC_STATUS_SUCCESS; }
ccStatus_t ccKernelDelete(ccHandle_t handle) { return CC_STATUS_SUCCESS; }
ccStatus_t cce::ccSetTensorFormat(cce::tagCcTensor *, cce::tagCcTensorFormat) { return CC_STATUS_SUCCESS; }
namespace fusion {
uint32_t BufferFusion(std::shared_ptr<ge::ComputeGraph>, std::shared_ptr<ge::ComputeGraph>, bool) { return 0; }
uint32_t BufferFusionTrain(std::shared_ptr<ge::ComputeGraph>, std::shared_ptr<ge::ComputeGraph>) { return 0; }
uint32_t GraphFusionTrain(ge::ComputeGraphPtr orig_graph, ge::ComputeGraphPtr fusion_graph) { return 0; }
} // namespace fusion
namespace fusion {
using namespace ge;
uint32_t Fusion(ComputeGraphPtr model_graph, ComputeGraphPtr fusion_graph, kScopeNodeMap_t &te_fusion_map) {
OpDescPtr op_def_a = std::make_shared<OpDesc>();
op_def_a->SetName("reduction_nd");
op_def_a->SetType("reduction_nd");
// Configure each descriptor before adding it to the op, since AddInputDesc /
// AddOutputDesc store copies (modifying the descriptor afterwards would have
// no effect on the op).
GeTensorDescPtr v_input_desc = std::make_shared<GeTensorDesc>();
v_input_desc->SetFormat(FORMAT_NCHW);
v_input_desc->SetDataType(DT_FLOAT);
v_input_desc->SetShape(GeShape({1, 3, 5, 5}));
ge::TensorUtils::SetSize(*v_input_desc, 192);
ge::TensorUtils::SetRealDimCnt(*v_input_desc, 4);
op_def_a->AddInputDesc(*v_input_desc);
vector<int64_t> v_input;
v_input.push_back(0);
op_def_a->SetInputOffset(v_input);
GeTensorDescPtr output_desc = std::make_shared<GeTensorDesc>();
output_desc->SetFormat(FORMAT_NCHW);
output_desc->SetDataType(DT_FLOAT);
output_desc->SetShape(GeShape({1, 3, 5}));
ge::TensorUtils::SetSize(*output_desc, 96);
ge::TensorUtils::SetRealDimCnt(*output_desc, 3);
op_def_a->AddOutputDesc(*output_desc);
OpDescPtr op_def_b = std::make_shared<OpDesc>();
op_def_b->SetName("transdata_1");
op_def_b->SetType("TransData");
NodePtr node_a = fusion_graph->AddNode(op_def_a);
NodePtr node_b = fusion_graph->AddNode(op_def_b);
GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_b->GetInDataAnchor(0));
int32_t a = 1;
int32_t b = 2;
AttrUtils::SetInt(op_def_a, "fusion_scope", a);
AttrUtils::SetInt(op_def_b, "fusion_scope", b);
vector<NodePtr> node_list1;
node_list1.push_back(node_a);
vector<NodePtr> node_list2;
node_list2.push_back(node_b);
te_fusion_map[1] = node_list1;
te_fusion_map[2] = node_list2;
return FUSION_STATUS_SUCCESS;
}
uint32_t FusionTaskBuild(cce::ccHandle_t cc_handle, ge::ComputeGraphPtr fusion_graph, ge::Buffer &buffer,
ModelRes &model_res, std::vector<TaskDef> &task_def_list_) {
TaskDef task_def_temp;
task_def_list_.push_back(task_def_temp);
return FUSION_STATUS_SUCCESS;
}
uint32_t GraphFusion(ge::ComputeGraphPtr orig_graph, ge::ComputeGraphPtr fusion_graph) {
*fusion_graph = *orig_graph;
return FUSION_STATUS_SUCCESS;
}
void FusionTaskBuildComplete(std::vector<ccHandle_t> cc_handle_list) {}
} // namespace fusion
ccStatus_t cce::ccSetTensorDescriptorQuantizeParam(ccTensorDescriptor_t tensor_desc,
const ccVecQuantizePara_t *vec_quantize_para) {
return CC_STATUS_SUCCESS;
}
ccStatus_t cce::ccSetAllOffsetQuantizeFactors(ccQuantizeDescriptor_t quantize_info, const uint8_t *offset_w,
const uint8_t *offset_d, const uint16_t *scale_req,
const uint16_t *offset_d_next) {
return CC_STATUS_SUCCESS;
}