Merge branch 'master' into 04quant

pull/1084/head
chenzomi 5 years ago
commit 1239cd6135

@@ -85,7 +85,7 @@ const std::map<TypeId, size_t> type_map = {{kNumberTypeBool, 1}, {kNumberType
} while (0)
template <typename T>
T Ceil(T n1, T n2) {
T DivCeil(T n1, T n2) {
return (n2 != 0) ? (n1 - 1) / n2 + 1 : 0;
}
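As a quick check of the renamed helper (a minimal standalone sketch; the function body is copied from the diff above, the test values are mine):

```cpp
#include <cassert>
#include <cstddef>

template <typename T>
T DivCeil(T n1, T n2) {
  // Ceiling division for positive n1: (n1 - 1) / n2 + 1 == ceil(n1 / n2),
  // without the overflow risk of (n1 + n2 - 1) / n2. n2 == 0 yields 0.
  return (n2 != 0) ? (n1 - 1) / n2 + 1 : 0;
}

int main() {
  assert(DivCeil<size_t>(17, 16) == 2);  // 17 elements need 2 cubes of 16
  assert(DivCeil<size_t>(16, 16) == 1);  // exact multiple
  assert(DivCeil<size_t>(5, 0) == 0);    // guarded divide-by-zero
  return 0;
}
```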
@@ -371,15 +371,48 @@ std::vector<size_t> C1hwncoc0DeviceShape(const std::vector<size_t> &shape) {
device_shape.push_back(kCubeSize);
return device_shape;
}
std::vector<size_t> FracZc04DeviceShape(const std::vector<size_t> &shape) {
if (!CheckDims(shape)) {
MS_LOG(EXCEPTION) << "Check dims failed.";
}
std::vector<size_t> device_shape;
size_t c0 = 4;
size_t first_dim = DivCeil(c0 * shape[2] * shape[3], kCubeSize);
size_t no = DivCeil(DivCeil(shape[0], kCubeSize) * kCubeSize, kCubeSize);
device_shape.push_back(first_dim);
device_shape.push_back(no);
device_shape.push_back(kCubeSize);
device_shape.push_back(kCubeSize);
return device_shape;
}
std::vector<size_t> Nc1hwc04DeviceShape(const std::vector<size_t> &shape) {
if (!CheckDims(shape)) {
MS_LOG(EXCEPTION) << "Check dims failed.";
}
std::vector<size_t> device_shape;
size_t C1 = 1;
size_t C0 = 4;
device_shape.push_back(shape[0]);
device_shape.push_back(C1);
device_shape.push_back(shape[2]);
device_shape.push_back(shape[3]);
device_shape.push_back(C0);
return device_shape;
}
} // namespace
std::vector<size_t> TransShapeToDevice(const std::vector<size_t> &shape, const std::string &format) {
using DeviceShapeTransfer = std::function<std::vector<size_t>(const std::vector<size_t> &)>;
const std::map<std::string, DeviceShapeTransfer> device_shape_map{
{kOpFormat_NCHW, NchwDeviceShape}, {kOpFormat_NHWC, NhwcDeviceShape},
{kOpFormat_HWCN, HwchDeviceShape}, {kOpFormat_FRAC_Z, FracZDeviceShape},
{kOpFormat_NC1HWC0, Nc1hwc0DeviceShape}, {kOpFormat_C1HWNCoC0, C1hwncoc0DeviceShape},
};
const std::map<std::string, DeviceShapeTransfer> device_shape_map{{kOpFormat_NCHW, NchwDeviceShape},
{kOpFormat_NHWC, NhwcDeviceShape},
{kOpFormat_HWCN, HwchDeviceShape},
{kOpFormat_FRAC_Z, FracZDeviceShape},
{kOpFormat_NC1HWC0, Nc1hwc0DeviceShape},
{kOpFormat_C1HWNCoC0, C1hwncoc0DeviceShape},
{kOpFormat_FRACTAL_Z_C04, FracZc04DeviceShape},
{kOpFormat_NC1HWC0_C04, Nc1hwc04DeviceShape}};
if (format == kOpFormat_ND || format == kOpFormat_DEFAULT) {
return shape;
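A worked example of the new FRACTAL_Z_C04 transfer (a standalone sketch; it assumes kCubeSize is 16, matching the Ascend cube size used elsewhere in this file): for an NCHW weight shape {32, 3, 3, 3}, first_dim = DivCeil(4 * 3 * 3, 16) = 3 and no = DivCeil(DivCeil(32, 16) * 16, 16) = 2, so the device shape is {3, 2, 16, 16}.

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

constexpr size_t kCubeSize = 16;  // assumed cube size

size_t DivCeil(size_t n1, size_t n2) { return (n2 != 0) ? (n1 - 1) / n2 + 1 : 0; }

// Mirrors FracZc04DeviceShape above, minus the dimension check.
std::vector<size_t> FracZc04DeviceShape(const std::vector<size_t> &shape) {
  const size_t c0 = 4;
  size_t first_dim = DivCeil(c0 * shape[2] * shape[3], kCubeSize);
  size_t no = DivCeil(DivCeil(shape[0], kCubeSize) * kCubeSize, kCubeSize);
  return {first_dim, no, kCubeSize, kCubeSize};
}

int main() {
  for (size_t d : FracZc04DeviceShape({32, 3, 3, 3})) std::cout << d << ' ';  // 3 2 16 16
  std::cout << '\n';
  return 0;
}
```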
@@ -506,13 +539,13 @@ bool NchwToFracZ(const FormatArgs &args, void *result) {
MS_LOG(ERROR) << "Illegal dtype.";
return false;
}
size_t c1 = Ceil(c, c0);
size_t c1 = DivCeil(c, c0);
size_t hw = h * w;
size_t chw = c * hw;
size_t hwc0 = hw * c0;
size_t nchw = n * chw;
size_t hf_cnt = Ceil(n, kCubeSize);
size_t hf_cnt = DivCeil(n, kCubeSize);
size_t vf_cnt = c1 * hw;
size_t fractal_ele_cnt = c0 * kCubeSize;
size_t total_ele_cnt = hf_cnt * vf_cnt * fractal_ele_cnt;
@@ -775,7 +808,7 @@ bool NchwToNc1hwc0(const FormatArgs &args, void *result) {
MS_LOG(ERROR) << "Illegal dtype.";
return false;
}
size_t c1 = Ceil(c, c0);
size_t c1 = DivCeil(c, c0);
size_t hw = h * w;
size_t chw = c * hw;
size_t c1hwc0 = c1 * hw * c0;

@@ -408,8 +408,13 @@ Status DEPipeline::ParseMindRecordOp(const py::dict &args, std::shared_ptr<Datas
}
std::shared_ptr<MindRecordOp::Builder> builder = std::make_shared<MindRecordOp::Builder>();
(void)builder->SetDatasetFile(ToString(args["dataset_file"]));
bool load_dataset = ToBool(args["load_dataset"]);
if (load_dataset == true) {
(void)builder->SetDatasetFile({ToString(args["dataset_file"])});
} else {
(void)builder->SetDatasetFile(ToStringVector(args["dataset_file"]));
}
(void)builder->SetLoadDataset(load_dataset);
std::vector<std::string> in_col_names;
if (!args["columns_list"].is_none()) {
in_col_names = ToStringVector(args["columns_list"]);

@@ -151,16 +151,17 @@ void bindDatasetOps(py::module *m) {
});
(void)py::class_<MindRecordOp, DatasetOp, std::shared_ptr<MindRecordOp>>(*m, "MindRecordOp")
.def_static("get_num_rows", [](const std::string &path, const py::object &sampler) {
int64_t count = 0;
std::shared_ptr<mindrecord::ShardOperator> op;
if (py::hasattr(sampler, "_create_for_minddataset")) {
auto create = sampler.attr("_create_for_minddataset");
op = create().cast<std::shared_ptr<mindrecord::ShardOperator>>();
}
THROW_IF_ERROR(MindRecordOp::CountTotalRows(path, op, &count));
return count;
});
.def_static("get_num_rows",
[](const std::vector<std::string> &paths, bool load_dataset, const py::object &sampler) {
int64_t count = 0;
std::shared_ptr<mindrecord::ShardOperator> op;
if (py::hasattr(sampler, "_create_for_minddataset")) {
auto create = sampler.attr("_create_for_minddataset");
op = create().cast<std::shared_ptr<mindrecord::ShardOperator>>();
}
THROW_IF_ERROR(MindRecordOp::CountTotalRows(paths, load_dataset, op, &count));
return count;
});
(void)py::class_<ManifestOp, DatasetOp, std::shared_ptr<ManifestOp>>(*m, "ManifestOp")
.def_static("get_num_rows_and_classes",

@@ -40,7 +40,7 @@ using mindrecord::ShardOperator;
using mindrecord::ShardReader;
// Builder constructor. Creates the builder object.
MindRecordOp::Builder::Builder() : build_dataset_file_("") {
MindRecordOp::Builder::Builder() : build_dataset_file_({}) {
// Some arguments to the MindRecordOp constructor have a default argument that is taken
// from the client config.
// The user may choose to change these values for the construction of the StorageOp by
@@ -63,9 +63,9 @@ Status MindRecordOp::Builder::Build(std::shared_ptr<MindRecordOp> *ptr) {
"Building a MindRecordOp that has not provided a file.");
}
new_mind_record_op = std::make_shared<MindRecordOp>(build_num_mind_record_workers_, build_rows_per_buffer_,
build_dataset_file_, build_op_connector_queue_size_,
build_columns_to_load_, build_operators_, build_block_reader_);
new_mind_record_op = std::make_shared<MindRecordOp>(
build_num_mind_record_workers_, build_rows_per_buffer_, build_dataset_file_, build_load_dataset_,
build_op_connector_queue_size_, build_columns_to_load_, build_operators_, build_block_reader_);
RETURN_IF_NOT_OK(new_mind_record_op->Init());
@@ -76,12 +76,14 @@ Status MindRecordOp::Builder::Build(std::shared_ptr<MindRecordOp> *ptr) {
Status MindRecordOp::Builder::SanityCheck() const { return Status::OK(); }
// Constructor of the MindRecordOp.
MindRecordOp::MindRecordOp(int32_t num_mind_record_workers, int32_t rows_per_buffer, std::string dataset_file,
int32_t op_connector_queue_size, const std::vector<std::string> &columns_to_load,
MindRecordOp::MindRecordOp(int32_t num_mind_record_workers, int32_t rows_per_buffer,
std::vector<std::string> dataset_file, bool load_dataset, int32_t op_connector_queue_size,
const std::vector<std::string> &columns_to_load,
const std::vector<std::shared_ptr<ShardOperator>> &operators, const bool &block_reader)
: ParallelOp(num_mind_record_workers, op_connector_queue_size),
rows_per_buffer_(rows_per_buffer),
dataset_file_(dataset_file),
load_dataset_(load_dataset),
columns_to_load_(columns_to_load),
operators_(operators),
num_mind_record_workers_(num_mind_record_workers),
@@ -101,9 +103,10 @@ MindRecordOp::MindRecordOp(int32_t num_mind_record_workers, int32_t rows_per_buf
// Private helper method to encapsulate some common construction/reset tasks
Status MindRecordOp::Init() {
shard_reader_ = std::make_unique<ShardReader>();
auto rc = shard_reader_->Open(dataset_file_, num_mind_record_workers_, columns_to_load_, operators_, block_reader_);
auto rc = shard_reader_->Open(dataset_file_, load_dataset_, num_mind_record_workers_, columns_to_load_, operators_,
block_reader_);
CHECK_FAIL_RETURN_UNEXPECTED(rc != MSRStatus::FAILED,
CHECK_FAIL_RETURN_UNEXPECTED(rc == MSRStatus::SUCCESS,
"MindRecordOp init failed. Error message: " + ErrnoToMessage(rc));
data_schema_ = std::make_unique<DataSchema>();
@@ -201,8 +204,12 @@ void MindRecordOp::Print(std::ostream &out, bool show_all) const {
// Call the super class for displaying any common detailed info
ParallelOp::Print(out, show_all);
// Then show any custom derived-internal stuff
out << "\n1 Dataset file : " << dataset_file_ << "\nNumber of rows : " << num_rows_
<< "\nRows per buffer : " << rows_per_buffer_ << "\nNumber of buffers : " << buffers_needed_
out << "\n Dataset file : ";
for (auto &file : dataset_file_) {
out << file << " ";
}
out << "\nNumber of rows : " << num_rows_ << "\nRows per buffer : " << rows_per_buffer_
<< "\nNumber of buffers : " << buffers_needed_
<< "\nNumber of ShardReader workers : " << num_mind_record_workers_ << "\n\n";
}
}
@@ -668,10 +675,10 @@ Status MindRecordOp::LaunchThreadAndInitOp() {
return Status::OK();
}
Status MindRecordOp::CountTotalRows(const std::string dataset_path, const std::shared_ptr<ShardOperator> &op,
int64_t *count) {
Status MindRecordOp::CountTotalRows(const std::vector<std::string> dataset_path, bool load_dataset,
const std::shared_ptr<ShardOperator> &op, int64_t *count) {
std::unique_ptr<ShardReader> shard_reader = std::make_unique<ShardReader>();
MSRStatus rc = shard_reader->CountTotalRows(dataset_path, op, count);
MSRStatus rc = shard_reader->CountTotalRows(dataset_path, load_dataset, op, count);
if (rc == MSRStatus::FAILED) {
RETURN_STATUS_UNEXPECTED("MindRecordOp count total rows failed.");
}
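A hypothetical call site for the new signature (the file name and null operator are illustrative; the declaration itself appears in the header hunk below):

```cpp
int64_t count = 0;
// load_dataset = true: the single file is used to discover the whole dataset;
// false: the vector is taken as an explicit file list.
Status s = MindRecordOp::CountTotalRows({"imagenet.mindrecord0"}, true, nullptr, &count);
```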

@@ -77,8 +77,8 @@ class MindRecordOp : public ParallelOp {
return *this;
}
Builder &SetDatasetFile(const std::string &file) {
build_dataset_file_ = file;
Builder &SetDatasetFile(const std::vector<std::string> &files) {
build_dataset_file_ = files;
return *this;
}
@@ -97,6 +97,11 @@ class MindRecordOp : public ParallelOp {
return *this;
}
Builder &SetLoadDataset(bool load_dataset) {
build_load_dataset_ = load_dataset;
return *this;
}
Status SanityCheck() const;
static int32_t num_mind_record_workers() { return kDefaultMindRecordWorkers; }
@@ -109,7 +114,8 @@ class MindRecordOp : public ParallelOp {
int32_t builder_num_workers_;
int32_t build_rows_per_buffer_;
int32_t build_op_connector_queue_size_;
std::string build_dataset_file_;
std::vector<std::string> build_dataset_file_;
bool build_load_dataset_;
std::vector<std::string> build_columns_to_load_;
std::vector<std::shared_ptr<ShardOperator>> build_operators_;
bool build_block_reader_;
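A hypothetical builder chain under the new interface (file names are illustrative; load_dataset = false pairs with an explicit file list, mirroring the ParseMindRecordOp branch earlier in this diff):

```cpp
auto builder = std::make_shared<MindRecordOp::Builder>();
(void)builder->SetDatasetFile({"data.mindrecord0", "data.mindrecord1"});
(void)builder->SetLoadDataset(false);
std::shared_ptr<MindRecordOp> op;
Status rc = builder->Build(&op);
```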
@@ -119,12 +125,12 @@ class MindRecordOp : public ParallelOp {
// @note The builder class should be used to call it
// @param num_mind_record_workers - The number of workers for the op (run by ShardReader)
// @param rows_per_buffer - The requested number of rows per buffer
// @param dataset_file - A shard file
// @param dataset_file - dataset files
// @param op_connector_queue_size - The output connector queue size
// @param columns_to_load - The list of columns to use (column name)
// @param operators - ShardOperators for Shuffle, Category, Sample
MindRecordOp(int32_t num_mind_record_workers, int32_t rows_per_buffer, std::string dataset_file,
int32_t op_connector_queue_size, const std::vector<std::string> &columns_to_load,
MindRecordOp(int32_t num_mind_record_workers, int32_t rows_per_buffer, std::vector<std::string> dataset_file,
bool load_dataset, int32_t op_connector_queue_size, const std::vector<std::string> &columns_to_load,
const std::vector<std::shared_ptr<ShardOperator>> &operators, const bool &block_reader);
// Destructor
@@ -169,21 +175,22 @@ class MindRecordOp : public ParallelOp {
// Getter method
int32_t num_rows() const { return num_rows_; }
// Getter method
static Status CountTotalRows(const std::string dataset_path, const std::shared_ptr<ShardOperator> &op,
int64_t *count);
static Status CountTotalRows(const std::vector<std::string> dataset_path, bool load_dataset,
const std::shared_ptr<ShardOperator> &op, int64_t *count);
// Getter method
int32_t rows_per_buffer() const { return rows_per_buffer_; }
// Getter method
std::string dataset_file() const { return dataset_file_; }
std::vector<std::string> dataset_file() const { return dataset_file_; }
// Getter method
std::vector<std::string> columns_to_load() const { return columns_to_load_; }
bool block_reader() const { return block_reader_; }
bool load_dataset() const { return load_dataset_; }
Status Init();
Status SetColumnsBlob();
@@ -246,7 +253,8 @@ class MindRecordOp : public ParallelOp {
Status FetchBlockBuffer(const int32_t &buffer_id);
int32_t rows_per_buffer_; // The number of requested rows per buffer.
std::string dataset_file_; // A dataset file
std::vector<std::string> dataset_file_; // dataset files
bool load_dataset_; // load dataset from single file or not
std::vector<std::string> columns_to_load_; // Columns to load from dataset
std::vector<std::shared_ptr<ShardOperator>> operators_; // ShardOperators to use
int32_t num_mind_record_workers_; // number of workers to be spawned by ShardReader

@@ -193,6 +193,14 @@ class TraceForAfter : public TraceInfo {
TraceInfoPtr clone() override { return std::make_shared<TraceForAfter>(*shared_from_base<TraceForAfter>()); }
};
class TraceLoopEnd : public TraceInfo {
public:
explicit TraceLoopEnd(const DebugInfoPtr &info) : TraceInfo(info, "loop_end", "↓↓") {}
MS_DECLARE_PARENT(TraceLoopEnd, TraceInfo);
~TraceLoopEnd() override = default;
TraceInfoPtr clone() override { return std::make_shared<TraceLoopEnd>(*shared_from_base<TraceLoopEnd>()); }
};
class TraceEquiv : public TraceInfo {
public:
explicit TraceEquiv(const DebugInfoPtr &info) : TraceInfo(info, "equiv", "equiv") {}

@@ -34,6 +34,7 @@ namespace ascend {
namespace {
const float kWegihtBaseScore = 1;
const float kFeatureMapBaseScore = 10;
constexpr auto kPriChoosenFormat = "pri_format";
enum MatchCountPriority : int {
MATCH_COUNT_PRIORITY_BEGIN = 0,
MATCH_DTYPE_COUNT = MATCH_COUNT_PRIORITY_BEGIN,
@@ -85,6 +86,7 @@ string GetPriorityMatchFormat(const CNodePtr &cnode) {
if (need_change_nd) {
priority_matched_format = kOpFormat_DEFAULT;
}
AnfAlgo::SetNodeAttr(kPriChoosenFormat, MakeValue(priority_matched_format), cnode);
return priority_matched_format;
}
/**
@@ -394,9 +396,9 @@ void PrintRaiseOrReducePrecisionSelectedInfo(const CNodePtr &cnode,
std::ostringstream buffer;
buffer << cnode->DebugString();
if (precision_reduce) {
buffer << " reduce precision, node datatype: ";
buffer << " reduce precision, node datatype: \n";
} else {
buffer << " raise precision, node datatype: ";
buffer << " raise precision, node datatype: \n";
}
PrintInputAndOutputInferType(buffer, cnode);
buffer << ", select kernel:" << selected_kernel_build_info->ToString();
@@ -464,66 +466,57 @@ std::vector<std::shared_ptr<kernel::KernelBuildInfo>> FilterRaisedOrReducePrecis
}
} // namespace
std::shared_ptr<kernel::KernelBuildInfo> CanHitKernelInfo(
int *status, const CNodePtr &kernel_node,
const std::vector<std::shared_ptr<kernel::KernelBuildInfo>> &kernel_info_list) {
KernelSelectStatus SetMatchedKernelInfo(const CNodePtr &kernel_node,
const std::vector<std::shared_ptr<kernel::KernelBuildInfo>> &kernel_info_list) {
MS_EXCEPTION_IF_NULL(kernel_node);
KernelSelectStatus select_status = kNoMatched;
bool precision_reduce = false;
std::shared_ptr<kernel::KernelBuildInfo> selected_kernel_info = nullptr;
// Matched kernel info
// Filter the kernel info that matches the ME-inferred type
auto filtered_kernel_info_list = GetAllMatchedFilteredKernelInfo(kernel_node, kernel_info_list);
if (!filtered_kernel_info_list.empty()) {
selected_kernel_info = ChooseMatchedKernelInfo(kernel_node, filtered_kernel_info_list);
select_status = kStatusAllMatched;
} else {
// selected kernel info using raised precision or reduce precision
filtered_kernel_info_list =
FilterRaisedOrReducePrecisionMatchedKernelInfo(kernel_node, kernel_info_list, &precision_reduce);
selected_kernel_info = ChooseMatchedKernelInfo(kernel_node, filtered_kernel_info_list);
if (selected_kernel_info == nullptr) {
return nullptr;
return select_status;
} else {
PrintRaiseOrReducePrecisionSelectedInfo(kernel_node, selected_kernel_info, precision_reduce);
*status = precision_reduce ? kStatusReducePrecision : kStatusRaisePrecision;
select_status = precision_reduce ? kStatusReducePrecision : kStatusRaisePrecision;
}
}
return selected_kernel_info;
// Set kernel info to the anfnode
AnfAlgo::SetSelectKernelBuildInfo(selected_kernel_info, kernel_node.get());
// Set format and data type for input tensor.
SetTensorDeviceInfo(*selected_kernel_info, kernel_node);
return select_status;
}
int SelectKernelInfo(const CNodePtr &kernel_node) {
KernelSelectStatus SelectKernelInfo(const CNodePtr &kernel_node) {
std::vector<std::shared_ptr<kernel::KernelBuildInfo>> kernel_info_list;
int status = kStatusAllMatched;
MS_EXCEPTION_IF_NULL(kernel_node);
kernel::KernelQuery(kernel_node, &kernel_info_list);
// filter kernel info matched with me infered type
auto selected_kernel_info = CanHitKernelInfo(&status, kernel_node, kernel_info_list);
if (selected_kernel_info == nullptr) {
auto select_status = SetMatchedKernelInfo(kernel_node, kernel_info_list);
// If aicore does not find a valid kernel info, reload the aicpu kernel info list and retry
if (select_status == kNoMatched) {
MS_LOG(WARNING) << "The node [" << kernel_node->DebugString()
<< "] cannot find valid TBE kernel info, try to get aicpu kernel info";
kernel::AicpuQuery(kernel_node, &kernel_info_list);
selected_kernel_info = CanHitKernelInfo(&status, kernel_node, kernel_info_list);
kernel::AICpuQuery(kernel_node, &kernel_info_list);
select_status = SetMatchedKernelInfo(kernel_node, kernel_info_list);
}
if (selected_kernel_info == nullptr) {
// The kernel info was found in neither the aicpu kernel list nor the aicore kernel list
if (select_status == kNoMatched) {
std::ostringstream buffer;
PrintInputAndOutputInferType(buffer, kernel_node);
MS_EXCEPTION(TypeError) << "The node [" << kernel_node->DebugString()
<< "] cannot find valid kernel info; the type is not supported: " << buffer.str();
}
AnfAlgo::SetSelectKernelBuildInfo(selected_kernel_info, kernel_node.get());
// Set format and data type for input tensor.
SetTensorDeviceInfo(*selected_kernel_info, kernel_node);
return status;
}
bool CheckKernelAccuracySupported(const CNodePtr &kernel_node,
const kernel::KernelBuildInfoPtr &new_kernel_build_info) {
MS_EXCEPTION_IF_NULL(kernel_node);
std::vector<std::shared_ptr<kernel::KernelBuildInfo>> kernel_info_list;
kernel::KernelQuery(kernel_node, &kernel_info_list);
auto result = std::find_if(kernel_info_list.begin(), kernel_info_list.end(),
[&new_kernel_build_info](const kernel::KernelBuildInfoPtr item) {
MS_EXCEPTION_IF_NULL(item);
return *item == *new_kernel_build_info;
});
return result != kernel_info_list.end();
return select_status;
}
} // namespace ascend
} // namespace device

@@ -21,8 +21,13 @@
namespace mindspore {
namespace device {
namespace ascend {
int SelectKernelInfo(const CNodePtr &kernel_node);
bool CheckKernelAccuracySupported(const CNodePtr &kernel_node, const kernel::KernelBuildInfoPtr &new_kernel_build_info);
enum KernelSelectStatus {
kNoMatched = -1,
kStatusAllMatched = 0,
kStatusReducePrecision = 1,
kStatusRaisePrecision = 2,
};
KernelSelectStatus SelectKernelInfo(const CNodePtr &kernel_node);
} // namespace ascend
} // namespace device
} // namespace mindspore
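A sketch of how a caller might branch on the returned status (the caller code is illustrative; only the enum and function come from the header above):

```cpp
KernelSelectStatus status = device::ascend::SelectKernelInfo(kernel_node);
if (status == device::ascend::kStatusReducePrecision ||
    status == device::ascend::kStatusRaisePrecision) {
  // Precision was adjusted to find a matching kernel; SelectKernelInfo has
  // already logged which datatypes were raised or reduced.
}
```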

@@ -69,9 +69,8 @@ class UnsortedSegmentSumGpuKernel : public GpuKernel {
protected:
void InitSizeLists() override {
input_size_list_.push_back(input_dim0_ * input_dim1_ * sizeof(T));
input_size_list_.push_back(output_dim0_ * sizeof(S));
input_size_list_.push_back(output_dim0_ * sizeof(int));
output_size_list_.push_back(output_dim0_ * output_dim1_ * sizeof(S));
input_size_list_.push_back(input_dim0_ * sizeof(S));
output_size_list_.push_back(output_dim0_ * output_dim1_ * sizeof(T));
}
private:

@@ -49,6 +49,21 @@ struct PowerFunc<half, half> {
}
};
template <typename T, typename S>
struct RealDivFunc {
__device__ __forceinline__ S operator()(const T &lhs, const T &rhs) { return (lhs / rhs); }
};
template <typename T, typename S>
struct MulFunc {
__device__ __forceinline__ S operator()(const T &lhs, const T &rhs) { return (lhs * rhs); }
};
template <typename T, typename S>
struct SubFunc {
__device__ __forceinline__ S operator()(const T &lhs, const T &rhs) { return (lhs - rhs); }
};
template <>
struct PowerFunc<half, bool> {
// invalid branch
@@ -94,6 +109,15 @@ __global__ void BroadcastKernel(const int l0, const int l1, const int l2, const
case BROADCAST_TYPE_POWER:
return BroadcastOperator<T, S, PowerFunc<T, S>>(l0, l1, l2, l3, r0, r1, r2, r3, d0, d1, d2, d3, input0, input1,
output);
case BROADCAST_TYPE_REALDIV:
return BroadcastOperator<T, S, RealDivFunc<T, S>>(l0, l1, l2, l3, r0, r1, r2, r3, d0, d1, d2, d3, input0, input1,
output);
case BROADCAST_TYPE_MUL:
return BroadcastOperator<T, S, MulFunc<T, S>>(l0, l1, l2, l3, r0, r1, r2, r3, d0, d1, d2, d3, input0, input1,
output);
case BROADCAST_TYPE_SUB:
return BroadcastOperator<T, S, SubFunc<T, S>>(l0, l1, l2, l3, r0, r1, r2, r3, d0, d1, d2, d3, input0, input1,
output);
}
}
@@ -127,6 +151,12 @@ __global__ void NoBroadcastKernel(const int nums, enum BroadcastOpType op, const
return NoBroadcastOperator<T, S, MaximumFunc<T, S>>(nums, input0, input1, output);
case BROADCAST_TYPE_POWER:
return NoBroadcastOperator<T, S, PowerFunc<T, S>>(nums, input0, input1, output);
case BROADCAST_TYPE_REALDIV:
return NoBroadcastOperator<T, S, RealDivFunc<T, S>>(nums, input0, input1, output);
case BROADCAST_TYPE_MUL:
return NoBroadcastOperator<T, S, MulFunc<T, S>>(nums, input0, input1, output);
case BROADCAST_TYPE_SUB:
return NoBroadcastOperator<T, S, SubFunc<T, S>>(nums, input0, input1, output);
}
}

@@ -25,6 +25,9 @@ enum BroadcastOpType {
BROADCAST_TYPE_MAXIMUM = 2,
BROADCAST_TYPE_MINIMUM = 3,
BROADCAST_TYPE_POWER = 4,
BROADCAST_TYPE_REALDIV = 5,
BROADCAST_TYPE_MUL = 6,
BROADCAST_TYPE_SUB = 7,
BROADCAST_TYPE_INVALID = 0xffffffff,
};
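The three new ops reuse the existing pattern: an enum value selects a templated functor that the kernel applies element-wise. A standalone host-side analogue of the NoBroadcastKernel dispatch (illustrative only; the real code runs the same functor once per CUDA thread):

```cpp
#include <cassert>

enum BroadcastOpType { BROADCAST_TYPE_REALDIV = 5, BROADCAST_TYPE_MUL = 6, BROADCAST_TYPE_SUB = 7 };

template <typename T, typename S>
struct SubFunc {
  S operator()(const T &lhs, const T &rhs) const { return lhs - rhs; }
};

template <typename T, typename S>
struct MulFunc {
  S operator()(const T &lhs, const T &rhs) const { return lhs * rhs; }
};

template <typename T, typename S, typename Func>
void NoBroadcastOperator(int nums, const T *input0, const T *input1, S *output) {
  for (int i = 0; i < nums; ++i) output[i] = Func()(input0[i], input1[i]);
}

int main() {
  float a[2] = {3.f, 4.f}, b[2] = {1.f, 2.f}, out[2];
  BroadcastOpType op = BROADCAST_TYPE_SUB;
  switch (op) {
    case BROADCAST_TYPE_MUL:
      NoBroadcastOperator<float, float, MulFunc<float, float>>(2, a, b, out);
      break;
    case BROADCAST_TYPE_SUB:
      NoBroadcastOperator<float, float, SubFunc<float, float>>(2, a, b, out);
      break;
    default:
      break;
  }
  assert(out[0] == 2.f && out[1] == 2.f);
  return 0;
}
```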

@@ -1,42 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/gpu/math/binary_op_gpu_kernel.h"
namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_ONE(
RealDiv,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
BinaryOpGpuKernel, float)
MS_REG_GPU_KERNEL_ONE(
RealDiv,
KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
BinaryOpGpuKernel, half)
MS_REG_GPU_KERNEL_ONE(
Mul, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
BinaryOpGpuKernel, float)
MS_REG_GPU_KERNEL_ONE(
Mul, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
BinaryOpGpuKernel, half)
MS_REG_GPU_KERNEL_ONE(
Sub, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
BinaryOpGpuKernel, float)
MS_REG_GPU_KERNEL_ONE(
Sub, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
BinaryOpGpuKernel, half)
} // namespace kernel
} // namespace mindspore

@@ -1,237 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_BINARYOP_GPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_BINARYOP_GPU_KERNEL_H_
#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include <map>
#include "kernel/gpu/gpu_kernel.h"
#include "kernel/gpu/gpu_kernel_factory.h"
#include "kernel/gpu/cuda_impl/unary_op_impl.cuh"
#include "kernel/gpu/kernel_constants.h"
namespace mindspore {
namespace kernel {
enum BinaryOpType { BINARY_OP_ADD = 0, BINARY_OP_SUB, BINARY_OP_MUL, BINARY_OP_DIV, BINARY_OP_INVALID_TYPE = 255 };
static const std::map<std::string, BinaryOpType> kBinaryOpTypeMap = {
{"Sub", BINARY_OP_SUB}, {"Mul", BINARY_OP_MUL}, {"RealDiv", BINARY_OP_DIV}};
template <typename T>
class BinaryOpGpuKernel : public GpuKernel {
public:
BinaryOpGpuKernel()
: cudnn_handle_(nullptr),
binary_op_type_(BINARY_OP_INVALID_TYPE),
tensor_op_(CUDNN_OP_TENSOR_MUL),
inputA_descriptor_(nullptr),
inputB_descriptor_(nullptr),
opTensor_descriptor_(nullptr),
cudnn_data_type_(CUDNN_DATA_FLOAT),
is_null_input_(false),
input_size_(0),
output_size_(0),
workspace_size_(0) {}
~BinaryOpGpuKernel() override { DestroyResource(); }
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
if (is_null_input_) {
return true;
}
T *input_addr = GetDeviceAddress<T>(inputs, 0);
T *input_addr2 = GetDeviceAddress<T>(inputs, 1);
T *output_addr = GetDeviceAddress<T>(outputs, 0);
const float alpha = 1;
const float beta = 0;
T *inputB_addr = nullptr;
switch (binary_op_type_) {
case BINARY_OP_SUB: {
T *workspace_addr = GetDeviceAddress<T>(workspace, 0);
Negative(input_addr2, workspace_addr, inputs[1]->size / sizeof(T), reinterpret_cast<cudaStream_t>(stream_ptr));
inputB_addr = workspace_addr;
break;
}
case BINARY_OP_MUL: {
inputB_addr = input_addr2;
break;
}
case BINARY_OP_DIV: {
T *workspace_addr = GetDeviceAddress<T>(workspace, 0);
Reciprocal(input_addr2, workspace_addr, inputs[1]->size / sizeof(T),
reinterpret_cast<cudaStream_t>(stream_ptr));
inputB_addr = workspace_addr;
break;
}
default: {
MS_LOG(EXCEPTION) << "Binary operation " << binary_op_type_ << " is not supported.";
}
}
if (inputs[0]->size > inputs[1]->size) {
CHECK_CUDNN_RET_WITH_EXCEPT(
cudnnOpTensor(cudnn_handle_, opTensor_descriptor_, &alpha, inputA_descriptor_, input_addr, &alpha,
inputB_descriptor_, inputB_addr, &beta, inputA_descriptor_, output_addr),
"cudnnOpTensor failed");
} else {
CHECK_CUDNN_RET_WITH_EXCEPT(
cudnnOpTensor(cudnn_handle_, opTensor_descriptor_, &alpha, inputB_descriptor_, inputB_addr, &alpha,
inputA_descriptor_, input_addr, &beta, inputB_descriptor_, output_addr),
"cudnnOpTensor failed");
}
return true;
}
bool Init(const CNodePtr &kernel_node) override {
InitResource();
cudnn_data_type_ = kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))];
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but binary operation needs 2 inputs.";
return false;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but binary operation needs 1 output.";
return false;
}
InferBinaryType(kernel_node);
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto input_shapeB = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
if (input_shape != output_shape && input_shapeB != output_shape) {
MS_LOG(ERROR) << "Double-sided broadcast was not supported in cudnn of cudnnOpTensor:\n"
"InputA must match the corresponding dimension of the destination tensor outC, and each "
"dimension of the inputB "
"must match the corresponding dimension of outC or must be equal to 1.";
return false;
}
is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(input_shapeB);
if (is_null_input_) {
MS_LOG(WARNING) << "BinaryOpGpuKernel input is null";
InitSizeLists();
return true;
}
int shape_n = input_shape.size() < 4 ? 1 : SizeToInt(input_shape[input_shape.size() - 4]);
int shape_c = input_shape.size() < 3 ? 1 : SizeToInt(input_shape[input_shape.size() - 3]);
int shape_h = input_shape.size() < 2 ? 1 : SizeToInt(input_shape[input_shape.size() - 2]);
int shape_w = input_shape.size() == 0 ? 1 : SizeToInt(input_shape[input_shape.size() - 1]);
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(inputA_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_,
shape_n, shape_c, shape_h, shape_w),
"cudnnSetTensor4dDescriptor failed");
int shapeB_n = input_shapeB.size() < 4 ? 1 : SizeToInt(input_shapeB[input_shapeB.size() - 4]);
int shapeB_c = input_shapeB.size() < 3 ? 1 : SizeToInt(input_shapeB[input_shapeB.size() - 3]);
int shapeB_h = input_shapeB.size() < 2 ? 1 : SizeToInt(input_shapeB[input_shapeB.size() - 2]);
int shapeB_w = input_shapeB.size() == 0 ? 1 : SizeToInt(input_shapeB[input_shapeB.size() - 1]);
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(inputB_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_,
shapeB_n, shapeB_c, shapeB_h, shapeB_w),
"cudnnSetTensor4dDescriptor failed");
InitSizeLists();
return true;
}
protected:
void InitResource() override {
cudnn_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCudnnHandle();
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnCreateTensorDescriptor(&inputA_descriptor_),
"cudnnCreateTensorDescriptor failed.");
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnCreateTensorDescriptor(&inputB_descriptor_),
"cudnnCreateTensorDescriptor failed.");
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnCreateOpTensorDescriptor(&opTensor_descriptor_),
"cudnnCreateOpTensorDescriptor failed.");
}
void InitSizeLists() override {
if (!is_null_input_) {
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetTensorSizeInBytes(inputA_descriptor_, &input_size_),
"cudnnGetTensorSizeInBytes failed.");
input_size_list_.push_back(input_size_);
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetTensorSizeInBytes(inputB_descriptor_, &output_size_),
"cudnnGetTensorSizeInBytes failed.");
}
input_size_list_.push_back(output_size_);
if (binary_op_type_ == BINARY_OP_DIV || binary_op_type_ == BINARY_OP_SUB) {
workspace_size_ = output_size_;
}
workspace_size_list_.push_back(workspace_size_);
if (output_size_ > input_size_) {
output_size_list_.push_back(output_size_);
} else {
output_size_list_.push_back(input_size_);
}
return;
}
private:
void DestroyResource() noexcept {
CHECK_CUDNN_RET_WITH_ERROR(cudnnDestroyTensorDescriptor(inputA_descriptor_),
"cudnnDestroyTensorDescriptor failed.");
CHECK_CUDNN_RET_WITH_ERROR(cudnnDestroyTensorDescriptor(inputB_descriptor_),
"cudnnDestroyTensorDescriptor failed.");
CHECK_CUDNN_RET_WITH_ERROR(cudnnDestroyOpTensorDescriptor(opTensor_descriptor_),
"cudnnDestroyOpTensorDescriptor failed.");
}
void InferBinaryType(const CNodePtr &kernel_node) {
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto iter = kBinaryOpTypeMap.find(kernel_name);
if (iter == kBinaryOpTypeMap.end()) {
MS_LOG(EXCEPTION) << "Binary operation " << kernel_name << " is not supported.";
} else {
binary_op_type_ = iter->second;
}
switch (binary_op_type_) {
case BINARY_OP_DIV:
case BINARY_OP_MUL: {
tensor_op_ = CUDNN_OP_TENSOR_MUL;
break;
}
case BINARY_OP_SUB: {
tensor_op_ = CUDNN_OP_TENSOR_ADD;
break;
}
default: {
MS_LOG(EXCEPTION) << "Binary operation " << binary_op_type_ << " is not supported.";
}
}
CHECK_CUDNN_RET_WITH_EXCEPT(
cudnnSetOpTensorDescriptor(opTensor_descriptor_, tensor_op_, CUDNN_DATA_FLOAT, CUDNN_NOT_PROPAGATE_NAN),
"cudnnSetOpTensorDescriptor failed");
return;
}
cudnnHandle_t cudnn_handle_;
BinaryOpType binary_op_type_;
cudnnOpTensorOp_t tensor_op_;
cudnnTensorDescriptor_t inputA_descriptor_;
cudnnTensorDescriptor_t inputB_descriptor_;
cudnnOpTensorDescriptor_t opTensor_descriptor_;
cudnnDataType_t cudnn_data_type_;
bool is_null_input_;
size_t input_size_;
size_t output_size_;
size_t workspace_size_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
};
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_GPU_BINARYOP_GPU_KERNEL_H_

@@ -37,6 +37,16 @@ MS_REG_GPU_KERNEL_TWO(
MS_REG_GPU_KERNEL_TWO(
Pow, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
BroadcastOpGpuKernel, float, float)
MS_REG_GPU_KERNEL_TWO(
RealDiv,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
BroadcastOpGpuKernel, float, float)
MS_REG_GPU_KERNEL_TWO(
Mul, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
BroadcastOpGpuKernel, float, float)
MS_REG_GPU_KERNEL_TWO(
Sub, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
BroadcastOpGpuKernel, float, float)
// fp16
MS_REG_GPU_KERNEL_TWO(
@@ -57,5 +67,15 @@ MS_REG_GPU_KERNEL_TWO(
MS_REG_GPU_KERNEL_TWO(
Pow, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
BroadcastOpGpuKernel, half, half)
MS_REG_GPU_KERNEL_TWO(
RealDiv,
KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
BroadcastOpGpuKernel, half, half)
MS_REG_GPU_KERNEL_TWO(
Mul, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
BroadcastOpGpuKernel, half, half)
MS_REG_GPU_KERNEL_TWO(
Sub, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
BroadcastOpGpuKernel, half, half)
} // namespace kernel
} // namespace mindspore

@@ -98,7 +98,8 @@ class BroadcastOpGpuKernel : public GpuKernel {
static std::map<std::string, BroadcastOpType> kBroadcastTypeMap = {
{"Greater", BROADCAST_TYPE_GREATER}, {"Less", BROADCAST_TYPE_LESS}, {"Maximum", BROADCAST_TYPE_MAXIMUM},
{"Minimum", BROADCAST_TYPE_MINIMUM}, {"Pow", BROADCAST_TYPE_POWER},
{"Minimum", BROADCAST_TYPE_MINIMUM}, {"Pow", BROADCAST_TYPE_POWER}, {"RealDiv", BROADCAST_TYPE_REALDIV},
{"Mul", BROADCAST_TYPE_MUL}, {"Sub", BROADCAST_TYPE_SUB},
};
auto iter = kBroadcastTypeMap.find(kernel_name);

@@ -58,11 +58,6 @@ class SoftmaxGpuKernel : public GpuKernel {
}
T *input_addr = GetDeviceAddress<T>(inputs, 0);
T *output_addr = GetDeviceAddress<T>(outputs, 0);
T *transpose_input_addr = GetDeviceAddress<T>(workspace, 0);
T *transpose_output_addr = GetDeviceAddress<T>(workspace, 1);
int *input_shape = GetDeviceAddress<int>(workspace, 2);
int *transpose_shape = GetDeviceAddress<int>(workspace, 3);
int *transpose_axis = GetDeviceAddress<int>(workspace, 4);
const float alpha = 1;
const float beta = 0;
@@ -71,6 +66,11 @@ class SoftmaxGpuKernel : public GpuKernel {
input_addr, &beta, output_descriptor_, output_addr),
"cudnnSoftmaxForward failed");
} else {
T *transpose_input_addr = GetDeviceAddress<T>(workspace, 0);
T *transpose_output_addr = GetDeviceAddress<T>(workspace, 1);
int *input_shape = GetDeviceAddress<int>(workspace, 2);
int *transpose_shape = GetDeviceAddress<int>(workspace, 3);
int *transpose_axis = GetDeviceAddress<int>(workspace, 4);
CHECK_CUDA_RET_WITH_EXCEPT(cudaMemcpyAsync(input_shape, &input_shape_[0], workspace_size_, cudaMemcpyHostToDevice,
reinterpret_cast<cudaStream_t>(stream_ptr)),
"cudaMemcpyAsync input_shape failed");
@@ -114,9 +114,6 @@ class SoftmaxGpuKernel : public GpuKernel {
return true;
}
shape_size_ = SizeToInt(input_shape.size());
if (shape_size_ != 2) {
MS_LOG(EXCEPTION) << "Input is " << shape_size_ << "-D, but softmax only supports 2-D inputs.";
}
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
if (kernel_name == "LogSoftmax") {
algo_ = CUDNN_SOFTMAX_LOG;
@@ -163,7 +160,15 @@ class SoftmaxGpuKernel : public GpuKernel {
CHECK_CUDNN_RET_WITH_ERROR(cudnnDestroyTensorDescriptor(input_descriptor_), "destroy input_descriptor failed");
}
void InitSizeByAxis(const std::vector<size_t> input_shape, const int axis) {
void InitSizeByAxis(const std::vector<size_t> &input_shape, const int &axis) {
if (input_shape.size() == 2) {
InitSizeByAxis2D(input_shape, axis);
} else {
InitSizeByAxisLastDim(input_shape, axis);
}
}
void InitSizeByAxis2D(const std::vector<size_t> &input_shape, const int &axis) {
axis_ = axis;
if (axis_ < 0) {
axis_ += shape_size_;
@@ -191,6 +196,31 @@ class SoftmaxGpuKernel : public GpuKernel {
workspace_size_ = IntToSize(shape_size_) * sizeof(int);
}
void InitSizeByAxisLastDim(const std::vector<size_t> &input_shape, const int &axis) {
int axis_pos = axis;
if (axis_pos < 0) {
axis_pos += input_shape.size();
}
// axis should be -1 with ND
if (axis_pos != SizeToInt(input_shape.size() - 1)) {
MS_LOG(EXCEPTION) << "Input is " << shape_size_ << "-D, but axis(" << axis << ") is invalid.";
}
// squeeze to 2d, then invoke cudnn
size_t n = 1;
for (size_t i = 0; i < input_shape.size() - 1; i++) {
n *= input_shape[i];
}
axis_ = 1;
batch_size_ = n;
channel_size_ = input_shape[axis_pos];
height_ = 1;
width_ = 1;
input_size_ = sizeof(T) * batch_size_ * channel_size_ * height_ * width_;
output_size_ = input_size_;
input_shape_.push_back(batch_size_);
input_shape_.push_back(channel_size_);
}
cudnnHandle_t cudnn_handle_;
cudnnTensorDescriptor_t input_descriptor_;
cudnnTensorDescriptor_t output_descriptor_;
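A worked example of the ND squeeze above: an input of shape {8, 32, 10} with axis -1 collapses the leading dimensions, so cudnn sees a 2-D (batch = 256, channel = 10) softmax. A standalone check of the arithmetic:

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  std::vector<size_t> input_shape = {8, 32, 10};
  size_t n = 1;
  for (size_t i = 0; i < input_shape.size() - 1; i++) n *= input_shape[i];
  size_t batch_size = n;                     // 256
  size_t channel_size = input_shape.back();  // 10
  assert(batch_size == 256 && channel_size == 10);
  return 0;
}
```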

@@ -35,7 +35,7 @@ void HcclMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<K
std::vector<std::string> input_format, output_format;
std::vector<TypeId> input_type, output_type;
for (const auto &data_type : data_type_list) {
for (const auto &format : k4DSupportFormat) {
for (const auto &format : kOpFormatList) {
auto builder = std::make_shared<KernelBuildInfo::KernelBuildInfoBuilder>();
input_format.clear();
input_format.push_back(format);

@@ -35,14 +35,18 @@ void FilterInvalidKernelInfo(const CNodePtr &kernel_node,
return AnfAlgo::GetOutputTensorNum(kernel_node) == kernel_build_info->GetOutputNum() &&
AnfAlgo::GetInputTensorNum(kernel_node) == kernel_build_info->GetInputNum();
});
kernel_info_list->clear();
if (!filtered_list.empty()) {
kernel_info_list->clear();
(void)std::copy(filtered_list.begin(), filtered_list.end(), std::back_inserter(*kernel_info_list));
} else {
MS_LOG(EXCEPTION) << "node" << kernel_node->DebugString() << "'s output size : ["
<< AnfAlgo::GetOutputTensorNum(kernel_node) << "]"
<< "input size : [" << AnfAlgo::GetInputTensorNum(kernel_node)
<< "] cannot match any kernelInfo !";
MS_LOG(WARNING) << "All kernel info in the list failed to match any kernel info:";
for (size_t index = 0; index < kernel_info_list->size(); ++index) {
MS_EXCEPTION_IF_NULL(kernel_info_list->at(index));
MS_LOG(WARNING) << "kernel [ " << index << " ] :" << kernel_info_list->at(index)->ToString();
}
MS_LOG(WARNING) << "node" << kernel_node->DebugString() << "'s output size : ["
<< AnfAlgo::GetOutputTensorNum(kernel_node) << "]"
<< "input size : [" << AnfAlgo::GetInputTensorNum(kernel_node) << "] cannot match any kernelInfo !";
}
}
} // namespace
@@ -50,7 +54,6 @@ void KernelQuery(const CNodePtr &kernel_node, std::vector<std::shared_ptr<kernel
MS_EXCEPTION_IF_NULL(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_info_list);
TbeMetadataInfo(kernel_node, kernel_info_list);
if (kernel_info_list->empty()) {
AicpuMetadataInfo(kernel_node, kernel_info_list);
}
@@ -68,12 +71,41 @@ void KernelQuery(const CNodePtr &kernel_node, std::vector<std::shared_ptr<kernel
FilterInvalidKernelInfo(kernel_node, kernel_info_list);
}
void AicpuQuery(const CNodePtr &kernel_node, std::vector<std::shared_ptr<kernel::KernelBuildInfo>> *kernel_info_list) {
void AICpuQuery(const CNodePtr &kernel_node, std::vector<std::shared_ptr<kernel::KernelBuildInfo>> *kernel_info_list) {
MS_EXCEPTION_IF_NULL(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_info_list);
kernel_info_list->clear();
AicpuMetadataInfo(kernel_node, kernel_info_list);
FilterInvalidKernelInfo(kernel_node, kernel_info_list);
}
bool IsSupportedByAiCpu(const AnfNodePtr &kernel_node, const KernelBuildInfoPtr &select_kernel_build_info) {
MS_EXCEPTION_IF_NULL(kernel_node);
MS_EXCEPTION_IF_NULL(select_kernel_build_info);
std::vector<std::shared_ptr<kernel::KernelBuildInfo>> kernel_info_list;
auto cnode = kernel_node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
AicpuMetadataInfo(cnode, &kernel_info_list);
FilterInvalidKernelInfo(cnode, &kernel_info_list);
return std::any_of(kernel_info_list.begin(), kernel_info_list.end(),
[&select_kernel_build_info](const kernel::KernelBuildInfoPtr item) {
MS_EXCEPTION_IF_NULL(item);
return *item == *select_kernel_build_info;
});
}
bool IsSupportedByAiCore(const AnfNodePtr &kernel_node, const KernelBuildInfoPtr &select_kernel_build_info) {
MS_EXCEPTION_IF_NULL(kernel_node);
MS_EXCEPTION_IF_NULL(select_kernel_build_info);
std::vector<std::shared_ptr<kernel::KernelBuildInfo>> kernel_info_list;
auto cnode = kernel_node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
TbeMetadataInfo(cnode, &kernel_info_list);
FilterInvalidKernelInfo(cnode, &kernel_info_list);
return std::any_of(kernel_info_list.begin(), kernel_info_list.end(),
[&select_kernel_build_info](const kernel::KernelBuildInfoPtr item) {
MS_EXCEPTION_IF_NULL(item);
return *item == *select_kernel_build_info;
});
}
} // namespace kernel
} // namespace mindspore

@@ -26,7 +26,9 @@
namespace mindspore {
namespace kernel {
void KernelQuery(const CNodePtr &kernel_node, std::vector<std::shared_ptr<kernel::KernelBuildInfo>> *kernel_info_list);
void AicpuQuery(const CNodePtr &kernel_node, std::vector<std::shared_ptr<kernel::KernelBuildInfo>> *kernel_info_list);
void AICpuQuery(const CNodePtr &kernel_node, std::vector<std::shared_ptr<kernel::KernelBuildInfo>> *kernel_info_list);
bool IsSupportedByAiCpu(const AnfNodePtr &kernel_node, const KernelBuildInfoPtr &select_kernel_build_info);
bool IsSupportedByAiCore(const AnfNodePtr &kernel_node, const KernelBuildInfoPtr &select_kernel_build_info);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_KERNEL_QUERY_H_

@@ -551,11 +551,6 @@ bool ParseMetadata(const CNodePtr &kernel_node, const std::shared_ptr<const OpIn
}
bool IsShapeMatchFormat(const std::vector<size_t> &shape, const std::string &format) {
const std::set<std::string> kOpFormatList = {kOpFormat_DEFAULT, kOpFormat_NC1KHKWHWC0, kOpFormat_ND,
kOpFormat_NCHW, kOpFormat_NHWC, kOpFormat_HWCN,
kOpFormat_NC1HWC0, kOpFormat_FRAC_Z, kOpFormat_C1HWNCoC0,
kOpFormat_FRAC_NZ, kOpFormat_NC1HWC0_C04, kOpFormat_FRACTAL_Z_C04};
// if format is default, it means all formats are supported
if (kOpFormatList.find(format) == kOpFormatList.end()) {
MS_LOG(EXCEPTION) << "Got the unknown format " << format;

@@ -170,6 +170,9 @@ std::string ErrnoToMessage(MSRStatus status) {
case IO_FAILED:
return "io operate failed";
break;
case MATCH_HEADER_FAILED:
return "match header failed";
break;
default:
return "invalid error no";
}

@@ -84,7 +84,8 @@ void BindShardWriter(py::module *m) {
void BindShardReader(const py::module *m) {
(void)py::class_<ShardReader, std::shared_ptr<ShardReader>>(*m, "ShardReader", py::module_local())
.def(py::init<>())
.def("open", (MSRStatus(ShardReader::*)(const std::string &, const int &, const std::vector<std::string> &,
.def("open", (MSRStatus(ShardReader::*)(const std::vector<std::string> &, bool, const int &,
const std::vector<std::string> &,
const std::vector<std::shared_ptr<ShardOperator>> &)) &
ShardReader::OpenPy)
.def("launch", &ShardReader::Launch)
@@ -106,7 +107,8 @@ void BindShardIndexGenerator(const py::module *m) {
void BindShardSegment(py::module *m) {
(void)py::class_<ShardSegment>(*m, "ShardSegment", py::module_local())
.def(py::init<>())
.def("open", (MSRStatus(ShardSegment::*)(const std::string &, const int &, const std::vector<std::string> &,
.def("open", (MSRStatus(ShardSegment::*)(const std::vector<std::string> &, bool, const int &,
const std::vector<std::string> &,
const std::vector<std::shared_ptr<ShardOperator>> &)) &
ShardSegment::OpenPy)
.def("get_category_fields",

@@ -72,7 +72,8 @@ enum MSRStatus {
ILLEGAL_PARAMETERS,
GET_PAGE_BY_GROUP_ID_FAILED,
GET_SYSTEM_STATE_FAILED,
IO_FAILED
IO_FAILED,
MATCH_HEADER_FAILED
};
// convert error no to string message

@@ -35,10 +35,11 @@ class ShardHeader {
public:
ShardHeader();
MSRStatus Build(const std::string &file_path);
~ShardHeader() = default;
MSRStatus BuildDataset(const std::vector<std::string> &file_paths, bool load_dataset = true);
static std::pair<MSRStatus, json> BuildSingleHeader(const std::string &file_path);
/// \brief add the schema and save it
/// \param[in] schema the schema needs to be added
/// \return the last schema's id
@@ -126,7 +127,7 @@ class ShardHeader {
MSRStatus FileToPages(const std::string dump_file_name);
private:
MSRStatus InitializeHeader(const std::vector<json> &headers);
MSRStatus InitializeHeader(const std::vector<json> &headers, bool load_dataset);
/// \brief get the headers from all the shard data
/// \param[in] the shard data real path
@@ -137,9 +138,9 @@
MSRStatus ValidateField(const std::vector<std::string> &field_name, json schema, const uint64_t &schema_id);
/// \brief check the binary file status
MSRStatus CheckFileStatus(const std::string &path);
static MSRStatus CheckFileStatus(const std::string &path);
std::pair<MSRStatus, json> ValidateHeader(const std::string &path);
static std::pair<MSRStatus, json> ValidateHeader(const std::string &path);
void ParseHeader(const json &header);
@@ -149,7 +150,7 @@
MSRStatus CheckIndexField(const std::string &field, const json &schema);
void ParsePage(const json &page);
void ParsePage(const json &page, int shard_index, bool load_dataset);
MSRStatus ParseStatistics(const json &statistics);

@@ -68,23 +68,25 @@ class ShardReader {
virtual ~ShardReader();
/// \brief open files and initialize reader, c++ API
/// \param[in] file_path the path of ONE file, any file in dataset is fine
/// \param[in] file_paths the path of ONE file (any file in the dataset is fine) or an explicit file list
/// \param[in] load_dataset load dataset from single file or not
/// \param[in] n_consumer number of threads when reading
/// \param[in] selected_columns column list to be populated
/// \param[in] operators operators applied to data, operator type is shuffle, sample or category
/// \param[in] block_reader block-reader mode if true, otherwise row-reader mode
/// \return MSRStatus the status of MSRStatus
MSRStatus Open(const std::string &file_path, int n_consumer = 4,
MSRStatus Open(const std::vector<std::string> &file_paths, bool load_dataset, int n_consumer = 4,
const std::vector<std::string> &selected_columns = {},
const std::vector<std::shared_ptr<ShardOperator>> &operators = {}, const bool &block_reader = false);
/// \brief open files and initialize reader, python API
/// \param[in] file_path the path of ONE file, any file in dataset is fine
/// \param[in] file_paths the path of ONE file (any file in the dataset is fine) or an explicit file list
/// \param[in] load_dataset load dataset from single file or not
/// \param[in] n_consumer number of threads when reading
/// \param[in] selected_columns column list to be populated
/// \param[in] operators operators applied to data, operator type is shuffle, sample or category
/// \return MSRStatus the status of MSRStatus
MSRStatus OpenPy(const std::string &file_path, const int &n_consumer = 4,
MSRStatus OpenPy(const std::vector<std::string> &file_paths, bool load_dataset, const int &n_consumer = 4,
const std::vector<std::string> &selected_columns = {},
const std::vector<std::shared_ptr<ShardOperator>> &operators = {});
@@ -114,11 +116,13 @@ class ShardReader {
int GetShardCount() const;
/// \brief get the number of rows in database
/// \param[in] file_path the path of ONE file, any file in dataset is fine
/// \param[in] file_paths the path of ONE file (any file in the dataset is fine) or an explicit file list
/// \param[in] load_dataset load dataset from single file or not
/// \param[in] op smart pointer refer to ShardCategory or ShardSample object
/// \param[out] count # of rows
/// \return MSRStatus the status of MSRStatus
MSRStatus CountTotalRows(const std::string &file_path, const std::shared_ptr<ShardOperator> &op, int64_t *count);
MSRStatus CountTotalRows(const std::vector<std::string> &file_paths, bool load_dataset,
const std::shared_ptr<ShardOperator> &op, int64_t *count);
/// \brief shuffle task with incremental seed
/// \return void
@@ -220,7 +224,7 @@ class ShardReader {
std::vector<std::vector<json>> &column_values);
/// \brief initialize reader
MSRStatus Init(const std::string &file_path);
MSRStatus Init(const std::vector<std::string> &file_paths, bool load_dataset);
/// \brief validate column list
MSRStatus CheckColumnList(const std::vector<std::string> &selected_columns);
@@ -292,8 +296,9 @@
void GetClassesInShard(sqlite3 *db, int shard_id, const std::string sql, std::set<std::string> &categories);
/// \brief get number of classes
int64_t GetNumClasses(const std::string &file_path, const std::string &category_field);
int64_t GetNumClasses(const std::string &category_field);
std::pair<MSRStatus, std::vector<std::string>> GetMeta(const std::string &file_path, json &meta_data);
/// \brief get exactly blob fields data by indices
std::vector<uint8_t> ExtractBlobFieldBySelectColumns(std::vector<uint8_t> &blob_fields_bytes,
std::vector<uint32_t> &ordered_selected_columns_index);
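A hypothetical use of the updated C++ reader API (file names are illustrative; the remaining parameters keep the defaults declared above):

```cpp
mindrecord::ShardReader reader;
// Explicit file list, so load_dataset is false; a single entry with
// load_dataset = true would discover the rest of the dataset from it.
MSRStatus ret = reader.Open({"data.mindrecord0", "data.mindrecord1"}, false);
```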

Some files were not shown because too many files have changed in this diff.
