|
|
@ -18,13 +18,10 @@
|
|
|
|
#include <string>
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
|
|
|
#include <vector>
|
|
|
|
#include <algorithm>
|
|
|
|
#include <algorithm>
|
|
|
|
#include <unordered_set>
|
|
|
|
#include <utility>
|
|
|
|
|
|
|
|
|
|
|
|
namespace mindspore {
|
|
|
|
namespace mindspore {
|
|
|
|
namespace kernel {
|
|
|
|
namespace kernel {
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
const size_t kMaxDim = 10;
|
|
|
|
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
template <typename T>
|
|
|
|
void ReduceCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
|
|
|
|
void ReduceCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
|
|
|
|
MS_EXCEPTION_IF_NULL(kernel_node);
|
|
|
|
MS_EXCEPTION_IF_NULL(kernel_node);
|
|
|
@ -37,10 +34,14 @@ void ReduceCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
MS_LOG(EXCEPTION) << "Attribute is invalid";
|
|
|
|
MS_LOG(EXCEPTION) << "Attribute is invalid";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int dimension = input_shape_.size();
|
|
|
|
int dimension = input_shape_.size();
|
|
|
|
std::transform(axis_.begin(), axis_.end(), axis_.begin(),
|
|
|
|
std::transform(axis_.begin(), axis_.end(), axis_.begin(),
|
|
|
|
[dimension](const auto &a) { return a < 0 ? dimension + a : a; });
|
|
|
|
[dimension](const auto &a) { return a < 0 ? dimension + a : a; });
|
|
|
|
sort(axis_.begin(), axis_.end());
|
|
|
|
sort(axis_.begin(), axis_.end());
|
|
|
|
|
|
|
|
// Delete the duplicate axis.
|
|
|
|
|
|
|
|
auto last = std::unique(axis_.begin(), axis_.end());
|
|
|
|
|
|
|
|
axis_.erase(last, axis_.end());
|
|
|
|
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
|
|
|
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
|
|
|
if (kernel_name == "ReduceMax") {
|
|
|
|
if (kernel_name == "ReduceMax") {
|
|
|
|
reduce_type_ = 1;
|
|
|
|
reduce_type_ = 1;
|
|
|
@ -55,10 +56,8 @@ void ReduceCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
|
|
|
|
reduce_type_ = 4;
|
|
|
|
reduce_type_ = 4;
|
|
|
|
reduce_func_ = [](const T *input, size_t pos, T *out) { *out += input[pos]; };
|
|
|
|
reduce_func_ = [](const T *input, size_t pos, T *out) { *out += input[pos]; };
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
MS_LOG(EXCEPTION) << "unsupported reduce type: " << reduce_type_;
|
|
|
|
MS_LOG(EXCEPTION) << "unsupported reduce type: " << reduce_type_;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
CheckParameter();
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
template <typename T>
|
|
|
@ -68,7 +67,7 @@ bool ReduceCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
|
|
|
|
size_t input_size = inputs[0]->size / sizeof(T);
|
|
|
|
size_t input_size = inputs[0]->size / sizeof(T);
|
|
|
|
auto input_addr = reinterpret_cast<T *>(inputs[0]->addr);
|
|
|
|
auto input_addr = reinterpret_cast<T *>(inputs[0]->addr);
|
|
|
|
auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);
|
|
|
|
auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);
|
|
|
|
if (axis_.empty()) {
|
|
|
|
if (axis_.empty() || input_shape_.empty() || input_shape_.size() == 1) {
|
|
|
|
// Get one ret
|
|
|
|
// Get one ret
|
|
|
|
*output_addr = input_addr[0];
|
|
|
|
*output_addr = input_addr[0];
|
|
|
|
for (size_t i = 1; i < input_size; ++i) {
|
|
|
|
for (size_t i = 1; i < input_size; ++i) {
|
|
|
@ -78,107 +77,50 @@ bool ReduceCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
|
|
|
|
*output_addr /= input_size;
|
|
|
|
*output_addr /= input_size;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
// transpose->calculate strides->calculate ret
|
|
|
|
// Calculate transpose axes and stride
|
|
|
|
std::vector<size_t> out_shape;
|
|
|
|
|
|
|
|
std::vector<size_t> strides;
|
|
|
|
|
|
|
|
std::vector<size_t> back_strides;
|
|
|
|
|
|
|
|
size_t stride;
|
|
|
|
|
|
|
|
CalculateTransposeInfo(&out_shape, &strides, &back_strides, &stride);
|
|
|
|
|
|
|
|
int dimension = input_shape_.size();
|
|
|
|
int dimension = input_shape_.size();
|
|
|
|
std::vector<size_t> coordinates(dimension);
|
|
|
|
size_t stride = 1;
|
|
|
|
auto get_next_pos = [&coordinates, &out_shape, &strides, &back_strides, &dimension](size_t &curr_pos) {
|
|
|
|
std::vector<size_t> axes(input_shape_.size());
|
|
|
|
for (int i = dimension - 1; i >= 0; --i) {
|
|
|
|
size_t j = 0;
|
|
|
|
if (coordinates[i] + 1 == out_shape[i]) {
|
|
|
|
size_t k = 0;
|
|
|
|
coordinates[i] = 0;
|
|
|
|
for (int i = 0; i < dimension; ++i) {
|
|
|
|
curr_pos -= back_strides[i];
|
|
|
|
if (j == axis_.size() || i != axis_[j]) {
|
|
|
|
} else {
|
|
|
|
axes[k] = i;
|
|
|
|
coordinates[i]++;
|
|
|
|
++k;
|
|
|
|
curr_pos += strides[i];
|
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
stride *= input_shape_[i];
|
|
|
|
}
|
|
|
|
++j;
|
|
|
|
}
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
size_t output_size = outputs[0]->size / sizeof(T);
|
|
|
|
|
|
|
|
size_t pos = 0;
|
|
|
|
|
|
|
|
for (size_t i = 0; i < output_size; ++i) {
|
|
|
|
|
|
|
|
if (i != 0) {
|
|
|
|
|
|
|
|
get_next_pos(pos);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
output_addr[i] = input_addr[pos];
|
|
|
|
|
|
|
|
for (size_t j = 1; j < stride; ++j) {
|
|
|
|
|
|
|
|
get_next_pos(pos);
|
|
|
|
|
|
|
|
reduce_func_(input_addr, pos, &output_addr[i]);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
if (reduce_type_ == 4) { // 4 is reduce mean
|
|
|
|
|
|
|
|
output_addr[i] /= stride;
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for (auto &it : axis_) {
|
|
|
|
return true;
|
|
|
|
axes[k] = it;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
|
|
|
|
void ReduceCPUKernel<T>::CalculateTransposeInfo(std::vector<size_t> *new_shape, std::vector<size_t> *strides,
|
|
|
|
|
|
|
|
std::vector<size_t> *back_strides, size_t *stride) const {
|
|
|
|
|
|
|
|
int dimension = input_shape_.size();
|
|
|
|
|
|
|
|
std::vector<size_t> input_strides(dimension);
|
|
|
|
|
|
|
|
input_strides[dimension - 1] = 1;
|
|
|
|
|
|
|
|
for (int i = dimension - 2; i >= 0; --i) {
|
|
|
|
|
|
|
|
input_strides[i] = input_shape_[i + 1] * input_strides[i + 1];
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Calculate transpose axes and stride
|
|
|
|
|
|
|
|
std::vector<size_t> axes(dimension);
|
|
|
|
|
|
|
|
int j = 0;
|
|
|
|
|
|
|
|
int k = 0;
|
|
|
|
|
|
|
|
*stride = 1;
|
|
|
|
|
|
|
|
for (int i = 0; i < dimension; ++i) {
|
|
|
|
|
|
|
|
if (i != axis_[j]) {
|
|
|
|
|
|
|
|
axes[k] = i;
|
|
|
|
|
|
|
|
++k;
|
|
|
|
++k;
|
|
|
|
} else {
|
|
|
|
|
|
|
|
*stride *= input_shape_[i];
|
|
|
|
|
|
|
|
++j;
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Calculate transpose shape
|
|
|
|
for (auto &it : axis_) {
|
|
|
|
std::vector<size_t> transpose_shape(input_shape_.size());
|
|
|
|
axes[k] = it;
|
|
|
|
for (int i = 0; i < dimension; ++i) {
|
|
|
|
++k;
|
|
|
|
transpose_shape[i] = input_shape_[axes[i]];
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Calculate strides, new_shape, back strides
|
|
|
|
|
|
|
|
strides->resize(dimension);
|
|
|
|
|
|
|
|
new_shape->resize(dimension);
|
|
|
|
|
|
|
|
back_strides->resize(dimension);
|
|
|
|
|
|
|
|
for (int i = dimension - 1; i >= 0; --i) {
|
|
|
|
|
|
|
|
(*strides)[i] = input_strides[axes[i]];
|
|
|
|
|
|
|
|
(*new_shape)[i] = input_shape_[axes[i]];
|
|
|
|
|
|
|
|
(*back_strides)[i] = ((*new_shape)[i] - 1) * (*strides)[i];
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
|
|
|
|
void ReduceCPUKernel<T>::CheckParameter() const {
|
|
|
|
|
|
|
|
if (input_shape_.empty() || input_shape_.size() > kMaxDim) {
|
|
|
|
|
|
|
|
MS_LOG(EXCEPTION) << "Invalid input tensor of dimension: " << input_shape_.size();
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (axis_.empty()) {
|
|
|
|
|
|
|
|
MS_LOG(INFO) << "axis is empty";
|
|
|
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
std::unordered_set<int> checker(axis_.begin(), axis_.end());
|
|
|
|
|
|
|
|
if (checker.size() != axis_.size()) {
|
|
|
|
|
|
|
|
MS_LOG(EXCEPTION) << "Duplicate value in axis";
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int maxDimension = input_shape_.size();
|
|
|
|
|
|
|
|
for (auto &axis : axis_) {
|
|
|
|
|
|
|
|
if (axis >= maxDimension) {
|
|
|
|
|
|
|
|
MS_LOG(EXCEPTION) << "Invalid value in axis: " << axis;
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t output_size = outputs[0]->size / sizeof(T);
|
|
|
|
|
|
|
|
TransposeIterator base_iter(std::move(transpose_shape), std::move(axes), input_shape_);
|
|
|
|
|
|
|
|
auto task = [this, &base_iter, input_addr, output_addr, stride](size_t start, size_t end) {
|
|
|
|
|
|
|
|
auto iter = base_iter;
|
|
|
|
|
|
|
|
iter.SetPos(start * stride);
|
|
|
|
|
|
|
|
for (size_t i = start; i < end; ++i) {
|
|
|
|
|
|
|
|
output_addr[i] = input_addr[iter.GetPos()];
|
|
|
|
|
|
|
|
iter.GenNextPos();
|
|
|
|
|
|
|
|
for (size_t j = 1; j < stride; ++j) {
|
|
|
|
|
|
|
|
reduce_func_(input_addr, iter.GetPos(), &output_addr[i]);
|
|
|
|
|
|
|
|
iter.GenNextPos();
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
if (reduce_type_ == 4) { // 4 is reduce mean
|
|
|
|
|
|
|
|
output_addr[i] /= stride;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
CPUKernelUtils::ParallelFor(task, output_size);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} // namespace kernel
|
|
|
|
} // namespace kernel
|
|
|
|
} // namespace mindspore
|
|
|
|
} // namespace mindspore
|
|
|
|