first working implementation

add string implementation, can handle partial string slices

finish core implementation, added Index object

fix up the logic

add in new SliceOption object, cleaning up tests
pull/5300/head
nhussain 5 years ago
parent d0e49c5cf8
commit 77d507279c

@ -18,6 +18,7 @@
#include "pybind11/stl_bind.h"
#include "minddata/dataset/api/python/pybind_register.h"
#include "minddata/dataset/core/tensor_helpers.h"
#include "minddata/dataset/kernels/data/concatenate_op.h"
#include "minddata/dataset/kernels/data/duplicate_op.h"
#include "minddata/dataset/kernels/data/fill_op.h"
@ -61,39 +62,41 @@ PYBIND_REGISTER(PadEndOp, 1, ([](const py::module *m) {
.def(py::init<TensorShape, std::shared_ptr<Tensor>>());
}));
PYBIND_REGISTER(SliceOp, 1, ([](const py::module *m) {
(void)py::class_<SliceOp, TensorOp, std::shared_ptr<SliceOp>>(*m, "SliceOp")
.def(py::init<bool>())
.def(py::init([](const py::list &py_list) {
std::vector<dsize_t> c_list;
for (auto l : py_list) {
if (!l.is_none()) {
c_list.push_back(py::reinterpret_borrow<py::int_>(l));
}
}
return std::make_shared<SliceOp>(c_list);
}))
.def(py::init([](const py::tuple &py_slice) {
if (py_slice.size() != 3) {
THROW_IF_ERROR(Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Wrong slice object"));
}
PYBIND_REGISTER(SliceOption, 0, ([](const py::module *m) {
(void)py::class_<SliceOption>(*m, "SliceOption")
.def(py::init([](const py::slice &py_slice) {
Slice c_slice;
if (!py_slice[0].is_none() && !py_slice[1].is_none() && !py_slice[2].is_none()) {
c_slice = Slice(py::reinterpret_borrow<py::int_>(py_slice[0]),
py::reinterpret_borrow<py::int_>(py_slice[1]),
py::reinterpret_borrow<py::int_>(py_slice[2]));
} else if (py_slice[0].is_none() && py_slice[2].is_none()) {
c_slice = Slice(py::reinterpret_borrow<py::int_>(py_slice[1]));
} else if (!py_slice[0].is_none() && !py_slice[1].is_none()) {
c_slice = Slice(py::reinterpret_borrow<py::int_>(py_slice[0]),
py::reinterpret_borrow<py::int_>(py_slice[1]));
if (!py_slice.attr("start").is_none() && !py_slice.attr("stop").is_none() &&
!py_slice.attr("step").is_none()) {
c_slice = Slice(py::reinterpret_borrow<py::int_>(py_slice.attr("start")),
py::reinterpret_borrow<py::int_>(py_slice.attr("stop")),
py::reinterpret_borrow<py::int_>(py_slice.attr("step")));
} else if (py_slice.attr("start").is_none() && py_slice.attr("step").is_none()) {
c_slice = Slice(py::reinterpret_borrow<py::int_>(py_slice.attr("stop")));
} else if (!py_slice.attr("start").is_none() && !py_slice.attr("stop").is_none()) {
c_slice = Slice(py::reinterpret_borrow<py::int_>(py_slice.attr("start")),
py::reinterpret_borrow<py::int_>(py_slice.attr("stop")));
}
if (!c_slice.valid()) {
THROW_IF_ERROR(Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Wrong slice object"));
}
return std::make_shared<SliceOp>(c_slice);
return SliceOption(c_slice);
}))
.def(py::init([](const py::list &py_list) {
std::vector<dsize_t> indices;
for (auto l : py_list) {
indices.push_back(py::reinterpret_borrow<py::int_>(l));
}
return SliceOption(indices);
}))
.def(py::init<bool>())
.def(py::init<SliceOption>());
}));
PYBIND_REGISTER(SliceOp, 1, ([](const py::module *m) {
(void)py::class_<SliceOp, TensorOp, std::shared_ptr<SliceOp>>(*m, "SliceOp")
.def(py::init<std::vector<SliceOption>>());
}));
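For reference, the three Slice constructor forms handled by the bindings above map onto Python slice objects as follows. This is a minimal standalone sketch, not part of the bindings; it assumes only the Slice class declared in tensor_helpers.h:

#include <iostream>
#include "minddata/dataset/core/tensor_helpers.h"

using mindspore::dataset::Slice;

int main() {
  Slice stop_only(5);      // only stop given, e.g. Python [:5] -> start=0, stop=5, step=1
  Slice start_stop(1, 5);  // start and stop given, e.g. [1:5] -> step defaults to 1
  Slice full(1, 5, 2);     // all three given, e.g. [1:5:2]
  std::cout << full.start_ << ":" << full.stop_ << ":" << full.step_ << std::endl;  // prints 1:5:2
  return 0;
}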
PYBIND_REGISTER(ToFloat16Op, 1, ([](const py::module *m) {

@ -7,6 +7,7 @@ set(DATASET_CORE_SRC_FILES
data_type.cc
global_context.cc
tensor.cc
tensor_helpers.cc
tensor_row.cc
tensor_shape.cc
)

@ -28,6 +28,7 @@
#include "minddata/dataset/core/constants.h"
#include "minddata/dataset/core/cv_tensor.h"
#include "minddata/dataset/core/global_context.h"
#ifdef ENABLE_PYTHON
#include "minddata/dataset/core/pybind_support.h"
namespace py = pybind11;
@ -92,11 +93,11 @@ Status Tensor::CreateEmpty(const TensorShape &shape, const DataType &type, Tenso
CHECK_FAIL_RETURN_UNEXPECTED(type.IsNumeric(), "Number of elements is not 0. The type should be numeric.");
int64_t byte_size = (*out)->SizeInBytes();
// Don't allocate if we have a tensor with no elements.
if (byte_size != 0) {
RETURN_IF_NOT_OK((*out)->AllocateBuffer(byte_size));
}
return Status::OK();
}
Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, TensorPtr *out) {
@ -861,63 +862,164 @@ Status Tensor::CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vect
CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s(dst_addr, len, src_addr, len) == 0, "memcpy error");
return Status::OK();
}
Status Tensor::Slice(std::shared_ptr<Tensor> *out, const std::vector<dsize_t> &indices) {
CHECK_FAIL_RETURN_UNEXPECTED(shape_.Rank() == 1, "Currently Slice works with rank-1 tensors only.");
if (indices.empty()) {
return CreateEmpty(TensorShape({0}), type_, out);
Status Tensor::Slice(std::shared_ptr<Tensor> *out, const std::vector<SliceOption> &slice_options) {
std::vector<SliceOption> converted_slice_objects;
for (int i = 0; i < slice_options.size(); i++) {
SliceOption slice_option = slice_options[i];
if (slice_option.all_) {
mindspore::dataset::Slice slice = mindspore::dataset::Slice(shape_[i]);
converted_slice_objects.push_back(SliceOption(slice));
continue;
}
if (slice_option.indices_.empty() && !slice_option.slice_.valid()) {
RETURN_STATUS_UNEXPECTED("Both indices and slices can not be empty.");
}
if (!slice_option.indices_.empty() && slice_option.slice_.valid()) {
RETURN_STATUS_UNEXPECTED("Both indices and slices can not be given.");
}
// if slice object was provided, indices should be empty. Generate indices from the slice object.
if (slice_option.indices_.empty()) {
// normalize the slice: resolve negative bounds, then clamp to [0, shape_[i]]
mindspore::dataset::Slice slice_copy = slice_option.slice_;
slice_copy.start_ = HandleNeg(slice_option.slice_.start_, shape_[i]);
slice_copy.stop_ = HandleNeg(slice_option.slice_.stop_, shape_[i]);
slice_copy.start_ = slice_copy.start_ < 0 ? 0 : slice_copy.start_;
slice_copy.stop_ = slice_copy.stop_ < 0 ? 0 : slice_copy.stop_;
dsize_t max_idx = shape_[i];
slice_copy.start_ = slice_copy.start_ > max_idx ? max_idx : slice_copy.start_;
slice_copy.stop_ = slice_copy.stop_ > max_idx ? max_idx : slice_copy.stop_;
converted_slice_objects.emplace_back(SliceOption(slice_copy));
} else {
// indices validation
std::vector<dsize_t> indices_copy;
for (int j = 0; j < slice_option.indices_.size(); j++) {
dsize_t index = HandleNeg(slice_option.indices_[j], shape_[i]);
CHECK_FAIL_RETURN_UNEXPECTED(index < shape_[i] && index >= 0,
"Index " + std::to_string(index) + " is out of bounds.");
indices_copy.emplace_back(index);
}
converted_slice_objects.emplace_back(SliceOption(indices_copy));
}
}
// if a string tensor was given only partial slices, slice the remaining dimensions in full
if (slice_options.size() != Rank() && type() == DataType::DE_STRING) {
for (int i = slice_options.size(); i < Rank(); i++) {
mindspore::dataset::Slice slice = mindspore::dataset::Slice(0, shape_[i]);
converted_slice_objects.emplace_back(SliceOption(slice));
}
}
// determine final shape:
TensorShape t = TensorShape({});
dsize_t slice_len = slice_options.size();
dsize_t slice_len_ind;
for (int i = 0; i < shape_.Rank(); i++) {
if (i < slice_len) {
// if it's a slice
if (converted_slice_objects[i].indices_.size() == 0) {
slice_len_ind = (converted_slice_objects[i].slice_.stop_ - converted_slice_objects[i].slice_.start_) /
converted_slice_objects[i].slice_.step_;
if ((converted_slice_objects[i].slice_.stop_ - converted_slice_objects[i].slice_.start_) %
converted_slice_objects[i].slice_.step_ !=
0) {
slice_len_ind++;
}
// account for slices that would return no data
slice_len_ind = slice_len_ind < 0 ? 0 : slice_len_ind;
t = t.AppendDim(slice_len_ind);
} else {
// if it's a vector of indices
// need to introduce a way of handling indices and slices
if (converted_slice_objects[i].indices_.size() >= 1) {
t = t.AppendDim(converted_slice_objects[i].indices_.size());
}
}
} else {
// add in the rest of the dimensions
slice_len_ind = shape_[i];
t = t.AppendDim(slice_len_ind);
}
}
std::vector<std::vector<dsize_t>> indices_vector = IndexGenerator(converted_slice_objects);
if (indices_vector.empty()) {
return CreateEmpty(t, type_, out);
}
if (type_.IsNumeric()) {
return SliceNumeric(out, indices);
return SliceNumeric(out, indices_vector, t);
} else {
return SliceString(out, indices);
return SliceString(out, indices_vector, t);
}
}
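The per-dimension output length computed above is ceil((stop - start) / step), clamped at zero for slices that select nothing. A standalone sketch of that arithmetic; the helper name slice_length and the use of int64_t in place of dsize_t are illustrative:

#include <cassert>
#include <cstdint>

// illustrative helper, not part of the codebase
int64_t slice_length(int64_t start, int64_t stop, int64_t step) {
  int64_t len = (stop - start) / step;  // integer division truncates toward zero
  if ((stop - start) % step != 0) {
    len++;                              // a partial step still yields one more element
  }
  return len < 0 ? 0 : len;             // slices that return no data produce length 0
}

int main() {
  assert(slice_length(0, 5, 2) == 3);   // indices 0, 2, 4
  assert(slice_length(5, 0, -2) == 3);  // indices 5, 3, 1
  assert(slice_length(3, 1, 1) == 0);   // empty slice
  return 0;
}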
Status Tensor::SliceNumeric(std::shared_ptr<Tensor> *out, const std::vector<dsize_t> &indices) {
RETURN_IF_NOT_OK(CreateEmpty(TensorShape({static_cast<dsize_t>(indices.size())}), type_, out));
Status Tensor::SliceNumeric(std::shared_ptr<Tensor> *out, const std::vector<std::vector<dsize_t>> &indices,
const TensorShape &shape) {
RETURN_IF_NOT_OK(CreateEmpty(shape, type_, out));
(*out)->GetMutableBuffer();
dsize_t out_index = 0;
dsize_t dim_length = shape_[0];
std::vector<dsize_t> dim_length = shape_.AsVector();
dsize_t type_size = type_.SizeInBytes();
dsize_t src_start = HandleNeg(indices[0], dim_length);
std::vector<dsize_t> src_start = HandleNegIndices(indices[0], dim_length);
dsize_t src_start_index;
RETURN_IF_NOT_OK(shape_.ToFlatIndex(src_start, &src_start_index));
uchar *dst_addr = (*out)->data_;
dsize_t count = 1;
// stride of the innermost indexed dimension; lets partial slices copy whole sub-tensors at once
dsize_t current_stride = shape_.Strides()[indices[0].size() - 1];
for (dsize_t i = 0; i < indices.size(); i++) {
dsize_t cur_index = HandleNeg(indices[i], dim_length);
CHECK_FAIL_RETURN_UNEXPECTED(
cur_index >= 0 && cur_index < dim_length,
"Index " + std::to_string(indices[i]) + " is out of bounds [0," + std::to_string(dim_length) + ")");
std::vector<dsize_t> cur_index = HandleNegIndices(indices[i], dim_length);
if (i < indices.size() - 1) {
dsize_t next_index = HandleNeg(indices[i + 1], dim_length);
if (next_index == cur_index + 1) {
std::vector<dsize_t> next_index = HandleNegIndices(indices[i + 1], dim_length);
dsize_t flat_idx_curr;
dsize_t flat_idx_next;
RETURN_IF_NOT_OK(shape_.ToFlatIndex(cur_index, &flat_idx_curr));
RETURN_IF_NOT_OK(shape_.ToFlatIndex(next_index, &flat_idx_next));
if (flat_idx_next == flat_idx_curr + current_stride) {
count++;
continue;
}
}
int return_code = memcpy_s(dst_addr + out_index * type_size, (*out)->SizeInBytes(), data_ + src_start * type_size,
count * type_size);
int return_code = memcpy_s(dst_addr + out_index * type_size, (*out)->SizeInBytes(),
data_ + src_start_index * type_size, count * type_size * current_stride);
CHECK_FAIL_RETURN_UNEXPECTED(return_code == 0, "memcpy_s failed in SliceNumeric");
out_index += count;
out_index += count * current_stride;
if (i < indices.size() - 1) {
src_start = HandleNeg(indices[i + 1], dim_length); // next index
src_start = HandleNegIndices(indices[i + 1], dim_length); // next index
RETURN_IF_NOT_OK(shape_.ToFlatIndex(src_start, &src_start_index));
}
count = 1;
}
return Status::OK();
}
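SliceNumeric coalesces runs of flat indices that are exactly one stride apart, so a single memcpy covers the whole run instead of one copy per element. A self-contained sketch of the coalescing idea; the data and index values here are made up:

#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

int main() {
  std::vector<int32_t> src = {10, 11, 12, 13, 14, 15};
  std::vector<int64_t> flat = {0, 1, 2, 4, 5};  // flat index 3 is skipped
  std::vector<int32_t> dst(flat.size());
  size_t out = 0;
  size_t run_start = 0;
  for (size_t i = 0; i < flat.size(); i++) {
    // a run ends at the last index, or when the next flat index is not contiguous
    bool run_ends = (i + 1 == flat.size()) || (flat[i + 1] != flat[i] + 1);
    if (run_ends) {
      size_t count = i - run_start + 1;
      std::memcpy(dst.data() + out, src.data() + flat[run_start], count * sizeof(int32_t));
      out += count;
      run_start = i + 1;
    }
  }
  for (int32_t v : dst) {
    std::cout << v << " ";  // prints: 10 11 12 14 15
  }
  return 0;
}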
Status Tensor::SliceString(std::shared_ptr<Tensor> *out, const std::vector<dsize_t> &indices) {
dsize_t dim_length = shape_[0];
Status Tensor::SliceString(std::shared_ptr<Tensor> *out, const std::vector<std::vector<dsize_t>> &indices,
const TensorShape &shape) {
std::vector<dsize_t> dim_length = shape_.AsVector();
std::vector<std::string> strings;
for (dsize_t index : indices) {
dsize_t cur_index = HandleNeg(index, dim_length);
CHECK_FAIL_RETURN_UNEXPECTED(
cur_index >= 0 && cur_index < dim_length,
"Index " + std::to_string(index) + " is out of bounds [0," + std::to_string(dim_length) + ")");
for (std::vector<dsize_t> index : indices) {
std::vector<dsize_t> cur_index = HandleNegIndices(index, dim_length);
dsize_t cur_flat_index;
RETURN_IF_NOT_OK(shape_.ToFlatIndex(cur_index, &cur_flat_index));
std::string_view sv;
GetItemAt(&sv, {cur_index});
RETURN_IF_NOT_OK(GetItemAt(&sv, {cur_index}));
strings.emplace_back(sv);
}
return CreateFromVector(strings, TensorShape({static_cast<dsize_t>(strings.size())}), out);
return CreateFromVector(strings, shape, out);
}
} // namespace dataset

@ -36,6 +36,7 @@
#include "utils/ms_utils.h"
#include "minddata/dataset/core/constants.h"
#include "minddata/dataset/core/data_type.h"
#include "minddata/dataset/core/tensor_helpers.h"
#include "minddata/dataset/core/tensor_shape.h"
#include "minddata/dataset/util/status.h"
#ifndef ENABLE_ANDROID
@ -369,20 +370,30 @@ class Tensor {
}
/// Handle negative indices.
/// \param[in] index index to modify
/// \param[in] length axis length used to modify index
/// \return dsize_t modified index
static inline dsize_t HandleNeg(dsize_t index, dsize_t length) { return (index < 0) ? (index + length) : index; }
/// Slice tensor based on the given indices. Copy the sliced data into the out tensor. Only rank-1 tensors are supported.
/// Handle negative indices for a vector of indices.
/// \param[in] index_vector vector of indices to modify
/// \param[in] length vector of axis lengths used to modify the indices
/// \return std::vector<dsize_t> modified vector of indices
static inline std::vector<dsize_t> HandleNegIndices(const std::vector<dsize_t> &index_vector,
const std::vector<dsize_t> &length) {
std::vector<dsize_t> indices(index_vector.size(), 0);
for (size_t i = 0; i < index_vector.size(); i++) {
indices[i] = HandleNeg(index_vector[i], length[i]);
}
return indices;
}
/// Slice the tensor based on the given slice options. Copy the sliced data into the out tensor.
/// Based on the type of tensor, SliceNumeric or SliceString will be called
/// \param[out] out Tensor
/// \param[in] indices vector of indices
/// \param[in] slice_options vector of SliceOption objects
/// \return Status error code
Status Slice(TensorPtr *out, const std::vector<dsize_t> &indices);
/// Slice numeric tensors.
Status SliceNumeric(TensorPtr *out, const std::vector<dsize_t> &indices);
/// Slice string tensors
Status SliceString(TensorPtr *out, const std::vector<dsize_t> &indices);
Status Slice(TensorPtr *out, const std::vector<mindspore::dataset::SliceOption> &slice_options);
#ifdef ENABLE_PYTHON
/// Constructs numpy array from input tensor
@ -662,6 +673,13 @@ class Tensor {
#ifdef ENABLE_ANDROID
friend class tensor::DETensor;
#endif
/// Slice numeric tensors.
Status SliceNumeric(TensorPtr *out, const std::vector<std::vector<dsize_t>> &indices, const TensorShape &shape);
/// Slice string tensors
Status SliceString(TensorPtr *out, const std::vector<std::vector<dsize_t>> &indices, const TensorShape &shape);
/// Copy raw data of a array based on shape and strides to the destination pointer
/// \param dst [out] Pointer to the destination array where the content is to be copied
/// \param[in] src Pointer to the source of strided array to be copied

@ -0,0 +1,71 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <string>
#include <vector>
#include <iostream>
#include "minddata/dataset/core/tensor_helpers.h"
namespace mindspore {
namespace dataset {
void IndexGeneratorHelper(int8_t depth, std::vector<dsize_t> *numbers,
const std::vector<mindspore::dataset::SliceOption> &slice_list,
std::vector<std::vector<dsize_t>> *matrix) {
// the loop bounds differ depending on whether it's a list of indices or a slice object
if (depth > 0) {
dsize_t new_depth = depth - 1;
// index of the dimension currently being expanded; depth counts down from slice_list.size()
dsize_t curr_ind = numbers->size() - depth;
if (slice_list[curr_ind].slice_.valid()) {
dsize_t increment = slice_list[curr_ind].slice_.step_;
if (increment > 0) {
for (int i = slice_list[curr_ind].slice_.start_; i < slice_list[curr_ind].slice_.stop_;
i = i + slice_list[curr_ind].slice_.step_) {
(*numbers)[curr_ind] = i;
IndexGeneratorHelper(new_depth, numbers, slice_list, matrix);
}
} else {
for (int i = slice_list[curr_ind].slice_.start_; i > slice_list[curr_ind].slice_.stop_;
i = i + slice_list[curr_ind].slice_.step_) {
(*numbers)[curr_ind] = i;
IndexGeneratorHelper(new_depth, numbers, slice_list, matrix);
}
}
} else {
for (int i = 0; i < slice_list[curr_ind].indices_.size(); i++) {
(*numbers)[curr_ind] = slice_list[curr_ind].indices_[i];
IndexGeneratorHelper(new_depth, numbers, slice_list, matrix);
}
}
} else {
matrix->emplace_back(*numbers);
}
}
// Used to generate slice indices
std::vector<std::vector<dsize_t>> IndexGenerator(const std::vector<mindspore::dataset::SliceOption> &slice_list) {
int8_t depth = slice_list.size();
std::vector<dsize_t> numbers(depth, 0);
std::vector<std::vector<dsize_t>> matrix;  // populated by the recursive helper
IndexGeneratorHelper(depth, &numbers, slice_list, &matrix);
return matrix;
}
} // namespace dataset
} // namespace mindspore
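A usage sketch of IndexGenerator, mirroring the first-column example from the tensor_helpers.h doc comment. The explicit Slice(0, 4, 1) stands in for SliceOption(true), which Tensor::Slice expands using the tensor shape before calling IndexGenerator; the standalone main() is illustrative only:

#include <iostream>
#include <vector>
#include "minddata/dataset/core/tensor_helpers.h"

using namespace mindspore::dataset;

int main() {
  // first column of a 4 x 2 tensor: rows 0..3 (slice), column 0 (index list)
  std::vector<SliceOption> slice_list = {SliceOption(Slice(0, 4, 1)),
                                         SliceOption(std::vector<dsize_t>{0})};
  std::vector<std::vector<dsize_t>> matrix = IndexGenerator(slice_list);
  for (const auto &index : matrix) {
    std::cout << "{" << index[0] << "," << index[1] << "} ";  // {0,0} {1,0} {2,0} {3,0}
  }
  return 0;
}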

@ -0,0 +1,81 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_HELPERS_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_HELPERS_H_
#include <memory>
#include <vector>
#include "minddata/dataset/core/constants.h"
namespace mindspore {
namespace dataset {
class Slice {
public:
Slice() : start_(0), stop_(0), step_(0) {}
Slice(dsize_t start, dsize_t stop, dsize_t step) : start_(start), stop_(stop), step_(step) {}
Slice(dsize_t start, dsize_t stop) : start_(start), stop_(stop), step_(1) {}
explicit Slice(dsize_t stop) : start_(0), stop_(stop), step_(1) {}
Slice(Slice const &slice) = default;
~Slice() = default;
bool valid() const { return !(start_ == 0 && stop_ == 0 && step_ == 0); }
dsize_t start_;
dsize_t stop_;
dsize_t step_;
};
class SliceOption {
public:
explicit SliceOption(bool all) : all_(all) {}
explicit SliceOption(std::vector<dsize_t> indices) : indices_(indices) {}
explicit SliceOption(Slice slice) : slice_(slice) {}
SliceOption(SliceOption const &slice) = default;
// only one of the following will be valid
// given indices to slice the Tensor.
std::vector<dsize_t> indices_ = {};
// Slice object. All start, stop and step are 0 if invalid.
Slice slice_;
bool all_ = false;
};
/// Recursive helper function to generate indices based on a vector of SliceOptions. It recursively iterates through
/// each range represented by slice_list to generate the full list of indices to be sliced.
/// \param[in] depth used to keep track of the recursion level
/// \param[in] numbers vector used to represent the current index
/// \param[in] slice_list vector of SliceOption objects
/// \param[out] matrix 2D vector to be populated with the generated indices
/// Example: For a 4 x 2 tensor, and with slice_list = {SliceOption({0})} (the first row), matrix will become
/// {{0}}. For slice_list = {SliceOption(all), SliceOption({0})} (the first column), matrix will become
/// {{0, 0}, {1, 0}, {2, 0}, {3, 0}}.
/// For slice_list = {SliceOption({0, 2})}, matrix will become {{0}, {2}}. The size of each nested vector is always
/// equal to slice_list.size().
void IndexGeneratorHelper(int8_t depth, std::vector<dsize_t> *numbers, const std::vector<SliceOption> &slice_list,
std::vector<std::vector<dsize_t>> *matrix);
/// Generate indices based on vector of SliceOptions
/// Calls the recursive helper function IndexGeneratorHelper
/// \param[in] slice_list vector of SliceOption objects. Note: If the user passes
/// {SliceOption(true), SliceOption(true)}, it will return an M x 2 vector, instead of reducing it to
/// {SliceOption(true)} first to generate only an M x 1 vector.
/// \return std::vector<std::vector<dsize_t>> 2D vector of generated indices, M x slice_list.size()
std::vector<std::vector<dsize_t>> IndexGenerator(const std::vector<SliceOption> &slice_list);
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_HELPERS_H_

@ -13,35 +13,23 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "minddata/dataset/kernels/data/slice_op.h"
#include <functional>
#include <vector>
#include "minddata/dataset/kernels/data/slice_op.h"
#include "minddata/dataset/kernels/data/data_utils.h"
#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/kernels/tensor_op.h"
namespace mindspore {
namespace dataset {
Status SliceOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
IO_CHECK(input, output);
CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Rank() == 1, "SliceOp supports 1D Tensors only for now.");
// if `all` flag is true, output is just the input.
if (all_) {
*output = input;
return Status::OK();
return input->Slice(output, slice_options_);
}
// if slice object was provided, indices should be empty. Generate indices from the slice object.
if (slice_.valid() && indices_.empty()) {
dsize_t len = input->shape()[0];
std::vector<dsize_t> indices = slice_.Indices(len);
return input->Slice(output, indices);
}
// if indices are not empty, slices should be invalid, use indices_ to slice
if (!indices_.empty() && !slice_.valid()) {
return input->Slice(output, indices_);
}
RETURN_STATUS_UNEXPECTED("The indexing parameters are invalid");
}
} // namespace dataset
} // namespace mindspore

@ -23,47 +23,20 @@
#include <vector>
#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/core/tensor_helpers.h"
#include "minddata/dataset/kernels/tensor_op.h"
namespace mindspore {
namespace dataset {
class Slice {
public:
Slice() : start_(0), stop_(0), step_(0) {}
Slice(dsize_t start, dsize_t stop, dsize_t step) : start_(start), stop_(stop), step_(step) {}
Slice(dsize_t start, dsize_t stop) : start_(start), stop_(stop), step_(1) {}
explicit Slice(dsize_t stop) : start_(0), stop_(stop), step_(1) {}
~Slice() = default;
std::vector<dsize_t> Indices(dsize_t length) {
std::vector<dsize_t> indices;
dsize_t index = std::min(Tensor::HandleNeg(start_, length), length);
dsize_t end_index = std::min(Tensor::HandleNeg(stop_, length), length);
if (step_ > 0) {
for (; index < end_index; index += step_) {
indices.push_back(index);
}
} else {
for (; index > end_index; index += step_) {
indices.push_back(index);
}
}
return indices;
}
bool valid() { return !(start_ == 0 && stop_ == 0 && step_ == 0); }
dsize_t start_;
dsize_t stop_;
dsize_t step_;
};
class SliceOp : public TensorOp {
public:
explicit SliceOp(std::vector<dsize_t> indices) : indices_(std::move(indices)) {}
explicit SliceOp(Slice slice) : slice_(slice) {}
explicit SliceOp(bool all) : all_(all) {}
explicit SliceOp(std::vector<SliceOption> slice_options) : slice_options_(slice_options) {}
explicit SliceOp(SliceOption slice_option) { slice_options_.push_back(slice_option); }
// shorthand notation for slicing along the first dimension
explicit SliceOp(Slice slice) { slice_options_.push_back(SliceOption(slice)); }
explicit SliceOp(bool all) { slice_options_.push_back(SliceOption(all)); }
explicit SliceOp(std::vector<dsize_t> indices) { slice_options_.push_back(SliceOption(indices)); }
~SliceOp() override = default;
@ -72,13 +45,7 @@ class SliceOp : public TensorOp {
std::string Name() const override { return kSliceOp; }
private:
// only one of the following will be valid
// given indices to slice the Tensor. Empty vector if invalid.
std::vector<dsize_t> indices_;
// Slice object. All start, stop and step are 0 if invalid.
Slice slice_;
// Flag to read all indices in the dim.
bool all_ = false;
std::vector<SliceOption> slice_options_ = {};
};
} // namespace dataset
} // namespace mindspore
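Construction of the refactored op now funnels everything through SliceOption. A hedged construction sketch; the standalone main() is for illustration only:

#include <vector>
#include "minddata/dataset/kernels/data/slice_op.h"

using namespace mindspore::dataset;

int main() {
  // rows 1..2 and column 0, i.e. tensor[1:3, [0]]
  std::vector<SliceOption> options = {SliceOption(Slice(1, 3)),
                                      SliceOption(std::vector<dsize_t>{0})};
  SliceOp op(options);
  // the single-argument constructors remain as shorthand for the first dimension
  SliceOp first_row(SliceOption(std::vector<dsize_t>{0}));
  SliceOp everything(true);
  return 0;
}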

@ -21,8 +21,8 @@ import numpy as np
import mindspore.common.dtype as mstype
import mindspore._c_dataengine as cde
from .validators import check_num_classes, check_de_type, check_fill_value, check_slice_op, check_mask_op, \
check_pad_end, check_concat_type, check_random_transform_ops
from .validators import check_num_classes, check_de_type, check_fill_value, check_slice_option, check_slice_op, \
check_mask_op, check_pad_end, check_concat_type, check_random_transform_ops
from ..core.datatypes import mstype_to_detype
@ -94,6 +94,32 @@ class TypeCast(cde.TypeCastOp):
super().__init__(data_type)
class _SliceOption(cde.SliceOption):
"""
Internal class _SliceOption to be used with the Slice operation.
Args:
slice_option (Union[int, list(int), slice, None, Ellipsis, bool, _SliceOption]):
1. :py:obj:`int`: Slice this index only along the dimension. Negative index is supported.
2. :py:obj:`list(int)`: Slice these indices along the dimension. Negative indices are supported.
3. :py:obj:`slice`: Slice the generated indices from the slice object along the dimension.
4. :py:obj:`None`: Slice the whole dimension. Similar to `:` in Python indexing.
5. :py:obj:`Ellipsis`: Slice the whole dimension. Similar to `:` in Python indexing.
6. :py:obj:`bool`: Slice the whole dimension. Similar to `:` in Python indexing.
"""
@check_slice_option
def __init__(self, slice_option):
if isinstance(slice_option, int) and not isinstance(slice_option, bool):
slice_option = [slice_option]
elif slice_option is Ellipsis:
slice_option = True
elif slice_option is None:
slice_option = True
super().__init__(slice_option)
class Slice(cde.SliceOp):
"""
Slice operation to extract a tensor out using the given n slices.
@ -102,15 +128,16 @@ class Slice(cde.SliceOp):
(Currently only rank-1 tensors are supported).
Args:
slices(Union[int, list(int), slice, None, Ellipsis]):
*slices(Union[int, list(int), slice, None, Ellipsis]):
Maximum `n` number of arguments to slice a tensor of rank `n`.
One object in slices can be one of:
1. :py:obj:`int`: Slice this index only. Negative index is supported.
2. :py:obj:`list(int)`: Slice these indices in the list only. Negative indices are supported.
3. :py:obj:`slice`: Slice the generated indices from the slice object. Similar to `start:stop:step`.
1. :py:obj:`int`: Slice this index only along the first dimension. Negative index is supported.
2. :py:obj:`list(int)`: Slice these indices along the first dimension. Negative indices are supported.
3. :py:obj:`slice`: Slice the generated indices from the slice object along the first dimension.
Similar to `start:stop:step`.
4. :py:obj:`None`: Slice the whole dimension. Similar to `:` in Python indexing.
5. :py:obj:`Ellipsis`: Slice all dimensions between the two slices. Similar to `...` in Python indexing.
5. :py:obj:`Ellipsis`: Slice the whole dimension. Similar to `:` in Python indexing.
Examples:
>>> import mindspore.dataset.transforms.c_transforms as c_transforms
@ -130,16 +157,9 @@ class Slice(cde.SliceOp):
@check_slice_op
def __init__(self, *slices):
dim0 = slices[0]
if isinstance(dim0, int):
dim0 = [dim0]
elif dim0 is None:
dim0 = True
elif isinstance(dim0, slice):
dim0 = (dim0.start, dim0.stop, dim0.step)
elif dim0 is Ellipsis:
dim0 = True
super().__init__(dim0)
slice_input_ = list(slices)
slice_input_ = [_SliceOption(slice_dim) for slice_dim in slice_input_]
super().__init__(slice_input_)
class Relational(IntEnum):

@ -19,8 +19,9 @@ import inspect
import numpy as np
from mindspore._c_expression import typing
from ..core.validator_helpers import parse_user_args, type_check, check_pos_int64, check_value, check_positive, \
check_tensor_op
check_tensor_op, type_check_list
# POS_INT_MIN is used to prevent values from starting at 0
POS_INT_MIN = 1
@ -100,17 +101,40 @@ def check_de_type(method):
return new_method
def check_slice_option(method):
"""Wrapper method to check the parameters of SliceOption."""
@wraps(method)
def new_method(self, *args, **kwargs):
[slice_option], _ = parse_user_args(method, *args, **kwargs)
from .c_transforms import _SliceOption
if slice_option is not None:
type_check(slice_option, (int, list, slice, bool, type(Ellipsis), _SliceOption), "slice_option")
if isinstance(slice_option, list):
type_check_list(slice_option, (int,), "slice_option")
return method(self, *args, **kwargs)
return new_method
def check_slice_op(method):
"""Wrapper method to check the parameters of slice."""
@wraps(method)
def new_method(self, *args):
for _, arg in enumerate(args):
type_check(arg, (int, slice, list, type(None), type(Ellipsis)), "arg")
if isinstance(arg, list):
for a in arg:
type_check(a, (int,), "a")
return method(self, *args)
def new_method(self, *args, **kwargs):
[slice_op], _ = parse_user_args(method, *args, **kwargs)
for s in slice_op:
from .c_transforms import _SliceOption
if s is not None:
type_check(s, (int, list, slice, bool, type(Ellipsis), _SliceOption), "slice")
if isinstance(s, list) and s:
if isinstance(s[0], int):
type_check_list(s, (int,), "slice")
return method(self, *args, **kwargs)
return new_method

@ -124,6 +124,7 @@ SET(DE_UT_SRCS
distributed_sampler_test.cc
data_helper_test.cc
image_process_test.cc
slice_op_test.cc
)
if (ENABLE_PYTHON)

File diff suppressed because it is too large

@ -424,12 +424,11 @@ TEST_F(MindDataTestTensorDE, TensorSlice) {
Tensor::CreateFromVector(std::vector<dsize_t>{0, 1, 2, 3, 4}, &t);
std::shared_ptr<Tensor> t2;
auto x = std::vector<dsize_t>{0, 3, 4};
std::vector<SliceOption> slice_options = {SliceOption(x)};
std::shared_ptr<Tensor> expected;
Tensor::CreateFromVector(x, &expected);
t->Slice(&t2, x);
t->Slice(&t2, slice_options);
ASSERT_EQ(*t2, *expected);
t->Slice(&t2, std::vector<dsize_t>{0, 1, 2, 3, 4});
ASSERT_EQ(*t2, *t);
}
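A hypothetical follow-on check (not part of this commit) exercising the new multi-dimensional path, in the same style as the test above; it assumes the CreateFromVector overload that takes an explicit TensorShape, as used elsewhere in this diff:

TEST_F(MindDataTestTensorDE, TensorSlice2D) {
  // take the first column of a 2 x 2 tensor, i.e. tensor[:, [0]]
  std::shared_ptr<Tensor> t;
  Tensor::CreateFromVector(std::vector<dsize_t>{1, 2, 3, 4}, TensorShape({2, 2}), &t);
  std::vector<SliceOption> slice_options = {SliceOption(true), SliceOption(std::vector<dsize_t>{0})};
  std::shared_ptr<Tensor> t2;
  t->Slice(&t2, slice_options);
  std::shared_ptr<Tensor> expected;
  Tensor::CreateFromVector(std::vector<dsize_t>{1, 3}, TensorShape({2, 1}), &expected);
  ASSERT_EQ(*t2, *expected);
}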
TEST_F(MindDataTestTensorDE, TensorPartialInsert) {

@ -44,7 +44,7 @@ def test_random_choice():
res2 = test_config([[0, 1, 2]], [ops.Compose([ops.Duplicate(), ops.Concatenate()]),
ops.Compose([ops.Slice([0, 1]), ops.OneHot(2)])])
assert res2 in [[[[1, 0], [0, 1]]], [[0, 1, 2, 0, 1, 2]]]
# Test RandomChoice where there is only 1 operation
# Test RandomChoice when there is only 1 operation
assert test_config([[4, 3], [2, 1]], [ops.Slice([0])]) == [[4], [2]]

File diff suppressed because it is too large