From 77d507279cd365f05aff4b10be32e50d1e80a9dc Mon Sep 17 00:00:00 2001 From: nhussain Date: Fri, 21 Aug 2020 10:45:06 -0400 Subject: [PATCH] first working implementation add string implementation, can handle partial string slices finish core implementation, added Index object fix up the logic add in new SliceOption object, cleaning up tests --- .../bindings/dataset/kernels/data/bindings.cc | 57 +- .../minddata/dataset/core/CMakeLists.txt | 1 + .../ccsrc/minddata/dataset/core/tensor.cc | 162 ++++- .../ccsrc/minddata/dataset/core/tensor.h | 36 +- .../minddata/dataset/core/tensor_helpers.cc | 71 ++ .../minddata/dataset/core/tensor_helpers.h | 81 +++ .../minddata/dataset/kernels/data/slice_op.cc | 28 +- .../minddata/dataset/kernels/data/slice_op.h | 49 +- mindspore/dataset/transforms/c_transforms.py | 54 +- mindspore/dataset/transforms/validators.py | 40 +- tests/ut/cpp/dataset/CMakeLists.txt | 5 +- tests/ut/cpp/dataset/slice_op_test.cc | 664 ++++++++++++++++++ tests/ut/cpp/dataset/tensor_test.cc | 5 +- .../ut/python/dataset/test_c_random_choice.py | 2 +- tests/ut/python/dataset/test_slice_op.py | 354 ++++++---- 15 files changed, 1323 insertions(+), 286 deletions(-) create mode 100644 mindspore/ccsrc/minddata/dataset/core/tensor_helpers.cc create mode 100644 mindspore/ccsrc/minddata/dataset/core/tensor_helpers.h create mode 100644 tests/ut/cpp/dataset/slice_op_test.cc diff --git a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/data/bindings.cc b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/data/bindings.cc index c6cec1eb55..084ceaefb9 100644 --- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/data/bindings.cc +++ b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/data/bindings.cc @@ -18,6 +18,7 @@ #include "pybind11/stl_bind.h" #include "minddata/dataset/api/python/pybind_register.h" +#include "minddata/dataset/core/tensor_helpers.h" #include 
"minddata/dataset/kernels/data/concatenate_op.h" #include "minddata/dataset/kernels/data/duplicate_op.h" #include "minddata/dataset/kernels/data/fill_op.h" @@ -61,39 +62,41 @@ PYBIND_REGISTER(PadEndOp, 1, ([](const py::module *m) { .def(py::init>()); })); -PYBIND_REGISTER(SliceOp, 1, ([](const py::module *m) { - (void)py::class_>(*m, "SliceOp") - .def(py::init()) - .def(py::init([](const py::list &py_list) { - std::vector c_list; - for (auto l : py_list) { - if (!l.is_none()) { - c_list.push_back(py::reinterpret_borrow(l)); - } - } - return std::make_shared(c_list); - })) - .def(py::init([](const py::tuple &py_slice) { - if (py_slice.size() != 3) { - THROW_IF_ERROR(Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Wrong slice object")); - } +PYBIND_REGISTER(SliceOption, 0, ([](const py::module *m) { + (void)py::class_(*m, "SliceOption") + .def(py::init([](const py::slice &py_slice) { Slice c_slice; - if (!py_slice[0].is_none() && !py_slice[1].is_none() && !py_slice[2].is_none()) { - c_slice = Slice(py::reinterpret_borrow(py_slice[0]), - py::reinterpret_borrow(py_slice[1]), - py::reinterpret_borrow(py_slice[2])); - } else if (py_slice[0].is_none() && py_slice[2].is_none()) { - c_slice = Slice(py::reinterpret_borrow(py_slice[1])); - } else if (!py_slice[0].is_none() && !py_slice[1].is_none()) { - c_slice = Slice(py::reinterpret_borrow(py_slice[0]), - py::reinterpret_borrow(py_slice[1])); + if (!py_slice.attr("start").is_none() && !py_slice.attr("stop").is_none() && + !py_slice.attr("step").is_none()) { + c_slice = Slice(py::reinterpret_borrow(py_slice.attr("start")), + py::reinterpret_borrow(py_slice.attr("stop")), + py::reinterpret_borrow(py_slice.attr("step"))); + } else if (py_slice.attr("start").is_none() && py_slice.attr("step").is_none()) { + c_slice = Slice(py::reinterpret_borrow(py_slice.attr("stop"))); + } else if (!py_slice.attr("start").is_none() && !py_slice.attr("stop").is_none()) { + c_slice = 
Slice(py::reinterpret_borrow(py_slice.attr("start")), + py::reinterpret_borrow(py_slice.attr("stop"))); } if (!c_slice.valid()) { THROW_IF_ERROR(Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Wrong slice object")); } - return std::make_shared(c_slice); - })); + return SliceOption(c_slice); + })) + .def(py::init([](const py::list &py_list) { + std::vector indices; + for (auto l : py_list) { + indices.push_back(py::reinterpret_borrow(l)); + } + return SliceOption(indices); + })) + .def(py::init()) + .def(py::init()); + })); + +PYBIND_REGISTER(SliceOp, 1, ([](const py::module *m) { + (void)py::class_>(*m, "SliceOp") + .def(py::init>()); })); PYBIND_REGISTER(ToFloat16Op, 1, ([](const py::module *m) { diff --git a/mindspore/ccsrc/minddata/dataset/core/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/core/CMakeLists.txt index bfe6e67563..23e8ab7e64 100644 --- a/mindspore/ccsrc/minddata/dataset/core/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/core/CMakeLists.txt @@ -7,6 +7,7 @@ set(DATASET_CORE_SRC_FILES data_type.cc global_context.cc tensor.cc + tensor_helpers.cc tensor_row.cc tensor_shape.cc ) diff --git a/mindspore/ccsrc/minddata/dataset/core/tensor.cc b/mindspore/ccsrc/minddata/dataset/core/tensor.cc index 2c7bbb5b51..95f4ada7d2 100644 --- a/mindspore/ccsrc/minddata/dataset/core/tensor.cc +++ b/mindspore/ccsrc/minddata/dataset/core/tensor.cc @@ -28,6 +28,7 @@ #include "minddata/dataset/core/constants.h" #include "minddata/dataset/core/cv_tensor.h" #include "minddata/dataset/core/global_context.h" + #ifdef ENABLE_PYTHON #include "minddata/dataset/core/pybind_support.h" namespace py = pybind11; @@ -92,11 +93,11 @@ Status Tensor::CreateEmpty(const TensorShape &shape, const DataType &type, Tenso CHECK_FAIL_RETURN_UNEXPECTED(type.IsNumeric(), "Number of elements is not 0. The type should be numeric."); int64_t byte_size = (*out)->SizeInBytes(); + // Don't allocate if we have a tensor with no elements. 
if (byte_size != 0) { RETURN_IF_NOT_OK((*out)->AllocateBuffer(byte_size)); } - return Status::OK(); } Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, TensorPtr *out) { @@ -861,63 +862,164 @@ Status Tensor::CopyLastDimAt(const std::shared_ptr &src, const std::vect CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s(dst_addr, len, src_addr, len) == 0, "memcpy error"); return Status::OK(); } -Status Tensor::Slice(std::shared_ptr *out, const std::vector &indices) { - CHECK_FAIL_RETURN_UNEXPECTED(shape_.Rank() == 1, "Currently Slice work with rank 1 tensors only."); - if (indices.empty()) { - return CreateEmpty(TensorShape({0}), type_, out); + +Status Tensor::Slice(std::shared_ptr *out, const std::vector slice_options_) { + std::vector converted_slice_objects; + + for (int i = 0; i < slice_options_.size(); i++) { + SliceOption slice_option = slice_options_[i]; + + if (slice_option.all_) { + mindspore::dataset::Slice slice = mindspore::dataset::Slice(shape_[i]); + converted_slice_objects.push_back(SliceOption(slice)); + continue; + } + + if (slice_option.indices_.empty() && !slice_option.slice_.valid()) { + RETURN_STATUS_UNEXPECTED("Both indices and slices can not be empty."); + } + + if (!slice_option.indices_.empty() && slice_option.slice_.valid()) { + RETURN_STATUS_UNEXPECTED("Both indices and slices can not be given."); + } + + // if slice object was provided, indices should be empty. Generate indices from the slice object. + if (slice_option.indices_.empty()) { + // check if slice is valid + mindspore::dataset::Slice slice_copy = slice_option.slice_; + slice_copy.start_ = HandleNeg(slice_option.slice_.start_, shape_[i]); + slice_copy.stop_ = HandleNeg(slice_option.slice_.stop_, shape_[i]); + slice_copy.start_ = slice_copy.start_ < 0 ? 0 : slice_copy.start_; + slice_copy.stop_ = slice_copy.stop_ < 0 ? 0 : slice_copy.stop_; + dsize_t max_idx = shape_[i]; + slice_copy.start_ = slice_copy.start_ > max_idx ? 
max_idx : slice_copy.start_; + slice_copy.stop_ = slice_copy.stop_ > max_idx ? max_idx : slice_copy.stop_; + converted_slice_objects.emplace_back(SliceOption(slice_copy)); + } else { + // indices validation + std::vector indices_copy; + for (int j = 0; j < slice_option.indices_.size(); j++) { + dsize_t index = HandleNeg(slice_option.indices_[j], shape_[i]); + CHECK_FAIL_RETURN_UNEXPECTED(index < shape_[i] && index >= 0, + "Index " + std::to_string(index) + " is out of bounds."); + indices_copy.emplace_back(index); + } + converted_slice_objects.emplace_back(SliceOption(indices_copy)); + } + } + + // if a string with partial slices, pass in the rest + if (slice_options_.size() != Rank() && type() == DataType::DE_STRING) { + for (int i = slice_options_.size(); i < Rank(); i++) { + mindspore::dataset::Slice slice = mindspore::dataset::Slice(0, shape_[i]); + converted_slice_objects.emplace_back(SliceOption(slice)); + } + } + + // determine final shape: + TensorShape t = TensorShape({}); + dsize_t slice_len = slice_options_.size(); + dsize_t slice_len_ind; + for (int i = 0; i < shape_.Rank(); i++) { + if (i < slice_len) { + // if it's a slice + if (converted_slice_objects[i].indices_.size() == 0) { + slice_len_ind = (converted_slice_objects[i].slice_.stop_ - converted_slice_objects[i].slice_.start_) / + converted_slice_objects[i].slice_.step_; + if ((converted_slice_objects[i].slice_.stop_ - converted_slice_objects[i].slice_.start_) % + converted_slice_objects[i].slice_.step_ != + 0) { + slice_len_ind++; + } + // account for slices that would return no data + slice_len_ind = slice_len_ind < 0 ? 
0 : slice_len_ind; + t = t.AppendDim(slice_len_ind); + } else { + // if its a vector of indices + // need to introduce a way of handling indices and slices + if (converted_slice_objects[i].indices_.size() >= 1) { + t = t.AppendDim(converted_slice_objects[i].indices_.size()); + } + } + } else { + // add in the rest of the dimensions + slice_len_ind = shape_[i]; + t = t.AppendDim(slice_len_ind); + } + } + + std::vector> indices_vector = IndexGenerator(converted_slice_objects); + + if (indices_vector.empty()) { + return CreateEmpty(t, type_, out); } if (type_.IsNumeric()) { - return SliceNumeric(out, indices); + return SliceNumeric(out, indices_vector, t); } else { - return SliceString(out, indices); + return SliceString(out, indices_vector, t); } } -Status Tensor::SliceNumeric(std::shared_ptr *out, const std::vector &indices) { - RETURN_IF_NOT_OK(CreateEmpty(TensorShape({static_cast(indices.size())}), type_, out)); + +Status Tensor::SliceNumeric(std::shared_ptr *out, const std::vector> &indices, + const TensorShape &shape) { + RETURN_IF_NOT_OK(CreateEmpty(shape, type_, out)); + (*out)->GetMutableBuffer(); dsize_t out_index = 0; - dsize_t dim_length = shape_[0]; + std::vector dim_length = shape_.AsVector(); dsize_t type_size = type_.SizeInBytes(); - dsize_t src_start = HandleNeg(indices[0], dim_length); + std::vector src_start = HandleNegIndices(indices[0], dim_length); + dsize_t src_start_index; + RETURN_IF_NOT_OK(shape_.ToFlatIndex(src_start, &src_start_index)); + uchar *dst_addr = (*out)->data_; dsize_t count = 1; + // to handle partial slices + dsize_t current_stride = shape_.Strides()[indices[0].size() - 1]; + for (dsize_t i = 0; i < indices.size(); i++) { - dsize_t cur_index = HandleNeg(indices[i], dim_length); - CHECK_FAIL_RETURN_UNEXPECTED( - cur_index >= 0 && cur_index < dim_length, - "Index " + std::to_string(indices[i]) + " is out of bounds [0," + std::to_string(dim_length) + ")"); + std::vector cur_index = HandleNegIndices(indices[i], dim_length); if (i < 
indices.size() - 1) { - dsize_t next_index = HandleNeg(indices[i + 1], dim_length); - if (next_index == cur_index + 1) { + std::vector next_index = HandleNegIndices(indices[i + 1], dim_length); + dsize_t flat_idx_curr; + dsize_t flat_idx_next; + + RETURN_IF_NOT_OK(shape_.ToFlatIndex(cur_index, &flat_idx_curr)); + RETURN_IF_NOT_OK(shape_.ToFlatIndex(next_index, &flat_idx_next)); + + if (flat_idx_next == flat_idx_curr + current_stride) { count++; continue; } } - int return_code = memcpy_s(dst_addr + out_index * type_size, (*out)->SizeInBytes(), data_ + src_start * type_size, - count * type_size); + + int return_code = memcpy_s(dst_addr + out_index * type_size, (*out)->SizeInBytes(), + data_ + src_start_index * type_size, count * type_size * current_stride); CHECK_FAIL_RETURN_UNEXPECTED(return_code == 0, "memcpy_s failed in SliceNumeric"); - out_index += count; + out_index += count * current_stride; if (i < indices.size() - 1) { - src_start = HandleNeg(indices[i + 1], dim_length); // next index + src_start = HandleNegIndices(indices[i + 1], dim_length); // next index + RETURN_IF_NOT_OK(shape_.ToFlatIndex(src_start, &src_start_index)); } count = 1; } return Status::OK(); } -Status Tensor::SliceString(std::shared_ptr *out, const std::vector &indices) { - dsize_t dim_length = shape_[0]; +Status Tensor::SliceString(std::shared_ptr *out, const std::vector> &indices, + const TensorShape &shape) { + std::vector dim_length = shape_.AsVector(); std::vector strings; - for (dsize_t index : indices) { - dsize_t cur_index = HandleNeg(index, dim_length); - CHECK_FAIL_RETURN_UNEXPECTED( - cur_index >= 0 && cur_index < dim_length, - "Index " + std::to_string(index) + " is out of bounds [0," + std::to_string(dim_length) + ")"); + + for (std::vector index : indices) { + std::vector cur_index = HandleNegIndices(index, dim_length); + dsize_t cur_flat_index; + shape_.ToFlatIndex(cur_index, &cur_flat_index); std::string_view sv; - GetItemAt(&sv, {cur_index}); + 
RETURN_IF_NOT_OK(GetItemAt(&sv, {cur_index})); strings.emplace_back(sv); } - return CreateFromVector(strings, TensorShape({static_cast(strings.size())}), out); + return CreateFromVector(strings, shape, out); } } // namespace dataset diff --git a/mindspore/ccsrc/minddata/dataset/core/tensor.h b/mindspore/ccsrc/minddata/dataset/core/tensor.h index b2fe352c1d..eaac044e21 100644 --- a/mindspore/ccsrc/minddata/dataset/core/tensor.h +++ b/mindspore/ccsrc/minddata/dataset/core/tensor.h @@ -36,6 +36,7 @@ #include "utils/ms_utils.h" #include "minddata/dataset/core/constants.h" #include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/core/tensor_helpers.h" #include "minddata/dataset/core/tensor_shape.h" #include "minddata/dataset/util/status.h" #ifndef ENABLE_ANDROID @@ -369,20 +370,30 @@ class Tensor { } /// Handle negative indices. + /// \param[out] out modified index + /// \param[in] index + /// \param[in] length axis length used to modify index + /// \return dsize_t modified index static inline dsize_t HandleNeg(dsize_t index, dsize_t length) { return (index < 0) ? (index + length) : index; } - /// Slice tensor bases on the given indicies. Copy the sliced data into out tensor. Only rank1 tensors are supported. + /// Handle negative indices for a vector of indices. + /// \param[out] out modified vector of indices + /// \param[in] index_vector vector of indices + /// \return std::vector modified vector of indices + static inline std::vector HandleNegIndices(std::vector index_vector, std::vector length) { + std::vector indices(index_vector.size(), 0); + for (int i = 0; i < index_vector.size(); i++) { + indices[i] = HandleNeg(index_vector[i], length[i]); + } + return indices; + } + + /// Slice tensor bases on the given indices. Copy the sliced data into out tensor. 
/// Based on the type of tensor, SliceNumeric or SliceString will be called /// \param[out] out Tensor - /// \param[in] indices vector of indices + /// \param[in] slice_options vector of SliceOption objects /// \return Status error code - Status Slice(TensorPtr *out, const std::vector &indices); - - /// Slice numeric tensors. - Status SliceNumeric(TensorPtr *out, const std::vector &indices); - - /// Slice string tensors - Status SliceString(TensorPtr *out, const std::vector &indices); + Status Slice(TensorPtr *out, const std::vector slice_options); #ifdef ENABLE_PYTHON /// Constructs numpy array from input tensor @@ -662,6 +673,13 @@ class Tensor { #ifdef ENABLE_ANDROID friend class tensor::DETensor; #endif + + /// Slice numeric tensors. + Status SliceNumeric(TensorPtr *out, const std::vector> &indices, const TensorShape &shape); + + /// Slice string tensors + Status SliceString(TensorPtr *out, const std::vector> &indices, const TensorShape &shape); + /// Copy raw data of a array based on shape and strides to the destination pointer /// \param dst [out] Pointer to the destination array where the content is to be copied /// \param[in] src Pointer to the source of strided array to be copied diff --git a/mindspore/ccsrc/minddata/dataset/core/tensor_helpers.cc b/mindspore/ccsrc/minddata/dataset/core/tensor_helpers.cc new file mode 100644 index 0000000000..783b3a9382 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/core/tensor_helpers.cc @@ -0,0 +1,71 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include "minddata/dataset/core/tensor_helpers.h" + +namespace mindspore { +namespace dataset { + +void IndexGeneratorHelper(int8_t depth, std::vector *numbers, + const std::vector &slice_list, + std::vector> *matrix) { + // for loop changes if its an index instead of a slice object + if (depth > 0) { + dsize_t new_depth = depth - 1; + dsize_t curr_ind = numbers->size() - depth; + + if (slice_list[curr_ind].slice_.valid()) { + dsize_t increment = slice_list[curr_ind].slice_.step_; + + if (increment > 0) { + for (int i = slice_list[curr_ind].slice_.start_; i < slice_list[curr_ind].slice_.stop_; + i = i + slice_list[curr_ind].slice_.step_) { + (*numbers)[curr_ind] = i; + IndexGeneratorHelper(new_depth, numbers, slice_list, matrix); + } + } else { + for (int i = slice_list[curr_ind].slice_.start_; i > slice_list[curr_ind].slice_.stop_; + i = i + slice_list[curr_ind].slice_.step_) { + (*numbers)[curr_ind] = i; + IndexGeneratorHelper(new_depth, numbers, slice_list, matrix); + } + } + } else { + for (int i = 0; i < slice_list[curr_ind].indices_.size(); i++) { + (*numbers)[curr_ind] = slice_list[curr_ind].indices_[i]; + IndexGeneratorHelper(new_depth, numbers, slice_list, matrix); + } + } + + } else { + (*matrix).emplace_back((*numbers)); + } +} + +// Used to generate slice indices +std::vector> IndexGenerator(const std::vector &slice_list) { + int8_t depth = slice_list.size(); + std::vector numbers(depth, 0); + std::vector> matrix(0, std::vector(depth, 0)); + + IndexGeneratorHelper(depth, &numbers, slice_list, &matrix); + + return matrix; +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/core/tensor_helpers.h b/mindspore/ccsrc/minddata/dataset/core/tensor_helpers.h new file mode 100644 index 0000000000..3e242df4d1 --- /dev/null +++ 
b/mindspore/ccsrc/minddata/dataset/core/tensor_helpers.h @@ -0,0 +1,81 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_HELPERS_H_ +#define MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_HELPERS_H_ + +#include +#include + +#include "minddata/dataset/core/constants.h" + +namespace mindspore { +namespace dataset { +class Slice { + public: + Slice() : start_(0), stop_(0), step_(0) {} + Slice(dsize_t start, dsize_t stop, dsize_t step) : start_(start), stop_(stop), step_(step) {} + Slice(dsize_t start, dsize_t stop) : start_(start), stop_(stop), step_(1) {} + explicit Slice(dsize_t stop) : start_(0), stop_(stop), step_(1) {} + Slice(Slice const &slice) = default; + + ~Slice() = default; + + bool valid() const { return !(start_ == 0 && stop_ == 0 && step_ == 0); } + dsize_t start_; + dsize_t stop_; + dsize_t step_; +}; + +class SliceOption { + public: + explicit SliceOption(bool all) : all_(all) {} + explicit SliceOption(std::vector indices) : indices_(indices) {} + explicit SliceOption(Slice slice) : slice_(slice) {} + SliceOption(SliceOption const &slice) = default; + + // only one of the following will be valid + // given indices to slice the Tensor. + std::vector indices_ = {}; + // Slice object. All start, stop and step are 0 if invalid. 
+ Slice slice_; + bool all_ = false; +}; + +/// Recursive helper function to generate indices based on vector of SliceOptions. It recursively iterates through each +/// range represented by slice_list to generate a list of indices to be sliced. +/// \param[out] matrix Generated nested vector of indices +/// Example: For a 4 x 2 tensor, and with slice_list = {SliceOption({0})} (the first row), matrix will become +/// {{0}}. For slice_list = {SliceOption(all), SliceOption({0})} (the first column), matrix will become +/// {{0, 0}, {1, 0}, {2, 0}, {3, 0}}. +/// For slice_list = {SliceOption({0, 2})}, matrix will become {{0}, {2}}. The size of each nested array is always +/// equal to (slice_list).size(). +/// \param[in] depth used to keep track of recursion level +/// \param[in,out] numbers vector used to represent current index +/// \param[out] matrix 2D vector to be populated with desired indices +/// \param[in] slice_list vector of SliceOption objects +void IndexGeneratorHelper(int8_t depth, std::vector *numbers, const std::vector &slice_list, + std::vector> *matrix); + +/// Generate indices based on vector of SliceOptions +/// Calls the recursive helper function IndexGeneratorHelper +/// \param[in] slice_list vector of SliceOption objects. Note: If the user passes +/// {SliceOption(true), SliceOption(true)}, it will return an M x 2 vector, instead of reducing it to +/// {SliceOption(true)} first to only generate an M x 1 vector. 
+/// \return std::vector> 2D vector of generated indices, M x (slice_list).size() +std::vector> IndexGenerator(const std::vector &slice_list); +} // namespace dataset +} // namespace mindspore +#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_HELPERS_H_ diff --git a/mindspore/ccsrc/minddata/dataset/kernels/data/slice_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/data/slice_op.cc index 66f48d5c2b..0126cc8156 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/data/slice_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/slice_op.cc @@ -13,35 +13,23 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "minddata/dataset/kernels/data/slice_op.h" +#include +#include + +#include "minddata/dataset/kernels/data/slice_op.h" +#include "minddata/dataset/kernels/data/data_utils.h" #include "minddata/dataset/core/tensor.h" #include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { + Status SliceOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { IO_CHECK(input, output); - CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Rank() == 1, "SliceOp supports 1D Tensors only for now."); - // if `all` flag is true, output is just the input. - if (all_) { - *output = input; - return Status::OK(); - } - - // if slice object was provided, indices should be empty. Generate indices from the slice object. 
- if (slice_.valid() && indices_.empty()) { - dsize_t len = input->shape()[0]; - std::vector indices = slice_.Indices(len); - return input->Slice(output, indices); - } - - // if indices are not empty, slices should be invalid, use indices_ to slice - if (!indices_.empty() && !slice_.valid()) { - return input->Slice(output, indices_); - } - RETURN_STATUS_UNEXPECTED("The indexing parameters are invalid"); + return input->Slice(output, slice_options_); } + } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/kernels/data/slice_op.h b/mindspore/ccsrc/minddata/dataset/kernels/data/slice_op.h index 39042cf6d4..37111f7a84 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/data/slice_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/slice_op.h @@ -23,47 +23,20 @@ #include #include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/tensor_helpers.h" #include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { -class Slice { - public: - Slice() : start_(0), stop_(0), step_(0) {} - Slice(dsize_t start, dsize_t stop, dsize_t step) : start_(start), stop_(stop), step_(step) {} - Slice(dsize_t start, dsize_t stop) : start_(start), stop_(stop), step_(1) {} - explicit Slice(dsize_t stop) : start_(0), stop_(stop), step_(1) {} - - ~Slice() = default; - - std::vector Indices(dsize_t length) { - std::vector indices; - dsize_t index = std::min(Tensor::HandleNeg(start_, length), length); - dsize_t end_index = std::min(Tensor::HandleNeg(stop_, length), length); - if (step_ > 0) { - for (; index < end_index; index += step_) { - indices.push_back(index); - } - } else { - for (; index > end_index; index += step_) { - indices.push_back(index); - } - } - return indices; - } - - bool valid() { return !(start_ == 0 && stop_ == 0 && step_ == 0); } - - dsize_t start_; - dsize_t stop_; - dsize_t step_; -}; class SliceOp : public TensorOp { public: - explicit SliceOp(std::vector indices) : 
indices_(std::move(indices)) {} - explicit SliceOp(Slice slice) : slice_(slice) {} - explicit SliceOp(bool all) : all_(all) {} + explicit SliceOp(std::vector slice_options) : slice_options_(slice_options) {} + explicit SliceOp(SliceOption slice_option) { slice_options_.push_back(slice_option); } + // shorthand notation for slicing along the first dimension + explicit SliceOp(Slice slice) { slice_options_.push_back(SliceOption(slice)); } + explicit SliceOp(bool all) { slice_options_.push_back(SliceOption(all)); } + explicit SliceOp(std::vector indices) { slice_options_.push_back(SliceOption(indices)); } ~SliceOp() override = default; @@ -72,13 +45,7 @@ class SliceOp : public TensorOp { std::string Name() const override { return kSliceOp; } private: - // only on of the following will be valid - // given indices to slice the Tensor. Empty vector if invalid. - std::vector indices_; - // Slice object. All start, stop and step are 0 if invalid. - Slice slice_; - // Flag to read all indcies in the dim. 
- bool all_ = false; + std::vector slice_options_ = {}; }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/dataset/transforms/c_transforms.py b/mindspore/dataset/transforms/c_transforms.py index 73752ff004..72ba5c8913 100644 --- a/mindspore/dataset/transforms/c_transforms.py +++ b/mindspore/dataset/transforms/c_transforms.py @@ -21,8 +21,8 @@ import numpy as np import mindspore.common.dtype as mstype import mindspore._c_dataengine as cde -from .validators import check_num_classes, check_de_type, check_fill_value, check_slice_op, check_mask_op, \ - check_pad_end, check_concat_type, check_random_transform_ops +from .validators import check_num_classes, check_de_type, check_fill_value, check_slice_option, check_slice_op, \ + check_mask_op, check_pad_end, check_concat_type, check_random_transform_ops from ..core.datatypes import mstype_to_detype @@ -94,6 +94,32 @@ class TypeCast(cde.TypeCastOp): super().__init__(data_type) +class _SliceOption(cde.SliceOption): + """ + Internal class SliceOption to be used with SliceOperation + + Args: + _SliceOption(Union[int, list(int), slice, None, Ellipsis, bool, _SliceOption]): + + 1. :py:obj:`int`: Slice this index only along the dimension. Negative index is supported. + 2. :py:obj:`list(int)`: Slice these indices along the dimension. Negative indices are supported. + 3. :py:obj:`slice`: Slice the generated indices from the slice object along the dimension. + 4. :py:obj:`None`: Slice the whole dimension. Similar to `:` in Python indexing. + 5. :py:obj:`Ellipsis`: Slice the whole dimension. Similar to `:` in Python indexing. + 6. :py:obj:`boolean`: Slice the whole dimension. Similar to `:` in Python indexing. 
+ """ + + @check_slice_option + def __init__(self, slice_option): + if isinstance(slice_option, int) and not isinstance(slice_option, bool): + slice_option = [slice_option] + elif slice_option is Ellipsis: + slice_option = True + elif slice_option is None: + slice_option = True + super().__init__(slice_option) + + class Slice(cde.SliceOp): """ Slice operation to extract a tensor out using the given n slices. @@ -102,15 +128,16 @@ class Slice(cde.SliceOp): (Currently only rank-1 tensors are supported). Args: - slices(Union[int, list(int), slice, None, Ellipses]): + *slices(Union[int, list(int), slice, None, Ellipsis]): Maximum `n` number of arguments to slice a tensor of rank `n`. One object in slices can be one of: - 1. :py:obj:`int`: Slice this index only. Negative index is supported. - 2. :py:obj:`list(int)`: Slice these indices ion the list only. Negative indices are supported. - 3. :py:obj:`slice`: Slice the generated indices from the slice object. Similar to `start:stop:step`. + 1. :py:obj:`int`: Slice this index only along the first dimension. Negative index is supported. + 2. :py:obj:`list(int)`: Slice these indices along the first dimension. Negative indices are supported. + 3. :py:obj:`slice`: Slice the generated indices from the slice object along the first dimension. + Similar to `start:stop:step`. 4. :py:obj:`None`: Slice the whole dimension. Similar to `:` in Python indexing. - 5. :py:obj:`Ellipses`: Slice all dimensions between the two slices. Similar to `...` in Python indexing. + 5. :py:obj:`Ellipsis`: Slice the whole dimension. Similar to `:` in Python indexing. 
Examples: >>> import mindspore.dataset.transforms.c_transforms as c_transforms @@ -130,16 +157,9 @@ class Slice(cde.SliceOp): @check_slice_op def __init__(self, *slices): - dim0 = slices[0] - if isinstance(dim0, int): - dim0 = [dim0] - elif dim0 is None: - dim0 = True - elif isinstance(dim0, slice): - dim0 = (dim0.start, dim0.stop, dim0.step) - elif dim0 is Ellipsis: - dim0 = True - super().__init__(dim0) + slice_input_ = list(slices) + slice_input_ = [_SliceOption(slice_dim) for slice_dim in slice_input_] + super().__init__(slice_input_) class Relational(IntEnum): diff --git a/mindspore/dataset/transforms/validators.py b/mindspore/dataset/transforms/validators.py index d317caf410..bf7aeae69f 100644 --- a/mindspore/dataset/transforms/validators.py +++ b/mindspore/dataset/transforms/validators.py @@ -19,8 +19,9 @@ import inspect import numpy as np from mindspore._c_expression import typing + from ..core.validator_helpers import parse_user_args, type_check, check_pos_int64, check_value, check_positive, \ - check_tensor_op + check_tensor_op, type_check_list # POS_INT_MIN is used to limit values from starting from 0 POS_INT_MIN = 1 @@ -100,17 +101,40 @@ def check_de_type(method): return new_method +def check_slice_option(method): + """Wrapper method to check the parameters of SliceOption.""" + + @wraps(method) + def new_method(self, *args, **kwargs): + [slice_option], _ = parse_user_args(method, *args, **kwargs) + from .c_transforms import _SliceOption + if slice_option is not None: + type_check(slice_option, (int, list, slice, bool, type(Ellipsis), _SliceOption), "slice_option") + + if isinstance(slice_option, list): + type_check_list(slice_option, (int,), "slice_option") + + return method(self, *args, **kwargs) + + return new_method + + def check_slice_op(method): """Wrapper method to check the parameters of slice.""" @wraps(method) - def new_method(self, *args): - for _, arg in enumerate(args): - type_check(arg, (int, slice, list, type(None), type(Ellipsis)), "arg") 
- if isinstance(arg, list): - for a in arg: - type_check(a, (int,), "a") - return method(self, *args) + def new_method(self, *args, **kwargs): + [slice_op], _ = parse_user_args(method, *args, **kwargs) + + for s in slice_op: + from .c_transforms import _SliceOption + if s is not None: + type_check(s, (int, list, slice, bool, type(Ellipsis), _SliceOption), "slice") + if isinstance(s, list) and s: + if isinstance(s[0], int): + type_check_list(s, (int,), "slice") + + return method(self, *args, **kwargs) return new_method diff --git a/tests/ut/cpp/dataset/CMakeLists.txt b/tests/ut/cpp/dataset/CMakeLists.txt index 63d08e3163..f40dd53d81 100644 --- a/tests/ut/cpp/dataset/CMakeLists.txt +++ b/tests/ut/cpp/dataset/CMakeLists.txt @@ -122,8 +122,9 @@ SET(DE_UT_SRCS solarize_op_test.cc swap_red_blue_test.cc distributed_sampler_test.cc - data_helper_test.cc - image_process_test.cc + data_helper_test.cc + image_process_test.cc + slice_op_test.cc ) if (ENABLE_PYTHON) diff --git a/tests/ut/cpp/dataset/slice_op_test.cc b/tests/ut/cpp/dataset/slice_op_test.cc new file mode 100644 index 0000000000..a788e1a973 --- /dev/null +++ b/tests/ut/cpp/dataset/slice_op_test.cc @@ -0,0 +1,664 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "common/common.h" +#include "minddata/dataset/kernels/data/slice_op.h" +#include "utils/log_adapter.h" + +using namespace mindspore::dataset; +using mindspore::LogStream; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::INFO; + +class MindDataTestSliceOp : public UT::Common { + protected: + MindDataTestSliceOp() {} +}; + +TEST_F(MindDataTestSliceOp, TestOpBasic) { + MS_LOG(INFO) << "Doing MindDataTestSliceOp-TestOpBasic."; + std::vector labels = {1, 1, 3, 2}; + std::shared_ptr input; + Tensor::CreateFromVector(labels, &input); + + std::shared_ptr output; + Slice slice = Slice(1, 3); + std::unique_ptr op(new SliceOp(SliceOption(slice))); + Status s = op->Compute(input, &output); + + std::vector out = {1, 3}; + std::shared_ptr expected; + Tensor::CreateFromVector(out, &expected); + + EXPECT_TRUE(s.IsOk()); + + ASSERT_TRUE(output->shape() == expected->shape()); + ASSERT_TRUE(output->type() == expected->type()); + + MS_LOG(DEBUG) << *output << std::endl; + MS_LOG(DEBUG) << *expected << std::endl; + + ASSERT_TRUE(*output == *expected); + MS_LOG(INFO) << "MindDataTestSliceOp-TestOp end."; +} + +TEST_F(MindDataTestSliceOp, TestOpNeg) { + MS_LOG(INFO) << "Doing MindDataTestSliceOp-TestOpNeg."; + std::vector labels = {1, 1, 3, 6, 4, 2}; + std::shared_ptr input; + Tensor::CreateFromVector(labels, &input); + + std::shared_ptr output; + Slice slice = Slice(-1, -5, -1); + std::unique_ptr op(new SliceOp(slice)); + Status s = op->Compute(input, &output); + + std::vector out = {2, 4, 6, 3}; + std::shared_ptr expected; + Tensor::CreateFromVector(out, &expected); + + EXPECT_TRUE(s.IsOk()); + ASSERT_TRUE(output->shape() == expected->shape()); + ASSERT_TRUE(output->type() == expected->type()); + + MS_LOG(DEBUG) << *output << std::endl; + MS_LOG(DEBUG) << *expected << std::endl; + + ASSERT_TRUE(*output == *expected); + MS_LOG(INFO) << "MindDataTestSliceOp-TestOp end."; +} + +TEST_F(MindDataTestSliceOp, TestOp2D) { + MS_LOG(INFO) << "Doing 
MindDataTestSliceOp-TestOp2D."; + std::vector labels = {1, 1, 3, 2, 3, 2}; + std::shared_ptr input; + Tensor::CreateFromVector(labels, TensorShape({2, 3}), &input); + + std::shared_ptr output; + Slice slice1_ = Slice(0, 2); + Slice slice2_ = Slice(0, 1); + + std::vector slices_ = {SliceOption(slice1_), SliceOption(slice2_)}; + std::unique_ptr op(new SliceOp(slices_)); + Status s = op->Compute(input, &output); + + std::vector out = {1, 2}; + std::shared_ptr expected; + Tensor::CreateFromVector(out, TensorShape({2, 1}), &expected); + + EXPECT_TRUE(s.IsOk()); + ASSERT_TRUE(output->shape() == expected->shape()); + ASSERT_TRUE(output->type() == expected->type()); + + MS_LOG(DEBUG) << *output << std::endl; + MS_LOG(DEBUG) << *expected << std::endl; + + ASSERT_TRUE(*output == *expected); + MS_LOG(INFO) << "MindDataTestSliceOp-TestOp end."; +} + +TEST_F(MindDataTestSliceOp, TestOp3D) { + MS_LOG(INFO) << "Doing MindDataTestSliceOp-TestOp3D."; + std::vector labels = {1, 2, 3, 4, 5, 6, 7, 8}; + std::shared_ptr input; + Tensor::CreateFromVector(labels, TensorShape({2, 2, 2}), &input); + + std::shared_ptr output; + Slice slice1_ = Slice(0, 1); + Slice slice2_ = Slice(0, 2); + Slice slice3_ = Slice(0, 2); + std::vector slices_ = {SliceOption(slice1_), SliceOption(slice2_), SliceOption(slice3_)}; + std::unique_ptr op(new SliceOp(slices_)); + Status s = op->Compute(input, &output); + + std::vector out = {1, 2, 3, 4}; + std::shared_ptr expected; + Tensor::CreateFromVector(out, TensorShape({1, 2, 2}), &expected); + + EXPECT_TRUE(s.IsOk()); + + ASSERT_TRUE(output->shape() == expected->shape()); + ASSERT_TRUE(output->type() == expected->type()); + MS_LOG(DEBUG) << *output << std::endl; + MS_LOG(DEBUG) << *expected << std::endl; + + ASSERT_TRUE(*output == *expected); + MS_LOG(INFO) << "MindDataTestSliceOp-TestOp end."; +} + +TEST_F(MindDataTestSliceOp, TestOpReturnNothing) { + MS_LOG(INFO) << "Doing MindDataTestSliceOp-TestOpReturnNothing."; + std::vector labels = {1, 2, 3, 4, 5, 6, 7, 
8}; + std::shared_ptr input; + Tensor::CreateFromVector(labels, TensorShape({2, 2, 2}), &input); + + std::shared_ptr output; + Slice slice1_ = Slice(0, 1); + Slice slice2_ = Slice(2, 1); + Slice slice3_ = Slice(0, 2); + std::vector slices_ = {SliceOption(slice1_), SliceOption(slice2_), SliceOption(slice3_)}; + std::unique_ptr op(new SliceOp(slices_)); + Status s = op->Compute(input, &output); + + std::vector out = {}; + std::shared_ptr expected; + Tensor::CreateFromVector(out, TensorShape({1, 0, 2}), &expected); + + EXPECT_TRUE(s.IsOk()); + ASSERT_TRUE(output->shape() == expected->shape()); + ASSERT_TRUE(output->type() == expected->type()); + + MS_LOG(DEBUG) << *output << std::endl; + MS_LOG(DEBUG) << *expected << std::endl; + + ASSERT_TRUE(*output == *expected); + MS_LOG(INFO) << "MindDataTestSliceOp-TestOp end."; +} + +TEST_F(MindDataTestSliceOp, TestOpPartialSlice) { + MS_LOG(INFO) << "Doing MindDataTestSliceOp-TestOpPartialSlice."; + std::vector labels = {1, 2, 3, 4, 5, 6, 7, 8}; + std::shared_ptr input; + Tensor::CreateFromVector(labels, TensorShape({4, 2}), &input); + + std::shared_ptr output; + Slice slice1_ = Slice(0, 2); + std::unique_ptr op(new SliceOp(slice1_)); + Status s = op->Compute(input, &output); + + std::vector out = {1, 2, 3, 4}; + std::shared_ptr expected; + Tensor::CreateFromVector(out, TensorShape({2, 2}), &expected); + + EXPECT_TRUE(s.IsOk()); + ASSERT_TRUE(output->shape() == expected->shape()); + ASSERT_TRUE(output->type() == expected->type()); + + MS_LOG(DEBUG) << *output << std::endl; + MS_LOG(DEBUG) << *expected << std::endl; + + ASSERT_TRUE(*output == *expected); + MS_LOG(INFO) << "MindDataTestSliceOp-TestOp end."; +} + +TEST_F(MindDataTestSliceOp, TestOpBool1) { + MS_LOG(INFO) << "Doing MindDataTestSliceOp-TestOpBool1."; + std::vector labels = {1, 2, 3, 4, 5, 6, 7, 8}; + std::shared_ptr input; + Tensor::CreateFromVector(labels, TensorShape({2, 2, 2}), &input); + + std::shared_ptr output; + std::unique_ptr op(new 
SliceOp(SliceOption(true))); + Status s = op->Compute(input, &output); + + std::shared_ptr expected; + Tensor::CreateFromVector(labels, TensorShape({2, 2, 2}), &expected); + + EXPECT_TRUE(s.IsOk()); + ASSERT_TRUE(output->shape() == expected->shape()); + ASSERT_TRUE(output->type() == expected->type()); + + MS_LOG(DEBUG) << *output << std::endl; + MS_LOG(DEBUG) << *expected << std::endl; + + ASSERT_TRUE(*output == *expected); + MS_LOG(INFO) << "MindDataTestSliceOp-TestOp end."; +} + +TEST_F(MindDataTestSliceOp, TestOpBool2) { + MS_LOG(INFO) << "Doing MindDataTestSliceOp-TestOpBool2."; + std::vector labels = {1, 2, 3, 4, 5, 6, 7, 8}; + std::shared_ptr input; + Tensor::CreateFromVector(labels, TensorShape({2, 2, 2}), &input); + + std::shared_ptr output; + std::unique_ptr op(new SliceOp(true)); + Status s = op->Compute(input, &output); + + std::shared_ptr expected; + Tensor::CreateFromVector(labels, TensorShape({2, 2, 2}), &expected); + + EXPECT_TRUE(s.IsOk()); + ASSERT_TRUE(output->shape() == expected->shape()); + ASSERT_TRUE(output->type() == expected->type()); + + MS_LOG(DEBUG) << *output << std::endl; + MS_LOG(DEBUG) << *expected << std::endl; + + ASSERT_TRUE(*output == *expected); + MS_LOG(INFO) << "MindDataTestSliceOp-TestOp end."; +} + +// testing passing in just indices +TEST_F(MindDataTestSliceOp, TestOpIndices1) { + MS_LOG(INFO) << "Doing MindDataTestSliceOp-TestOpIndices1."; + std::vector labels = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + std::shared_ptr input; + Tensor::CreateFromVector(labels, TensorShape({3, 3}), &input); + + std::shared_ptr output; + std::vector indices; + std::vector index1 = {1, 2}; + std::vector index2 = {0, 1}; + indices.emplace_back(SliceOption(index1)); + indices.emplace_back(SliceOption(index2)); + std::unique_ptr op(new SliceOp(indices)); + Status s = op->Compute(input, &output); + + std::vector out = {4, 5, 7, 8}; + std::shared_ptr expected; + Tensor::CreateFromVector(out, TensorShape({2, 2}), &expected); + + EXPECT_TRUE(s.IsOk()); + 
ASSERT_TRUE(output->shape() == expected->shape()); + ASSERT_TRUE(output->type() == expected->type()); + + MS_LOG(DEBUG) << *output << std::endl; + MS_LOG(DEBUG) << *expected << std::endl; + + ASSERT_TRUE(*output == *expected); + MS_LOG(INFO) << "MindDataTestSliceOp-TestOp end."; +} + +// testing passing in just indices +TEST_F(MindDataTestSliceOp, TestOpIndices2) { + MS_LOG(INFO) << "Doing MindDataTestSliceOp-TestOpIndices2."; + std::vector labels = {1, 2, 3, 4, 5, 6, 7, 8}; + std::shared_ptr input; + Tensor::CreateFromVector(labels, TensorShape({2, 2, 2}), &input); + + std::shared_ptr output; + std::vector indices = {0}; + std::unique_ptr op(new SliceOp(indices)); + Status s = op->Compute(input, &output); + + std::vector out = {1, 2, 3, 4}; + + std::shared_ptr expected; + Tensor::CreateFromVector(out, TensorShape({1, 2, 2}), &expected); + + EXPECT_TRUE(s.IsOk()); + ASSERT_TRUE(output->shape() == expected->shape()); + ASSERT_TRUE(output->type() == expected->type()); + + MS_LOG(DEBUG) << *output << std::endl; + MS_LOG(DEBUG) << *expected << std::endl; + + ASSERT_TRUE(*output == *expected); + MS_LOG(INFO) << "MindDataTestSliceOp-TestOp end."; +} + +// Test Index Object +TEST_F(MindDataTestSliceOp, TestOpSliceAndIndex) { + MS_LOG(INFO) << "Doing MindDataTestSliceOp-TestOpSliceAndIndex."; + std::vector labels = {1, 2, 3, 4, 5, 6, 7, 8}; + std::shared_ptr input; + Tensor::CreateFromVector(labels, TensorShape({2, 2, 2}), &input); + + std::shared_ptr output; + std::vector indices = {0}; + Slice slice = Slice(1); + std::vector slice_options = {SliceOption(indices), SliceOption(slice)}; + std::unique_ptr op(new SliceOp(slice_options)); + Status s = op->Compute(input, &output); + + std::vector out = {1, 2}; + std::shared_ptr expected; + Tensor::CreateFromVector(out, TensorShape({1, 1, 2}), &expected); + + EXPECT_TRUE(s.IsOk()); + ASSERT_TRUE(output->shape() == expected->shape()); + ASSERT_TRUE(output->type() == expected->type()); + + MS_LOG(DEBUG) << *output << std::endl; + 
MS_LOG(DEBUG) << *expected << std::endl; + + ASSERT_TRUE(*output == *expected); + MS_LOG(INFO) << "MindDataTestSliceOp-TestOp end."; +} + +TEST_F(MindDataTestSliceOp, TestOpLargerStep) { + MS_LOG(INFO) << "Doing MindDataTestSliceOp-TestOpLargerStep."; + std::vector labels = {1, 2, 3, 4, 5}; + std::shared_ptr input; + Tensor::CreateFromVector(labels, TensorShape({1, 5}), &input); + + std::shared_ptr output; + Slice slice1_ = Slice(0, 1); + Slice slice2_ = Slice(0, 4, 2); + + std::vector slice_options = {SliceOption(slice1_), SliceOption(slice2_)}; + std::unique_ptr op(new SliceOp(slice_options)); + Status s = op->Compute(input, &output); + + std::vector out = {1, 3}; + std::shared_ptr expected; + + Tensor::CreateFromVector(out, TensorShape({1, 2}), &expected); + + EXPECT_TRUE(s.IsOk()); + ASSERT_TRUE(output->shape() == expected->shape()); + ASSERT_TRUE(output->type() == expected->type()); + + MS_LOG(DEBUG) << *output << std::endl; + MS_LOG(DEBUG) << *expected << std::endl; + + ASSERT_TRUE(*output == *expected); + MS_LOG(INFO) << "MindDataTestSliceOp-TestOp end."; +} + +TEST_F(MindDataTestSliceOp, TestOpIndicesError1) { + MS_LOG(INFO) << "Doing MindDataTestSliceOp-TestOpIndicesError1."; + std::vector labels = {1, 2, 3, 4, 5, 6, 7, 8}; + std::shared_ptr input; + Tensor::CreateFromVector(labels, TensorShape({2, 2, 2}), &input); + + std::shared_ptr output; + std::unique_ptr op(new SliceOp(Slice())); + Status s = op->Compute(input, &output); + + EXPECT_FALSE(s.IsOk()); + EXPECT_NE(s.ToString().find("Both indices and slices can not be empty."), std::string::npos); + + MS_LOG(INFO) << "MindDataTestSliceOp-TestOp end."; +} + +TEST_F(MindDataTestSliceOp, TestOpIndicesError2) { + MS_LOG(INFO) << "Doing MindDataTestSliceOp-TestOpIndicesError2."; + std::vector labels = {1, 2, 3, 4, 5, 6, 7, 8}; + std::shared_ptr input; + Tensor::CreateFromVector(labels, TensorShape({2, 2, 2}), &input); + + std::shared_ptr output; + SliceOption slice_option = SliceOption(Slice(2)); + std::vector 
indices = {0}; + slice_option.indices_ = indices; + std::unique_ptr op(new SliceOp(slice_option)); + Status s = op->Compute(input, &output); + + EXPECT_FALSE(s.IsOk()); + EXPECT_NE(s.ToString().find("Both indices and slices can not be given."), std::string::npos); + + MS_LOG(INFO) << "MindDataTestSliceOp-TestOp end."; +} + +TEST_F(MindDataTestSliceOp, TestOpIndicesError3) { + MS_LOG(INFO) << "Doing MindDataTestSliceOp-TestOpIndicesError3."; + std::vector labels = {1, 2, 3, 4, 5, 6, 7, 8}; + std::shared_ptr input; + Tensor::CreateFromVector(labels, TensorShape({8}), &input); + + std::shared_ptr output; + std::vector indices = {8}; + + std::unique_ptr op(new SliceOp(SliceOption(indices))); + Status s = op->Compute(input, &output); + + EXPECT_FALSE(s.IsOk()); + EXPECT_NE(s.ToString().find("Index 8 is out of bounds."), std::string::npos); + + MS_LOG(INFO) << "MindDataTestSliceOp-TestOp end."; +} + +TEST_F(MindDataTestSliceOp, TestOpBasicString) { + MS_LOG(INFO) << "Doing MindDataTestSliceOp-TestOpBasicString."; + std::vector labels = {"1", "1", "3", "2d"}; + std::shared_ptr input; + Tensor::CreateFromVector(labels, &input); + + std::shared_ptr output; + Slice slice = Slice(1, 3); + std::unique_ptr op(new SliceOp(slice)); + Status s = op->Compute(input, &output); + + std::vector out = {"1", "3"}; + std::shared_ptr expected; + Tensor::CreateFromVector(out, &expected); + + EXPECT_TRUE(s.IsOk()); + ASSERT_TRUE(output->shape() == expected->shape()); + ASSERT_TRUE(output->type() == expected->type()); + + MS_LOG(DEBUG) << *output << std::endl; + MS_LOG(DEBUG) << *expected << std::endl; + + ASSERT_TRUE(*output == *expected); + MS_LOG(INFO) << "MindDataTestSliceOp-TestOp end."; +} + +TEST_F(MindDataTestSliceOp, TestOp2DString) { + MS_LOG(INFO) << "Doing MindDataTestSliceOp-TestOp2DString."; + std::vector labels = {"1a", "1b", "3", "2", "3", "2"}; + std::shared_ptr input; + Tensor::CreateFromVector(labels, TensorShape({2, 3}), &input); + + std::shared_ptr output; + Slice slice1_ 
= Slice(0, 2); + Slice slice2_ = Slice(0, 2); + + std::vector slice_option = {SliceOption(slice1_), SliceOption(slice2_)}; + std::unique_ptr op(new SliceOp(slice_option)); + Status s = op->Compute(input, &output); + + std::vector out = {"1a", "1b", "2", "3"}; + std::shared_ptr expected; + Tensor::CreateFromVector(out, TensorShape({2, 2}), &expected); + + EXPECT_TRUE(s.IsOk()); + ASSERT_TRUE(output->shape() == expected->shape()); + ASSERT_TRUE(output->type() == expected->type()); + + MS_LOG(DEBUG) << *output << std::endl; + MS_LOG(DEBUG) << *expected << std::endl; + + ASSERT_TRUE(*output == *expected); + MS_LOG(INFO) << "MindDataTestSliceOp-TestOp end."; +} + +TEST_F(MindDataTestSliceOp, TestOpPartialSliceString) { + MS_LOG(INFO) << "Doing MindDataTestSliceOp-TestOpPartialSliceString."; + std::vector labels = {"1a", "1b", "3", "2", "3", "2", "4", "66"}; + std::shared_ptr input; + Tensor::CreateFromVector(labels, TensorShape({2, 2, 2}), &input); + + std::shared_ptr output; + Slice slice1 = Slice(0, 2); + Slice slice2 = Slice(0, 1); + + std::vector slice_options = {SliceOption(slice1), SliceOption(slice2)}; + std::unique_ptr op(new SliceOp(slice_options)); + Status s = op->Compute(input, &output); + + std::vector out = {"1a", "1b", "3", "2"}; + std::shared_ptr expected; + Tensor::CreateFromVector(out, TensorShape({2, 1, 2}), &expected); + + EXPECT_TRUE(s.IsOk()); + ASSERT_TRUE(output->shape() == expected->shape()); + ASSERT_TRUE(output->type() == expected->type()); + + MS_LOG(DEBUG) << *output << std::endl; + MS_LOG(DEBUG) << *expected << std::endl; + + ASSERT_TRUE(*output == *expected); + MS_LOG(INFO) << "MindDataTestSliceOp-TestOp end."; +} + +TEST_F(MindDataTestSliceOp, TestOpIndicesString) { + MS_LOG(INFO) << "Doing MindDataTestSliceOp-TestOpIndicesString."; + std::vector labels = {"1", "2", "3", "4", "5", "6", "7", "8", "9"}; + std::shared_ptr input; + Tensor::CreateFromVector(labels, TensorShape({3, 3}), &input); + + std::shared_ptr output; + std::vector index1 
= {1, 2}; + std::vector index2 = {0, 1}; + std::vector slice_options = {SliceOption(index1), SliceOption(index2)}; + + std::unique_ptr op(new SliceOp(slice_options)); + Status s = op->Compute(input, &output); + + std::vector out = {"4", "5", "7", "8"}; + std::shared_ptr expected; + Tensor::CreateFromVector(out, TensorShape({2, 2}), &expected); + + EXPECT_TRUE(s.IsOk()); + ASSERT_TRUE(output->shape() == expected->shape()); + ASSERT_TRUE(output->type() == expected->type()); + + MS_LOG(DEBUG) << *output << std::endl; + MS_LOG(DEBUG) << *expected << std::endl; + + ASSERT_TRUE(*output == *expected); + MS_LOG(INFO) << "MindDataTestSliceOp-TestOp end."; +} + +TEST_F(MindDataTestSliceOp, TestOpIndicesString2) { + MS_LOG(INFO) << "Doing MindDataTestSliceOp-TestOpIndicesString2."; + std::vector labels = {"1", "2", "3", "4", "5", "6", "7", "8"}; + std::shared_ptr input; + Tensor::CreateFromVector(labels, TensorShape({2, 2, 2}), &input); + + std::shared_ptr output; + std::vector indices = {0}; + std::unique_ptr op(new SliceOp(indices)); + Status s = op->Compute(input, &output); + + std::vector out = {"1", "2", "3", "4"}; + + std::shared_ptr expected; + Tensor::CreateFromVector(out, TensorShape({1, 2, 2}), &expected); + + EXPECT_TRUE(s.IsOk()); + ASSERT_TRUE(output->shape() == expected->shape()); + ASSERT_TRUE(output->type() == expected->type()); + + MS_LOG(DEBUG) << *output << std::endl; + MS_LOG(DEBUG) << *expected << std::endl; + + ASSERT_TRUE(*output == *expected); + MS_LOG(INFO) << "MindDataTestSliceOp-TestOp end."; +} + +TEST_F(MindDataTestSliceOp, TestOpSliceAndIndexString) { + MS_LOG(INFO) << "Doing MindDataTestSliceOp-TestOpSliceAndIndexString."; + std::vector labels = {"1", "2", "3", "4", "5", "6", "7", "8"}; + std::shared_ptr input; + Tensor::CreateFromVector(labels, TensorShape({2, 2, 2}), &input); + + std::shared_ptr output; + std::vector indices = {0}; + Slice slice = Slice(1); + std::vector slice_options = {SliceOption(indices), SliceOption(slice)}; + 
std::unique_ptr op(new SliceOp(slice_options)); + Status s = op->Compute(input, &output); + + std::vector out = {"1", "2"}; + std::shared_ptr expected; + Tensor::CreateFromVector(out, TensorShape({1, 1, 2}), &expected); + + EXPECT_TRUE(s.IsOk()); + ASSERT_TRUE(output->shape() == expected->shape()); + ASSERT_TRUE(output->type() == expected->type()); + + MS_LOG(DEBUG) << *output << std::endl; + MS_LOG(DEBUG) << *expected << std::endl; + + ASSERT_TRUE(*output == *expected); + MS_LOG(INFO) << "MindDataTestSliceOp-TestOp end."; +} + +TEST_F(MindDataTestSliceOp, TestOpLargerStepString) { + MS_LOG(INFO) << "Doing MindDataTestSliceOp-TestOpLargerStepString."; + std::vector labels = {"1", "2", "3", "4", "5"}; + std::shared_ptr input; + Tensor::CreateFromVector(labels, TensorShape({1, 5}), &input); + + std::shared_ptr output; + Slice slice1_ = Slice(0, 1); + Slice slice2_ = Slice(0, 4, 2); + + std::vector slice_options = {SliceOption(slice1_), SliceOption(slice2_)}; + std::unique_ptr op(new SliceOp(slice_options)); + Status s = op->Compute(input, &output); + + std::vector out = {"1", "3"}; + std::shared_ptr expected; + + Tensor::CreateFromVector(out, TensorShape({1, 2}), &expected); + + EXPECT_TRUE(s.IsOk()); + ASSERT_TRUE(output->shape() == expected->shape()); + ASSERT_TRUE(output->type() == expected->type()); + + MS_LOG(DEBUG) << *output << std::endl; + MS_LOG(DEBUG) << *expected << std::endl; + + ASSERT_TRUE(*output == *expected); + MS_LOG(INFO) << "MindDataTestSliceOp-TestOp end."; +} + +TEST_F(MindDataTestSliceOp, TestOpIndicesErrorString1) { + MS_LOG(INFO) << "Doing MindDataTestSliceOp-TestOpIndicesErrorString1."; + std::vector labels = {"1", "2", "3", "4", "5", "6", "7", "8"}; + std::shared_ptr input; + Tensor::CreateFromVector(labels, TensorShape({2, 2, 2}), &input); + + std::shared_ptr output; + std::unique_ptr op(new SliceOp(Slice())); + Status s = op->Compute(input, &output); + + EXPECT_FALSE(s.IsOk()); + EXPECT_NE(s.ToString().find("Both indices and slices can 
not be empty."), std::string::npos); + + MS_LOG(INFO) << "MindDataTestSliceOp-TestOp end."; +} + +TEST_F(MindDataTestSliceOp, TestOpIndicesErrorString2) { + MS_LOG(INFO) << "Doing MindDataTestSliceOp-TestOpIndicesErrorString2."; + std::vector labels = {"1", "2", "3", "4", "5", "6", "7", "8"}; + std::shared_ptr input; + Tensor::CreateFromVector(labels, TensorShape({2, 2, 2}), &input); + + std::shared_ptr output; + SliceOption slice_option = SliceOption(Slice(2)); + std::vector indices = {0}; + slice_option.indices_ = indices; + std::unique_ptr op(new SliceOp(slice_option)); + Status s = op->Compute(input, &output); + + EXPECT_FALSE(s.IsOk()); + EXPECT_NE(s.ToString().find("Both indices and slices can not be given."), std::string::npos); + + MS_LOG(INFO) << "MindDataTestSliceOp-TestOp end."; +} + +TEST_F(MindDataTestSliceOp, TestOpIndicesErrorString3) { + MS_LOG(INFO) << "Doing MindDataTestSliceOp-TestOpIndicesErrorString3."; + std::vector labels = {1, 2, 3, 4, 5, 6, 7, 8}; + std::shared_ptr input; + Tensor::CreateFromVector(labels, TensorShape({2, 4}), &input); + + std::shared_ptr output; + std::vector indices = {2}; + + std::unique_ptr op(new SliceOp(SliceOption(indices))); + Status s = op->Compute(input, &output); + + EXPECT_FALSE(s.IsOk()); + EXPECT_NE(s.ToString().find("Index 2 is out of bounds."), std::string::npos); + + MS_LOG(INFO) << "MindDataTestSliceOp-TestOp end."; +} diff --git a/tests/ut/cpp/dataset/tensor_test.cc b/tests/ut/cpp/dataset/tensor_test.cc index 758b194835..f789533b80 100644 --- a/tests/ut/cpp/dataset/tensor_test.cc +++ b/tests/ut/cpp/dataset/tensor_test.cc @@ -424,12 +424,11 @@ TEST_F(MindDataTestTensorDE, TensorSlice) { Tensor::CreateFromVector(std::vector{0, 1, 2, 3, 4}, &t); std::shared_ptr t2; auto x = std::vector{0, 3, 4}; + std::vector slice_options = {SliceOption(x)}; std::shared_ptr expected; Tensor::CreateFromVector(x, &expected); - t->Slice(&t2, x); + t->Slice(&t2, slice_options); ASSERT_EQ(*t2, *expected); - t->Slice(&t2, 
std::vector{0, 1, 2, 3, 4}); - ASSERT_EQ(*t2, *t); } TEST_F(MindDataTestTensorDE, TensorPartialInsert) { diff --git a/tests/ut/python/dataset/test_c_random_choice.py b/tests/ut/python/dataset/test_c_random_choice.py index 335274a1f6..ebbfa8e9d4 100644 --- a/tests/ut/python/dataset/test_c_random_choice.py +++ b/tests/ut/python/dataset/test_c_random_choice.py @@ -44,7 +44,7 @@ def test_random_choice(): res2 = test_config([[0, 1, 2]], [ops.Compose([ops.Duplicate(), ops.Concatenate()]), ops.Compose([ops.Slice([0, 1]), ops.OneHot(2)])]) assert res2 in [[[[1, 0], [0, 1]]], [[0, 1, 2, 0, 1, 2]]] - # Test RandomChoice where there is only 1 operation + # Test RandomChoice when there is only 1 operation assert test_config([[4, 3], [2, 1]], [ops.Slice([0])]) == [[4], [2]] diff --git a/tests/ut/python/dataset/test_slice_op.py b/tests/ut/python/dataset/test_slice_op.py index 443e850c64..1fbe52da33 100644 --- a/tests/ut/python/dataset/test_slice_op.py +++ b/tests/ut/python/dataset/test_slice_op.py @@ -22,193 +22,291 @@ import mindspore.dataset as ds import mindspore.dataset.transforms.c_transforms as ops -def slice_compare(array, indexing): +def slice_compare(array, indexing, expected_array): data = ds.NumpySlicesDataset([array]) - array = np.array(array) - data = data.map(operations=ops.Slice(indexing)) - for d in data.create_tuple_iterator(output_numpy=True): - if indexing is None: - array = array[:] - else: - array = array[indexing] - np.testing.assert_array_equal(array, d[0]) + if isinstance(indexing, list) and indexing and not isinstance(indexing[0], int): + data = data.map(operations=ops.Slice(*indexing)) + else: + data = data.map(operations=ops.Slice(indexing)) + for d in data.create_dict_iterator(output_numpy=True): + np.testing.assert_array_equal(expected_array, d['column_0']) def test_slice_all(): - slice_compare([1, 2, 3, 4, 5], None) - slice_compare([1, 2, 3, 4, 5], ...) 
+ slice_compare([1, 2, 3, 4, 5], None, [1, 2, 3, 4, 5]) + slice_compare([1, 2, 3, 4, 5], ..., [1, 2, 3, 4, 5]) + slice_compare([1, 2, 3, 4, 5], True, [1, 2, 3, 4, 5]) def test_slice_single_index(): - slice_compare([1, 2, 3, 4, 5], 0) - slice_compare([1, 2, 3, 4, 5], 4) - slice_compare([1, 2, 3, 4, 5], 2) - slice_compare([1, 2, 3, 4, 5], -1) - slice_compare([1, 2, 3, 4, 5], -5) - slice_compare([1, 2, 3, 4, 5], -3) + slice_compare([1, 2, 3, 4, 5], 0, [1]) + slice_compare([1, 2, 3, 4, 5], -3, [3]) + slice_compare([1, 2, 3, 4, 5], [0], [1]) + + +def test_slice_indices_multidim(): + slice_compare([[1, 2, 3, 4, 5]], [[0], [0]], 1) + slice_compare([[1, 2, 3, 4, 5]], [[0], [0, 3]], [[1, 4]]) + slice_compare([[1, 2, 3, 4, 5]], [0], [[1, 2, 3, 4, 5]]) + slice_compare([[1, 2, 3, 4, 5]], [[0], [0, -4]], [[1, 2]]) def test_slice_list_index(): - slice_compare([1, 2, 3, 4, 5], [0, 1, 4]) - slice_compare([1, 2, 3, 4, 5], [4, 1, 0]) - slice_compare([1, 2, 3, 4, 5], [-1, 1, 0]) - slice_compare([1, 2, 3, 4, 5], [-1, -4, -2]) - slice_compare([1, 2, 3, 4, 5], [3, 3, 3]) - slice_compare([1, 2, 3, 4, 5], [1, 1, 1, 1, 1]) + slice_compare([1, 2, 3, 4, 5], [0, 1, 4], [1, 2, 5]) + slice_compare([1, 2, 3, 4, 5], [4, 1, 0], [5, 2, 1]) + slice_compare([1, 2, 3, 4, 5], [-1, 1, 0], [5, 2, 1]) + slice_compare([1, 2, 3, 4, 5], [-1, -4, -2], [5, 2, 4]) + slice_compare([1, 2, 3, 4, 5], [3, 3, 3], [4, 4, 4]) -def test_slice_slice_obj_2s(): - slice_compare([1, 2, 3, 4, 5], slice(0, 2)) - slice_compare([1, 2, 3, 4, 5], slice(2, 4)) - slice_compare([1, 2, 3, 4, 5], slice(4, 10)) +def test_slice_index_and_slice(): + slice_compare([[1, 2, 3, 4, 5]], [slice(0, 1), [4]], [[5]]) + slice_compare([[1, 2, 3, 4, 5]], [[0], slice(0, 2)], [[1, 2]]) + slice_compare([[1, 2, 3, 4], [5, 6, 7, 8]], [[1], slice(2, 4, 1)], [[7, 8]]) def test_slice_slice_obj_1s(): - slice_compare([1, 2, 3, 4, 5], slice(1)) - slice_compare([1, 2, 3, 4, 5], slice(4)) - slice_compare([1, 2, 3, 4, 5], slice(10)) + slice_compare([1, 2, 3, 4, 
5], slice(1), [1]) + slice_compare([1, 2, 3, 4, 5], slice(4), [1, 2, 3, 4]) + slice_compare([[1, 2, 3, 4], [5, 6, 7, 8]], [slice(2), slice(2)], [[1, 2], [5, 6]]) + slice_compare([1, 2, 3, 4, 5], slice(10), [1, 2, 3, 4, 5]) + + +def test_slice_slice_obj_2s(): + slice_compare([1, 2, 3, 4, 5], slice(0, 2), [1, 2]) + slice_compare([1, 2, 3, 4, 5], slice(2, 4), [3, 4]) + slice_compare([[1, 2, 3, 4], [5, 6, 7, 8]], [slice(0, 2), slice(1, 2)], [[2], [6]]) + slice_compare([1, 2, 3, 4, 5], slice(4, 10), [5]) + + +def test_slice_slice_obj_2s_multidim(): + slice_compare([[1, 2, 3, 4, 5]], [slice(0, 1)], [[1, 2, 3, 4, 5]]) + slice_compare([[1, 2, 3, 4, 5]], [slice(0, 1), slice(4)], [[1, 2, 3, 4]]) + slice_compare([[1, 2, 3, 4, 5]], [slice(0, 1), slice(0, 3)], [[1, 2, 3]]) + slice_compare([[1, 2, 3, 4], [5, 6, 7, 8]], [slice(0, 2, 2), slice(2, 4, 1)], [[3, 4]]) + slice_compare([[1, 2, 3, 4], [5, 6, 7, 8]], [slice(1, 0, -1), slice(1)], [[5]]) def test_slice_slice_obj_3s(): - slice_compare([1, 2, 3, 4, 5], slice(0, 2, 1)) - slice_compare([1, 2, 3, 4, 5], slice(0, 4, 1)) - slice_compare([1, 2, 3, 4, 5], slice(0, 10, 1)) - slice_compare([1, 2, 3, 4, 5], slice(0, 5, 2)) - slice_compare([1, 2, 3, 4, 5], slice(0, 2, 2)) - slice_compare([1, 2, 3, 4, 5], slice(0, 1, 2)) - slice_compare([1, 2, 3, 4, 5], slice(4, 5, 1)) - slice_compare([1, 2, 3, 4, 5], slice(2, 5, 3)) + """ + Test passing in all parameters to the slice objects + """ + slice_compare([1, 2, 3, 4, 5], slice(0, 2, 1), [1, 2]) + slice_compare([1, 2, 3, 4, 5], slice(0, 4, 1), [1, 2, 3, 4]) + slice_compare([1, 2, 3, 4, 5], slice(0, 10, 1), [1, 2, 3, 4, 5]) + slice_compare([1, 2, 3, 4, 5], slice(0, 5, 2), [1, 3, 5]) + slice_compare([1, 2, 3, 4, 5], slice(0, 2, 2), [1]) + slice_compare([1, 2, 3, 4, 5], slice(0, 1, 2), [1]) + slice_compare([1, 2, 3, 4, 5], slice(4, 5, 1), [5]) + slice_compare([1, 2, 3, 4, 5], slice(2, 5, 3), [3]) + slice_compare([[1, 2, 3, 4], [5, 6, 7, 8]], [slice(0, 2, 1)], [[1, 2, 3, 4], [5, 6, 7, 8]]) + 
slice_compare([[1, 2, 3, 4], [5, 6, 7, 8]], [slice(0, 2, 3)], [[1, 2, 3, 4]]) + slice_compare([[1, 2, 3, 4], [5, 6, 7, 8]], [slice(0, 2, 2), slice(0, 1, 2)], [[1]]) + slice_compare([[1, 2, 3, 4], [5, 6, 7, 8]], [slice(0, 2, 1), slice(0, 1, 2)], [[1], [5]]) + slice_compare([[[1, 2, 3, 4], [5, 6, 7, 8]], [[1, 2, 3, 4], [5, 6, 7, 8]]], + [slice(0, 2, 1), slice(0, 1, 1), slice(0, 4, 2)], + [[[1, 3]], [[1, 3]]]) + + +def test_slice_obj_3s_double(): + slice_compare([1., 2., 3., 4., 5.], slice(0, 2, 1), [1., 2.]) + slice_compare([1., 2., 3., 4., 5.], slice(0, 4, 1), [1., 2., 3., 4.]) + slice_compare([1., 2., 3., 4., 5.], slice(0, 5, 2), [1., 3., 5.]) + slice_compare([1., 2., 3., 4., 5.], slice(0, 2, 2), [1.]) + slice_compare([1., 2., 3., 4., 5.], slice(0, 1, 2), [1.]) + slice_compare([1., 2., 3., 4., 5.], slice(4, 5, 1), [5.]) + slice_compare([1., 2., 3., 4., 5.], slice(2, 5, 3), [3.]) + + +def test_out_of_bounds_slicing(): + """ + Test passing indices outside of the input to the slice objects + """ + slice_compare([1, 2, 3, 4, 5], slice(-15, -1), [1, 2, 3, 4]) + slice_compare([1, 2, 3, 4, 5], slice(-15, 15), [1, 2, 3, 4, 5]) + slice_compare([1, 2, 3, 4], slice(-15, -7), []) def test_slice_multiple_rows(): - dataset = [[1, 2], [3, 4, 5], [1], [1, 2, 3, 4, 5, 6, 7]] + """ + Test passing in multiple rows + """ + dataset = [[1], [3, 4, 5], [1, 2], [1, 2, 3, 4, 5, 6, 7]] + exp_dataset = [[], [4, 5], [2], [2, 3, 4]] def gen(): for row in dataset: yield (np.array(row),) data = ds.GeneratorDataset(gen, column_names=["col"]) - indexing = slice(0, 4) + indexing = slice(1, 4) data = data.map(operations=ops.Slice(indexing)) - for i, d in enumerate(data): - array = np.array(dataset[i]) - array = array[indexing] - np.testing.assert_array_equal(array, d[0].asnumpy()) + for (d, exp_d) in zip(data.create_dict_iterator(output_numpy=True), exp_dataset): + np.testing.assert_array_equal(exp_d, d['col']) + +def test_slice_obj_neg(): + slice_compare([1, 2, 3, 4, 5], slice(-1, -5, -1), [5, 4, 
3, 2]) + slice_compare([1, 2, 3, 4, 5], slice(-1), [1, 2, 3, 4]) + slice_compare([1, 2, 3, 4, 5], slice(-2), [1, 2, 3]) + slice_compare([1, 2, 3, 4, 5], slice(-1, -5, -2), [5, 3]) + slice_compare([1, 2, 3, 4, 5], slice(-5, -1, 2), [1, 3]) + slice_compare([1, 2, 3, 4, 5], slice(-5, -1), [1, 2, 3, 4]) -def test_slice_slice_obj_3s_double(): - slice_compare([1., 2., 3., 4., 5.], slice(0, 2, 1)) - slice_compare([1., 2., 3., 4., 5.], slice(0, 4, 1)) - slice_compare([1., 2., 3., 4., 5.], slice(0, 10, 1)) - slice_compare([1., 2., 3., 4., 5.], slice(0, 5, 2)) - slice_compare([1., 2., 3., 4., 5.], slice(0, 2, 2)) - slice_compare([1., 2., 3., 4., 5.], slice(0, 1, 2)) - slice_compare([1., 2., 3., 4., 5.], slice(4, 5, 1)) - slice_compare([1., 2., 3., 4., 5.], slice(2, 5, 3)) +def test_slice_all_str(): + slice_compare([b"1", b"2", b"3", b"4", b"5"], None, [b"1", b"2", b"3", b"4", b"5"]) + slice_compare([b"1", b"2", b"3", b"4", b"5"], ..., [b"1", b"2", b"3", b"4", b"5"]) -def test_slice_slice_obj_neg(): - slice_compare([1, 2, 3, 4, 5], slice(-1, -5, -1)) - slice_compare([1, 2, 3, 4, 5], slice(-1)) - slice_compare([1, 2, 3, 4, 5], slice(-2)) - slice_compare([1, 2, 3, 4, 5], slice(-1, -5, -2)) - slice_compare([1, 2, 3, 4, 5], slice(-5, -1, 2)) - slice_compare([1, 2, 3, 4, 5], slice(-5, -1)) +def test_slice_single_index_str(): + slice_compare([b"1", b"2", b"3", b"4", b"5"], [0, 1], [b"1", b"2"]) + slice_compare([b"1", b"2", b"3", b"4", b"5"], [0, 1], [b"1", b"2"]) + slice_compare([b"1", b"2", b"3", b"4", b"5"], [4], [b"5"]) + slice_compare([b"1", b"2", b"3", b"4", b"5"], [-1], [b"5"]) + slice_compare([b"1", b"2", b"3", b"4", b"5"], [-5], [b"1"]) -def test_slice_exceptions(): - with pytest.raises(RuntimeError) as info: - slice_compare([1, 2, 3, 4, 5], 5) - assert "Index 5 is out of bounds [0,5)" in str(info.value) - slice_compare([1, 2, 3, 4, 5], slice(0)) - slice_compare([1, 2, 3, 4, 5], slice(3, 1, 1)) - slice_compare([1, 2, 3, 4, 5], slice(5, 10, 1)) - slice_compare([1, 2, 3, 4, 
5], slice(-1, -5, 1)) +def test_slice_indexes_multidim_str(): + slice_compare([[b"1", b"2", b"3", b"4", b"5"]], [[0], 0], [[b"1"]]) + slice_compare([[b"1", b"2", b"3", b"4", b"5"]], [[0], [0, 1]], [[b"1", b"2"]]) -def test_slice_all_str(): - slice_compare([b"1", b"2", b"3", b"4", b"5"], None) - slice_compare([b"1", b"2", b"3", b"4", b"5"], ...) +def test_slice_list_index_str(): + slice_compare([b"1", b"2", b"3", b"4", b"5"], [0, 1, 4], [b"1", b"2", b"5"]) + slice_compare([b"1", b"2", b"3", b"4", b"5"], [4, 1, 0], [b"5", b"2", b"1"]) + slice_compare([b"1", b"2", b"3", b"4", b"5"], [3, 3, 3], [b"4", b"4", b"4"]) -def test_slice_single_index_str(): - slice_compare([b"1", b"2", b"3", b"4", b"5"], 0) - slice_compare([b"1", b"2", b"3", b"4", b"5"], 4) - slice_compare([b"1", b"2", b"3", b"4", b"5"], 2) - slice_compare([b"1", b"2", b"3", b"4", b"5"], -1) - slice_compare([b"1", b"2", b"3", b"4", b"5"], -5) - slice_compare([b"1", b"2", b"3", b"4", b"5"], -3) +# test str index object here +def test_slice_index_and_slice_str(): + slice_compare([[b"1", b"2", b"3", b"4", b"5"]], [slice(0, 1), 4], [[b"5"]]) + slice_compare([[b"1", b"2", b"3", b"4", b"5"]], [[0], slice(0, 2)], [[b"1", b"2"]]) + slice_compare([[b"1", b"2", b"3", b"4"], [b"5", b"6", b"7", b"8"]], [[1], slice(2, 4, 1)], + [[b"7", b"8"]]) -def test_slice_list_index_str(): - slice_compare([b"1", b"2", b"3", b"4", b"5"], [0, 1, 4]) - slice_compare([b"1", b"2", b"3", b"4", b"5"], [4, 1, 0]) - slice_compare([b"1", b"2", b"3", b"4", b"5"], [-1, 1, 0]) - slice_compare([b"1", b"2", b"3", b"4", b"5"], [-1, -4, -2]) - slice_compare([b"1", b"2", b"3", b"4", b"5"], [3, 3, 3]) - slice_compare([b"1", b"2", b"3", b"4", b"5"], [1, 1, 1, 1, 1]) +def test_slice_slice_obj_1s_str(): + slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(1), [b"1"]) + slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(4), [b"1", b"2", b"3", b"4"]) + slice_compare([[b"1", b"2", b"3", b"4"], [b"5", b"6", b"7", b"8"]], + [slice(2), slice(2)], + [[b"1", 
b"2"], [b"5", b"6"]]) def test_slice_slice_obj_2s_str(): - slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(0, 2)) - slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(2, 4)) - slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(4, 10)) + slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(0, 2), [b"1", b"2"]) + slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(2, 4), [b"3", b"4"]) + slice_compare([[b"1", b"2", b"3", b"4"], [b"5", b"6", b"7", b"8"]], + [slice(0, 2), slice(1, 2)], [[b"2"], [b"6"]]) -def test_slice_slice_obj_1s_str(): - slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(1)) - slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(4)) - slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(10)) +def test_slice_slice_obj_2s_multidim_str(): + slice_compare([[b"1", b"2", b"3", b"4", b"5"]], [slice(0, 1)], [[b"1", b"2", b"3", b"4", b"5"]]) + slice_compare([[b"1", b"2", b"3", b"4", b"5"]], [slice(0, 1), slice(4)], + [[b"1", b"2", b"3", b"4"]]) + slice_compare([[b"1", b"2", b"3", b"4", b"5"]], [slice(0, 1), slice(0, 3)], + [[b"1", b"2", b"3"]]) + slice_compare([[b"1", b"2", b"3", b"4"], [b"5", b"6", b"7", b"8"]], + [slice(0, 2, 2), slice(2, 4, 1)], + [[b"3", b"4"]]) def test_slice_slice_obj_3s_str(): - slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(0, 2, 1)) - slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(0, 4, 1)) - slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(0, 10, 1)) - slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(0, 5, 2)) - slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(0, 2, 2)) - slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(0, 1, 2)) - slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(4, 5, 1)) - slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(2, 5, 3)) - - -def test_slice_slice_obj_neg_str(): - slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(-1, -5, -1)) - slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(-1)) - slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(-2)) - slice_compare([b"1", b"2", 
b"3", b"4", b"5"], slice(-1, -5, -2)) - slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(-5, -1, 2)) - slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(-5, -1)) - - -def test_slice_exceptions_str(): + """ + Test passing in all parameters to the slice objects + """ + slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(0, 2, 1), [b"1", b"2"]) + slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(0, 4, 1), [b"1", b"2", b"3", b"4"]) + slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(0, 5, 2), [b"1", b"3", b"5"]) + slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(0, 2, 2), [b"1"]) + slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(0, 1, 2), [b"1"]) + slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(4, 5, 1), [b"5"]) + slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(2, 5, 3), [b"3"]) + slice_compare([[b"1", b"2", b"3", b"4"], [b"5", b"6", b"7", b"8"]], [slice(0, 2, 1)], + [[b"1", b"2", b"3", b"4"], [b"5", b"6", b"7", b"8"]]) + slice_compare([[b"1", b"2", b"3", b"4"], [b"5", b"6", b"7", b"8"]], slice(0, 2, 3), [[b"1", b"2", b"3", b"4"]]) + slice_compare([[b"1", b"2", b"3", b"4"], [b"5", b"6", b"7", b"8"]], + [slice(0, 2, 2), slice(0, 1, 2)], [[b"1"]]) + slice_compare([[b"1", b"2", b"3", b"4"], [b"5", b"6", b"7", b"8"]], + [slice(0, 2, 1), slice(0, 1, 2)], + [[b"1"], [b"5"]]) + slice_compare([[[b"1", b"2", b"3", b"4"], [b"5", b"6", b"7", b"8"]], + [[b"1", b"2", b"3", b"4"], [b"5", b"6", b"7", b"8"]]], + [slice(0, 2, 1), slice(0, 1, 1), slice(0, 4, 2)], + [[[b"1", b"3"]], [[b"1", b"3"]]]) + + +def test_slice_obj_neg_str(): + slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(-1, -5, -1), [b"5", b"4", b"3", b"2"]) + slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(-1), [b"1", b"2", b"3", b"4"]) + slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(-2), [b"1", b"2", b"3"]) + slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(-1, -5, -2), [b"5", b"3"]) + slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(-5, -1, 2), [b"1", b"3"]) + 
slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(-5, -1), [b"1", b"2", b"3", b"4"]) + + +def test_out_of_bounds_slicing_str(): + """ + Test passing indices outside of the input to the slice objects + """ + slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(-15, -1), [b"1", b"2", b"3", b"4"]) + slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(-15, 15), [b"1", b"2", b"3", b"4", b"5"]) + + indexing = slice(-15, -7) + expected_array = np.array([], dtype="S") + data = [b"1", b"2", b"3", b"4", b"5"] + data = ds.NumpySlicesDataset([data]) + data = data.map(operations=ops.Slice(indexing)) + for d in data.create_dict_iterator(output_numpy=True): + np.testing.assert_array_equal(expected_array, d['column_0']) + + +def test_slice_exceptions(): + """ + Test passing in invalid parameters + """ + with pytest.raises(RuntimeError) as info: + slice_compare([b"1", b"2", b"3", b"4", b"5"], [5], [b"1", b"2", b"3", b"4", b"5"]) + assert "Index 5 is out of bounds." in str(info.value) + with pytest.raises(RuntimeError) as info: - slice_compare([b"1", b"2", b"3", b"4", b"5"], 5) - assert "Index 5 is out of bounds [0,5)" in str(info.value) + slice_compare([b"1", b"2", b"3", b"4", b"5"], [], [b"1", b"2", b"3", b"4", b"5"]) + assert "Both indices and slices can not be empty." in str(info.value) + + with pytest.raises(TypeError) as info: + slice_compare([b"1", b"2", b"3", b"4", b"5"], [[[0, 1]]], [b"1", b"2", b"3", b"4", b"5"]) + assert "Argument slice_option[0] with value [0, 1] is not of type " \ + "(<class 'int'>,)." 
in str(info.value) - slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(0)) - slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(3, 1, 1)) - slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(5, 10, 1)) - slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(-1, -5, 1)) + with pytest.raises(TypeError) as info: + slice_compare([b"1", b"2", b"3", b"4", b"5"], [[slice(3)]], [b"1", b"2", b"3", b"4", b"5"]) + assert "Argument slice_option[0] with value slice(None, 3, None) is not of type " \ + "(<class 'int'>,)." in str(info.value) if __name__ == "__main__": test_slice_all() test_slice_single_index() + test_slice_indices_multidim() test_slice_list_index() - test_slice_slice_obj_3s() - test_slice_slice_obj_2s() + test_slice_index_and_slice() test_slice_slice_obj_1s() - test_slice_slice_obj_neg() - test_slice_exceptions() - test_slice_slice_obj_3s_double() + test_slice_slice_obj_2s() + test_slice_slice_obj_2s_multidim() + test_slice_slice_obj_3s() + test_slice_obj_3s_double() + test_slice_multiple_rows() + test_slice_obj_neg() test_slice_all_str() test_slice_single_index_str() + test_slice_indexes_multidim_str() test_slice_list_index_str() - test_slice_slice_obj_3s_str() - test_slice_slice_obj_2s_str() + test_slice_index_and_slice_str() test_slice_slice_obj_1s_str() - test_slice_slice_obj_neg_str() - test_slice_exceptions_str() - test_slice_multiple_rows() + test_slice_slice_obj_2s_str() + test_slice_slice_obj_2s_multidim_str() + test_slice_slice_obj_3s_str() + test_slice_obj_neg_str() + test_out_of_bounds_slicing_str() + test_slice_exceptions()