parent
d89cedb980
commit
4136892a3e
@ -0,0 +1,66 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "minddata/dataset/text/kernels/data_utils.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "minddata/dataset/core/pybind_support.h"
|
||||
#include "minddata/dataset/kernels/data/type_cast_op.h"
|
||||
#include "minddata/dataset/kernels/data/slice_op.h"
|
||||
#include "minddata/dataset/kernels/data/concatenate_op.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
// Performs a sliding window over `input` and stores the result in `output`.
//
// One slice of `width` consecutive items is taken per start position and the slices are joined
// with a ConcatenateOp built on `axis`; the joined tensor is finally reshaped to `out_shape`.
// When `out_shape` has no dimensions, the row is treated as shorter than `width` and an empty
// tensor of shape {0} (string or `input`'s type) is produced instead.
//
// \param[in] input - Input tensor.
// \param[out] output - Output tensor.
// \param[in] out_shape - Shape the concatenated windows are reshaped to.
// \param[in] width - The width of the window.
// \param[in] axis - The axis along which sliding window is computed (negative values are resolved
//                   against the rank of `input`).
// \return Status return code; errors from slicing, concatenating or reshaping are propagated.
Status SlidingWindowHelper(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, TensorShape out_shape,
                           uint32_t width, int32_t axis) {
  // if the data row has fewer items than width, the corresponding result row will be empty
  if (out_shape.Size() == 0) {
    MS_LOG(WARNING) << "The data row has fewer items than width, the result will be empty.";
    if (input->type().value() == DataType::DE_STRING) {
      RETURN_IF_NOT_OK(Tensor::CreateTensor(output, std::vector<std::string>{}, TensorShape({0})));
    } else {
      RETURN_IF_NOT_OK(Tensor::CreateTensor(output, TensorImpl::kFlexible, TensorShape({0}), input->type()));
    }
    return Status::OK();
  }

  axis = Tensor::HandleNeg(axis, input->shape().Size());
  // Do all index arithmetic in the signed shape type so that `start + window` cannot wrap around
  // the way mixed int32_t/uint32_t arithmetic can.
  const dsize_t axis_end = input->shape()[axis];
  const dsize_t window = static_cast<dsize_t>(width);
  // Without at least one full window the loop below never assigns *output, and the final Reshape
  // would dereference whatever the caller passed in.
  CHECK_FAIL_RETURN_UNEXPECTED(window <= axis_end, "SlidingWindow: width is larger than the size of the given axis.");

  std::shared_ptr<Tensor> tmp;
  auto concatenate_op = std::make_unique<ConcatenateOp>(axis, nullptr, nullptr);

  // Slice on specified axis and concatenate on new axis
  for (dsize_t start = 0; start + window <= axis_end; ++start) {
    auto slice_op = std::make_unique<SliceOp>(Slice(start, start + window, 1));
    RETURN_IF_NOT_OK(slice_op->Compute(input, &tmp));
    if (start == 0) {
      *output = tmp;
    } else {
      TensorRow in({*output, tmp});
      TensorRow out_row;
      RETURN_IF_NOT_OK(concatenate_op->Compute(in, &out_row));
      *output = out_row[0];
    }
  }
  RETURN_IF_NOT_OK((*output)->Reshape(out_shape));
  return Status::OK();
}
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
@ -0,0 +1,43 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef DATASET_KERNELS_TEXT_DATA_UTILS_H_
|
||||
#define DATASET_KERNELS_TEXT_DATA_UTILS_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "minddata/dataset/util/status.h"
|
||||
#include "minddata/dataset/core/constants.h"
|
||||
#include "minddata/dataset/core/data_type.h"
|
||||
#include "minddata/dataset/core/tensor.h"
|
||||
#include "minddata/dataset/core/cv_tensor.h"
|
||||
#include "minddata/dataset/core/tensor_shape.h"
|
||||
#include "minddata/dataset/core/tensor_row.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
/// \brief Helper method that performs a sliding window on the input tensor.
|
||||
/// \param[in] input - Input tensor.
|
||||
/// \param[in] out_shape - Shape of the output tensor.
|
||||
/// \param[in] width - The width of the window.
|
||||
/// \param[in] axis - The axis along which sliding window is computed.
|
||||
/// \param[out] output - Output tensor
|
||||
/// \return Status return code
|
||||
Status SlidingWindowHelper(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, TensorShape out_shape,
|
||||
uint32_t width, int32_t axis);
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
#endif // DATASET_KERNELS_TEXT_DATA_UTILS_H_
|
@ -0,0 +1,57 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "minddata/dataset/text/kernels/sliding_window_op.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
// Applies the sliding window to `input` and writes the result to `output`.
// Only rank-1 tensors and axis values 0 / -1 are accepted; the output shape is derived through
// OutputShape and the windowing itself is delegated to SlidingWindowHelper.
Status SlidingWindowOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
  IO_CHECK(input, output);
  CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Rank() == 1, "SlidingWindosOp supports 1D Tensors only for now.");
  CHECK_FAIL_RETURN_UNEXPECTED(axis_ == 0 || axis_ == -1, "axis supports 0 or -1 only for now.");

  // OutputShape replaces the single placeholder entry with the computed shape.
  std::vector<TensorShape> in_shapes = {input->shape()};
  std::vector<TensorShape> out_shapes = {TensorShape({})};
  RETURN_IF_NOT_OK(OutputShape(in_shapes, out_shapes));

  return SlidingWindowHelper(input, output, out_shapes[0], width_, axis_);
}
|
||||
|
||||
// Computes the output shape of the sliding window for the given input shape.
//
// The dimension at the configured axis, of size n, is replaced by two dimensions
// {n - (width - 1), width}; every other dimension is copied unchanged. If that dimension is
// smaller than the width, the resulting shape has no dimensions at all.
//
// \param[in] inputs - Input tensor shapes; exactly NumInput() entries are expected.
// \param[out] outputs - Output tensor shapes; the last entry (if any) is replaced by the result.
// \return Status return code
Status SlidingWindowOp::OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) {
  CHECK_FAIL_RETURN_UNEXPECTED(inputs.size() == NumInput(), "incorrect num of inputs\n");
  int32_t axis = Tensor::HandleNeg(axis_, inputs[0].Size());
  TensorShape input_shape = inputs[0];
  // Reject an axis that does not address a dimension of the input before indexing the shape.
  CHECK_FAIL_RETURN_UNEXPECTED(axis >= 0 && axis < input_shape.Size(), "axis is out of range of the input shape\n");

  // Work in the signed shape type so `window - 1` cannot wrap around as unsigned arithmetic.
  const dsize_t window = static_cast<dsize_t>(width_);
  std::vector<dsize_t> output_shape_initializer;

  // if a data row has fewer items than width, the corresponding result row will be empty.
  if (input_shape[axis] >= window) {
    for (int32_t idx = 0; idx < input_shape.Size(); ++idx) {
      if (idx != axis) {
        output_shape_initializer.push_back(input_shape[idx]);
      } else {
        output_shape_initializer.push_back(input_shape[idx] - (window - 1));
        output_shape_initializer.push_back(window);
      }
    }
  }

  // pop_back() on an empty vector is undefined behaviour, so only drop the placeholder if present.
  if (!outputs.empty()) {
    outputs.pop_back();
  }
  outputs.emplace_back(TensorShape(output_shape_initializer));
  CHECK_FAIL_RETURN_UNEXPECTED(outputs.size() == NumOutput(), "incorrect num of outputs\n");
  return Status::OK();
}
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
@ -0,0 +1,68 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef DATASET_KERNELS_TEXT_SLIDING_WINDOW_OP_H_
|
||||
#define DATASET_KERNELS_TEXT_SLIDING_WINDOW_OP_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "minddata/dataset/core/tensor.h"
|
||||
#include "minddata/dataset/kernels/tensor_op.h"
|
||||
#include "minddata/dataset/text/kernels/data_utils.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
|
||||
class SlidingWindowOp : public TensorOp {
|
||||
public:
|
||||
/// \brief Constructor of SlidingWindowOp.
|
||||
/// \param[in] width - The axis along which sliding window is computed.
|
||||
/// \param[in] axis - The width of the window.
|
||||
/// \return Status return code
|
||||
explicit SlidingWindowOp(uint32_t width, int32_t axis = 0) : width_(width), axis_(axis) {}
|
||||
|
||||
/// \brief Destructor of SlidingWindowOp.
|
||||
~SlidingWindowOp() override = default;
|
||||
|
||||
/// \brief Perform sliding window to tensor.
|
||||
/// \param[in] input - Input tensor of Op.
|
||||
/// \param[out] output - output tensor of Op.
|
||||
/// \return Status return code
|
||||
Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override;
|
||||
|
||||
/// \brief Calculate tensor shape for output tensor.
|
||||
/// \param[in] inputs - Input tensor shapes.
|
||||
/// \param[out] outputs - Output tensor shapes.
|
||||
/// \return Status return code
|
||||
Status OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) override;
|
||||
|
||||
/// \brief Print args for debugging.
|
||||
/// \param[in] out - std::ostream &out.
|
||||
void Print(std::ostream &out) const override { out << "SliceWindowOp"; }
|
||||
|
||||
/// \brief Print name of op.
|
||||
std::string Name() const override { return kSlidingWindowOp; }
|
||||
|
||||
private:
|
||||
uint32_t width_; // The width of the window. Must be an integer and greater than zero.
|
||||
int32_t axis_; // The axis along which sliding window is computed, only support 0/-1 for now.
|
||||
};
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
#endif // DATASET_KERNELS_TEXT_SLIDING_WINDOW_OP_H_
|
@ -0,0 +1,69 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "common/common.h"
|
||||
#include "minddata/dataset/text/kernels/sliding_window_op.h"
|
||||
#include "utils/log_adapter.h"
|
||||
|
||||
using namespace mindspore::dataset;
|
||||
using mindspore::MsLogLevel::INFO;
|
||||
using mindspore::ExceptionType::NoExceptionType;
|
||||
using mindspore::LogStream;
|
||||
|
||||
// Test fixture shared by the SlidingWindowOp unit tests.
class MindDataTestSlidingWindowOp : public UT::Common {
 protected:
  MindDataTestSlidingWindowOp() = default;
};
|
||||
|
||||
// Checks that a width-3 window over an 8-element 1D string tensor yields the expected 6x3 tensor.
TEST_F(MindDataTestSlidingWindowOp, Compute) {
  MS_LOG(INFO) << "Doing MindDataTestSlidingWindowOp->Compute.";
  std::vector<std::string> strings = {"one", "two", "three", "four", "five", "six", "seven", "eight"};
  TensorShape shape({static_cast<dsize_t>(strings.size())});
  std::shared_ptr<Tensor> input = std::make_shared<Tensor>(strings, shape);
  std::shared_ptr<Tensor> output;

  auto op = std::make_unique<SlidingWindowOp>(3, 0);
  Status s = op->Compute(input, &output);
  // Fail the test here instead of dereferencing a null output when Compute reports an error.
  ASSERT_TRUE(s.IsOk());
  ASSERT_TRUE(output != nullptr);

  std::vector<std::string> out = {"one",  "two",  "three", "two",  "three", "four",  "three", "four",  "five",
                                  "four", "five", "six",   "five", "six",   "seven", "six",   "seven", "eight"};
  std::shared_ptr<Tensor> expected = std::make_shared<Tensor>(out, TensorShape({6, 3}));

  ASSERT_TRUE(output->shape() == expected->shape());
  ASSERT_TRUE(output->type() == expected->type());
  MS_LOG(DEBUG) << *output << std::endl;
  MS_LOG(DEBUG) << *expected << std::endl;
  ASSERT_TRUE(*output == *expected);

  MS_LOG(INFO) << "MindDataTestSlidingWindowOp end.";
}
|
||||
|
||||
// Checks that OutputShape maps an 8-element 1D shape to {6, 3} for a window of width 3.
TEST_F(MindDataTestSlidingWindowOp, OutputShape) {
  MS_LOG(INFO) << "Doing MindDataTestSlidingWindowOp->OutputShape.";
  std::vector<std::string> strings = {"one", "two", "three", "four", "five", "six", "seven", "eight"};
  TensorShape shape({static_cast<dsize_t>(strings.size())});
  std::shared_ptr<Tensor> input = std::make_shared<Tensor>(strings, shape);
  std::vector<TensorShape> input_shape = {input->shape()};
  std::vector<TensorShape> output_shape = {TensorShape({})};

  auto op = std::make_unique<SlidingWindowOp>(3, 0);
  Status s = op->OutputShape(input_shape, output_shape);
  // The shape comparison below is only meaningful if OutputShape itself succeeded.
  ASSERT_TRUE(s.IsOk());

  MS_LOG(DEBUG) << "input_shape" << input_shape[0];
  MS_LOG(DEBUG) << "output_shape" << output_shape[0];
  ASSERT_TRUE(output_shape[0] == TensorShape({6, 3}));

  MS_LOG(INFO) << "MindDataTestSlidingWindowOp end.";
}
|
@ -0,0 +1,105 @@
|
||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Testing SlidingWindow in mindspore.dataset
|
||||
"""
|
||||
import numpy as np
|
||||
import mindspore.dataset as ds
|
||||
import mindspore.dataset.text as text
|
||||
|
||||
def test_sliding_window_string():
    """Slide a width-2 window over one row of five strings and compare with the expected pairs."""
    source_rows = [["大", "家", "早", "上", "好"]]
    expected = np.array([['大', '家'], ['家', '早'], ['早', '上'], ['上', '好']])

    pipeline = ds.NumpySlicesDataset(source_rows, column_names=["text"], shuffle=False)
    pipeline = pipeline.map(input_columns=["text"], operations=text.SlidingWindow(2, 0))

    # The iterator yields byte strings, so decode every cell before comparing.
    decoded = []
    for row in pipeline.create_dict_iterator():
        windows = row['text']
        for i in range(windows.shape[0]):
            decoded.append([windows[i][j].decode('utf8') for j in range(windows.shape[1])])
    np.testing.assert_array_equal(np.array(decoded), expected)
|
||||
|
||||
def test_sliding_window_number():
    """Slide a width-1 window along axis -1 over a single-element numeric row."""
    numbers = [1]
    expected = np.array([[1]])

    def single_row(values):
        # One row whose only column holds all the values.
        yield (np.array(values),)

    pipeline = ds.GeneratorDataset(single_row(numbers), column_names=["number"])
    pipeline = pipeline.map(input_columns=["number"], operations=text.SlidingWindow(1, -1))

    for row in pipeline.create_dict_iterator():
        np.testing.assert_array_equal(row['number'], expected)
|
||||
|
||||
def test_sliding_window_big_width():
    """A window wider than the row (30 > 5) is expected to produce an empty result."""
    source_rows = [[1, 2, 3, 4, 5]]
    expected = np.array([])

    pipeline = ds.NumpySlicesDataset(source_rows, column_names=["number"], shuffle=False)
    pipeline = pipeline.map(input_columns=["number"], operations=text.SlidingWindow(30, 0))

    for row in pipeline.create_dict_iterator():
        np.testing.assert_array_equal(row['number'], expected)
|
||||
|
||||
def test_sliding_window_exception():
    """Check that invalid arguments and unsupported inputs are rejected with the expected errors."""
    # A width of zero is rejected with ValueError.
    try:
        _ = text.SlidingWindow(0, 0)
    except ValueError:
        pass
    else:
        assert False

    # A non-integer width is rejected with TypeError.
    try:
        _ = text.SlidingWindow("1", 0)
    except TypeError:
        pass
    else:
        assert False

    # A non-integer axis is rejected with TypeError.
    try:
        _ = text.SlidingWindow(1, "0")
    except TypeError:
        pass
    else:
        assert False

    # An axis other than 0 / -1 only fails once the pipeline is iterated.
    try:
        rows = [[1, 2, 3, 4, 5]]
        pipeline = ds.NumpySlicesDataset(rows, column_names=["text"], shuffle=False)
        pipeline = pipeline.map(input_columns=["text"], operations=text.SlidingWindow(3, -100))
        for _ in pipeline.create_dict_iterator():
            pass
    except RuntimeError as err:
        assert "axis supports 0 or -1 only for now." in str(err)
    else:
        assert False

    # Rows that are not 1D tensors fail once the pipeline is iterated.
    try:
        rows = ["aa", "bb", "cc"]
        pipeline = ds.NumpySlicesDataset(rows, column_names=["text"], shuffle=False)
        pipeline = pipeline.map(input_columns=["text"], operations=text.SlidingWindow(2, 0))
        for _ in pipeline.create_dict_iterator():
            pass
    except RuntimeError as err:
        assert "SlidingWindosOp supports 1D Tensors only for now." in str(err)
    else:
        assert False
|
||||
|
||||
if __name__ == '__main__':
    # Run every test in this file, in definition order, when executed as a script.
    for _case in (test_sliding_window_string,
                  test_sliding_window_number,
                  test_sliding_window_big_width,
                  test_sliding_window_exception):
        _case()
|
Loading…
Reference in new issue