parent
d89cedb980
commit
4136892a3e
@ -0,0 +1,66 @@
|
|||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "minddata/dataset/text/kernels/data_utils.h"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <limits>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "minddata/dataset/core/pybind_support.h"
|
||||||
|
#include "minddata/dataset/kernels/data/type_cast_op.h"
|
||||||
|
#include "minddata/dataset/kernels/data/slice_op.h"
|
||||||
|
#include "minddata/dataset/kernels/data/concatenate_op.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace dataset {
|
||||||
|
/// \brief Build the sliding-window result for `input` by slicing one window per start position and
///     concatenating the slices, then reshaping the result to `out_shape`.
/// \param[in] input - Input tensor.
/// \param[out] output - Receives the windowed tensor, or an empty tensor when `out_shape` has no dimensions.
/// \param[in] out_shape - Shape the concatenated windows are reshaped to.
/// \param[in] width - The width of the window.
/// \param[in] axis - The axis along which sliding window is computed (negative values are normalised).
/// \return Status return code; any failure of the slice, concatenate or reshape step is propagated.
Status SlidingWindowHelper(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, TensorShape out_shape,
                           uint32_t width, int32_t axis) {
  // if the data row has fewer items than width, the corresponding result row will be empty
  if (out_shape.Size() == 0) {
    MS_LOG(WARNING) << "The data row has fewer items than width, the result will be empty.";
    if (input->type().value() == DataType::DE_STRING) {
      RETURN_IF_NOT_OK(Tensor::CreateTensor(output, std::vector<std::string>{}, TensorShape({0})));
    } else {
      RETURN_IF_NOT_OK(Tensor::CreateTensor(output, TensorImpl::kFlexible, TensorShape({0}), input->type()));
    }
    return Status::OK();
  }

  axis = Tensor::HandleNeg(axis, input->shape().Size());
  int32_t axis_end = input->shape()[axis];
  std::shared_ptr<Tensor> tmp;
  auto concatenate_op = std::make_unique<ConcatenateOp>(axis, nullptr, nullptr);

  // Slice on specified axis and concatenate on new axis
  for (int32_t i = 0; i + width <= axis_end; i++) {
    auto slice_op = std::make_unique<SliceOp>(Slice(i, i + width, 1));
    // Propagate failures instead of silently continuing with a stale or null `tmp`.
    RETURN_IF_NOT_OK(slice_op->Compute(input, &tmp));
    if (i == 0) {
      // The first window seeds the accumulator; later windows are appended to it.
      *output = tmp;
    } else {
      TensorRow in({*output, tmp});
      TensorRow out_row;
      RETURN_IF_NOT_OK(concatenate_op->Compute(in, &out_row));
      *output = out_row[0];
    }
  }
  // The accumulated windows are flat along `axis`; give them the requested shape.
  RETURN_IF_NOT_OK((*output)->Reshape(out_shape));
  return Status::OK();
}
|
||||||
|
} // namespace dataset
|
||||||
|
} // namespace mindspore
|
@ -0,0 +1,43 @@
|
|||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#ifndef DATASET_KERNELS_TEXT_DATA_UTILS_H_
|
||||||
|
#define DATASET_KERNELS_TEXT_DATA_UTILS_H_
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include "minddata/dataset/util/status.h"
|
||||||
|
#include "minddata/dataset/core/constants.h"
|
||||||
|
#include "minddata/dataset/core/data_type.h"
|
||||||
|
#include "minddata/dataset/core/tensor.h"
|
||||||
|
#include "minddata/dataset/core/cv_tensor.h"
|
||||||
|
#include "minddata/dataset/core/tensor_shape.h"
|
||||||
|
#include "minddata/dataset/core/tensor_row.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace dataset {
|
||||||
|
/// \brief Helper method that performs sliding window on input tensor.
|
||||||
|
/// \param[in] input - Input tensor.
|
||||||
|
/// \param[in] out_shape - Output shape of output tensor.
|
||||||
|
/// \param[in] width - The width of the window.
|
||||||
|
/// \param[in] axis - The axis along which sliding window is computed.
|
||||||
|
/// \param[out] output - Output tensor
|
||||||
|
/// \return Status return code
|
||||||
|
Status SlidingWindowHelper(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, TensorShape out_shape,
|
||||||
|
uint32_t width, int32_t axis);
|
||||||
|
} // namespace dataset
|
||||||
|
} // namespace mindspore
|
||||||
|
#endif // DATASET_KERNELS_TEXT_DATA_UTILS_H_
|
@ -0,0 +1,57 @@
|
|||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#include "minddata/dataset/text/kernels/sliding_window_op.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace dataset {
|
||||||
|
/// \brief Validate the input, derive the windowed output shape and delegate to SlidingWindowHelper.
/// \param[in] input - Input tensor of Op; must be 1D.
/// \param[out] output - output tensor of Op.
/// \return Status return code
Status SlidingWindowOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
  IO_CHECK(input, output);
  CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Rank() == 1, "SlidingWindosOp supports 1D Tensors only for now.");
  CHECK_FAIL_RETURN_UNEXPECTED(axis_ == 0 || axis_ == -1, "axis supports 0 or -1 only for now.");

  // OutputShape overwrites the single placeholder entry with the computed shape.
  std::vector<TensorShape> in_shapes{input->shape()};
  std::vector<TensorShape> out_shapes{TensorShape({})};
  RETURN_IF_NOT_OK(OutputShape(in_shapes, out_shapes));

  const TensorShape &window_shape = out_shapes[0];
  RETURN_IF_NOT_OK(SlidingWindowHelper(input, output, window_shape, width_, axis_));
  return Status::OK();
}
|
||||||
|
|
||||||
|
/// \brief Calculate tensor shape for output tensor.
///     The dimension on the window axis (length n) is replaced by two dimensions, (n - width + 1, width);
///     every other dimension is copied unchanged. When n is smaller than the window width the resulting
///     shape has no dimensions.
/// \param[in] inputs - Input tensor shapes; exactly NumInput() entries are expected.
/// \param[out] outputs - Output tensor shapes; any previous content is discarded.
/// \return Status return code
Status SlidingWindowOp::OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) {
  CHECK_FAIL_RETURN_UNEXPECTED(inputs.size() == NumInput(), "incorrect num of inputs\n");
  int32_t axis = Tensor::HandleNeg(axis_, inputs[0].Size());
  const TensorShape &input_shape = inputs[0];
  std::vector<dsize_t> output_shape_initializer;

  // if a data row has fewer items than width, the corresponding result row will be empty.
  if (input_shape[axis] >= width_) {
    for (int32_t idx = 0; idx < input_shape.Size(); ++idx) {
      if (idx != axis) {
        output_shape_initializer.push_back(input_shape[idx]);
      } else {
        // Number of window start positions, followed by the window width itself.
        output_shape_initializer.push_back(input_shape[idx] - (width_ - 1));
        output_shape_initializer.push_back(width_);
      }
    }
  }

  // clear() rather than pop_back(): callers may pass an empty vector, and pop_back() on an empty
  // vector is undefined behaviour.
  outputs.clear();
  outputs.emplace_back(TensorShape(output_shape_initializer));
  CHECK_FAIL_RETURN_UNEXPECTED(outputs.size() == NumOutput(), "incorrect num of outputs\n");
  return Status::OK();
}
|
||||||
|
} // namespace dataset
|
||||||
|
} // namespace mindspore
|
@ -0,0 +1,68 @@
|
|||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#ifndef DATASET_KERNELS_TEXT_SLIDING_WINDOW_OP_H_
|
||||||
|
#define DATASET_KERNELS_TEXT_SLIDING_WINDOW_OP_H_
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "minddata/dataset/core/tensor.h"
|
||||||
|
#include "minddata/dataset/kernels/tensor_op.h"
|
||||||
|
#include "minddata/dataset/text/kernels/data_utils.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace dataset {
|
||||||
|
|
||||||
|
class SlidingWindowOp : public TensorOp {
|
||||||
|
public:
|
||||||
|
/// \brief Constructor of SlidingWindowOp.
|
||||||
|
/// \param[in] width - The axis along which sliding window is computed.
|
||||||
|
/// \param[in] axis - The width of the window.
|
||||||
|
/// \return Status return code
|
||||||
|
explicit SlidingWindowOp(uint32_t width, int32_t axis = 0) : width_(width), axis_(axis) {}
|
||||||
|
|
||||||
|
/// \brief Destructor of SlidingWindowOp.
|
||||||
|
~SlidingWindowOp() override = default;
|
||||||
|
|
||||||
|
/// \brief Perform sliding window to tensor.
|
||||||
|
/// \param[in] input - Input tensor of Op.
|
||||||
|
/// \param[out] output - output tensor of Op.
|
||||||
|
/// \return Status return code
|
||||||
|
Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override;
|
||||||
|
|
||||||
|
/// \brief Calculate tensor shape for output tensor.
|
||||||
|
/// \param[in] inputs - Input tensor shapes.
|
||||||
|
/// \param[out] outputs - Output tensor shapes.
|
||||||
|
/// \return Status return code
|
||||||
|
Status OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) override;
|
||||||
|
|
||||||
|
/// \brief Print args for debugging.
|
||||||
|
/// \param[in] out - std::ostream &out.
|
||||||
|
void Print(std::ostream &out) const override { out << "SliceWindowOp"; }
|
||||||
|
|
||||||
|
/// \brief Print name of op.
|
||||||
|
std::string Name() const override { return kSlidingWindowOp; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
uint32_t width_; // The width of the window. Must be an integer and greater than zero.
|
||||||
|
int32_t axis_; // The axis along which sliding window is computed, only support 0/-1 for now.
|
||||||
|
};
|
||||||
|
} // namespace dataset
|
||||||
|
} // namespace mindspore
|
||||||
|
#endif // DATASET_KERNELS_TEXT_SLIDING_WINDOW_OP_H_
|
@ -0,0 +1,69 @@
|
|||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#include "common/common.h"
|
||||||
|
#include "minddata/dataset/text/kernels/sliding_window_op.h"
|
||||||
|
#include "utils/log_adapter.h"
|
||||||
|
|
||||||
|
using namespace mindspore::dataset;
|
||||||
|
using mindspore::MsLogLevel::INFO;
|
||||||
|
using mindspore::ExceptionType::NoExceptionType;
|
||||||
|
using mindspore::LogStream;
|
||||||
|
|
||||||
|
/// \brief Test fixture for the SlidingWindowOp unit tests; adds no state on top of UT::Common.
class MindDataTestSlidingWindowOp : public UT::Common {
 protected:
  MindDataTestSlidingWindowOp() = default;
};
|
||||||
|
|
||||||
|
// Runs SlidingWindowOp(width=3, axis=0) on an 8-element 1D string tensor and expects a 6x3 window matrix.
TEST_F(MindDataTestSlidingWindowOp, Compute) {
  MS_LOG(INFO) << "Doing MindDataTestSlidingWindowOp->Compute.";
  std::vector<std::string> strings = {"one", "two", "three", "four", "five", "six", "seven", "eight"};
  TensorShape shape({static_cast<dsize_t>(strings.size())});
  std::shared_ptr<Tensor> input = std::make_shared<Tensor>(strings, shape);
  std::shared_ptr<Tensor> output;

  auto op = std::make_unique<SlidingWindowOp>(3, 0);
  Status s = op->Compute(input, &output);
  // Stop on failure: `output` would still be null and dereferencing it below would crash the test binary.
  ASSERT_TRUE(s.IsOk());
  ASSERT_TRUE(output != nullptr);

  std::vector<std::string> out = {"one",  "two",  "three", "two",  "three", "four",  "three", "four",  "five",
                                  "four", "five", "six",   "five", "six",   "seven", "six",   "seven", "eight"};
  std::shared_ptr<Tensor> expected = std::make_shared<Tensor>(out, TensorShape({6, 3}));

  ASSERT_TRUE(output->shape() == expected->shape());
  ASSERT_TRUE(output->type() == expected->type());
  MS_LOG(DEBUG) << *output << std::endl;
  MS_LOG(DEBUG) << *expected << std::endl;
  ASSERT_TRUE(*output == *expected);

  MS_LOG(INFO) << "MindDataTestSlidingWindowOp end.";
}
|
||||||
|
|
||||||
|
// Checks that SlidingWindowOp(width=3, axis=0) maps an input shape of {8} to an output shape of {6, 3}.
TEST_F(MindDataTestSlidingWindowOp, OutputShape) {
  MS_LOG(INFO) << "Doing MindDataTestSlidingWindowOp->OutputShape.";
  std::vector<std::string> strings = {"one", "two", "three", "four", "five", "six", "seven", "eight"};
  TensorShape shape({static_cast<dsize_t>(strings.size())});
  std::shared_ptr<Tensor> input = std::make_shared<Tensor>(strings, shape);
  std::vector<TensorShape> input_shape = {input->shape()};
  std::vector<TensorShape> output_shape = {TensorShape({})};

  auto op = std::make_unique<SlidingWindowOp>(3, 0);
  Status s = op->OutputShape(input_shape, output_shape);
  // A failed call leaves output_shape meaningless, so report the failure itself rather than a shape mismatch.
  ASSERT_TRUE(s.IsOk());

  MS_LOG(DEBUG) << "input_shape" << input_shape[0];
  MS_LOG(DEBUG) << "output_shape" << output_shape[0];
  ASSERT_TRUE(output_shape[0] == TensorShape({6, 3}));

  MS_LOG(INFO) << "MindDataTestSlidingWindowOp end.";
}
|
@ -0,0 +1,105 @@
|
|||||||
|
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ==============================================================================
|
||||||
|
"""
|
||||||
|
Testing SlidingWindow in mindspore.dataset
|
||||||
|
"""
|
||||||
|
import numpy as np
|
||||||
|
import mindspore.dataset as ds
|
||||||
|
import mindspore.dataset.text as text
|
||||||
|
|
||||||
|
def test_sliding_window_string():
    """Check SlidingWindow(2, 0) on a single row of five strings against the four expected pairs."""
    inputs = [["大", "家", "早", "上", "好"]]
    expect = np.array([['大', '家'], ['家', '早'], ['早', '上'], ['上', '好']])

    dataset = ds.NumpySlicesDataset(inputs, column_names=["text"], shuffle=False)
    dataset = dataset.map(input_columns=["text"], operations=text.SlidingWindow(2, 0))

    result = []
    for data in dataset.create_dict_iterator():
        windows = data['text']
        n_rows, n_cols = windows.shape[0], windows.shape[1]
        for row in range(n_rows):
            # Items arrive as bytes; decode them so they compare equal to the str-based expectation.
            result.append([])
            for col in range(n_cols):
                result[row].append(windows[row][col].decode('utf8'))
        result = np.array(result)
    np.testing.assert_array_equal(result, expect)
|
||||||
|
|
||||||
|
def test_sliding_window_number():
    """Check SlidingWindow(1, -1) on a one-element numeric row; the expected result is [[1]]."""
    inputs = [1]
    expect = np.array([[1]])

    def single_row(values):
        # Yields exactly one row holding the whole list as a single array column.
        yield (np.array(values),)

    source = ds.GeneratorDataset(single_row(inputs), column_names=["number"])
    windowed = source.map(input_columns=["number"], operations=text.SlidingWindow(1, -1))

    for row in windowed.create_dict_iterator():
        np.testing.assert_array_equal(row['number'], expect)
|
||||||
|
|
||||||
|
def test_sliding_window_big_width():
    """Check that a window width (30) larger than the row length (5) produces an empty result."""
    inputs = [[1, 2, 3, 4, 5]]
    expect = np.array([])

    source = ds.NumpySlicesDataset(inputs, column_names=["number"], shuffle=False)
    windowed = source.map(input_columns=["number"], operations=text.SlidingWindow(30, 0))

    for row in windowed.create_dict_iterator():
        np.testing.assert_array_equal(row['number'], expect)
|
||||||
|
|
||||||
|
def test_sliding_window_exception():
    """Check that invalid SlidingWindow arguments and unsupported inputs raise the expected errors."""
    # width must be greater than zero
    try:
        _ = text.SlidingWindow(0, 0)
    except ValueError:
        pass
    else:
        assert False

    # width must be an integer
    try:
        _ = text.SlidingWindow("1", 0)
    except TypeError:
        pass
    else:
        assert False

    # axis must be an integer
    try:
        _ = text.SlidingWindow(1, "0")
    except TypeError:
        pass
    else:
        assert False

    # an axis other than 0 / -1 is rejected when the pipeline runs
    try:
        inputs = [[1, 2, 3, 4, 5]]
        dataset = ds.NumpySlicesDataset(inputs, column_names=["text"], shuffle=False)
        dataset = dataset.map(input_columns=["text"], operations=text.SlidingWindow(3, -100))
        for _ in dataset.create_dict_iterator():
            pass
    except RuntimeError as e:
        assert "axis supports 0 or -1 only for now." in str(e)
    else:
        assert False

    # rows that are not 1D tensors are rejected when the pipeline runs
    try:
        inputs = ["aa", "bb", "cc"]
        dataset = ds.NumpySlicesDataset(inputs, column_names=["text"], shuffle=False)
        dataset = dataset.map(input_columns=["text"], operations=text.SlidingWindow(2, 0))
        for _ in dataset.create_dict_iterator():
            pass
    except RuntimeError as e:
        assert "SlidingWindosOp supports 1D Tensors only for now." in str(e)
    else:
        assert False
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Run every test case of this file, in definition order, when executed as a script.
    for case in (test_sliding_window_string,
                 test_sliding_window_number,
                 test_sliding_window_big_width,
                 test_sliding_window_exception):
        case()
|
Loading…
Reference in new issue