parent
ea829e89c3
commit
b9495a9ccc
@ -0,0 +1,66 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dataset/text/kernels/truncate_sequence_pair_op.h"
|
||||
|
||||
#include "dataset/core/tensor.h"
|
||||
#include "dataset/kernels/tensor_op.h"
|
||||
#include "dataset/kernels/data/slice_op.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
|
||||
// Truncates a pair of rank-1 tensors so that their combined length is at most max_length_.
//
// Elements are dropped one at a time from whichever sequence is currently longer (from the second
// sequence when both have the same length) until the combined length fits. A sequence that needs
// no trimming is forwarded to the output unchanged.
//
// @param input - row that must hold exactly two rank-1 tensors
// @param output - row that receives the two (possibly shortened) tensors, in input order
// @return Status - OK on success; an error when the input is malformed, max_length_ is negative,
//                  or the underlying slice operation fails
Status TruncateSequencePairOp::Compute(const TensorRow &input, TensorRow *output) {
  IO_CHECK_VECTOR(input, output);
  CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 2, "Number of inputs should be two.");
  // Reject a negative limit up front: it can never be met, and the trimming loop below would keep
  // decrementing both lengths below zero (for a very negative limit, for an impractically long time).
  CHECK_FAIL_RETURN_UNEXPECTED(max_length_ >= 0, "Max length should not be negative.");

  std::shared_ptr<Tensor> seq1 = input[0];
  std::shared_ptr<Tensor> seq2 = input[1];
  CHECK_FAIL_RETURN_UNEXPECTED(seq1->shape().Rank() == 1 && seq2->shape().Rank() == 1,
                               "Both sequences should be of rank 1");
  const dsize_t length1 = seq1->shape()[0];
  const dsize_t length2 = seq2->shape()[0];
  dsize_t outLength1 = length1;
  dsize_t outLength2 = length2;

  // Shrink the longer sequence first; on a tie the second sequence gives up an element.
  for (dsize_t total = length1 + length2; total > max_length_; --total) {
    if (outLength1 > outLength2) {
      --outLength1;
    } else {
      --outLength2;
    }
  }

  // Produces the truncated version of one sequence, or hands the sequence through untouched when
  // its length did not change.
  auto truncate = [](std::shared_ptr<Tensor> seq, dsize_t length, dsize_t outLength,
                     std::shared_ptr<Tensor> *outSeq) -> Status {
    if (length == outLength) {
      *outSeq = std::move(seq);
      return Status::OK();
    }
    // The stop index is negative here: it removes (length - outLength) elements from the end.
    SliceOp slice(Slice(outLength - length));
    return slice.Compute(seq, outSeq);
  };

  std::shared_ptr<Tensor> outSeq1;
  RETURN_IF_NOT_OK(truncate(std::move(seq1), length1, outLength1, &outSeq1));
  std::shared_ptr<Tensor> outSeq2;
  RETURN_IF_NOT_OK(truncate(std::move(seq2), length2, outLength2, &outSeq2));

  output->push_back(outSeq1);
  output->push_back(outSeq2);
  return Status::OK();
}
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
@ -0,0 +1,48 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef DATASET_KERNELS_DATA_TRUNCATE_SEQUENCE_PAIR_OP_H_
|
||||
#define DATASET_KERNELS_DATA_TRUNCATE_SEQUENCE_PAIR_OP_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "dataset/core/tensor.h"
|
||||
#include "dataset/kernels/tensor_op.h"
|
||||
#include "dataset/kernels/data/type_cast_op.h"
|
||||
#include "dataset/kernels/data/data_utils.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
|
||||
class TruncateSequencePairOp : public TensorOp {
|
||||
public:
|
||||
explicit TruncateSequencePairOp(dsize_t length) : max_length_(length) {}
|
||||
|
||||
~TruncateSequencePairOp() override = default;
|
||||
|
||||
void Print(std::ostream &out) const override { out << "TruncateSequencePairOp"; }
|
||||
|
||||
Status Compute(const TensorRow &input, TensorRow *output) override;
|
||||
|
||||
private:
|
||||
dsize_t max_length_;
|
||||
};
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
#endif // DATASET_KERNELS_DATA_TRUNCATE_SEQUENCE_PAIR_OP_H_
|
@ -0,0 +1,51 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include "dataset/core/client.h"
|
||||
#include "common/common.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "securec.h"
|
||||
#include "dataset/core/tensor.h"
|
||||
#include "mindspore/ccsrc/dataset/text/kernels/truncate_sequence_pair_op.h"
|
||||
|
||||
using namespace mindspore::dataset;
|
||||
|
||||
namespace py = pybind11;
|
||||
|
||||
class MindDataTestTruncatePairOp : public UT::Common {
|
||||
public:
|
||||
MindDataTestTruncatePairOp() {}
|
||||
|
||||
void SetUp() { GlobalInit(); }
|
||||
};
|
||||
|
||||
// Sequences of length 3 and 2 with a limit of 4: the longer (first) sequence loses its last element
// and the second one is left untouched.
TEST_F(MindDataTestTruncatePairOp, Basics) {
  // Build the two input sequences.
  std::shared_ptr<Tensor> first;
  Tensor::CreateTensor(&first, std::vector<uint32_t>({1, 2, 3}));
  std::shared_ptr<Tensor> second;
  Tensor::CreateTensor(&second, std::vector<uint32_t>({4, 5}));
  TensorRow in({first, second});

  // Run the op with a combined maximum length of 4.
  auto op = std::make_shared<TruncateSequencePairOp>(4);
  TensorRow out;
  ASSERT_TRUE(op->Compute(in, &out).IsOk());

  // Compare against the expected truncated sequences.
  std::shared_ptr<Tensor> expected_first;
  Tensor::CreateTensor(&expected_first, std::vector<uint32_t>({1, 2}));
  std::shared_ptr<Tensor> expected_second;
  Tensor::CreateTensor(&expected_second, std::vector<uint32_t>({4, 5}));
  ASSERT_EQ(*expected_first, *out[0]);
  ASSERT_EQ(*expected_second, *out[1]);
}
|
@ -0,0 +1,67 @@
|
||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Testing TruncateSequencePair op in DE
|
||||
"""
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import mindspore.dataset as ds
|
||||
import mindspore.dataset.text as text
|
||||
|
||||
|
||||
def compare(in1, in2, length, out1, out2):
    """
    Run TruncateSequencePair(length) over a one-row dataset and check both output columns.

    Args:
        in1: first input sequence (column "s1").
        in2: second input sequence (column "s2").
        length: maximum combined length passed to TruncateSequencePair.
        out1: expected content of column "s1" after truncation.
        out2: expected content of column "s2" after truncation.
    """
    dataset = ds.NumpySlicesDataset({"s1": [in1], "s2": [in2]})
    truncate_op = text.TruncateSequencePair(length)
    dataset = dataset.map(input_columns=["s1", "s2"], operations=truncate_op)
    for row in dataset.create_dict_iterator():
        np.testing.assert_array_equal(out1, row["s1"])
        np.testing.assert_array_equal(out2, row["s2"])
|
||||
|
||||
|
||||
def test_basics():
    """Integer sequences truncated to an even maximum length of 4."""
    # Each case: (in1, in2, expected out1, expected out2)
    cases = [
        ([1, 2, 3], [4, 5], [1, 2], [4, 5]),
        ([1, 2], [4, 5], [1, 2], [4, 5]),
        ([1], [4], [1], [4]),
        ([1, 2, 3, 4], [5], [1, 2, 3], [5]),
        ([1, 2, 3, 4], [5, 6, 7, 8], [1, 2], [5, 6]),
    ]
    for first, second, want_first, want_second in cases:
        compare(in1=first, in2=second, length=4, out1=want_first, out2=want_second)
|
||||
|
||||
|
||||
def test_basics_odd():
    """Integer sequences truncated to odd maximum lengths."""
    # Each case: (in1, in2, max length, expected out1, expected out2)
    cases = [
        ([1, 2, 3], [4, 5], 3, [1, 2], [4]),
        ([1, 2], [4, 5], 3, [1, 2], [4]),
        ([1], [4], 5, [1], [4]),
        ([1, 2, 3, 4], [5], 3, [1, 2], [5]),
        ([1, 2, 3, 4], [5, 6, 7, 8], 3, [1, 2], [5]),
    ]
    for first, second, limit, want_first, want_second in cases:
        compare(in1=first, in2=second, length=limit, out1=want_first, out2=want_second)
|
||||
|
||||
|
||||
def test_basics_str():
    """Byte-string sequences, alone or paired with integer sequences, truncated to length 4."""
    # Each case: (in1, in2, expected out1, expected out2)
    cases = [
        ([b"1", b"2", b"3"], [4, 5], [b"1", b"2"], [4, 5]),
        ([b"1", b"2"], [b"4", b"5"], [b"1", b"2"], [b"4", b"5"]),
        ([b"1"], [4], [b"1"], [4]),
        ([b"1", b"2", b"3", b"4"], [b"5"], [b"1", b"2", b"3"], [b"5"]),
        ([b"1", b"2", b"3", b"4"], [5, 6, 7, 8], [b"1", b"2"], [5, 6]),
    ]
    for first, second, want_first, want_second in cases:
        compare(in1=first, in2=second, length=4, out1=want_first, out2=want_second)
|
||||
|
||||
|
||||
def test_exceptions():
    """A maximum length of 1 would empty one of the sequences, which must raise a RuntimeError."""
    expected_message = "Indices are empty, generated tensor would be empty"
    with pytest.raises(RuntimeError) as info:
        compare(in1=[1, 2, 3, 4], in2=[5, 6, 7, 8], length=1, out1=[1, 2], out2=[5])
    assert expected_message in str(info.value)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run every test in this file, in definition order, when executed as a script.
    for run_test in (test_basics, test_basics_odd, test_basics_str, test_exceptions):
        run_test()
|
Loading…
Reference in new issue