Modify Pybind LoDTensor API according to length-based LoD (#11106)

* add lod_tensor util and modify pybind

* refind pybind LoDTensor API and modify LoDTensor and DataFeeder test

* fix test error

* fix detection map op test

* fix reorder_lod_tensor test

* fix seq_concat_op

* fix chunk eval op test

* fix target assign op

* fix warp ctc op

* address comments step 1: reverse reset_lod op

* step 2: modify op test

* add warning message

* remove has_valid_lod

* add back has_valid_lod

* address comments

* add exception catching trial
wangkuiyi-patch-1
Kexin Zhao 7 years ago committed by GitHub
parent 53d1d0f0f2
commit 417fcf4f43
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -173,21 +173,6 @@ def seq_to_seq_net(embedding_dim, encoder_size, decoder_size, source_dict_dim,
return avg_cost, feeding_list return avg_cost, feeding_list
def to_lodtensor(data, place):
    """Pack a batch of variable-length id sequences into one LoDTensor.

    Args:
        data: list of sequences, each a list of int64 ids.
        place: device place on which the tensor memory is allocated.

    Returns:
        A ``(lod_tensor, total_len)`` tuple where ``total_len`` is the
        number of flattened elements (the final lod offset).
    """
    # Offset-based lod: running prefix sum of the sequence lengths.
    offsets = [0]
    for seq in data:
        offsets.append(offsets[-1] + len(seq))
    # Flatten all sequences into a single [N, 1] int64 column vector.
    flat = np.concatenate(data, axis=0).astype("int64")
    flat = flat.reshape([len(flat), 1])
    tensor = core.LoDTensor()
    tensor.set(flat, place)
    tensor.set_lod([offsets])
    return tensor, offsets[-1]
def lodtensor_to_ndarray(lod_tensor): def lodtensor_to_ndarray(lod_tensor):
dims = lod_tensor.get_dims() dims = lod_tensor.get_dims()
ndarray = np.zeros(shape=dims).astype('float32') ndarray = np.zeros(shape=dims).astype('float32')

@ -125,18 +125,3 @@ def get_model(args):
batch_size=args.batch_size) batch_size=args.batch_size)
return loss, inference_program, adam, train_reader, test_reader, batch_acc return loss, inference_program, adam, train_reader, test_reader, batch_acc
def to_lodtensor(data, place):
    """Convert a batch of variable-length id sequences into a fluid LoDTensor.

    Args:
        data: list of sequences, each a list of int64 ids.
        place: device place on which the tensor memory is allocated.

    Returns:
        A fluid ``LoDTensor`` holding the flattened data with a
        single-level offset-based lod.
    """
    # Offset-based lod: running prefix sum of the sequence lengths.
    offsets = [0]
    for seq in data:
        offsets.append(offsets[-1] + len(seq))
    # Flatten all sequences into a single [N, 1] int64 column vector.
    flat = numpy.concatenate(data, axis=0).astype("int64")
    flat = flat.reshape([len(flat), 1])
    tensor = fluid.LoDTensor()
    tensor.set(flat, place)
    tensor.set_lod([offsets])
    return tensor

@ -410,5 +410,38 @@ void LoDTensor::MergeLoDTensor(
} }
} }
// Convert an offset-based LoD to a length-based LoD, level by level.
// Example: offsets [0, 2, 3] -> lengths [2, 1].
//
// Fix: the original inner loop ran while `idx < offset_lod[lvl].size() - 1`;
// for an empty level, `size() - 1` underflows (size_t) to SIZE_MAX and the
// loop reads far out of bounds. Iterating from idx = 1 upward is safe for
// every level size, including 0 and 1.
LoD ConvertToLengthBasedLoD(const LoD &offset_lod) {
  LoD length_lod;
  length_lod.reserve(offset_lod.size());
  for (size_t lvl = 0; lvl < offset_lod.size(); ++lvl) {
    std::vector<size_t> level;
    if (offset_lod[lvl].size() > 0) {
      level.reserve(offset_lod[lvl].size() - 1);
    }
    // Each length is the difference of two consecutive offsets.
    for (size_t idx = 1; idx < offset_lod[lvl].size(); ++idx) {
      level.push_back(offset_lod[lvl][idx] - offset_lod[lvl][idx - 1]);
    }
    length_lod.push_back(level);
  }
  return length_lod;
}
// Convert a length-based LoD to an offset-based LoD by prefix-summing
// each level. Example: lengths [3, 2, 4] -> offsets [0, 3, 5, 9].
LoD ConvertToOffsetBasedLoD(const LoD &length_lod) {
  LoD offset_lod;
  offset_lod.reserve(length_lod.size());
  for (size_t lvl = 0; lvl < length_lod.size(); ++lvl) {
    // Every offset level has one more entry than its length level,
    // and always starts with 0.
    std::vector<size_t> offsets;
    offsets.reserve(length_lod[lvl].size() + 1);
    size_t running_total = 0;
    offsets.push_back(running_total);
    for (size_t idx = 0; idx < length_lod[lvl].size(); ++idx) {
      running_total += length_lod[lvl][idx];
      offsets.push_back(running_total);
    }
    offset_lod.push_back(offsets);
  }
  return offset_lod;
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle

@ -226,5 +226,19 @@ extern void WriteToRecordIO(recordio::Writer* writer,
extern std::vector<LoDTensor> ReadFromRecordIO( extern std::vector<LoDTensor> ReadFromRecordIO(
recordio::Scanner* scanner, const platform::DeviceContext& dev_ctx); recordio::Scanner* scanner, const platform::DeviceContext& dev_ctx);
/*
* Convert between length-based LoD and offset-based LoD.
* The implementation of LoDTensor class use offset-based LoD.
* However, we want to expose the more user-friendly length-based
* LoD to the Python side instead.
*
* Example:
* If offset_lod = [[0, 2, 3],[0, 3, 5, 9]]
* then length_lod = [[2, 1], [3, 2, 4]]
*/
LoD ConvertToLengthBasedLoD(const LoD& offset_lod);
LoD ConvertToOffsetBasedLoD(const LoD& length_lod);
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle

@ -228,6 +228,38 @@ TEST(LoD, CheckAbsLoD) {
ASSERT_FALSE(CheckAbsLoD(abs_lod0)); ASSERT_FALSE(CheckAbsLoD(abs_lod0));
} }
// Verify offset->length conversion on a three-level LoD:
// each length is the difference of two consecutive offsets,
// e.g. offsets [0, 1, 3] become lengths [1, 2].
TEST(LoD, ConvertToLengthBasedLoD) {
  LoD offset_lod;
  offset_lod.push_back(std::vector<size_t>({0, 2}));
  offset_lod.push_back(std::vector<size_t>({0, 1, 3}));
  offset_lod.push_back(std::vector<size_t>({0, 2, 4, 5}));

  LoD length_lod = ConvertToLengthBasedLoD(offset_lod);

  // Each expected level has one fewer entry than its offset level.
  LoD expected;
  expected.push_back(std::vector<size_t>({2}));
  expected.push_back(std::vector<size_t>({1, 2}));
  expected.push_back(std::vector<size_t>({2, 2, 1}));

  EXPECT_EQ(length_lod, expected);
}
// Verify length->offset conversion on a three-level LoD:
// each offset level is the prefix sum of the length level with a
// leading 0, e.g. lengths [1, 2] become offsets [0, 1, 3].
// This is the exact inverse of the ConvertToLengthBasedLoD test above.
TEST(LoD, ConvertToOffsetBasedLoD) {
  LoD length_lod;
  length_lod.push_back(std::vector<size_t>({2}));
  length_lod.push_back(std::vector<size_t>({1, 2}));
  length_lod.push_back(std::vector<size_t>({2, 2, 1}));

  LoD offset_lod = ConvertToOffsetBasedLoD(length_lod);

  // Each expected level has one more entry than its length level.
  LoD expected;
  expected.push_back(std::vector<size_t>({0, 2}));
  expected.push_back(std::vector<size_t>({0, 1, 3}));
  expected.push_back(std::vector<size_t>({0, 2, 4, 5}));

  EXPECT_EQ(offset_lod, expected);
}
template <typename T> template <typename T>
static void TestRecordIO() { static void TestRecordIO() {
LoDTensor tensor; LoDTensor tensor;

@ -144,28 +144,74 @@ PYBIND11_PLUGIN(core) {
py::class_<LoDTensor, Tensor>(m, "LoDTensor") py::class_<LoDTensor, Tensor>(m, "LoDTensor")
.def_buffer( .def_buffer(
[](Tensor &self) -> py::buffer_info { return CastToPyBuffer(self); }) [](Tensor &self) -> py::buffer_info { return CastToPyBuffer(self); })
.def( .def("__init__",
"__init__", [](LoDTensor &instance, const std::vector<std::vector<size_t>>
[](LoDTensor &instance, const std::vector<std::vector<size_t>> &lod) { &recursive_sequence_lengths) {
LoD new_lod; LoD new_lod;
new_lod.reserve(lod.size()); new_lod.reserve(recursive_sequence_lengths.size());
std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); std::copy(recursive_sequence_lengths.begin(),
new (&instance) LoDTensor(new_lod); recursive_sequence_lengths.end(),
std::back_inserter(new_lod));
LoD new_offset_lod = ConvertToOffsetBasedLoD(new_lod);
PADDLE_ENFORCE(
CheckLoD(new_offset_lod, -1),
"the provided recursive_sequence_lengths info is invalid");
new (&instance) LoDTensor(new_offset_lod);
}) })
.def("__init__", [](LoDTensor &instance) { new (&instance) LoDTensor(); }) .def("__init__", [](LoDTensor &instance) { new (&instance) LoDTensor(); })
.def("set_lod", .def("set_lod",
[](LoDTensor &self, const std::vector<std::vector<size_t>> &lod) { [](LoDTensor &self, const std::vector<std::vector<size_t>> &lod) {
// the input lod is offset-based level-of-detail info
LOG(WARNING)
<< "set_lod is deprecated and will be removed by 9.2018, "
"please switch to set_recursive_sequence_lengths.";
LoD new_lod; LoD new_lod;
new_lod.reserve(lod.size()); new_lod.reserve(lod.size());
std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
PADDLE_ENFORCE(CheckLoD(new_lod, vectorize(self.dims()).front()),
"the provided lod info is invalid");
self.set_lod(new_lod); self.set_lod(new_lod);
}) })
.def("lod", [](LoDTensor &self) -> std::vector<std::vector<size_t>> { .def("set_recursive_sequence_lengths",
auto lod = self.lod(); [](LoDTensor &self, const std::vector<std::vector<size_t>>
&recursive_sequence_lengths) {
// the input recursive_sequence_lengths is length-based
// level-of-detail info
LoD new_lod;
new_lod.reserve(recursive_sequence_lengths.size());
std::copy(recursive_sequence_lengths.begin(),
recursive_sequence_lengths.end(),
std::back_inserter(new_lod));
LoD new_offset_lod = ConvertToOffsetBasedLoD(new_lod);
PADDLE_ENFORCE(
CheckLoD(new_offset_lod, vectorize(self.dims()).front()),
"the provided recursive_sequence_lengths info is invalid");
self.set_lod(new_offset_lod);
})
.def("lod",
[](LoDTensor &self) -> std::vector<std::vector<size_t>> {
// output the offset-based lod info
LOG(WARNING) << "lod is deprecated and will be removed by 9.2018, "
"please switch to recursive_sequence_lengths.";
LoD lod = self.lod();
std::vector<std::vector<size_t>> new_lod; std::vector<std::vector<size_t>> new_lod;
new_lod.reserve(lod.size()); new_lod.reserve(lod.size());
std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
return new_lod; return new_lod;
})
.def("recursive_sequence_lengths",
[](LoDTensor &self) -> std::vector<std::vector<size_t>> {
// output the length-based lod info
LoD lod = ConvertToLengthBasedLoD(self.lod());
std::vector<std::vector<size_t>> new_lod;
new_lod.reserve(lod.size());
std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
return new_lod;
})
.def("has_valid_recursive_sequence_lengths", [](LoDTensor &self) -> bool {
// Check that the lod info is valid and match the outermost
// dimension of the LoDTensor data
return CheckLoD(self.lod(), vectorize(self.dims()).front());
}); });
py::class_<SelectedRows>(m, "SelectedRows") py::class_<SelectedRows>(m, "SelectedRows")

@ -47,7 +47,7 @@ class DataToLoDTensorConverter(object):
self.lod = [] self.lod = []
for i in six.range(lod_level): for i in six.range(lod_level):
self.lod.append([0]) self.lod.append([])
def feed(self, data): def feed(self, data):
self._feed_impl_(data, self.lod, self.lod_level) self._feed_impl_(data, self.lod, self.lod_level)
@ -56,8 +56,7 @@ class DataToLoDTensorConverter(object):
if lod_level == 0: if lod_level == 0:
self.data.append(data) self.data.append(data)
else: else:
cur_lod_len = len(data) lod[0].append(len(data))
lod[0].append(lod[0][-1] + cur_lod_len)
for each_data in data: for each_data in data:
self._feed_impl_(each_data, lod[1:], lod_level - 1) self._feed_impl_(each_data, lod[1:], lod_level - 1)
@ -66,7 +65,7 @@ class DataToLoDTensorConverter(object):
t = core.LoDTensor() t = core.LoDTensor()
t.set(arr, self.place) t.set(arr, self.place)
if self.lod_level > 0: if self.lod_level > 0:
t.set_lod(self.lod) t.set_recursive_sequence_lengths(self.lod)
return t return t

@ -18,80 +18,6 @@ import numpy as np
__all__ = ['create_lod_tensor', 'create_random_int_lodtensor'] __all__ = ['create_lod_tensor', 'create_random_int_lodtensor']
def _validate_lod(lod, tensor_height=-1):
"""Check whether the input length-based lod info is valid.
There are several things to check:
1. lod should be a list of lists. Empty list is fine.
2. The length of each sublist (a lod level) should be at least one.
3. Each element in each lod level should be an integer greater than 0.
4. The sum of one lod level should be equal to the length of the next lod level.
5. The sum of the last lod level should be equal to the tensor height.
Bypass this check if user does not provide tensor_height as input.
Args:
lod: the length-based lod info, e.g., [[2, 3], [2, 1, 2, 3, 4]].
tensor_height: the outermost dimension of the tensor with which the input
lod is associated with.
Returns:
A boolean indicating whether the input lod is valid or not.
"""
assert isinstance(lod, list), "lod should be a list"
# Empty lod is fine
if len(lod) == 0:
return True
lod_sum = []
for level in lod:
assert isinstance(level, list), "each item in lod should be a list"
# Each level of lod should have at least one length info
if len(level) < 1:
return False
level_sum = 0
for lod_len in level:
# Each length in a level should be > 0
if lod_len <= 0:
return False
level_sum += lod_len
lod_sum.append(level_sum)
for idx, val in enumerate(lod_sum[:-1]):
# Each level's sum should be equal to
# the number of items in the next level
if val != len(lod[idx + 1]):
return False
if tensor_height == -1:
return True
else:
# Last level's sum should be equal to the tensor height
return lod_sum[-1] == tensor_height
def _convert_lod(lod):
"""Convert a length-based lod to a offset-based lod.
If the length-based lod is [[2, 3], [2, 1, 2, 3, 4]],
then the offset-based lod is [[0, 2, 5], [0, 2, 3, 5, 8, 12]].
Args:
lod: a length-based lod info.
Returns:
A list of lists as the offset-based lod converted to from the input lod.
"""
new_lod = []
for level in lod:
cur_len = 0
new_level = [cur_len]
for lod_len in level:
cur_len += lod_len
new_level.append(cur_len)
new_lod.append(new_level)
return new_lod
def create_lod_tensor(data, lod, place): def create_lod_tensor(data, lod, place):
"""Create a lod tensor from a numpy array, a list, or an existing lod tensor. """Create a lod tensor from a numpy array, a list, or an existing lod tensor.
@ -139,11 +65,11 @@ def create_lod_tensor(data, lod, place):
flattened_data = flattened_data.reshape([len(flattened_data), 1]) flattened_data = flattened_data.reshape([len(flattened_data), 1])
return create_lod_tensor(flattened_data, lod, place) return create_lod_tensor(flattened_data, lod, place)
elif isinstance(data, np.ndarray): elif isinstance(data, np.ndarray):
assert _validate_lod(lod,
data.shape[0]), "the provided lod info is invalid"
tensor = core.LoDTensor() tensor = core.LoDTensor()
tensor.set(data, place) tensor.set(data, place)
tensor.set_lod(_convert_lod(lod)) tensor.set_recursive_sequence_lengths(lod)
assert tensor.has_valid_recursive_sequence_lengths(
), "the provided lod info is invalid"
return tensor return tensor
else: else:
raise TypeError( raise TypeError(
@ -181,9 +107,8 @@ def create_random_int_lodtensor(lod, base_shape, place, low, high):
A fluid LoDTensor object with tensor data and lod info. A fluid LoDTensor object with tensor data and lod info.
""" """
assert isinstance(base_shape, list), "base_shape should be a list" assert isinstance(base_shape, list), "base_shape should be a list"
converted_lod = _convert_lod(lod)
# append the total number of basic elements to the front of its shape # append the total number of basic elements to the front of its shape
overall_shape = [converted_lod[-1][-1]] + base_shape overall_shape = [sum(lod[-1])] + base_shape
# the range of integer data elements is [low, high] # the range of integer data elements is [low, high]
data = np.random.random_integers(low, high, overall_shape).astype("int64") data = np.random.random_integers(low, high, overall_shape).astype("int64")
return create_lod_tensor(data, lod, place) return create_lod_tensor(data, lod, place)

@ -22,12 +22,11 @@ class TestDataFeeder(unittest.TestCase):
label = fluid.layers.data(name='label', shape=[1], dtype='int64') label = fluid.layers.data(name='label', shape=[1], dtype='int64')
feeder = fluid.DataFeeder([img, label], fluid.CPUPlace()) feeder = fluid.DataFeeder([img, label], fluid.CPUPlace())
result = feeder.feed([([0] * 784, [9]), ([1] * 784, [1])]) result = feeder.feed([([0] * 784, [9]), ([1] * 784, [1])])
print(result)
self.assertEqual(result['image'].shape(), [2, 1, 28, 28]) self.assertEqual(result['image'].shape(), [2, 1, 28, 28])
self.assertEqual(result['label'].shape(), [2, 1]) self.assertEqual(result['label'].shape(), [2, 1])
self.assertEqual(result['image'].lod(), []) self.assertEqual(result['image'].recursive_sequence_lengths(), [])
self.assertEqual(result['label'].lod(), []) self.assertEqual(result['label'].recursive_sequence_lengths(), [])
def test_lod_level_1_converter(self): def test_lod_level_1_converter(self):
# lod_level = 1 # lod_level = 1
@ -42,12 +41,12 @@ class TestDataFeeder(unittest.TestCase):
# label = [1] * len(data) # label = [1] * len(data)
result = feeder.feed( result = feeder.feed(
[([1, 2, 3], [1]), ([4, 5], [1]), ([6, 7, 8, 9], [1])]) [([1, 2, 3], [1]), ([4, 5], [1]), ([6, 7, 8, 9], [1])])
print(result)
self.assertEqual(result['sentences'].shape(), [9, 1]) self.assertEqual(result['sentences'].shape(), [9, 1])
self.assertEqual(result['label'].shape(), [3, 1]) self.assertEqual(result['label'].shape(), [3, 1])
self.assertEqual(result['sentences'].lod(), [[0, 3, 5, 9]]) self.assertEqual(result['sentences'].recursive_sequence_lengths(),
self.assertEqual(result['label'].lod(), []) [[3, 2, 4]])
self.assertEqual(result['label'].recursive_sequence_lengths(), [])
def test_lod_level_2_converter(self): def test_lod_level_2_converter(self):
# lod_level = 2 # lod_level = 2
@ -62,12 +61,12 @@ class TestDataFeeder(unittest.TestCase):
# label = [1] * len(data) # label = [1] * len(data)
result = feeder.feed( result = feeder.feed(
[([[1, 2, 3], [4, 5]], [1]), ([[6, 7, 8, 9]], [1])]) [([[1, 2, 3], [4, 5]], [1]), ([[6, 7, 8, 9]], [1])])
print(result)
self.assertEqual(result['paragraphs'].shape(), [9, 1]) self.assertEqual(result['paragraphs'].shape(), [9, 1])
self.assertEqual(result['label'].shape(), [2, 1]) self.assertEqual(result['label'].shape(), [2, 1])
self.assertEqual(result['paragraphs'].lod(), [[0, 2, 3], [0, 3, 5, 9]]) self.assertEqual(result['paragraphs'].recursive_sequence_lengths(),
self.assertEqual(result['label'].lod(), []) [[2, 1], [3, 2, 4]])
self.assertEqual(result['label'].recursive_sequence_lengths(), [])
if __name__ == '__main__': if __name__ == '__main__':

@ -13,44 +13,41 @@
# limitations under the License. # limitations under the License.
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.lod_tensor import create_lod_tensor, create_random_int_lodtensor, _validate_lod, _convert_lod from paddle.fluid.lod_tensor import create_lod_tensor, create_random_int_lodtensor
import numpy import numpy as np
import unittest import unittest
class TestLoDTensor(unittest.TestCase): class TestLoDTensor(unittest.TestCase):
def test_validate_lod(self): def test_pybind_lod(self):
lod = (1, 2, 1) tensor = fluid.LoDTensor()
self.assertRaises(AssertionError, _validate_lod, lod, -1)
lod = [[1, 2], (2, 3)]
self.assertRaises(AssertionError, _validate_lod, lod, -1)
lod = [1, 2, 3]
self.assertRaises(AssertionError, _validate_lod, lod, -1)
lod = [] lod = []
self.assertTrue(_validate_lod(lod, -1)) tensor.set_recursive_sequence_lengths(lod)
lod = [[], [1], [3]] lod = [[], [1], [3]]
self.assertFalse(_validate_lod(lod, -1)) self.assertRaises(Exception, tensor.set_recursive_sequence_lengths, lod)
lod = [[0], [-1], [3]] lod = [[0], [2], [3]]
self.assertFalse(_validate_lod(lod, -1)) self.assertRaises(Exception, tensor.set_recursive_sequence_lengths, lod)
# Each level's sum should be equal to the number of items in the next level
# Moreover, last level's sum should be equal to the tensor height
lod = [[2, 3], [1, 3, 1, 2, 1]]
self.assertTrue(_validate_lod(lod, tensor_height=8))
lod = [[1, 3], [2, 1, 3]]
self.assertFalse(_validate_lod(lod, tensor_height=6))
lod = [[1, 3], [2, 1, 3, 4]]
self.assertFalse(_validate_lod(lod, tensor_height=5))
def test_convert_lod(self):
lod = [[1, 2, 3]] lod = [[1, 2, 3]]
converted_lod = [[0, 1, 3, 6]] tensor.set_recursive_sequence_lengths(lod)
self.assertEqual(_convert_lod(lod), converted_lod) self.assertEqual(tensor.recursive_sequence_lengths(), lod)
tensor.set(np.random.random([6, 1]), fluid.CPUPlace())
self.assertTrue(tensor.has_valid_recursive_sequence_lengths())
tensor.set(np.random.random([9, 1]), fluid.CPUPlace())
self.assertFalse(tensor.has_valid_recursive_sequence_lengths())
# Each level's sum should be equal to the number of items in the next level
# Moreover, last level's sum should be equal to the tensor height
lod = [[2, 3], [1, 3, 1, 2, 2]]
tensor.set_recursive_sequence_lengths(lod)
self.assertEqual(tensor.recursive_sequence_lengths(), lod)
tensor.set(np.random.random([8, 1]), fluid.CPUPlace())
self.assertFalse(tensor.has_valid_recursive_sequence_lengths())
lod = [[2, 3], [1, 3, 1, 2, 1]] lod = [[2, 3], [1, 3, 1, 2, 1]]
converted_lod = [[0, 2, 5], [0, 1, 4, 5, 7, 8]] tensor.set_recursive_sequence_lengths(lod)
self.assertEqual(_convert_lod(lod), converted_lod) self.assertTrue(tensor.has_valid_recursive_sequence_lengths())
tensor.set(np.random.random([9, 1]), fluid.CPUPlace())
self.assertFalse(tensor.has_valid_recursive_sequence_lengths())
def test_create_lod_tensor(self): def test_create_lod_tensor(self):
# Create LoDTensor from a list # Create LoDTensor from a list
@ -60,19 +57,19 @@ class TestLoDTensor(unittest.TestCase):
self.assertRaises(AssertionError, create_lod_tensor, data, wrong_lod, self.assertRaises(AssertionError, create_lod_tensor, data, wrong_lod,
fluid.CPUPlace()) fluid.CPUPlace())
tensor = create_lod_tensor(data, correct_lod, fluid.CPUPlace()) tensor = create_lod_tensor(data, correct_lod, fluid.CPUPlace())
self.assertEqual(tensor.lod(), [[0, 3, 5]]) self.assertEqual(tensor.recursive_sequence_lengths(), correct_lod)
# Create LoDTensor from numpy array # Create LoDTensor from numpy array
data = numpy.random.random([10, 1]) data = np.random.random([10, 1])
lod = [[2, 1], [3, 3, 4]] lod = [[2, 1], [3, 3, 4]]
tensor = create_lod_tensor(data, lod, fluid.CPUPlace()) tensor = create_lod_tensor(data, lod, fluid.CPUPlace())
self.assertEqual(tensor.lod(), [[0, 2, 3], [0, 3, 6, 10]]) self.assertEqual(tensor.recursive_sequence_lengths(), lod)
# Create LoDTensor from another LoDTensor, they are differnt instances # Create LoDTensor from another LoDTensor, they are differnt instances
new_lod = [[2, 2, 1], [1, 2, 2, 3, 2]] new_lod = [[2, 2, 1], [1, 2, 2, 3, 2]]
new_tensor = create_lod_tensor(tensor, new_lod, fluid.CPUPlace()) new_tensor = create_lod_tensor(tensor, new_lod, fluid.CPUPlace())
self.assertEqual(tensor.lod(), [[0, 2, 3], [0, 3, 6, 10]]) self.assertEqual(tensor.recursive_sequence_lengths(), lod)
self.assertEqual(new_tensor.lod(), [[0, 2, 4, 5], [0, 1, 3, 5, 8, 10]]) self.assertEqual(new_tensor.recursive_sequence_lengths(), new_lod)
def test_create_random_int_lodtensor(self): def test_create_random_int_lodtensor(self):
# The shape of a word, commonly used in speech and NLP problem, is [1] # The shape of a word, commonly used in speech and NLP problem, is [1]
@ -83,7 +80,7 @@ class TestLoDTensor(unittest.TestCase):
high = dict_size - 1 high = dict_size - 1
tensor = create_random_int_lodtensor(lod, shape, tensor = create_random_int_lodtensor(lod, shape,
fluid.CPUPlace(), low, high) fluid.CPUPlace(), low, high)
self.assertEqual(tensor.lod(), [[0, 2, 5, 10]]) self.assertEqual(tensor.recursive_sequence_lengths(), lod)
self.assertEqual(tensor.shape(), [10, 1]) self.assertEqual(tensor.shape(), [10, 1])

@ -162,7 +162,7 @@ class OpTest(unittest.TestCase):
tensor = core.LoDTensor() tensor = core.LoDTensor()
if isinstance(np_value, tuple): if isinstance(np_value, tuple):
tensor.set(np_value[0], place) tensor.set(np_value[0], place)
tensor.set_lod(np_value[1]) tensor.set_recursive_sequence_lengths(np_value[1])
else: else:
tensor.set(np_value, place) tensor.set(np_value, place)
feed_map[name] = tensor feed_map[name] = tensor
@ -170,7 +170,8 @@ class OpTest(unittest.TestCase):
tensor = core.LoDTensor() tensor = core.LoDTensor()
if isinstance(self.inputs[var_name], tuple): if isinstance(self.inputs[var_name], tuple):
tensor.set(self.inputs[var_name][0], place) tensor.set(self.inputs[var_name][0], place)
tensor.set_lod(self.inputs[var_name][1]) tensor.set_recursive_sequence_lengths(self.inputs[var_name][
1])
else: else:
tensor.set(self.inputs[var_name], place) tensor.set(self.inputs[var_name], place)
feed_map[var_name] = tensor feed_map[var_name] = tensor
@ -293,7 +294,8 @@ class OpTest(unittest.TestCase):
str(place)) str(place))
if isinstance(expect, tuple): if isinstance(expect, tuple):
self.assertListEqual( self.assertListEqual(
actual.lod(), expect[1], "Output (" + sub_out_name + actual.recursive_sequence_lengths(), expect[1],
"Output (" + sub_out_name +
") has different lod at " + str(place)) ") has different lod at " + str(place))
else: else:
idx = find_actual(out_name, fetch_list) idx = find_actual(out_name, fetch_list)
@ -307,8 +309,8 @@ class OpTest(unittest.TestCase):
"Output (" + out_name + ") has diff at " + str(place) + "Output (" + out_name + ") has diff at " + str(place) +
str(actual_t) + "\n" + str(expect_t)) str(actual_t) + "\n" + str(expect_t))
if isinstance(expect, tuple): if isinstance(expect, tuple):
self.assertListEqual(actual.lod(), expect[1], self.assertListEqual(actual.recursive_sequence_lengths(),
"Output (" + out_name + expect[1], "Output (" + out_name +
") has different lod at " + str(place)) ") has different lod at " + str(place))
def _get_places(self): def _get_places(self):
@ -408,7 +410,7 @@ class OpTest(unittest.TestCase):
tensor = core.LoDTensor() tensor = core.LoDTensor()
tensor.set(np_value, place) tensor.set(np_value, place)
if lod is not None: if lod is not None:
tensor.set_lod(lod) tensor.set_recursive_sequence_lengths(lod)
return tensor return tensor
@staticmethod @staticmethod

@ -128,7 +128,7 @@ def create_or_get_tensor(scope, var_name, var, place):
tensor = scope.var(var_name).get_tensor() tensor = scope.var(var_name).get_tensor()
if var is not None: if var is not None:
assert isinstance(var, np.ndarray) assert isinstance(var, np.ndarray)
tensor.set_lod([[]]) tensor.set_recursive_sequence_lengths([])
tensor.set_dims(var.shape) tensor.set_dims(var.shape)
tensor.set(var, place) tensor.set(var, place)
return tensor return tensor

@ -26,36 +26,36 @@ class TestBeamSearchDecodeOp(unittest.TestCase):
def append_lod_tensor(self, tensor_array, lod, data): def append_lod_tensor(self, tensor_array, lod, data):
lod_tensor = core.LoDTensor() lod_tensor = core.LoDTensor()
lod_tensor.set_lod(lod) lod_tensor.set_recursive_sequence_lengths(lod)
lod_tensor.set(data, self.place) lod_tensor.set(data, self.place)
tensor_array.append(lod_tensor) tensor_array.append(lod_tensor)
def test_get_set(self): def test_get_set(self):
ids = self.scope.var("ids").get_lod_tensor_array() ids = self.scope.var("ids").get_lod_tensor_array()
self.append_lod_tensor( self.append_lod_tensor(
ids, [[0, 3, 6], [0, 1, 2, 3, 4, 5, 6]], ids, [[3, 3], [1, 1, 1, 1, 1, 1]],
np.array( np.array(
[1, 2, 3, 4, 5, 6], dtype="int64")) [1, 2, 3, 4, 5, 6], dtype="int64"))
self.append_lod_tensor( self.append_lod_tensor(
ids, [[0, 3, 6], [0, 1, 1, 3, 5, 5, 6]], ids, [[3, 3], [1, 0, 2, 2, 0, 1]],
np.array( np.array(
[0, 1, 2, 3, 4, 5], dtype="int64")) [0, 1, 2, 3, 4, 5], dtype="int64"))
self.append_lod_tensor( self.append_lod_tensor(
ids, [[0, 3, 6], [0, 0, 1, 2, 3, 4, 5]], ids, [[3, 3], [0, 1, 1, 1, 1, 1]],
np.array( np.array(
[0, 1, 2, 3, 4], dtype="int64")) [0, 1, 2, 3, 4], dtype="int64"))
scores = self.scope.var("scores").get_lod_tensor_array() scores = self.scope.var("scores").get_lod_tensor_array()
self.append_lod_tensor( self.append_lod_tensor(
scores, [[0, 3, 6], [0, 1, 2, 3, 4, 5, 6]], scores, [[3, 3], [1, 1, 1, 1, 1, 1]],
np.array( np.array(
[1, 2, 3, 4, 5, 6], dtype="float64")) [1, 2, 3, 4, 5, 6], dtype="float64"))
self.append_lod_tensor( self.append_lod_tensor(
scores, [[0, 3, 6], [0, 1, 1, 3, 5, 5, 6]], scores, [[3, 3], [1, 0, 2, 2, 0, 1]],
np.array( np.array(
[0, 1, 2, 3, 4, 5], dtype="float64")) [0, 1, 2, 3, 4, 5], dtype="float64"))
self.append_lod_tensor( self.append_lod_tensor(
scores, [[0, 3, 6], [0, 0, 1, 2, 3, 4, 5]], scores, [[3, 3], [0, 1, 1, 1, 1, 1]],
np.array( np.array(
[0, 1, 2, 3, 4], dtype="float64")) [0, 1, 2, 3, 4], dtype="float64"))
@ -73,9 +73,11 @@ class TestBeamSearchDecodeOp(unittest.TestCase):
beam_search_decode_op.run(self.scope, self.place) beam_search_decode_op.run(self.scope, self.place)
expected_lod = [[0, 4, 8], [0, 1, 3, 6, 9, 10, 13, 16, 19]] expected_lod = [[4, 4], [1, 2, 3, 3, 1, 3, 3, 3]]
self.assertEqual(sentence_ids.lod(), expected_lod) self.assertEqual(sentence_ids.recursive_sequence_lengths(),
self.assertEqual(sentence_scores.lod(), expected_lod) expected_lod)
self.assertEqual(sentence_scores.recursive_sequence_lengths(),
expected_lod)
expected_data = np.array( expected_data = np.array(
[2, 1, 0, 3, 1, 0, 3, 2, 1, 5, 4, 3, 2, 4, 4, 3, 6, 5, 4], "int64") [2, 1, 0, 3, 1, 0, 3, 2, 1, 5, 4, 3, 2, 4, 4, 3, 6, 5, 4], "int64")

@ -48,18 +48,18 @@ class BeamSearchOpTester(unittest.TestCase):
op.run(self.scope, core.CPUPlace()) op.run(self.scope, core.CPUPlace())
selected_ids = self.scope.find_var("selected_ids").get_tensor() selected_ids = self.scope.find_var("selected_ids").get_tensor()
print 'selected_ids', np.array(selected_ids) print 'selected_ids', np.array(selected_ids)
print 'lod', selected_ids.lod() print 'lod', selected_ids.recursive_sequence_lengths()
def _create_pre_ids(self): def _create_pre_ids(self):
np_data = np.array([[1, 2, 3, 4]], dtype='int64') np_data = np.array([[1, 2, 3, 4]], dtype='int64')
tensor = create_tensor(self.scope, "pre_ids", np_data) tensor = create_tensor(self.scope, "pre_ids", np_data)
def _create_ids(self): def _create_ids(self):
self.lod = [[0, 1, 4], [0, 1, 2, 3, 4]] self.lod = [[1, 3], [1, 1, 1, 1]]
np_data = np.array( np_data = np.array(
[[4, 2, 5], [2, 1, 3], [3, 5, 2], [8, 2, 1]], dtype='int64') [[4, 2, 5], [2, 1, 3], [3, 5, 2], [8, 2, 1]], dtype='int64')
tensor = create_tensor(self.scope, "ids", np_data) tensor = create_tensor(self.scope, "ids", np_data)
tensor.set_lod(self.lod) tensor.set_recursive_sequence_lengths(self.lod)
def _create_scores(self): def _create_scores(self):
np_data = np.array( np_data = np.array(
@ -71,7 +71,7 @@ class BeamSearchOpTester(unittest.TestCase):
], ],
dtype='float32') dtype='float32')
tensor = create_tensor(self.scope, "scores", np_data) tensor = create_tensor(self.scope, "scores", np_data)
tensor.set_lod(self.lod) tensor.set_recursive_sequence_lengths(self.lod)
if __name__ == '__main__': if __name__ == '__main__':

@ -65,23 +65,25 @@ def batch_bipartite_match(distance, lod, match_type=None, dist_threshold=None):
distance (numpy.array) : The distance of two entries with shape [M, N]. distance (numpy.array) : The distance of two entries with shape [M, N].
lod (list of int): The offsets of each input in this batch. lod (list of int): The offsets of each input in this batch.
""" """
n = len(lod) - 1 n = len(lod)
m = distance.shape[1] m = distance.shape[1]
match_indices = -1 * np.ones((n, m), dtype=np.int) match_indices = -1 * np.ones((n, m), dtype=np.int)
match_dist = np.zeros((n, m), dtype=np.float32) match_dist = np.zeros((n, m), dtype=np.float32)
for i in range(len(lod) - 1): cur_offset = 0
bipartite_match(distance[lod[i]:lod[i + 1], :], match_indices[i, :], for i in range(n):
match_dist[i, :]) bipartite_match(distance[cur_offset:(cur_offset + lod[i]), :],
match_indices[i, :], match_dist[i, :])
if match_type == 'per_prediction': if match_type == 'per_prediction':
argmax_match(distance[lod[i]:lod[i + 1], :], match_indices[i, :], argmax_match(distance[cur_offset:(cur_offset + lod[i]), :],
match_dist[i, :], dist_threshold) match_indices[i, :], match_dist[i, :], dist_threshold)
cur_offset += lod[i]
return match_indices, match_dist return match_indices, match_dist
class TestBipartiteMatchOpWithLoD(OpTest): class TestBipartiteMatchOpWithLoD(OpTest):
def setUp(self): def setUp(self):
self.op_type = 'bipartite_match' self.op_type = 'bipartite_match'
lod = [[0, 5, 11, 23]] lod = [[5, 6, 12]]
dist = np.random.random((23, 217)).astype('float32') dist = np.random.random((23, 217)).astype('float32')
match_indices, match_dist = batch_bipartite_match(dist, lod[0]) match_indices, match_dist = batch_bipartite_match(dist, lod[0])
@ -98,7 +100,7 @@ class TestBipartiteMatchOpWithLoD(OpTest):
class TestBipartiteMatchOpWithoutLoD(OpTest): class TestBipartiteMatchOpWithoutLoD(OpTest):
def setUp(self): def setUp(self):
self.op_type = 'bipartite_match' self.op_type = 'bipartite_match'
lod = [[0, 8]] lod = [[8]]
dist = np.random.random((8, 17)).astype('float32') dist = np.random.random((8, 17)).astype('float32')
match_indices, match_dist = batch_bipartite_match(dist, lod[0]) match_indices, match_dist = batch_bipartite_match(dist, lod[0])
@ -115,7 +117,7 @@ class TestBipartiteMatchOpWithoutLoD(OpTest):
class TestBipartiteMatchOpWithPerPredictionType(OpTest): class TestBipartiteMatchOpWithPerPredictionType(OpTest):
def setUp(self): def setUp(self):
self.op_type = 'bipartite_match' self.op_type = 'bipartite_match'
lod = [[0, 5, 11, 23]] lod = [[5, 6, 12]]
dist = np.random.random((23, 237)).astype('float32') dist = np.random.random((23, 237)).astype('float32')
match_indices, match_dist = batch_bipartite_match(dist, lod[0], match_indices, match_dist = batch_bipartite_match(dist, lod[0],
'per_prediction', 0.5) 'per_prediction', 0.5)

@ -81,15 +81,19 @@ def batch_box_coder(prior_box, prior_box_var, target_box, lod, code_type,
n = target_box.shape[0] n = target_box.shape[0]
m = prior_box.shape[0] m = prior_box.shape[0]
output_box = np.zeros((n, m, 4), dtype=np.float32) output_box = np.zeros((n, m, 4), dtype=np.float32)
for i in range(len(lod) - 1): cur_offset = 0
for i in range(len(lod)):
if (code_type == "EncodeCenterSize"): if (code_type == "EncodeCenterSize"):
box_coder(target_box[lod[i]:lod[i + 1], :], prior_box, box_coder(target_box[cur_offset:(cur_offset + lod[i]), :],
prior_box_var, output_box[lod[i]:lod[i + 1], :, :], prior_box, prior_box_var,
output_box[cur_offset:(cur_offset + lod[i]), :, :],
code_type, box_normalized) code_type, box_normalized)
elif (code_type == "DecodeCenterSize"): elif (code_type == "DecodeCenterSize"):
box_coder(target_box[lod[i]:lod[i + 1], :, :], prior_box, box_coder(target_box[cur_offset:(cur_offset + lod[i]), :, :],
prior_box_var, output_box[lod[i]:lod[i + 1], :, :], prior_box, prior_box_var,
output_box[cur_offset:(cur_offset + lod[i]), :, :],
code_type, box_normalized) code_type, box_normalized)
cur_offset += lod[i]
return output_box return output_box
@ -99,7 +103,7 @@ class TestBoxCoderOp(OpTest):
def setUp(self): def setUp(self):
self.op_type = "box_coder" self.op_type = "box_coder"
lod = [[0, 1, 2, 3, 4, 5]] lod = [[1, 1, 1, 1, 1]]
prior_box = np.random.random((10, 4)).astype('float32') prior_box = np.random.random((10, 4)).astype('float32')
prior_box_var = np.random.random((10, 4)).astype('float32') prior_box_var = np.random.random((10, 4)).astype('float32')
target_box = np.random.random((5, 10, 4)).astype('float32') target_box = np.random.random((5, 10, 4)).astype('float32')
@ -152,7 +156,7 @@ class TestBoxCoderOpWithLoD(OpTest):
def setUp(self): def setUp(self):
self.op_type = "box_coder" self.op_type = "box_coder"
lod = [[0, 4, 12, 20]] lod = [[4, 8, 8]]
prior_box = np.random.random((10, 4)).astype('float32') prior_box = np.random.random((10, 4)).astype('float32')
prior_box_var = np.random.random((10, 4)).astype('float32') prior_box_var = np.random.random((10, 4)).astype('float32')
target_box = np.random.random((20, 4)).astype('float32') target_box = np.random.random((20, 4)).astype('float32')

@ -144,10 +144,10 @@ class TestChunkEvalOp(OpTest):
starts = sorted(starts) starts = sorted(starts)
self.num_correct_chunks, self.num_infer_chunks, self.num_label_chunks = self.gen_chunks( self.num_correct_chunks, self.num_infer_chunks, self.num_label_chunks = self.gen_chunks(
infer, label, starts) infer, label, starts)
self.inputs = { lod = []
'Inference': (infer, [starts]), for i in range(len(starts) - 1):
'Label': (label, [starts]) lod.append(starts[i + 1] - starts[i])
} self.inputs = {'Inference': (infer, [lod]), 'Label': (label, [lod])}
precision = float( precision = float(
self.num_correct_chunks self.num_correct_chunks
) / self.num_infer_chunks if self.num_infer_chunks else 0 ) / self.num_infer_chunks if self.num_infer_chunks else 0

@ -22,9 +22,9 @@ from op_test import OpTest
class CRFDecoding(object): class CRFDecoding(object):
def __init__(self, emission_weights, transition_weights, def __init__(self, emission_weights, transition_weights,
seq_start_positions): seq_start_positions):
assert (emission_weights.shape[0] == seq_start_positions[-1]) assert (emission_weights.shape[0] == sum(seq_start_positions))
self.tag_num = emission_weights.shape[1] self.tag_num = emission_weights.shape[1]
self.seq_num = len(seq_start_positions) - 1 self.seq_num = len(seq_start_positions)
self.seq_start_positions = seq_start_positions self.seq_start_positions = seq_start_positions
self.x = emission_weights self.x = emission_weights
@ -34,9 +34,9 @@ class CRFDecoding(object):
self.w = transition_weights[2:, :] self.w = transition_weights[2:, :]
self.track = np.zeros( self.track = np.zeros(
(seq_start_positions[-1], self.tag_num), dtype="int64") (sum(seq_start_positions), self.tag_num), dtype="int64")
self.decoded_path = np.zeros( self.decoded_path = np.zeros(
(seq_start_positions[-1], 1), dtype="int64") (sum(seq_start_positions), 1), dtype="int64")
def _decode_one_sequence(self, decoded_path, x): def _decode_one_sequence(self, decoded_path, x):
seq_len, tag_num = x.shape seq_len, tag_num = x.shape
@ -71,9 +71,11 @@ class CRFDecoding(object):
decoded_path[i - 1] = max_idx = track[i, max_idx] decoded_path[i - 1] = max_idx = track[i, max_idx]
def decode(self): def decode(self):
cur_pos = 0
for i in range(self.seq_num): for i in range(self.seq_num):
start = self.seq_start_positions[i] start = cur_pos
end = self.seq_start_positions[i + 1] cur_pos += self.seq_start_positions[i]
end = cur_pos
self._decode_one_sequence(self.decoded_path[start:end, :], self._decode_one_sequence(self.decoded_path[start:end, :],
self.x[start:end, :]) self.x[start:end, :])
return self.decoded_path return self.decoded_path
@ -90,11 +92,13 @@ class TestCRFDecodingOp1(OpTest):
TAG_NUM = 17 TAG_NUM = 17
MAX_SEQ_LEN = 10 MAX_SEQ_LEN = 10
lod = [[0]] lod = [[]]
total_len = 0
for i in range(SEQ_NUM): for i in range(SEQ_NUM):
lod[-1].append(lod[-1][-1] + random.randint(1, MAX_SEQ_LEN)) lod[-1].append(random.randint(1, MAX_SEQ_LEN))
total_len += lod[-1][-1]
emission = np.random.uniform(-1, 1, emission = np.random.uniform(-1, 1,
[lod[-1][-1], TAG_NUM]).astype("float64") [total_len, TAG_NUM]).astype("float64")
transition = np.random.uniform(-0.5, 0.5, transition = np.random.uniform(-0.5, 0.5,
[TAG_NUM + 2, TAG_NUM]).astype("float64") [TAG_NUM + 2, TAG_NUM]).astype("float64")
@ -126,7 +130,8 @@ class TestCRFDecodingOp2(OpTest):
self.op_type = "crf_decoding" self.op_type = "crf_decoding"
TAG_NUM = 5 TAG_NUM = 5
lod = [[0, 1, 3, 6, 10]] lod = [[1, 2, 3, 4]]
total_len = sum(lod[-1])
transition = np.repeat( transition = np.repeat(
np.arange( np.arange(
TAG_NUM, dtype="float64").reshape(1, TAG_NUM), TAG_NUM, dtype="float64").reshape(1, TAG_NUM),
@ -135,13 +140,13 @@ class TestCRFDecodingOp2(OpTest):
emission = np.repeat( emission = np.repeat(
np.arange( np.arange(
TAG_NUM, dtype="float64").reshape(1, TAG_NUM), TAG_NUM, dtype="float64").reshape(1, TAG_NUM),
lod[-1][-1], total_len,
axis=0) axis=0)
labels = np.random.randint( labels = np.random.randint(
low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int64") low=0, high=TAG_NUM, size=(total_len, 1), dtype="int64")
predicted_labels = np.ones( predicted_labels = np.ones(
(lod[-1][-1], 1), dtype="int64") * (TAG_NUM - 1) (total_len, 1), dtype="int64") * (TAG_NUM - 1)
expected_output = (labels == predicted_labels).astype("int64") expected_output = (labels == predicted_labels).astype("int64")
self.inputs = { self.inputs = {

@ -22,14 +22,16 @@ from test_softmax_op import stable_softmax
def CTCAlign(input, lod, blank, merge_repeated): def CTCAlign(input, lod, blank, merge_repeated):
lod0 = lod[0] lod0 = lod[0]
result = [] result = []
for i in range(len(lod0) - 1): cur_offset = 0
for i in range(len(lod0)):
prev_token = -1 prev_token = -1
for j in range(lod0[i], lod0[i + 1]): for j in range(cur_offset, cur_offset + lod0[i]):
token = input[j][0] token = input[j][0]
if (token != blank) and not (merge_repeated and if (token != blank) and not (merge_repeated and
token == prev_token): token == prev_token):
result.append(token) result.append(token)
prev_token = token prev_token = token
cur_offset += lod0[i]
result = np.array(result).reshape([len(result), 1]).astype("int32") result = np.array(result).reshape([len(result), 1]).astype("int32")
if len(result) == 0: if len(result) == 0:
result = np.array([-1]) result = np.array([-1])
@ -39,7 +41,7 @@ def CTCAlign(input, lod, blank, merge_repeated):
class TestCTCAlignOp(OpTest): class TestCTCAlignOp(OpTest):
def config(self): def config(self):
self.op_type = "ctc_align" self.op_type = "ctc_align"
self.input_lod = [[0, 11, 18]] self.input_lod = [[11, 7]]
self.blank = 0 self.blank = 0
self.merge_repeated = False self.merge_repeated = False
self.input = np.array( self.input = np.array(
@ -66,7 +68,7 @@ class TestCTCAlignOp(OpTest):
class TestCTCAlignOpCase1(TestCTCAlignOp): class TestCTCAlignOpCase1(TestCTCAlignOp):
def config(self): def config(self):
self.op_type = "ctc_align" self.op_type = "ctc_align"
self.input_lod = [[0, 11, 19]] self.input_lod = [[11, 8]]
self.blank = 0 self.blank = 0
self.merge_repeated = True self.merge_repeated = True
self.input = np.array( self.input = np.array(
@ -77,7 +79,7 @@ class TestCTCAlignOpCase1(TestCTCAlignOp):
class TestCTCAlignOpCase2(TestCTCAlignOp): class TestCTCAlignOpCase2(TestCTCAlignOp):
def config(self): def config(self):
self.op_type = "ctc_align" self.op_type = "ctc_align"
self.input_lod = [[0, 4]] self.input_lod = [[4]]
self.blank = 0 self.blank = 0
self.merge_repeated = True self.merge_repeated = True
self.input = np.array([0, 0, 0, 0]).reshape([4, 1]).astype("int32") self.input = np.array([0, 0, 0, 0]).reshape([4, 1]).astype("int32")

@ -74,13 +74,13 @@ class TestDetectionMAPOp(OpTest):
self.evaluate_difficult = True self.evaluate_difficult = True
self.ap_type = "integral" self.ap_type = "integral"
self.label_lod = [[0, 2, 4]] self.label_lod = [[2, 2]]
# label difficult xmin ymin xmax ymax # label difficult xmin ymin xmax ymax
self.label = [[1, 0, 0.1, 0.1, 0.3, 0.3], [1, 1, 0.6, 0.6, 0.8, 0.8], self.label = [[1, 0, 0.1, 0.1, 0.3, 0.3], [1, 1, 0.6, 0.6, 0.8, 0.8],
[2, 0, 0.3, 0.3, 0.6, 0.5], [1, 0, 0.7, 0.1, 0.9, 0.3]] [2, 0, 0.3, 0.3, 0.6, 0.5], [1, 0, 0.7, 0.1, 0.9, 0.3]]
# label score xmin ymin xmax ymax difficult # label score xmin ymin xmax ymax difficult
self.detect_lod = [[0, 3, 7]] self.detect_lod = [[3, 4]]
self.detect = [ self.detect = [
[1, 0.3, 0.1, 0.0, 0.4, 0.3], [1, 0.7, 0.0, 0.1, 0.2, 0.3], [1, 0.3, 0.1, 0.0, 0.4, 0.3], [1, 0.7, 0.0, 0.1, 0.2, 0.3],
[1, 0.9, 0.7, 0.6, 0.8, 0.8], [2, 0.8, 0.2, 0.1, 0.4, 0.4], [1, 0.9, 0.7, 0.6, 0.8, 0.8], [2, 0.8, 0.2, 0.1, 0.4, 0.4],
@ -89,7 +89,7 @@ class TestDetectionMAPOp(OpTest):
] ]
# label score true_pos false_pos # label score true_pos false_pos
self.tf_pos_lod = [[0, 3, 7]] self.tf_pos_lod = [[3, 4]]
self.tf_pos = [[1, 0.9, 1, 0], [1, 0.7, 1, 0], [1, 0.3, 0, 1], self.tf_pos = [[1, 0.9, 1, 0], [1, 0.7, 1, 0], [1, 0.3, 0, 1],
[1, 0.2, 1, 0], [2, 0.8, 0, 1], [2, 0.1, 1, 0], [1, 0.2, 1, 0], [2, 0.8, 0, 1], [2, 0.1, 1, 0],
[3, 0.2, 0, 1]] [3, 0.2, 0, 1]]
@ -112,15 +112,19 @@ class TestDetectionMAPOp(OpTest):
for i, count in enumerate(class_pos_count): for i, count in enumerate(class_pos_count):
class_pos_count_dict[i] = count class_pos_count_dict[i] = count
for i in range(len(true_pos_lod[0]) - 1): cur_pos = 0
start = true_pos_lod[0][i] for i in range(len(true_pos_lod[0])):
end = true_pos_lod[0][i + 1] start = cur_pos
cur_pos += true_pos_lod[0][i]
end = cur_pos
for j in range(start, end): for j in range(start, end):
true_pos_dict[i].append(true_pos[j]) true_pos_dict[i].append(true_pos[j])
for i in range(len(false_pos_lod[0]) - 1): cur_pos = 0
start = false_pos_lod[0][i] for i in range(len(false_pos_lod[0])):
end = false_pos_lod[0][i + 1] start = cur_pos
cur_pos += false_pos_lod[0][i]
end = cur_pos
for j in range(start, end): for j in range(start, end):
false_pos_dict[i].append(false_pos[j]) false_pos_dict[i].append(false_pos[j])
@ -130,19 +134,19 @@ class TestDetectionMAPOp(OpTest):
label_number = self.class_num label_number = self.class_num
out_class_pos_count = [] out_class_pos_count = []
out_true_pos_lod = [0] out_true_pos_lod = []
out_true_pos = [] out_true_pos = []
out_false_pos_lod = [0] out_false_pos_lod = []
out_false_pos = [] out_false_pos = []
for i in range(label_number): for i in range(label_number):
out_class_pos_count.append([label_count[i]]) out_class_pos_count.append([label_count[i]])
true_pos_list = true_pos[i] true_pos_list = true_pos[i]
out_true_pos += true_pos_list out_true_pos += true_pos_list
out_true_pos_lod.append(len(out_true_pos)) out_true_pos_lod.append(len(true_pos_list))
false_pos_list = false_pos[i] false_pos_list = false_pos[i]
out_false_pos += false_pos_list out_false_pos += false_pos_list
out_false_pos_lod.append(len(out_false_pos)) out_false_pos_lod.append(len(false_pos_list))
return out_class_pos_count, out_true_pos, [ return out_class_pos_count, out_true_pos, [
out_true_pos_lod out_true_pos_lod
@ -241,7 +245,7 @@ class TestDetectionMAPOpSkipDiff(TestDetectionMAPOp):
self.evaluate_difficult = False self.evaluate_difficult = False
self.tf_pos_lod = [[0, 2, 6]] self.tf_pos_lod = [[2, 4]]
# label score true_pos false_pos # label score true_pos false_pos
self.tf_pos = [[1, 0.7, 1, 0], [1, 0.3, 0, 1], [1, 0.2, 1, 0], self.tf_pos = [[1, 0.7, 1, 0], [1, 0.3, 0, 1], [1, 0.2, 1, 0],
[2, 0.8, 0, 1], [2, 0.1, 1, 0], [3, 0.2, 0, 1]] [2, 0.8, 0, 1], [2, 0.1, 1, 0], [3, 0.2, 0, 1]]
@ -267,9 +271,9 @@ class TestDetectionMAPOpMultiBatch(TestDetectionMAPOp):
def init_test_case(self): def init_test_case(self):
super(TestDetectionMAPOpMultiBatch, self).init_test_case() super(TestDetectionMAPOpMultiBatch, self).init_test_case()
self.class_pos_count = [0, 2, 1] self.class_pos_count = [0, 2, 1]
self.true_pos_lod = [[0, 0, 3, 5]] self.true_pos_lod = [[0, 3, 2]]
self.true_pos = [[0.7, 1.], [0.3, 0.], [0.2, 1.], [0.8, 0.], [0.1, 1.]] self.true_pos = [[0.7, 1.], [0.3, 0.], [0.2, 1.], [0.8, 0.], [0.1, 1.]]
self.false_pos_lod = [[0, 0, 3, 5]] self.false_pos_lod = [[0, 3, 2]]
self.false_pos = [[0.7, 0.], [0.3, 1.], [0.2, 0.], [0.8, 1.], [0.1, 0.]] self.false_pos = [[0.7, 0.], [0.3, 1.], [0.2, 0.], [0.8, 1.], [0.1, 0.]]

@ -136,16 +136,16 @@ class BaseRNN(object):
feed_dict = dict() feed_dict = dict()
for iname in self.inputs: for iname in self.inputs:
lod = [0] lod = []
np_flatten = [] np_flatten = []
for seq_id in xrange(len(self.inputs[iname])): for seq_id in xrange(len(self.inputs[iname])):
seq_len = len(self.inputs[iname][seq_id]) seq_len = len(self.inputs[iname][seq_id])
lod.append(lod[-1] + seq_len) lod.append(seq_len)
np_flatten.extend(self.inputs[iname][seq_id]) np_flatten.extend(self.inputs[iname][seq_id])
t = fluid.Tensor() t = fluid.Tensor()
t.set(numpy.array(np_flatten), place) t.set(numpy.array(np_flatten), place)
t.set_lod([lod]) t.set_recursive_sequence_lengths([lod])
feed_dict[iname] = t feed_dict[iname] = t
for pname in self.params: for pname in self.params:

@ -39,20 +39,20 @@ class TestDyRnnStaticInput(unittest.TestCase):
def prepare_x_tensor(self): def prepare_x_tensor(self):
self.x_tensor_dim = 10 self.x_tensor_dim = 10
lod = [[0, 2, 3, 6]] lod = [[2, 1, 3]]
shape = [lod[0][-1], self.x_tensor_dim] shape = [sum(lod[0]), self.x_tensor_dim]
self.x_tensor_data = np.random.random(shape).astype('float32') self.x_tensor_data = np.random.random(shape).astype('float32')
self.x_tensor = core.LoDTensor() self.x_tensor = core.LoDTensor()
self.x_tensor.set_lod(lod) self.x_tensor.set_recursive_sequence_lengths(lod)
self.x_tensor.set(self.x_tensor_data, self.place) self.x_tensor.set(self.x_tensor_data, self.place)
def prepare_static_input_tensor(self): def prepare_static_input_tensor(self):
self.static_input_tensor_dim = 4 self.static_input_tensor_dim = 4
lod = [[0, 1, 3, 6]] lod = [[1, 2, 3]]
shape = [lod[0][-1], self.static_input_tensor_dim] shape = [sum(lod[0]), self.static_input_tensor_dim]
self.static_input_data = np.random.random(shape).astype('float32') self.static_input_data = np.random.random(shape).astype('float32')
self.static_input_tensor = core.LoDTensor() self.static_input_tensor = core.LoDTensor()
self.static_input_tensor.set_lod(lod) self.static_input_tensor.set_recursive_sequence_lengths(lod)
self.static_input_tensor.set(self.static_input_data, self.place) self.static_input_tensor.set(self.static_input_data, self.place)
def fetch_value(self, var): def fetch_value(self, var):
@ -69,7 +69,7 @@ class TestDyRnnStaticInput(unittest.TestCase):
ndarray = np.zeros(shape=dims).astype('float32') ndarray = np.zeros(shape=dims).astype('float32')
for i in xrange(np.product(dims)): for i in xrange(np.product(dims)):
ndarray.ravel()[i] = lod_tensor.get_float_element(i) ndarray.ravel()[i] = lod_tensor.get_float_element(i)
return ndarray, lod_tensor.lod() return ndarray, lod_tensor.recursive_sequence_lengths()
def build_graph(self, only_forward=False): def build_graph(self, only_forward=False):
x_tensor = fluid.layers.data( x_tensor = fluid.layers.data(
@ -131,21 +131,20 @@ class TestDyRnnStaticInput(unittest.TestCase):
framework.grad_var_name('static_input_tensor')) framework.grad_var_name('static_input_tensor'))
return static_input_grad, loss return static_input_grad, loss
def get_seq_len_from_lod(self, lod):
return [lod[0][i + 1] - lod[0][i] for i in xrange(len(lod[0]) - 1)]
def get_expected_static_step_outs(self): def get_expected_static_step_outs(self):
x_lod = self.x_tensor.lod() x_lod = self.x_tensor.recursive_sequence_lengths()
x_seq_len = self.get_seq_len_from_lod(x_lod) x_seq_len = x_lod[0]
x_seq_len_sorted = sorted(x_seq_len) x_seq_len_sorted = sorted(x_seq_len)
x_sorted_indices = np.argsort(x_seq_len)[::-1] x_sorted_indices = np.argsort(x_seq_len)[::-1]
static_lod = self.static_input_tensor.lod() static_lod = self.static_input_tensor.recursive_sequence_lengths()
static_sliced = [ static_sliced = []
self.static_input_data[static_lod[0][i]:static_lod[0][i + 1]] cur_offset = 0
for i in xrange(len(static_lod[0]) - 1) for i in xrange(len(static_lod[0])):
] static_sliced.append(self.static_input_data[cur_offset:(
static_seq_len = self.get_seq_len_from_lod(static_lod) cur_offset + static_lod[0][i])])
cur_offset += static_lod[0][i]
static_seq_len = static_lod[0]
static_reordered = [] static_reordered = []
for i in xrange(len(x_sorted_indices)): for i in xrange(len(x_sorted_indices)):
static_reordered.extend(static_sliced[x_sorted_indices[i]].tolist()) static_reordered.extend(static_sliced[x_sorted_indices[i]].tolist())
@ -159,11 +158,13 @@ class TestDyRnnStaticInput(unittest.TestCase):
for i in xrange(self._max_sequence_len): for i in xrange(self._max_sequence_len):
end = len(x_seq_len) - bisect.bisect_left(x_seq_len_sorted, i + 1) end = len(x_seq_len) - bisect.bisect_left(x_seq_len_sorted, i + 1)
lod = [0] lod = []
total_len = 0
for i in xrange(end): for i in xrange(end):
lod.append(static_seq_len_reordered[i] + lod[-1]) lod.append(static_seq_len_reordered[i])
total_len += lod[-1]
static_step_lods.append([lod]) static_step_lods.append([lod])
end = lod[-1] end = total_len
static_step_outs.append( static_step_outs.append(
np.array(static_reordered[:end]).astype('float32')) np.array(static_reordered[:end]).astype('float32'))
@ -199,7 +200,9 @@ class TestDyRnnStaticInput(unittest.TestCase):
self.static_input_tensor.set_float_element(i, origin) self.static_input_tensor.set_float_element(i, origin)
numeric_gradients.ravel()[i] = (y_pos - y_neg) / self._delta / 2 numeric_gradients.ravel()[i] = (y_pos - y_neg) / self._delta / 2
self.assertTrue(np.allclose(actual_gradients, numeric_gradients, 0.001)) self.assertTrue(np.allclose(actual_gradients, numeric_gradients, 0.001))
self.assertTrue(np.allclose(actual_lod, self.static_input_tensor.lod())) self.assertTrue(
np.allclose(actual_lod,
self.static_input_tensor.recursive_sequence_lengths()))
if __name__ == '__main__': if __name__ == '__main__':

@ -52,23 +52,29 @@ class TestEditDistanceOp(OpTest):
def setUp(self): def setUp(self):
self.op_type = "edit_distance" self.op_type = "edit_distance"
normalized = False normalized = False
x1 = np.array([[0, 12, 3, 5, 8, 2]]).astype("int64") x1 = np.array([[12, 3, 5, 8, 2]]).astype("int64")
x2 = np.array([[0, 12, 4, 7, 8]]).astype("int64") x2 = np.array([[12, 4, 7, 8]]).astype("int64")
x1 = np.transpose(x1) x1 = np.transpose(x1)
x2 = np.transpose(x2) x2 = np.transpose(x2)
x1_lod = [0, 1, 5] x1_lod = [1, 4]
x2_lod = [0, 3, 4] x2_lod = [3, 1]
num_strs = len(x1_lod) - 1 num_strs = len(x1_lod)
distance = np.zeros((num_strs, 1)).astype("float32") distance = np.zeros((num_strs, 1)).astype("float32")
sequence_num = np.array(2).astype("int64") sequence_num = np.array(2).astype("int64")
x1_offset = 0
x2_offset = 0
for i in range(0, num_strs): for i in range(0, num_strs):
distance[i] = Levenshtein( distance[i] = Levenshtein(
hyp=x1[x1_lod[i]:x1_lod[i + 1]], hyp=x1[x1_offset:(x1_offset + x1_lod[i])],
ref=x2[x2_lod[i]:x2_lod[i + 1]]) ref=x2[x2_offset:(x2_offset + x2_lod[i])])
x1_offset += x1_lod[i]
x2_offset += x2_lod[i]
if normalized is True: if normalized is True:
len_ref = x2_lod[i + 1] - x2_lod[i] len_ref = x2_lod[i]
distance[i] = distance[i] / len_ref distance[i] = distance[i] / len_ref
self.attrs = {'normalized': normalized} self.attrs = {'normalized': normalized}
self.inputs = {'Hyps': (x1, [x1_lod]), 'Refs': (x2, [x2_lod])} self.inputs = {'Hyps': (x1, [x1_lod]), 'Refs': (x2, [x2_lod])}
self.outputs = {'Out': distance, 'SequenceNum': sequence_num} self.outputs = {'Out': distance, 'SequenceNum': sequence_num}
@ -81,23 +87,29 @@ class TestEditDistanceOpNormalized(OpTest):
def setUp(self): def setUp(self):
self.op_type = "edit_distance" self.op_type = "edit_distance"
normalized = True normalized = True
x1 = np.array([[0, 10, 3, 6, 5, 8, 2]]).astype("int64") x1 = np.array([[10, 3, 6, 5, 8, 2]]).astype("int64")
x2 = np.array([[0, 10, 4, 6, 7, 8]]).astype("int64") x2 = np.array([[10, 4, 6, 7, 8]]).astype("int64")
x1 = np.transpose(x1) x1 = np.transpose(x1)
x2 = np.transpose(x2) x2 = np.transpose(x2)
x1_lod = [0, 1, 3, 6] x1_lod = [1, 2, 3]
x2_lod = [0, 2, 3, 5] x2_lod = [2, 1, 2]
num_strs = len(x1_lod) - 1 num_strs = len(x1_lod)
distance = np.zeros((num_strs, 1)).astype("float32") distance = np.zeros((num_strs, 1)).astype("float32")
sequence_num = np.array(3).astype("int64") sequence_num = np.array(3).astype("int64")
x1_offset = 0
x2_offset = 0
for i in range(0, num_strs): for i in range(0, num_strs):
distance[i] = Levenshtein( distance[i] = Levenshtein(
hyp=x1[x1_lod[i]:x1_lod[i + 1]], hyp=x1[x1_offset:(x1_offset + x1_lod[i])],
ref=x2[x2_lod[i]:x2_lod[i + 1]]) ref=x2[x2_offset:(x2_offset + x2_lod[i])])
x1_offset += x1_lod[i]
x2_offset += x2_lod[i]
if normalized is True: if normalized is True:
len_ref = x2_lod[i + 1] - x2_lod[i] len_ref = x2_lod[i]
distance[i] = distance[i] / len_ref distance[i] = distance[i] / len_ref
self.attrs = {'normalized': normalized} self.attrs = {'normalized': normalized}
self.inputs = {'Hyps': (x1, [x1_lod]), 'Refs': (x2, [x2_lod])} self.inputs = {'Hyps': (x1, [x1_lod]), 'Refs': (x2, [x2_lod])}
self.outputs = {'Out': distance, 'SequenceNum': sequence_num} self.outputs = {'Out': distance, 'SequenceNum': sequence_num}

@ -24,17 +24,16 @@ class TestFeedFetch(unittest.TestCase):
input_array = np.ones((4, 4, 6)).astype("float32") input_array = np.ones((4, 4, 6)).astype("float32")
input_array[0, 0, 0] = 3 input_array[0, 0, 0] = 3
input_array[3, 3, 5] = 10 input_array[3, 3, 5] = 10
input_tensor = core.LoDTensor([[0, 2, 4]]) input_tensor = core.LoDTensor([[2, 2]])
input_tensor.set(input_array, place) input_tensor.set(input_array, place)
core.set_feed_variable(scope, input_tensor, "feed", 0) core.set_feed_variable(scope, input_tensor, "feed", 0)
output_tensor = core.get_fetch_variable(scope, "feed", 0) output_tensor = core.get_fetch_variable(scope, "feed", 0)
output_lod = output_tensor.lod() output_lod = output_tensor.recursive_sequence_lengths()
self.assertEqual(0, output_lod[0][0]) self.assertEqual(2, output_lod[0][0])
self.assertEqual(2, output_lod[0][1]) self.assertEqual(2, output_lod[0][1])
self.assertEqual(4, output_lod[0][2])
output_array = np.array(output_tensor) output_array = np.array(output_tensor)
self.assertEqual(3, output_array[0, 0, 0]) self.assertEqual(3, output_array[0, 0, 0])

@ -55,7 +55,7 @@ class TestFillConstantBatchSizeLikeWithLoDTensor(OpTest):
self.op_type = "fill_constant_batch_size_like" self.op_type = "fill_constant_batch_size_like"
self.inputs = { self.inputs = {
'Input': (np.random.random((31, 28)).astype("float32"), 'Input': (np.random.random((31, 28)).astype("float32"),
[[0, 9, 23, 31]]) [[9, 14, 8]])
} }
self.attrs = { self.attrs = {
'value': 3.5, 'value': 3.5,

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save