You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
Paddle/paddle/gserver/tests/test_PyDataProvider2.py

112 lines
3.3 KiB

# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random
from paddle.trainer.PyDataProvider2 import *
@provider(input_types=[dense_vector(200, seq_type=SequenceType.NO_SEQUENCE)])
def test_dense_no_seq(setting, filename):
    """Yield 200 dense samples, each a list of 200 floats.

    Element ``col`` of sample ``row`` is
    ``float(col - 100) * float(row + 1) / 200.0``.
    Neither ``setting`` nor ``filename`` is read.
    """
    for row in xrange(200):
        # The row-dependent factor is constant across the inner loop.
        scale = float(row + 1)
        yield [(float(col - 100) * scale) / 200.0 for col in xrange(200)]
@provider(input_types=[integer_value(200, seq_type=SequenceType.NO_SEQUENCE)])
def test_index_no_seq(setting, filename):
    """Yield the integers 0 through 199, one per sample.

    Neither ``setting`` nor ``filename`` is read.
    """
    for index in xrange(200):
        yield index
def test_init_hooker(setting, value, **kwargs):
setting.value = value
@provider(
    input_types=[dense_vector(20, seq_type=SequenceType.NO_SEQUENCE)],
    init_hook=test_init_hooker)
def test_init_hook(setting, filename):
    """Yield ``setting.value`` 200 times.

    ``setting.value`` is the attribute assigned by ``test_init_hooker``,
    which is registered as this provider's ``init_hook``. ``filename`` is
    not read.
    """
    for _ in xrange(200):
        # Read the attribute on every iteration rather than caching it.
        yield setting.value
@provider(
    input_types=[
        sparse_binary_vector(30000, seq_type=SequenceType.NO_SEQUENCE)
    ])
def test_sparse_non_value_no_seq(setting, filename):
    """Yield 200 samples, each a list of 10 integer indices.

    Sample ``row`` contains ``(row + 1) * (col + 1)`` for ``col`` in 0..9.
    Neither ``setting`` nor ``filename`` is read.
    """
    for row in xrange(200):
        factor = row + 1
        yield [factor * (col + 1) for col in xrange(10)]
@provider(input_types=[sparse_vector(30000, seq_type=SequenceType.NO_SEQUENCE)])
def test_sparse_value_no_seq(setting, filename):
    """Yield 200 samples, each a list of 10 ``(index, value)`` pairs.

    For sample ``row`` and position ``col`` the pair is
    ``((row + 1) * (col + 1), float(col) / float(row + 1))``.
    Neither ``setting`` nor ``filename`` is read.
    """
    for row in xrange(200):
        base = row + 1
        divisor = float(base)
        yield [(base * (col + 1), float(col) / divisor) for col in xrange(10)]
@provider(input_types=[integer_value(200, seq_type=SequenceType.SEQUENCE)])
def test_index_seq(setting, filename):
    """Yield 200 integer sequences of increasing length.

    The n-th sample (counting from 1) is ``range(n)``, i.e. ``0 .. n - 1``.
    Neither ``setting`` nor ``filename`` is read.
    """
    for length in xrange(1, 201):
        yield range(length)
@provider(input_types=[index_slot(200, seq_type=SequenceType.SUB_SEQUENCE)])
def test_index_sub_seq(setting, filename):
    """Yield 200 samples, each a list of integer sub-sequences.

    Sample ``outer`` holds ``outer + 1`` sub-sequences; sub-sequence
    ``inner`` is ``range(inner + 1)``. Neither ``setting`` nor ``filename``
    is read.
    """
    for outer in xrange(200):
        yield [range(inner + 1) for inner in xrange(outer + 1)]
@provider(input_types=[index_slot(100)], min_pool_size=1000)
def test_min_pool_size(setting, filename):
    """Yield ``1 << 14`` random integers drawn uniformly from 0..99.

    Neither ``setting`` nor ``filename`` is read.
    """
    sample_count = 1 << 14
    highest_index = 100 - 1
    for _ in xrange(sample_count):
        yield random.randint(0, highest_index)
@provider(
    input_types=[index_slot(100, seq_type=SequenceType.SEQUENCE)],
    can_over_batch_size=False,
    calc_batch_size=lambda x: len(x[0]))
def test_can_over_batch_size(setting, filename):
    """Yield ``1 << 10`` random-length sequences of random indices.

    Each sequence has a length drawn from 0..99 (so it may be empty) and
    elements drawn from 0..99. The decorator's ``calc_batch_size`` callback
    returns the length of the first element of its argument.
    Neither ``setting`` nor ``filename`` is read.
    """
    highest_index = 100 - 1
    for _ in xrange(1 << 10):
        # Draw the length first, then the elements, in that order.
        length = random.randint(0, 99)
        yield [random.randint(0, highest_index) for _ in xrange(length)]
@provider(input_types={'input1': index_slot(10), 'input2': index_slot(10)})
def test_input_order(setting, filename):
    """Yield 1000 dict samples mapping ``'input1'`` to 0 and ``'input2'`` to 1.

    A new dict is created for every sample. Neither ``setting`` nor
    ``filename`` is read.
    """
    for _ in xrange(1000):
        yield {'input1': 0, 'input2': 1}
@provider(
    input_types=[index_slot(10)],
    check=True,
    check_fail_continue=True,
    should_shuffle="123")  # also test should shuffle
def test_check(settings, filename):
    """Yield random integers from 0..100 until a value below 10 has appeared.

    Values are produced in rounds of 10000. Every drawn value is yielded,
    including those that are 10 or greater, and a round always runs to
    completion. Rounds repeat until at least one value below 10 has been
    drawn. Neither ``settings`` nor ``filename`` is read.

    NOTE(review): values >= 10 presumably fail the provider's ``check`` for
    ``index_slot(10)`` and are skipped because ``check_fail_continue`` is
    set -- confirm against the provider implementation.
    """
    seen_in_range = False
    while not seen_in_range:
        for _ in xrange(10000):
            candidate = random.randint(0, 100)
            if candidate < 10:
                seen_in_range = True
            # Yield regardless of range.
            yield candidate