You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
89 lines
2.4 KiB
89 lines
2.4 KiB
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""
|
|
Creator package contains some simple reader creator, which could
|
|
be used in user program.
|
|
"""
|
|
|
|
__all__ = ['np_array', 'text_file', 'recordio']
|
|
|
|
|
|
def np_array(x):
|
|
"""
|
|
Creates a reader that yields elements of x, if it is a
|
|
numpy vector. Or rows of x, if it is a numpy matrix.
|
|
Or any sub-hyperplane indexed by the highest dimension.
|
|
|
|
:param x: the numpy array to create reader from.
|
|
:returns: data reader created from x.
|
|
"""
|
|
|
|
def reader():
|
|
if x.ndim < 1:
|
|
yield x
|
|
|
|
for e in x:
|
|
yield e
|
|
|
|
return reader
|
|
|
|
|
|
def text_file(path):
|
|
"""
|
|
Creates a data reader that outputs text line by line from given text file.
|
|
Trailing new line ('\\\\n') of each line will be removed.
|
|
|
|
:path: path of the text file.
|
|
:returns: data reader of text file
|
|
"""
|
|
|
|
def reader():
|
|
f = open(path, "r")
|
|
for l in f:
|
|
yield l.rstrip('\n')
|
|
f.close()
|
|
|
|
return reader
|
|
|
|
|
|
def recordio(paths, buf_size=100):
|
|
"""
|
|
Creates a data reader from given RecordIO file paths separated by ",",
|
|
glob pattern is supported.
|
|
:path: path of recordio files, can be a string or a string list.
|
|
:returns: data reader of recordio files.
|
|
"""
|
|
|
|
import recordio as rec
|
|
import paddle.reader.decorator as dec
|
|
import six
|
|
import six.moves.cPickle as pickle
|
|
|
|
def reader():
|
|
if isinstance(paths, six.string_types):
|
|
path = paths
|
|
elif isinstance(paths, six.binary_type):
|
|
path = paths.decode()
|
|
else:
|
|
path = ",".join(paths)
|
|
f = rec.reader(path)
|
|
while True:
|
|
r = f.read()
|
|
if r is None:
|
|
break
|
|
yield pickle.loads(r)
|
|
f.close()
|
|
|
|
return dec.buffered(reader, buf_size)
|