remove some reader apis, test=develop (#20266)

revert-20712-fix_depthwise_conv
Zeng Jinle 6 years ago committed by GitHub
parent df102f6428
commit e3e22146de
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -101,12 +101,7 @@ paddle.fluid.io.chain (ArgSpec(args=[], varargs='readers', keywords=None, defaul
paddle.fluid.io.shuffle (ArgSpec(args=['reader', 'buf_size'], varargs=None, keywords=None, defaults=None), ('document', 'e42ea6fee23ce26b23cb142cd1d6522d')) paddle.fluid.io.shuffle (ArgSpec(args=['reader', 'buf_size'], varargs=None, keywords=None, defaults=None), ('document', 'e42ea6fee23ce26b23cb142cd1d6522d'))
paddle.fluid.io.firstn (ArgSpec(args=['reader', 'n'], varargs=None, keywords=None, defaults=None), ('document', 'c5bb8f7dd4f917f1569a368aab5b8aad')) paddle.fluid.io.firstn (ArgSpec(args=['reader', 'n'], varargs=None, keywords=None, defaults=None), ('document', 'c5bb8f7dd4f917f1569a368aab5b8aad'))
paddle.fluid.io.xmap_readers (ArgSpec(args=['mapper', 'reader', 'process_num', 'buffer_size', 'order'], varargs=None, keywords=None, defaults=(False,)), ('document', '9c804a42f8a4dbaa76b3c98e0ab7f796')) paddle.fluid.io.xmap_readers (ArgSpec(args=['mapper', 'reader', 'process_num', 'buffer_size', 'order'], varargs=None, keywords=None, defaults=(False,)), ('document', '9c804a42f8a4dbaa76b3c98e0ab7f796'))
paddle.fluid.io.PipeReader ('paddle.reader.decorator.PipeReader', ('document', 'd3c250618f98c1a5fb646f869016a98e'))
paddle.fluid.io.PipeReader.__init__ (ArgSpec(args=['self', 'command', 'bufsize', 'file_type'], varargs=None, keywords=None, defaults=(8192, 'plain')), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.io.PipeReader.get_line (ArgSpec(args=['self', 'cut_lines', 'line_break'], varargs=None, keywords=None, defaults=(True, '\n')), ('document', '9621ae612e595b6c34eb3bb5f3eb1a45'))
paddle.fluid.io.multiprocess_reader (ArgSpec(args=['readers', 'use_pipe', 'queue_size'], varargs=None, keywords=None, defaults=(True, 1000)), ('document', '7d8b3a96e592107c893d5d51ce968ba0')) paddle.fluid.io.multiprocess_reader (ArgSpec(args=['readers', 'use_pipe', 'queue_size'], varargs=None, keywords=None, defaults=(True, 1000)), ('document', '7d8b3a96e592107c893d5d51ce968ba0'))
paddle.fluid.io.Fake ('paddle.reader.decorator.Fake', ('document', '0d8f4847b99bed6d456ade0d903202e1'))
paddle.fluid.io.Fake.__init__ (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.initializer.ConstantInitializer ('paddle.fluid.initializer.ConstantInitializer', ('document', '911263fc30c516c55e89cd72086a23f8')) paddle.fluid.initializer.ConstantInitializer ('paddle.fluid.initializer.ConstantInitializer', ('document', '911263fc30c516c55e89cd72086a23f8'))
paddle.fluid.initializer.ConstantInitializer.__init__ (ArgSpec(args=['self', 'value', 'force_cpu'], varargs=None, keywords=None, defaults=(0.0, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.initializer.ConstantInitializer.__init__ (ArgSpec(args=['self', 'value', 'force_cpu'], varargs=None, keywords=None, defaults=(0.0, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.initializer.UniformInitializer ('paddle.fluid.initializer.UniformInitializer', ('document', '587b7035cd1d56f76f2ded617b92521d')) paddle.fluid.initializer.UniformInitializer ('paddle.fluid.initializer.UniformInitializer', ('document', '587b7035cd1d56f76f2ded617b92521d'))

@ -14,8 +14,7 @@
__all__ = [ __all__ = [
'cache', 'map_readers', 'buffered', 'compose', 'chain', 'shuffle', 'cache', 'map_readers', 'buffered', 'compose', 'chain', 'shuffle',
'ComposeNotAligned', 'firstn', 'xmap_readers', 'PipeReader', 'ComposeNotAligned', 'firstn', 'xmap_readers', 'multiprocess_reader'
'multiprocess_reader', 'Fake'
] ]
from threading import Thread from threading import Thread
@ -517,116 +516,3 @@ def multiprocess_reader(readers, use_pipe=True, queue_size=1000):
return pipe_reader return pipe_reader
else: else:
return queue_reader return queue_reader
def _buf2lines(buf, line_break="\n"):
# FIXME: line_break should be automatically configured.
lines = buf.split(line_break)
return lines[:-1], lines[-1]
class PipeReader:
"""
PipeReader read data by stream from a command, take it's
stdout into a pipe buffer and redirect it to the parser to
parse, then yield data as your desired format.
You can using standard linux command or call another program
to read data, from HDFS, Ceph, URL, AWS S3 etc:
.. code-block:: python
cmd = "hadoop fs -cat /path/to/some/file"
cmd = "cat sample_file.tar.gz"
cmd = "curl http://someurl"
cmd = "python print_s3_bucket.py"
An example:
.. code-block:: python
def example_reader():
for f in myfiles:
pr = PipeReader("cat %s"%f)
for l in pr.get_line():
sample = l.split(" ")
yield sample
"""
def __init__(self, command, bufsize=8192, file_type="plain"):
if not isinstance(command, str):
raise TypeError("left_cmd must be a string")
if file_type == "gzip":
self.dec = zlib.decompressobj(
32 + zlib.MAX_WBITS) # offset 32 to skip the header
self.file_type = file_type
self.bufsize = bufsize
self.process = subprocess.Popen(
command.split(" "), bufsize=bufsize, stdout=subprocess.PIPE)
def get_line(self, cut_lines=True, line_break="\n"):
"""
:param cut_lines: cut buffer to lines
:type cut_lines: bool
:param line_break: line break of the file, like '\\\\n' or '\\\\r'
:type line_break: string
:return: one line or a buffer of bytes
:rtype: string
"""
remained = ""
while True:
buff = self.process.stdout.read(self.bufsize)
if buff:
if self.file_type == "gzip":
decomp_buff = cpt.to_text(self.dec.decompress(buff))
elif self.file_type == "plain":
decomp_buff = cpt.to_text(buff)
else:
raise TypeError("file_type %s is not allowed" %
self.file_type)
if cut_lines:
lines, remained = _buf2lines(''.join(
[remained, decomp_buff]), line_break)
for line in lines:
yield line
else:
yield decomp_buff
else:
break
class Fake(object):
"""
fake reader will cache the first data it read and yield it out for data_num times.
It is used to cache a data from real reader and use it for speed testing.
:param reader: the origin reader
:param data_num: times that this reader will yield data.
:return: a fake reader.
Examples:
.. code-block:: python
def reader():
for i in range(10):
yield i
fake_reader = Fake()(reader, 100)
"""
def __init__(self):
self.data = None
self.yield_num = 0
def __call__(self, reader, data_num):
def fake_reader():
if self.data is None:
self.data = next(reader())
while self.yield_num < data_num:
yield self.data
self.yield_num += 1
self.yield_num = 0
return fake_reader

@ -147,34 +147,6 @@ class TestXmap(unittest.TestCase):
self.assertEqual(e, mapper(idx)) self.assertEqual(e, mapper(idx))
class TestPipeReader(unittest.TestCase):
def test_pipe_reader(self):
def example_reader(myfiles):
for f in myfiles:
pr = paddle.reader.PipeReader("cat %s" % f, bufsize=128)
for l in pr.get_line():
yield l
import tempfile
records = [str(i) for i in range(5)]
temp = tempfile.NamedTemporaryFile()
try:
with open(temp.name, 'w') as f:
for r in records:
f.write('%s\n' % r)
result = []
for r in example_reader([temp.name]):
result.append(r)
for idx, e in enumerate(records):
self.assertEqual(e, result[idx])
finally:
# delete the temporary file
temp.close()
class TestMultiProcessReader(unittest.TestCase): class TestMultiProcessReader(unittest.TestCase):
def setup(self): def setup(self):
self.samples = [] self.samples = []
@ -203,21 +175,5 @@ class TestMultiProcessReader(unittest.TestCase):
self.reader_test(use_pipe=True) self.reader_test(use_pipe=True)
class TestFakeReader(unittest.TestCase):
def test_fake_reader(self):
def reader():
for i in range(10):
yield i
data_num = 100
fake_reader = paddle.reader.Fake()(reader, data_num)
for _ in range(10):
i = 0
for data in fake_reader():
self.assertEqual(data, 0)
i += 1
self.assertEqual(i, data_num)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

Loading…
Cancel
Save