remove reset recordio usage (#19519)

6 years ago · 5dce1da680
parent 85914f7a88
commit 5dce1da680
12 changed files with 0 additions and 370 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,4 +1,3 @@
 python/paddle/fluid/tests/unittests/reader_reset_test.recordio
 paddle/operators/check_t.save
 paddle/operators/check_tensor.ls
 paddle/operators/tensor.save
--- a/paddle/fluid/operators/reader/reader_op_registry.cc
+++ b/paddle/fluid/operators/reader/reader_op_registry.cc
@ -38,21 +38,6 @@ std::unordered_map<std::string, FileReaderCreator>& FileReaderRegistry() {
  return regs;
 }
 // Creates a reader for `file_name` using the creator registered for the
 // file's format. The format is the text that follows the last
 // kFileFormatSeparator in the name. A name without any separator, or a format
 // with no registered creator, fails the corresponding PADDLE_ENFORCE check.
 // The raw pointer returned by the creator is wrapped in a unique_ptr, so the
 // caller owns the reader.
 std::unique_ptr<framework::ReaderBase> CreateReaderByFileName(
    const std::string& file_name) {
  size_t format_begin = file_name.find_last_of(kFileFormatSeparator);
  PADDLE_ENFORCE_NE(format_begin, std::string::npos,
                    "File name illegal! A legal file name should be like: "
                    "[file_name].[file_format] (e.g., 'data_file.recordio').");
  std::string format = file_name.substr(format_begin + 1);

  // Look the registry up once and reuse the reference for find() and end().
  auto& registry = FileReaderRegistry();
  auto creator_it = registry.find(format);
  PADDLE_ENFORCE(creator_it != registry.end(),
                 "No file reader registered for '%s' format.", format);

  return std::unique_ptr<framework::ReaderBase>(
      (creator_it->second)(file_name));
 }
 void FileReaderMakerBase::Make() {
  AddOutput("Out", "(ReaderHolder): The created random reader.").AsDuplicable();
  AddAttr<std::vector<int>>("shape_concat", "The concat of all data's shapes.");
--- a/paddle/fluid/operators/reader/reader_op_registry.h
+++ b/paddle/fluid/operators/reader/reader_op_registry.h
@ -40,9 +40,6 @@ int RegisterFileReader(const std::string& filetype) {
  return 0;
 }
 // Creates a reader for `file_name`, selecting the creator registered for the
 // file's format (the text after the last format separator in the name).
 // Enforce checks fail when the name has no separator or when no reader is
 // registered for that format. The caller owns the returned reader.
 std::unique_ptr<framework::ReaderBase> CreateReaderByFileName(
    const std::string& file_name);
 extern std::vector<framework::DDim> RestoreShapes(
    const std::vector<int>& shape_concat, const std::vector<int>& ranks);
--- a/python/paddle/dataset/tests/common_test.py
+++ b/python/paddle/dataset/tests/common_test.py
@ -1,97 +0,0 @@
 # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import print_function
 import paddle.dataset.common
 import unittest
 import tempfile
 import glob
 from six.moves import range
 class TestCommon(unittest.TestCase):
    """Tests for the helper functions in ``paddle.dataset.common``."""

    def test_md5file(self):
        """Check md5file() against the known digest of a small file."""
        # NamedTemporaryFile owns its descriptor and closes it on exit, so no
        # file descriptor is leaked (mkstemp() returns an open fd that was
        # previously discarded).
        with tempfile.NamedTemporaryFile('w', delete=False) as f:
            f.write("Hello\n")
            temp_path = f.name
        self.assertEqual('09f7e02f1290be211da707a266f153b3',
                         paddle.dataset.common.md5file(temp_path))

    def test_download(self):
        """Check the local path returned by download() for a known URL."""
        yi_avatar = 'https://avatars0.githubusercontent.com/u/1548775?v=3&s=460'
        self.assertEqual(
            paddle.dataset.common.DATA_HOME + '/test/1548775?v=3&s=460',
            paddle.dataset.common.download(yi_avatar, 'test',
                                           'f75287202d6622414c706c36c16f8e0d'))

    def test_split(self):
        """Split a 10-item reader with line_count 4 and expect 3 files."""

        def test_reader():
            def reader():
                for x in range(10):
                    yield x

            return reader

        # The output files live *inside* this path, so it has to be a
        # directory (mkdtemp), not a regular file (mkstemp).
        temp_dir = tempfile.mkdtemp()
        paddle.dataset.common.split(
            test_reader(), 4, suffix=temp_dir + '/test-%05d.pickle')
        # glob does not expand '%05d'; match the generated indices with '*'.
        files = glob.glob(temp_dir + '/test-*.pickle')
        self.assertEqual(len(files), 3)

    def test_cluster_file_reader(self):
        """Read the files assigned to trainer 0 out of 5 trainers."""
        temp_dir = tempfile.mkdtemp()
        for x in range(5):
            # The file must be opened for writing; the default mode is
            # read-only and the file does not exist yet.
            with open(temp_dir + '/%05d.test' % x, 'w') as f:
                f.write('%d\n' % x)
        reader = paddle.dataset.common.cluster_files_reader(
            temp_dir + '/*.test', 5, 0)
        # NOTE(review): assumes the default loader of cluster_files_reader
        # yields the text "0" for a file containing "0\n" -- confirm against
        # paddle.dataset.common.
        for e in reader():
            self.assertEqual(e, str("0"))

    def test_convert(self):
        """Convert a reader into shards and read every record back."""
        # `recordio` is not imported at file level in the visible source;
        # import it here and skip (instead of raising NameError) if missing.
        try:
            import recordio
        except ImportError:
            self.skipTest("the recordio package is not available")

        record_num = 10
        num_shards = 4

        def test_reader():
            def reader():
                for x in range(record_num):
                    yield x

            return reader

        path = tempfile.mkdtemp()
        paddle.dataset.common.convert(path,
                                      test_reader(), num_shards,
                                      'random_images')
        files = glob.glob(path + '/random_images-*')
        self.assertEqual(len(files), num_shards)

        recs = []
        for i in range(0, num_shards):
            n = "%s/random_images-%05d-of-%05d" % (path, i, num_shards - 1)
            r = recordio.reader(n)
            while True:
                d = r.read()
                if d is None:
                    break
                recs.append(d)
        recs.sort()
        # `total` was never defined; the number of records read is len(recs).
        self.assertEqual(len(recs), record_num)
 if __name__ == '__main__':
    unittest.main()
--- a/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py
+++ b/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py
@ -130,13 +130,8 @@ unsupported_fp16_list = {
    'send_barrier',
    'recv',
    'fetch_barrier',
    'create_recordio_file_reader',
    'create_random_data_generator',
    'create_py_reader',
    'create_shuffle_reader',
    'create_batch_reader',
    'create_double_buffer_reader',
    'create_multi_pass_reader',
    'read',
    'load',
--- a/python/paddle/fluid/layers/io.py
+++ b/python/paddle/fluid/layers/io.py
@ -754,98 +754,6 @@ def create_py_reader_by_data(capacity,
        feed_list=feed_list)
 def open_files(filenames,
               shapes,
               lod_levels,
               dtypes,
               thread_num=None,
               buffer_size=None,
               pass_num=1,
               is_test=None):
    """
    Open files

    This layer takes a list of files to read from and returns a Reader Variable.
    Via the Reader Variable, we can get data from given files. All files must
    have name suffixes to indicate their formats, e.g., '*.recordio'.

    Args:
       filenames(list|str): The list of file names. A single file name given
            as a string is treated as a one-element list.
       shapes(list): List of tuples which declaring data shapes.
       lod_levels(list): List of ints which declaring data lod_level.
       dtypes(list): List of strs which declaring data type.
       thread_num(None): The number of thread to read files.
            Default: min(len(filenames), cpu_number).
       buffer_size(None): The buffer size of reader. Default: 3 * thread_num
       pass_num(int): Number of passes to run.
       is_test(bool|None): Whether `open_files` used for testing or not. If it
            is used for testing, the order of data generated is same as the file
            order. Otherwise, it is not guaranteed the order of data is same
            between every epoch. [Default: False].

    Returns:
       Variable: A Reader Variable via which we can get file data.

    Examples:
       .. code-block:: python

         import paddle.fluid as fluid
         reader = fluid.layers.io.open_files(filenames=['./data1.recordio',
                                                     './data2.recordio'],
                                             shapes=[(3,224,224), (1,)],
                                             lod_levels=[0, 0],
                                             dtypes=['float32', 'int64'])

         # Via the reader, we can use 'read_file' layer to get data:
         image, label = fluid.layers.io.read_file(reader)
    """
    # Normalize a single file name to a list *before* deriving thread_num;
    # otherwise len(filenames) would count the characters of the string.
    if isinstance(filenames, six.string_types):
        filenames = [filenames]

    if thread_num is None:
        thread_num = min(len(filenames), multiprocessing.cpu_count())
    else:
        thread_num = int(thread_num)

    if buffer_size is None:
        buffer_size = 3 * thread_num
    else:
        buffer_size = int(buffer_size)

    dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes]

    # Flatten all shapes into one list; `ranks` records each shape's length so
    # the individual shapes can be recovered from the concatenation.
    shape_concat = []
    ranks = []
    for shape in shapes:
        shape_concat.extend(shape)
        ranks.append(len(shape))

    multi_file_reader_name = unique_name('multi_file_reader')
    startup_blk = default_startup_program().current_block()
    startup_reader = startup_blk.create_var(name=multi_file_reader_name)
    attrs = {
        'shape_concat': shape_concat,
        'lod_levels': lod_levels,
        'ranks': ranks,
        'file_names': filenames,
        'thread_num': thread_num,
        'buffer_size': buffer_size
    }
    # Only forward is_test when the caller set it explicitly.
    if is_test is not None:
        attrs['is_test'] = is_test
    startup_blk.append_op(
        type='open_files', outputs={'Out': [startup_reader]}, attrs=attrs)

    startup_reader.desc.set_dtypes(dtypes)
    startup_reader.persistable = True
    main_prog_reader = _copy_reader_var_(default_main_program().current_block(),
                                         startup_reader)
    if pass_num > 1:
        main_prog_reader = multi_pass(
            reader=main_prog_reader, pass_num=pass_num)

    return monkey_patch_reader_methods(main_prog_reader)
 def __create_shared_decorated_reader__(op_type, reader, attrs):
    var_name = unique_name(op_type)
    startup_blk = default_startup_program().current_block()
--- a/python/paddle/fluid/tests/demo/file_reader/.gitignore
+++ b/python/paddle/fluid/tests/demo/file_reader/.gitignore
@ -1 +0,0 @@
 *.recordio
--- a/python/paddle/fluid/tests/demo/file_reader/train.py
+++ b/python/paddle/fluid/tests/demo/file_reader/train.py
@ -1,140 +0,0 @@
 #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import print_function
 import paddle.fluid as fluid
 import numpy
 import sys
 TRAIN_FILES = ['train.recordio']
 TEST_FILES = ['test.recordio']
 DICT_DIM = 5147
 # embedding dim
 emb_dim = 128
 # hidden dim
 hid_dim = 128
 # class num
 class_dim = 2
 # epoch num
 epoch_num = 10
 def build_program(is_train):
    """Build the demo network inside the current program guard.

    Opens TRAIN_FILES (when ``is_train`` is true) or TEST_FILES through
    ``open_files``, wraps the reader in a double buffer, and stacks an
    embedding, two sequence_conv_pool branches (filter sizes 3 and 4) and a
    softmax fc layer on the data it yields. When ``is_train`` is true an
    Adagrad optimizer minimizing the mean cross-entropy loss is added.

    Returns:
        dict: ``'loss'`` is the mean loss variable, ``'log'`` is the list
        ``[mean loss, accuracy]`` and ``'file'`` is the reader returned by
        ``open_files`` (the un-buffered handle).
    """
    source_files = TRAIN_FILES if is_train else TEST_FILES
    reader_handle = fluid.layers.io.open_files(
        filenames=source_files,
        shapes=[[-1, 1], [-1, 1]],
        lod_levels=[1, 0],
        dtypes=['int64', 'int64'])
    buffered_reader = fluid.layers.io.double_buffer(reader_handle)

    with fluid.unique_name.guard():
        words, label = fluid.layers.read_file(buffered_reader)
        embedded = fluid.layers.embedding(
            input=words, size=[DICT_DIM, emb_dim])

        # One conv-pool branch per filter size, created in the order 3, 4.
        conv_features = [
            fluid.nets.sequence_conv_pool(
                input=embedded,
                num_filters=hid_dim,
                filter_size=filter_width,
                act="tanh",
                pool_type="sqrt") for filter_width in (3, 4)
        ]

        prediction = fluid.layers.fc(input=conv_features,
                                     size=class_dim,
                                     act="softmax")

        # Per-sample cross entropy, then its mean as the scalar loss.
        cost = fluid.layers.cross_entropy(input=prediction, label=label)
        avg_cost = fluid.layers.mean(x=cost)
        acc = fluid.layers.accuracy(input=prediction, label=label)

        if is_train:
            # Adagrad optimizer
            fluid.optimizer.Adagrad(learning_rate=0.001).minimize(avg_cost)

    return {'loss': avg_cost, 'log': [avg_cost, acc], 'file': reader_handle}
 def main():
    """Train and evaluate the demo network for ``epoch_num`` epochs.

    Builds a train and a test program that share one startup program, runs
    the startup program once, then alternates per epoch between draining the
    train reader and draining the test reader. Each reader signals the end of
    its data with ``fluid.core.EOFException`` and is reset afterwards so the
    next epoch can read it again.
    """
    train = fluid.Program()
    startup = fluid.Program()
    test = fluid.Program()

    with fluid.program_guard(train, startup):
        train_args = build_program(is_train=True)

    with fluid.program_guard(test, startup):
        test_args = build_program(is_train=False)

    use_cuda = fluid.core.is_compiled_with_cuda()
    # startup
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place=place)
    exe.run(startup)

    train_exe = fluid.ParallelExecutor(
        use_cuda=use_cuda,
        loss_name=train_args['loss'].name,
        main_program=train)
    test_exe = fluid.ParallelExecutor(
        use_cuda=use_cuda, main_program=test, share_vars_from=train_exe)

    fetch_var_list = [var.name for var in train_args['log']]
    for epoch_id in range(epoch_num):
        # train
        try:
            batch_id = 0
            while True:
                loss, acc = map(numpy.array,
                                train_exe.run(fetch_list=fetch_var_list))
                # print() calls are required: this file enables
                # `from __future__ import print_function`, under which the
                # print statement is a syntax error.
                print('Train epoch', epoch_id, 'batch', batch_id, 'loss:',
                      loss, 'acc:', acc)
                batch_id += 1
        except fluid.core.EOFException:
            print('End of epoch', epoch_id)
            train_args['file'].reset()

        # test
        loss = []
        acc = []
        try:
            while True:
                loss_np, acc_np = map(numpy.array,
                                      test_exe.run(fetch_list=fetch_var_list))
                loss.append(loss_np[0])
                acc.append(acc_np[0])
        except fluid.core.EOFException:
            # Only end-of-data ends the test pass; any other error (and
            # KeyboardInterrupt) now propagates instead of being swallowed.
            test_args['file'].reset()
            print('Test loss:', numpy.mean(loss), 'acc:', numpy.mean(acc))
 if __name__ == '__main__':
    main()
--- a/python/paddle/fluid/tests/unittests/.gitignore
+++ b/python/paddle/fluid/tests/unittests/.gitignore
@ -1,8 +0,0 @@
 mnist.recordio
 mnist_0.recordio
 mnist_1.recordio
 mnist_2.recordio
 flowers.recordio
 wmt16.recordio
 data_balance_test.recordio
 data_balance_with_lod_test.recordio
--- a/python/paddle/fluid/tests/unittests/test_eager_deletion_transformer.py
+++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_transformer.py
@ -18,8 +18,6 @@ import paddle.fluid as fluid
 fluid.core._set_eager_deletion_mode(0.0, 1.0, True)
 os.environ['RECORDIO_FILENAME'] = './eager_deletion_transformer.wmt16.recordio'
 from test_parallel_executor_transformer import TestTransformer
 if __name__ == '__main__':
--- a/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer_auto_growth.py
+++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer_auto_growth.py
@ -12,9 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
 os.environ['RECORDIO_FILENAME'] = './auto_growth_pe_transformer.wmt16.recordio'
 import unittest
 from test_parallel_executor_transformer import *
--- a/python/paddle/fluid/tests/unittests/test_partial_eager_deletion_transformer.py
+++ b/python/paddle/fluid/tests/unittests/test_partial_eager_deletion_transformer.py
@ -12,12 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
 import unittest
 import paddle.fluid as fluid
 os.environ['RECORDIO_FILENAME'] = './p_gc_transformer.wmt16.recordio'
 fluid.core._set_eager_deletion_mode(0.0, 0.55, True)
 from test_parallel_executor_transformer import TestTransformer