From 1b59220d50576661d6afb4bd7f6ea799a460bd6f Mon Sep 17 00:00:00 2001
From: sneaxiy
Date: Wed, 27 Jun 2018 05:42:19 +0000
Subject: [PATCH 01/15] complete python reader op python side

---
 paddle/fluid/pybind/pybind.cc                 |   2 +
 python/paddle/fluid/__init__.py               |   3 +-
 python/paddle/fluid/layers/io.py              |  89 ++++++-
 .../unittests/test_py_reader_push_pop.py      |  99 ++++++++
 .../test_py_reader_using_executor.py          | 224 ++++++++++++++++++
 5 files changed, 413 insertions(+), 4 deletions(-)
 create mode 100644 python/paddle/fluid/tests/unittests/test_py_reader_push_pop.py
 create mode 100644 python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py

diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index c88fbef63c..069a59c7b4 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -449,6 +449,8 @@ All parameter, weight, gradient are variables in Paddle.
       });

   py::class_<LoDTensorArray>(m, "LoDTensorArray")
+      .def("__init__",
+           [](LoDTensorArray &instance) { new (&instance) LoDTensorArray(); })
       .def("__getitem__",
            [](LoDTensorArray &self, size_t i) { return &self.at(i); },
            py::return_value_policy::reference)
diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py
index bd985ad733..e1944ebc7b 100644
--- a/python/paddle/fluid/__init__.py
+++ b/python/paddle/fluid/__init__.py
@@ -44,7 +44,7 @@ import metrics
 import transpiler
 from param_attr import ParamAttr, WeightNormParamAttr
 from data_feeder import DataFeeder
-from core import LoDTensor, CPUPlace, CUDAPlace, CUDAPinnedPlace
+from core import LoDTensor, LoDTensorArray, CPUPlace, CUDAPlace, CUDAPinnedPlace, Scope
 from transpiler import DistributeTranspiler, InferenceTranspiler, \
     memory_optimize, release_memory
 from concurrency import (Go, make_channel, channel_send, channel_recv,
@@ -72,6 +72,7 @@ __all__ = framework.__all__ + executor.__all__ + concurrency.__all__ + \
     'backward',
     'regularizer',
     'LoDTensor',
+    'LoDTensorArray',
     'CPUPlace',
     'CUDAPlace',
     'CUDAPinnedPlace',
diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py
index 9de88e2c32..d883c9e4ad 100644
--- a/python/paddle/fluid/layers/io.py
+++ b/python/paddle/fluid/layers/io.py
@@ -22,9 +22,10 @@ from ..executor import global_scope
 from layer_function_generator import generate_layer_fn, templatedoc

 __all__ = [
-    'data', 'BlockGuardServ', 'ListenAndServ', 'Send', 'open_recordio_file',
-    'open_files', 'read_file', 'shuffle', 'batch', 'double_buffer',
-    'random_data_generator', 'Preprocessor', 'load'
+    'data', 'BlockGuardServ', 'ListenAndServ', 'Send', 'Recv',
+    'open_recordio_file', 'open_files', 'read_file', 'shuffle', 'batch',
+    'double_buffer', 'random_data_generator', 'py_reader', 'Preprocessor',
+    'load'
 ]


@@ -431,6 +432,88 @@ def random_data_generator(low, high, shapes, lod_levels, for_parallel=True):
     return monkey_patch_reader_methods(main_prog_var)


+def py_reader(capacity, shapes, lod_levels, dtypes, for_parallel=True):
+    """
+    Create a reader and a blocking queue for data feeding in Python.
+
+    This layer returns a Reader Variable and a BlockingQueue.
+    The BlockingQueue provides a `push()` method for pushing a
+    `LoDTensorArray` object into the queue on the Python side. On the
+    C++ side, the Reader Variable invokes the queue's `pop()` method
+    to retrieve the fed data. Feeding data on the Python side and
+    fetching data on the C++ side can therefore run in parallel.
+    The BlockingQueue should be closed with its `close()` method once
+    it is no longer needed.
+
+    Args:
+        capacity(int): The maximum capacity of the BlockingQueue.
+        shapes(list): List of tuples that declare the data shapes.
+        lod_levels(list): List of ints that declare the data lod_level.
+        dtypes(list): List of strs that declare the data types.
+        for_parallel(bool): Set it to True if you are going to run
+            subsequent operators in parallel.
+
+    Returns:
+        Variable: A Reader Variable from which we can read the fed data.
+        BlockingQueue: A blocking queue for data feeding.
+
+    Examples:
+
+        .. code-block:: python
+
+            reader, queue = fluid.layers.py_reader(
+                capacity=10,
+                shapes=[[-1,3,224,224], [-1,1]],
+                lod_levels=[0, 0],
+                dtypes=['float32', 'int64'])
+            # Via the reader, we can use the 'read_file' layer to get data:
+            image, label = fluid.layers.read_file(reader)
+            # Via the blocking queue, we can feed data from another thread:
+            def feed_data(queue, feed_images, feed_labels):
+                for feed_image, feed_label in zip(feed_images, feed_labels):
+                    data = core.LoDTensorArray()
+                    data.append(feed_image)
+                    data.append(feed_label)
+                    queue.push(data)
+
+            thread = threading.Thread(
+                target=feed_data, args=(queue, feed_images, feed_labels))
+            thread.start()
+    """
+    dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes]
+    shape_concat = []
+    ranks = []
+
+    for shape in shapes:
+        shape_concat.extend(shape)
+        ranks.append(len(shape))
+
+    queue_name = unique_name('lod_tensor_blocking_queue')
+    var = global_scope().var(queue_name)
+    feed_queue = core.init_lod_tensor_blocking_queue(var, capacity, shapes)
+
+    startup_blk = default_startup_program().current_block()
+    startup_var = startup_blk.create_var(name=unique_name('create_py_reader'))
+    startup_blk.append_op(
+        type='create_py_reader',
+        inputs={'blocking_queue': queue_name},
+        outputs={'Out': [startup_var]},
+        attrs={
+            'shape_concat': shape_concat,
+            'lod_levels': lod_levels,
+            'ranks': ranks
+        })
+
+    startup_var.desc.set_dtypes(dtypes)
+    startup_var.persistable = True
+
+    main_prog_var = _copy_reader_var_(default_main_program().current_block(),
+                                      startup_var)
+
+    if for_parallel:
+        main_prog_var = parallel(reader=main_prog_var)
+
+    return monkey_patch_reader_methods(main_prog_var), feed_queue
+
+
 def open_files(filenames,
                shapes,
                lod_levels,
diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_push_pop.py b/python/paddle/fluid/tests/unittests/test_py_reader_push_pop.py
new file mode 100644
index 0000000000..0571546484
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_py_reader_push_pop.py
@@ -0,0 +1,99 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
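Before the test itself, a minimal end-to-end sketch of the producer/consumer loop it exercises may help. This is not part of the patch: the shapes, batch sizes, iteration counts, and the as_lodtensor helper are all illustrative, and only calls shown in the diffs above (py_reader, read_file, LoDTensorArray, push, close) are used:

    import threading

    import numpy as np
    import paddle.fluid as fluid

    place = fluid.CPUPlace()

    # Build a program that reads from the queue-backed reader.
    reader, queue = fluid.layers.py_reader(
        capacity=4,
        shapes=[[-1, 8], [-1, 1]],
        lod_levels=[0, 0],
        dtypes=['float32', 'int64'])
    in_data, label = fluid.layers.read_file(reader)

    def as_lodtensor(array):
        # Illustrative helper: wrap a numpy array into a LoDTensor.
        t = fluid.LoDTensor()
        t.set(array, place)
        return t

    def produce(queue, iterations):
        # Producer thread: push one LoDTensorArray per batch; the C++
        # reader pops batches concurrently while the executor runs.
        for _ in range(iterations):
            batch = fluid.LoDTensorArray()
            batch.append(as_lodtensor(np.random.rand(2, 8).astype('float32')))
            batch.append(as_lodtensor(
                np.random.randint(0, 10, size=(2, 1)).astype('int64')))
            queue.push(batch)

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    thread = threading.Thread(target=produce, args=(queue, 10))
    thread.start()
    for _ in range(10):
        exe.run(fetch_list=[in_data, label])
    thread.join()
    queue.close()

The test below follows the same shape: it pre-builds LoDTensorArray batches, pushes them (either inline or from a thread), and checks that what the executor fetches matches what was pushed.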
+ +import unittest +import paddle.fluid as fluid +import numpy as np +from threading import Thread + + +def feed_data(feed_queue, inputs): + for in_data in inputs: + feed_queue.push(in_data) + + +class TestPyReader(unittest.TestCase): + def setUp(self): + self.capacity = 10 + self.batch_size_min = 10 + self.batch_size_max = 20 + self.shapes = [(-1, 3, 2, 1), (-1, 1)] + self.lod_levels = [0, 0] + self.dtypes = ['float32', 'int64'] + self.iterations = 20 + + def test_single_thread_main(self): + self.main(use_thread=False) + + def test_multiple_thread_main(self): + self.main(use_thread=True) + + def main(self, use_thread=False): + with fluid.program_guard(fluid.Program(), fluid.Program()): + place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + executor = fluid.Executor(place) + + data_file, feed_queue = fluid.layers.py_reader( + capacity=self.capacity, + dtypes=self.dtypes, + lod_levels=self.lod_levels, + shapes=self.shapes) + + read_out_data = fluid.layers.read_file(data_file) + self.inputs = [] + + for i in range(self.iterations): + in_data = fluid.LoDTensorArray() + batch_size = np.random.random_integers(self.batch_size_min, + self.batch_size_max) + for shape, dtype in zip(self.shapes, self.dtypes): + next_data = np.random.uniform( + low=0, high=1000, + size=(batch_size, ) + shape[1:]).astype(dtype) + in_data.append(executor.as_lodtensor(next_data)) + + self.inputs.append(in_data) + + executor.run(fluid.default_startup_program()) + self.outputs = [] + if use_thread: + thread = Thread( + target=feed_data, args=(feed_queue, self.inputs)) + thread.start() + for in_data in self.inputs: + self.outputs.append( + executor.run(fetch_list=list(read_out_data))) + else: + for in_data in self.inputs: + feed_queue.push(in_data) + self.outputs.append( + executor.run(fetch_list=list(read_out_data))) + + feed_queue.close() + self.validate() + + def validate(self): + self.assertEqual(len(self.inputs), len(self.outputs)) + for in_data_list, out_data_list in zip(self.inputs, self.outputs): + self.assertEqual(len(in_data_list), len(out_data_list)) + in_data_list_np = [ + np.array(in_lod_tensor) for in_lod_tensor in in_data_list + ] + for in_data, out_data in zip(in_data_list_np, out_data_list): + self.assertTrue((in_data == out_data).all()) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py b/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py new file mode 100644 index 0000000000..98d48097ee --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py @@ -0,0 +1,224 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
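The feed_data helper defined just below drains a plain Python generator and stops either at a None sentinel or when push() fails; the test's random_reader ends with `yield None` for exactly this reason. A condensed sketch of that producer contract follows — batch contents are illustrative, and it assumes (as feed_data itself does) that push() returns a falsy value once the queue has been closed by the consumer:

    import numpy as np
    import paddle.fluid as fluid

    def make_reader(iterations=5, in_size=4):
        # Generator convention used in this test: yield LoDTensorArray
        # batches, then a final None sentinel so the consumer can stop.
        def reader():
            for _ in range(iterations):
                batch = fluid.LoDTensorArray()
                t = fluid.LoDTensor()
                t.set(np.random.rand(1, in_size).astype('float32'),
                      fluid.CPUPlace())
                batch.append(t)
                yield batch
            yield None
        return reader

    def drain(feed_queue, reader):
        # Mirrors feed_data() below: stop on the sentinel, or when
        # push() reports failure (e.g. the queue was already closed).
        for data in reader():
            if data is None or not feed_queue.push(data):
                break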
+ +import unittest +import paddle.fluid as fluid +import paddle.fluid.core as core +import numpy as np +import threading +import multiprocessing +import os + + +def as_tensor(np_array_or_tensor, place=None): + if isinstance(np_array_or_tensor, fluid.LoDTensor): + return np_array_or_tensor + + if place is None: + place = fluid.CPUPlace() + + tensor = fluid.LoDTensor() + tensor.set(np_array_or_tensor, place) + return tensor + + +def as_numpy(tensor_or_numpy): + return tensor_or_numpy if isinstance( + tensor_or_numpy, np.ndarray) else np.array(tensor_or_numpy) + + +def feed_data(feed_queue, reader): + data_generator = reader() + while True: + data = next(data_generator, None) + if data is None or not feed_queue.push(data): + break + + +def simple_fc_net(in_size, + class_num, + hidden_sizes, + batch_size, + queue_capacity, + use_double_buffer=False): + reader, feed_queue = fluid.layers.py_reader( + capacity=queue_capacity, + shapes=[[-1, in_size], [-1, 1]], + lod_levels=[0, 0], + dtypes=['float32', 'int64']) + reader = fluid.layers.batch(reader, batch_size=batch_size) + if use_double_buffer: + reader = fluid.layers.double_buffer(reader) + + in_data, label = fluid.layers.read_file(reader) + + hidden = in_data + for hidden_size in hidden_sizes: + hidden = fluid.layers.fc( + hidden, + size=hidden_size, + act='tanh', + bias_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=1.0))) + + predict_label = fluid.layers.fc(hidden, size=class_num, act='softmax') + loss = fluid.layers.mean( + fluid.layers.cross_entropy( + input=predict_label, label=label)) + + optimizer = fluid.optimizer.Adam() + optimizer.minimize(loss) + return in_data, label, loss, optimizer, feed_queue + + +class TestPyReaderUsingExecutor(unittest.TestCase): + def setUp(self): + self.in_size = 1000 + self.hidden_sizes = [50, 30, 20] + self.class_num = 10 + self.batch_size = 32 + self.iterations = 10 + self.queue_capacity = 50 + + def test(self): + for use_cuda in [False, True]: + for use_parallel_executor in [False, True]: + for use_double_buffer in [False, True]: + print('Test Parameters:'), + print({ + 'use_cuda': use_cuda, + 'use_parallel_executor': use_parallel_executor, + 'use_double_buffer': use_double_buffer + }) + self.main(use_cuda, use_parallel_executor, + use_double_buffer) + + def random_reader(self): + def reader(): + self.inputs = [] + cnt = 0 + while True: + tensors = fluid.LoDTensorArray() + in_data = np.random.uniform( + low=0, high=1, size=(1, self.in_size)).astype('float32') + tensors.append(as_tensor(in_data)) + label = np.random.random_integers( + low=0, high=self.class_num - 1, size=(1, 1)).astype('int64') + tensors.append(as_tensor(label)) + + if cnt < self.iterations * self.batch_size * self.batch_size_times: + if cnt % (self.batch_size * self.batch_size_times) == 0: + self.inputs.append([in_data, label]) + else: + self.inputs[-1][0] = np.concatenate( + (self.inputs[-1][0], in_data), axis=0) + self.inputs[-1][1] = np.concatenate( + (self.inputs[-1][1], label), axis=0) + elif not self.use_double_buffer: + break + + yield tensors + cnt += 1 + + yield None + + return reader + + def main(self, + use_cuda=True, + use_parallel_executor=False, + use_double_buffer=False): + assert not use_cuda or use_cuda and core.is_compiled_with_cuda() + + self.use_cuda = use_cuda + self.use_parallel_executor = use_parallel_executor + self.use_double_buffer = use_double_buffer + + startup_program = fluid.Program() + main_program = fluid.Program() + + with fluid.program_guard(main_program, startup_program): + in_data, 
label, loss, optimizer, feed_queue = simple_fc_net( + in_size=self.in_size, + class_num=self.class_num, + hidden_sizes=self.hidden_sizes, + batch_size=self.batch_size, + queue_capacity=self.queue_capacity, + use_double_buffer=self.use_double_buffer) + + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + + startup_exe = fluid.Executor(place) + startup_exe.run(startup_program) + + if use_parallel_executor: + main_exe = fluid.ParallelExecutor(use_cuda, loss_name=loss.name) + if use_cuda: + self.batch_size_times = core.get_cuda_device_count() + else: + self.batch_size_times = int( + os.environ.get('CPU_NUM', multiprocessing.cpu_count())) + else: + main_exe = startup_exe + self.batch_size_times = 1 + + reader = self.random_reader() + thread = threading.Thread( + target=feed_data, args=(feed_queue, reader)) + thread.start() + + self.outputs = [] + for _ in range(self.iterations): + fetches = main_exe.run(fetch_list=[in_data.name, label.name]) + fetches = [as_numpy(fetch) for fetch in fetches] + self.outputs.append(fetches) + + self.validate() + feed_queue.close() + + def validate(self): + self.assertEqual(len(self.inputs), len(self.outputs)) + for batch_in, batch_out in zip(self.inputs, self.outputs): + self.assertEqual(len(batch_in), len(batch_out)) + if self.use_parallel_executor and not self.use_double_buffer: + self.validate_unordered_batch(batch_in, batch_out) + else: + for in_data, out_data in zip(batch_in, batch_out): + self.assertEqual(in_data.shape, out_data.shape) + if not self.use_parallel_executor: + self.assertTrue((in_data == out_data).all()) + + def validate_unordered_batch(self, batch_in, batch_out): + out_index_left_set = set(range(self.batch_size * self.batch_size_times)) + mapping_num = 0 + for i in range(self.batch_size * self.batch_size_times): + for j in out_index_left_set: + flag = True + for k in range(len(batch_in)): + in_data = batch_in[k][i] + out_data = batch_out[k][j] + if (in_data != out_data).any(): + flag = False + break + + if flag: + out_index_left_set.remove(j) + mapping_num += 1 + break + + self.assertEqual(mapping_num, self.batch_size * self.batch_size_times) + + +if __name__ == '__main__': + unittest.main() From 5832e817d541ad46fead0aa420668851ee44183c Mon Sep 17 00:00:00 2001 From: yuyang18 Date: Wed, 11 Jul 2018 16:11:28 +0800 Subject: [PATCH 02/15] Hide append_regularizer --- python/paddle/fluid/regularizer.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/python/paddle/fluid/regularizer.py b/python/paddle/fluid/regularizer.py index dac474d5ee..53f35f5cc0 100644 --- a/python/paddle/fluid/regularizer.py +++ b/python/paddle/fluid/regularizer.py @@ -15,10 +15,7 @@ import framework from . 
import core

-__all__ = [
-    'append_regularization_ops', 'L1Decay', 'L2Decay', 'L1DecayRegularizer',
-    'L2DecayRegularizer'
-]
+__all__ = ['L1Decay', 'L2Decay', 'L1DecayRegularizer', 'L2DecayRegularizer']


 def append_regularization_ops(parameters_and_grads, regularization=None):

From b83ffda4aa8aaa085e7447b21d09528480ae5b21 Mon Sep 17 00:00:00 2001
From: yuyang18
Date: Wed, 11 Jul 2018 17:34:02 +0800
Subject: [PATCH 03/15] Try hide APIs

---
 paddle/fluid/pybind/pybind.cc                 | 28 +++++++++----------
 .../paddle/fluid/tests/unittests/op_test.py  | 16 +++++------
 .../fluid/tests/unittests/test_tensor.py     | 17 ++++++-----
 .../paddle/fluid/tests/unittests/testsuite.py |  2 +-
 4 files changed, 31 insertions(+), 32 deletions(-)

diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 0c523b6f17..2921054a42 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -78,37 +78,37 @@ PYBIND11_PLUGIN(core) {
   py::class_<Tensor>(m, "Tensor", py::buffer_protocol())
       .def_buffer(
           [](Tensor &self) -> py::buffer_info { return CastToPyBuffer(self); })
-      .def("get_dims",
+      .def("_get_dims",
           [](const Tensor &self) { return vectorize(self.dims()); })
-      .def("set_dims",
+      .def("_set_dims",
           [](Tensor &self, const std::vector<int> &dim) {
             self.Resize(make_ddim(dim));
           })
-      .def("set_layout",
+      .def("_set_layout",
           [](Tensor &self, const std::string &layout) {
             self.set_layout(StringToDataLayout(layout));
           })
-      .def("alloc_float",
+      .def("_alloc_float",
           [](Tensor &self, paddle::platform::CUDAPlace &place) {
             self.mutable_data<float>(place);
           })
-      .def("alloc_float",
+      .def("_alloc_float",
           [](Tensor &self, paddle::platform::CPUPlace &place) {
             self.mutable_data<float>(place);
           })
-      .def("alloc_int",
+      .def("_alloc_int",
           [](Tensor &self, paddle::platform::CPUPlace &place) {
             self.mutable_data<int>(place);
           })
-      .def("alloc_int",
+      .def("_alloc_int",
           [](Tensor &self, paddle::platform::CUDAPlace &place) {
             self.mutable_data<int>(place);
           })
-      .def("alloc_int",
+      .def("_alloc_int",
           [](Tensor &self, paddle::platform::CUDAPinnedPlace &place) {
             self.mutable_data<int>(place);
           })
-      .def("alloc_float",
+      .def("_alloc_float",
           [](Tensor &self, paddle::platform::CUDAPinnedPlace &place) {
             self.mutable_data<float>(place);
           })
@@ -136,11 +136,11 @@ PYBIND11_PLUGIN(core) {
       .def("set", PyCUDAPinnedTensorSetFromArray)
 #endif
       .def("shape", [](Tensor &self) { return vectorize(self.dims()); })
-      .def("set_float_element", TensorSetElement<float>)
-      .def("get_float_element", TensorGetElement<float>)
-      .def("set_double_element", TensorSetElement<double>)
-      .def("get_double_element", TensorGetElement<double>)
-      .def("dtype", [](Tensor &self) { return ToDataType(self.type()); });
+      .def("_set_float_element", TensorSetElement<float>)
+      .def("_get_float_element", TensorGetElement<float>)
+      .def("_set_double_element", TensorSetElement<double>)
+      .def("_get_double_element", TensorGetElement<double>)
+      .def("_dtype", [](Tensor &self) { return ToDataType(self.type()); });

   py::class_<LoDTensor, Tensor>(m, "LoDTensor")
       .def_buffer(
diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py
index e056ef9952..6824ede82b 100644
--- a/python/paddle/fluid/tests/unittests/op_test.py
+++ b/python/paddle/fluid/tests/unittests/op_test.py
@@ -60,8 +60,8 @@ def get_numeric_gradient(place,
         return np.array(sum).mean()

     tensor_to_check = scope.find_var(input_to_check).get_tensor()
-    tensor_size = product(tensor_to_check.get_dims())
-    tensor_to_check_dtype = tensor_to_check.dtype()
+    tensor_size = product(tensor_to_check.shape())
+    tensor_to_check_dtype = tensor_to_check._dtype()
     if tensor_to_check_dtype ==
core.VarDesc.VarType.FP32: tensor_to_check_dtype = np.float32 elif tensor_to_check_dtype == core.VarDesc.VarType.FP64: @@ -74,15 +74,15 @@ def get_numeric_gradient(place, def __get_elem__(tensor, i): if tensor_to_check_dtype == np.float32: - return tensor.get_float_element(i) + return tensor._get_float_element(i) else: - return tensor.get_double_element(i) + return tensor._get_double_element(i) def __set_elem__(tensor, i, e): if tensor_to_check_dtype == np.float32: - tensor.set_float_element(i, e) + tensor._set_float_element(i, e) else: - tensor.set_double_element(i, e) + tensor._set_double_element(i, e) # we only compute gradient of one element each time. # we use a for loop to compute the gradient of every element. @@ -107,7 +107,7 @@ def get_numeric_gradient(place, __set_elem__(tensor_to_check, i, origin) gradient_flat[i] = (y_pos - y_neg) / delta / 2 - return gradient_flat.reshape(tensor_to_check.get_dims()) + return gradient_flat.reshape(tensor_to_check.shape()) class OpTest(unittest.TestCase): @@ -125,7 +125,7 @@ class OpTest(unittest.TestCase): @classmethod def tearDownClass(cls): - '''Restore random seeds''' + """Restore random seeds""" np.random.set_state(cls._np_rand_state) random.setstate(cls._py_rand_state) diff --git a/python/paddle/fluid/tests/unittests/test_tensor.py b/python/paddle/fluid/tests/unittests/test_tensor.py index f17edd3025..02bced5c12 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_tensor.py @@ -26,7 +26,7 @@ class TestTensor(unittest.TestCase): tensor = var.get_tensor() tensor.set_dims([1000, 784]) - tensor.alloc_int(place) + tensor._alloc_int(place) tensor_array = numpy.array(tensor) self.assertEqual((1000, 784), tensor_array.shape) tensor_array[3, 9] = 1 @@ -45,7 +45,7 @@ class TestTensor(unittest.TestCase): tensor = var.get_tensor() tensor.set_dims([1000, 784]) - tensor.alloc_float(place) + tensor._alloc_float(place) tensor_array = numpy.array(tensor) self.assertEqual((1000, 784), tensor_array.shape) @@ -64,7 +64,7 @@ class TestTensor(unittest.TestCase): lod_tensor = var_lod.get_tensor() lod_tensor.set_dims([4, 4, 6]) - lod_tensor.alloc_int(place) + lod_tensor._alloc_int(place) array = numpy.array(lod_tensor) array[0, 0, 0] = 3 array[3, 3, 5] = 10 @@ -85,7 +85,7 @@ class TestTensor(unittest.TestCase): lod_tensor = var_lod.get_tensor() lod_tensor.set_dims([5, 2, 3, 4]) - lod_tensor.alloc_float(place) + lod_tensor._alloc_float(place) tensor_array = numpy.array(lod_tensor) self.assertEqual((5, 2, 3, 4), tensor_array.shape) @@ -104,14 +104,13 @@ class TestTensor(unittest.TestCase): self.assertListEqual(lod_py, lod) def test_lod_tensor_init(self): - scope = core.Scope() place = core.CPUPlace() lod_py = [[2, 1], [1, 2, 2]] lod_tensor = core.LoDTensor() lod_tensor.set_dims([5, 2, 3, 4]) lod_tensor.set_recursive_sequence_lengths(lod_py) - lod_tensor.alloc_float(place) + lod_tensor._alloc_float(place) tensor_array = numpy.array(lod_tensor) tensor_array[0, 0, 0, 0] = 1.0 tensor_array[0, 0, 0, 1] = 2.0 @@ -131,7 +130,7 @@ class TestTensor(unittest.TestCase): lod_tensor.set_dims([5, 2, 3, 4]) lod_tensor.set_recursive_sequence_lengths(lod_py) - lod_tensor.alloc_float(place) + lod_tensor._alloc_float(place) tensor_array = numpy.array(lod_tensor) tensor_array[0, 0, 0, 0] = 1.0 tensor_array[0, 0, 0, 1] = 2.0 @@ -150,14 +149,14 @@ class TestTensor(unittest.TestCase): tensor = var.get_tensor() tensor.set_dims([0, 1]) - tensor.alloc_float(place) + tensor._alloc_float(place) tensor_array = numpy.array(tensor) 
self.assertEqual((0, 1), tensor_array.shape) if core.is_compiled_with_cuda(): gpu_place = core.CUDAPlace(0) - tensor.alloc_float(gpu_place) + tensor._alloc_float(gpu_place) tensor_array = numpy.array(tensor) self.assertEqual((0, 1), tensor_array.shape) diff --git a/python/paddle/fluid/tests/unittests/testsuite.py b/python/paddle/fluid/tests/unittests/testsuite.py index a995ee10f2..55c6e54906 100644 --- a/python/paddle/fluid/tests/unittests/testsuite.py +++ b/python/paddle/fluid/tests/unittests/testsuite.py @@ -75,7 +75,7 @@ def set_input(scope, op, inputs, place): if isinstance(var, tuple): tensor.set_recursive_sequence_lengths(var[1]) var = var[0] - tensor.set_dims(var.shape) + tensor._set_dims(var.shape) tensor.set(var, place) elif isinstance(var, float): scope.find_var(var_name).set_float(var) From 7ce0d45efa37e51c0c0372ed1936c19dd70ddc3f Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 11 Jul 2018 17:34:47 +0800 Subject: [PATCH 04/15] fix adam and adamax optimizer --- python/paddle/fluid/optimizer.py | 122 +++++++++++++++---------------- 1 file changed, 61 insertions(+), 61 deletions(-) diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 75ee40fa9c..8b6a9b00ee 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -123,7 +123,7 @@ class Optimizer(object): """ pass - def _finish_update(self, block): + def _finish_update(self, block, parameters): """Finish any custom updates needed before completing an optimization step @@ -132,7 +132,7 @@ class Optimizer(object): parameters: list of parameter variables for the optimizer Returns: - list of finish ops or None + None """ pass @@ -236,7 +236,8 @@ class Optimizer(object): # Get custom finish ops for subclasses # FIXME: Need to fix this once we figure out how to handle dependencies - self._finish_update(loss.block) + self._finish_update(loss.block, + [p[0] for p in parameters_and_grads]) end = len(global_block.ops) return global_block.slice_ops(start, end) @@ -486,6 +487,8 @@ class AdamOptimizer(Optimizer): """ _moment1_acc_str = "moment1" _moment2_acc_str = "moment2" + _beta1_pow_acc_str = "beta1_pow_acc" + _beta2_pow_acc_str = "beta2_pow_acc" def __init__(self, learning_rate=0.001, @@ -507,32 +510,22 @@ class AdamOptimizer(Optimizer): def _create_accumulators(self, block, parameters): assert isinstance(block, framework.Block) - main_block = block.program.global_block() - # Create beta1 and beta2 power tensors - beta_shape = [1] - self._beta1_pow_acc = self.helper.create_global_variable( - name=unique_name.generate('beta1_pow_acc'), - dtype='float32' if self._dtype == None else self._dtype, - shape=beta_shape, - lod_level=0, - persistable=True) - self.helper.set_variable_initializer( - self._beta1_pow_acc, initializer=Constant(self._beta1)) - - self._beta2_pow_acc = self.helper.create_global_variable( - name=unique_name.generate('beta2_pow_acc'), - dtype='float32' if self._dtype == None else self._dtype, - shape=beta_shape, - lod_level=0, - persistable=True) - - self.helper.set_variable_initializer( - self._beta2_pow_acc, initializer=Constant(self._beta2)) - # Create accumulator tensors for first and second moments for p in parameters: self._add_accumulator(self._moment1_acc_str, p) self._add_accumulator(self._moment2_acc_str, p) + self._add_accumulator( + name=self._beta1_pow_acc_str, + param=p, + dtype='float32', + fill_value=self._beta1, + shape=[1]) + self._add_accumulator( + name=self._beta2_pow_acc_str, + param=p, + dtype='float32', + fill_value=self._beta2, + 
shape=[1]) def _append_optimize_op(self, block, param_and_grad): assert isinstance(block, framework.Block) @@ -541,6 +534,11 @@ class AdamOptimizer(Optimizer): param_and_grad[0]) moment2 = self._get_accumulator(self._moment2_acc_str, param_and_grad[0]) + beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str, + param_and_grad[0]) + beta2_pow_acc = self._get_accumulator(self._beta2_pow_acc_str, + param_and_grad[0]) + # create the adam optimize op adam_op = block.append_op( type=self.type, @@ -550,8 +548,8 @@ class AdamOptimizer(Optimizer): "LearningRate": self._create_param_lr(param_and_grad), "Moment1": moment1, "Moment2": moment2, - "Beta1Pow": self._beta1_pow_acc, - "Beta2Pow": self._beta2_pow_acc + "Beta1Pow": beta1_pow_acc, + "Beta2Pow": beta2_pow_acc }, outputs={ "ParamOut": param_and_grad[0], @@ -566,24 +564,27 @@ class AdamOptimizer(Optimizer): return adam_op - def _finish_update(self, block): + def _finish_update(self, block, parameters): """Update Beta1 and Beta2 Power accumulators """ assert isinstance(block, framework.Block) main_block = block.program.global_block() - scale_beta1 = main_block.append_op( - type="scale", - inputs={"X": self._beta1_pow_acc}, - outputs={"Out": self._beta1_pow_acc}, - attrs={"scale": self._beta1}) - - scale_beta2 = main_block.append_op( - type="scale", - inputs={"X": self._beta2_pow_acc}, - outputs={"Out": self._beta2_pow_acc}, - attrs={"scale": self._beta2}) - - return [scale_beta1, scale_beta2] + for param in parameters: + beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str, + param) + beta2_pow_acc = self._get_accumulator(self._beta2_pow_acc_str, + param) + main_block.append_op( + type="scale", + inputs={"X": beta1_pow_acc}, + outputs={"Out": beta1_pow_acc}, + attrs={"scale": self._beta1}) + + main_block.append_op( + type="scale", + inputs={"X": beta2_pow_acc}, + outputs={"Out": beta2_pow_acc}, + attrs={"scale": self._beta2}) class AdamaxOptimizer(Optimizer): @@ -626,6 +627,7 @@ class AdamaxOptimizer(Optimizer): """ _moment_acc_str = "moment" _inf_norm_acc_str = "inf_norm" + _beta1_pow_acc_str = "beta1_pow_acc" def __init__(self, learning_rate=0.001, @@ -645,21 +647,16 @@ class AdamaxOptimizer(Optimizer): self._epsilon = epsilon def _create_accumulators(self, block, parameters): - # Create beta1 power accumulator tensor - beta_shape = [1] - self._beta1_pow_acc = self.helper.create_global_variable( - name=unique_name.generate('beta1_pow_acc'), - dtype='float32' if self._dtype == None else self._dtype, - shape=beta_shape, - lod_level=0, - persistable=True) - self.helper.set_variable_initializer( - self._beta1_pow_acc, initializer=Constant(self._beta1)) - # Create accumulator tensors for first moment and infinity norm for p in parameters: self._add_accumulator(self._moment_acc_str, p) self._add_accumulator(self._inf_norm_acc_str, p) + self._add_accumulator( + name=self._beta1_pow_acc_str, + param=p, + dtype='float32', + fill_value=self._beta1, + shape=[1]) def _append_optimize_op(self, block, param_and_grad): assert isinstance(block, framework.Block) @@ -667,6 +664,8 @@ class AdamaxOptimizer(Optimizer): moment = self._get_accumulator(self._moment_acc_str, param_and_grad[0]) inf_norm = self._get_accumulator(self._inf_norm_acc_str, param_and_grad[0]) + beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str, + param_and_grad[0]) # create the adamax optimize op adamax_op = block.append_op( type=self.type, @@ -676,7 +675,7 @@ class AdamaxOptimizer(Optimizer): "LearningRate": self._create_param_lr(param_and_grad), "Moment": moment, 
"InfNorm": inf_norm, - "Beta1Pow": self._beta1_pow_acc + "Beta1Pow": beta1_pow_acc }, outputs={ "ParamOut": param_and_grad[0], @@ -691,18 +690,19 @@ class AdamaxOptimizer(Optimizer): return adamax_op - def _finish_update(self, block): + def _finish_update(self, block, parameters): """Update Beta1 Power accumulator """ assert isinstance(block, framework.Block) main_block = block.program.global_block() - scale_beta1 = main_block.append_op( - type="scale", - inputs={"X": self._beta1_pow_acc}, - outputs={"Out": self._beta1_pow_acc}, - attrs={"scale": self._beta1}) - - return [scale_beta1] + for param in parameters: + beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str, + param) + main_block.append_op( + type="scale", + inputs={"X": beta1_pow_acc}, + outputs={"Out": beta1_pow_acc}, + attrs={"scale": self._beta1}) class DecayedAdagradOptimizer(Optimizer): From a685566cd545285b5984512f7858d80e2e8229a1 Mon Sep 17 00:00:00 2001 From: minqiyang Date: Wed, 11 Jul 2018 21:51:05 +0800 Subject: [PATCH 05/15] Throw all warnings from git describe in setup.py.in Convert all values to string --- python/setup.py.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/setup.py.in b/python/setup.py.in index 52138b414e..ee4bbf6feb 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -42,12 +42,12 @@ def get_patch(): def is_taged(): try: - cmd = ['git', 'describe', '--exact-match', '--tags'] + cmd = ['git', 'describe', '--exact-match', '--tags', 'HEAD', '2>/dev/null'] git_tag = subprocess.Popen(cmd, stdout = subprocess.PIPE).communicate()[0].strip() except: return False - if git_tag.replace('v', '') == '@PADDLE_VERSION@': + if str(git_tag).replace('v', '') == '@PADDLE_VERSION@': return True else: return False From 08fa3983b0832d08dfa05df55f0a9aa00f94ee5e Mon Sep 17 00:00:00 2001 From: yuyang18 Date: Thu, 12 Jul 2018 12:14:55 +0800 Subject: [PATCH 06/15] Get BeamSearch Prob on C-API --- paddle/legacy/capi/Arguments.cpp | 11 +++++++++++ paddle/legacy/capi/arguments.h | 12 ++++++++++++ 2 files changed, 23 insertions(+) diff --git a/paddle/legacy/capi/Arguments.cpp b/paddle/legacy/capi/Arguments.cpp index 87fac3d6c6..0ce1770c76 100644 --- a/paddle/legacy/capi/Arguments.cpp +++ b/paddle/legacy/capi/Arguments.cpp @@ -66,6 +66,17 @@ paddle_error paddle_arguments_get_value(paddle_arguments args, return kPD_NO_ERROR; } +PD_API paddle_error paddle_arguments_get_prob(paddle_arguments args, + uint64_t ID, + paddle_matrix mat) { + if (args == nullptr || mat == nullptr) return kPD_NULLPTR; + auto m = paddle::capi::cast(mat); + auto a = castArg(args); + if (ID >= a->args.size()) return kPD_OUT_OF_RANGE; + m->mat = a->args[ID].in; + return kPD_NO_ERROR; +} + paddle_error paddle_arguments_get_ids(paddle_arguments args, uint64_t ID, paddle_ivector ids) { diff --git a/paddle/legacy/capi/arguments.h b/paddle/legacy/capi/arguments.h index 69a66bb012..ceb64ee6aa 100644 --- a/paddle/legacy/capi/arguments.h +++ b/paddle/legacy/capi/arguments.h @@ -87,6 +87,18 @@ PD_API paddle_error paddle_arguments_get_value(paddle_arguments args, uint64_t ID, paddle_matrix mat); +/** + * @brief paddle_arguments_get_prob Get the prob matrix of beam search, which + * slot ID is `ID` + * @param [in] args arguments array + * @param [in] ID array index + * @param [out] mat matrix pointer + * @return paddle_error + */ +PD_API paddle_error paddle_arguments_get_prob(paddle_arguments args, + uint64_t ID, + paddle_matrix mat); + /** * @brief PDArgsGetIds Get the integer vector of one argument in array, which * index is 
`ID`. From 6eba4b32ce1d07aa4fa641d4837076ca48beeff0 Mon Sep 17 00:00:00 2001 From: yuyang18 Date: Thu, 12 Jul 2018 12:32:47 +0800 Subject: [PATCH 07/15] Fix unittests after hide APIs --- .../fluid/tests/unittests/test_batch_norm_op.py | 1 - .../tests/unittests/test_dynrnn_static_input.py | 14 +++++++------- .../fluid/tests/unittests/test_selected_rows.py | 6 +++--- .../tests/unittests/test_shrink_rnn_memory.py | 4 ++-- python/paddle/fluid/tests/unittests/test_tensor.py | 14 +++++++------- 5 files changed, 19 insertions(+), 20 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py index a62ee9596d..fcb2612326 100644 --- a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py @@ -129,7 +129,6 @@ def create_or_get_tensor(scope, var_name, var, place): if var is not None: assert isinstance(var, np.ndarray) tensor.set_recursive_sequence_lengths([]) - tensor.set_dims(var.shape) tensor.set(var, place) return tensor diff --git a/python/paddle/fluid/tests/unittests/test_dynrnn_static_input.py b/python/paddle/fluid/tests/unittests/test_dynrnn_static_input.py index 92e718662d..31af124572 100644 --- a/python/paddle/fluid/tests/unittests/test_dynrnn_static_input.py +++ b/python/paddle/fluid/tests/unittests/test_dynrnn_static_input.py @@ -65,10 +65,10 @@ class TestDyRnnStaticInput(unittest.TestCase): return self._lodtensor_to_ndarray(fetch_outs[0]) def _lodtensor_to_ndarray(self, lod_tensor): - dims = lod_tensor.get_dims() + dims = lod_tensor.shape() ndarray = np.zeros(shape=dims).astype('float32') for i in xrange(np.product(dims)): - ndarray.ravel()[i] = lod_tensor.get_float_element(i) + ndarray.ravel()[i] = lod_tensor._get_float_element(i) return ndarray, lod_tensor.recursive_sequence_lengths() def build_graph(self, only_forward=False): @@ -185,19 +185,19 @@ class TestDyRnnStaticInput(unittest.TestCase): actual_gradients, actual_lod = self.fetch_value(static_input_grad) - static_input_shape = self.static_input_tensor.get_dims() + static_input_shape = self.static_input_tensor.shape() numeric_gradients = np.zeros(shape=static_input_shape).astype('float32') # calculate numeric gradients tensor_size = np.product(static_input_shape) for i in xrange(tensor_size): - origin = self.static_input_tensor.get_float_element(i) + origin = self.static_input_tensor._get_float_element(i) x_pos = origin + self._delta - self.static_input_tensor.set_float_element(i, x_pos) + self.static_input_tensor._set_float_element(i, x_pos) y_pos = self.fetch_value(loss)[0][0] x_neg = origin - self._delta - self.static_input_tensor.set_float_element(i, x_neg) + self.static_input_tensor._set_float_element(i, x_neg) y_neg = self.fetch_value(loss)[0][0] - self.static_input_tensor.set_float_element(i, origin) + self.static_input_tensor._set_float_element(i, origin) numeric_gradients.ravel()[i] = (y_pos - y_neg) / self._delta / 2 self.assertTrue(np.allclose(actual_gradients, numeric_gradients, 0.001)) self.assertTrue( diff --git a/python/paddle/fluid/tests/unittests/test_selected_rows.py b/python/paddle/fluid/tests/unittests/test_selected_rows.py index 3d7b86787f..f504a06fff 100644 --- a/python/paddle/fluid/tests/unittests/test_selected_rows.py +++ b/python/paddle/fluid/tests/unittests/test_selected_rows.py @@ -40,12 +40,12 @@ class TestSelectedRows(unittest.TestCase): # compare tensor self.assertAlmostEqual(2.0, - selected_rows.get_tensor().get_float_element(0)) + 
selected_rows.get_tensor()._get_float_element(0)) self.assertAlmostEqual(1.0, - selected_rows.get_tensor().get_float_element(1)) + selected_rows.get_tensor()._get_float_element(1)) self.assertAlmostEqual( 4.0, - selected_rows.get_tensor().get_float_element(2 * row_numel + 8)) + selected_rows.get_tensor()._get_float_element(2 * row_numel + 8)) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py b/python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py index b779f0fb01..24bc2cbaf8 100644 --- a/python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py +++ b/python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py @@ -45,8 +45,8 @@ class TestShrinkRNNMemoryBase(unittest.TestCase): def sum_lodtensor(self, tensor): sum_res = 0.0 - for i in xrange(np.product(tensor.get_dims())): - sum_res += tensor.get_float_element(i) + for i in xrange(np.product(tensor.shape())): + sum_res += tensor._get_float_element(i) return sum_res diff --git a/python/paddle/fluid/tests/unittests/test_tensor.py b/python/paddle/fluid/tests/unittests/test_tensor.py index 02bced5c12..5ccc876ae8 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_tensor.py @@ -25,7 +25,7 @@ class TestTensor(unittest.TestCase): tensor = var.get_tensor() - tensor.set_dims([1000, 784]) + tensor._set_dims([1000, 784]) tensor._alloc_int(place) tensor_array = numpy.array(tensor) self.assertEqual((1000, 784), tensor_array.shape) @@ -44,7 +44,7 @@ class TestTensor(unittest.TestCase): tensor = var.get_tensor() - tensor.set_dims([1000, 784]) + tensor._set_dims([1000, 784]) tensor._alloc_float(place) tensor_array = numpy.array(tensor) @@ -63,7 +63,7 @@ class TestTensor(unittest.TestCase): var_lod = scope.var("test_lod_tensor") lod_tensor = var_lod.get_tensor() - lod_tensor.set_dims([4, 4, 6]) + lod_tensor._set_dims([4, 4, 6]) lod_tensor._alloc_int(place) array = numpy.array(lod_tensor) array[0, 0, 0] = 3 @@ -84,7 +84,7 @@ class TestTensor(unittest.TestCase): var_lod = scope.var("test_lod_tensor") lod_tensor = var_lod.get_tensor() - lod_tensor.set_dims([5, 2, 3, 4]) + lod_tensor._set_dims([5, 2, 3, 4]) lod_tensor._alloc_float(place) tensor_array = numpy.array(lod_tensor) @@ -108,7 +108,7 @@ class TestTensor(unittest.TestCase): lod_py = [[2, 1], [1, 2, 2]] lod_tensor = core.LoDTensor() - lod_tensor.set_dims([5, 2, 3, 4]) + lod_tensor._set_dims([5, 2, 3, 4]) lod_tensor.set_recursive_sequence_lengths(lod_py) lod_tensor._alloc_float(place) tensor_array = numpy.array(lod_tensor) @@ -128,7 +128,7 @@ class TestTensor(unittest.TestCase): lod_py = [[2, 1], [1, 2, 2]] lod_tensor = core.LoDTensor() - lod_tensor.set_dims([5, 2, 3, 4]) + lod_tensor._set_dims([5, 2, 3, 4]) lod_tensor.set_recursive_sequence_lengths(lod_py) lod_tensor._alloc_float(place) tensor_array = numpy.array(lod_tensor) @@ -148,7 +148,7 @@ class TestTensor(unittest.TestCase): tensor = var.get_tensor() - tensor.set_dims([0, 1]) + tensor._set_dims([0, 1]) tensor._alloc_float(place) tensor_array = numpy.array(tensor) From 0c8f69c370aea45b126fab4d7ae9f7064ed37d05 Mon Sep 17 00:00:00 2001 From: yuyang18 Date: Thu, 12 Jul 2018 12:42:18 +0800 Subject: [PATCH 08/15] Hide more APIs --- python/paddle/fluid/layer_helper.py | 22 +++++++++++----------- python/paddle/fluid/param_attr.py | 18 +++++++++--------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/python/paddle/fluid/layer_helper.py b/python/paddle/fluid/layer_helper.py index 86efd1ff51..de752d1dae 100644 --- 
a/python/paddle/fluid/layer_helper.py +++ b/python/paddle/fluid/layer_helper.py @@ -68,11 +68,11 @@ class LayerHelper(object): @property def param_attr(self): - return ParamAttr.to_attr(self.kwargs.get('param_attr', None)) + return ParamAttr._to_attr(self.kwargs.get('param_attr', None)) @property def bias_attr(self): - return ParamAttr.to_attr(self.kwargs.get('bias_attr', None)) + return ParamAttr._to_attr(self.kwargs.get('bias_attr', None)) def multiple_param_attr(self, length): param_attr = self.param_attr @@ -262,11 +262,11 @@ class LayerHelper(object): g_param = self.startup_program.global_block().create_parameter( dtype=dtype, shape=g_param_shape, - **g_param_attr.to_kwargs(with_initializer=False)) + **g_param_attr._to_kwargs(with_initializer=False)) v_param = self.startup_program.global_block().create_parameter( dtype=dtype, shape=v_param_shape, - **v_param_attr.to_kwargs(with_initializer=True)) + **v_param_attr._to_kwargs(with_initializer=True)) __norm_except_dim( x=v_param, out=g_param, @@ -275,9 +275,9 @@ class LayerHelper(object): # Add weight normalization to main_program g_param = self.main_program.global_block().create_parameter( - dtype=dtype, shape=g_param_shape, **g_param_attr.to_kwargs()) + dtype=dtype, shape=g_param_shape, **g_param_attr._to_kwargs()) v_param = self.main_program.global_block().create_parameter( - dtype=dtype, shape=v_param_shape, **v_param_attr.to_kwargs()) + dtype=dtype, shape=v_param_shape, **v_param_attr._to_kwargs()) w_param = __weight_normalize(g_param, v_param, dim=attr.dim) return w_param @@ -296,11 +296,11 @@ class LayerHelper(object): if default_initializer is None and attr.initializer is None: if is_bias: - attr.set_default_bias_initializer() + attr._set_default_bias_initializer() else: - attr.set_default_param_initializer() + attr._set_default_param_initializer() else: - attr.set_default_initializer(default_initializer) + attr._set_default_initializer(default_initializer) # If weight normalization is set, insert extra parameters and ops. # Refer to https://arxiv.org/pdf/1602.07868.pdf @@ -310,9 +310,9 @@ class LayerHelper(object): return param self.startup_program.global_block().create_parameter( - dtype=dtype, shape=shape, **attr.to_kwargs(with_initializer=True)) + dtype=dtype, shape=shape, **attr._to_kwargs(with_initializer=True)) return self.main_program.global_block().create_parameter( - dtype=dtype, shape=shape, **attr.to_kwargs()) + dtype=dtype, shape=shape, **attr._to_kwargs()) def get_parameter(self, name): param = self.main_program.global_block().var(name) diff --git a/python/paddle/fluid/param_attr.py b/python/paddle/fluid/param_attr.py index 0a42b9fca8..4a61f85ec4 100644 --- a/python/paddle/fluid/param_attr.py +++ b/python/paddle/fluid/param_attr.py @@ -67,7 +67,7 @@ class ParamAttr(object): self.gradient_clip = gradient_clip self.model_average = do_model_average - def set_default_initializer(self, initializer): + def _set_default_initializer(self, initializer): """ Set the default initializer, the initializer should be Constant, Uniform, Normal, Xavier, MSRA. @@ -88,7 +88,7 @@ class ParamAttr(object): self.initializer = initializer - def set_default_param_initializer(self): + def _set_default_param_initializer(self): """ Set the default initializer for the parameter with Xavier. @@ -98,9 +98,9 @@ class ParamAttr(object): Returns: None. 
""" - self.set_default_initializer(Xavier()) + self._set_default_initializer(Xavier()) - def set_default_bias_initializer(self): + def _set_default_bias_initializer(self): """ Set the default initializer for the bias with Constant(0.0). @@ -110,10 +110,10 @@ class ParamAttr(object): Returns: None. """ - self.set_default_initializer(Constant(0.0)) + self._set_default_initializer(Constant(0.0)) @staticmethod - def to_attr(arg): + def _to_attr(arg): """ Create ParamAttr[s]. @@ -131,7 +131,7 @@ class ParamAttr(object): if arg is None: return ParamAttr() elif isinstance(arg, list) or isinstance(arg, tuple): - return [ParamAttr.to_attr(a) for a in arg] + return [ParamAttr._to_attr(a) for a in arg] elif isinstance(arg, ParamAttr): return arg elif isinstance(arg, str) or isinstance(arg, unicode): @@ -141,11 +141,11 @@ class ParamAttr(object): elif isinstance(arg, WeightDecayRegularizer): return ParamAttr(regularizer=arg) elif isinstance(arg, bool): - return ParamAttr.to_attr(None) if arg else False + return ParamAttr._to_attr(None) if arg else False else: raise TypeError("{0} cast to ParamAttr".format(type(arg))) - def to_kwargs(self, with_initializer=False): + def _to_kwargs(self, with_initializer=False): """ Returns the attributes of this parameter. From 4dccb58483f4de69f3223b305d5befba96310659 Mon Sep 17 00:00:00 2001 From: yuyang18 Date: Thu, 12 Jul 2018 12:54:52 +0800 Subject: [PATCH 09/15] Hide clip APIs --- python/paddle/fluid/clip.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py index 18e2f3045e..2a8e3d410a 100644 --- a/python/paddle/fluid/clip.py +++ b/python/paddle/fluid/clip.py @@ -31,7 +31,7 @@ class BaseErrorClipAttr(object): def __str__(self): raise NotImplementedError() - def append_clip_op(self, block, grad_name): + def _append_clip_op(self, block, grad_name): raise NotImplementedError() @@ -67,7 +67,7 @@ class ErrorClipByValue(BaseErrorClipAttr): def __str__(self): return "ByValue, min=%f, max=%f" % (self.min, self.max) - def append_clip_op(self, block, grad_name): + def _append_clip_op(self, block, grad_name): clip_op_desc = block.desc.append_op() clip_op_desc.set_type("clip") clip_op_desc.set_input("X", [grad_name]) @@ -90,17 +90,17 @@ def error_clip_callback(block, context): "Variable's error_clip should be an instance of BaseErrorClipAttr or None." 
) if error_clip is not None: - error_clip.append_clip_op(block, grad_n) + error_clip._append_clip_op(block, grad_n) class BaseGradientClipAttr(object): def __str__(self): raise NotImplementedError() - def process_context(self, context, param, grad): + def _process_context(self, context, param, grad): raise NotImplementedError() - def create_operators(self, param, grad): + def _create_operators(self, param, grad): raise NotImplementedError() @@ -108,10 +108,10 @@ class NullGradientClipAttr(BaseGradientClipAttr): def __str__(self): return "Null" - def process_context(self, context, param, grad): + def _process_context(self, context, param, grad): pass - def create_operators(self, param, grad): + def _create_operators(self, param, grad): return param, grad @@ -153,10 +153,10 @@ class GradientClipByValue(BaseGradientClipAttr): def __str__(self): return "ByValue, min=%f, max=%f" % (self.min, self.max) - def process_context(self, context, param, grad): + def _process_context(self, context, param, grad): pass - def create_operators(self, param, grad): + def _create_operators(self, param, grad): new_grad = layers.clip(x=grad, min=self.min, max=self.max) return param, new_grad @@ -199,10 +199,10 @@ class GradientClipByNorm(BaseGradientClipAttr): def __str__(self): return "ByNorm, clip_norm=%f" % self.clip_norm - def process_context(self, context, param, grad): + def _process_context(self, context, param, grad): pass - def create_operators(self, param, grad): + def _create_operators(self, param, grad): new_grad = layers.clip_by_norm(x=grad, max_norm=self.clip_norm) return param, new_grad @@ -257,7 +257,7 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr): return "ByGlobalNorm, group_name=%s, clip_norm=%f" % (self.group_name, self.clip_norm) - def process_context(self, context, param, grad): + def _process_context(self, context, param, grad): if self.group_name not in context: context[self.group_name] = [] context[self.group_name + "_clip_value"] = self.clip_norm @@ -274,7 +274,7 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr): self.context = context - def create_operators(self, param, grad): + def _create_operators(self, param, grad): group_scale_name = self.group_name + "_scale" if group_scale_name not in self.context: group_norm_var = layers.sums(input=self.context[self.group_name]) @@ -336,12 +336,12 @@ def append_gradient_clip_ops(param_grad): "clip attribute should be an instance of BaseGradientClipAttr" ) - clip_attr.process_context(context=context, param=p, grad=g) + clip_attr._process_context(context=context, param=p, grad=g) res = [] for p, g in param_grad: with p.block.program.optimized_guard(p): - res.append(clip_attr.create_operators(param=p, grad=g)) + res.append(clip_attr._create_operators(param=p, grad=g)) return res From d8220ccb91fae54e968740c03840ed3a1503ba85 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 12 Jul 2018 13:29:42 +0800 Subject: [PATCH 10/15] add optimized_guard for optimizer finish_update --- python/paddle/fluid/optimizer.py | 46 +++++++++++++++++--------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 8b6a9b00ee..d458e7bd07 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -570,21 +570,22 @@ class AdamOptimizer(Optimizer): assert isinstance(block, framework.Block) main_block = block.program.global_block() for param in parameters: - beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str, - param) - 
beta2_pow_acc = self._get_accumulator(self._beta2_pow_acc_str, - param) - main_block.append_op( - type="scale", - inputs={"X": beta1_pow_acc}, - outputs={"Out": beta1_pow_acc}, - attrs={"scale": self._beta1}) - - main_block.append_op( - type="scale", - inputs={"X": beta2_pow_acc}, - outputs={"Out": beta2_pow_acc}, - attrs={"scale": self._beta2}) + with param.block.program.optimized_guard(param): + beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str, + param) + beta2_pow_acc = self._get_accumulator(self._beta2_pow_acc_str, + param) + main_block.append_op( + type="scale", + inputs={"X": beta1_pow_acc}, + outputs={"Out": beta1_pow_acc}, + attrs={"scale": self._beta1}) + + main_block.append_op( + type="scale", + inputs={"X": beta2_pow_acc}, + outputs={"Out": beta2_pow_acc}, + attrs={"scale": self._beta2}) class AdamaxOptimizer(Optimizer): @@ -696,13 +697,14 @@ class AdamaxOptimizer(Optimizer): assert isinstance(block, framework.Block) main_block = block.program.global_block() for param in parameters: - beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str, - param) - main_block.append_op( - type="scale", - inputs={"X": beta1_pow_acc}, - outputs={"Out": beta1_pow_acc}, - attrs={"scale": self._beta1}) + with param.block.program.optimized_guard(param): + beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str, + param) + main_block.append_op( + type="scale", + inputs={"X": beta1_pow_acc}, + outputs={"Out": beta1_pow_acc}, + attrs={"scale": self._beta1}) class DecayedAdagradOptimizer(Optimizer): From 2ce1ed3dbdd54d72be288b64a2eccec4f6dd1e4d Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 12 Jul 2018 13:38:11 +0800 Subject: [PATCH 11/15] add optimized_guard for ModelAverage --- python/paddle/fluid/optimizer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 65fef50ead..214f47afa1 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -1158,7 +1158,8 @@ class ModelAverage(Optimizer): self.params_grads.append((param, grad)) for param, grad in self.params_grads: - self._append_average_accumulate_op(param) + with param.block.program.optimized_guard(param): + self._append_average_accumulate_op(param) self.apply_program = Program() block = self.apply_program.global_block() From 55aea982096b5f031f66db131853323dd5eaef14 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 12 Jul 2018 15:02:52 +0800 Subject: [PATCH 12/15] update test_optimizer --- python/paddle/fluid/tests/unittests/test_optimizer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_optimizer.py b/python/paddle/fluid/tests/unittests/test_optimizer.py index 43385691bb..18921d727f 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer.py @@ -287,7 +287,7 @@ class TestAdamOptimizer(unittest.TestCase): # Check accumulators accumulators = adam_optimizer.get_accumulators() - self.assertEqual(len(accumulators), 2) + self.assertEqual(len(accumulators), 4) self.assertTrue(adam_optimizer.get_moment1_str() in accumulators) self.assertTrue(adam_optimizer.get_moment2_str() in accumulators) moment1_acc = accumulators[adam_optimizer.get_moment1_str()] @@ -354,7 +354,7 @@ class TestAdamaxOptimizer(unittest.TestCase): # Check accumulators accumulators = adamax_optimizer.get_accumulators() - self.assertEqual(len(accumulators), 2) + self.assertEqual(len(accumulators), 3) 
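The updated counts (4 for Adam, 3 for Adamax) follow directly from the accumulator refactor in patch 04: the beta-power terms are now registered per parameter via _add_accumulator, alongside the moment accumulators, instead of living in two globally shared variables. A self-contained sketch of that bookkeeping — a simplified stand-in, since the real Optimizer keeps framework variables rather than plain floats; the rest of the updated test hunk continues below:

    # Simplified model of the per-parameter accumulator registry.
    class AccumulatorRegistry(object):
        def __init__(self):
            self.accumulators = {}  # {acc_name: {param_name: value}}

        def add(self, name, param, fill_value=0.0):
            self.accumulators.setdefault(name, {})[param] = fill_value

    reg = AccumulatorRegistry()
    for p in ['fc_0.w_0', 'fc_0.b_0']:
        reg.add('moment1', p)
        reg.add('moment2', p)
        reg.add('beta1_pow_acc', p, fill_value=0.9)
        reg.add('beta2_pow_acc', p, fill_value=0.999)

    # One accumulator *type* per name, each holding one entry per parameter:
    assert len(reg.accumulators) == 4  # Adam: the count asserted above
    assert all(len(v) == 2 for v in reg.accumulators.values())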
self.assertTrue(adamax_optimizer.get_moment_str() in accumulators)
         self.assertTrue(adamax_optimizer.get_inf_norm_str() in accumulators)
         moment_acc = accumulators[adamax_optimizer.get_moment_str()]

From 1effba33123cc64d296c6642c468d5707c6a97bd Mon Sep 17 00:00:00 2001
From: Yancey1989
Date: Thu, 12 Jul 2018 17:00:06 +0800
Subject: [PATCH 13/15] fix pe with cpu place

---
 paddle/fluid/framework/parallel_executor.cc | 10 +++++++---
 paddle/fluid/framework/parallel_executor.h  |  2 +-
 paddle/fluid/pybind/pybind.cc               |  2 +-
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc
index b53a6f43fb..3a9027713a 100644
--- a/paddle/fluid/framework/parallel_executor.cc
+++ b/paddle/fluid/framework/parallel_executor.cc
@@ -95,7 +95,7 @@ ParallelExecutor::ParallelExecutor(
   }

   if (member_->local_scopes_.size() != 1 && local_scopes.empty()) {
-    BCastParamsToGPUs(bcast_vars);
+    BCastParamsToDevs(bcast_vars);
   }

   // Startup Program has been run. All local scopes has correct parameters.
@@ -131,7 +131,7 @@ ParallelExecutor::ParallelExecutor(
       member_->places_, std::move(member_->executor_)));
 }

-void ParallelExecutor::BCastParamsToGPUs(
+void ParallelExecutor::BCastParamsToDevs(
     const std::unordered_set<std::string> &vars) const {
   // the initializing bcast, all vars would be bcast from device(0),
   // otherwise
@@ -202,7 +202,11 @@
 #endif
     } else {
       platform::CPUPlace cpu;
-      for (size_t i = 1; i < member_->places_.size(); ++i) {
+      for (size_t i = 0; i < member_->places_.size(); ++i) {
+        if ((initializing && i == 0) ||
+            (!initializing && static_cast<int>(i) == var_dev_id))
+          continue;
+
         auto local_scope = member_->local_scopes_[i];
         auto *t = local_scope->Var(var)->GetMutable<LoDTensor>();
         t->Resize(dims);
diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h
index 058f83f07c..6985b65406 100644
--- a/paddle/fluid/framework/parallel_executor.h
+++ b/paddle/fluid/framework/parallel_executor.h
@@ -66,7 +66,7 @@ class ParallelExecutor {
   void Run(const std::vector<std::string> &fetch_tensors,
            const std::string &fetched_var_name);

-  void BCastParamsToGPUs(const std::unordered_set<std::string> &vars) const;
+  void BCastParamsToDevs(const std::unordered_set<std::string> &vars) const;

  private:
   ParallelExecutorPrivate *member_;
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 0c523b6f17..be9d375c69 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -656,7 +656,7 @@ All parameter, weight, gradient are variables in Paddle.
                   const std::string &, Scope *, std::vector<Scope *> &,
                   const ExecutionStrategy &, const BuildStrategy &, size_t,
                   size_t>())
-      .def("bcast_params", &ParallelExecutor::BCastParamsToGPUs)
+      .def("bcast_params", &ParallelExecutor::BCastParamsToDevs)
       // NOTE: even we return a vec<Scope*>* to Python use reference policy.
       // We still cannot get local_scope from this vector, since the element
       // of vec will be freed by Python GC.
We can only return Scope* From 5c12c5eb421996ab25553a2cef488ed95f993aff Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 12 Jul 2018 17:32:45 +0800 Subject: [PATCH 14/15] update distribute transformer for adam and adamax optimizer --- .../fluid/transpiler/distribute_transpiler.py | 35 +++---------------- 1 file changed, 5 insertions(+), 30 deletions(-) diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py index 53d6ca86a0..92cdff04a0 100644 --- a/python/paddle/fluid/transpiler/distribute_transpiler.py +++ b/python/paddle/fluid/transpiler/distribute_transpiler.py @@ -377,11 +377,6 @@ class DistributeTranspiler(object): # append it into the sub program. global_ops = [] - # HACK: optimization global ops only used to scale beta1 and beta2 - # replace it with dependency engine. - for op in self.optimize_ops: - if self._is_adam_connected_op(op): - global_ops.append(op) def __append_optimize_op__(op, block, grad_to_block_id, merged_var, lr_ops): @@ -1289,22 +1284,16 @@ class DistributeTranspiler(object): # If one op's input is another op's output or # one op's output is another op's input, we say # the two operator is connected. - def _append_inname_remove_beta(varname_list): + def _append_inname(varname_list): op_input_names = [] for in_name in varname_list: - # HACK: remove beta1 and beta2 to avoid let all - # ops connected. - if in_name.startswith("beta2_pow_acc") or \ - in_name.startswith("beta1_pow_acc"): - continue - else: - op_input_names.append(in_name) + op_input_names.append(in_name) return op_input_names - op1_input_names = _append_inname_remove_beta(op1.desc.input_arg_names()) + op1_input_names = _append_inname(op1.desc.input_arg_names()) op1_output_names = op1.desc.output_arg_names() - op2_input_names = _append_inname_remove_beta(op2.desc.input_arg_names()) + op2_input_names = _append_inname(op2.desc.input_arg_names()) op2_output_names = op2.desc.output_arg_names() if set(op1_output_names) & set(op2_input_names) or \ @@ -1413,7 +1402,7 @@ class DistributeTranspiler(object): def _get_optimize_pass(self): """ - Get optimizer operators, paramters and gradients from origin_program + Get optimizer operators, parameters and gradients from origin_program Returns: opt_ops (list): optimize operators. params_grads (dict): paramter->gradient. @@ -1436,20 +1425,6 @@ class DistributeTranspiler(object): origin_var_dict[param_name], origin_var_dict[input_name] ]) - elif self._is_adam_connected_op(op): - opt_ops.append(op) else: pass return opt_ops, params_grads - - def _is_adam_connected_op(self, op): - """ - A hack function to determinate whether the input operator - is connected to optimize operator. 
- """ - if op.type == "scale": - for in_name in op.input_arg_names: - if in_name.startswith("beta1_pow_acc") or \ - in_name.startswith("beta2_pow_acc"): - return True - return False From 600f5f0b49264f44deba3fb07f63e0c3acfb12d6 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Fri, 13 Jul 2018 10:52:31 +0800 Subject: [PATCH 15/15] simpilify distribute transpiler --- .../fluid/transpiler/distribute_transpiler.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py index 92cdff04a0..121c36e477 100644 --- a/python/paddle/fluid/transpiler/distribute_transpiler.py +++ b/python/paddle/fluid/transpiler/distribute_transpiler.py @@ -1284,20 +1284,8 @@ class DistributeTranspiler(object): # If one op's input is another op's output or # one op's output is another op's input, we say # the two operator is connected. - def _append_inname(varname_list): - op_input_names = [] - for in_name in varname_list: - op_input_names.append(in_name) - return op_input_names - - op1_input_names = _append_inname(op1.desc.input_arg_names()) - op1_output_names = op1.desc.output_arg_names() - - op2_input_names = _append_inname(op2.desc.input_arg_names()) - op2_output_names = op2.desc.output_arg_names() - - if set(op1_output_names) & set(op2_input_names) or \ - set(op1_input_names) & set(op2_output_names): + if set(op1.desc.output_arg_names()) & set(op2.desc.input_arg_names()) or \ + set(op1.desc.input_arg_names()) & set(op2.desc.output_arg_names()): return True return False