test=develop

add linspace, modify interface comments in tensor.py, merge with develop branch
revert-16650-refine_parallel_executor_3
zhoukunsheng 6 years ago
commit 43060084a4

@ -255,6 +255,7 @@ paddle.fluid.layers.reverse (ArgSpec(args=['x', 'axis'], varargs=None, keywords=
paddle.fluid.layers.has_inf (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '8f8c0306117ea441f20dcbbdba1f0ecc'))
paddle.fluid.layers.has_nan (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '2e53e83127dbfd86e7098bdfe9a549e8'))
paddle.fluid.layers.isfinite (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '0a437011c3906079fd8947ed3e52d292'))
paddle.fluid.layers.range (ArgSpec(args=['start', 'end', 'step', 'dtype'], varargs=None, keywords=None, defaults=None), ('document', '2ec937ede953ded2fdff2675883900bb'))
paddle.fluid.layers.linspace (ArgSpec(args=['start', 'stop', 'num', 'dtype'], varargs=None, keywords=None, defaults=None), ('document', 'd40afc565db001d41659ab7bac32d7c4'))
paddle.fluid.layers.While.__init__ (ArgSpec(args=['self', 'cond', 'is_test', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.While.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
@ -377,23 +378,9 @@ paddle.fluid.contrib.Calibrator.__init__ (ArgSpec(args=['self'], varargs='args',
paddle.fluid.contrib.Calibrator.sample_data (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '3b8c85ca1e2cf753cc8c90a6c6992958'))
paddle.fluid.contrib.Calibrator.save_int8_model (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.reader.ctr_reader.ctr_reader (ArgSpec(args=['feed_dict', 'file_type', 'file_format', 'dense_slot_index', 'sparse_slot_index', 'capacity', 'thread_num', 'batch_size', 'file_list', 'slots', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b2ebf3de2a6ef1af2c3b88d2db7591ab'))
paddle.fluid.contrib.build_compressor (ArgSpec(args=['place', 'data_reader', 'data_feeder', 'scope', 'metrics', 'epoch', 'config'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.CompressPass.__init__ (ArgSpec(args=['self', 'place', 'data_reader', 'data_feeder', 'scope', 'metrics', 'epoch', 'program_exe'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.CompressPass.add_strategy (ArgSpec(args=['self', 'strategy'], varargs=None, keywords=None, defaults=None), ('document', '3bf6010b6f47d3c86df0ec8957be95e0'))
paddle.fluid.contrib.CompressPass.apply (ArgSpec(args=['self', 'graph'], varargs=None, keywords=None, defaults=None), ('document', 'a92bf85d4b59bd4f2ac1706d7c4899a6'))
paddle.fluid.contrib.ImitationGraph.__init__ (ArgSpec(args=['self', 'program'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.ImitationGraph.all_parameters (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.SensitivePruneStrategy.__init__ (ArgSpec(args=['self', 'pruner', 'start_epoch', 'end_epoch', 'delta_rate', 'acc_loss_threshold', 'sensitivities'], varargs=None, keywords=None, defaults=(None, 0, 10, 0.2, 0.2, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.SensitivePruneStrategy.on_batch_begin (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.SensitivePruneStrategy.on_batch_end (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.SensitivePruneStrategy.on_compress_begin (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.SensitivePruneStrategy.on_compress_end (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.SensitivePruneStrategy.on_epoch_begin (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.SensitivePruneStrategy.on_epoch_end (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.MagnitudePruner.__init__ (ArgSpec(args=['self', 'threshold'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.MagnitudePruner.prune (ArgSpec(args=['self', 'param', 'threshold'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.RatioPruner.__init__ (ArgSpec(args=['self', 'ratios'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e7a81a325b296a9ca502ee5adb4fc85d'))
paddle.fluid.contrib.RatioPruner.prune (ArgSpec(args=['self', 'param', 'ratio'], varargs=None, keywords=None, defaults=(None,)), ('document', '358cbf2978c91028fb96a195a9884645'))
paddle.fluid.contrib.Compressor.__init__ (ArgSpec(args=['self', 'place', 'scope', 'train_program', 'train_reader', 'train_feed_list', 'train_fetch_list', 'eval_program', 'eval_reader', 'eval_feed_list', 'eval_fetch_list', 'teacher_programs', 'checkpoint_path', 'train_optimizer', 'distiller_optimizer'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None, [], './checkpoints', None, None)), ('document', '31ae143830c9bf6b43547dd546c5ba80'))
paddle.fluid.contrib.Compressor.config (ArgSpec(args=['self', 'config_file'], varargs=None, keywords=None, defaults=None), ('document', '780d9c007276ccbb95b292400d7807b0'))
paddle.fluid.contrib.Compressor.run (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'c6e43d6a078d307672283c1f36e04fe9'))
paddle.fluid.contrib.load_persistables_for_increment (ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var', 'lookup_table_var_path'], varargs=None, keywords=None, defaults=None), ('document', '2ab36d4f7a564f5f65e455807ad06c67'))
paddle.fluid.contrib.load_persistables_for_inference (ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var_name'], varargs=None, keywords=None, defaults=None), ('document', '59066bac9db0ac6ce414d05780b7333f'))
paddle.fluid.contrib.convert_dist_to_sparse_program (ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None), ('document', '74c39c595dc70d6be2f16d8e462d282b'))
@ -433,48 +420,59 @@ paddle.fluid.nets.img_conv_group (ArgSpec(args=['input', 'conv_num_filter', 'poo
paddle.fluid.optimizer.SGDOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'regularization', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.SGDOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.SGDOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.SGDOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.SGDOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.MomentumOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'momentum', 'use_nesterov', 'regularization', 'name'], varargs=None, keywords=None, defaults=(False, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.MomentumOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.MomentumOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.MomentumOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.MomentumOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.AdagradOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'epsilon', 'regularization', 'name', 'initial_accumulator_value'], varargs=None, keywords=None, defaults=(1e-06, None, None, 0.0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.AdagradOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.AdagradOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.AdagradOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.AdagradOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.AdamOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name', 'lazy_mode'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.AdamOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.AdamOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.AdamOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.AdamOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.AdamaxOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.AdamaxOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.AdamaxOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.AdamaxOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.AdamaxOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.DecayedAdagradOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'decay', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.DecayedAdagradOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.DecayedAdagradOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.DecayedAdagradOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.DecayedAdagradOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.FtrlOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'l1', 'l2', 'lr_power', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.0, 0.0, -0.5, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.FtrlOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.FtrlOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.FtrlOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.FtrlOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.RMSPropOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'rho', 'epsilon', 'momentum', 'centered', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, 0.0, False, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.RMSPropOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.RMSPropOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.RMSPropOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.RMSPropOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.AdadeltaOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'epsilon', 'rho', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1e-06, 0.95, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.AdadeltaOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.AdadeltaOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.AdadeltaOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.AdadeltaOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.ModelAverage.__init__ (ArgSpec(args=['self', 'average_window_rate', 'min_average_window', 'max_average_window', 'regularization', 'name'], varargs=None, keywords=None, defaults=(10000, 10000, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.ModelAverage.apply (ArgSpec(args=['self', 'executor', 'need_restore'], varargs=None, keywords=None, defaults=(True,)), ('document', '46234a5470590feb336346f70a3db715'))
paddle.fluid.optimizer.ModelAverage.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.ModelAverage.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.ModelAverage.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.ModelAverage.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.ModelAverage.restore (ArgSpec(args=['self', 'executor'], varargs=None, keywords=None, defaults=None), ('document', '18db9c70be9c4dd466f9844457b21bfe'))
paddle.fluid.optimizer.LarsMomentumOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'momentum', 'lars_coeff', 'lars_weight_decay', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.0005, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.LarsMomentumOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.LarsMomentumOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.LarsMomentumOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.LarsMomentumOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.backward.append_backward (ArgSpec(args=['loss', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '1a79bd7d10ae54ca763ec81bca36ba24'))
paddle.fluid.regularizer.L1DecayRegularizer.__init__ (ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))

@ -61,4 +61,6 @@ nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocat
cc_test(retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator)
cc_test(allocator_facade_test SRCS allocator_facade_test.cc DEPS allocator_facade)
cc_test(allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc DEPS allocator_facade)
cc_test(allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator_facade)

@ -0,0 +1,100 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#ifdef PADDLE_WITH_CUDA
DECLARE_double(fraction_of_gpu_memory_to_use);
DECLARE_double(fraction_of_cuda_pinned_memory_to_use);
DECLARE_uint64(initial_gpu_memory_in_mb);
DECLARE_uint64(reallocate_gpu_memory_in_mb);
DECLARE_int64(gpu_allocator_retry_time);
#endif
namespace paddle {
namespace memory {
namespace allocation {
//! Run allocate test cases for different places
void AllocateTestCases() {
auto &instance = AllocatorFacade::Instance();
platform::Place place;
size_t size = 1024;
{
place = platform::CPUPlace();
size = 1024;
auto cpu_allocation = instance.Alloc(place, size);
ASSERT_NE(cpu_allocation, nullptr);
ASSERT_NE(cpu_allocation->ptr(), nullptr);
ASSERT_EQ(cpu_allocation->place(), place);
ASSERT_EQ(cpu_allocation->size(), size);
}
#ifdef PADDLE_WITH_CUDA
{
place = platform::CUDAPlace(0);
size = 1024;
auto gpu_allocation = instance.Alloc(place, size);
ASSERT_NE(gpu_allocation, nullptr);
ASSERT_NE(gpu_allocation->ptr(), nullptr);
ASSERT_EQ(gpu_allocation->place(), place);
ASSERT_GE(gpu_allocation->size(), size);
}
{
// Allocate 2GB gpu memory
place = platform::CUDAPlace(0);
size = 2 * static_cast<size_t>(1 << 30);
auto gpu_allocation = instance.Alloc(place, size);
ASSERT_NE(gpu_allocation, nullptr);
ASSERT_NE(gpu_allocation->ptr(), nullptr);
ASSERT_EQ(gpu_allocation->place(), place);
ASSERT_GE(gpu_allocation->size(), size);
}
{
place = platform::CUDAPinnedPlace();
size = (1 << 20);
auto cuda_pinned_allocation =
instance.Alloc(platform::CUDAPinnedPlace(), 1 << 20);
ASSERT_NE(cuda_pinned_allocation, nullptr);
ASSERT_NE(cuda_pinned_allocation->ptr(), nullptr);
ASSERT_EQ(cuda_pinned_allocation->place(), place);
ASSERT_GE(cuda_pinned_allocation->size(), size);
}
#endif
}
TEST(Allocator, SpecifyGpuMemory) {
#ifdef PADDLE_WITH_CUDA
// Set to 0.0 to test FLAGS_initial_gpu_memory_in_mb and
// FLAGS_reallocate_gpu_memory_in_mb
FLAGS_fraction_of_gpu_memory_to_use = 0.0;
// 512 MB
FLAGS_initial_gpu_memory_in_mb = 512;
// 4 MB
FLAGS_reallocate_gpu_memory_in_mb = 4;
FLAGS_gpu_allocator_retry_time = 500;
FLAGS_fraction_of_cuda_pinned_memory_to_use = 0.5;
#endif
AllocateTestCases();
}
} // namespace allocation
} // namespace memory
} // namespace paddle

@ -19,6 +19,8 @@
#ifdef PADDLE_WITH_CUDA
DECLARE_double(fraction_of_gpu_memory_to_use);
DECLARE_double(fraction_of_cuda_pinned_memory_to_use);
DECLARE_uint64(initial_gpu_memory_in_mb);
DECLARE_uint64(reallocate_gpu_memory_in_mb);
DECLARE_int64(gpu_allocator_retry_time);
#endif
@ -26,13 +28,8 @@ namespace paddle {
namespace memory {
namespace allocation {
TEST(allocator, allocator) {
#ifdef PADDLE_WITH_CUDA
FLAGS_fraction_of_gpu_memory_to_use = 0.01;
FLAGS_gpu_allocator_retry_time = 500;
FLAGS_fraction_of_cuda_pinned_memory_to_use = 0.5;
#endif
//! Run allocate test cases for different places
void AllocateTestCases() {
auto &instance = AllocatorFacade::Instance();
platform::Place place;
size_t size = 1024;
@ -82,6 +79,16 @@ TEST(allocator, allocator) {
#endif
}
TEST(Allocator, Allocator) {
#ifdef PADDLE_WITH_CUDA
FLAGS_fraction_of_gpu_memory_to_use = 0.01;
FLAGS_gpu_allocator_retry_time = 500;
FLAGS_fraction_of_cuda_pinned_memory_to_use = 0.5;
#endif
AllocateTestCases();
}
} // namespace allocation
} // namespace memory
} // namespace paddle

@ -37,6 +37,8 @@ DEFINE_bool(init_allocated_mem, false,
"that initializing the allocated memory with a small value "
"during unit testing.");
DECLARE_double(fraction_of_gpu_memory_to_use);
DECLARE_uint64(initial_gpu_memory_in_mb);
DECLARE_uint64(reallocate_gpu_memory_in_mb);
DECLARE_bool(benchmark);
namespace paddle {
@ -153,12 +155,18 @@ BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) {
platform::GpuMinChunkSize(),
platform::GpuMaxChunkSize());
VLOG(10) << "\n\nNOTE: each GPU device use "
<< FLAGS_fraction_of_gpu_memory_to_use * 100
<< "% of GPU memory.\n"
<< "You can set GFlags environment variable '"
<< "FLAGS_fraction_of_gpu_memory_to_use"
<< "' to change the fraction of GPU usage.\n\n";
VLOG(10) << "\n\nNOTE:\n"
<< "You can set GFlags environment variable "
<< "'FLAGS_fraction_of_gpu_memory_to_use' "
<< "or 'FLAGS_initial_gpu_memory_in_mb' "
<< "or 'FLAGS_reallocate_gpu_memory_in_mb' "
<< "to change the memory size for GPU usage.\n"
<< "Current 'FLAGS_fraction_of_gpu_memory_to_use' value is "
<< FLAGS_fraction_of_gpu_memory_to_use
<< ". Current 'FLAGS_initial_gpu_memory_in_mb' value is "
<< FLAGS_initial_gpu_memory_in_mb
<< ". Current 'FLAGS_reallocate_gpu_memory_in_mb' value is "
<< FLAGS_reallocate_gpu_memory_in_mb << "\n\n";
}
});

@ -9,3 +9,5 @@ endif(${WITH_GPU})
cc_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator)
cc_library(buddy_allocator SRCS buddy_allocator.cc DEPS memory_block system_allocator glog)
cc_test(buddy_allocator_test SRCS buddy_allocator_test.cc DEPS buddy_allocator)

@ -13,6 +13,10 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/memory/detail/buddy_allocator.h"
#include <algorithm>
#include <utility>
#include "glog/logging.h"
DEFINE_bool(free_idle_memory, false,
@ -36,9 +40,10 @@ BuddyAllocator::~BuddyAllocator() {
"have actually been freed";
while (!pool_.empty()) {
auto block = static_cast<MemoryBlock*>(std::get<2>(*pool_.begin()));
VLOG(10) << "Free from block (" << block << ", " << max_chunk_size_ << ")";
VLOG(10) << "Free from block (" << block << ", " << block->size(cache_)
<< ")";
system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
system_allocator_->Free(block, block->size(cache_), block->index(cache_));
cache_.invalidate(block);
pool_.erase(pool_.begin());
}
@ -71,7 +76,7 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) {
// refill the pool if failure
if (it == pool_.end()) {
it = RefillPool();
it = RefillPool(size);
// if still failure, fail fatally
if (it == pool_.end()) {
return nullptr;
@ -184,19 +189,28 @@ void* BuddyAllocator::SystemAlloc(size_t size) {
return static_cast<MemoryBlock*>(p)->data();
}
BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() {
BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool(
size_t request_bytes) {
size_t allocate_bytes = max_chunk_size_;
size_t index = 0;
#ifdef PADDLE_WITH_CUDA
if (system_allocator_->UseGpu()) {
if ((total_used_ + total_free_) == 0) {
// Compute the maximum allocation size for the first allocation.
max_chunk_size_ = platform::GpuMaxChunkSize();
// Compute the allocation size for gpu for the first allocation.
allocate_bytes = std::max(platform::GpuInitAllocSize(), request_bytes);
} else {
// Reallocation size
if (realloc_size_ == 0) {
realloc_size_ = platform::GpuReallocSize();
}
allocate_bytes = std::max(realloc_size_, request_bytes);
}
}
#endif
// Allocate a new maximum sized block
size_t index = 0;
void* p = system_allocator_->Alloc(&index, max_chunk_size_);
// Allocate a new block
void* p = system_allocator_->Alloc(&index, allocate_bytes);
if (p == nullptr) return pool_.end();
@ -204,7 +218,7 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() {
<< " from system allocator";
static_cast<MemoryBlock*>(p)->init(&cache_, MemoryBlock::FREE_CHUNK, index,
max_chunk_size_, nullptr, nullptr);
allocate_bytes, nullptr, nullptr);
// gpu fallback allocation
if (system_allocator_->UseGpu() &&
@ -212,10 +226,10 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() {
fallback_alloc_count_++;
}
total_free_ += max_chunk_size_;
total_free_ += allocate_bytes;
// dump the block into pool
return pool_.insert(IndexSizeAddress(index, max_chunk_size_, p)).first;
return pool_.insert(IndexSizeAddress(index, allocate_bytes, p)).first;
}
BuddyAllocator::PoolSet::iterator BuddyAllocator::FindExistChunk(size_t size) {
@ -286,12 +300,12 @@ void BuddyAllocator::CleanIdleFallBackAlloc() {
VLOG(10) << "Return block " << block << " to fallback allocator.";
system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
system_allocator_->Free(block, block->size(cache_), block->index(cache_));
cache_.invalidate(block);
pool = PoolSet::reverse_iterator(pool_.erase(std::next(pool).base()));
total_free_ -= max_chunk_size_;
total_free_ -= block->size(cache_);
fallback_alloc_count_--;
// If no fall allocation exists, return directly
@ -322,12 +336,12 @@ void BuddyAllocator::CleanIdleNormalAlloc() {
VLOG(10) << "Return block " << block << " to base allocator.";
system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
system_allocator_->Free(block, block->size(cache_), block->index(cache_));
cache_.invalidate(block);
pool = PoolSet::reverse_iterator(pool_.erase(std::next(pool).base()));
total_free_ -= max_chunk_size_;
total_free_ -= block->size(cache_);
if (!shall_free_alloc()) return;
}

@ -60,7 +60,7 @@ class BuddyAllocator {
void* SystemAlloc(size_t size);
/*! \brief If existing chunks are not suitable, refill pool */
PoolSet::iterator RefillPool();
PoolSet::iterator RefillPool(size_t request_bytes);
/**
* \brief Find the suitable chunk from existing pool and split
@ -89,6 +89,8 @@ class BuddyAllocator {
size_t min_chunk_size_; // the minimum size of each chunk
size_t max_chunk_size_; // the maximum size of each chunk
size_t realloc_size_ = 0; // the size of re-allocated chunk
private:
/**
* \brief A list of free allocation

@ -0,0 +1,133 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/memory/detail/buddy_allocator.h"
#include <memory>
#include "gflags/gflags.h"
#include "gtest/gtest.h"
#include "paddle/fluid/memory/detail/system_allocator.h"
#include "paddle/fluid/platform/gpu_info.h"
#ifdef PADDLE_WITH_CUDA
DECLARE_double(fraction_of_gpu_memory_to_use);
DECLARE_uint64(initial_gpu_memory_in_mb);
DECLARE_uint64(reallocate_gpu_memory_in_mb);
#endif
namespace paddle {
namespace memory {
namespace detail {
constexpr static int test_gpu_id = 0;
void TestBuddyAllocator(BuddyAllocator* allocator, size_t size_bytes) {
bool freed = false;
size_t used_bytes = allocator->Used();
if (size_bytes > 0) {
void* p = allocator->Alloc(size_bytes);
EXPECT_NE(p, nullptr);
#ifdef PADDLE_WITH_CUDA
if (size_bytes < platform::GpuMaxChunkSize()) {
#else
if (size_bytes < platform::CpuMaxChunkSize()) {
#endif
// Not allocate from SystemAllocator
EXPECT_GE(allocator->Used(), used_bytes + size_bytes);
} else {
// Allocate from SystemAllocator doesn't count in Used()
EXPECT_EQ(allocator->Used(), used_bytes);
}
int* intp = static_cast<int*>(p);
std::shared_ptr<int> ptr(intp, [&](void* p) {
allocator->Free(intp);
freed = true;
});
} else {
freed = true;
}
EXPECT_EQ(used_bytes, allocator->Used());
EXPECT_TRUE(freed);
}
#ifdef PADDLE_WITH_CUDA
TEST(BuddyAllocator, GpuFraction) {
FLAGS_fraction_of_gpu_memory_to_use = 0.01;
BuddyAllocator buddy_allocator(
std::unique_ptr<SystemAllocator>(new GPUAllocator(test_gpu_id)),
platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
TestBuddyAllocator(&buddy_allocator, 10);
TestBuddyAllocator(&buddy_allocator, 10 << 10);
TestBuddyAllocator(&buddy_allocator, 10 << 20);
TestBuddyAllocator(&buddy_allocator, 2 * static_cast<size_t>(1 << 30));
}
TEST(BuddyAllocator, InitRealloc) {
FLAGS_initial_gpu_memory_in_mb = 100;
FLAGS_reallocate_gpu_memory_in_mb = 50;
EXPECT_EQ(platform::GpuMaxChunkSize(), static_cast<size_t>(100 << 20));
BuddyAllocator buddy_allocator(
std::unique_ptr<SystemAllocator>(new GPUAllocator(test_gpu_id)),
platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
// Less then initial size and reallocate size
TestBuddyAllocator(&buddy_allocator, 10 << 20);
// Between initial size and reallocate size and not exceed pool
TestBuddyAllocator(&buddy_allocator, 80 << 20);
// Less then reallocate size and exceed pool
TestBuddyAllocator(&buddy_allocator, 40 << 20);
// Greater then reallocate size and exceed pool
TestBuddyAllocator(&buddy_allocator, 80 << 20);
// Greater then initial size and reallocate size
TestBuddyAllocator(&buddy_allocator, 2 * static_cast<size_t>(1 << 30));
}
TEST(BuddyAllocator, ReallocSizeGreaterThanInit) {
FLAGS_initial_gpu_memory_in_mb = 5;
FLAGS_reallocate_gpu_memory_in_mb = 10;
EXPECT_EQ(platform::GpuMaxChunkSize(), static_cast<size_t>(10 << 20));
BuddyAllocator buddy_allocator(
std::unique_ptr<SystemAllocator>(new GPUAllocator(test_gpu_id)),
platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
// Less then initial size and reallocate size
TestBuddyAllocator(&buddy_allocator, 1 << 20);
// Between initial size and reallocate size and not exceed pool
TestBuddyAllocator(&buddy_allocator, 3 << 20);
// Less then initial size and exceed pool
TestBuddyAllocator(&buddy_allocator, 3 << 20);
// Less then reallocate size and not exceed pool (now pool is 15 MB, used 7
// MB)
TestBuddyAllocator(&buddy_allocator, 7 << 20);
// Less then reallocate size and exceed pool
TestBuddyAllocator(&buddy_allocator, 8 << 20);
// Greater then initial size and reallocate size
TestBuddyAllocator(&buddy_allocator, 2 * static_cast<size_t>(1 << 30));
}
#endif
} // namespace detail
} // namespace memory
} // namespace paddle

@ -32,6 +32,9 @@ limitations under the License. */
DECLARE_bool(use_pinned_memory);
DECLARE_double(fraction_of_gpu_memory_to_use);
DECLARE_uint64(initial_gpu_memory_in_mb);
DECLARE_uint64(reallocate_gpu_memory_in_mb);
namespace paddle {
namespace memory {
namespace detail {
@ -119,11 +122,18 @@ void* GPUAllocator::Alloc(size_t* index, size_t size) {
gpu_alloc_size_ += size;
return p;
} else {
LOG(WARNING)
<< "Cannot malloc " << size / 1024.0 / 1024.0
<< " MB GPU memory. Please shrink FLAGS_fraction_of_gpu_memory_to_use "
"environment variable to a lower value. Current value is "
<< FLAGS_fraction_of_gpu_memory_to_use;
LOG(WARNING) << "Cannot malloc " << size / 1024.0 / 1024.0
<< " MB GPU memory. Please shrink "
"FLAGS_fraction_of_gpu_memory_to_use or "
"FLAGS_initial_gpu_memory_in_mb or "
"FLAGS_reallocate_gpu_memory_in_mb"
"environment variable to a lower value. "
<< "Current FLAGS_fraction_of_gpu_memory_to_use value is "
<< FLAGS_fraction_of_gpu_memory_to_use
<< ". Current FLAGS_initial_gpu_memory_in_mb value is "
<< FLAGS_initial_gpu_memory_in_mb
<< ". Current FLAGS_reallocate_gpu_memory_in_mb value is "
<< FLAGS_reallocate_gpu_memory_in_mb;
return nullptr;
}
}

@ -0,0 +1,69 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/range_op.h"
namespace paddle {
namespace operators {
class RangeOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *ctx) const override {
if (ctx->HasInput("Start")) {
auto s_dims = ctx->GetInputDim("Start");
PADDLE_ENFORCE((s_dims.size() == 1) && (s_dims[0] == 1),
"The shape of Input(Start) should be [1].");
}
if (ctx->HasInput("End")) {
auto e_dims = ctx->GetInputDim("End");
PADDLE_ENFORCE((e_dims.size() == 1) && (e_dims[0] == 1),
"The shape of Input(End) should be [1].");
}
if (ctx->HasInput("Step")) {
auto step_dims = ctx->GetInputDim("Step");
PADDLE_ENFORCE((step_dims.size() == 1) && (step_dims[0] == 1),
"The shape of Input(Step) should be [1].");
}
ctx->SetOutputDim("Out", {-1});
}
};
class RangeOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("Start",
"Start of interval. The interval includes this value. It is a "
"tensor with shape=[1].");
AddInput("End",
"End of interval. The interval does not include this value, "
"except in some cases where step is not an integer and floating "
"point round-off affects the length of out. It is a tensor with "
"shape=[1].");
AddInput("Step", "Spacing between values. It is a tensor with shape=[1].");
AddOutput("Out", "A sequence of numbers.");
AddComment(R"DOC(
Return evenly spaced values within a given interval. Values are generated within the half-open interval [start, stop) (in other words, the interval including start but excluding stop). Like arange function of numpy.
)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(range, ops::RangeOp, ops::RangeOpMaker);
REGISTER_OP_CPU_KERNEL(range, ops::CPURangeKernel<int>,
ops::CPURangeKernel<float>, ops::CPURangeKernel<double>,
ops::CPURangeKernel<int64_t>);

@ -0,0 +1,67 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/range_op.h"
#include "paddle/fluid/platform/cuda_primitives.h"
namespace paddle {
namespace operators {
#define CUDA_1D_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
i += blockDim.x * gridDim.x)
template <typename T>
__global__ void RangeKernel(T start, T step, int64_t size, T* out) {
CUDA_1D_KERNEL_LOOP(index, size) { out[index] = start + step * index; }
}
template <typename T>
class CUDARangeKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* start_t = context.Input<framework::Tensor>("Start");
auto* end_t = context.Input<framework::Tensor>("End");
auto* step_t = context.Input<framework::Tensor>("Step");
auto* out = context.Output<framework::Tensor>("Out");
framework::Tensor n;
framework::TensorCopy(*start_t, platform::CPUPlace(), &n);
T start = n.data<T>()[0];
framework::TensorCopy(*end_t, platform::CPUPlace(), &n);
T end = n.data<T>()[0];
framework::TensorCopy(*step_t, platform::CPUPlace(), &n);
T step = n.data<T>()[0];
int64_t size = 0;
GetSize(start, end, step, &size);
out->Resize(framework::make_ddim({size}));
T* out_data = out->mutable_data<T>(context.GetPlace());
auto stream = context.cuda_device_context().stream();
int block = 512;
int grid = (size + block - 1) / block;
RangeKernel<T><<<grid, block, 0, stream>>>(start, step, size, out_data);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(range, ops::CUDARangeKernel<int>,
ops::CUDARangeKernel<int64_t>,
ops::CUDARangeKernel<float>,
ops::CUDARangeKernel<double>);

@ -0,0 +1,56 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <functional>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/math_function.h"
namespace paddle {
namespace operators {
template <typename T>
void GetSize(T start, T end, T step, int64_t* size) {
PADDLE_ENFORCE(!std::equal_to<T>()(step, 0),
"The step of range op should not be 0.");
PADDLE_ENFORCE(((start < end) && (step > 0)) || ((start > end) && (step < 0)),
"The step should be greater than 0 while start < end. And the "
"step should be less than 0 while start > end.");
*size = std::is_integral<T>::value
? ((std::abs(end - start) + std::abs(step) - 1) / std::abs(step))
: std::ceil(std::abs((end - start) / step));
}
template <typename T>
class CPURangeKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
T start = context.Input<framework::Tensor>("Start")->data<T>()[0];
T end = context.Input<framework::Tensor>("End")->data<T>()[0];
T step = context.Input<framework::Tensor>("Step")->data<T>()[0];
auto* out = context.Output<framework::Tensor>("Out");
int64_t size = 0;
GetSize(start, end, step, &size);
out->Resize(framework::make_ddim({size}));
T* out_data = out->mutable_data<T>(context.GetPlace());
T value = start;
for (int64_t i = 0; i < size; ++i) {
out_data[i] = value;
value += step;
}
}
};
} // namespace operators
} // namespace paddle

@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/gpu_info.h"
#include <algorithm>
#include <cstdlib>
#include <string>
@ -31,6 +30,8 @@ constexpr static float fraction_of_gpu_memory_to_use = 0.92f;
constexpr static float fraction_of_gpu_memory_to_use = 0.5f;
#endif
constexpr static float fraction_reserve_gpu_memory = 0.05f;
DEFINE_double(fraction_of_gpu_memory_to_use, fraction_of_gpu_memory_to_use,
"Allocate a trunk of gpu memory that is this fraction of the "
"total gpu memory size. Future memory usage will be allocated "
@ -38,6 +39,24 @@ DEFINE_double(fraction_of_gpu_memory_to_use, fraction_of_gpu_memory_to_use,
"additional trunks of the same size will be requested from gpu "
"until the gpu has no memory left for another trunk.");
DEFINE_uint64(
initial_gpu_memory_in_mb, 0ul,
"Allocate a trunk of gpu memory whose byte size is specified by "
"the flag. Future memory usage will be allocated from the "
"truck. If the trunk doesn't have enough gpu memory, additional "
"trunks of the gpu memory will be requested from gpu with size "
"specified by FLAGS_reallocate_gpu_memory_in_mb until the gpu has "
"no memory left for the additional trunk. Note: if you set this "
"flag, the memory size set by "
"FLAGS_fraction_of_gpu_memory_to_use will be overrided by this "
"flag. If you don't set this flag, PaddlePaddle will use "
"FLAGS_fraction_of_gpu_memory_to_use to allocate gpu memory");
DEFINE_uint64(reallocate_gpu_memory_in_mb, 0ul,
"If this flag is set, Paddle will reallocate the gpu memory with "
"size specified by this flag. Else Paddle will reallocate by "
"FLAGS_fraction_of_gpu_memory_to_use");
DEFINE_bool(
enable_cublas_tensor_op_math, false,
"The enable_cublas_tensor_op_math indicate whether to use Tensor Core, "
@ -180,13 +199,43 @@ void GpuMemoryUsage(size_t *available, size_t *total) {
}
size_t GpuMaxAllocSize() {
return std::max(GpuInitAllocSize(), GpuReallocSize());
}
size_t GpuInitAllocSize() {
if (FLAGS_initial_gpu_memory_in_mb > 0ul) {
// Initial memory will be allocated by FLAGS_initial_gpu_memory_in_mb
return static_cast<size_t>(FLAGS_initial_gpu_memory_in_mb << 20);
}
// FLAGS_initial_gpu_memory_in_mb is 0, initial memory will be allocated by
// fraction
size_t total = 0;
size_t available = 0;
GpuMemoryUsage(&available, &total);
size_t reserving = static_cast<size_t>(fraction_reserve_gpu_memory * total);
// Reserve the rest for page tables, etc.
return static_cast<size_t>(total * FLAGS_fraction_of_gpu_memory_to_use);
return static_cast<size_t>((total - reserving) *
FLAGS_fraction_of_gpu_memory_to_use);
}
size_t GpuReallocSize() {
if (FLAGS_reallocate_gpu_memory_in_mb > 0ul) {
// Additional memory will be allocated by FLAGS_reallocate_gpu_memory_in_mb
return static_cast<size_t>(FLAGS_reallocate_gpu_memory_in_mb << 20);
}
// FLAGS_reallocate_gpu_memory_in_mb is 0, additional memory will be allocated
// by fraction
size_t total = 0;
size_t available = 0;
GpuMemoryUsage(&available, &total);
size_t reserving = static_cast<size_t>(fraction_reserve_gpu_memory * total);
return static_cast<size_t>((total - reserving) *
FLAGS_fraction_of_gpu_memory_to_use);
}
size_t GpuMinChunkSize() {
@ -201,16 +250,13 @@ size_t GpuMaxChunkSize() {
GpuMemoryUsage(&available, &total);
VLOG(10) << "GPU Usage " << available / 1024 / 1024 << "M/"
<< total / 1024 / 1024 << "M";
size_t reserving = static_cast<size_t>(0.05 * total);
size_t reserving = static_cast<size_t>(fraction_reserve_gpu_memory * total);
// If available less than minimum chunk size, no usable memory exists.
available =
std::min(std::max(available, GpuMinChunkSize()) - GpuMinChunkSize(),
total - reserving);
// Reserving the rest memory for page tables, etc.
size_t allocating = static_cast<size_t>(FLAGS_fraction_of_gpu_memory_to_use *
(total - reserving));
size_t allocating = GpuMaxAllocSize();
PADDLE_ENFORCE_LE(allocating, available,
"Insufficient GPU memory to allocation.");

@ -60,6 +60,12 @@ void GpuMemoryUsage(size_t *available, size_t *total);
//! Get the maximum allocation size of current GPU device.
size_t GpuMaxAllocSize();
//! Get the initial allocation size of current GPU device.
size_t GpuInitAllocSize();
//! Get the re-allocation size of current GPU device.
size_t GpuReallocSize();
//! Get the minimum chunk size for GPU buddy allocator.
size_t GpuMinChunkSize();

@ -41,6 +41,8 @@ int main(int argc, char** argv) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
envs.push_back("fraction_of_gpu_memory_to_use");
envs.push_back("initial_gpu_memory_in_mb");
envs.push_back("reallocate_gpu_memory_in_mb");
envs.push_back("allocator_strategy");
#elif __clang__
envs.push_back("use_mkldnn");

@ -163,7 +163,8 @@ def __bootstrap__():
if core.is_compiled_with_cuda():
read_env_flags += [
'fraction_of_gpu_memory_to_use', 'cudnn_deterministic',
'fraction_of_gpu_memory_to_use', 'initial_gpu_memory_in_mb',
'reallocate_gpu_memory_in_mb', 'cudnn_deterministic',
'enable_cublas_tensor_op_math', 'conv_workspace_size_limit',
'cudnn_exhaustive_search', 'memory_optimize_debug', 'selected_gpus',
'sync_nccl_allreduce', 'limit_of_tmp_allocation',

@ -13,13 +13,4 @@
# limitations under the License.
from .core import *
from .graph import *
from .prune import *
__all__ = [
'build_compressor',
'CompressPass',
'ImitationGraph',
'SensitivePruneStrategy',
'MagnitudePruner',
'RatioPruner',
]
__all__ = ['Compressor', ]

@ -14,11 +14,9 @@
from . import config
from .config import *
from . import compress_pass
from .compress_pass import *
from . import compressor
from .compressor import *
from . import strategy
from .strategy import *
from . import pass_builder
from .pass_builder import *
__all__ = config.__all__ + compress_pass.__all__ + strategy.__all__ + pass_builder.__all__
__all__ = config.__all__ + compressor.__all__ + strategy.__all__

@ -1,129 +0,0 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ....core import CPUPlace
from ..graph import get_executor
__all__ = ['Context', 'CompressPass']
class Context(object):
"""
The context in the process of compression.
Args:
exe: The executor used to execute graph.
graph: The graph to be compressed.
scope: The scope used to execute graph.
program_exe: The program_exe is used to execute the program
created for modifying the variables in scope.
"""
def __init__(self, exe, graph, scope, program_exe=None):
# The total number of epoches to be trained.
self.epoch = 0
# Current epoch
self.epoch_id = 0
# Current batch
self.batch_id = 0
self.exe = exe
self.graph = graph
self.scope = scope
self.program_exe = program_exe
class CompressPass(object):
"""
The pass used to compress model.
Args:
place: The device used in compression.
data_reader: The data_reader used to run graph.
data_feeder: The data_feeder used to run graph.
scope: The scope used to run graph.
metrics: The metrics for evaluating model.
epoch: The total epoches of trainning in compression.
program_exe: The program_exe is used to execute the program
created for modifying the variables in scope.
"""
def __init__(self,
place=None,
data_reader=None,
data_feeder=None,
scope=None,
metrics=None,
epoch=None,
program_exe=None):
self.strategies = []
self.place = CPUPlace() if place is None else place
self.data_reader = data_reader
self.data_feeder = data_feeder
self.scope = scope
self.metrics = metrics
self.epoch = epoch
self.program_exe = program_exe
def add_strategy(self, strategy):
"""
Add a strategy to current compress pass.
Args:
strategy: The strategy to be added into current compress pass.
"""
self.strategies.append(strategy)
self.epoch = max(strategy.end_epoch, self.epoch)
def apply(self, graph):
"""
Compress a model.
Args:
graph: The target graph to be compressed.
"""
self.executor = get_executor(graph, self.place)
context = Context(
self.executor, graph, self.scope, program_exe=self.program_exe)
for strategy in self.strategies:
strategy.on_compress_begin(context)
for epoch in range(self.epoch):
for strategy in self.strategies:
strategy.on_epoch_begin(context)
for data in self.data_reader():
for strategy in self.strategies:
strategy.on_batch_begin(context)
fetches = None
if self.metrics:
fetches = self.metrics.values()
feed = None
if self.data_feeder:
feed = self.data_feeder.feed(data)
results = self.executor.run(graph,
fetches=fetches,
scope=self.scope,
feed=feed)
if results:
print("results: {}".format(
zip(self.metrics.keys(), results)))
for strategy in self.strategies:
strategy.on_batch_end(context)
context.batch_id += 1
for strategy in self.strategies:
strategy.on_epoch_end(context)
context.epoch_id += 1
for strategy in self.strategies:
strategy.on_compress_end(context)

File diff suppressed because it is too large Load Diff

@ -17,7 +17,7 @@ import funcsigs
import yaml
from collections import OrderedDict
from ..prune import *
from .compress_pass import *
from ..quantization import *
from .strategy import *
__all__ = ['ConfigFactory']
@ -29,15 +29,10 @@ class ConfigFactory(object):
def __init__(self, config):
"""Init a factory from configure file."""
self.instances = {}
self.compressor = {}
self.version = None
self._parse_config(config)
def get_compress_pass(self):
"""
Get compress pass from factory.
"""
return self.instance('compress_pass')
def instance(self, name):
"""
Get instance from factory.
@ -59,8 +54,16 @@ class ConfigFactory(object):
args = {}
for key in keys:
value = attrs[key]
if isinstance(value, str) and value.lower() == 'none':
value = None
if isinstance(value, str) and value in self.instances:
value = self.instances[value]
if isinstance(value, list):
for i in range(len(value)):
if isinstance(value[i],
str) and value[i] in self.instances:
value[i] = self.instances[value[i]]
args[key] = value
self.instances[name] = class_(**args)
return self.instances.get(name)
@ -76,16 +79,23 @@ class ConfigFactory(object):
assert self.version == int(key_values['version'])
# parse pruners
if key == 'pruners' or key == 'strategies':
if key == 'distillers' or key == 'pruners' or key == 'quantizers' or key == 'strategies':
instances = key_values[key]
for name in instances:
self._new_instance(name, instances[name])
if key == 'compress_pass':
compress_pass = self._new_instance(key, key_values[key])
for name in key_values[key]['strategies']:
strategy = self.instance(name)
compress_pass.add_strategy(strategy)
if key == 'compressor':
self.compressor['strategies'] = []
self.compressor['epoch'] = key_values[key]['epoch']
if 'init_model' in key_values[key]:
self.compressor['init_model'] = key_values[key][
'init_model']
self.compressor['checkpoint_path'] = key_values[key][
'checkpoint_path']
if 'strategies' in key_values[key]:
for name in key_values[key]['strategies']:
strategy = self.instance(name)
self.compressor['strategies'].append(strategy)
if key == 'include':
for config_file in key_values[key]:

@ -1,39 +0,0 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .compress_pass import CompressPass
from .config import ConfigFactory
__all__ = ['build_compressor']
def build_compressor(place=None,
data_reader=None,
data_feeder=None,
scope=None,
metrics=None,
epoch=None,
config=None):
if config is not None:
factory = ConfigFactory(config)
comp_pass = factory.get_compress_pass()
else:
comp_pass = CompressPass()
comp_pass.place = place
comp_pass.data_reader = data_reader
comp_pass.data_feeder = data_feeder
comp_pass.scope = scope
comp_pass.metrics = metrics
comp_pass.epoch = epoch
return comp_pass

@ -20,7 +20,7 @@ class Strategy(object):
Base class for all strategies.
"""
def __init__(self, start_epoch=0, end_epoch=10):
def __init__(self, start_epoch=0, end_epoch=0):
"""
Args:
start_epoch: The first epoch to apply the strategy.
@ -29,7 +29,7 @@ class Strategy(object):
self.start_epoch = start_epoch
self.end_epoch = end_epoch
def on_compress_begin(self, context):
def on_compression_begin(self, context):
pass
def on_epoch_begin(self, context):
@ -44,5 +44,5 @@ class Strategy(object):
def on_batch_end(self, context):
pass
def on_compress_end(self, context):
def on_compression_end(self, context):
pass

@ -1,28 +0,0 @@
version: 1.0
pruners:
pruner_1:
class: 'RatioPruner'
ratios:
'conv1_1.w': 0.3
'conv1_2.w': 0.4
'*': 0.9
group_dims:
'*': [1, 2, 3]
criterions:
'*': 'l1-norm'
strategies:
strategy_1:
class: 'SensitivePruneStrategy'
pruner: 'pruner_1'
start_epoch: 0
end_epoch: 10
delta_rate: 0.20
acc_loss_threshold: 0.2
sensitivities:
'conv1_1.w': 0.4
compress_pass:
class: 'CompressPass'
epoch: 100
strategies:
- strategy_1

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save