From 3959023099f25c590ec72f701976c7b4e1233174 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Sat, 24 Feb 2018 16:28:13 +0800 Subject: [PATCH 01/11] Enhance layer_function_generator * Generated functions can take `*args` as inputs. --- .../v2/fluid/layers/layer_function_generator.py | 5 ++++- python/paddle/v2/fluid/layers/tensor.py | 4 ++-- .../v2/fluid/tests/book/notest_rnn_encoder_decoer.py | 2 +- python/paddle/v2/fluid/tests/book/test_fit_a_line.py | 2 +- .../v2/fluid/tests/book/test_image_classification.py | 2 +- .../v2/fluid/tests/book/test_label_semantic_roles.py | 2 +- .../v2/fluid/tests/book/test_machine_translation.py | 2 +- .../v2/fluid/tests/book/test_recognize_digits.py | 6 +++--- .../v2/fluid/tests/book/test_recommender_system.py | 2 +- .../v2/fluid/tests/book/test_understand_sentiment.py | 10 +++++----- python/paddle/v2/fluid/tests/book/test_word2vec.py | 4 ++-- .../tests/book_distribute/notest_dist_fit_a_line.py | 2 +- .../notest_dist_image_classification.py | 2 +- .../notest_dist_label_semantic_roles.py | 2 +- .../tests/book_distribute/notest_dist_word2vec.py | 2 +- .../book_distribute/notest_machine_translation.py | 2 +- .../notest_recognize_digits_conv_dist.py | 2 +- .../notest_recognize_digits_mlp_dist.py | 2 +- .../notest_recommender_system_dist.py | 2 +- .../notest_understand_sentiment_conv_dist.py | 2 +- .../notest_understand_sentiment_dynamic_lstm.py | 2 +- .../test_memopt_fit_a_line.py | 2 +- .../test_memopt_image_classification_train.py | 2 +- .../test_memopt_machine_translation.py | 2 +- python/paddle/v2/fluid/tests/demo/fc_gan.py | 4 ++-- python/paddle/v2/fluid/tests/test_error_clip.py | 2 +- python/paddle/v2/fluid/tests/test_gradient_clip.py | 2 +- .../paddle/v2/fluid/tests/test_mnist_if_else_op.py | 4 ++-- .../tests/unittests/test_array_read_write_op.py | 12 ++++++------ .../v2/fluid/tests/unittests/test_calc_gradient.py | 2 +- .../fluid/tests/unittests/test_conditional_block.py | 2 +- .../paddle/v2/fluid/tests/unittests/test_dyn_rnn.py | 4 ++-- .../tests/unittests/test_dynrnn_gradient_check.py | 4 ++-- .../tests/unittests/test_dynrnn_static_input.py | 2 +- .../fluid/tests/unittests/test_inference_model_io.py | 2 +- .../paddle/v2/fluid/tests/unittests/test_layers.py | 10 +++++----- .../tests/unittests/test_lod_tensor_array_ops.py | 2 +- .../unittests/test_memory_optimization_transpiler.py | 2 +- .../v2/fluid/tests/unittests/test_parallel_op.py | 6 +++--- .../paddle/v2/fluid/tests/unittests/test_print_op.py | 2 +- .../paddle/v2/fluid/tests/unittests/test_profiler.py | 2 +- .../v2/fluid/tests/unittests/test_recurrent_op.py | 8 ++++---- .../paddle/v2/fluid/tests/unittests/test_registry.py | 2 +- .../fluid/tests/unittests/test_shrink_rnn_memory.py | 2 +- .../unittests/test_split_and_merge_lod_tensor_op.py | 2 +- .../paddle/v2/fluid/tests/unittests/test_while_op.py | 2 +- 46 files changed, 75 insertions(+), 72 deletions(-) diff --git a/python/paddle/v2/fluid/layers/layer_function_generator.py b/python/paddle/v2/fluid/layers/layer_function_generator.py index 88c9ae31b7..16a401dc7b 100644 --- a/python/paddle/v2/fluid/layers/layer_function_generator.py +++ b/python/paddle/v2/fluid/layers/layer_function_generator.py @@ -155,7 +155,7 @@ def generate_layer_fn(op_type): return dtype - def func(**kwargs): + def func(*args, **kwargs): helper = LayerHelper(op_type, **kwargs) dtype = infer_and_check_dtype(op_proto, **kwargs) @@ -166,6 +166,9 @@ def generate_layer_fn(op_type): val = kwargs.pop(name, []) if not isinstance(val, list) and not isinstance(val, tuple): val = [val] + if len(val) == 0 and len(args) != 0: + val = args[0] + args = args[1:] inputs[ipt.name] = val outputs = dict() diff --git a/python/paddle/v2/fluid/layers/tensor.py b/python/paddle/v2/fluid/layers/tensor.py index 97e8f082cf..8100e8f034 100644 --- a/python/paddle/v2/fluid/layers/tensor.py +++ b/python/paddle/v2/fluid/layers/tensor.py @@ -160,8 +160,8 @@ def sums(input, out=None): a0 = layers.array_read(array=tmp, i=i) i = layers.increment(x=i) a1 = layers.array_read(array=tmp, i=i) - mean_a0 = layers.mean(x=a0) - mean_a1 = layers.mean(x=a1) + mean_a0 = layers.mean(a0) + mean_a1 = layers.mean(a1) a_sum = layers.sums(input=[mean_a0, mean_a1]) """ helper = LayerHelper('sum', **locals()) diff --git a/python/paddle/v2/fluid/tests/book/notest_rnn_encoder_decoer.py b/python/paddle/v2/fluid/tests/book/notest_rnn_encoder_decoer.py index c7db70f1b1..0054bb6bec 100644 --- a/python/paddle/v2/fluid/tests/book/notest_rnn_encoder_decoer.py +++ b/python/paddle/v2/fluid/tests/book/notest_rnn_encoder_decoer.py @@ -147,7 +147,7 @@ def seq_to_seq_net(): label = fluid.layers.data( name='label_sequence', shape=[1], dtype='int64', lod_level=1) cost = fluid.layers.cross_entropy(input=prediction, label=label) - avg_cost = fluid.layers.mean(x=cost) + avg_cost = fluid.layers.mean(cost) return avg_cost, prediction diff --git a/python/paddle/v2/fluid/tests/book/test_fit_a_line.py b/python/paddle/v2/fluid/tests/book/test_fit_a_line.py index a66c2c3c2f..77cffd4de9 100644 --- a/python/paddle/v2/fluid/tests/book/test_fit_a_line.py +++ b/python/paddle/v2/fluid/tests/book/test_fit_a_line.py @@ -29,7 +29,7 @@ def train(use_cuda, save_dirname): y = fluid.layers.data(name='y', shape=[1], dtype='float32') cost = fluid.layers.square_error_cost(input=y_predict, label=y) - avg_cost = fluid.layers.mean(x=cost) + avg_cost = fluid.layers.mean(cost) sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) sgd_optimizer.minimize(avg_cost) diff --git a/python/paddle/v2/fluid/tests/book/test_image_classification.py b/python/paddle/v2/fluid/tests/book/test_image_classification.py index 734ab3e4fb..b99a4285aa 100644 --- a/python/paddle/v2/fluid/tests/book/test_image_classification.py +++ b/python/paddle/v2/fluid/tests/book/test_image_classification.py @@ -110,7 +110,7 @@ def train(net_type, use_cuda, save_dirname): predict = fluid.layers.fc(input=net, size=classdim, act='softmax') cost = fluid.layers.cross_entropy(input=predict, label=label) - avg_cost = fluid.layers.mean(x=cost) + avg_cost = fluid.layers.mean(cost) acc = fluid.layers.accuracy(input=predict, label=label) # Test program diff --git a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py index b790246ec1..e513a658ff 100644 --- a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py +++ b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py @@ -164,7 +164,7 @@ def train(use_cuda, save_dirname=None): label=target, param_attr=fluid.ParamAttr( name='crfw', learning_rate=mix_hidden_lr)) - avg_cost = fluid.layers.mean(x=crf_cost) + avg_cost = fluid.layers.mean(crf_cost) # TODO(qiao) # check other optimizers and check why out will be NAN diff --git a/python/paddle/v2/fluid/tests/book/test_machine_translation.py b/python/paddle/v2/fluid/tests/book/test_machine_translation.py index d3405a9601..ee6a5d61df 100644 --- a/python/paddle/v2/fluid/tests/book/test_machine_translation.py +++ b/python/paddle/v2/fluid/tests/book/test_machine_translation.py @@ -178,7 +178,7 @@ def train_main(use_cuda, is_sparse): label = pd.data( name="target_language_next_word", shape=[1], dtype='int64', lod_level=1) cost = pd.cross_entropy(input=rnn_out, label=label) - avg_cost = pd.mean(x=cost) + avg_cost = pd.mean(cost) optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4) optimizer.minimize(avg_cost) diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits.py index 2462d425e1..aeeea1177e 100644 --- a/python/paddle/v2/fluid/tests/book/test_recognize_digits.py +++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits.py @@ -48,7 +48,7 @@ BATCH_SIZE = 64 def loss_net(hidden, label): prediction = fluid.layers.fc(input=hidden, size=10, act='softmax') loss = fluid.layers.cross_entropy(input=prediction, label=label) - avg_loss = fluid.layers.mean(x=loss) + avg_loss = fluid.layers.mean(loss) acc = fluid.layers.accuracy(input=prediction, label=label) return prediction, avg_loss, acc @@ -101,8 +101,8 @@ def train(nn_type, use_cuda, parallel, save_dirname, save_param_filename): avg_loss, acc = pd() # get mean loss and acc through every devices. - avg_loss = fluid.layers.mean(x=avg_loss) - acc = fluid.layers.mean(x=acc) + avg_loss = fluid.layers.mean(avg_loss) + acc = fluid.layers.mean(acc) else: prediction, avg_loss, acc = net_conf(img, label) diff --git a/python/paddle/v2/fluid/tests/book/test_recommender_system.py b/python/paddle/v2/fluid/tests/book/test_recommender_system.py index 1a7d8d57ff..a5adc3507b 100644 --- a/python/paddle/v2/fluid/tests/book/test_recommender_system.py +++ b/python/paddle/v2/fluid/tests/book/test_recommender_system.py @@ -147,7 +147,7 @@ def model(): label = layers.data(name='score', shape=[1], dtype='float32') square_cost = layers.square_error_cost(input=scale_infer, label=label) - avg_cost = layers.mean(x=square_cost) + avg_cost = layers.mean(square_cost) return scale_infer, avg_cost diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment.py index 61f46b51c4..cdd233a5b6 100644 --- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment.py +++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment.py @@ -42,7 +42,7 @@ def convolution_net(data, label, input_dim, class_dim=2, emb_dim=32, size=class_dim, act="softmax") cost = fluid.layers.cross_entropy(input=prediction, label=label) - avg_cost = fluid.layers.mean(x=cost) + avg_cost = fluid.layers.mean(cost) accuracy = fluid.layers.accuracy(input=prediction, label=label) return avg_cost, accuracy, prediction @@ -82,7 +82,7 @@ def dyn_rnn_lstm(data, label, input_dim, class_dim=2, emb_dim=32, last = fluid.layers.sequence_last_step(rnn()) prediction = fluid.layers.fc(input=last, size=class_dim, act="softmax") cost = fluid.layers.cross_entropy(input=prediction, label=label) - avg_cost = fluid.layers.mean(x=cost) + avg_cost = fluid.layers.mean(cost) accuracy = fluid.layers.accuracy(input=prediction, label=label) return avg_cost, accuracy, prediction @@ -119,7 +119,7 @@ def stacked_lstm_net(data, size=class_dim, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - avg_cost = fluid.layers.mean(x=cost) + avg_cost = fluid.layers.mean(cost) accuracy = fluid.layers.accuracy(input=prediction, label=label) return avg_cost, accuracy, prediction @@ -158,8 +158,8 @@ def train(word_dict, net_method, use_cuda, parallel=False, save_dirname=None): pd.write_output(acc) cost, acc = pd() - cost = fluid.layers.mean(x=cost) - acc_out = fluid.layers.mean(x=acc) + cost = fluid.layers.mean(cost) + acc_out = fluid.layers.mean(acc) prediction = None assert save_dirname is None diff --git a/python/paddle/v2/fluid/tests/book/test_word2vec.py b/python/paddle/v2/fluid/tests/book/test_word2vec.py index 9bd8f90c5e..ac23bd7284 100644 --- a/python/paddle/v2/fluid/tests/book/test_word2vec.py +++ b/python/paddle/v2/fluid/tests/book/test_word2vec.py @@ -118,7 +118,7 @@ def train(use_cuda, is_sparse, parallel, save_dirname): size=dict_size, act='softmax') cost = fluid.layers.cross_entropy(input=predict_word, label=words[4]) - avg_cost = fluid.layers.mean(x=cost) + avg_cost = fluid.layers.mean(cost) return avg_cost, predict_word word_dict = paddle.dataset.imikolov.build_dict() @@ -143,7 +143,7 @@ def train(use_cuda, is_sparse, parallel, save_dirname): ])) pd.write_output(avg_cost) - avg_cost = fluid.layers.mean(x=pd()) + avg_cost = fluid.layers.mean(pd()) sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) sgd_optimizer.minimize(avg_cost) diff --git a/python/paddle/v2/fluid/tests/book_distribute/notest_dist_fit_a_line.py b/python/paddle/v2/fluid/tests/book_distribute/notest_dist_fit_a_line.py index c443c4e0b7..164327d8f0 100644 --- a/python/paddle/v2/fluid/tests/book_distribute/notest_dist_fit_a_line.py +++ b/python/paddle/v2/fluid/tests/book_distribute/notest_dist_fit_a_line.py @@ -24,7 +24,7 @@ y_predict = fluid.layers.fc(input=x, size=1, act=None) y = fluid.layers.data(name='y', shape=[1], dtype='float32') cost = fluid.layers.square_error_cost(input=y_predict, label=y) -avg_cost = fluid.layers.mean(x=cost) +avg_cost = fluid.layers.mean(cost) sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost) diff --git a/python/paddle/v2/fluid/tests/book_distribute/notest_dist_image_classification.py b/python/paddle/v2/fluid/tests/book_distribute/notest_dist_image_classification.py index 298ecfc386..6ba06a6038 100644 --- a/python/paddle/v2/fluid/tests/book_distribute/notest_dist_image_classification.py +++ b/python/paddle/v2/fluid/tests/book_distribute/notest_dist_image_classification.py @@ -114,7 +114,7 @@ else: predict = fluid.layers.fc(input=net, size=classdim, act='softmax') cost = fluid.layers.cross_entropy(input=predict, label=label) -avg_cost = fluid.layers.mean(x=cost) +avg_cost = fluid.layers.mean(cost) optimizer = fluid.optimizer.Adam(learning_rate=0.001) optimize_ops, params_grads = optimizer.minimize(avg_cost) diff --git a/python/paddle/v2/fluid/tests/book_distribute/notest_dist_label_semantic_roles.py b/python/paddle/v2/fluid/tests/book_distribute/notest_dist_label_semantic_roles.py index 1210bf1d84..fa4bf33cea 100644 --- a/python/paddle/v2/fluid/tests/book_distribute/notest_dist_label_semantic_roles.py +++ b/python/paddle/v2/fluid/tests/book_distribute/notest_dist_label_semantic_roles.py @@ -154,7 +154,7 @@ def main(): label=target, param_attr=fluid.ParamAttr( name='crfw', learning_rate=mix_hidden_lr)) - avg_cost = fluid.layers.mean(x=crf_cost) + avg_cost = fluid.layers.mean(crf_cost) # TODO(qiao) # check other optimizers and check why out will be NAN diff --git a/python/paddle/v2/fluid/tests/book_distribute/notest_dist_word2vec.py b/python/paddle/v2/fluid/tests/book_distribute/notest_dist_word2vec.py index 0d5ad98850..aff4c53ebc 100644 --- a/python/paddle/v2/fluid/tests/book_distribute/notest_dist_word2vec.py +++ b/python/paddle/v2/fluid/tests/book_distribute/notest_dist_word2vec.py @@ -65,7 +65,7 @@ concat_embed = fluid.layers.concat( hidden1 = fluid.layers.fc(input=concat_embed, size=HIDDEN_SIZE, act='sigmoid') predict_word = fluid.layers.fc(input=hidden1, size=dict_size, act='softmax') cost = fluid.layers.cross_entropy(input=predict_word, label=next_word) -avg_cost = fluid.layers.mean(x=cost) +avg_cost = fluid.layers.mean(cost) sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost) train_reader = paddle.batch( diff --git a/python/paddle/v2/fluid/tests/book_distribute/notest_machine_translation.py b/python/paddle/v2/fluid/tests/book_distribute/notest_machine_translation.py index 15d2d40979..5406bd9113 100644 --- a/python/paddle/v2/fluid/tests/book_distribute/notest_machine_translation.py +++ b/python/paddle/v2/fluid/tests/book_distribute/notest_machine_translation.py @@ -94,7 +94,7 @@ def main(): label = layers.data( name="target_language_next_word", shape=[1], dtype='int64', lod_level=1) cost = layers.cross_entropy(input=rnn_out, label=label) - avg_cost = fluid.layers.mean(x=cost) + avg_cost = fluid.layers.mean(cost) optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4) optimize_ops, params_grads = optimizer.minimize(avg_cost) diff --git a/python/paddle/v2/fluid/tests/book_distribute/notest_recognize_digits_conv_dist.py b/python/paddle/v2/fluid/tests/book_distribute/notest_recognize_digits_conv_dist.py index 1c1fffc589..f6623099cb 100644 --- a/python/paddle/v2/fluid/tests/book_distribute/notest_recognize_digits_conv_dist.py +++ b/python/paddle/v2/fluid/tests/book_distribute/notest_recognize_digits_conv_dist.py @@ -37,7 +37,7 @@ conv_pool_2 = fluid.nets.simple_img_conv_pool( predict = fluid.layers.fc(input=conv_pool_2, size=10, act="softmax") cost = fluid.layers.cross_entropy(input=predict, label=label) -avg_cost = fluid.layers.mean(x=cost) +avg_cost = fluid.layers.mean(cost) optimizer = fluid.optimizer.Adam(learning_rate=0.01) optimize_ops, params_grads = optimizer.minimize(avg_cost) diff --git a/python/paddle/v2/fluid/tests/book_distribute/notest_recognize_digits_mlp_dist.py b/python/paddle/v2/fluid/tests/book_distribute/notest_recognize_digits_mlp_dist.py index c442ada6e3..f2d32cb99d 100644 --- a/python/paddle/v2/fluid/tests/book_distribute/notest_recognize_digits_mlp_dist.py +++ b/python/paddle/v2/fluid/tests/book_distribute/notest_recognize_digits_mlp_dist.py @@ -32,7 +32,7 @@ predict = fluid.layers.fc(input=hidden2, size=10, act='softmax') label = fluid.layers.data(name='y', shape=[1], dtype='int64') cost = fluid.layers.cross_entropy(input=predict, label=label) -avg_cost = fluid.layers.mean(x=cost) +avg_cost = fluid.layers.mean(cost) optimizer = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9) optimize_ops, params_grads = optimizer.minimize(avg_cost) diff --git a/python/paddle/v2/fluid/tests/book_distribute/notest_recommender_system_dist.py b/python/paddle/v2/fluid/tests/book_distribute/notest_recommender_system_dist.py index 363c7102c7..907b09a38b 100644 --- a/python/paddle/v2/fluid/tests/book_distribute/notest_recommender_system_dist.py +++ b/python/paddle/v2/fluid/tests/book_distribute/notest_recommender_system_dist.py @@ -117,7 +117,7 @@ def model(): label = layers.data(name='score', shape=[1], dtype='float32') square_cost = layers.square_error_cost(input=scale_infer, label=label) - avg_cost = layers.mean(x=square_cost) + avg_cost = layers.mean(square_cost) return avg_cost diff --git a/python/paddle/v2/fluid/tests/book_distribute/notest_understand_sentiment_conv_dist.py b/python/paddle/v2/fluid/tests/book_distribute/notest_understand_sentiment_conv_dist.py index c5c0856c31..f95b4a9a02 100644 --- a/python/paddle/v2/fluid/tests/book_distribute/notest_understand_sentiment_conv_dist.py +++ b/python/paddle/v2/fluid/tests/book_distribute/notest_understand_sentiment_conv_dist.py @@ -38,7 +38,7 @@ def convolution_net(data, label, input_dim, class_dim=2, emb_dim=32, size=class_dim, act="softmax") cost = fluid.layers.cross_entropy(input=prediction, label=label) - avg_cost = fluid.layers.mean(x=cost) + avg_cost = fluid.layers.mean(cost) adam_optimizer = fluid.optimizer.Adam(learning_rate=0.002) optimize_ops, params_grads = adam_optimizer.minimize(avg_cost) accuracy = fluid.evaluator.Accuracy(input=prediction, label=label) diff --git a/python/paddle/v2/fluid/tests/book_distribute/notest_understand_sentiment_dynamic_lstm.py b/python/paddle/v2/fluid/tests/book_distribute/notest_understand_sentiment_dynamic_lstm.py index 99e2c2bbac..5212319435 100644 --- a/python/paddle/v2/fluid/tests/book_distribute/notest_understand_sentiment_dynamic_lstm.py +++ b/python/paddle/v2/fluid/tests/book_distribute/notest_understand_sentiment_dynamic_lstm.py @@ -49,7 +49,7 @@ def stacked_lstm_net(data, size=class_dim, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - avg_cost = fluid.layers.mean(x=cost) + avg_cost = fluid.layers.mean(cost) adam_optimizer = fluid.optimizer.Adam(learning_rate=0.002) optimize_ops, params_grads = adam_optimizer.minimize(avg_cost) accuracy = fluid.evaluator.Accuracy(input=prediction, label=label) diff --git a/python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py b/python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py index 944f8af086..04ab2d1d07 100644 --- a/python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py +++ b/python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py @@ -30,7 +30,7 @@ y_predict = fluid.layers.fc(input=x, size=1, act=None) y = fluid.layers.data(name='y', shape=[1], dtype='float32') cost = fluid.layers.square_error_cost(input=y_predict, label=y) -avg_cost = fluid.layers.mean(x=cost) +avg_cost = fluid.layers.mean(cost) sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1) sgd_optimizer.minimize(avg_cost) diff --git a/python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py b/python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py index a556904107..307e6035f4 100644 --- a/python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py +++ b/python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py @@ -117,7 +117,7 @@ else: predict = fluid.layers.fc(input=net, size=classdim, act='softmax') cost = fluid.layers.cross_entropy(input=predict, label=label) -avg_cost = fluid.layers.mean(x=cost) +avg_cost = fluid.layers.mean(cost) optimizer = fluid.optimizer.Adam(learning_rate=0.001) opts = optimizer.minimize(avg_cost) diff --git a/python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_machine_translation.py b/python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_machine_translation.py index 4c1eae861b..3de46e8c27 100644 --- a/python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_machine_translation.py +++ b/python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_machine_translation.py @@ -100,7 +100,7 @@ def main(): label = layers.data( name="target_language_next_word", shape=[1], dtype='int64', lod_level=1) cost = layers.cross_entropy(input=rnn_out, label=label) - avg_cost = fluid.layers.mean(x=cost) + avg_cost = fluid.layers.mean(cost) optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4) optimizer.minimize(avg_cost) diff --git a/python/paddle/v2/fluid/tests/demo/fc_gan.py b/python/paddle/v2/fluid/tests/demo/fc_gan.py index 67921db04a..a0d3721ea4 100644 --- a/python/paddle/v2/fluid/tests/demo/fc_gan.py +++ b/python/paddle/v2/fluid/tests/demo/fc_gan.py @@ -96,7 +96,7 @@ def main(): x=D(img), label=fluid.layers.data( name='label', shape=[1], dtype='float32')) - d_loss = fluid.layers.mean(x=d_loss) + d_loss = fluid.layers.mean(d_loss) with fluid.program_guard(dg_program, startup_program): noise = fluid.layers.data( @@ -107,7 +107,7 @@ def main(): x=D(g_img), label=fluid.layers.fill_constant_batch_size_like( input=noise, dtype='float32', shape=[-1, 1], value=1.0)) - dg_loss = fluid.layers.mean(x=dg_loss) + dg_loss = fluid.layers.mean(dg_loss) opt = fluid.optimizer.Adam(learning_rate=LEARNING_RATE) diff --git a/python/paddle/v2/fluid/tests/test_error_clip.py b/python/paddle/v2/fluid/tests/test_error_clip.py index d577d0014d..99b69c1625 100644 --- a/python/paddle/v2/fluid/tests/test_error_clip.py +++ b/python/paddle/v2/fluid/tests/test_error_clip.py @@ -33,7 +33,7 @@ with fluid.program_guard(main_program=prog): label = fluid.layers.data(name='y', shape=[1], dtype='int64') cost = fluid.layers.cross_entropy(input=predict, label=label) - avg_cost = fluid.layers.mean(x=cost) + avg_cost = fluid.layers.mean(cost) prog_clip = prog.clone() prog_clip.block(0).var(hidden1.name).set_error_clip( diff --git a/python/paddle/v2/fluid/tests/test_gradient_clip.py b/python/paddle/v2/fluid/tests/test_gradient_clip.py index 792262df84..c20863ddb2 100644 --- a/python/paddle/v2/fluid/tests/test_gradient_clip.py +++ b/python/paddle/v2/fluid/tests/test_gradient_clip.py @@ -30,7 +30,7 @@ with fluid.program_guard(main_program=prog): label = fluid.layers.data(name='y', shape=[1], dtype='int64') cost = fluid.layers.cross_entropy(input=predict, label=label) - avg_cost = fluid.layers.mean(x=cost) + avg_cost = fluid.layers.mean(cost) prog_clip = prog.clone() diff --git a/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py b/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py index 75a651cf27..e5a1406b93 100644 --- a/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py +++ b/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py @@ -56,7 +56,7 @@ class TestMNISTIfElseOp(unittest.TestCase): prob = layers.merge_lod_tensor( in_true=true_out, in_false=false_out, mask=cond, x=image) loss = layers.cross_entropy(input=prob, label=label) - avg_loss = layers.mean(x=loss) + avg_loss = layers.mean(loss) optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9) optimizer.minimize(avg_loss, startup_prog) @@ -113,7 +113,7 @@ class TestMNISTIfElseOp(unittest.TestCase): prob = ie() loss = layers.cross_entropy(input=prob[0], label=label) - avg_loss = layers.mean(x=loss) + avg_loss = layers.mean(loss) optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9) optimizer.minimize(avg_loss, startup_prog) diff --git a/python/paddle/v2/fluid/tests/unittests/test_array_read_write_op.py b/python/paddle/v2/fluid/tests/unittests/test_array_read_write_op.py index 8917b9b906..e04f682ece 100644 --- a/python/paddle/v2/fluid/tests/unittests/test_array_read_write_op.py +++ b/python/paddle/v2/fluid/tests/unittests/test_array_read_write_op.py @@ -49,15 +49,15 @@ class TestArrayReadWrite(unittest.TestCase): i = layers.increment(x=i) a2 = layers.array_read(array=arr, i=i) - mean_a0 = layers.mean(x=a0) - mean_a1 = layers.mean(x=a1) - mean_a2 = layers.mean(x=a2) + mean_a0 = layers.mean(a0) + mean_a1 = layers.mean(a1) + mean_a2 = layers.mean(a2) a_sum = layers.sums(input=[mean_a0, mean_a1, mean_a2]) - mean_x0 = layers.mean(x=x[0]) - mean_x1 = layers.mean(x=x[1]) - mean_x2 = layers.mean(x=x[2]) + mean_x0 = layers.mean(x[0]) + mean_x1 = layers.mean(x[1]) + mean_x2 = layers.mean(x[2]) x_sum = layers.sums(input=[mean_x0, mean_x1, mean_x2]) diff --git a/python/paddle/v2/fluid/tests/unittests/test_calc_gradient.py b/python/paddle/v2/fluid/tests/unittests/test_calc_gradient.py index 1b38dcf343..1b0de31ae0 100644 --- a/python/paddle/v2/fluid/tests/unittests/test_calc_gradient.py +++ b/python/paddle/v2/fluid/tests/unittests/test_calc_gradient.py @@ -26,7 +26,7 @@ class TestCalcGradient(unittest.TestCase): x = layers.create_parameter(dtype="float32", shape=[5, 10]) y = layers.create_parameter(dtype="float32", shape=[10, 8]) mul_out = layers.mul(x=x, y=y) - mean_out = layers.mean(x=mul_out) + mean_out = layers.mean(mul_out) a = calc_gradient(mean_out, mul_out) b = calc_gradient(mean_out, x) place = fluid.CPUPlace() diff --git a/python/paddle/v2/fluid/tests/unittests/test_conditional_block.py b/python/paddle/v2/fluid/tests/unittests/test_conditional_block.py index 58ac267203..f605e13d21 100644 --- a/python/paddle/v2/fluid/tests/unittests/test_conditional_block.py +++ b/python/paddle/v2/fluid/tests/unittests/test_conditional_block.py @@ -39,7 +39,7 @@ class ConditionalBlock(unittest.TestCase): outs = exe.run(feed={'X': x}, fetch_list=[out])[0] print outs - loss = layers.mean(x=out) + loss = layers.mean(out) append_backward(loss=loss) outs = exe.run( feed={'X': x}, diff --git a/python/paddle/v2/fluid/tests/unittests/test_dyn_rnn.py b/python/paddle/v2/fluid/tests/unittests/test_dyn_rnn.py index 1571572fc6..23a1555208 100644 --- a/python/paddle/v2/fluid/tests/unittests/test_dyn_rnn.py +++ b/python/paddle/v2/fluid/tests/unittests/test_dyn_rnn.py @@ -81,7 +81,7 @@ class TestDynRNN(unittest.TestCase): logits = fluid.layers.fc(input=last, size=1, act=None) loss = fluid.layers.sigmoid_cross_entropy_with_logits( x=logits, label=label) - loss = fluid.layers.mean(x=loss) + loss = fluid.layers.mean(loss) sgd = fluid.optimizer.SGD(1e-4) sgd.minimize(loss=loss) cpu = fluid.CPUPlace() @@ -119,7 +119,7 @@ class TestDynRNN(unittest.TestCase): label = fluid.layers.data(name='label', shape=[1], dtype='float32') loss = fluid.layers.sigmoid_cross_entropy_with_logits( x=logits, label=label) - loss = fluid.layers.mean(x=loss) + loss = fluid.layers.mean(loss) sgd = fluid.optimizer.Adam(1e-3) sgd.minimize(loss=loss) diff --git a/python/paddle/v2/fluid/tests/unittests/test_dynrnn_gradient_check.py b/python/paddle/v2/fluid/tests/unittests/test_dynrnn_gradient_check.py index 8b01ec730a..182b025be5 100644 --- a/python/paddle/v2/fluid/tests/unittests/test_dynrnn_gradient_check.py +++ b/python/paddle/v2/fluid/tests/unittests/test_dynrnn_gradient_check.py @@ -272,7 +272,7 @@ class TestSimpleMul(SeedFixedTestCase): out = rnn() out = fluid.layers.sequence_pool(out, pool_type='last') - loss = fluid.layers.mean(x=out) + loss = fluid.layers.mean(out) fluid.backward.append_backward(loss) cpu = fluid.CPUPlace() @@ -348,7 +348,7 @@ class TestSimpleMulWithMemory(SeedFixedTestCase): out = rnn() last = fluid.layers.sequence_pool(input=out, pool_type='last') - loss = fluid.layers.mean(x=last) + loss = fluid.layers.mean(last) fluid.backward.append_backward(loss) cpu = fluid.CPUPlace() diff --git a/python/paddle/v2/fluid/tests/unittests/test_dynrnn_static_input.py b/python/paddle/v2/fluid/tests/unittests/test_dynrnn_static_input.py index d2f05dcd14..b21ac8e800 100644 --- a/python/paddle/v2/fluid/tests/unittests/test_dynrnn_static_input.py +++ b/python/paddle/v2/fluid/tests/unittests/test_dynrnn_static_input.py @@ -125,7 +125,7 @@ class TestDyRnnStaticInput(unittest.TestCase): return static_input_step_outs last = fluid.layers.sequence_pool(input=rnn(), pool_type='last') - loss = fluid.layers.mean(x=last) + loss = fluid.layers.mean(last) append_backward(loss) static_input_grad = self._program.global_block().var( framework.grad_var_name('static_input_tensor')) diff --git a/python/paddle/v2/fluid/tests/unittests/test_inference_model_io.py b/python/paddle/v2/fluid/tests/unittests/test_inference_model_io.py index e381312ccc..62abe99aa2 100644 --- a/python/paddle/v2/fluid/tests/unittests/test_inference_model_io.py +++ b/python/paddle/v2/fluid/tests/unittests/test_inference_model_io.py @@ -38,7 +38,7 @@ class TestBook(unittest.TestCase): y_predict = layers.fc(input=x, size=1, act=None) cost = layers.square_error_cost(input=y_predict, label=y) - avg_cost = layers.mean(x=cost) + avg_cost = layers.mean(cost) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001) sgd_optimizer.minimize(avg_cost, init_program) diff --git a/python/paddle/v2/fluid/tests/unittests/test_layers.py b/python/paddle/v2/fluid/tests/unittests/test_layers.py index e757598bba..149ac347ce 100644 --- a/python/paddle/v2/fluid/tests/unittests/test_layers.py +++ b/python/paddle/v2/fluid/tests/unittests/test_layers.py @@ -30,7 +30,7 @@ class TestBook(unittest.TestCase): y_predict = layers.fc(input=x, size=1, act=None) y = layers.data(name='y', shape=[1], dtype='float32') cost = layers.square_error_cost(input=y_predict, label=y) - avg_cost = layers.mean(x=cost) + avg_cost = layers.mean(cost) self.assertIsNotNone(avg_cost) program.append_backward(avg_cost) @@ -49,7 +49,7 @@ class TestBook(unittest.TestCase): act='softmax', param_attr=["sftmax.w1", "sftmax.w2"]) cost = layers.cross_entropy(input=predict, label=label) - avg_cost = layers.mean(x=cost) + avg_cost = layers.mean(cost) self.assertIsNotNone(avg_cost) print(str(program)) @@ -92,7 +92,7 @@ class TestBook(unittest.TestCase): predict = layers.fc(input=conv_pool_2, size=10, act="softmax") cost = layers.cross_entropy(input=predict, label=label) - avg_cost = layers.mean(x=cost) + avg_cost = layers.mean(cost) program.append_backward(avg_cost) @@ -140,7 +140,7 @@ class TestBook(unittest.TestCase): size=dict_size, act='softmax') cost = layers.cross_entropy(input=predict_word, label=next_word) - avg_cost = layers.mean(x=cost) + avg_cost = layers.mean(cost) self.assertIsNotNone(avg_cost) print(str(program)) @@ -287,7 +287,7 @@ class TestBook(unittest.TestCase): num_total_classes=dict_size, param_attr='nce.w', bias_attr='nce.b') - avg_loss = layers.mean(x=loss) + avg_loss = layers.mean(loss) self.assertIsNotNone(avg_loss) print(str(default_main_program())) diff --git a/python/paddle/v2/fluid/tests/unittests/test_lod_tensor_array_ops.py b/python/paddle/v2/fluid/tests/unittests/test_lod_tensor_array_ops.py index ebc0a2f714..8c59bbb407 100644 --- a/python/paddle/v2/fluid/tests/unittests/test_lod_tensor_array_ops.py +++ b/python/paddle/v2/fluid/tests/unittests/test_lod_tensor_array_ops.py @@ -182,7 +182,7 @@ class TestCPULoDTensorArrayOpGrad(unittest.TestCase): array = layers.lod_tensor_to_array(x, table) result = layers.array_to_lod_tensor(array, table) - mean = layers.mean(x=result) + mean = layers.mean(result) append_backward(mean) diff --git a/python/paddle/v2/fluid/tests/unittests/test_memory_optimization_transpiler.py b/python/paddle/v2/fluid/tests/unittests/test_memory_optimization_transpiler.py index a276db581e..9d5f90c627 100644 --- a/python/paddle/v2/fluid/tests/unittests/test_memory_optimization_transpiler.py +++ b/python/paddle/v2/fluid/tests/unittests/test_memory_optimization_transpiler.py @@ -29,7 +29,7 @@ class TestControlFlowGraph(unittest.TestCase): y_predict = layers.fc(input=x, size=1, act=None) y = layers.data(name='y', shape=[1], dtype='float32') cost = layers.square_error_cost(input=y_predict, label=y) - avg_cost = layers.mean(x=cost) + avg_cost = layers.mean(cost) opt = optimizer.SGD(learning_rate=0.001) opt = opt.minimize(avg_cost) diff --git a/python/paddle/v2/fluid/tests/unittests/test_parallel_op.py b/python/paddle/v2/fluid/tests/unittests/test_parallel_op.py index d65752608b..8ace41020e 100644 --- a/python/paddle/v2/fluid/tests/unittests/test_parallel_op.py +++ b/python/paddle/v2/fluid/tests/unittests/test_parallel_op.py @@ -127,7 +127,7 @@ class BaseParallelForTest(unittest.TestCase): data = next(generator) loss = generator.send(data) self.assertIsNotNone(loss) - avg_loss = fluid.layers.mean(x=loss) + avg_loss = fluid.layers.mean(loss) fluid.backward.append_backward(loss=avg_loss) exe = fluid.Executor(place) @@ -170,7 +170,7 @@ class ParallelOpTest(BaseParallelForTest): x = fluid.layers.data(shape=[784], dtype='float32', name='img') x = yield x hidden = fluid.layers.fc(input=x, size=200, param_attr='fc1.w') - loss = fluid.layers.mean(x=hidden) + loss = fluid.layers.mean(hidden) yield loss def test_simple_fc(self): @@ -200,7 +200,7 @@ class ParallelOpTestMultipleInput(BaseParallelForTest): hidden1 = fluid.layers.fc(input=x, size=200, param_attr='fc1.w') hidden2 = fluid.layers.fc(input=hidden1, size=200, param_attr='fc2.w') hidden3 = fluid.layers.fc(input=hidden2, size=200, param_attr='fc3.w') - loss = fluid.layers.mean(x=hidden3) + loss = fluid.layers.mean(hidden3) yield loss def test_simple_fc(self): diff --git a/python/paddle/v2/fluid/tests/unittests/test_print_op.py b/python/paddle/v2/fluid/tests/unittests/test_print_op.py index 1e49ce994b..d11e3aeddf 100644 --- a/python/paddle/v2/fluid/tests/unittests/test_print_op.py +++ b/python/paddle/v2/fluid/tests/unittests/test_print_op.py @@ -35,7 +35,7 @@ class TestPrintOpCPU(unittest.TestCase): x.stop_gradient = False printed = layers.Print(input=x, **kargs) if only_forward: return printed - loss = layers.mean(x=printed) + loss = layers.mean(printed) append_backward(loss=loss) return loss diff --git a/python/paddle/v2/fluid/tests/unittests/test_profiler.py b/python/paddle/v2/fluid/tests/unittests/test_profiler.py index 62bfb2b8e2..b4b8a58286 100644 --- a/python/paddle/v2/fluid/tests/unittests/test_profiler.py +++ b/python/paddle/v2/fluid/tests/unittests/test_profiler.py @@ -54,7 +54,7 @@ class TestProfiler(unittest.TestCase): predict = fluid.layers.fc(input=hidden2, size=10, act='softmax') label = fluid.layers.data(name='y', shape=[1], dtype='int64') cost = fluid.layers.cross_entropy(input=predict, label=label) - avg_cost = fluid.layers.mean(x=cost) + avg_cost = fluid.layers.mean(cost) accuracy = fluid.evaluator.Accuracy(input=predict, label=label) optimizer = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9) diff --git a/python/paddle/v2/fluid/tests/unittests/test_recurrent_op.py b/python/paddle/v2/fluid/tests/unittests/test_recurrent_op.py index 177d8fc65f..0e747936fd 100644 --- a/python/paddle/v2/fluid/tests/unittests/test_recurrent_op.py +++ b/python/paddle/v2/fluid/tests/unittests/test_recurrent_op.py @@ -127,7 +127,7 @@ class RecurrentOpTest1(unittest.TestCase): self.output_shape = (self.sent_len, self.batch_size, self.input_dim) self.py_rnn = PySimpleRNN1(self.input_shape, self.output_shape) - self.output = layers.mean(x=self.create_rnn_op(), **self.p_info) + self.output = layers.mean(self.create_rnn_op(), **self.p_info) def create_rnn_op(self): x = layers.data( @@ -261,7 +261,7 @@ class RecurrentOpTest2(RecurrentOpTest1): self.output_shape = (self.sent_len, self.batch_size, self.input_dim) self.py_rnn = PySimpleRNN2(self.input_shape, self.output_shape) - self.output = layers.mean(x=self.create_rnn_op(), **self.p_info) + self.output = layers.mean(self.create_rnn_op(), **self.p_info) def create_rnn_op(self): x = layers.data( @@ -360,7 +360,7 @@ class RecurrentOpMultipleMemoryTest(RecurrentOpTest1): self.py_rnn = RecurrentOpMultipleMemoryTest.PySimpleRNN3( self.input_shape, self.output_shape) - self.output = layers.mean(x=self.create_rnn_op(), **self.p_info) + self.output = layers.mean(self.create_rnn_op(), **self.p_info) def create_rnn_op(self): x = layers.data( @@ -444,7 +444,7 @@ class RecurrentOpNoMemBootTest(RecurrentOpTest1): self.output_shape = (self.sent_len, self.batch_size, self.input_dim) self.py_rnn = RecurrentOpNoMemBootTest.PySimpleRNN4(self.input_shape, self.output_shape) - self.output = layers.mean(x=self.create_rnn_op(), **self.p_info) + self.output = layers.mean(self.create_rnn_op(), **self.p_info) print self.main_program def create_rnn_op(self): diff --git a/python/paddle/v2/fluid/tests/unittests/test_registry.py b/python/paddle/v2/fluid/tests/unittests/test_registry.py index 82527a6ec7..b0ec218ab3 100644 --- a/python/paddle/v2/fluid/tests/unittests/test_registry.py +++ b/python/paddle/v2/fluid/tests/unittests/test_registry.py @@ -22,7 +22,7 @@ class TestRegistry(unittest.TestCase): @decorators.prog_scope() def test_registry_layer(self): x = fluid.layers.data(name='X', shape=[10, 10], dtype='float32') - output = fluid.layers.mean(x=x) + output = fluid.layers.mean(x) place = fluid.CPUPlace() exe = fluid.Executor(place) diff --git a/python/paddle/v2/fluid/tests/unittests/test_shrink_rnn_memory.py b/python/paddle/v2/fluid/tests/unittests/test_shrink_rnn_memory.py index 48874ba8a5..c991fbbaa2 100644 --- a/python/paddle/v2/fluid/tests/unittests/test_shrink_rnn_memory.py +++ b/python/paddle/v2/fluid/tests/unittests/test_shrink_rnn_memory.py @@ -39,7 +39,7 @@ class TestShrinkRNNMemoryBase(unittest.TestCase): i = layers.increment(x=i) i.stop_gradient = True self.mem3 = layers.shrink_memory(x=self.mem2, i=i, table=table) - mem3_mean = layers.mean(x=self.mem3) + mem3_mean = layers.mean(self.mem3) append_backward(loss=mem3_mean) self.x_grad = self.main_program.global_block().var('x@GRAD') diff --git a/python/paddle/v2/fluid/tests/unittests/test_split_and_merge_lod_tensor_op.py b/python/paddle/v2/fluid/tests/unittests/test_split_and_merge_lod_tensor_op.py index 48e6756a86..8cfbd7881a 100644 --- a/python/paddle/v2/fluid/tests/unittests/test_split_and_merge_lod_tensor_op.py +++ b/python/paddle/v2/fluid/tests/unittests/test_split_and_merge_lod_tensor_op.py @@ -145,7 +145,7 @@ class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase): input=x, mask=y, level=level) out = layers.merge_lod_tensor( in_true=out_true, in_false=out_false, mask=y, x=x, level=level) - mean = layers.mean(x=out) + mean = layers.mean(out) append_backward(mean) diff --git a/python/paddle/v2/fluid/tests/unittests/test_while_op.py b/python/paddle/v2/fluid/tests/unittests/test_while_op.py index 3fa1d5e0ed..3d2a9faf32 100644 --- a/python/paddle/v2/fluid/tests/unittests/test_while_op.py +++ b/python/paddle/v2/fluid/tests/unittests/test_while_op.py @@ -58,7 +58,7 @@ class TestWhileOp(unittest.TestCase): layers.less_than(x=i, y=array_len, cond=cond) sum_result = layers.array_read(array=mem_array, i=i) - loss = layers.mean(x=sum_result) + loss = layers.mean(sum_result) append_backward(loss) From 28ff1cdaa6d0d6f6dabb0668218d99fc88260008 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Sat, 24 Feb 2018 16:42:17 +0800 Subject: [PATCH 02/11] create learning rate for each program --- python/paddle/v2/fluid/optimizer.py | 50 ++++++++++++++++++----------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/python/paddle/v2/fluid/optimizer.py b/python/paddle/v2/fluid/optimizer.py index 61febc4e38..0b3e019d80 100644 --- a/python/paddle/v2/fluid/optimizer.py +++ b/python/paddle/v2/fluid/optimizer.py @@ -36,10 +36,15 @@ class Optimizer(object): """ def __init__(self, learning_rate, global_step=None, regularization=None): - assert learning_rate is not None + if not isinstance(learning_rate, float) and \ + not isinstance(learning_rate, framework.Variable): + raise ValueError("learning rate should be float or Variable") self._global_step = global_step self.regularization = regularization - self._global_learning_rate = learning_rate + self._learning_rate = learning_rate + # each program should have a independent learning rate + # program -> Variable(learning_rate) + self._learning_rate_map = defaultdict(lambda: None) # Dictionary of accumulators. Some optimizer subclasses need to # allocate and manage extra variables associated with the parameters # to train. These variables are called accumulators. @@ -48,26 +53,33 @@ class Optimizer(object): self.helper = None def _create_global_learning_rate(self): - if isinstance(self._global_learning_rate, float): - self._global_learning_rate = layers.create_global_var( - name=unique_name.generate("learning_rate"), - shape=[1], - value=float(self._global_learning_rate), - dtype='float32', - persistable=True) - - if not isinstance(self._global_learning_rate, framework.Variable): - raise ValueError("learning rate should be a Variable, " - "actual type is %s", - type(self._global_learning_rate)) - - @property - def global_learning_rate(self): + lr = self.global_learning_rate() + + if isinstance(lr, framework.Variable): + return + else: + if not isinstance(self._learning_rate, float): + raise ValueError( + "learning rate variable is create outside optimizer," + "can not create new learning rate variable for new program") + + # create learning rate in the current main program + self._learning_rate_map[framework.default_main_program( + )] = layers.create_global_var( + name=unique_name.generate("learning_rate"), + shape=[1], + value=float(self._learning_rate), + dtype='float32', + persistable=True) + + def global_learning_rate(self, program=None): """ get global decayed learning rate :return: """ - return self._global_learning_rate + if program is None: + program = framework.default_main_program() + return self._learning_rate_map[program] def _append_optimize_op(self, block, param_and_grad): """ append optimize operator to block and return all the added optimize_op @@ -78,7 +90,7 @@ class Optimizer(object): # create learning rate variable for every parameter param = param_and_grad[0] param_lr = param.optimize_attr['learning_rate'] - return self._global_learning_rate * param_lr + return self.global_learning_rate() * param_lr def _create_accumulators(self, block, parameters): """Create all accumulators needed by the parameters From 3b8bade61776d38686944ce9ebdf9b50e6bb9b9c Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Sat, 24 Feb 2018 17:27:33 +0800 Subject: [PATCH 03/11] init learning_rate_map when input learning rate is a Variable --- python/paddle/v2/fluid/optimizer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/paddle/v2/fluid/optimizer.py b/python/paddle/v2/fluid/optimizer.py index 0b3e019d80..9309ec3916 100644 --- a/python/paddle/v2/fluid/optimizer.py +++ b/python/paddle/v2/fluid/optimizer.py @@ -45,6 +45,9 @@ class Optimizer(object): # each program should have a independent learning rate # program -> Variable(learning_rate) self._learning_rate_map = defaultdict(lambda: None) + if isinstance(self._learning_rate, framework.Variable): + self._learning_rate_map[framework.default_main_program( + )] = self._learning_rate # Dictionary of accumulators. Some optimizer subclasses need to # allocate and manage extra variables associated with the parameters # to train. These variables are called accumulators. From 71f84c907693ab5469b7eb2252deebd4e18f1d0d Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Mon, 26 Feb 2018 10:17:26 +0800 Subject: [PATCH 04/11] move paddle/v2/fluid to paddle/fluid in documentation --- doc/design/concurrent_programming.md | 2 +- doc/design/fluid.md | 2 +- doc/design/memory_optimization.md | 2 +- doc/howto/cluster/fluid_cluster_train_en.md | 6 +++--- doc/howto/optimization/cpu_profiling_cn.md | 14 +++++++------- doc/howto/optimization/cpu_profiling_en.md | 14 +++++++------- doc/howto/read_source.md | 12 ++++++------ 7 files changed, 26 insertions(+), 26 deletions(-) diff --git a/doc/design/concurrent_programming.md b/doc/design/concurrent_programming.md index afc65e831d..f022e67fd3 100644 --- a/doc/design/concurrent_programming.md +++ b/doc/design/concurrent_programming.md @@ -12,7 +12,7 @@ The following table compares concepts in Fluid and Go | Go | Fluid | |----|-------| -|user-defined functions | [layers](https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/v2/fluid) | +|user-defined functions | [layers](https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/fluid) | | control-flow and built-in functions | [intrinsics/operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators) | | goroutines, channels | [class ThreadPool](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/framework/thread_pool.h) | | runtime | [class Executor](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/executor.h) | diff --git a/doc/design/fluid.md b/doc/design/fluid.md index 2acc168007..f78fa8c191 100644 --- a/doc/design/fluid.md +++ b/doc/design/fluid.md @@ -89,7 +89,7 @@ with train_loop.block(): h[t] = the_step(input[t]) ``` -An actual Fluid example is described [here](https://github.com/PaddlePaddle/Paddle/blob/a91efdde6910ce92a78e3aa7157412c4c88d9ee8/python/paddle/v2/fluid/tests/test_while_op.py#L36-L44). +An actual Fluid example is described [here](https://github.com/PaddlePaddle/Paddle/blob/bde090a97564b9c61a6aaa38b72ccc4889d102d9/python/paddle/fluid/tests/unittests/test_while_op.py#L50-L58). From the example, the Fluid programs look very similar to their PyTorch equivalent programs, except that Fluid's loop structure, wrapped with Python's `with` statement, could run much faster than just a Python loop. diff --git a/doc/design/memory_optimization.md b/doc/design/memory_optimization.md index 1f68cef4cc..285464ada7 100644 --- a/doc/design/memory_optimization.md +++ b/doc/design/memory_optimization.md @@ -101,7 +101,7 @@ In-place is a built-in attribute of an operator. Since we treat in-place and oth #### contruct control flow graph -Following is the ProgramDesc protobuf of [machine translation](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/tests/book/test_machine_translation.py) example. +Following is the ProgramDesc protobuf of [machine translation](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_machine_translation.py) example. - Block0: diff --git a/doc/howto/cluster/fluid_cluster_train_en.md b/doc/howto/cluster/fluid_cluster_train_en.md index ae825d9a51..b4465e8269 100644 --- a/doc/howto/cluster/fluid_cluster_train_en.md +++ b/doc/howto/cluster/fluid_cluster_train_en.md @@ -32,7 +32,7 @@ The non-cluster version of this demo with fluid API is as follows: ``` python import paddle.v2 as paddle -import paddle.v2.fluid as fluid +import paddle.fluid as fluid x = fluid.layers.data(name='x', shape=[13], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) @@ -125,11 +125,11 @@ for pass_id in range(100): ### E2E demo -Please find the complete demo from [here](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/tests/book_distribute/notest_dist_fit_a_line.py). +Please find the complete demo from [here](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book_distribute/notest_dist_fit_a_line.py). First `cd` into the folder that contains the `python` files. In this case: ```bash -cd /paddle/python/paddle/v2/fluid/tests/book_distribute +cd /paddle/python/paddle/fluid/tests/book_distribute ``` In parameter server node run the following in the command line: diff --git a/doc/howto/optimization/cpu_profiling_cn.md b/doc/howto/optimization/cpu_profiling_cn.md index 14eba0e2f3..d59be670c2 100644 --- a/doc/howto/optimization/cpu_profiling_cn.md +++ b/doc/howto/optimization/cpu_profiling_cn.md @@ -35,7 +35,7 @@ cprofilev -a 0.0.0.0 -p 3214 -f profile.out main.py ``` ncalls tottime percall cumtime percall filename:lineno(function) 1 0.284 0.284 29.514 29.514 main.py:1() - 4696 0.128 0.000 15.748 0.003 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/executor.py:20(run) + 4696 0.128 0.000 15.748 0.003 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/executor.py:20(run) 4696 12.040 0.003 12.040 0.003 {built-in method run} 1 0.144 0.144 6.534 6.534 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/__init__.py:14() ``` @@ -61,9 +61,9 @@ cprofilev -a 0.0.0.0 -p 3214 -f profile.out main.py ```text 4696 12.040 0.003 12.040 0.003 {built-in method run} 300005 0.874 0.000 1.681 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/dataset/mnist.py:38(reader) - 107991 0.676 0.000 1.519 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:219(__init__) - 4697 0.626 0.000 2.291 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:428(sync_with_cpp) - 1 0.618 0.618 0.618 0.618 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/__init__.py:1() + 107991 0.676 0.000 1.519 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:219(__init__) + 4697 0.626 0.000 2.291 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:428(sync_with_cpp) + 1 0.618 0.618 0.618 0.618 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/__init__.py:1() ``` 可以看到最耗时的函数是C++端的`run`函数。这需要联合我们第二节`Python`与`C++`混合代码的性能分析来进行调优。而`sync_with_cpp`函数的总共耗时很长,每次调用的耗时也很长。于是我们可以点击`sync_with_cpp`的详细信息,了解其调用关系。 @@ -76,9 +76,9 @@ Called By: Function was called by... ncalls tottime cumtime -/home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:428(sync_with_cpp) <- 4697 0.626 2.291 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:562(sync_with_cpp) -/home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:562(sync_with_cpp) <- 4696 0.019 2.316 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:487(clone) - 1 0.000 0.001 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:534(append_backward) +/home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:428(sync_with_cpp) <- 4697 0.626 2.291 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:562(sync_with_cpp) +/home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:562(sync_with_cpp) <- 4696 0.019 2.316 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:487(clone) + 1 0.000 0.001 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:534(append_backward) Called: diff --git a/doc/howto/optimization/cpu_profiling_en.md b/doc/howto/optimization/cpu_profiling_en.md index 368af40cc7..01e5fddf61 100644 --- a/doc/howto/optimization/cpu_profiling_en.md +++ b/doc/howto/optimization/cpu_profiling_en.md @@ -49,7 +49,7 @@ port, we will see the output like the following: ``` ncalls tottime percall cumtime percall filename:lineno(function) 1 0.284 0.284 29.514 29.514 main.py:1() - 4696 0.128 0.000 15.748 0.003 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/executor.py:20(run) + 4696 0.128 0.000 15.748 0.003 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/executor.py:20(run) 4696 12.040 0.003 12.040 0.003 {built-in method run} 1 0.144 0.144 6.534 6.534 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/__init__.py:14() ``` @@ -74,9 +74,9 @@ focus on. We can sort above profiling file by tottime: ```text 4696 12.040 0.003 12.040 0.003 {built-in method run} 300005 0.874 0.000 1.681 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/dataset/mnist.py:38(reader) - 107991 0.676 0.000 1.519 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:219(__init__) - 4697 0.626 0.000 2.291 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:428(sync_with_cpp) - 1 0.618 0.618 0.618 0.618 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/__init__.py:1() + 107991 0.676 0.000 1.519 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:219(__init__) + 4697 0.626 0.000 2.291 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:428(sync_with_cpp) + 1 0.618 0.618 0.618 0.618 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/__init__.py:1() ``` We can see that the most time-consuming function is the `built-in @@ -93,9 +93,9 @@ Called By: Function was called by... ncalls tottime cumtime -/home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:428(sync_with_cpp) <- 4697 0.626 2.291 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:562(sync_with_cpp) -/home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:562(sync_with_cpp) <- 4696 0.019 2.316 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:487(clone) - 1 0.000 0.001 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:534(append_backward) +/home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:428(sync_with_cpp) <- 4697 0.626 2.291 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:562(sync_with_cpp) +/home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:562(sync_with_cpp) <- 4696 0.019 2.316 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:487(clone) + 1 0.000 0.001 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:534(append_backward) Called: diff --git a/doc/howto/read_source.md b/doc/howto/read_source.md index 31987920f3..edf46aff8c 100644 --- a/doc/howto/read_source.md +++ b/doc/howto/read_source.md @@ -1,6 +1,6 @@ # PaddlePaddle Fluid Source Code Overview -Examples: https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/v2/fluid/tests/book +Examples: https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/fluid/tests/book Core: https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/framework @@ -26,16 +26,16 @@ sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) sgd_optimizer.minimize(avg_cost) ``` -- Variables: `x`, `y`, `y_predict`, `cost` and `avg_cost`. [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/framework.py#) -- Layers: `fluid.layers.data`, `fluid.layers.fc` and `fluid.layers.mean` are layers. [Python](https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/v2/fluid/layers) +- Variables: `x`, `y`, `y_predict`, `cost` and `avg_cost`. [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/framework.py#) +- Layers: `fluid.layers.data`, `fluid.layers.fc` and `fluid.layers.mean` are layers. [Python](https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/fluid/layers) - Every Layer has one or more operators and variables/parameters - All the operators are defined at [`paddle/operators/`](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators). Other worth-looking files: - Base class: [`paddle/framework/operator.h`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/operator.h) - Operator Registration: [`paddle/framework/op_registry.h`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/op_registry.h) - Operator Lookup: [`paddle/framework/op_info.h`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/op_info.h) - Optimizer: `fluid.optimizer.SGD`. It does the following - - Add backward operators. [[Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/backward.py)] - - Add optimizer operators. [[Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/optimizer.py)] + - Add backward operators. [[Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/backward.py)] + - Add optimizer operators. [[Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/optimizer.py)] # Run Time @@ -57,7 +57,7 @@ exe.run(fluid.default_main_program(), - Place: `place`. one of CPU, GPU or FPGA. [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/place.h) - The device handle are at [paddle/platform/device_context.h](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/device_context.h) -- Executor: `fluid.Executor(place)`. [[Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/executor.py), [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/executor.cc)] +- Executor: `fluid.Executor(place)`. [[Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/executor.py), [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/executor.cc)] - Feeds the data: `feed=feeder.feed(data)` - Evaluates all the operators - Fetches the result: `fetch_list=[avg_cost]` From 962326b06e9cc12ae9dc3b1ef353202be14f2a35 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Mon, 26 Feb 2018 10:39:53 +0800 Subject: [PATCH 05/11] move doc/api/v2/fluid to doc/api/fluid --- doc/api/{v2 => }/fluid/data_feeder.rst | 0 doc/api/{v2 => }/fluid/evaluator.rst | 0 doc/api/{v2 => }/fluid/executor.rst | 0 doc/api/{v2 => }/fluid/gen_doc.py | 0 doc/api/{v2 => }/fluid/gen_doc.sh | 0 doc/api/fluid/index.rst | 18 ++++++++++++++++++ doc/api/{v2 => }/fluid/initializer.rst | 0 doc/api/{v2 => }/fluid/io.rst | 0 doc/api/{v2 => }/fluid/layers.rst | 0 doc/api/{v2 => }/fluid/nets.rst | 0 doc/api/{v2 => }/fluid/optimizer.rst | 0 doc/api/{v2 => }/fluid/param_attr.rst | 0 doc/api/{v2 => }/fluid/profiler.rst | 0 doc/api/{v2 => }/fluid/regularizer.rst | 0 doc/api/index_en.rst | 2 +- doc/api/v2/fluid.rst | 18 ------------------ 16 files changed, 19 insertions(+), 19 deletions(-) rename doc/api/{v2 => }/fluid/data_feeder.rst (100%) rename doc/api/{v2 => }/fluid/evaluator.rst (100%) rename doc/api/{v2 => }/fluid/executor.rst (100%) rename doc/api/{v2 => }/fluid/gen_doc.py (100%) rename doc/api/{v2 => }/fluid/gen_doc.sh (100%) create mode 100644 doc/api/fluid/index.rst rename doc/api/{v2 => }/fluid/initializer.rst (100%) rename doc/api/{v2 => }/fluid/io.rst (100%) rename doc/api/{v2 => }/fluid/layers.rst (100%) rename doc/api/{v2 => }/fluid/nets.rst (100%) rename doc/api/{v2 => }/fluid/optimizer.rst (100%) rename doc/api/{v2 => }/fluid/param_attr.rst (100%) rename doc/api/{v2 => }/fluid/profiler.rst (100%) rename doc/api/{v2 => }/fluid/regularizer.rst (100%) delete mode 100644 doc/api/v2/fluid.rst diff --git a/doc/api/v2/fluid/data_feeder.rst b/doc/api/fluid/data_feeder.rst similarity index 100% rename from doc/api/v2/fluid/data_feeder.rst rename to doc/api/fluid/data_feeder.rst diff --git a/doc/api/v2/fluid/evaluator.rst b/doc/api/fluid/evaluator.rst similarity index 100% rename from doc/api/v2/fluid/evaluator.rst rename to doc/api/fluid/evaluator.rst diff --git a/doc/api/v2/fluid/executor.rst b/doc/api/fluid/executor.rst similarity index 100% rename from doc/api/v2/fluid/executor.rst rename to doc/api/fluid/executor.rst diff --git a/doc/api/v2/fluid/gen_doc.py b/doc/api/fluid/gen_doc.py similarity index 100% rename from doc/api/v2/fluid/gen_doc.py rename to doc/api/fluid/gen_doc.py diff --git a/doc/api/v2/fluid/gen_doc.sh b/doc/api/fluid/gen_doc.sh similarity index 100% rename from doc/api/v2/fluid/gen_doc.sh rename to doc/api/fluid/gen_doc.sh diff --git a/doc/api/fluid/index.rst b/doc/api/fluid/index.rst new file mode 100644 index 0000000000..b0710d8b19 --- /dev/null +++ b/doc/api/fluid/index.rst @@ -0,0 +1,18 @@ +====================== +Fluid +====================== + +.. toctree:: + :maxdepth: 1 + + layers.rst + data_feeder.rst + executor.rst + initializer.rst + evaluator.rst + nets.rst + optimizer.rst + param_attr.rst + profiler.rst + regularizer.rst + io.rst diff --git a/doc/api/v2/fluid/initializer.rst b/doc/api/fluid/initializer.rst similarity index 100% rename from doc/api/v2/fluid/initializer.rst rename to doc/api/fluid/initializer.rst diff --git a/doc/api/v2/fluid/io.rst b/doc/api/fluid/io.rst similarity index 100% rename from doc/api/v2/fluid/io.rst rename to doc/api/fluid/io.rst diff --git a/doc/api/v2/fluid/layers.rst b/doc/api/fluid/layers.rst similarity index 100% rename from doc/api/v2/fluid/layers.rst rename to doc/api/fluid/layers.rst diff --git a/doc/api/v2/fluid/nets.rst b/doc/api/fluid/nets.rst similarity index 100% rename from doc/api/v2/fluid/nets.rst rename to doc/api/fluid/nets.rst diff --git a/doc/api/v2/fluid/optimizer.rst b/doc/api/fluid/optimizer.rst similarity index 100% rename from doc/api/v2/fluid/optimizer.rst rename to doc/api/fluid/optimizer.rst diff --git a/doc/api/v2/fluid/param_attr.rst b/doc/api/fluid/param_attr.rst similarity index 100% rename from doc/api/v2/fluid/param_attr.rst rename to doc/api/fluid/param_attr.rst diff --git a/doc/api/v2/fluid/profiler.rst b/doc/api/fluid/profiler.rst similarity index 100% rename from doc/api/v2/fluid/profiler.rst rename to doc/api/fluid/profiler.rst diff --git a/doc/api/v2/fluid/regularizer.rst b/doc/api/fluid/regularizer.rst similarity index 100% rename from doc/api/v2/fluid/regularizer.rst rename to doc/api/fluid/regularizer.rst diff --git a/doc/api/index_en.rst b/doc/api/index_en.rst index 77337982be..fc8dbd07eb 100644 --- a/doc/api/index_en.rst +++ b/doc/api/index_en.rst @@ -8,4 +8,4 @@ API v2/model_configs.rst v2/data.rst v2/run_logic.rst - v2/fluid.rst + fluid/index.rst diff --git a/doc/api/v2/fluid.rst b/doc/api/v2/fluid.rst deleted file mode 100644 index 5f15cad2b5..0000000000 --- a/doc/api/v2/fluid.rst +++ /dev/null @@ -1,18 +0,0 @@ -====================== -Fluid -====================== - -.. toctree:: - :maxdepth: 1 - - fluid/layers.rst - fluid/data_feeder.rst - fluid/executor.rst - fluid/initializer.rst - fluid/evaluator.rst - fluid/nets.rst - fluid/optimizer.rst - fluid/param_attr.rst - fluid/profiler.rst - fluid/regularizer.rst - fluid/io.rst From ea9e62b8fcf7326a17a8cd72f1f9171807f92d8a Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 26 Feb 2018 10:45:03 +0800 Subject: [PATCH 06/11] optimize code --- python/paddle/fluid/optimizer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 9309ec3916..93a19de92e 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -38,13 +38,13 @@ class Optimizer(object): def __init__(self, learning_rate, global_step=None, regularization=None): if not isinstance(learning_rate, float) and \ not isinstance(learning_rate, framework.Variable): - raise ValueError("learning rate should be float or Variable") + raise TypeError("learning rate should be float or Variable") self._global_step = global_step self.regularization = regularization self._learning_rate = learning_rate # each program should have a independent learning rate # program -> Variable(learning_rate) - self._learning_rate_map = defaultdict(lambda: None) + self._learning_rate_map = dict() if isinstance(self._learning_rate, framework.Variable): self._learning_rate_map[framework.default_main_program( )] = self._learning_rate @@ -62,7 +62,7 @@ class Optimizer(object): return else: if not isinstance(self._learning_rate, float): - raise ValueError( + raise TypeError( "learning rate variable is create outside optimizer," "can not create new learning rate variable for new program") @@ -82,7 +82,7 @@ class Optimizer(object): """ if program is None: program = framework.default_main_program() - return self._learning_rate_map[program] + return self._learning_rate_map.get(program, None) def _append_optimize_op(self, block, param_and_grad): """ append optimize operator to block and return all the added optimize_op From 777a281a4fa00b8975ae85aa0fbec39a0cac4ac0 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 26 Feb 2018 10:48:53 +0800 Subject: [PATCH 07/11] Fix CI and enhance gitignore --- .gitignore | 1 - python/paddle/fluid/.gitignore | 1 + python/paddle/fluid/layers/layer_function_generator.py | 8 ++++++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index fe0d13f4d9..2badc3bdaa 100644 --- a/.gitignore +++ b/.gitignore @@ -27,7 +27,6 @@ third_party/ cmake-build-* # generated while compiling -python/paddle/v2/fluid/core.so paddle/pybind/pybind.h CMakeFiles cmake_install.cmake diff --git a/python/paddle/fluid/.gitignore b/python/paddle/fluid/.gitignore index 2ff540d576..80c1cf3fcb 100644 --- a/python/paddle/fluid/.gitignore +++ b/python/paddle/fluid/.gitignore @@ -1 +1,2 @@ proto +core.so diff --git a/python/paddle/fluid/layers/layer_function_generator.py b/python/paddle/fluid/layers/layer_function_generator.py index 16a401dc7b..bd79022a0c 100644 --- a/python/paddle/fluid/layers/layer_function_generator.py +++ b/python/paddle/fluid/layers/layer_function_generator.py @@ -130,7 +130,7 @@ def generate_layer_fn(op_type): o_name = not_intermediate_outputs[0].name intermediate_output_names = [output.name for output in intermediate_outputs] - def infer_and_check_dtype(op_proto, **kwargs): + def infer_and_check_dtype(op_proto, *args, **kwargs): """ This function performs the sanity check for dtype and instance type. @@ -141,6 +141,10 @@ def generate_layer_fn(op_type): val = kwargs.pop(name, []) if not isinstance(val, list) and not isinstance(val, tuple): val = [val] + if len(val) == 0: + val = [args[0]] + args = args[1:] + for each in val: if not isinstance(each, Variable): raise ValueError("input of {0} must be variable".format( @@ -158,7 +162,7 @@ def generate_layer_fn(op_type): def func(*args, **kwargs): helper = LayerHelper(op_type, **kwargs) - dtype = infer_and_check_dtype(op_proto, **kwargs) + dtype = infer_and_check_dtype(op_proto, *args, **kwargs) inputs = dict() for ipt in op_proto.inputs: From 84b7b4b96d9b2ebb83fc6e96604bb85380a5abff Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 26 Feb 2018 14:25:38 +0800 Subject: [PATCH 08/11] add c-api quick start --- doc/howto/capi/index_cn.rst | 1 + doc/howto/capi/quick_start.md | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 doc/howto/capi/quick_start.md diff --git a/doc/howto/capi/index_cn.rst b/doc/howto/capi/index_cn.rst index e589a6d346..6981c7ce38 100644 --- a/doc/howto/capi/index_cn.rst +++ b/doc/howto/capi/index_cn.rst @@ -4,6 +4,7 @@ C-API预测库 .. toctree:: :maxdepth: 1 + quick_start.md compile_paddle_lib_cn.md organization_of_the_inputs_cn.md workflow_of_capi_cn.md diff --git a/doc/howto/capi/quick_start.md b/doc/howto/capi/quick_start.md new file mode 100644 index 0000000000..cb696b6959 --- /dev/null +++ b/doc/howto/capi/quick_start.md @@ -0,0 +1,23 @@ +## 开始使用 + +### 概述 +当我们训练完一个神经网络模型之后,下一步就是用模型来做预测。预测就是准备输入数据,经过模型处理之后,得到预测结果的过程。 + +相比于模型训练,预测有如下特点: + +1. 预测不需要训练过程中反向传播和参数更新的部分。 +1. 预测不需要标签(label)。 +1. 预测很多时候需要和用户系统整合在一起。 + +因为上述特点,模型预测SDK需要单独设计,并具备以下特点: + +1. 预测SDK不包含反向传播和参数更新部分,以减小SDK的体积。 +1. 预测SDK需要提供一个简洁的用户接口,方便使用。 +1. 因为输入数据可能有多种结构,对输入数据的格式做清晰简洁的封装。 +1. 为了和用户系统兼容,SDK的接口需要是满足C标准的接口。 + +PaddlePaddle提供了C-API,用于解决上述问题。关于C-API的使用,我们提供了如下指南: + +1. [C-API使用流程](./workflow_of_capi_cn.md) +1. [安装与编译C-API预测库](./compile_paddle_lib_cn.md) +1. [输入/输出数据组织](./organization_of_the_inputs_cn.md) From 28668ad06e5b895d0b777b300570900af99a3897 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 26 Feb 2018 14:47:06 +0800 Subject: [PATCH 09/11] move quick start to index_cn.rst --- doc/howto/capi/index_cn.rst | 17 +++++++++++++++++ doc/howto/capi/quick_start.md | 23 ----------------------- 2 files changed, 17 insertions(+), 23 deletions(-) delete mode 100644 doc/howto/capi/quick_start.md diff --git a/doc/howto/capi/index_cn.rst b/doc/howto/capi/index_cn.rst index 6981c7ce38..e240a3e3dc 100644 --- a/doc/howto/capi/index_cn.rst +++ b/doc/howto/capi/index_cn.rst @@ -1,6 +1,23 @@ C-API预测库 ================== +当我们训练完一个神经网络模型之后,下一步就是用模型来做预测。预测就是准备输入数据,经过模型处理之后,得到预测结果的过程。 + +相比于模型训练,预测有如下特点: + +1. 预测不需要训练过程中反向传播和参数更新的部分。 +1. 预测不需要标签(label)。 +1. 预测很多时候需要和用户系统整合在一起。 + +因为上述特点,模型预测SDK需要单独设计,并具备以下特点: + +1. 预测SDK不包含反向传播和参数更新部分,以减小SDK的体积。 +1. 预测SDK需要提供一个简洁的用户接口,方便使用。 +1. 因为输入数据可能有多种结构,对输入数据的格式做清晰简洁的封装。 +1. 为了和用户系统兼容,SDK的接口需要是满足C标准的接口。 + +PaddlePaddle提供了C-API,用于解决上述问题。关于C-API的使用,我们提供了如下指南: + .. toctree:: :maxdepth: 1 diff --git a/doc/howto/capi/quick_start.md b/doc/howto/capi/quick_start.md deleted file mode 100644 index cb696b6959..0000000000 --- a/doc/howto/capi/quick_start.md +++ /dev/null @@ -1,23 +0,0 @@ -## 开始使用 - -### 概述 -当我们训练完一个神经网络模型之后,下一步就是用模型来做预测。预测就是准备输入数据,经过模型处理之后,得到预测结果的过程。 - -相比于模型训练,预测有如下特点: - -1. 预测不需要训练过程中反向传播和参数更新的部分。 -1. 预测不需要标签(label)。 -1. 预测很多时候需要和用户系统整合在一起。 - -因为上述特点,模型预测SDK需要单独设计,并具备以下特点: - -1. 预测SDK不包含反向传播和参数更新部分,以减小SDK的体积。 -1. 预测SDK需要提供一个简洁的用户接口,方便使用。 -1. 因为输入数据可能有多种结构,对输入数据的格式做清晰简洁的封装。 -1. 为了和用户系统兼容,SDK的接口需要是满足C标准的接口。 - -PaddlePaddle提供了C-API,用于解决上述问题。关于C-API的使用,我们提供了如下指南: - -1. [C-API使用流程](./workflow_of_capi_cn.md) -1. [安装与编译C-API预测库](./compile_paddle_lib_cn.md) -1. [输入/输出数据组织](./organization_of_the_inputs_cn.md) From 901dabc7f50e66d2efe18498acc8a9953f973150 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 26 Feb 2018 14:48:01 +0800 Subject: [PATCH 10/11] clean --- doc/howto/capi/index_cn.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/howto/capi/index_cn.rst b/doc/howto/capi/index_cn.rst index e240a3e3dc..8197c4bc1c 100644 --- a/doc/howto/capi/index_cn.rst +++ b/doc/howto/capi/index_cn.rst @@ -21,7 +21,6 @@ PaddlePaddle提供了C-API,用于解决上述问题。关于C-API的使用, .. toctree:: :maxdepth: 1 - quick_start.md compile_paddle_lib_cn.md organization_of_the_inputs_cn.md workflow_of_capi_cn.md From 13922fb489dec953262a0b9e2dbe4c99c1c7296a Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 26 Feb 2018 15:13:07 +0800 Subject: [PATCH 11/11] fix rst format --- doc/howto/capi/index_cn.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/doc/howto/capi/index_cn.rst b/doc/howto/capi/index_cn.rst index 8197c4bc1c..7f10071798 100644 --- a/doc/howto/capi/index_cn.rst +++ b/doc/howto/capi/index_cn.rst @@ -5,16 +5,16 @@ C-API预测库 相比于模型训练,预测有如下特点: -1. 预测不需要训练过程中反向传播和参数更新的部分。 -1. 预测不需要标签(label)。 -1. 预测很多时候需要和用户系统整合在一起。 +#. 预测不需要训练过程中反向传播和参数更新的部分。 +#. 预测不需要标签(label)。 +#. 预测很多时候需要和用户系统整合在一起。 因为上述特点,模型预测SDK需要单独设计,并具备以下特点: -1. 预测SDK不包含反向传播和参数更新部分,以减小SDK的体积。 -1. 预测SDK需要提供一个简洁的用户接口,方便使用。 -1. 因为输入数据可能有多种结构,对输入数据的格式做清晰简洁的封装。 -1. 为了和用户系统兼容,SDK的接口需要是满足C标准的接口。 +#. 预测SDK不包含反向传播和参数更新部分,以减小SDK的体积。 +#. 预测SDK需要提供一个简洁的用户接口,方便使用。 +#. 因为输入数据可能有多种结构,对输入数据的格式做清晰简洁的封装。 +#. 为了和用户系统兼容,SDK的接口需要是满足C标准的接口。 PaddlePaddle提供了C-API,用于解决上述问题。关于C-API的使用,我们提供了如下指南: