Optimizer use init program (#5275)

* optimizer use init_program

* create persistable variable

* add create_persistable_var to block

* optimizer use create_persistable_var

* fix prefix

* move create_global_persistable_var from Block to LayerHelper

* Polish Optimizer initialization code.

* Using the LayerHelper to create initialize operator and variables

* add_accumulator should use an independent data type

* default use param data type for accumulator
fix-typo
Qiao Longfei 7 years ago committed by GitHub
parent 90f4d5e904
commit f48159ade0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -7,6 +7,11 @@ import copy
__all__ = ['Block', 'Variable', 'Program', 'Operator']
def unique_name(prefix):
uid = core.unique_integer(prefix) # unique during whole process.
return "_".join([prefix, str(uid)])
class Variable(object):
def __init__(self,
block,

@ -1,19 +1,12 @@
import copy
import itertools
import paddle.v2.framework.core as core
from paddle.v2.framework.framework import Variable, g_program, \
g_init_program
g_init_program, unique_name, Program
from paddle.v2.framework.initializer import ConstantInitializer, \
UniformInitializer
def unique_name(prefix):
uid = core.unique_integer(prefix) # unique during whole process.
return "_".join([prefix, str(uid)])
class LayerHelper(object):
def __init__(self, layer_type, **kwargs):
self.kwargs = kwargs
@ -138,9 +131,19 @@ class LayerHelper(object):
def create_variable(self, *args, **kwargs):
return self.program.current_block().create_var(*args, **kwargs)
def create_global_variable(self, *args, **kwargs):
def create_global_variable(self, persistable=False, *args, **kwargs):
return self.program.global_block().create_var(
*args, persistable=False, **kwargs)
*args, persistable=persistable, **kwargs)
def set_variable_initializer(self, var, initializer):
assert isinstance(var, Variable)
self.init_program.global_block().create_var(
name=var.name,
type=var.type,
dtype=var.data_type,
shape=var.shape,
persistable=True,
initializer=initializer)
def append_bias_op(self, input_var, num_flatten_dims=None):
"""

File diff suppressed because it is too large Load Diff

@ -36,7 +36,7 @@ cost = layers.square_error_cost(
avg_cost = layers.mean(x=cost, program=program, init_program=init_program)
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost)
opts = sgd_optimizer.minimize(avg_cost, init_program)
BATCH_SIZE = 20

@ -208,7 +208,7 @@ cost = layers.cross_entropy(
avg_cost = layers.mean(x=cost, program=program, init_program=init_program)
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost)
opts = sgd_optimizer.minimize(avg_cost, init_program)
BATCH_SIZE = 128
PASS_NUM = 1

@ -44,7 +44,7 @@ class TestBook(unittest.TestCase):
x=cost, program=program, init_program=init_program)
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost)
opts = sgd_optimizer.minimize(avg_cost, init_program)
place = core.CPUPlace()
exe = executor.Executor(place)

@ -7,6 +7,7 @@ from paddle.v2.framework.backward import append_backward_ops
class TestOptimizer(unittest.TestCase):
def test_sgd_optimizer(self):
init_program = framework.Program()
program = framework.Program()
block = program.global_block()
mul_x = block.create_parameter(
@ -22,12 +23,13 @@ class TestOptimizer(unittest.TestCase):
outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1})
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.01)
opts = sgd_optimizer.minimize(mul_out)
opts = sgd_optimizer.minimize(mul_out, init_program)
self.assertEqual(len(opts), 1)
sgd_op = opts[0]
self.assertEqual(sgd_op.type, "sgd")
def test_sgd_optimizer_with_global_step(self):
init_program = framework.Program()
program = framework.Program()
block = program.global_block()
mul_x = block.create_parameter(
@ -44,15 +46,22 @@ class TestOptimizer(unittest.TestCase):
attrs={"x_num_col_dims": 1})
global_step = block.create_var(
dtype="float32", shape=[1], lod_level=0, name="step")
learning_rate = 0.01
sgd_optimizer = optimizer.SGDOptimizer(
learning_rate=0.01, global_step=global_step)
opts = sgd_optimizer.minimize(mul_out)
learning_rate=learning_rate, global_step=global_step)
opts = sgd_optimizer.minimize(mul_out, init_program)
self.assertEqual(len(opts), 2)
sgd_op = opts[0]
self.assertEqual(sgd_op.type, "sgd")
increment_op = opts[1]
self.assertEqual(increment_op.type, "increment")
# Check init_program
init_ops = init_program.global_block().ops
self.assertEqual(len(init_ops), 1)
self.assertEqual(init_ops[0].type, "fill_constant")
self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
class TestMomentumOptimizer(unittest.TestCase):
class MockMomentum(optimizer.MomentumOptimizer):
@ -63,6 +72,7 @@ class TestMomentumOptimizer(unittest.TestCase):
return self._velocity_acc_str
def test_vanilla_momentum_optimizer(self):
init_program = framework.Program()
program = framework.Program()
block = program.global_block()
mul_x = block.create_parameter(
@ -77,12 +87,14 @@ class TestMomentumOptimizer(unittest.TestCase):
"Y": mul_y},
outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1})
momentum_optimizer = self.MockMomentum(learning_rate=0.01, momentum=0.2)
learning_rate = 0.01
momentum_optimizer = self.MockMomentum(
learning_rate=learning_rate, momentum=0.2)
params_grads = append_backward_ops(mul_out)
self.assertEqual(len(params_grads), 1)
self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
opts = momentum_optimizer.create_optimization_pass(params_grads,
mul_out)
opts = momentum_optimizer.create_optimization_pass(
params_grads, mul_out, init_program)
self.assertEqual(len(opts), 1)
sgd_op = opts[0]
self.assertEqual(sgd_op.type, "momentum")
@ -96,7 +108,16 @@ class TestMomentumOptimizer(unittest.TestCase):
self.assertEqual(len(velocity_acc), 1)
self.assertTrue(mul_x.name in velocity_acc)
# Check init_program
init_ops = init_program.global_block().ops
self.assertEqual(len(init_ops), 2)
self.assertEqual(init_ops[0].type, "fill_constant")
self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
self.assertEqual(init_ops[1].type, "fill_constant")
self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)
def test_nesterov_momentum_optimizer(self):
init_program = framework.Program()
program = framework.Program()
block = program.global_block()
mul_x = block.create_parameter(
@ -111,13 +132,14 @@ class TestMomentumOptimizer(unittest.TestCase):
"Y": mul_y},
outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1})
learning_rate = 0.01
momentum_optimizer = self.MockMomentum(
learning_rate=0.01, momentum=0.2, use_nesterov=True)
learning_rate=learning_rate, momentum=0.2, use_nesterov=True)
params_grads = append_backward_ops(mul_out)
self.assertEqual(len(params_grads), 1)
self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
opts = momentum_optimizer.create_optimization_pass(params_grads,
mul_out)
opts = momentum_optimizer.create_optimization_pass(
params_grads, mul_out, init_program)
self.assertEqual(len(opts), 1)
sgd_op = opts[0]
self.assertEqual(sgd_op.type, "momentum")
@ -131,6 +153,14 @@ class TestMomentumOptimizer(unittest.TestCase):
self.assertEqual(len(velocity_acc), 1)
self.assertTrue(mul_x.name in velocity_acc)
# Check init_program
init_ops = init_program.global_block().ops
self.assertEqual(len(init_ops), 2)
self.assertEqual(init_ops[0].type, "fill_constant")
self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
self.assertEqual(init_ops[1].type, "fill_constant")
self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)
class TestAdagradOptimizer(unittest.TestCase):
class MockAdagrad(optimizer.AdagradOptimizer):
@ -141,6 +171,7 @@ class TestAdagradOptimizer(unittest.TestCase):
return self._moment_acc_str
def test_adagrad_optimizer(self):
init_program = framework.Program()
program = framework.Program()
block = program.global_block()
mul_x = block.create_parameter(
@ -155,11 +186,14 @@ class TestAdagradOptimizer(unittest.TestCase):
"Y": mul_y},
outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1})
adagrad_optimizer = self.MockAdagrad(learning_rate=0.01, epsilon=1.0e-6)
learning_rate = 0.01
adagrad_optimizer = self.MockAdagrad(
learning_rate=learning_rate, epsilon=1.0e-6)
params_grads = append_backward_ops(mul_out)
self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adagrad_optimizer.get_accumulators()), 0)
opts = adagrad_optimizer.create_optimization_pass(params_grads, mul_out)
opts = adagrad_optimizer.create_optimization_pass(params_grads, mul_out,
init_program)
self.assertEqual(len(opts), 1)
adagrad_op = opts[0]
self.assertEqual(adagrad_op.type, "adagrad")
@ -172,6 +206,14 @@ class TestAdagradOptimizer(unittest.TestCase):
self.assertEqual(len(moment_acc), 1)
self.assertTrue(mul_x.name in moment_acc)
# Check init_program
init_ops = init_program.global_block().ops
self.assertEqual(len(init_ops), 2)
self.assertEqual(init_ops[0].type, "fill_constant")
self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
self.assertEqual(init_ops[1].type, "fill_constant")
self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)
class TestAdamOptimizer(unittest.TestCase):
class MockAdam(optimizer.AdamOptimizer):
@ -185,6 +227,7 @@ class TestAdamOptimizer(unittest.TestCase):
return self._moment2_acc_str
def test_adam_optimizer(self):
init_program = framework.Program()
program = framework.Program()
block = program.global_block()
mul_x = block.create_parameter(
@ -199,12 +242,14 @@ class TestAdamOptimizer(unittest.TestCase):
"Y": mul_y},
outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1})
learning_rate = 0.01
adam_optimizer = self.MockAdam(
learning_rate=0.01, beta1=0.9, beta2=0.999)
learning_rate=learning_rate, beta1=0.9, beta2=0.999)
params_grads = append_backward_ops(mul_out)
self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adam_optimizer.get_accumulators()), 0)
opts = adam_optimizer.create_optimization_pass(params_grads, mul_out)
opts = adam_optimizer.create_optimization_pass(params_grads, mul_out,
init_program)
self.assertEqual(len(opts), 3)
adam_op = opts[0]
self.assertEqual(adam_op.type, "adam")
@ -221,6 +266,12 @@ class TestAdamOptimizer(unittest.TestCase):
self.assertTrue(mul_x.name in moment1_acc)
self.assertTrue(mul_x.name in moment2_acc)
# Check init_program
init_ops = init_program.global_block().ops
self.assertEqual(len(init_ops), 5)
self.assertEqual(init_ops[0].type, "fill_constant")
self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
class TestAdamaxOptimizer(unittest.TestCase):
class MockAdamax(optimizer.AdamaxOptimizer):
@ -234,6 +285,7 @@ class TestAdamaxOptimizer(unittest.TestCase):
return self._inf_norm_acc_str
def test_adamax_optimizer(self):
init_program = framework.Program()
program = framework.Program()
block = program.global_block()
mul_x = block.create_parameter(
@ -248,12 +300,14 @@ class TestAdamaxOptimizer(unittest.TestCase):
"Y": mul_y},
outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1})
learning_rate = 0.01
adamax_optimizer = self.MockAdamax(
learning_rate=0.01, beta1=0.9, beta2=0.999)
learning_rate=learning_rate, beta1=0.9, beta2=0.999)
params_grads = append_backward_ops(mul_out)
self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adamax_optimizer.get_accumulators()), 0)
opts = adamax_optimizer.create_optimization_pass(params_grads, mul_out)
opts = adamax_optimizer.create_optimization_pass(params_grads, mul_out,
init_program)
self.assertEqual(len(opts), 2)
adam_op = opts[0]
self.assertEqual(adam_op.type, "adamax")
@ -270,6 +324,12 @@ class TestAdamaxOptimizer(unittest.TestCase):
self.assertTrue(mul_x.name in moment_acc)
self.assertTrue(mul_x.name in inf_norm_acc)
# Check init_program
init_ops = init_program.global_block().ops
self.assertEqual(len(init_ops), 4)
self.assertEqual(init_ops[0].type, "fill_constant")
self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
if __name__ == '__main__':
unittest.main()

@ -54,8 +54,10 @@ avg_cost = layers.mean(x=cost, program=program)
accuracy = layers.accuracy(
input=predict, label=label, program=program, init_program=init_program)
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost)
# optimizer = optimizer.MomentumOptimizer(learning_rate=0.1 / 128.0,
# momentum=0.9)
optimizer = optimizer.AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999)
opts = optimizer.minimize(avg_cost, init_program)
BATCH_SIZE = 50
PASS_NUM = 3

@ -58,8 +58,8 @@ cost = layers.cross_entropy(
input=predict, label=label, program=program, init_program=init_program)
avg_cost = layers.mean(x=cost, program=program, init_program=init_program)
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost)
optimizer = optimizer.MomentumOptimizer(learning_rate=0.001, momentum=0.9)
opts = optimizer.minimize(avg_cost, init_program)
train_reader = paddle.batch(
paddle.reader.shuffle(
@ -89,6 +89,7 @@ for pass_id in range(PASS_NUM):
'y': tensor_y},
fetch_list=[avg_cost])
out = np.array(outs[0])
if out[0] < 5.0:
exit(0) # if avg cost less than 5.0, we think our code is good.
exit(1)

@ -109,7 +109,7 @@ cost = layers.cross_entropy(
avg_cost = layers.mean(x=cost, program=program, init_program=init_program)
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost)
opts = sgd_optimizer.minimize(avg_cost, init_program)
train_reader = paddle.batch(
paddle.dataset.imikolov.train(word_dict, N), batch_size)

Loading…
Cancel
Save