test Optimizer in dygraph (#21949)

* test Optimizer in dygraph, test=develop

* add optest for Optimizer in dygraph, test=develop

* fix adagrad optimizer, test=develop

* fix dpsgd optimizer, test=develop

* fix test_optimizer.py, test=develop

* fix dpsgd optimizer; this op only supports CPU, test=develop

* add optest for optimizer, test=develop

* add description for dpsgd, test=develop

* add rmsprop to white_list in unused_var_check.cc, test=develop

* polish code style, test=develop

* polish code style, test=develop

* delete seed attribute for DpsgdOptimizer, test=develop

* change testing to debugging, test=develop
Branch: release/1.7
Author: zhongpu, committed by Leo Chen
Parent: fffcd48c24
Commit: d0f0a2520c

@@ -57,7 +57,8 @@ const std::unordered_set<std::string> op_has_unsed_vars_white_list = {
     "warpctc_grad",
     "sync_batch_norm",
     "match_matrix_tensor_grad",
-    "ngraph_engine"};
+    "ngraph_engine",
+    "rmsprop"};

 namespace paddle {
 namespace framework {

@@ -83,6 +83,16 @@ class DpsgdOpMaker : public framework::OpProtoAndCheckerMaker {
                    "(float, default 1.0e-8) "
                    "Constant for numerical stability")
         .SetDefault(1.0f);
+    AddAttr<int>(
+        "seed",
+        "(int, default 0) "
+        "This property is only used for debugging, users do not need to set it. "
+        "Random seed for generating samples. If seed is set to 0, this "
+        "operator will use the "
+        "system's random number seed, otherwise, this operator will always "
+        "generate the same random "
+        "number every time.")
+        .SetDefault(0);
     AddComment(R"DOC(
 Dpsgd Optimizer.

@@ -79,16 +79,14 @@ class DpsgdOpKernel : public framework::OpKernel<T> {
     float X;
     float mu = 0.0;
     float U1, U2;
-    unsigned seed = (unsigned int)(time(NULL));
+    unsigned seed = static_cast<unsigned int>(ctx.Attr<int>("seed"));
+    if (seed == 0) {
+      seed = (unsigned)(time(NULL));
+    }
     std::minstd_rand engine;
     engine.seed(seed);
     std::uniform_real_distribution<T> dist(0.0, 1.0);
     do {
-      // srand((unsigned int)(time(NULL)));
-      // U1 = (rand() * 1.0) / RAND_MAX;
-      // U2 = (rand() * 1.0) / RAND_MAX;
-      // U1 = rand_rr(&seed) * (1.0 / RAND_MAX);
-      // U2 = rand_rr(&seed) * (1.0 / RAND_MAX);
       U1 = dist(engine);
       U2 = dist(engine);
       V1 = 2 * U1 - 1;
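
Note: with seed == 0 (the default) the kernel falls back to the old time(NULL) behaviour, while any non-zero seed makes the injected noise reproducible, which is what the new op tests need. The truncated do/while above looks like a Marsaglia polar-method draw of Gaussian noise; below is a minimal Python sketch of that logic under that assumption (the function name and the tail of the loop are illustrative, not taken verbatim from the kernel).

import math
import random
import time

def dpsgd_noise(seed=0):
    # seed == 0 keeps the old non-deterministic behaviour; a fixed,
    # non-zero seed gives repeatable noise for unit tests.
    if seed == 0:
        seed = int(time.time())
    engine = random.Random(seed)  # stands in for std::minstd_rand
    while True:
        U1 = engine.uniform(0.0, 1.0)
        U2 = engine.uniform(0.0, 1.0)
        V1 = 2 * U1 - 1
        V2 = 2 * U2 - 1
        S = V1 * V1 + V2 * V2
        if 0.0 < S < 1.0:  # polar-method acceptance test
            break
    return V1 * math.sqrt(-2.0 * math.log(S) / S)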

@@ -1030,6 +1030,8 @@ class DGCMomentumOptimizer(Optimizer):
                  num_trainers=None,
                  regularization=None,
                  name=None):
+        if framework.in_dygraph_mode():
+            raise Exception("In dygraph, don't support DGCMomentumOptimizer.")
         assert learning_rate is not None
         assert momentum is not None
         super(DGCMomentumOptimizer, self).__init__(
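
Note: the same guard is added to DGCMomentumOptimizer, ModelAverage, ExponentialMovingAverage, PipelineOptimizer, RecomputeOptimizer and LookaheadOptimizer below, since these optimizers manipulate the static-graph program and are not supported in dygraph mode. A minimal sketch of the pattern (the class name here is hypothetical, not part of the PR):

import paddle.fluid.framework as framework

class StaticGraphOnlyOptimizer(object):
    """Hypothetical example of the guard used throughout this PR."""

    def __init__(self, inner_optimizer):
        # Fail fast with a clear message instead of breaking later inside
        # program rewriting, which does not exist in dygraph (imperative) mode.
        if framework.in_dygraph_mode():
            raise Exception(
                "In dygraph, don't support StaticGraphOnlyOptimizer.")
        self._optimizer = inner_optimizer
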
@@ -1526,24 +1528,16 @@ class AdagradOptimizer(Optimizer):
         assert isinstance(block, framework.Block)

         for p in parameters:
-            self._add_accumulator(self._moment_acc_str, p)
+            self._add_accumulator(
+                self._moment_acc_str,
+                p,
+                fill_value=self.initial_accumulator_value)

     def _append_optimize_op(self, block, param_and_grad):
         assert isinstance(block, framework.Block)

         moment_acc = self._get_accumulator(self._moment_acc_str,
                                            param_and_grad[0])
-        startup_block = framework.default_startup_program().global_block()
-        startup_block.append_op(
-            type='fill_constant',
-            inputs={},
-            outputs={'Out': [moment_acc]},
-            attrs={
-                'dtype': moment_acc.dtype,
-                'value': self.initial_accumulator_value,
-                'shape': moment_acc.shape,
-            })
-
         # Create the adagrad optimizer op
         adagrad_op = block.append_op(
             type=self.type,
@@ -2031,11 +2025,21 @@ class DpsgdOptimizer(Optimizer):
         self._clip = clip
         self._batch_size = batch_size
         self._sigma = sigma
+        '''
+        Note(wangzhongpu):
+        This property is only used for debugging; users do not need to set it.
+        The Dpsgd operator uses time(NULL) as the random seed to generate random numbers.
+        However, during debugging we need deterministic results, so we set self._seed to a fixed number.
+        '''
+        self._seed = None

     def _append_optimize_op(self, block, param_and_grad):
         assert isinstance(block, framework.Block)

         # create the dpsgd optimize op
+        if self._seed == None:
+            self._seed = 0
+
         dpsgd_op = block.append_op(
             type=self.type,
             inputs={
@@ -2047,7 +2051,8 @@ class DpsgdOptimizer(Optimizer):
             attrs={
                 "clip": self._clip,
                 "batch_size": self._batch_size,
-                "sigma": self._sigma
+                "sigma": self._sigma,
+                "seed": self._seed
             },
             stop_gradient=True)
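
Note: _seed stays None by default and is only forced to 0 right before the op is built, so regular training keeps the time-based noise; tests can pin it for deterministic output. A rough usage sketch, assuming the fluid 1.7 static-graph API (the clip/batch_size/sigma values are illustrative, not defaults):

import paddle.fluid as fluid

main, startup = fluid.Program(), fluid.Program()
with fluid.program_guard(main, startup):
    x = fluid.layers.data(name="x", shape=[13], dtype="float32")
    loss = fluid.layers.reduce_mean(fluid.layers.fc(input=x, size=1))

    opt = fluid.optimizer.DpsgdOptimizer(
        learning_rate=0.01, clip=10.0, batch_size=16.0, sigma=1.0)
    # Debug/test only: a fixed non-zero seed makes the injected noise
    # reproducible; leaving _seed unset keeps the time(NULL) behaviour.
    opt._seed = 100
    opt.minimize(loss)
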
@@ -2846,6 +2851,8 @@ class ModelAverage(Optimizer):
                  max_average_window=10000,
                  regularization=None,
                  name=None):
+        if framework.in_dygraph_mode():
+            raise Exception("In dygraph, don't support ModelAverage.")
         super(ModelAverage, self).__init__(
             0.0, regularization=regularization, name=name)
         self.average_window = average_window_rate
@@ -3159,6 +3166,9 @@ class ExponentialMovingAverage(object):
     """

     def __init__(self, decay=0.999, thres_steps=None, name=None):
+        if framework.in_dygraph_mode():
+            raise Exception(
+                "In dygraph, don't support ExponentialMovingAverage.")
         self._decay = decay
         self._thres_steps = thres_steps
         self._name = name if name is not None else ''
@@ -3380,6 +3390,8 @@ class PipelineOptimizer(object):
                  queue_size=30,
                  sync_steps=1,
                  start_cpu_core_id=0):
+        if framework.in_dygraph_mode():
+            raise Exception("In dygraph, don't support PipelineOptimizer.")
         # TODO: check properties
         self._optimizer = optimizer
         self._cut_list = cut_list
@@ -3665,6 +3677,8 @@ class RecomputeOptimizer(Optimizer):
     """

     def __init__(self, optimizer):
+        if framework.in_dygraph_mode():
+            raise Exception("In dygraph, don't support RecomputeOptimizer.")
         self._optimizer = optimizer
         self._checkpoints = None
@@ -3951,6 +3965,8 @@ class LookaheadOptimizer(object):
     def __init__(self, inner_optimizer, alpha=0.5, k=5):
+        if framework.in_dygraph_mode():
+            raise Exception("In dygraph, don't support LookaheadOptimizer.")
         assert (inner_optimizer is not None), "inner optimizer can not be None"
         assert (
             0.0 <= alpha <= 1.0

@@ -270,7 +270,7 @@ class TestAdagradOptimizer(unittest.TestCase):
         # Check init_program
         init_ops = init_program.global_block().ops
-        self.assertEqual(len(init_ops), 3)
+        self.assertEqual(len(init_ops), 2)
         self.assertEqual(init_ops[0].type, "fill_constant")
         self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
         self.assertEqual(init_ops[1].type, "fill_constant")
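
Note: the expected init-op count drops from 3 to 2 because the moment accumulator is now pre-filled through _add_accumulator(fill_value=...) instead of an extra fill_constant appended to the startup block, so the optimizer only contributes the learning-rate and moment fill_constant ops. A small sketch of how to observe this outside the unit test, assuming the fluid 1.7 API:

import paddle.fluid as fluid

main, startup = fluid.Program(), fluid.Program()
with fluid.program_guard(main, startup):
    x = fluid.layers.data(name="x", shape=[13], dtype="float32")
    loss = fluid.layers.reduce_mean(fluid.layers.fc(input=x, size=1))

    before = len(startup.global_block().ops)
    opt = fluid.optimizer.AdagradOptimizer(
        learning_rate=0.1, initial_accumulator_value=0.1)
    opt.minimize(loss)

# Ops appended to the startup block by the optimizer itself: the learning
# rate and the pre-filled moment accumulator, i.e. two fill_constant ops
# after this patch (three before it).
print(len(startup.global_block().ops) - before)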
