Adding L2 Regularization to Recognize digits MLP example (#5186)

8 years ago · 5906baa3f4
parent 79c5a46194
commit 5906baa3f4
2 changed files with 24 additions and 9 deletions
--- a/python/paddle/v2/framework/layer_helper.py
+++ b/python/paddle/v2/framework/layer_helper.py
@ -131,12 +131,14 @@ class LayerHelper(object):
        return dtype

    def create_parameter(self, attr, shape, dtype, suffix='w'):
-        if attr['name'] is None:
-            attr['name'] = unique_name(".".join([self.name, suffix]))
+        # Deepcopy the attr so that parameters can be shared in program
+        attr_copy = copy.deepcopy(attr)
+        if attr_copy['name'] is None:
+            attr_copy['name'] = unique_name(".".join([self.name, suffix]))
        self.init_program.global_block().create_parameter(
-            dtype=dtype, shape=shape, **attr)
+            dtype=dtype, shape=shape, **attr_copy)
        return self.program.global_block().create_parameter(
-            name=attr['name'], dtype=dtype, shape=shape)
+            name=attr_copy['name'], dtype=dtype, shape=shape)

    def create_tmp_variable(self, dtype):
        return self.program.current_block().create_var(
--- a/python/paddle/v2/framework/tests/test_recognize_digits_mlp.py
+++ b/python/paddle/v2/framework/tests/test_recognize_digits_mlp.py
@ -5,9 +5,11 @@ import paddle.v2.framework.optimizer as optimizer

 from paddle.v2.framework.framework import Program, g_program
 from paddle.v2.framework.executor import Executor
+from paddle.v2.framework.regularizer import L2DecayRegularizer

 import numpy as np

+BATCH_SIZE = 128
 init_program = Program()
 program = Program()
 image = layers.data(
@ -17,22 +19,35 @@ image = layers.data(
    program=program,
    init_program=init_program)

+param_attr = {
+    'name': None,
+    'init_attr': {
+        'type': 'uniform_random',
+        'min': -1.0,
+        'max': 1.0
+    },
+    'regularization': L2DecayRegularizer(0.0005 * BATCH_SIZE)
+}
+
 hidden1 = layers.fc(input=image,
                    size=128,
                    act='relu',
                    program=program,
-                    init_program=init_program)
+                    init_program=init_program,
+                    param_attr=param_attr)
 hidden2 = layers.fc(input=hidden1,
                    size=64,
                    act='relu',
                    program=program,
-                    init_program=init_program)
+                    init_program=init_program,
+                    param_attr=param_attr)

 predict = layers.fc(input=hidden2,
                    size=10,
                    act='softmax',
                    program=program,
-                    init_program=init_program)
+                    init_program=init_program,
+                    param_attr=param_attr)

 label = layers.data(
    name='y',
@ -48,8 +63,6 @@ avg_cost = layers.mean(x=cost, program=program, init_program=init_program)
 sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
 opts = sgd_optimizer.minimize(avg_cost)

-BATCH_SIZE = 128
-
 train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.mnist.train(), buf_size=8192),