Merge pull request #429 from emailweixu/math_mul

'*' operator overload for LayerOutput
9 years ago · a276684262
parent 8295eb91bf 36fa251756
commit a276684262
7 changed files with 294 additions and 49 deletions
--- a/doc/ui/api/trainer_config_helpers/layers.rst
+++ b/doc/ui/api/trainer_config_helpers/layers.rst
@@ -254,6 +254,12 @@ expand_layer
    :members: expand_layer
    :noindex:

+repeat_layer
+------------
+..  automodule:: paddle.trainer_config_helpers.layers
+    :members: repeat_layer
+    :noindex:
+
 Math Layers
 ===========

--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -3015,7 +3015,7 @@ def Layer(
    layer_func = layers.get(type)
    config_assert(layer_func,
                  "layer type '%s' not supported." % type)
-    layer_func(name, **xargs)
+    return layer_func(name, **xargs)

@config_func
 def ParameterHook(
--- a/python/paddle/trainer_config_helpers/__init__.py
+++ b/python/paddle/trainer_config_helpers/__init__.py
@@ -20,3 +20,6 @@ from layers import *
 from networks import *
 from optimizers import *
 from attrs import *
+
+# This will enable operator overload for LayerOutput
+import math
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
--- a/python/paddle/trainer_config_helpers/math.py
+++ b/python/paddle/trainer_config_helpers/math.py
@@ -13,10 +13,11 @@
 # limitations under the License.

 from .layers import LayerOutput, mixed_layer, identity_projection, \
-    slope_intercept_layer
+    slope_intercept_layer, scaling_layer, repeat_layer
 from .attrs import is_compatible_with
 from .default_decorators import *
 import activations as act
+from paddle.trainer.config_parser import logger

 __all__ = []

@@ -40,7 +41,21 @@ register_unary_math_op('square', act.SquareActivation())
 def add(layeroutput, other):
    if is_compatible_with(other, float):
        return slope_intercept_layer(input=layeroutput, intercept=other)
-    assert isinstance(other, LayerOutput)
+    if not isinstance(other, LayerOutput):
+        logger.fatal("LayerOutput can only be added with"
+                     " another LayerOutput or a number")
+    if layeroutput.size == other.size:
+        return mixed_layer(input=[identity_projection(input=layeroutput),
+                                  identity_projection(input=other)])
+    if other.size != 1 and layeroutput.size != 1:
+        logger.fatal("Two LayerOutput can be added only if they have equal size"
+                     " or one of their sizes is 1. sizes are %s and %s" %
+                     (layeroutput.size, other.size))
+    elif layeroutput.size == 1:
+        tmp = layeroutput
+        layeroutput = other
+        other = tmp
+    other = repeat_layer(other, layeroutput.size)
    return mixed_layer(input=[identity_projection(input=layeroutput),
                              identity_projection(input=other)])

@@ -50,10 +65,11 @@ LayerOutput.__add__ = add
 def sub(layeroutput, other):
    if is_compatible_with(other, float):
        return slope_intercept_layer(input=layeroutput, intercept=other)
-    assert isinstance(other, LayerOutput)
+    if not isinstance(other, LayerOutput):
+        logger.fatal("LayerOutput can only be subtracted with"
+                     " another Layeroutput or a number")
    neg = slope_intercept_layer(input=other, slope=-1.0)
-    return mixed_layer(input=[identity_projection(input=layeroutput),
-                              identity_projection(input=neg)])
+    return add(layeroutput, neg)

 LayerOutput.__sub__ = sub

@@ -62,3 +78,20 @@ def rsub(layeroutput, other):
    return add(neg, other)

 LayerOutput.__rsub__ = rsub
+
+def mul(layeroutput, other):
+    if is_compatible_with(other, float):
+        return slope_intercept_layer(input=layeroutput, slope=other)
+    if not isinstance(other, LayerOutput):
+        logger.fatal("LayerOutput can only be multiplied with"
+                     " another Layeroutput or a number")
+    elif layeroutput.size == 1:
+        return scaling_layer(input=other, weight=layeroutput)
+    elif other.size == 1:
+        return scaling_layer(input=layeroutput, weight=other)
+    else:
+        logger.fatal("At least one of the operand of '*' must be a number"
+                     " or a LayerOutput with size=1")
+
+LayerOutput.__mul__ = mul
+LayerOutput.__rmul__ = mul
--- a/python/paddle/trainer_config_helpers/tests/configs/math_ops.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/math_ops.py
@@ -19,6 +19,12 @@ y = x + y
 y = y - x
 y = y - 2
 y = 2 - y
-
+y = 2 * y
+y = y * 3
+z= data_layer(name='data_2', size=1)
+y = y * z
+y = z * y
+y = y + z
+y = z + y
 outputs(y)

--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/math_ops.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/math_ops.protostr
@@ -209,8 +209,129 @@ layers {
  slope: 1.0
  intercept: 2
 }
+layers {
+  name: "__slope_intercept_layer_6__"
+  type: "slope_intercept"
+  size: 100
+  active_type: ""
+  inputs {
+    input_layer_name: "__slope_intercept_layer_5__"
+  }
+  slope: 2
+  intercept: 0.0
+}
+layers {
+  name: "__slope_intercept_layer_7__"
+  type: "slope_intercept"
+  size: 100
+  active_type: ""
+  inputs {
+    input_layer_name: "__slope_intercept_layer_6__"
+  }
+  slope: 3
+  intercept: 0.0
+}
+layers {
+  name: "data_2"
+  type: "data"
+  size: 1
+  active_type: ""
+}
+layers {
+  name: "__scaling_layer_0__"
+  type: "scaling"
+  size: 100
+  active_type: ""
+  inputs {
+    input_layer_name: "data_2"
+  }
+  inputs {
+    input_layer_name: "__slope_intercept_layer_7__"
+  }
+}
+layers {
+  name: "__scaling_layer_1__"
+  type: "scaling"
+  size: 100
+  active_type: ""
+  inputs {
+    input_layer_name: "data_2"
+  }
+  inputs {
+    input_layer_name: "__scaling_layer_0__"
+  }
+}
+layers {
+  name: "__repeat_layer_0__"
+  type: "featmap_expand"
+  size: 100
+  active_type: ""
+  inputs {
+    input_layer_name: "data_2"
+  }
+  num_filters: 100
+}
+layers {
+  name: "__mixed_2__"
+  type: "mixed"
+  size: 100
+  active_type: ""
+  inputs {
+    input_layer_name: "__scaling_layer_1__"
+    proj_conf {
+      type: "identity"
+      name: "___mixed_2__.w0"
+      input_size: 100
+      output_size: 100
+    }
+  }
+  inputs {
+    input_layer_name: "__repeat_layer_0__"
+    proj_conf {
+      type: "identity"
+      name: "___mixed_2__.w1"
+      input_size: 100
+      output_size: 100
+    }
+  }
+}
+layers {
+  name: "__repeat_layer_1__"
+  type: "featmap_expand"
+  size: 100
+  active_type: ""
+  inputs {
+    input_layer_name: "data_2"
+  }
+  num_filters: 100
+}
+layers {
+  name: "__mixed_3__"
+  type: "mixed"
+  size: 100
+  active_type: ""
+  inputs {
+    input_layer_name: "__mixed_2__"
+    proj_conf {
+      type: "identity"
+      name: "___mixed_3__.w0"
+      input_size: 100
+      output_size: 100
+    }
+  }
+  inputs {
+    input_layer_name: "__repeat_layer_1__"
+    proj_conf {
+      type: "identity"
+      name: "___mixed_3__.w1"
+      input_size: 100
+      output_size: 100
+    }
+  }
+}
+input_layer_names: "data_2"
 input_layer_names: "data"
-output_layer_names: "__slope_intercept_layer_5__"
+output_layer_names: "__mixed_3__"
 sub_models {
  name: "root"
  layer_names: "data"
@@ -228,8 +349,18 @@ sub_models {
  layer_names: "__slope_intercept_layer_3__"
  layer_names: "__slope_intercept_layer_4__"
  layer_names: "__slope_intercept_layer_5__"
+  layer_names: "__slope_intercept_layer_6__"
+  layer_names: "__slope_intercept_layer_7__"
+  layer_names: "data_2"
+  layer_names: "__scaling_layer_0__"
+  layer_names: "__scaling_layer_1__"
+  layer_names: "__repeat_layer_0__"
+  layer_names: "__mixed_2__"
+  layer_names: "__repeat_layer_1__"
+  layer_names: "__mixed_3__"
+  input_layer_names: "data_2"
  input_layer_names: "data"
-  output_layer_names: "__slope_intercept_layer_5__"
+  output_layer_names: "__mixed_3__"
  is_recurrent_layer_group: false
 }