# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# recurrent_units.py
# Version 2.0
#
# Recurrent units that can be used inside a recurrent layer group.
# To use these units, import this module in your config file:
#
#     import trainer.recurrent_units
#
# The modules in this file are DEPRECATED.
# If you would like to use lstm/gru, please use the functions defined in
# paddle.trainer_config_helpers.

from paddle.trainer.config_parser import *


# Long short-term memory (LSTM) unit; can be used inside a recurrent machine.
# *inputs* must be a list of Projections, for example:
#     inputs = [FullMatrixProjection("input_layer_name")]
# *para_prefix* defines the parameter names; if two LstmRecurrentUnit instances
#     use the same *para_prefix*, they share the same parameters.
# *out_memory* can be defined outside this unit if it is also used outside.
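#
# For illustration, a hedged sketch of how this unit might be invoked between
# RecurrentLayerGroupBegin/End calls (see LstmRecurrentLayerGroup below for the
# full wrapper). The layer name "lstm_input", the size, and the activation
# strings are assumptions for this example, not definitions from this module:
#
#     LstmRecurrentUnit(
#         name="lstm_step",
#         size=128,
#         active_type="tanh",
#         state_active_type="tanh",
#         gate_active_type="sigmoid",
#         inputs=[FullMatrixProjection("lstm_input")],
#         para_prefix="shared_lstm")  # reuse this prefix elsewhere to share weights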
def LstmRecurrentUnit(name,
                      size,
                      active_type,
                      state_active_type,
                      gate_active_type,
                      inputs,
                      para_prefix=None,
                      error_clipping_threshold=0,
                      out_memory=None):

    if para_prefix is None:
        para_prefix = name
    if out_memory is None:
        out_memory = Memory(name=name, size=size)

    state_memory = Memory(name=name + "_" + "state", size=size)

    Layer(
        name=name + "_" + "input_recurrent",
        type="mixed",
        size=size * 4,  # (input_s, input_gate, forget_gate, output_gate)
        error_clipping_threshold=error_clipping_threshold,
        bias=Bias(
            initial_std=0, parameter_name=para_prefix + "_input_recurrent.b"),
        inputs=inputs + [
            FullMatrixProjection(
                out_memory, parameter_name=para_prefix + "_input_recurrent.w"),
        ], )
    LstmStepLayer(
        name=name,
        size=size,
        bias=Bias(parameter_name=para_prefix + "_check.b"),
        inputs=[name + "_" + "input_recurrent", state_memory],
        active_type=active_type,
        active_gate_type=gate_active_type,
        active_state_type=state_active_type, )
    GetOutputLayer(
        name=name + "_" + "state",
        size=size,
        inputs=Input(
            name, input_layer_argument="state"), )


# Like LstmRecurrentUnit, but built step by step from primitive mixed and
# expression layers instead of LstmStepLayer; it computes the same LSTM step.
def LstmRecurrentUnitNaive(name,
                           size,
                           active_type,
                           state_active_type,
                           gate_active_type,
                           inputs,
                           para_prefix=None,
                           error_clipping_threshold=0,
                           out_memory=None):

    if para_prefix is None:
        para_prefix = name
    if out_memory is None:
        out_memory = Memory(name=name, size=size)

    state_memory = Memory(name=name + "_" + "state", size=size)

    Layer(
        name=name + "_" + "input_recurrent",
        type="mixed",
        size=size * 4,  # (input_s, input_gate, forget_gate, output_gate)
        error_clipping_threshold=error_clipping_threshold,
        bias=Bias(
            initial_std=0, parameter_name=para_prefix + "_input_recurrent.b"),
        inputs=inputs + [
            FullMatrixProjection(
                out_memory, parameter_name=para_prefix + "_input_recurrent.w"),
        ], )
    ExpressionLayer(
        name=name + "_" + "input_s",
        size=size,
        active_type=active_type,
        inputs=[
            IdentityOffsetProjection(
                name + "_" + "input_recurrent", offset=0)
        ], )
    ExpressionLayer(
        name=name + "_" + "input_gate",
        active_type=gate_active_type,
        inputs=[
            IdentityOffsetProjection(
                name + "_" + "input_recurrent", offset=size), DotMulProjection(
                    state_memory, parameter_name=para_prefix + "_input_check.w")
        ], )
    ExpressionLayer(
        name=name + "_" + "forget_gate",
        active_type=gate_active_type,
        inputs=[
            IdentityOffsetProjection(
                name + "_" + "input_recurrent", offset=size * 2),
            DotMulProjection(
                state_memory, parameter_name=para_prefix + "_forget_check.w")
        ], )
    ExpressionLayer(
        name=name + "_" + "state",
        inputs=[
            DotMulOperator([name + "_" + "input_s", name + "_" + "input_gate"]),
            DotMulOperator([state_memory, name + "_" + "forget_gate"]),
        ], )
    ExpressionLayer(
        name=name + "_" + "output_gate",
        active_type=gate_active_type,
        inputs=[
            IdentityOffsetProjection(
                name + "_" + "input_recurrent", offset=size * 3),
            DotMulProjection(
                name + "_" + "state",
                parameter_name=para_prefix + "_output_check.w")
        ], )
    ExpressionLayer(
        name=name + "_" + "state_atv",
        active_type=state_active_type,
        inputs=IdentityProjection(name + "_" + "state"), )
    ExpressionLayer(
        name=name,
        inputs=DotMulOperator(
            [name + "_" + "state_atv", name + "_" + "output_gate"]), )


# Like LstmRecurrentUnit, but wrapped in its own recurrent layer group.
# It is equivalent to LstmLayer.
def LstmRecurrentLayerGroup(name,
                            size,
                            active_type,
                            state_active_type,
                            gate_active_type,
                            inputs,
                            para_prefix=None,
                            error_clipping_threshold=0,
                            seq_reversed=False):

    input_layer_name = name + "_" + "transform_input"
    Layer(
        name=input_layer_name,
        type="mixed",
        size=size * 4,
        active_type="",
        bias=False,
        inputs=inputs, )

    RecurrentLayerGroupBegin(
        name + "_layer_group",
        in_links=[input_layer_name],
        out_links=[name],
        seq_reversed=seq_reversed)

    LstmRecurrentUnit(
        name=name,
        size=size,
        active_type=active_type,
        state_active_type=state_active_type,
        gate_active_type=gate_active_type,
        inputs=[IdentityProjection(input_layer_name)],
        para_prefix=para_prefix,
        error_clipping_threshold=error_clipping_threshold, )

    RecurrentLayerGroupEnd(name + "_layer_group")
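

# A rough config-file sketch of this wrapper. It assumes the module has been
# imported as described in the file header; the input layer name, size, and
# activation strings below are illustrative assumptions, not part of this
# module:
#
#     LstmRecurrentLayerGroup(
#         name="lstm_group",
#         size=256,
#         active_type="tanh",
#         state_active_type="tanh",
#         gate_active_type="sigmoid",
#         inputs=[FullMatrixProjection("embedding_layer")],
#         seq_reversed=False)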


# Gated recurrent unit (GRU); can be used inside a recurrent machine.
# *inputs* should be a list of Projections, for example:
#     inputs = [FullMatrixProjection("input_layer_name")]
# *para_prefix* defines the parameter names; if two GatedRecurrentUnit instances
#     use the same *para_prefix*, they share the same parameters.
# *out_memory* can be defined outside this unit if it is also used outside.
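#
# A hedged sketch of the *out_memory* pattern mentioned above: the Memory is
# created outside the unit so that other layers in the same recurrent group
# can also read it. The names and sizes here are illustrative assumptions:
#
#     gru_out = Memory(name="gru_step", size=128)
#     GatedRecurrentUnit(
#         name="gru_step",
#         size=128,
#         active_type="tanh",
#         gate_active_type="sigmoid",
#         inputs=[FullMatrixProjection("gru_input")],
#         out_memory=gru_out)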
def GatedRecurrentUnit(name,
                       size,
                       active_type,
                       gate_active_type,
                       inputs,
                       para_prefix=None,
                       error_clipping_threshold=0,
                       out_memory=None):
    if type_of(inputs) == str:  # only used by GatedRecurrentLayerGroup
        input_layer_name = inputs
    else:
        input_layer_name = name + "_" + "transform_input"
        Layer(
            name=input_layer_name,
            type="mixed",
            size=size * 3,
            active_type="",
            bias=False,
            inputs=inputs, )

    if para_prefix is None:
        para_prefix = name
    if out_memory is None:
        out_memory = Memory(name=name, size=size)

    GruStepLayer(
        name=name,
        size=size,
        bias=Bias(parameter_name=para_prefix + "_gate.b"),
        inputs=[
            input_layer_name, Input(
                out_memory, parameter_name=para_prefix + "_gate.w")
        ],
        active_type=active_type,
        active_gate_type=gate_active_type, )


# Like GatedRecurrentUnit, but built step by step from primitive mixed and
# expression layers instead of GruStepLayer; it computes the same GRU step.
def GatedRecurrentUnitNaive(name,
                            size,
                            active_type,
                            gate_active_type,
                            inputs,
                            para_prefix=None,
                            error_clipping_threshold=0,
                            out_memory=None):

    if type_of(inputs) == str:  # only used by GatedRecurrentLayerGroup
        input_layer_name = inputs
    else:
        input_layer_name = name + "_" + "transform_input"
        Layer(
            name=input_layer_name,
            type="mixed",
            size=size * 3,
            active_type="",
            bias=False,
            inputs=inputs, )

    if para_prefix is None:
        para_prefix = name
    if out_memory is None:
        out_memory = Memory(name=name, size=size)

    Layer(
        name=name + "_" + "update_gate",
        type="mixed",
        size=size,
        active_type=gate_active_type,
        error_clipping_threshold=error_clipping_threshold,
        bias=Bias(
            initial_std=0, parameter_name=para_prefix + "_update_gate.b"),
        inputs=[
            IdentityOffsetProjection(
                input_layer_name, offset=0), FullMatrixProjection(
                    out_memory, parameter_name=para_prefix + "_update_gate.w")
        ], )
    Layer(
        name=name + "_" + "reset_gate",
        type="mixed",
        size=size,
        active_type=gate_active_type,
        error_clipping_threshold=error_clipping_threshold,
        bias=Bias(
            initial_std=0, parameter_name=para_prefix + "_reset_gate.b"),
        inputs=[
            IdentityOffsetProjection(
                input_layer_name, offset=size), FullMatrixProjection(
                    out_memory, parameter_name=para_prefix + "_reset_gate.w")
        ], )
    ExpressionLayer(
        name=name + "_" + "reset_output",
        inputs=DotMulOperator([out_memory, name + "_" + "reset_gate"]), )
    Layer(
        name=name + "_" + "output_candidate",
        type="mixed",
        size=size,
        active_type=active_type,
        error_clipping_threshold=error_clipping_threshold,
        bias=Bias(
            initial_std=0, parameter_name=para_prefix + "_output_candidate.b"),
        inputs=[
            IdentityOffsetProjection(
                input_layer_name, offset=size * 2), FullMatrixProjection(
                    name + "_" + "reset_output",
                    parameter_name=para_prefix + "_output_candidate.w")
        ], )
    ExpressionLayer(  # element-wise interpolation:
        # output = (1 - update_gate) * out_memory + update_gate * output_candidate
        name=name,
        inputs=[
            IdentityProjection(out_memory),
            DotMulOperator(
                [out_memory, name + "_" + "update_gate"], scale=-1.0),
            DotMulOperator(
                [name + "_" + "output_candidate", name + "_" + "update_gate"]),
        ], )


# Like GatedRecurrentUnit, but wrapped in its own recurrent layer group.
# It is equivalent to GatedRecurrentLayer.
def GatedRecurrentLayerGroup(name,
                             size,
                             active_type,
                             gate_active_type,
                             inputs,
                             para_prefix=None,
                             error_clipping_threshold=0,
                             seq_reversed=False):

    input_layer_name = name + "_" + "transform_input"
    Layer(
        name=input_layer_name,
        type="mixed",
        size=size * 3,
        active_type="",
        bias=False,
        inputs=inputs, )

    RecurrentLayerGroupBegin(
        name + "_layer_group",
        in_links=[input_layer_name],
        out_links=[name],
        seq_reversed=seq_reversed)

    GatedRecurrentUnit(
        name=name,
        size=size,
        active_type=active_type,
        gate_active_type=gate_active_type,
        inputs=input_layer_name,  # transformed outside the group
        para_prefix=para_prefix,
        error_clipping_threshold=error_clipping_threshold, )

    RecurrentLayerGroupEnd(name + "_layer_group")
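

# A rough config-file sketch of this wrapper; the input layer name, size, and
# activation strings are illustrative assumptions, not part of this module:
#
#     GatedRecurrentLayerGroup(
#         name="gru_group",
#         size=256,
#         active_type="tanh",
#         gate_active_type="sigmoid",
#         inputs=[FullMatrixProjection("embedding_layer")],
#         seq_reversed=True)  # run the group over the sequence in reverse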