# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
All layers just related to the neural network .
"""
from . . layer_helper import LayerHelper
from . . initializer import Normal , Constant
from . . framework import Variable
from . . param_attr import ParamAttr
from tensor import concat
__all__ = [
    'fc', 'embedding', 'dynamic_lstm', 'gru_unit', 'linear_chain_crf',
    'crf_decoding', 'cos_sim', 'cross_entropy', 'square_error_cost', 'accuracy',
    'chunk_eval', 'sequence_conv', 'conv2d', 'sequence_pool', 'pool2d',
    'batch_norm', 'beam_search_decode', 'conv2d_transpose', 'sequence_expand',
    'lstm_unit', 'reduce_sum', 'reduce_mean', 'reduce_max', 'reduce_min',
    'sequence_first_step', 'sequence_last_step', 'dropout', 'split',
    'ctc_greedy_decoder', 'edit_distance', 'l2_normalize', 'matmul', 'warpctc',
    'sequence_reshape'
]


def fc(input,
       size,
       num_flatten_dims=1,
       param_attr=None,
       bias_attr=None,
       act=None,
       name=None):
    """
    **Fully Connected Layer**

    The fully connected layer can take multiple tensors as its inputs. It
    creates a variable (one for each input tensor) called weights for each
    input tensor, which represents a fully connected weight matrix from each
    input unit to each output unit. The fully connected layer multiplies each
    input tensor with its corresponding weight to produce an output Tensor. If
    multiple input tensors are given, the results of the multiplications
    will be summed up. If bias_attr is not None, a bias variable will be
    created and added to the output. Finally, if activation is not None,
    it will be applied to the output as well.

    This process can be formulated as follows:

    .. math::

        Out = Act({\sum_{i=0}^{N-1}W_iX_i + b})

    In the above equation:

    * :math:`N`: Number of the inputs.
    * :math:`X_i`: The input tensor.
    * :math:`W`: The weights created by this layer.
    * :math:`b`: The bias parameter created by this layer (if needed).
    * :math:`Act`: The activation function.
    * :math:`Out`: The output tensor.

    Args:
        input (Variable|list): The input tensor(s) to the fully connected layer.
        size (int): The number of output units in the fully connected layer.
        num_flatten_dims (int): The fc layer can accept an input tensor with more
                                than two dimensions. If this happens, the
                                multidimensional tensor will first be flattened
                                into a 2-dimensional matrix. The parameter
                                `num_flatten_dims` determines how the input tensor
                                is flattened: the first `num_flatten_dims`
                                dimensions will be flattened to form the first
                                dimension of the final matrix (height of the
                                matrix), and the rest `rank(X) - num_flatten_dims`
                                dimensions are flattened to form the second
                                dimension of the final matrix (width of the matrix).
                                For example, suppose `X` is a 5-dimensional tensor
                                with a shape [2, 3, 4, 5, 6], and
                                `num_flatten_dims` = 3. Then, the flattened matrix
                                will have a shape [2 x 3 x 4, 5 x 6] = [24, 30].
                                By default, `num_flatten_dims` is set to 1.
        param_attr (ParamAttr|list): The parameter attribute for the learnable
                                     parameters/weights of the fully connected
                                     layer.
        bias_attr (ParamAttr|list): The parameter attribute for the bias parameter
                                    for this layer. If set to None, no bias will be
                                    added to the output units.
        act (str): Activation to be applied to the output of the fully connected
                   layer.
        name (str): Name/alias of the fully connected layer.

    Returns:
        Variable: The output tensor variable.

    Raises:
        ValueError: If rank of the input tensor is less than 2.

    Examples:
        .. code-block:: python

          data = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
          fc = fluid.layers.fc(input=data, size=1000, act="tanh")
    """
    helper = LayerHelper("fc", **locals())

    dtype = helper.input_dtype()

    mul_results = []
    for input_var, param_attr in helper.iter_inputs_and_params():
        input_shape = input_var.shape
        param_shape = [
            reduce(lambda a, b: a * b, input_shape[num_flatten_dims:], 1)
        ] + [size]
        w = helper.create_parameter(
            attr=param_attr, shape=param_shape, dtype=dtype, is_bias=False)
        tmp = helper.create_tmp_variable(dtype)
        helper.append_op(
            type="mul",
            inputs={
                "X": input_var,
                "Y": w,
            },
            outputs={"Out": tmp},
            attrs={"x_num_col_dims": num_flatten_dims,
                   "y_num_col_dims": 1})
        mul_results.append(tmp)

    # sum
    if len(mul_results) == 1:
        pre_bias = mul_results[0]
    else:
        pre_bias = helper.create_tmp_variable(dtype)
        helper.append_op(
            type="sum", inputs={"X": mul_results}, outputs={"Out": pre_bias})
    # add bias
    pre_activation = helper.append_bias_op(pre_bias)
    # add activation
    return helper.append_activation(pre_activation)


def embedding(input, size, is_sparse=False, param_attr=None, dtype='float32'):
    """
    **Embedding Layer**

    This layer is used to look up a vector of IDs, provided by *input*, in a
    lookup table. The result of this lookup is the embedding of each ID in the
    *input*.

    All the input variables are passed in as local variables to the LayerHelper
    constructor.

    Args:
        input (Variable): Input to the function.
        size (tuple|list|None): Shape of the lookup table parameter.
        is_sparse (bool): Boolean flag that specifies whether the input is sparse.
        param_attr (ParamAttr): Parameters for this layer.
        dtype (np.dtype|core.DataType|str): The type of data: float32, float16, int etc.

    Returns:
        Variable: The tensor variable storing the embeddings of the \
                  supplied inputs.

    Examples:
        .. code-block:: python

          dict_size = len(dataset.ids)
          data = fluid.layers.data(name='ids', shape=[32, 32], dtype='int64')
          fc = fluid.layers.embedding(input=data, size=[dict_size, 16])
    """
    helper = LayerHelper('embedding', **locals())
    w = helper.create_parameter(
        attr=helper.param_attr, shape=size, dtype=dtype, is_bias=False)
    tmp = helper.create_tmp_variable(dtype)
    helper.append_op(
        type='lookup_table',
        inputs={'Ids': input,
                'W': w},
        outputs={'Out': tmp},
        attrs={'is_sparse': is_sparse})
    return tmp


# TODO(qijun): expose H0 and C0
def dynamic_lstm(input,
                 size,
                 param_attr=None,
                 bias_attr=None,
                 use_peepholes=True,
                 is_reverse=False,
                 gate_activation='sigmoid',
                 cell_activation='tanh',
                 candidate_activation='tanh',
                 dtype='float32'):
    """
    **Dynamic LSTM Layer**

    The default implementation uses diagonal/peephole connections
    (https://arxiv.org/pdf/1402.1128.pdf); the formula is as follows:

    .. math::

        i_t & = \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + W_{ic}c_{t-1} + b_i)

        f_t & = \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + W_{fc}c_{t-1} + b_f)

        \\tilde{c_t} & = act_g(W_{cx}x_t + W_{ch}h_{t-1} + b_c)

        o_t & = \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + W_{oc}c_t + b_o)

        c_t & = f_t \odot c_{t-1} + i_t \odot \\tilde{c_t}

        h_t & = o_t \odot act_h(c_t)

    where the :math:`W` terms denote weight matrices (e.g. :math:`W_{xi}` is
    the matrix of weights from the input gate to the input), :math:`W_{ic}, \
    W_{fc}, W_{oc}` are diagonal weight matrices for peephole connections. In
    our implementation, we use vectors to represent these diagonal weight
    matrices. The :math:`b` terms denote bias vectors (:math:`b_i` is the input
    gate bias vector), :math:`\sigma` is a non-linear activation, such as the
    logistic sigmoid function, and :math:`i, f, o` and :math:`c` are the input
    gate, forget gate, output gate, and cell activation vectors, respectively,
    all of which have the same size as the cell output activation vector :math:`h`.

    The :math:`\odot` is the element-wise product of the vectors. :math:`act_g`
    and :math:`act_h` are the cell input and cell output activation functions
    and `tanh` is usually used for them. :math:`\\tilde{c_t}` is also called
    the candidate hidden state, which is computed based on the current input and
    the previous hidden state.

    Set `use_peepholes` to `False` to disable peephole connections. The formula
    is omitted here; please refer to the paper
    http://www.bioinf.jku.at/publications/older/2604.pdf for details.

    Note that these :math:`W_{xi}x_{t}, W_{xf}x_{t}, W_{xc}x_{t}, W_{xo}x_{t}`
    operations on the input :math:`x_{t}` are NOT included in this operator.
    Users can choose to use a fully-connected layer before the LSTM layer.

    Args:
        input (Variable): The input of the dynamic_lstm layer, which supports
                          variable-time length input sequences. The underlying
                          tensor in this Variable is a matrix with shape
                          (T X 4D), where T is the total time steps in this
                          mini-batch and D is the hidden size.
        size (int): 4 * hidden size.
        param_attr (ParamAttr): The parameter attribute for the learnable
                                hidden-hidden weights.

                                - The shape is (D x 4D), where D is the hidden
                                  size.
                                - Weights = {:math:`W_{ch}, W_{ih}, \
                                  W_{fh}, W_{oh}`}
        bias_attr (ParamAttr): The bias attribute for the learnable bias
                               weights, which contains two parts, input-hidden
                               bias weights and peephole connections weights if
                               setting `use_peepholes` to `True`.

                               1. `use_peepholes = False`
                                - The shape is (1 x 4D).
                                - Biases = {:math:`b_c, b_i, b_f, b_o`}.
                               2. `use_peepholes = True`
                                - The shape is (1 x 7D).
                                - Biases = {:math:`b_c, b_i, b_f, b_o, W_{ic}, \
                                  W_{fc}, W_{oc}`}.
        use_peepholes (bool): Whether to enable diagonal/peephole connections,
                              default `True`.
        is_reverse (bool): Whether to compute reversed LSTM, default `False`.
        gate_activation (str): The activation for input gate, forget gate and
                               output gate. Choices = ["sigmoid", "tanh", "relu",
                               "identity"], default "sigmoid".
        cell_activation (str): The activation for cell output. Choices = ["sigmoid",
                               "tanh", "relu", "identity"], default "tanh".
        candidate_activation (str): The activation for candidate hidden state.
                                    Choices = ["sigmoid", "tanh", "relu", "identity"],
                                    default "tanh".
        dtype (str): Data type. Choices = ["float32", "float64"], default "float32".

    Returns:
        tuple: The hidden state and cell state of the LSTM. The shape of both \
               is (T x D), and the lod is the same as the `input`.

    Examples:
        .. code-block:: python

          hidden_dim = 512
          forward_proj = fluid.layers.fc(input=input_seq, size=hidden_dim * 4,
                                         act=None, bias_attr=None)
          forward, _ = fluid.layers.dynamic_lstm(
              input=forward_proj, size=hidden_dim * 4, use_peepholes=False)
    """
    helper = LayerHelper('lstm', **locals())
    size = size / 4
    weight = helper.create_parameter(
        attr=helper.param_attr, shape=[size, 4 * size], dtype=dtype)
    bias_size = [1, 7 * size]
    if not use_peepholes:
        bias_size[1] = 4 * size
    bias = helper.create_parameter(
        attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True)

    hidden = helper.create_tmp_variable(dtype)
    cell = helper.create_tmp_variable(dtype)
    batch_gate = helper.create_tmp_variable(dtype)
    batch_cell_pre_act = helper.create_tmp_variable(dtype)

    helper.append_op(
        type='lstm',
        inputs={'Input': input,
                'Weight': weight,
                'Bias': bias},
        outputs={
            'Hidden': hidden,
            'Cell': cell,
            'BatchGate': batch_gate,
            'BatchCellPreAct': batch_cell_pre_act
        },
        attrs={
            'use_peepholes': use_peepholes,
            'is_reverse': is_reverse,
            'gate_activation': gate_activation,
            'cell_activation': cell_activation,
            'candidate_activation': candidate_activation
        })
    return hidden, cell


def gru_unit(input,
             hidden,
             size,
             weight=None,
             bias=None,
             activation='tanh',
             gate_activation='sigmoid'):
    """
    GRU unit layer. The equation of a gru step is:

        .. math::
            u_t & = actGate(xu_{t} + W_u h_{t-1} + b_u)

            r_t & = actGate(xr_{t} + W_r h_{t-1} + b_r)

            m_t & = actNode(xm_t + W_c dot(r_t, h_{t-1}) + b_m)

            h_t & = dot((1 - u_t), m_t) + dot(u_t, h_{t-1})

    The inputs of the gru unit include :math:`z_t` and :math:`h_{t-1}`. In terms
    of the equation above, :math:`z_t` is split into 3 parts -
    :math:`xu_t`, :math:`xr_t` and :math:`xm_t`. This means that in order to
    implement a full GRU unit operator for an input, a fully
    connected layer has to be applied, such that :math:`z_t = W_{fc}x_t`.

    The terms :math:`u_t` and :math:`r_t` represent the update and reset gates
    of the GRU cell. Unlike LSTM, GRU has one fewer gate. However, there is
    an intermediate candidate hidden output, which is denoted by :math:`m_t`.
    This layer has three outputs: :math:`h_t`, :math:`dot(r_t, h_{t-1})`
    and the concatenation of :math:`u_t`, :math:`r_t` and :math:`m_t`.

    Args:
        input (Variable): The fc transformed input value of the current step.
        hidden (Variable): The hidden value of the gru unit from the previous step.
        size (integer): The input dimension value.
        weight (ParamAttr): The weight parameters for the gru unit. Default: None
        bias (ParamAttr): The bias parameters for the gru unit. Default: None
        activation (string): The activation type for the cell (actNode). Default: 'tanh'
        gate_activation (string): The activation type for the gates (actGate). Default: 'sigmoid'

    Returns:
        tuple: The hidden value, reset-hidden value and gate values.

    Examples:
        .. code-block:: python

             # assuming we have x_t_data and prev_hidden of size=10
             x_t = fluid.layers.fc(input=x_t_data, size=30)
             hidden_val, r_h_val, gate_val = fluid.layers.gru_unit(input=x_t,
                                                    hidden=prev_hidden)
    """
    activation_dict = dict(
        identity=0,
        sigmoid=1,
        tanh=2,
        relu=3, )
    activation = activation_dict[activation]
    gate_activation = activation_dict[gate_activation]

    helper = LayerHelper('gru_unit', **locals())
    dtype = helper.input_dtype()
    size = size / 3

    # create weight
    if weight is None:
        weight = helper.create_parameter(
            attr=helper.param_attr, shape=[size, 3 * size], dtype=dtype)

    # create bias
    if bias is None:
        bias_size = [1, 3 * size]
        bias = helper.create_parameter(
            attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True)

    gate = helper.create_tmp_variable(dtype)
    reset_hidden_pre = helper.create_tmp_variable(dtype)
    updated_hidden = helper.create_tmp_variable(dtype)

    helper.append_op(
        type='gru_unit',
        inputs={'Input': input,
                'HiddenPrev': hidden,
                'Weight': weight},
        outputs={
            'Gate': gate,
            'ResetHiddenPrev': reset_hidden_pre,
            'Hidden': updated_hidden,
        },
        attrs={
            # pass the activation indices resolved from activation_dict
            # instead of hard-coded constants
            'activation': activation,
            'gate_activation': gate_activation,
        })

    return updated_hidden, reset_hidden_pre, gate


def linear_chain_crf(input, label, param_attr=None):
    helper = LayerHelper('linear_chain_crf', **locals())
    size = input.shape[1]
    transition = helper.create_parameter(
        attr=helper.param_attr,
        shape=[size + 2, size],
        dtype=helper.input_dtype())
    alpha = helper.create_tmp_variable(dtype=helper.input_dtype())
    emission_exps = helper.create_tmp_variable(dtype=helper.input_dtype())
    transition_exps = helper.create_tmp_variable(dtype=helper.input_dtype())
    log_likelihood = helper.create_tmp_variable(dtype=helper.input_dtype())
    helper.append_op(
        type='linear_chain_crf',
        inputs={"Emission": [input],
                "Transition": transition,
                "Label": label},
        outputs={
            "Alpha": [alpha],
            "EmissionExps": [emission_exps],
            "TransitionExps": transition_exps,
            "LogLikelihood": log_likelihood
        })
    return log_likelihood
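
# A minimal usage sketch for linear_chain_crf, assuming hypothetical
# `word_feature`, `target` and `num_labels` variables from a fluid program;
# the named ParamAttr lets crf_decoding reuse the learned transition matrix.
#
#   emission = fluid.layers.fc(input=word_feature, size=num_labels)
#   crf_cost = fluid.layers.linear_chain_crf(
#       input=emission, label=target,
#       param_attr=fluid.ParamAttr(name='crf_transition'))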


def crf_decoding(input, param_attr, label=None):
    helper = LayerHelper('crf_decoding', **locals())
    transition = helper.get_parameter(param_attr.name)
    viterbi_path = helper.create_tmp_variable(dtype=helper.input_dtype())
    helper.append_op(
        type='crf_decoding',
        inputs={"Emission": [input],
                "Transition": transition,
                "Label": label},
        outputs={"ViterbiPath": [viterbi_path]})
    return viterbi_path
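
# A minimal sketch of how crf_decoding could reuse the transition parameter
# learned by linear_chain_crf; `emission` and the ParamAttr name are the
# hypothetical ones from the sketch above.
#
#   decoded_path = fluid.layers.crf_decoding(
#       input=emission, param_attr=fluid.ParamAttr(name='crf_transition'))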


def cos_sim(X, Y, **kwargs):
    """
    This function computes the cosine similarity between the two tensors
    X and Y and returns it as the output.
    """
    helper = LayerHelper('cos_sim', **kwargs)
    out = helper.create_tmp_variable(dtype=X.dtype)
    xnorm = helper.create_tmp_variable(dtype=X.dtype)
    ynorm = helper.create_tmp_variable(dtype=X.dtype)
    helper.append_op(
        type='cos_sim',
        inputs={'X': [X],
                'Y': [Y]},
        outputs={'Out': [out],
                 'XNorm': [xnorm],
                 'YNorm': [ynorm]})
    return out
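
# A minimal usage sketch for cos_sim; `a` and `b` are hypothetical data layers
# of the same shape created inside a fluid program.
#
#   a = fluid.layers.data(name='a', shape=[16], dtype='float32')
#   b = fluid.layers.data(name='b', shape=[16], dtype='float32')
#   similarity = fluid.layers.cos_sim(X=a, Y=b)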


def dropout(x, dropout_prob, is_test=False, seed=0, **kwargs):
    helper = LayerHelper('dropout', **kwargs)
    out = helper.create_tmp_variable(dtype=x.dtype)
    mask = helper.create_tmp_variable(dtype=x.dtype, stop_gradient=True)
    helper.append_op(
        type='dropout',
        inputs={'X': [x]},
        outputs={'Out': [out],
                 'Mask': [mask]},
        attrs={'dropout_prob': dropout_prob,
               'is_test': is_test,
               'seed': seed})
    return out
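
# A minimal usage sketch for dropout; `data` and `hidden` are hypothetical
# variables, and is_test would typically be True at inference time.
#
#   hidden = fluid.layers.fc(input=data, size=128, act='relu')
#   dropped = fluid.layers.dropout(x=hidden, dropout_prob=0.5)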


def cross_entropy(input, label, **kwargs):
    """
    **Cross Entropy Layer**

    This layer computes the cross entropy between `input` and `label`. It
    supports both standard cross-entropy and soft-label cross-entropy loss
    computation.

    1) One-hot cross-entropy:
        `soft_label = False`, `Label[i, 0]` indicates the class index for sample i:

        .. math::

            Y[i] = -\log(X[i, Label[i]])

    2) Soft-label cross-entropy:
        `soft_label = True`, `Label[i, j]` indicates the soft label of class j
        for sample i:

        .. math::

            Y[i] = \sum_j{-Label[i, j] * log(X[i, j])}

       Please make sure that in this case the summation of each row of `label`
       equals one.

    3) One-hot cross-entropy with vectorized `label`:
        As a special case of 2), when each row of `label` has only one
        non-zero element which is equal to 1, soft-label cross-entropy degenerates
        to a one-hot cross-entropy with one-hot label representation.

    Args:
        input (Variable|list): a 2-D tensor with shape [N x D], where N is the
            batch size and D is the number of classes. This input is a probability
            computed by the previous operator, which is almost always the result
            of a softmax operator.
        label (Variable|list): the ground truth which is a 2-D tensor. When
            `soft_label` is set to `False`, `label` is a tensor<int64> with shape
            [N x 1]. When `soft_label` is set to `True`, `label` is a
            tensor<float/double> with shape [N x D].
        soft_label (bool, via `**kwargs`): a flag indicating whether to interpret
            the given labels as soft labels, default `False`.

    Returns:
        A 2-D tensor with shape [N x 1], the cross entropy loss.

    Raises:
        `ValueError`: 1) the 1st dimension of `input` and `label` are not equal; 2) when \
            `soft_label == True`, and the 2nd dimension of `input` and `label` are not \
            equal; 3) when `soft_label == False`, and the 2nd dimension of `label` is not 1.

    Examples:
        .. code-block:: python

          predict = fluid.layers.fc(input=net, size=classdim, act='softmax')
          cost = fluid.layers.cross_entropy(input=predict, label=label)
    """
    helper = LayerHelper('cross_entropy', **kwargs)
    out = helper.create_tmp_variable(dtype=input.dtype)
    helper.append_op(
        type='cross_entropy',
        inputs={'X': [input],
                'Label': [label]},
        outputs={'Y': [out]},
        attrs=kwargs)
    return out


def square_error_cost(input, label, **kwargs):
    """
    **Square error cost layer**

    This layer accepts input predictions and target labels and returns the
    squared error cost.

    For predictions, :math:`X`, and target labels, :math:`Y`, the equation is:

    .. math::

        Out = (X - Y)^2

    In the above equation:

        * :math:`X`: Input predictions, a tensor.
        * :math:`Y`: Input labels, a tensor.
        * :math:`Out`: Output value, same shape as :math:`X`.

    Args:
        input (Variable): Input tensor, holding the predictions.
        label (Variable): Label tensor, holding the target labels.

    Returns:
        Variable: The tensor variable storing the element-wise squared error \
                  difference of input and label.

    Examples:
        .. code-block:: python

          y = layers.data(name='y', shape=[1], dtype='float32')
          y_predict = layers.data(name='y_predict', shape=[1], dtype='float32')
          cost = layers.square_error_cost(input=y_predict, label=y)
    """
    helper = LayerHelper('square_error_cost', **kwargs)
    minus_out = helper.create_tmp_variable(dtype=input.dtype)
    helper.append_op(
        type='elementwise_sub',
        inputs={'X': [input],
                'Y': [label]},
        outputs={'Out': [minus_out]})

    square_out = helper.create_tmp_variable(dtype=input.dtype)
    helper.append_op(
        type='square', inputs={'X': [minus_out]},
        outputs={'Out': [square_out]})
    return square_out


def accuracy(input, label, k=1, correct=None, total=None, **kwargs):
    """
    This function computes the accuracy using the input and label.
    It takes the top-k predictions of the input: a sample is counted as
    correct if its label appears among those k indices.
    """
    helper = LayerHelper("accuracy", **kwargs)
    topk_out = helper.create_tmp_variable(dtype=input.dtype)
    topk_indices = helper.create_tmp_variable(dtype="int64")
    helper.append_op(
        type="top_k",
        inputs={"X": [input]},
        outputs={"Out": [topk_out],
                 "Indices": [topk_indices]},
        attrs={"k": k})
    acc_out = helper.create_tmp_variable(dtype="float32")
    if correct is None:
        correct = helper.create_tmp_variable(dtype="int64")
    if total is None:
        total = helper.create_tmp_variable(dtype="int64")
    helper.append_op(
        type="accuracy",
        inputs={
            "Out": [topk_out],
            "Indices": [topk_indices],
            "Label": [label]
        },
        outputs={
            "Accuracy": [acc_out],
            "Correct": [correct],
            "Total": [total],
        })
    return acc_out
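
# A minimal usage sketch for accuracy; `feature` is a hypothetical input and
# `predict` the softmax output whose top-k indices are compared with `label`.
#
#   predict = fluid.layers.fc(input=feature, size=10, act='softmax')
#   label = fluid.layers.data(name='label', shape=[1], dtype='int64')
#   acc = fluid.layers.accuracy(input=predict, label=label, k=1)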


def chunk_eval(input,
               label,
               chunk_scheme,
               num_chunk_types,
               excluded_chunk_types=None,
               **kwargs):
    """
    This function computes and outputs the precision, recall and
    F1-score of chunk detection.
    """
    helper = LayerHelper("chunk_eval", **kwargs)

    # prepare output
    precision = helper.create_tmp_variable(dtype="float32")
    recall = helper.create_tmp_variable(dtype="float32")
    f1_score = helper.create_tmp_variable(dtype="float32")
    num_infer_chunks = helper.create_tmp_variable(dtype="int64")
    num_label_chunks = helper.create_tmp_variable(dtype="int64")
    num_correct_chunks = helper.create_tmp_variable(dtype="int64")

    helper.append_op(
        type="chunk_eval",
        inputs={"Inference": [input],
                "Label": [label]},
        outputs={
            "Precision": [precision],
            "Recall": [recall],
            "F1-Score": [f1_score],
            "NumInferChunks": [num_infer_chunks],
            "NumLabelChunks": [num_label_chunks],
            "NumCorrectChunks": [num_correct_chunks]
        },
        attrs={
            "num_chunk_types": num_chunk_types,
            "chunk_scheme": chunk_scheme,
            "excluded_chunk_types": excluded_chunk_types or []
        })
    return (precision, recall, f1_score, num_infer_chunks, num_label_chunks,
            num_correct_chunks)
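
# A minimal usage sketch for chunk_eval; `crf_decode` is assumed to be the
# Viterbi path from crf_decoding and `target` an int64 label sequence, with
# an IOB tagging scheme over three hypothetical chunk types.
#
#   precision, recall, f1, _, _, _ = fluid.layers.chunk_eval(
#       input=crf_decode, label=target,
#       chunk_scheme='IOB', num_chunk_types=3)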


def sequence_conv(input,
                  num_filters,
                  filter_size=3,
                  filter_stride=1,
                  padding=None,
                  bias_attr=None,
                  param_attr=None,
                  act=None):
    """
    This function creates the op for sequence_conv, using the inputs and
    other convolutional configurations for the filters and stride as given
    in the input parameters to the function.
    """

    # FIXME(dzh): want to unify the argument of the python layer
    # function, so we ignore some unnecessary attributes,
    # such as padding_trainable and context_start.
    helper = LayerHelper('sequence_conv', **locals())
    dtype = helper.input_dtype()
    filter_shape = [filter_size * input.shape[1], num_filters]
    filter_param = helper.create_parameter(
        attr=helper.param_attr, shape=filter_shape, dtype=dtype)
    pre_bias = helper.create_tmp_variable(dtype)

    helper.append_op(
        type='sequence_conv',
        inputs={
            'X': [input],
            'Filter': [filter_param],
        },
        outputs={"Out": pre_bias},
        attrs={
            'contextStride': filter_stride,
            'contextStart': -int(filter_size / 2),
            'contextLength': filter_size
        })
    pre_act = helper.append_bias_op(pre_bias)
    return helper.append_activation(pre_act)
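
# A minimal usage sketch for sequence_conv; `seq` is a hypothetical LoDTensor
# data layer (lod_level=1) holding per-timestep feature vectors.
#
#   seq = fluid.layers.data(name='seq', shape=[128], dtype='float32',
#                           lod_level=1)
#   conv = fluid.layers.sequence_conv(input=seq, num_filters=64,
#                                     filter_size=3, act='relu')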


def conv2d(input,
           num_filters,
           filter_size,
           stride=None,
           padding=None,
           groups=None,
           param_attr=None,
           bias_attr=None,
           use_cudnn=True,
           act=None):
    """
    **Convolution2D Layer**

    The convolution2D layer calculates the output based on the input, filter
    and the strides, paddings, dilations, groups parameters. Input(Input) and
    Output(Output) are in NCHW format, where N is batch size, C is the number
    of channels, H is the height of the feature, and W is the width of the
    feature.
    For details of the convolution layer, please refer to UFLDL's `convolution
    <http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_.
    If a bias attribute and activation type are provided, bias is added to the
    output of the convolution, and the corresponding activation function is
    applied to the final result.
    For each input :math:`X`, the equation is:

    .. math::

        Out = \sigma (W \\ast X + b)

    In the above equation:

    * :math:`X`: Input value, a tensor with NCHW format.
    * :math:`W`: Filter value, a tensor with MCHW format.
    * :math:`\\ast`: Convolution operation.
    * :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
    * :math:`\\sigma`: Activation function.
    * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.

    Example:

        Input:
            Input shape: $(N, C_{in}, H_{in}, W_{in})$

            Filter shape: $(C_{out}, C_{in}, H_f, W_f)$

        Output:
            Output shape: $(N, C_{out}, H_{out}, W_{out})$

        Where

        .. math::

            H_{out} & = \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\\\
            W_{out} & = \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1

    Args:
        input (Variable): The input image with [N, C, H, W] format.
        num_filters (int): The number of filters. It is the same as the output
            image channel.
        filter_size (int|tuple|None): The filter size. If filter_size is a tuple,
            it must contain two integers, (filter_size_H, filter_size_W).
            Otherwise, the filter will be a square.
        stride (int|tuple): The stride size. If stride is a tuple, it must
            contain two integers, (stride_H, stride_W). Otherwise,
            stride_H = stride_W = stride. Default: stride = 1.
        padding (int|tuple): The padding size. If padding is a tuple, it must
            contain two integers, (padding_H, padding_W). Otherwise,
            padding_H = padding_W = padding. Default: padding = 0.
        groups (int): The group number of the Conv2d Layer. According to grouped
            convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
            the first half of the filters is only connected to the first half
            of the input channels, while the second half of the filters is only
            connected to the second half of the input channels. Default: groups = 1.
        param_attr (ParamAttr): The parameters to the Conv2d Layer. Default: None
        bias_attr (ParamAttr): Bias parameter for the Conv2d layer. Default: None
        use_cudnn (bool): Use cudnn kernel or not; it is valid only when the cudnn
            library is installed. Default: True
        act (str): Activation type. Default: None

    Returns:
        Variable: The tensor variable storing the convolution and \
                  non-linearity activation result.

    Raises:
        ValueError: If the shapes of input, filter_size, stride, padding and groups mismatch.

    Examples:
        .. code-block:: python

          data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32')
          conv2d = fluid.layers.conv2d(input=data, num_filters=2, filter_size=3, act="relu")
    """
    if stride is None:
        stride = [1, 1]
    helper = LayerHelper('conv2d', **locals())
    dtype = helper.input_dtype()

    num_channels = input.shape[1]
    if groups is None:
        num_filter_channels = num_channels
    else:
        if num_channels % groups != 0:
            raise ValueError("num_channels must be divisible by groups.")
        num_filter_channels = num_channels / groups

    if isinstance(filter_size, int):
        filter_size = [filter_size, filter_size]
    if isinstance(stride, int):
        stride = [stride, stride]
    if isinstance(padding, int):
        padding = [padding, padding]
    if not isinstance(use_cudnn, bool):
        raise ValueError("use_cudnn should be True or False")

    input_shape = input.shape
    filter_shape = [num_filters, num_filter_channels] + filter_size

    def _get_default_param_initializer():
        std = (2.0 / (filter_size[0]**2 * num_channels))**0.5
        return Normal(0.0, std, 0)

    filter_param = helper.create_parameter(
        attr=helper.param_attr,
        shape=filter_shape,
        dtype=dtype,
        default_initializer=_get_default_param_initializer())

    pre_bias = helper.create_tmp_variable(dtype)

    helper.append_op(
        type='conv2d',
        inputs={
            'Input': input,
            'Filter': filter_param,
        },
        outputs={"Output": pre_bias},
        attrs={
            'strides': stride,
            'paddings': padding,
            'groups': groups,
            'use_cudnn': use_cudnn
        })

    pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=2)

    return helper.append_activation(pre_act)


def sequence_pool(input, pool_type, **kwargs):
    """
    This function adds the operator for sequence pooling.
    It pools features of all time-steps of each instance, and is applied
    on top of the input using the pool_type mentioned in the parameters.

    It supports four pool_type:

    - average: :math:`Out[i] = \\frac{\sum_i X_i}{N}`
    - sum:     :math:`Out[i] = \sum_jX_{ij}`
    - sqrt:    :math:`Out[i] = \\frac{\sum_jX_{ij}}{\sqrt{len(X_i)}}`
    - max:     :math:`Out[i] = max(X_i)`

    .. code-block:: text

       x is a 1-level LoDTensor:
         x.lod = [[0, 2, 5, 7]]
         x.data = [1, 3, 2, 4, 6, 5, 1]
         x.dims = [7, 1]

       then output is a Tensor:
         out.dim = [3, 1]
         with condition len(x.lod[-1]) - 1 == out.dims[0]

       for different pool_type:
         average: out.data = [2, 4, 3], where 2=(1+3)/2, 4=(2+4+6)/3, 3=(5+1)/2
         sum    : out.data = [4, 12, 6], where 4=1+3, 12=2+4+6, 6=5+1
         sqrt   : out.data = [2.82, 6.93, 4.24], where 2.82=(1+3)/sqrt(2),
                    6.93=(2+4+6)/sqrt(3), 4.24=(5+1)/sqrt(2)
         max    : out.data = [3, 6, 5], where 3=max(1,3), 6=max(2,4,6), 5=max(5,1)

    Args:
        input (variable): The input variable which is a LoDTensor.
        pool_type (string): The pooling type of sequence_pool.
            It supports average, sum, sqrt and max.

    Returns:
        The sequence pooling variable which is a Tensor.

    Examples:

        .. code-block:: python

             x = fluid.layers.data(name='x', shape=[7, 1],
                              dtype='float32', lod_level=1)
             avg_x = fluid.layers.sequence_pool(input=x, pool_type='average')
             sum_x = fluid.layers.sequence_pool(input=x, pool_type='sum')
             sqrt_x = fluid.layers.sequence_pool(input=x, pool_type='sqrt')
             max_x = fluid.layers.sequence_pool(input=x, pool_type='max')
    """
    helper = LayerHelper('sequence_pool', input=input, **kwargs)
    dtype = helper.input_dtype()
    pool_out = helper.create_tmp_variable(dtype)
    max_index = helper.create_tmp_variable(dtype)

    helper.append_op(
        type="sequence_pool",
        inputs={"X": input},
        outputs={"Out": pool_out,
                 "MaxIndex": max_index},
        attrs={"pooltype": pool_type.upper()})

    # when pool_type is max, the variable max_index is initialized,
    # so we stop the gradient explicitly here
    if pool_type == 'max':
        max_index.stop_gradient = True

    return pool_out


def sequence_first_step(input, **kwargs):
    """
    This function gets the first step of a sequence.

    .. code-block:: text

       x is a 1-level LoDTensor:
         x.lod = [[0, 2, 5, 7]]
         x.data = [1, 3, 2, 4, 6, 5, 1]
         x.dims = [7, 1]

       then output is a Tensor:
         out.dim = [3, 1]
         with condition len(x.lod[-1]) - 1 == out.dims[0]
         out.data = [1, 2, 5], where 1=first(1,3), 2=first(2,4,6), 5=first(5,1)

    Args:
        input (variable): The input variable which is a LoDTensor.

    Returns:
        The sequence's first step variable which is a Tensor.

    Examples:

        .. code-block:: python

             x = fluid.layers.data(name='x', shape=[7, 1],
                              dtype='float32', lod_level=1)
             x_first_step = fluid.layers.sequence_first_step(input=x)
    """
    return sequence_pool(input=input, pool_type="first")


def sequence_last_step(input, **kwargs):
    """
    This function gets the last step of a sequence.

    .. code-block:: text

       x is a 1-level LoDTensor:
         x.lod = [[0, 2, 5, 7]]
         x.data = [1, 3, 2, 4, 6, 5, 1]
         x.dims = [7, 1]

       then output is a Tensor:
         out.dim = [3, 1]
         with condition len(x.lod[-1]) - 1 == out.dims[0]
         out.data = [3, 6, 1], where 3=last(1,3), 6=last(2,4,6), 1=last(5,1)

    Args:
        input (variable): The input variable which is a LoDTensor.

    Returns:
        The sequence's last step variable which is a Tensor.

    Examples:

        .. code-block:: python

             x = fluid.layers.data(name='x', shape=[7, 1],
                              dtype='float32', lod_level=1)
             x_last_step = fluid.layers.sequence_last_step(input=x)
    """
    return sequence_pool(input=input, pool_type="last")


def pool2d(input,
           pool_size,
           pool_type,
           pool_stride=None,
           pool_padding=None,
           global_pooling=False,
           use_cudnn=True,
           name=None):
    """
    This function adds the operator for pooling in 2 dimensions, using the
    pooling configurations mentioned in the input parameters.
    """
    if pool_padding is None:
        pool_padding = [0, 0]
    if pool_stride is None:
        pool_stride = [1, 1]
    if pool_type not in ["max", "avg"]:
        raise ValueError(
            "Unknown pool_type: '%s'. It can only be 'max' or 'avg'." %
            str(pool_type))
    if isinstance(pool_size, int):
        pool_size = [pool_size, pool_size]
    if isinstance(pool_stride, int):
        pool_stride = [pool_stride, pool_stride]
    if isinstance(pool_padding, int):
        pool_padding = [pool_padding, pool_padding]
    if not isinstance(use_cudnn, bool):
        raise ValueError("use_cudnn should be True or False")

    helper = LayerHelper('pool2d', **locals())
    dtype = helper.input_dtype()
    pool_out = helper.create_tmp_variable(dtype)

    helper.append_op(
        type="pool2d",
        inputs={"X": input},
        outputs={"Out": pool_out},
        attrs={
            "pooling_type": pool_type,
            "ksize": pool_size,
            "global_pooling": global_pooling,
            "strides": pool_stride,
            "paddings": pool_padding,
            "use_cudnn": use_cudnn
        })

    return pool_out
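
# A minimal usage sketch for pool2d; `conv` is assumed to be the NCHW output
# of a conv2d layer, halved spatially by a 2x2 max pooling with stride 2.
#
#   pooled = fluid.layers.pool2d(input=conv, pool_size=2, pool_type='max',
#                                pool_stride=2)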


def batch_norm(input,
               act=None,
               is_test=False,
               momentum=0.9,
               epsilon=1e-05,
               param_attr=None,
               bias_attr=None,
               data_layout='NCHW',
               name=None):
    """
    This function helps create an operator to implement
    the BatchNorm layer using the configurations from the input parameters.
    """
    helper = LayerHelper('batch_norm', **locals())
    dtype = helper.input_dtype()

    input_shape = input.shape
    if data_layout == 'NCHW':
        channel_num = input_shape[1]
    else:
        if data_layout == 'NHWC':
            channel_num = input_shape[-1]
        else:
            raise ValueError("unsupported data layout: " + data_layout)

    param_shape = [channel_num]

    # create parameter
    scale = helper.create_parameter(
        attr=helper.param_attr,
        shape=param_shape,
        dtype=dtype,
        default_initializer=Constant(1.0))

    bias = helper.create_parameter(
        attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True)

    mean = helper.create_global_variable(
        dtype=input.dtype,
        shape=param_shape,
        persistable=True,
        stop_gradient=True)
    helper.set_variable_initializer(var=mean, initializer=Constant(0.0))

    variance = helper.create_global_variable(
        dtype=input.dtype,
        shape=param_shape,
        persistable=True,
        stop_gradient=True)
    helper.set_variable_initializer(var=variance, initializer=Constant(1.0))

    # create output
    # mean and mean_out share the same memory
    mean_out = mean
    # variance and variance_out share the same memory
    variance_out = variance
    saved_mean = helper.create_tmp_variable(dtype=dtype, stop_gradient=True)
    saved_variance = helper.create_tmp_variable(dtype=dtype, stop_gradient=True)

    batch_norm_out = helper.create_tmp_variable(dtype)

    helper.append_op(
        type="batch_norm",
        inputs={
            "X": input,
            "Scale": scale,
            "Bias": bias,
            "Mean": mean,
            "Variance": variance
        },
        outputs={
            "Y": batch_norm_out,
            "MeanOut": mean_out,
            "VarianceOut": variance_out,
            "SavedMean": saved_mean,
            "SavedVariance": saved_variance
        },
        attrs={"momentum": momentum,
               "epsilon": epsilon,
               "is_test": is_test})

    return helper.append_activation(batch_norm_out)
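
# A minimal usage sketch for batch_norm; it is typically inserted between a
# convolution and its activation, here on a hypothetical `data` input.
#
#   conv = fluid.layers.conv2d(input=data, num_filters=32, filter_size=3)
#   bn = fluid.layers.batch_norm(input=conv, act='relu')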


def beam_search_decode(ids, scores, name=None):
    helper = LayerHelper('beam_search_decode', **locals())
    sentence_ids = helper.create_tmp_variable(dtype=ids.dtype)
    sentence_scores = helper.create_tmp_variable(dtype=ids.dtype)

    helper.append_op(
        type="beam_search_decode",
        inputs={"Ids": ids,
                "Scores": scores},
        outputs={
            "SentenceIds": sentence_ids,
            "SentenceScores": sentence_scores
        })

    return sentence_ids, sentence_scores
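
# A minimal usage sketch for beam_search_decode; `ids` and `scores` are
# assumed to be the LoD tensor arrays accumulated during beam search decoding.
#
#   translation_ids, translation_scores = fluid.layers.beam_search_decode(
#       ids=ids, scores=scores)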


def conv2d_transpose(input,
                     num_filters,
                     output_size=None,
                     filter_size=None,
                     padding=None,
                     stride=None,
                     dilation=None,
                     param_attr=None,
                     use_cudnn=True,
                     name=None):
    """
    The transpose of the conv2d layer.
    This layer is also known as the deconvolution layer.

    Args:
        input (Variable): The input image with [N, C, H, W] format.
        num_filters (int): The number of filters. It is the same as the output
            image channel.
        output_size (int|tuple|None): The output image size. If output_size is a
            tuple, it must contain two integers, (image_H, image_W). This
            parameter only works when filter_size is None.
        filter_size (int|tuple|None): The filter size. If filter_size is a tuple,
            it must contain two integers, (filter_size_H, filter_size_W).
            Otherwise, the filter will be a square. None if output_size is used
            to calculate filter_size.
        padding (int|tuple): The padding size. If padding is a tuple, it must
            contain two integers, (padding_H, padding_W). Otherwise,
            padding_H = padding_W = padding.
        stride (int|tuple): The stride size. If stride is a tuple, it must
            contain two integers, (stride_H, stride_W). Otherwise,
            stride_H = stride_W = stride.
        dilation (int|tuple): The dilation size. If dilation is a tuple, it must
            contain two integers, (dilation_H, dilation_W). Otherwise,
            dilation_H = dilation_W = dilation.
        param_attr: Parameter Attribute.
        use_cudnn (bool): Use cudnn kernel or not; it is valid only when the cudnn
            library is installed. Default: True
        name (str|None): A name for this layer (optional). If set to None, the
            layer will be named automatically.

    Returns:
        Variable: Output image.
    """
    helper = LayerHelper("conv2d_transpose", **locals())
    if not isinstance(input, Variable):
        raise TypeError("Input of conv2d_transpose must be Variable")
    input_channel = input.shape[1]

    op_attr = dict()

    if isinstance(padding, int):
        op_attr['paddings'] = [padding, padding]
    elif padding is not None:
        op_attr['paddings'] = padding

    if isinstance(stride, int):
        op_attr['strides'] = [stride, stride]
    elif stride is not None:
        op_attr['strides'] = stride

    if isinstance(dilation, int):
        op_attr['dilations'] = [dilation, dilation]
    elif dilation is not None:
        op_attr['dilations'] = dilation

    if not isinstance(use_cudnn, bool):
        raise ValueError("use_cudnn should be True or False")
    op_attr['use_cudnn'] = use_cudnn

    if filter_size is None:
        if output_size is None:
            raise ValueError("output_size must be set when filter_size is None")
        if isinstance(output_size, int):
            output_size = [output_size, output_size]

        padding = op_attr.get('paddings', [0, 0])
        stride = op_attr.get('strides', [1, 1])
        dilation = op_attr.get('dilations', [1, 1])

        h_in = input.shape[2]
        w_in = input.shape[3]

        filter_size_h = (output_size[0] - (h_in - 1) * stride[0] + 2 *
                         padding[0] - 1) / dilation[0] + 1
        filter_size_w = (output_size[1] - (w_in - 1) * stride[1] + 2 *
                         padding[1] - 1) / dilation[1] + 1
        filter_size = [filter_size_h, filter_size_w]
    elif isinstance(filter_size, int):
        filter_size = [filter_size, filter_size]

    filter_shape = [input_channel, num_filters] + filter_size
    img_filter = helper.create_parameter(
        dtype=input.dtype, shape=filter_shape, attr=helper.param_attr)

    out = helper.create_tmp_variable(dtype=input.dtype)
    helper.append_op(
        type='conv2d_transpose',
        inputs={'Input': [input],
                'Filter': [img_filter]},
        outputs={'Output': out},
        attrs=op_attr)

    return out


def sequence_expand(x, y, name=None):
    """Sequence Expand Layer. This layer will expand the input variable **x**
    according to the LoD information of **y**. The following examples explain
    how sequence_expand works:

    .. code-block:: text

        * Case 1
            x is a LoDTensor:
                x.lod = [[0,       2, 3],
                         [0, 1,    3, 4]]
                x.data = [a, b, c, d]
                x.dims = [4, 1]

            y is a LoDTensor:
                y.lod = [[0,    2,    4],
                         [0, 3, 6, 7, 8]]

            with condition len(y.lod[-1]) - 1 == x.dims[0]

            then output is a 2-level LoDTensor:
                out.lod = [[0,    2,    4],
                           [0, 3, 6, 7, 8]]
                out.data = [a, a, a, b, b, b, c, d]
                out.dims = [8, 1]

        * Case 2
            x is a Tensor:
                x.data = [a, b, c]
                x.dims = [3, 1]

            y is a LoDTensor:
                y.lod = [[0, 2, 3, 6]]

            with condition len(y.lod[-1]) - 1 == x.dims[0]

            then output is a 1-level LoDTensor:
                out.lod = [[0, 2, 3, 6]]
                out.data = [a, a, b, c, c, c]
                out.dims = [6, 1]

    Args:
        x (Variable): The input variable which is a Tensor or LoDTensor.
        y (Variable): The input variable which is a LoDTensor.
        name (str|None): A name for this layer (optional). If set to None, the
            layer will be named automatically.

    Returns:
        Variable: The expanded variable which is a LoDTensor.

    Examples:
        .. code-block:: python

            x = fluid.layers.data(name='x', shape=[10], dtype='float32')
            y = fluid.layers.data(name='y', shape=[10, 20],
                             dtype='float32', lod_level=1)
            out = fluid.layers.sequence_expand(x=x, y=y)
    """
    helper = LayerHelper('sequence_expand', input=x, **locals())
    dtype = helper.input_dtype()
    tmp = helper.create_tmp_variable(dtype)
    helper.append_op(
        type='sequence_expand', inputs={'X': x,
                                        'Y': y}, outputs={'Out': tmp})
    return tmp


def lstm_unit(x_t,
              hidden_t_prev,
              cell_t_prev,
              forget_bias=0.0,
              param_attr=None,
              bias_attr=None,
              name=None):
    """Lstm unit layer. The equation of an lstm step is:

        .. math::

            i_t & = \sigma(W_{x_i}x_{t} + W_{h_i}h_{t-1} + b_i)

            f_t & = \sigma(W_{x_f}x_{t} + W_{h_f}h_{t-1} + b_f)

            c_t & = f_t c_{t-1} + i_t tanh(W_{x_c}x_t + W_{h_c}h_{t-1} + b_c)

            o_t & = \sigma(W_{x_o}x_{t} + W_{h_o}h_{t-1} + b_o)

            h_t & = o_t tanh(c_t)

    The inputs of the lstm unit include :math:`x_t`, :math:`h_{t-1}` and
    :math:`c_{t-1}`. The 2nd dimensions of :math:`h_{t-1}` and :math:`c_{t-1}`
    should be the same. The implementation separates the linear and
    non-linear transformations. Here, we take :math:`i_t` as an example.
    The linear transformation is applied by calling an `fc` layer and the
    equation is:

        .. math::

            L_{i_t} = W_{x_i}x_{t} + W_{h_i}h_{t-1} + b_i

    The non-linear transformation is applied by calling `lstm_unit_op` and the
    equation is:

        .. math::

            i_t = \sigma(L_{i_t})

    This layer has two outputs: :math:`h_t` and :math:`c_t`.

    Args:
        x_t (Variable): The input value of the current step, a 2-D tensor with
            shape M x N, M for batch size and N for input size.
        hidden_t_prev (Variable): The hidden value of the lstm unit, a 2-D
            tensor with shape M x S, M for batch size and S for the size of the
            lstm unit.
        cell_t_prev (Variable): The cell value of the lstm unit, a 2-D tensor
            with shape M x S, M for batch size and S for the size of the lstm
            unit.
        forget_bias (float): The forget bias of the lstm unit.
        param_attr (ParamAttr): The attributes of the parameter weights, used to
            set the initializer, name etc.
        bias_attr (ParamAttr): The attributes of the bias weights; if not False,
            bias weights will be created and set to the default value.
        name (str|None): A name for this layer (optional). If set to None, the
            layer will be named automatically.

    Returns:
        tuple: The hidden value and cell value of the lstm unit.

    Raises:
        ValueError: The ranks of **x_t**, **hidden_t_prev** and **cell_t_prev** \
            are not 2, or the 1st dimensions of **x_t**, **hidden_t_prev** \
            and **cell_t_prev** are not the same, or the 2nd dimensions of \
            **hidden_t_prev** and **cell_t_prev** are not the same.

    Examples:

        .. code-block:: python

             x_t = fluid.layers.fc(input=x_t_data, size=10)
             prev_hidden = fluid.layers.fc(input=prev_hidden_data, size=30)
             prev_cell = fluid.layers.fc(input=prev_cell_data, size=30)
             hidden_value, cell_value = fluid.layers.lstm_unit(x_t=x_t,
                                                    hidden_t_prev=prev_hidden,
                                                    cell_t_prev=prev_cell)
    """
    helper = LayerHelper('lstm_unit', **locals())

    if len(x_t.shape) != 2:
        raise ValueError("Rank of x_t must be 2.")

    if len(hidden_t_prev.shape) != 2:
        raise ValueError("Rank of hidden_t_prev must be 2.")

    if len(cell_t_prev.shape) != 2:
        raise ValueError("Rank of cell_t_prev must be 2.")

    if x_t.shape[0] != hidden_t_prev.shape[0] or x_t.shape[
            0] != cell_t_prev.shape[0]:
        raise ValueError("The 1st dimensions of x_t, hidden_t_prev and "
                         "cell_t_prev must be the same.")

    if hidden_t_prev.shape[1] != cell_t_prev.shape[1]:
        raise ValueError("The 2nd dimensions of hidden_t_prev and "
                         "cell_t_prev must be the same.")

    if bias_attr is None:
        bias_attr = ParamAttr()

    size = cell_t_prev.shape[1]
    concat_out = concat(input=[x_t, hidden_t_prev], axis=1)
    fc_out = fc(input=concat_out,
                size=4 * size,
                param_attr=param_attr,
                bias_attr=bias_attr)
    dtype = x_t.dtype
    c = helper.create_tmp_variable(dtype)
    h = helper.create_tmp_variable(dtype)

    helper.append_op(
        type='lstm_unit',
        inputs={"X": fc_out,
                "C_prev": cell_t_prev},
        outputs={"C": c,
                 "H": h},
        attrs={"forget_bias": forget_bias})

    return h, c


def reduce_sum(input, dim=None, keep_dim=False, name=None):
    """
    Computes the sum of tensor elements over the given dimension.

    Args:
        input (Variable): The input variable which is a Tensor or LoDTensor.
        dim (int|None): The dimension along which the sum is performed. If
            :attr:`None`, sum all elements of :attr:`input` and return a
            Tensor variable with a single element, otherwise must be in the
            range :math:`[-rank(input), rank(input))`. If :math:`dim < 0`,
            the dimension to reduce is :math:`rank + dim`.
        keep_dim (bool): Whether to reserve the reduced dimension in the
            output Tensor. The result tensor will have one fewer dimension
            than the :attr:`input` unless :attr:`keep_dim` is true.
        name (str|None): A name for this layer (optional). If set to None, the
            layer will be named automatically.

    Returns:
        Variable: The reduced Tensor variable.

    Examples:
        .. code-block:: python

            # x is a Tensor variable with following elements:
            #    [[0.2, 0.3, 0.5, 0.9],
            #     [0.1, 0.2, 0.6, 0.7]]
            # Each example is followed by the corresponding output tensor.
            fluid.layers.reduce_sum(x)  # [3.5]
            fluid.layers.reduce_sum(x, dim=0)  # [0.3, 0.5, 1.1, 1.6]
            fluid.layers.reduce_sum(x, dim=-1)  # [1.9, 1.6]
            fluid.layers.reduce_sum(x, dim=1, keep_dim=True)  # [[1.9], [1.6]]
    """
    helper = LayerHelper('reduce_sum', **locals())
    out = helper.create_tmp_variable(dtype=helper.input_dtype())
    helper.append_op(
        type='reduce_sum',
        inputs={'X': input},
        outputs={'Out': out},
        attrs={
            'dim': dim if dim is not None else 0,
            'keep_dim': keep_dim,
            'reduce_all': True if dim is None else False
        })
    return out


def reduce_mean(input, dim=None, keep_dim=False, name=None):
    """
    Computes the mean of tensor elements over the given dimension.

    Args:
        input (Variable): The input variable which is a Tensor or LoDTensor.
        dim (int|None): The dimension along which the mean is computed. If
            :attr:`None`, compute the mean over all elements of :attr:`input`
            and return a Tensor variable with a single element, otherwise
            must be in the range :math:`[-rank(input), rank(input))`. If
            :math:`dim < 0`, the dimension to reduce is :math:`rank + dim`.
        keep_dim (bool): Whether to reserve the reduced dimension in the
            output Tensor. The result tensor will have one fewer dimension
            than the :attr:`input` unless :attr:`keep_dim` is true.
        name (str|None): A name for this layer (optional). If set to None, the
            layer will be named automatically.

    Returns:
        Variable: The reduced Tensor variable.

    Examples:
        .. code-block:: python

            # x is a Tensor variable with following elements:
            #    [[0.2, 0.3, 0.5, 0.9],
            #     [0.1, 0.2, 0.6, 0.7]]
            # Each example is followed by the corresponding output tensor.
            fluid.layers.reduce_mean(x)  # [0.4375]
            fluid.layers.reduce_mean(x, dim=0)  # [0.15, 0.25, 0.55, 0.8]
            fluid.layers.reduce_mean(x, dim=-1)  # [0.475, 0.4]
            fluid.layers.reduce_mean(x, dim=1, keep_dim=True)  # [[0.475], [0.4]]
    """
    helper = LayerHelper('reduce_mean', **locals())
    out = helper.create_tmp_variable(dtype=helper.input_dtype())
    helper.append_op(
        type='reduce_mean',
        inputs={'X': input},
        outputs={'Out': out},
        attrs={
            'dim': dim if dim is not None else 0,
            'keep_dim': keep_dim,
            'reduce_all': True if dim is None else False
        })
    return out


def reduce_max(input, dim=None, keep_dim=False, name=None):
    """
    Computes the maximum of tensor elements over the given dimension.

    Args:
        input (Variable): The input variable which is a Tensor or LoDTensor.
        dim (int|None): The dimension along which the maximum is computed.
            If :attr:`None`, compute the maximum over all elements of
            :attr:`input` and return a Tensor variable with a single element,
            otherwise must be in the range :math:`[-rank(input), rank(input))`.
            If :math:`dim < 0`, the dimension to reduce is :math:`rank + dim`.
        keep_dim (bool): Whether to reserve the reduced dimension in the
            output Tensor. The result tensor will have one fewer dimension
            than the :attr:`input` unless :attr:`keep_dim` is true.
        name (str|None): A name for this layer (optional). If set to None, the
            layer will be named automatically.

    Returns:
        Variable: The reduced Tensor variable.

    Examples:
        .. code-block:: python

            # x is a Tensor variable with following elements:
            #    [[0.2, 0.3, 0.5, 0.9],
            #     [0.1, 0.2, 0.6, 0.7]]
            # Each example is followed by the corresponding output tensor.
            fluid.layers.reduce_max(x)  # [0.9]
            fluid.layers.reduce_max(x, dim=0)  # [0.2, 0.3, 0.6, 0.9]
            fluid.layers.reduce_max(x, dim=-1)  # [0.9, 0.7]
            fluid.layers.reduce_max(x, dim=1, keep_dim=True)  # [[0.9], [0.7]]
    """
    helper = LayerHelper('reduce_max', **locals())
    out = helper.create_tmp_variable(dtype=helper.input_dtype())
    helper.append_op(
        type='reduce_max',
        inputs={'X': input},
        outputs={'Out': out},
        attrs={
            'dim': dim if dim is not None else 0,
            'keep_dim': keep_dim,
            'reduce_all': True if dim is None else False
        })
    return out


def reduce_min(input, dim=None, keep_dim=False, name=None):
    """
    Computes the minimum of tensor elements over the given dimension.

    Args:
        input (Variable): The input variable which is a Tensor or LoDTensor.
        dim (int|None): The dimension along which the minimum is computed.
            If :attr:`None`, compute the minimum over all elements of
            :attr:`input` and return a Tensor variable with a single element,
            otherwise must be in the range :math:`[-rank(input), rank(input))`.
            If :math:`dim < 0`, the dimension to reduce is :math:`rank + dim`.
        keep_dim (bool): Whether to reserve the reduced dimension in the
            output Tensor. The result tensor will have one fewer dimension
            than the :attr:`input` unless :attr:`keep_dim` is true.
        name (str|None): A name for this layer (optional). If set to None, the
            layer will be named automatically.

    Returns:
        Variable: The reduced Tensor variable.

    Examples:
        .. code-block:: python

            # x is a Tensor variable with following elements:
            #    [[0.2, 0.3, 0.5, 0.9],
            #     [0.1, 0.2, 0.6, 0.7]]
            # Each example is followed by the corresponding output tensor.
            fluid.layers.reduce_min(x)  # [0.1]
            fluid.layers.reduce_min(x, dim=0)  # [0.1, 0.2, 0.5, 0.7]
            fluid.layers.reduce_min(x, dim=-1)  # [0.2, 0.1]
            fluid.layers.reduce_min(x, dim=1, keep_dim=True)  # [[0.2], [0.1]]
    """
    helper = LayerHelper('reduce_min', **locals())
    out = helper.create_tmp_variable(dtype=helper.input_dtype())
    helper.append_op(
        type='reduce_min',
        inputs={'X': input},
        outputs={'Out': out},
        attrs={
            'dim': dim if dim is not None else 0,
            'keep_dim': keep_dim,
            'reduce_all': True if dim is None else False
        })
    return out
def split ( input , num_or_sections , dim = - 1 , name = None ) :
"""
Split the input tensor into multiple sub - tensors .
Args :
input ( Variable ) : The input variable which is a Tensor or LoDTensor .
num_or_sections ( int | list ) : If : attr : ` num_or_sections ` is an integer ,
then the integer indicates the number of equal sized sub - tensors
that the tensor will be divided into . If : attr : ` num_or_sections `
is a list of integers , the length of list indicates the number of
sub - tensors and the integers indicate the sizes of sub - tensors '
: attr : ` dim ` dimension orderly .
dim ( int ) : The dimension along which to split . If : math : ` dim < 0 ` , the
dimension to split along is : math : ` rank ( input ) + dim ` .
name (str|None): A name for this layer (optional). If set to None, the layer
will be named automatically.
Returns :
List : The list of segmented tensor variables .
Examples :
. . code - block : : python
# x is a Tensor variable with shape [3, 9, 5]:
x0 , x1 , x2 = fluid . layers . split ( x , num_or_sections = 3 , dim = 1 )
x0 . shape # [3, 3, 5]
x1 . shape # [3, 3, 5]
x2 . shape # [3, 3, 5]
x0 , x1 , x2 = fluid . layers . split ( x , num_or_sections = [ 2 , 3 , 4 ] , dim = 1 )
x0 . shape # [3, 2, 5]
x1 . shape # [3, 3, 5]
x2 . shape # [3, 4, 5]
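The snippets above assume `x` already exists. A minimal sketch of how such an input could be declared (the layer name 'x' and the use of append_batch_size=False are illustrative assumptions):
.. code-block:: python
# declare a [3, 9, 5] input; the first dimension is a plain tensor
# dimension here, not an implicit batch dimension
x = fluid.layers.data(name='x', shape=[3, 9, 5],
                      append_batch_size=False, dtype='float32')
x0, x1, x2 = fluid.layers.split(x, num_or_sections=3, dim=1)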
"""
helper = LayerHelper ( ' split ' , * * locals ( ) )
input_shape = input . shape
dim = ( len ( input_shape ) + dim ) if dim < 0 else dim
if isinstance ( num_or_sections , int ) :
assert num_or_sections > 1 , ' num_or_sections must be more than 1. '
num = num_or_sections
else :
assert len ( num_or_sections ) < input_shape [
dim ] , ' len(num_or_sections) must not be more than input.shape[dim]. '
num = len ( num_or_sections )
outs = [
helper . create_tmp_variable ( dtype = helper . input_dtype ( ) )
for i in range ( num )
]
helper . append_op (
type = ' split ' ,
inputs = { ' X ' : input } ,
outputs = { ' Out ' : outs } ,
attrs = {
' num ' : num_or_sections if isinstance ( num_or_sections , int ) else 0 ,
' sections ' : num_or_sections
if isinstance ( num_or_sections , list ) else [ ] ,
' axis ' : dim
} )
return outs
def l2_normalize ( x , axis , epsilon = 1e-12 , name = None ) :
"""
* * L2 normalize Layer * *
The l2 normalize layer normalizes ` x ` along dimension ` axis ` using an L2
norm. For a 1-D tensor (`axis` is fixed to 0), this layer computes
output = x / sqrt ( max ( sum ( x * * 2 ) , epsilon ) )
For ` x ` with more dimensions , this layer independently normalizes each 1 - D
slice along dimension ` axis ` .
Args :
x ( Variable | list ) : The input tensor to l2_normalize layer .
axis ( int ) : Dimension along which to normalize the input .
epsilon ( float ) : A lower bound value for ` x ` ' s l2 norm. sqrt(epsilon) will
be used as the divisor if the l2 norm of ` x ` is less than
sqrt ( epsilon ) .
name (str|None): A name for this layer (optional). If set to None, the layer
will be named automatically.
Returns :
Variable : The output tensor variable .
Examples :
. . code - block : : python
data = fluid . layers . data ( name = " data " ,
shape = ( 3 , 17 , 13 ) ,
dtype = " float32 " )
fc = fluid . layers . l2_normalize ( x = data , axis = 1 )
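The computation is roughly equivalent to the following NumPy sketch on a hypothetical array `x_np` (illustration only, not part of this API):
.. code-block:: python
import numpy as np
x_np = np.random.rand(3, 17, 13).astype('float32')
axis, epsilon = 1, 1e-12
# x / sqrt(max(sum(x**2, axis), epsilon)), broadcast along `axis`
norm = np.sqrt(np.maximum((x_np ** 2).sum(axis=axis, keepdims=True), epsilon))
out_np = x_np / norm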
"""
if len ( x . shape ) == 1 : axis = 0
helper = LayerHelper ( " l2_normalize " , * * locals ( ) )
square = helper . create_tmp_variable ( dtype = x . dtype )
helper . append_op ( type = " square " , inputs = { " X " : x } , outputs = { " Out " : square } )
reduced_sum = helper . create_tmp_variable ( dtype = x . dtype )
helper . append_op (
type = " reduce_sum " ,
inputs = { " X " : square } ,
outputs = { " Out " : reduced_sum } ,
attrs = {
" dim " : 1 if axis is None else axis ,
" keep_dim " : True ,
" reduce_all " : False
} )
# TODO(caoying) A lower bound value epsilon for the norm is needed to
# improve the numeric stability of reciprocal. This requires a maximum_op.
rsquare = helper . create_tmp_variable ( dtype = x . dtype )
helper . append_op (
type = " reciprocal " , inputs = { " X " : reduced_sum } , outputs = { " Out " : rsquare } )
# TODO(caoying) the current elementwise_mul operator does not support a
# general broadcast rule which broadcasts input(Y) to have the same
# dimension with Input(X) starting from a specified dimension. So this
# expansion is required. Once a general broadcast rule is supported, this
# expansion can be removed.
rsquare_expanded = helper . create_tmp_variable ( dtype = x . dtype )
expand_times = [ 1 ] * len ( x . shape )
expand_times [ axis ] = int ( x . shape [ axis ] )
helper . append_op (
type = " expand " ,
inputs = { " X " : rsquare } ,
outputs = { " Out " : rsquare_expanded } ,
attrs = { " expand_times " : expand_times } )
out = helper . create_tmp_variable ( dtype = x . dtype )
helper . append_op (
type = " elementwise_mul " ,
inputs = { " X " : x ,
" Y " : rsquare_expanded } ,
outputs = { " Out " : out } )
return out
def matmul ( x , y , transpose_x = False , transpose_y = False , name = None ) :
"""
Applies matrix multiplication to two tensors. Currently, only rank 1 to rank
3 input tensors are supported .
The actual behavior depends on the shapes of : math : ` x ` , : math : ` y ` and the
flag values of : attr : ` transpose_x ` , : attr : ` transpose_y ` . Specifically :
- If a transpose flag is specified , the last two dimensions of the tensor
are transposed . If the tensor is rank - 1 of shape : math : ` [ D ] ` , then for
: math : ` x ` it is treated as : math : ` [ 1 , D ] ` in nontransposed form and as
: math : ` [ D , 1 ] ` in transposed form , whereas for : math : ` y ` it is the
opposite : It is treated as : math : ` [ D , 1 ] ` in nontransposed form and as
: math : ` [ 1 , D ] ` in transposed form .
- After the transposition, the two tensors are 2-D or 3-D and the matrix
multiplication is performed in the following way.
- If both are 2 - D , they are multiplied like conventional matrices .
- If either is 3 - D , it is treated as a stack of matrices residing in the
last two dimensions and a batched matrix multiply supporting broadcast
applies on the two tensors .
Also note that if the raw tensor : math : ` x ` or : math : ` y ` is rank - 1 and
nontransposed , the prepended or appended dimension : math : ` 1 ` will be
removed after matrix multiplication.
Args :
x ( Variable ) : The input variable which is a Tensor or LoDTensor .
y ( Variable ) : The input variable which is a Tensor or LoDTensor .
transpose_x ( bool ) : Whether to transpose : math : ` x ` before multiplication .
transpose_y ( bool ) : Whether to transpose : math : ` y ` before multiplication .
name (str|None): A name for this layer (optional). If set to None, the layer
will be named automatically.
Returns :
Variable : The product Tensor variable .
Examples :
. . code - block : : python
# Examples to clarify shapes of the inputs and output
# x: [B, M, K], y: [B, K, N]
fluid . layers . matmul ( x , y ) # out: [B, M, N]
# x: [B, M, K], y: [K, N]
fluid . layers . matmul ( x , y ) # out: [B, M, N]
# x: [B, M, K], y: [K]
fluid . layers . matmul ( x , y ) # out: [B, M]
# x: [M, K], y: [K, N]
fluid . layers . matmul ( x , y ) # out: [M, N]
# x: [K], y: [K]
fluid . layers . matmul ( x , y ) # out: [1]
# x: [M], y: [N]
fluid . layers . matmul ( x , y , True , True ) # out: [M, N]
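A minimal sketch for the plain 2-D case (the layer names, shapes and the use of append_batch_size=False below are illustrative assumptions):
.. code-block:: python
x = fluid.layers.data(name='mat_x', shape=[2, 3],
                      append_batch_size=False, dtype='float32')
y = fluid.layers.data(name='mat_y', shape=[3, 4],
                      append_batch_size=False, dtype='float32')
out = fluid.layers.matmul(x, y)  # out: [2, 4]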
"""
helper = LayerHelper ( ' matmul ' , * * locals ( ) )
assert max (
len ( x . shape ) , len ( y . shape )
) < = 3 , ' Currently only rank 1 to rank 3 input tensors are supported. '
out = helper . create_tmp_variable ( dtype = helper . input_dtype ( ) )
helper . append_op (
type = ' matmul ' ,
inputs = { ' X ' : x ,
' Y ' : y } ,
outputs = { ' Out ' : out } ,
attrs = { ' transpose_X ' : transpose_x ,
' transpose_Y ' : transpose_y } )
return out
def edit_distance ( input , label , normalized = False , tokens = None , name = None ) :
"""
EditDistance operator computes the edit distances between a batch of hypothesis strings and their references. Edit distance, also called Levenshtein distance, measures how dissimilar two strings are by counting the minimum number of operations required to transform one string into another. Here the operations include insertion, deletion, and substitution. For example, given hypothesis string A = "kitten" and reference B = "sitting", the edit distance is 3, since A can be transformed into B with at least two substitutions and one insertion:
" kitten " - > " sitten " - > " sittin " - > " sitting "
Input ( Hyps ) is a LoDTensor consisting of all the hypothesis strings with the total number denoted by ` batch_size ` , and the separation is specified by the LoD information . And the ` batch_size ` reference strings are arranged in order in the same way in the LoDTensor Input ( Refs ) .
Output(Out) contains the `batch_size` results, each of which stands for the edit distance of a pair of strings. If Attr(normalized) is true, the edit distance will be divided by the length of the reference string.
Args :
input ( Variable ) : The indices for hypothesis strings .
label ( Variable ) : The indices for reference strings .
normalized (bool): Indicates whether to normalize the edit distance by the length of the reference string.
tokens ( list ) : Tokens that should be removed before calculating edit distance .
Returns :
Variable : sequence - to - sequence edit distance loss in shape [ batch_size , 1 ] .
Examples :
. . code - block : : python
x = fluid . layers . data ( name = ' x ' , shape = [ 8 ] , dtype = ' float32 ' )
y = fluid . layers . data ( name = ' y ' , shape = [ 7 ] , dtype = ' float32 ' )
cost = fluid . layers . edit_distance ( input = x , label = y )
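If certain token indices (e.g. a padding index) should be ignored, they can be stripped before the distance is computed via the `tokens` argument. A sketch building on the example above (treating index 0 as padding is an illustrative assumption):
.. code-block:: python
# tokens listed here are erased from both input and label first
cost = fluid.layers.edit_distance(input=x, label=y, tokens=[0])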
"""
helper = LayerHelper ( " edit_distance " , * * locals ( ) )
# remove some tokens from input and labels
if tokens is not None and len ( tokens ) > 0 :
erased_input = helper . create_tmp_variable ( dtype = " int64 " )
erased_label = helper . create_tmp_variable ( dtype = " int64 " )
helper . append_op (
type = " sequence_erase " ,
inputs = { " X " : [ input ] } ,
outputs = { " Out " : [ erased_input ] } ,
attrs = { " tokens " : tokens } )
input = erased_input
helper . append_op (
type = " sequence_erase " ,
inputs = { " X " : [ label ] } ,
outputs = { " Out " : [ erase_label ] } ,
attrs = { " tokens " : tokens } )
label = erased_label
# edit distance op
edit_distance_out = helper . create_tmp_variable ( dtype = " int64 " )
helper . append_op (
type = " edit_distance " ,
inputs = { " Hyps " : [ input ] ,
" Refs " : [ label ] } ,
outputs = { " Out " : [ edit_distance_out ] } ,
attrs = { " normalized " : normalized } )
return edit_distance_out
def ctc_greedy_decoder ( input , blank , name = None ) :
"""
This op is used to decode sequences with a greedy policy, following the steps below:
1. Get the index of the max value for each row of the input, i.e. numpy.argmax(input, axis=1).
2. For each sequence in the result of step 1, merge repeated tokens between two blanks and delete all blanks.
A simple example as below :
. . code - block : : text
Given :
input . data = [ [ 0.6 , 0.1 , 0.3 , 0.1 ] ,
[ 0.3 , 0.2 , 0.4 , 0.1 ] ,
[ 0.1 , 0.5 , 0.1 , 0.3 ] ,
[ 0.5 , 0.1 , 0.3 , 0.1 ] ,
[ 0.5 , 0.1 , 0.3 , 0.1 ] ,
[ 0.2 , 0.2 , 0.2 , 0.4 ] ,
[ 0.2 , 0.2 , 0.1 , 0.5 ] ,
[ 0.5 , 0.1 , 0.3 , 0.1 ] ]
input . lod = [ [ 0 , 4 , 8 ] ]
Then :
output . data = [ [ 2 ] ,
[ 1 ] ,
[ 3 ] ]
output . lod = [ [ 0 , 2 , 3 ] ]
Args :
input (Variable): (LoDTensor<float>), the probabilities of variable-length sequences, which is a 2-D Tensor with LoD information. Its shape is [Lp, num_classes + 1], where Lp is the sum of all input sequences' lengths and num_classes is the true number of classes (not including the blank label).
blank (int): the blank label index of the Connectionist Temporal Classification (CTC) loss, which is in the half-open interval [0, num_classes + 1).
Returns :
Variable : CTC greedy decode result .
Examples :
. . code - block : : python
x = fluid . layers . data ( name = ' x ' , shape = [ 8 ] , dtype = ' float32 ' )
cost = fluid . layers . ctc_greedy_decoder ( input = x , blank = 0 )
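A sketch matching the text example above, where each time-step has num_classes + 1 = 4 probabilities (the layer name and class count are illustrative assumptions):
.. code-block:: python
probs = fluid.layers.data(name='probs', shape=[4],
                          dtype='float32', lod_level=1)
decoded = fluid.layers.ctc_greedy_decoder(input=probs, blank=0)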
"""
helper = LayerHelper ( " ctc_greedy_decoder " , * * locals ( ) )
# top 1 op
topk_out = helper . create_tmp_variable ( dtype = input . dtype )
topk_indices = helper . create_tmp_variable ( dtype = " int64 " )
helper . append_op (
type = " top_k " ,
inputs = { " X " : [ input ] } ,
outputs = { " Out " : [ topk_out ] ,
" Indices " : [ topk_indices ] } ,
attrs = { " k " : 1 } )
# ctc align op
ctc_out = helper . create_tmp_variable ( dtype = " int64 " )
helper . append_op (
type = " ctc_align " ,
inputs = { " Input " : [ topk_indices ] } ,
outputs = { " Output " : [ ctc_out ] } ,
attrs = { " merge_repeated " : True ,
" blank " : blank } )
return ctc_out
def warpctc ( input , label , blank = 0 , norm_by_times = False , * * kwargs ) :
"""
An operator integrating the open source Warp - CTC library
( https : / / github . com / baidu - research / warp - ctc )
to compute Connectionist Temporal Classification ( CTC ) loss .
It can be aliased as softmax with CTC, since a native softmax activation is
integrated into the Warp-CTC library to normalize the values for each row of the
input tensor.
Args :
input (Variable): (LoDTensor, default: LoDTensor<float>),
the unscaled probabilities of variable-length sequences,
which is a 2-D Tensor with LoD information.
Its shape is [Lp, num_classes + 1], where Lp is the sum of all input
sequences' lengths and num_classes is the true number of classes
(not including the blank label).
label (Variable): (LoDTensor, default: LoDTensor<int>), the ground truth
of the variable-length sequence, which is a 2-D Tensor with LoD
information. It is of the shape [Lg, 1], where Lg is the sum of
all labels' lengths.
blank : ( int , default : 0 ) , the blank label index of Connectionist
Temporal Classification ( CTC ) loss , which is in the
half - opened interval [ 0 , num_classes + 1 ) .
norm_by_times: (bool, default: false), whether to normalize
the gradients by the number of time-steps, which is also the
sequence's length. There is no need to normalize the gradients
if the warpctc layer is followed by a mean_op.
Returns :
Variable : The Connectionist Temporal Classification ( CTC ) loss ,
which is a 2 - D Tensor of the shape [ batch_size , 1 ] .
Examples :
. . code - block : : python
y = layers . data ( name = ' y ' , shape = [ 11 , 8 ] , dtype = ' float32 ' , lod_level = 1 )
y_predict = layers . data ( name = ' y_predict ' , shape = [ 11 , 1 ] , dtype = ' float32 ' )
cost = layers . warpctc ( input = y_predict , label = y )
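As noted for `norm_by_times`, the per-sequence losses are usually averaged into a scalar before optimization, e.g. by a mean_op. A sketch building on `cost` from the example above:
.. code-block:: python
avg_cost = fluid.layers.mean(x=cost)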
"""
helper = LayerHelper ( ' warpctc ' , * * kwargs )
loss_out = helper . create_tmp_variable ( dtype = input . dtype )
grad_out = helper . create_tmp_variable ( dtype = input . dtype )
helper . append_op (
type = ' warpctc ' ,
inputs = { ' Logits ' : [ input ] ,
' Label ' : [ label ] } ,
outputs = { ' WarpCTCGrad ' : [ grad_out ] ,
' Loss ' : [ loss_out ] } ,
attrs = { ' blank ' : blank ,
' norm_by_times ' : norm_by_times } )
return loss_out
def sequence_reshape ( input , new_dim ) :
"""
* * Sequence Reshape Layer * *
This layer rearranges the input sequences. The new dimension is set by
the user. The length of each sequence is computed from the original length,
the original dimension, and the new dimension. The following example will
help to illustrate the function of this layer:
. . code - block : : text
x is a LoDTensor :
x . lod = [ [ 0 , 2 , 6 ] ]
x . data = [ [ 1 , 2 ] , [ 3 , 4 ] ,
[ 5 , 6 ] , [ 7 , 8 ] , [ 9 , 10 ] , [ 11 , 12 ] ]
x . dims = [ 6 , 2 ]
set new_dim = 4
then out is a LoDTensor :
out . lod = [ [ 0 , 1 , 3 ] ]
out . data = [ [ 1 , 2 , 3 , 4 ] ,
[ 5 , 6 , 7 , 8 ] , [ 9 , 10 , 11 , 12 ] ]
out . dims = [ 3 , 4 ]
Currently, only a 1-level LoDTensor is supported. Please make sure that
(original length * original dimension) is divisible by the new dimension
with no remainder for each sequence.
Args :
input (Variable): (LoDTensor, default: LoDTensor<float>), a 2-D LoDTensor
with shape [N, M], where M is the dimension.
new_dim ( int ) : New dimension which the input LoDTensor is reshaped to .
Returns :
Variable : Reshaped LoDTensor according to new dimension .
Examples :
. . code - block : : python
x = fluid . layers . data ( name = ' x ' , shape = [ 5 , 20 ] ,
dtype = ' float32 ' , lod_level = 1 )
x_reshaped = layers . sequence_reshape ( input = x , new_dim = 10 )
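For the text example above, the underlying rearrangement of one sequence is just a row-major reshape; a rough NumPy analogue (illustration only, not part of this API):
.. code-block:: python
import numpy as np
seq = np.array([[1, 2], [3, 4]])  # the first sequence: 2 steps of dimension 2
seq.reshape(-1, 4)                # [[1, 2, 3, 4]]: 1 step of dimension 4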
"""
helper = LayerHelper ( ' sequence_reshape ' , * * locals ( ) )
out = helper . create_tmp_variable ( helper . input_dtype ( ) )
helper . append_op (
type = ' sequence_reshape ' ,
inputs = { ' X ' : [ input ] } ,
outputs = { ' Out ' : [ out ] } ,
attrs = { ' new_dim ' : new_dim } )
return out