@@ -50,7 +50,8 @@ __all__ = [
     'sequence_last_step',
     'dropout',
     'split',
-    'greedy_ctc_error',
+    'ctc_greedy_decoder',
+    'edit_distance_error',
     'l2_normalize',
     'matmul',
 ]
@@ -1791,17 +1792,21 @@ def matmul(x, y, transpose_x=False, transpose_y=False, name=None):
     return out


-def greedy_ctc_error(input, label, blank, normalized=False, name=None):
+def edit_distance_error(input, label, normalized=False, name=None):
     """
-    This evaluator is to calculate sequence-to-sequence edit distance.
-
-    Args:
-        input (Variable): (LoDTensor, default: LoDTensor<float>), the
-            unscaled probabilities of variable-length sequences, which is a
-            2-D Tensor with LoD information. Its shape is
-            [Lp, num_classes + 1], where Lp is the sum of all input
-            sequences' lengths and num_classes is the true number of
-            classes (not including the blank label).
-        label (Variable): (LoDTensor, default: LoDTensor<int>), the ground
-            truth of variable-length sequences, which is a 2-D Tensor with
-            LoD information. It is of the shape [Lg, 1], where Lg is the
-            sum of all labels' lengths.
-        blank (int): the blank label index of Connectionist Temporal
-            Classification (CTC) loss, which is in the half-open interval
-            [0, num_classes + 1).
+    EditDistance operator computes the edit distances between a batch of
+    hypothesis strings and their references. Edit distance, also called
+    Levenshtein distance, measures how dissimilar two strings are by
+    counting the minimum number of operations to transform one string into
+    another. Here the operations include insertion, deletion, and
+    substitution. For example, given hypothesis string A = "kitten" and
+    reference B = "sitting", the edit distance is 3, since transforming A
+    into B requires at least two substitutions and one insertion:
+
+        "kitten" -> "sitten" -> "sittin" -> "sitting"
+
+    Input(Hyps) is a LoDTensor consisting of all the hypothesis strings,
+    with the total number denoted by `batch_size` and the separation
+    specified by the LoD information. The `batch_size` reference strings
+    are arranged in order in the same way in the LoDTensor Input(Refs).
+
+    Output(Out) contains the `batch_size` results, each of which stands
+    for the edit distance of a pair of strings. If Attr(normalized) is
+    true, the edit distance is divided by the length of the reference
+    string.
+
+    Args:
+        input (Variable): The indices for hypothesis strings.
+        label (Variable): The indices for reference strings.
         normalized (bool): Indicates whether to normalize the edit
             distance by the length of the reference string.
@@ -1812,11 +1817,73 @@ def greedy_ctc_error(input, label, blank, normalized=False, name=None):
         .. code-block:: python

             x = fluid.layers.data(name='x', shape=[8], dtype='float32')
-            y = fluid.layers.data(name='y', shape=[1], dtype='float32')
+            y = fluid.layers.data(name='y', shape=[7], dtype='float32')
+            cost = fluid.layers.edit_distance_error(input=x, label=y)
+    """
+    helper = LayerHelper("edit_distance_error", **locals())
+
+    # edit distance op
+    edit_distance_out = helper.create_tmp_variable(dtype="int64")
+    helper.append_op(
+        type="edit_distance",
+        inputs={"Hyps": [input],
+                "Refs": [label]},
+        outputs={"Out": [edit_distance_out]},
+        attrs={"normalized": normalized})
+
+    return edit_distance_out
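The edit_distance op appended above computes the pairwise Levenshtein distance in the underlying operator. As a minimal pure-Python sketch of the dynamic-programming recurrence it evaluates (not part of this patch; `_edit_distance_ref` is a hypothetical helper that takes plain Python sequences rather than LoDTensors):

.. code-block:: python

    def _edit_distance_ref(hyp, ref, normalized=False):
        # dist[i][j] holds the edit distance between hyp[:i] and ref[:j].
        m, n = len(hyp), len(ref)
        dist = [[0] * (n + 1) for _ in range(m + 1)]
        for i in range(m + 1):
            dist[i][0] = i  # i deletions map hyp[:i] to the empty string
        for j in range(n + 1):
            dist[0][j] = j  # j insertions map the empty string to ref[:j]
        for i in range(1, m + 1):
            for j in range(1, n + 1):
                cost = 0 if hyp[i - 1] == ref[j - 1] else 1
                dist[i][j] = min(dist[i - 1][j] + 1,         # deletion
                                 dist[i][j - 1] + 1,         # insertion
                                 dist[i - 1][j - 1] + cost)  # substitution
        d = dist[m][n]
        return float(d) / n if normalized else d

    # Matches the docstring example: "kitten" -> "sitting" has distance 3.
    assert _edit_distance_ref("kitten", "sitting") == 3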
+

+def ctc_greedy_decoder(input, blank, name=None):
+    """
+    This op is used to decode sequences by greedy policy, which consists
+    of the following two steps:
+
+    1. Get the index of the max value for each row of input, i.e.
+       numpy.argmax(input, axis=1).
+    2. For each sequence in the result of step 1, merge repeated tokens
+       between two blanks and delete all blanks.
+
+    A simple example as below:
+
+    .. code-block:: text
+
+        Given:
+
+            input.data = [[0.6, 0.1, 0.3, 0.1],
+                          [0.3, 0.2, 0.4, 0.1],
+                          [0.1, 0.5, 0.1, 0.3],
+                          [0.5, 0.1, 0.3, 0.1],
+                          [0.5, 0.1, 0.3, 0.1],
+                          [0.2, 0.2, 0.2, 0.4],
+                          [0.2, 0.2, 0.1, 0.5],
+                          [0.5, 0.1, 0.3, 0.1]]
+
+            input.lod = [[0, 4, 8]]
+
+        Then:
+
+            output.data = [[2],
+                           [1],
+                           [3]]
+
+            output.lod = [[0, 2, 3]]
+
+    Args:
+        input (Variable): (LoDTensor<float>), the probabilities of
+            variable-length sequences, which is a 2-D Tensor with LoD
+            information. Its shape is [Lp, num_classes + 1], where Lp is
+            the sum of all input sequences' lengths and num_classes is the
+            true number of classes (not including the blank label).
+        blank (int): the blank label index of Connectionist Temporal
+            Classification (CTC) loss, which is in the half-open interval
+            [0, num_classes + 1).
+
+    Returns:
+        Variable: The CTC greedy decoding result.
+
+    Examples:
+        .. code-block:: python
+
+            x = fluid.layers.data(name='x', shape=[8], dtype='float32')
-            cost = fluid.layers.greedy_ctc_error(input=x, label=y, blank=0)
+            cost = fluid.layers.ctc_greedy_decoder(input=x, blank=0)
     """
-    helper = LayerHelper("greedy_ctc_error", **locals())
+    helper = LayerHelper("ctc_greedy_decoder", **locals())
     # top 1 op
     topk_out = helper.create_tmp_variable(dtype=input.dtype)
     topk_indices = helper.create_tmp_variable(dtype="int64")
@@ -1835,14 +1902,4 @@ def greedy_ctc_error(input, label, blank, normalized=False, name=None):
         outputs={"Output": [ctc_out]},
         attrs={"merge_repeated": True,
                "blank": blank})
-    # edit distance op
-    edit_distance_out = helper.create_tmp_variable(dtype="int64")
-    helper.append_op(
-        type="edit_distance",
-        inputs={"Hyps": [ctc_out],
-                "Refs": [label]},
-        outputs={"Out": [edit_distance_out]},
-        attrs={"normalized": normalized})
-    return edit_distance_out
+    return ctc_out
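For reference, the two decoding steps described in the ctc_greedy_decoder docstring can be mirrored in NumPy. A minimal sketch (not part of this patch; `_ctc_greedy_decode_ref` is a hypothetical helper that takes a single sequence as a plain 2-D array instead of a LoDTensor):

.. code-block:: python

    import numpy as np

    def _ctc_greedy_decode_ref(probs, blank):
        # Step 1: take the argmax class at every time step (row).
        tokens = np.argmax(probs, axis=1)
        # Step 2: merge repeated tokens, then drop all blanks.
        decoded, prev = [], None
        for t in tokens:
            if t != prev and t != blank:
                decoded.append(int(t))
            prev = t
        return decoded

    # First sequence of the docstring example (rows 0..3, blank = 0):
    probs = np.array([[0.6, 0.1, 0.3, 0.1],
                      [0.3, 0.2, 0.4, 0.1],
                      [0.1, 0.5, 0.1, 0.3],
                      [0.5, 0.1, 0.3, 0.1]])
    assert _ctc_greedy_decode_ref(probs, blank=0) == [2, 1]

After this split, the behavior of the removed greedy_ctc_error can be recovered by composing the two new layers: decode with ctc_greedy_decoder, then feed the decoded indices and the label into edit_distance_error.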