@@ -21,25 +21,25 @@ from op_test import OpTest
 from test_softmax_op import stable_softmax
 import paddle.fluid as fluid
 from paddle.fluid import Program, program_guard
+import paddle
+import paddle.nn.functional as F
 
 CUDA_BLOCK_SIZE = 512
 
 
 class CTCForward(object):
-    def __init__(self, softmax, softmax_lod, labels, labels_lod, blank,
-                 norm_by_times):
+    def __init__(self, softmax, softmax_lod, labels, labels_lod, num_classes,
+                 batch_size, blank, norm_by_times):
         self.softmax = softmax
         self.softmax_lod = softmax_lod
-        assert labels.shape[1] == 1
         self.labels = labels
         self.labels_lod = labels_lod
         self.blank = blank
         self.norm_by_times = norm_by_times
 
         self.level = 0
-        self.num_classes = softmax.shape[1]
-        self.batch_size = len(softmax_lod[self.level])
-        assert self.batch_size == len(labels_lod[self.level])
+        self.num_classes = num_classes
+        self.batch_size = batch_size
 
         self.loss = np.zeros([self.batch_size, 1], dtype="float32")
         self.gradient = np.zeros(self.softmax.shape, dtype="float32")
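Note: CTCForward now takes num_classes and batch_size as explicit arguments instead of inferring them from the softmax shape and the LoD, which lets the same reference implementation serve both the LoD-based and the padding-based tests below. A minimal construction sketch, with illustrative values rather than ones taken from any test here:

    # illustrative values only; softmax and labels are numpy arrays
    softmax_lod = [[4, 1, 3, 3]]   # per-sequence lengths at LoD level 0
    labels_lod = [[3, 1, 4, 4]]
    ctc = CTCForward(softmax, softmax_lod, labels, labels_lod,
                     num_classes=12, batch_size=4, blank=0,
                     norm_by_times=False)
    loss = ctc.forward()           # numpy array of shape [batch_size, 1]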
@@ -163,17 +163,25 @@ class CTCForward(object):
         softmax_offset = 0
         labels_offset = 0
         for i in range(self.batch_size):
-            softmax_start_i = softmax_offset
-            softmax_end_i = softmax_offset + self.softmax_lod[self.level][i]
-            labels_start_i = labels_offset
-            labels_end_i = labels_offset + self.labels_lod[self.level][i]
-
-            softmax_a_sequence = self.softmax[softmax_start_i:softmax_end_i, :]
-            labels_a_sequence = self.labels[labels_start_i:labels_end_i, :]
-            self.loss[i] = self.forward_a_sequence(softmax_a_sequence,
-                                                   labels_a_sequence)
-            softmax_offset += self.softmax_lod[self.level][i]
-            labels_offset += self.labels_lod[self.level][i]
+            if self.labels.shape[1] == 1:
+                softmax_start_i = softmax_offset
+                softmax_end_i = softmax_offset + self.softmax_lod[self.level][
+                    i]
+                labels_start_i = labels_offset
+                labels_end_i = labels_offset + self.labels_lod[self.level][i]
+
+                softmax_a_sequence = self.softmax[softmax_start_i:
+                                                  softmax_end_i, :]
+                labels_a_sequence = self.labels[labels_start_i:labels_end_i, :]
+                self.loss[i] = self.forward_a_sequence(softmax_a_sequence,
+                                                       labels_a_sequence)
+                softmax_offset += self.softmax_lod[self.level][i]
+                labels_offset += self.labels_lod[self.level][i]
+            else:
+                softmax_a_sequence = self.softmax[:self.softmax_lod[i], i, :]
+                labels_a_sequence = self.labels[:self.labels_lod[i], :]
+                self.loss[i] = self.forward_a_sequence(softmax_a_sequence,
+                                                       labels_a_sequence)
 
         return self.loss
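Note: forward() now dispatches on the label layout. Labels of shape [sum(label_lengths), 1] keep the original LoD format, where all sequences are concatenated along axis 0 and sliced out by running offsets; any other shape is treated as a padded [batch_size, max_label_length] batch paired with a time-major softmax of shape [max_logit_length, batch_size, num_classes]. A small sketch of the two layouts for the label sequences [1, 2, 3] and [4, 5], assuming 0 is the pad value:

    import numpy as np

    # LoD layout: concatenated labels plus per-sequence lengths
    labels_lod = [[3, 2]]
    labels = np.array([[1], [2], [3], [4], [5]], dtype="int32")

    # padded layout: one row per sequence, padded to the longest length
    labels_length = np.array([3, 2], dtype=np.int64)
    labels = np.array([[1, 2, 3], [4, 5, 0]], dtype="int32")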
@@ -201,7 +209,8 @@ class TestWarpCTCOp(OpTest):
             dtype="int32")
 
         ctc = CTCForward(softmax, self.logits_lod, labels, self.labels_lod,
-                         self.blank, self.norm_by_times)
+                         self.num_classes, self.batch_size, self.blank,
+                         self.norm_by_times)
         loss = ctc.forward()
 
         max_sequence_length = 0
@@ -223,7 +232,7 @@ class TestWarpCTCOp(OpTest):
         }
 
     def test_check_output(self):
-        self.check_output(check_dygraph=False)
+        self.check_output()
 
     def test_check_grad(self):
         self.outputs['WarpCTCGrad'] = self.gradient
@@ -237,7 +246,7 @@ class TestWarpCTCOpCase1(TestWarpCTCOp):
         self.batch_size = 4
         self.num_classes = CUDA_BLOCK_SIZE + 2
         self.logits_lod = [[4, 1, 3, 3]]
         self.labels_lod = [[3, 1, 4, 4]]
-        self.blank = 0
+        self.blank = self.num_classes - 1
         self.norm_by_times = False
 
@@ -267,7 +276,8 @@ class TestWarpCTCOpWithPadding(OpTest):
             dtype="int32")
 
         ctc = CTCForward(softmax, self.logits_lod, labels, self.labels_lod,
-                         self.blank, self.norm_by_times)
+                         self.num_classes, self.batch_size, self.blank,
+                         self.norm_by_times)
         loss = ctc.forward()
 
         max_sequence_length = 0
@@ -317,7 +327,7 @@ class TestWarpCTCOpWithPadding(OpTest):
         }
 
     def test_check_output(self):
-        self.check_output(check_dygraph=False)
+        self.check_output()
 
     def test_check_grad(self):
         self.outputs['WarpCTCGrad'] = self.gradient
@@ -333,7 +343,7 @@ class TestWarpCTCOpWithPaddingCase1(TestWarpCTCOpWithPadding):
         self.logits_lod = [[4, 1, 3, 3]]
         self.labels_lod = [[3, 1, 4, 4]]
         self.logits_length = np.array([4, 1, 3, 3], dtype=np.int64)
         self.labels_length = np.array([3, 1, 4, 4], dtype=np.int64)
-        self.blank = 0
+        self.blank = self.num_classes - 1
         self.norm_by_times = False
 
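Note: both Case1 configs now exercise a non-default blank index, using the last class instead of class 0. The op and the reference implementation must agree on this index, and the label generators keep labels below it so none collides with the blank. A sketch of the convention, with num_classes, batch_size, and max_label_len as placeholder values:

    import numpy as np

    num_classes, batch_size, max_label_len = 514, 4, 4   # placeholders
    blank = num_classes - 1
    # valid labels then lie in [0, num_classes - 1)
    labels = np.random.randint(
        0, num_classes - 1, [batch_size, max_label_len], dtype="int32")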
@@ -389,5 +399,97 @@ class TestWarpCTCOpError(unittest.TestCase):
         self.assertRaises(TypeError, test_label_len_Variable)
 
 
+class TestCTCLossAPICase(unittest.TestCase):
+    def test_functional_api(self):
+        self.batch_size = 4
+        self.num_classes = CUDA_BLOCK_SIZE + 2
+        self.logits_length = np.array([4, 1, 3, 3], dtype=np.int64)
+        self.labels_length = np.array([3, 1, 4, 4], dtype=np.int64)
+        self.blank = self.num_classes - 1
+        self.norm_by_times = False
+
+        logits = np.random.uniform(0.1, 1.0, [
+            max(self.logits_length), self.batch_size, self.num_classes
+        ]).astype("float32")
+        softmax = np.apply_along_axis(stable_softmax, -1, logits)
+        # labels should not be blank
+        labels = np.random.randint(
+            0,
+            self.num_classes - 1, [self.batch_size, max(self.labels_length)],
+            dtype="int32")
+
+        ctc = CTCForward(softmax, self.logits_length, labels,
+                         self.labels_length, self.num_classes, self.batch_size,
+                         self.blank, self.norm_by_times)
+        loss_np = ctc.forward()
+
+        paddle.disable_static()
+        softmax = paddle.to_variable(logits)
+        labels = paddle.to_variable(labels)
+        logits_length = paddle.to_variable(self.logits_length)
+        labels_length = paddle.to_variable(self.labels_length)
+        loss_pd_mean = F.ctc_loss(
+            softmax,
+            labels,
+            logits_length,
+            labels_length,
+            blank=self.blank,
+            reduction='mean')
+        loss_pd_mean = loss_pd_mean.numpy()
+
+        loss_pd_sum = F.ctc_loss(
+            softmax,
+            labels,
+            logits_length,
+            labels_length,
+            blank=self.blank,
+            reduction='sum')
+        loss_pd_sum = loss_pd_sum.numpy()
+        paddle.enable_static()
+        loss_np = np.squeeze(loss_np, axis=-1)
+        loss_np_mean = (loss_np / labels_length.numpy()).mean()
+        loss_np_sum = loss_np.sum()
+
+        self.assertTrue(np.allclose(loss_pd_mean, loss_np_mean, atol=1))
+        self.assertTrue(np.allclose(loss_pd_sum, loss_np_sum, atol=1))
+
+    def test_class_api(self):
+        self.batch_size = 3
+        self.num_classes = 15
+        self.logits_length = np.array([3, 3, 3], dtype=np.int64)
+        self.labels_length = np.array([0, 1, 2], dtype=np.int64)
+        self.blank = 0
+        self.norm_by_times = False
+
+        logits = np.random.uniform(0.1, 1.0, [
+            max(self.logits_length), self.batch_size, self.num_classes
+        ]).astype("float32")
+        softmax = np.apply_along_axis(stable_softmax, -1, logits)
+        # labels should not be blank
+        labels = np.random.randint(
+            1,
+            self.num_classes, [self.batch_size, max(self.labels_length)],
+            dtype="int32")
+
+        ctc = CTCForward(softmax, self.logits_length, labels,
+                         self.labels_length, self.num_classes, self.batch_size,
+                         self.blank, self.norm_by_times)
+        loss_np = ctc.forward()
+
+        paddle.disable_static()
+        softmax = paddle.to_variable(logits)
+        labels = paddle.to_variable(labels)
+        logits_length = paddle.to_variable(self.logits_length)
+        labels_length = paddle.to_variable(self.labels_length)
+
+        loss_pd = paddle.nn.CTCLoss(self.blank, 'none')(
+            softmax, labels, logits_length, labels_length)
+        loss_pd = loss_pd.numpy()
+        paddle.enable_static()
+        loss_np = np.squeeze(loss_np, axis=-1)
+
+        self.assertTrue(np.allclose(loss_pd, loss_np, atol=1))
+
+
 if __name__ == "__main__":
     unittest.main()
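Note: with reduction='mean', F.ctc_loss divides each sequence loss by its label length before averaging, which the reference computation mirrors as (loss_np / labels_length.numpy()).mean(); reduction='sum' simply totals the per-sequence losses. A minimal, self-contained usage sketch of both new APIs in imperative mode, following the call signatures used in the tests above (shapes and values are illustrative):

    import numpy as np
    import paddle
    import paddle.nn.functional as F

    paddle.disable_static()
    # time-major logits: [max_logit_length, batch_size, num_classes]
    logits = paddle.to_variable(
        np.random.uniform(0.1, 1.0, [4, 2, 6]).astype("float32"))
    labels = paddle.to_variable(
        np.array([[1, 2, 3], [4, 5, 0]], dtype="int32"))
    logits_length = paddle.to_variable(np.array([4, 3], dtype=np.int64))
    labels_length = paddle.to_variable(np.array([3, 2], dtype=np.int64))

    # functional form: one scalar, averaged over label lengths
    loss_mean = F.ctc_loss(logits, labels, logits_length, labels_length,
                           blank=0, reduction='mean')

    # class form with reduction='none': one loss per batch item
    loss_none = paddle.nn.CTCLoss(0, 'none')(logits, labels,
                                             logits_length, labels_length)
    paddle.enable_static()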