|
|
|
@ -36,12 +36,14 @@ def lstmp(
|
|
|
|
|
w_b=None, # 1 x 4D
|
|
|
|
|
w_c=None, # 1 x 3D
|
|
|
|
|
is_reverse=False,
|
|
|
|
|
proj_clip=0.0,
|
|
|
|
|
cell_clip=0.0,
|
|
|
|
|
act_gate=None,
|
|
|
|
|
act_cell=None,
|
|
|
|
|
act_cand=None,
|
|
|
|
|
act_proj=None):
|
|
|
|
|
def _step(x, w_r, w_rh, w_c, r_pre, c_pre, act_gate, act_cell, act_cand,
|
|
|
|
|
act_proj):
|
|
|
|
|
def _step(x, w_r, w_rh, w_c, r_pre, c_pre, proj_clip, cell_clip, act_gate,
|
|
|
|
|
act_cell, act_cand, act_proj):
|
|
|
|
|
g = np.dot(r_pre, w_r) # 1 x 4D
|
|
|
|
|
g = g + x
|
|
|
|
|
g = np.reshape(g, (1, g.size))
|
|
|
|
@ -55,6 +57,17 @@ def lstmp(
|
|
|
|
|
g_f = act_gate(g_f + w_fc * c_pre) # 1 x D
|
|
|
|
|
c = g_f * c_pre + g_i * act_cand(c) # 1 x D
|
|
|
|
|
|
|
|
|
|
def array_clip(a, clip):
|
|
|
|
|
size = np.prod(a.shape)
|
|
|
|
|
new_a = np.reshape(a, (size))
|
|
|
|
|
for i in range(size):
|
|
|
|
|
new_a[i] = max(new_a[i], -1.0 * clip)
|
|
|
|
|
new_a[i] = min(new_a[i], clip)
|
|
|
|
|
new_a = np.reshape(new_a, a.shape)
|
|
|
|
|
return new_a
|
|
|
|
|
|
|
|
|
|
if cell_clip > 0.0:
|
|
|
|
|
c = array_clip(c, cell_clip)
|
|
|
|
|
if w_c is None:
|
|
|
|
|
g_o = act_gate(g_o) # 1 x D
|
|
|
|
|
else:
|
|
|
|
@ -64,6 +77,8 @@ def lstmp(
|
|
|
|
|
# projection
|
|
|
|
|
r = np.dot(h, w_rh)
|
|
|
|
|
r = act_proj(r)
|
|
|
|
|
if proj_clip > 0.0:
|
|
|
|
|
r = array_clip(r, proj_clip)
|
|
|
|
|
return r, c
|
|
|
|
|
|
|
|
|
|
def _reverse(x, offset):
|
|
|
|
@ -87,13 +102,13 @@ def lstmp(
|
|
|
|
|
# compute one sequence
|
|
|
|
|
seq_len = lod[0][i]
|
|
|
|
|
x = input[offset[i]:offset[i + 1], :]
|
|
|
|
|
r_pre = np.dot(h0[i], w_rh) # 1 x P
|
|
|
|
|
r_pre = act_proj(r_pre)
|
|
|
|
|
r_pre = h0[i]
|
|
|
|
|
c_pre = c0[i] # 1 x D
|
|
|
|
|
for j in range(seq_len):
|
|
|
|
|
# compute one step
|
|
|
|
|
r_pre, c_pre = _step(x[j], w_r, w_rh, w_c, r_pre, c_pre, act_gate,
|
|
|
|
|
act_cell, act_cand, act_proj)
|
|
|
|
|
r_pre, c_pre = _step(x[j], w_r, w_rh, w_c, r_pre, c_pre, proj_clip,
|
|
|
|
|
cell_clip, act_gate, act_cell, act_cand,
|
|
|
|
|
act_proj)
|
|
|
|
|
projection.append(r_pre.flatten())
|
|
|
|
|
cell.append(c_pre.flatten())
|
|
|
|
|
|
|
|
|
@ -123,13 +138,12 @@ class TestLstmpOp(LstmTest.TestLstmOp):
|
|
|
|
|
|
|
|
|
|
T = sum(self.lod[0])
|
|
|
|
|
N = len(self.lod[0])
|
|
|
|
|
|
|
|
|
|
x = np.random.normal(size=(T, 4 * self.D)).astype('float64')
|
|
|
|
|
if self.has_initial_state:
|
|
|
|
|
h0 = np.random.normal(size=(N, self.D)).astype('float64')
|
|
|
|
|
h0 = np.random.normal(size=(N, self.P)).astype('float64')
|
|
|
|
|
c0 = np.random.normal(size=(N, self.D)).astype('float64')
|
|
|
|
|
else:
|
|
|
|
|
h0 = np.zeros((N, self.D)).astype('float64')
|
|
|
|
|
h0 = np.zeros((N, self.P)).astype('float64')
|
|
|
|
|
c0 = np.zeros((N, self.D)).astype('float64')
|
|
|
|
|
w = np.random.normal(size=(self.P, 4 * self.D)).astype('float64')
|
|
|
|
|
if self.use_peepholes:
|
|
|
|
@ -140,9 +154,12 @@ class TestLstmpOp(LstmTest.TestLstmOp):
|
|
|
|
|
w_b = b[:, 0:4 * self.D]
|
|
|
|
|
w_c = b[:, 4 * self.D:] if self.use_peepholes else None
|
|
|
|
|
w_rh = np.random.normal(size=(self.D, self.P)).astype('float64')
|
|
|
|
|
proj_clip = 0.1
|
|
|
|
|
cell_clip = 0.1
|
|
|
|
|
r, c = lstmp(x, self.lod, h0, c0, w, w_rh, w_b, w_c, self.is_reverse,
|
|
|
|
|
ACTIVATION[self.act_gate], ACTIVATION[self.act_cell],
|
|
|
|
|
ACTIVATION[self.act_cand], ACTIVATION[self.act_proj])
|
|
|
|
|
proj_clip, cell_clip, ACTIVATION[self.act_gate],
|
|
|
|
|
ACTIVATION[self.act_cell], ACTIVATION[self.act_cand],
|
|
|
|
|
ACTIVATION[self.act_proj])
|
|
|
|
|
|
|
|
|
|
self.inputs = {'Input': (x, self.lod), 'Weight': w, 'ProjWeight': w_rh}
|
|
|
|
|
|
|
|
|
@ -159,6 +176,8 @@ class TestLstmpOp(LstmTest.TestLstmOp):
|
|
|
|
|
self.attrs = {
|
|
|
|
|
'use_peepholes': self.use_peepholes,
|
|
|
|
|
'is_reverse': self.is_reverse,
|
|
|
|
|
'proj_clip': proj_clip,
|
|
|
|
|
'cell_clip': cell_clip,
|
|
|
|
|
'gate_activation': self.act_gate,
|
|
|
|
|
'cell_activation': self.act_cell,
|
|
|
|
|
'candidate_activation': self.act_cand,
|
|
|
|
@ -171,14 +190,14 @@ class TestLstmpOp(LstmTest.TestLstmOp):
|
|
|
|
|
def test_check_grad(self):
|
|
|
|
|
# TODO(qingqing) remove folowing lines after the check_grad is refined.
|
|
|
|
|
N = len(self.lod[0])
|
|
|
|
|
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
|
|
|
|
|
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
|
|
|
|
|
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
|
|
|
|
|
self.outputs['BatchCellPreAct'] = np.zeros(
|
|
|
|
|
(N, self.D)).astype('float64')
|
|
|
|
|
self.check_grad(
|
|
|
|
|
['Input', 'Weight', 'ProjWeight', 'Bias'], ['Projection'],
|
|
|
|
|
max_relative_error=1e-2)
|
|
|
|
|
max_relative_error=1e-2,
|
|
|
|
|
numeric_grad_delta=0.0000005)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestLstmpOpHasInitial(TestLstmpOp):
|
|
|
|
@ -188,7 +207,6 @@ class TestLstmpOpHasInitial(TestLstmpOp):
|
|
|
|
|
def test_check_grad(self):
|
|
|
|
|
# TODO(qingqing) remove folowing lines after the check_grad is refined.
|
|
|
|
|
N = len(self.lod[0])
|
|
|
|
|
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
|
|
|
|
|
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
|
|
|
|
|
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
|
|
|
|
|
self.outputs['BatchCellPreAct'] = np.zeros(
|
|
|
|
@ -196,11 +214,11 @@ class TestLstmpOpHasInitial(TestLstmpOp):
|
|
|
|
|
self.check_grad(
|
|
|
|
|
['Input', 'Weight', 'ProjWeight', 'Bias', 'H0', 'C0'],
|
|
|
|
|
['Projection'],
|
|
|
|
|
numeric_grad_delta=0.0000005,
|
|
|
|
|
max_relative_error=1e-2)
|
|
|
|
|
|
|
|
|
|
def test_check_grad_ingore_bias(self):
|
|
|
|
|
N = len(self.lod[0])
|
|
|
|
|
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
|
|
|
|
|
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
|
|
|
|
|
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
|
|
|
|
|
self.outputs['BatchCellPreAct'] = np.zeros(
|
|
|
|
@ -208,11 +226,11 @@ class TestLstmpOpHasInitial(TestLstmpOp):
|
|
|
|
|
self.check_grad(
|
|
|
|
|
['Input', 'ProjWeight', 'Weight'], ['Projection'],
|
|
|
|
|
max_relative_error=1e-2,
|
|
|
|
|
numeric_grad_delta=0.0000005,
|
|
|
|
|
no_grad_set=set('Bias'))
|
|
|
|
|
|
|
|
|
|
def test_check_grad_ingore_weight(self):
|
|
|
|
|
N = len(self.lod[0])
|
|
|
|
|
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
|
|
|
|
|
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
|
|
|
|
|
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
|
|
|
|
|
self.outputs['BatchCellPreAct'] = np.zeros(
|
|
|
|
@ -220,11 +238,11 @@ class TestLstmpOpHasInitial(TestLstmpOp):
|
|
|
|
|
self.check_grad(
|
|
|
|
|
['Input', 'ProjWeight', 'Bias'], ['Projection'],
|
|
|
|
|
max_relative_error=1e-2,
|
|
|
|
|
numeric_grad_delta=0.0000005,
|
|
|
|
|
no_grad_set=set('Weight'))
|
|
|
|
|
|
|
|
|
|
def test_check_grad_ingore_proj_weight(self):
|
|
|
|
|
N = len(self.lod[0])
|
|
|
|
|
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
|
|
|
|
|
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
|
|
|
|
|
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
|
|
|
|
|
self.outputs['BatchCellPreAct'] = np.zeros(
|
|
|
|
@ -232,11 +250,11 @@ class TestLstmpOpHasInitial(TestLstmpOp):
|
|
|
|
|
self.check_grad(
|
|
|
|
|
['Input', 'Weight', 'Bias'], ['Projection'],
|
|
|
|
|
max_relative_error=1e-2,
|
|
|
|
|
numeric_grad_delta=0.0000005,
|
|
|
|
|
no_grad_set=set('ProjWeight'))
|
|
|
|
|
|
|
|
|
|
def test_check_grad_ingore_input(self):
|
|
|
|
|
N = len(self.lod[0])
|
|
|
|
|
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
|
|
|
|
|
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
|
|
|
|
|
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
|
|
|
|
|
self.outputs['BatchCellPreAct'] = np.zeros(
|
|
|
|
@ -244,11 +262,11 @@ class TestLstmpOpHasInitial(TestLstmpOp):
|
|
|
|
|
self.check_grad(
|
|
|
|
|
['Weight', 'ProjWeight', 'Bias'], ['Projection'],
|
|
|
|
|
max_relative_error=1e-2,
|
|
|
|
|
numeric_grad_delta=0.0000005,
|
|
|
|
|
no_grad_set=set('Input'))
|
|
|
|
|
|
|
|
|
|
def test_check_grad_ingore_h0(self):
|
|
|
|
|
N = len(self.lod[0])
|
|
|
|
|
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
|
|
|
|
|
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
|
|
|
|
|
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
|
|
|
|
|
self.outputs['BatchCellPreAct'] = np.zeros(
|
|
|
|
@ -256,11 +274,11 @@ class TestLstmpOpHasInitial(TestLstmpOp):
|
|
|
|
|
self.check_grad(
|
|
|
|
|
['Input', 'Weight', 'ProjWeight', 'Bias', 'C0'], ['Projection'],
|
|
|
|
|
max_relative_error=1e-2,
|
|
|
|
|
numeric_grad_delta=0.0000005,
|
|
|
|
|
no_grad_set=set('H0'))
|
|
|
|
|
|
|
|
|
|
def test_check_grad_ingore_c0(self):
|
|
|
|
|
N = len(self.lod[0])
|
|
|
|
|
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
|
|
|
|
|
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
|
|
|
|
|
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
|
|
|
|
|
self.outputs['BatchCellPreAct'] = np.zeros(
|
|
|
|
@ -268,6 +286,7 @@ class TestLstmpOpHasInitial(TestLstmpOp):
|
|
|
|
|
self.check_grad(
|
|
|
|
|
['Input', 'Weight', 'ProjWeight', 'Bias', 'H0'], ['Projection'],
|
|
|
|
|
max_relative_error=1e-2,
|
|
|
|
|
numeric_grad_delta=0.0000005,
|
|
|
|
|
no_grad_set=set('C0'))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|