|
|
|
@ -28,6 +28,14 @@ def relu(x):
|
|
|
|
|
return np.maximum(x, 0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ACTVATION = {
|
|
|
|
|
'identity': identity,
|
|
|
|
|
'sigmoid': sigmoid,
|
|
|
|
|
'tanh': tanh,
|
|
|
|
|
'relu': relu
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def lstm(
|
|
|
|
|
input, # T x 4D
|
|
|
|
|
lod, # 1 x N
|
|
|
|
@ -37,37 +45,45 @@ def lstm(
|
|
|
|
|
w_b=None, # 1 x 4D
|
|
|
|
|
w_c=None, # 1 x 3D
|
|
|
|
|
is_reverse=False,
|
|
|
|
|
gate_act=None,
|
|
|
|
|
cell_act=None,
|
|
|
|
|
cand_act=None):
|
|
|
|
|
def _step(x, w_h, w_c, h_pre, c_pre, gate_act, cell_act, cand_act):
|
|
|
|
|
act_gate=None,
|
|
|
|
|
act_cell=None,
|
|
|
|
|
act_cand=None):
|
|
|
|
|
def _step(x, w_h, w_c, h_pre, c_pre, act_gate, act_cell, act_cand):
|
|
|
|
|
g = np.dot(h_pre, w_h) # 1 x 4D
|
|
|
|
|
g = g + x
|
|
|
|
|
g = np.reshape(g, (1, g.size))
|
|
|
|
|
c_tmp, g_i, g_f, g_o = np.split(g, 4, axis=1)
|
|
|
|
|
if w_c is None:
|
|
|
|
|
g_i = gate_act(g_i) # 1 x D
|
|
|
|
|
g_f = gate_act(g_f) # 1 x D
|
|
|
|
|
g_i = act_gate(g_i) # 1 x D
|
|
|
|
|
g_f = act_gate(g_f) # 1 x D
|
|
|
|
|
else:
|
|
|
|
|
w_ic, w_fc, w_oc = np.split(w_c, 3, axis=1)
|
|
|
|
|
g_i = gate_act(g_i + w_ic * c_pre) # 1 x D
|
|
|
|
|
g_f = gate_act(g_f + w_fc * c_pre) # 1 x D
|
|
|
|
|
c = g_f * c_pre + g_i * cand_act(c_tmp) # 1 x D
|
|
|
|
|
g_i = act_gate(g_i + w_ic * c_pre) # 1 x D
|
|
|
|
|
g_f = act_gate(g_f + w_fc * c_pre) # 1 x D
|
|
|
|
|
c = g_f * c_pre + g_i * act_cand(c_tmp) # 1 x D
|
|
|
|
|
|
|
|
|
|
if w_c is None:
|
|
|
|
|
g_o = gate_act(g_o) # 1 x D
|
|
|
|
|
g_o = act_gate(g_o) # 1 x D
|
|
|
|
|
else:
|
|
|
|
|
_, _, w_oc = np.split(w_c, 3, axis=1)
|
|
|
|
|
g_o = gate_act(g_o + w_oc * c) # 1 x D
|
|
|
|
|
h = g_o * cell_act(c)
|
|
|
|
|
bg = np.concatenate((cand_act(c_tmp), g_i, g_f, g_o), axis=1)
|
|
|
|
|
g_o = act_gate(g_o + w_oc * c) # 1 x D
|
|
|
|
|
h = g_o * act_cell(c)
|
|
|
|
|
bg = np.concatenate((act_cand(c_tmp), g_i, g_f, g_o), axis=1)
|
|
|
|
|
return h, c, bg
|
|
|
|
|
|
|
|
|
|
def _reverse(x, lod):
|
|
|
|
|
y = np.zeros_like(x)
|
|
|
|
|
for i in range(len(lod) - 1):
|
|
|
|
|
b, e = lod[i], lod[i + 1]
|
|
|
|
|
y[b:e, :] = np.flip(x[b:e, :], 0)
|
|
|
|
|
return y
|
|
|
|
|
|
|
|
|
|
offset = lod[0]
|
|
|
|
|
batch_size = len(offset) - 1
|
|
|
|
|
hidden = []
|
|
|
|
|
cell = []
|
|
|
|
|
gate = []
|
|
|
|
|
input = _reverse(input, offset) if is_reverse else input
|
|
|
|
|
if w_b is not None:
|
|
|
|
|
input = input + np.tile(w_b, (offset[-1], 1))
|
|
|
|
|
for i in range(batch_size):
|
|
|
|
@ -78,8 +94,8 @@ def lstm(
|
|
|
|
|
c_pre = c0[i] # 1 x D
|
|
|
|
|
for j in range(seq_len):
|
|
|
|
|
# compute one step
|
|
|
|
|
h_pre, c_pre, g_pre = _step(x[j], w_h, w_c, h_pre, c_pre, gate_act,
|
|
|
|
|
cell_act, cand_act)
|
|
|
|
|
h_pre, c_pre, g_pre = _step(x[j], w_h, w_c, h_pre, c_pre, act_gate,
|
|
|
|
|
act_cell, act_cand)
|
|
|
|
|
hidden.append(h_pre.flatten())
|
|
|
|
|
cell.append(c_pre.flatten())
|
|
|
|
|
gate.append(g_pre.flatten())
|
|
|
|
@ -87,38 +103,53 @@ def lstm(
|
|
|
|
|
hidden = np.array(hidden).astype("float64")
|
|
|
|
|
cell = np.array(cell).astype("float64")
|
|
|
|
|
gate = np.array(gate).astype("float64")
|
|
|
|
|
|
|
|
|
|
hidden = _reverse(hidden, offset) if is_reverse else hidden
|
|
|
|
|
cell = _reverse(cell, offset) if is_reverse else cell
|
|
|
|
|
|
|
|
|
|
assert gate.shape == input.shape
|
|
|
|
|
assert hidden.shape == (input.shape[0], input.shape[1] / 4)
|
|
|
|
|
assert cell.shape == (input.shape[0], input.shape[1] / 4)
|
|
|
|
|
return hidden, cell, gate
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class LstmUnitTest(OpTest):
|
|
|
|
|
class TestLstmOp(OpTest):
|
|
|
|
|
def set_data(self):
|
|
|
|
|
D = 4
|
|
|
|
|
#lod = [[0, 2, 6, 9]]
|
|
|
|
|
lod = [[0, 1]]
|
|
|
|
|
shape = (1, D)
|
|
|
|
|
|
|
|
|
|
x = np.random.normal(size=(1, 4 * D)).astype("float64")
|
|
|
|
|
h0 = np.zeros((4, D)).astype("float64")
|
|
|
|
|
c0 = np.zeros((4, D)).astype("float64")
|
|
|
|
|
w = np.random.normal(size=(D, 4 * D)).astype("float64")
|
|
|
|
|
b = np.random.normal(size=(1, 7 * D)).astype("float64")
|
|
|
|
|
|
|
|
|
|
w_b = b[:, 0:4 * D]
|
|
|
|
|
w_c = b[:, 4 * D:]
|
|
|
|
|
#h, c, g = lstm(x, lod, h0, c0, w, w_b, w_c, False, sigmoid, tanh, tanh)
|
|
|
|
|
h, c, g = lstm(x, lod, h0, c0, w, w_b, w_c, False, identity, identity,
|
|
|
|
|
identity)
|
|
|
|
|
self.lod = [[0, 2, 6, 9]]
|
|
|
|
|
self.D = 64
|
|
|
|
|
self.sort_idx = [2, 6, 0, 3, 7, 1, 4, 8, 5]
|
|
|
|
|
|
|
|
|
|
self.act_gate = "sigmoid"
|
|
|
|
|
self.act_cell = "tanh"
|
|
|
|
|
self.act_cand = "tanh"
|
|
|
|
|
|
|
|
|
|
self.is_reverse = False
|
|
|
|
|
|
|
|
|
|
def setUp(self):
|
|
|
|
|
self.set_data()
|
|
|
|
|
self.op_type = "lstm"
|
|
|
|
|
|
|
|
|
|
T = self.lod[0][-1]
|
|
|
|
|
N = len(self.lod[0]) - 1
|
|
|
|
|
|
|
|
|
|
x = np.random.normal(size=(T, 4 * self.D)).astype("float64")
|
|
|
|
|
h0 = np.zeros((N, self.D)).astype("float64")
|
|
|
|
|
c0 = np.zeros((N, self.D)).astype("float64")
|
|
|
|
|
w = np.random.normal(size=(self.D, 4 * self.D)).astype("float64")
|
|
|
|
|
b = np.random.normal(size=(1, 7 * self.D)).astype("float64")
|
|
|
|
|
|
|
|
|
|
w_b = b[:, 0:4 * self.D]
|
|
|
|
|
w_c = b[:, 4 * self.D:]
|
|
|
|
|
h, c, g = lstm(x, self.lod, h0, c0, w, w_b, w_c, self.is_reverse,
|
|
|
|
|
ACTVATION[self.act_gate], ACTVATION[self.act_cell],
|
|
|
|
|
ACTVATION[self.act_cand])
|
|
|
|
|
|
|
|
|
|
g_sort = np.zeros_like(x)
|
|
|
|
|
#idx = [2,6,0,3,7,1,4,8,5]
|
|
|
|
|
#for i, j in enumerate(idx):
|
|
|
|
|
# g_sort[i, :] = g[j, :]
|
|
|
|
|
for i, j in enumerate(self.sort_idx):
|
|
|
|
|
g_sort[i, :] = g[j, :]
|
|
|
|
|
|
|
|
|
|
self.inputs = {
|
|
|
|
|
'Input': (x, lod),
|
|
|
|
|
'Input': (x, self.lod),
|
|
|
|
|
'H0': h0,
|
|
|
|
|
'C0': c0,
|
|
|
|
|
'Weight': w,
|
|
|
|
@ -127,19 +158,28 @@ class LstmUnitTest(OpTest):
|
|
|
|
|
self.outputs = {'Hidden': h, 'Cell': c, 'BatchGate': g_sort}
|
|
|
|
|
self.attrs = {
|
|
|
|
|
'usePeepholes': True,
|
|
|
|
|
'isReverse': False,
|
|
|
|
|
'gateActivation': 'linear',
|
|
|
|
|
'cellActivation': 'linear',
|
|
|
|
|
'candidateActivation': 'linear'
|
|
|
|
|
'isReverse': self.is_reverse,
|
|
|
|
|
'gateActivation': 'sigmoid',
|
|
|
|
|
'cellActivation': 'tanh',
|
|
|
|
|
'candidateActivation': 'tanh'
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
def setUp(self):
|
|
|
|
|
self.set_data()
|
|
|
|
|
self.op_type = "lstm"
|
|
|
|
|
|
|
|
|
|
def test_check_output(self):
|
|
|
|
|
self.check_output()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestLstmOpRerverse(TestLstmOp):
|
|
|
|
|
def set_data(self):
|
|
|
|
|
self.lod = [[0, 2, 6, 9]]
|
|
|
|
|
self.D = 64
|
|
|
|
|
self.sort_idx = [2, 6, 0, 3, 7, 1, 4, 8, 5]
|
|
|
|
|
|
|
|
|
|
self.act_gate = "sigmoid"
|
|
|
|
|
self.act_cell = "tanh"
|
|
|
|
|
self.act_cand = "tanh"
|
|
|
|
|
|
|
|
|
|
self.is_reverse = True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
|
unittest.main()
|
|
|
|
|