|
|
|
@ -21,6 +21,13 @@ def get_backward_op(scope, op, no_grad_set):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _reference_training(x, scale, offset, epsilon, data_format):
|
|
|
|
|
x_shape = x.shape
|
|
|
|
|
if len(x_shape) == 2:
|
|
|
|
|
if data_format == "NCHW":
|
|
|
|
|
x = np.reshape(x, (x.shape[0], x.shape[1], 1, 1))
|
|
|
|
|
else:
|
|
|
|
|
x = np.reshape(x, (x.shape[0], 1, 1, x.shape[1]))
|
|
|
|
|
|
|
|
|
|
if data_format == "NCHW":
|
|
|
|
|
n, c, h, w = x.shape
|
|
|
|
|
x_square = x * x
|
|
|
|
@ -39,6 +46,8 @@ def _reference_training(x, scale, offset, epsilon, data_format):
|
|
|
|
|
offset_tile = np.reshape(offset, (1, c, 1, 1))
|
|
|
|
|
offset_tile = np.reshape(offset_tile, (1, c, 1, 1))
|
|
|
|
|
y = normalized * scale_tile + offset_tile
|
|
|
|
|
if len(x_shape) == 2:
|
|
|
|
|
y = np.reshape(y, (y.shape[0], y.shape[1]))
|
|
|
|
|
return y, mean, var
|
|
|
|
|
elif data_format == "NHWC":
|
|
|
|
|
x_square = x * x
|
|
|
|
@ -48,7 +57,10 @@ def _reference_training(x, scale, offset, epsilon, data_format):
|
|
|
|
|
mean = x_sum / element_count
|
|
|
|
|
var = x_square_sum / element_count - mean * mean
|
|
|
|
|
normalized = (x - mean) / np.sqrt(var + epsilon)
|
|
|
|
|
return (normalized * scale + offset), mean, var
|
|
|
|
|
y = normalized * scale + offset
|
|
|
|
|
if len(x_shape) == 2:
|
|
|
|
|
y = np.reshape(y, x_shape)
|
|
|
|
|
return y, mean, var
|
|
|
|
|
else:
|
|
|
|
|
raise ValueError("Unknown data order.")
|
|
|
|
|
|
|
|
|
@ -65,6 +77,18 @@ def _reference_grad(x, grad_y, scale, mean, var, epsilon, data_format):
|
|
|
|
|
# (x - mean) * sum(grad_y * (x - mean)) / (var + epsilon))
|
|
|
|
|
|
|
|
|
|
# transfer from (N, C, H, W) to (N, H, W, C) to simplify computation
|
|
|
|
|
x_shape = x.shape
|
|
|
|
|
|
|
|
|
|
if len(x_shape) == 2:
|
|
|
|
|
if data_format == "NCHW":
|
|
|
|
|
x = np.reshape(x, (x.shape[0], x.shape[1], 1, 1))
|
|
|
|
|
grad_y = np.reshape(grad_y,
|
|
|
|
|
(grad_y.shape[0], grad_y.shape[1], 1, 1))
|
|
|
|
|
else:
|
|
|
|
|
x = np.reshape(x, (x.shape[0], 1, 1, x.shape[1]))
|
|
|
|
|
grad_y = np.reshape(grad_y,
|
|
|
|
|
(grad_y.shape[0], 1, 1, grad_y.shape[1]))
|
|
|
|
|
|
|
|
|
|
if data_format == "NCHW":
|
|
|
|
|
x = np.transpose(x, (0, 2, 3, 1))
|
|
|
|
|
grad_y = np.transpose(grad_y, (0, 2, 3, 1))
|
|
|
|
@ -83,6 +107,9 @@ def _reference_grad(x, grad_y, scale, mean, var, epsilon, data_format):
|
|
|
|
|
grad_x = np.transpose(grad_x, (0, 3, 1, 2))
|
|
|
|
|
x = np.transpose(x, (0, 3, 1, 2))
|
|
|
|
|
grad_y = np.transpose(grad_y, (0, 3, 1, 2))
|
|
|
|
|
|
|
|
|
|
if len(x_shape) == 2:
|
|
|
|
|
grad_x = np.reshape(grad_x, x_shape)
|
|
|
|
|
return grad_x, grad_scale, grad_offset
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -127,7 +154,7 @@ class TestBatchNormOp(OpTest):
|
|
|
|
|
momentum = 0.9
|
|
|
|
|
|
|
|
|
|
# N, H, W, C: 2, 3, 4, 2
|
|
|
|
|
n, h, w, c = 2, 3, 4, 2
|
|
|
|
|
n, h, w, c = 2, 3, 4, 5
|
|
|
|
|
x_shape = [n, h, w, c]
|
|
|
|
|
scale_shape = [c]
|
|
|
|
|
|
|
|
|
@ -184,20 +211,23 @@ class TestBatchNormOp(OpTest):
|
|
|
|
|
print 'python: NHWC, NCHW, backward checking passed'
|
|
|
|
|
|
|
|
|
|
def test_forward_backward(self):
|
|
|
|
|
def test_with_place(place, tensor_format):
|
|
|
|
|
def test_with_place(place, tensor_format, shape):
|
|
|
|
|
# attr
|
|
|
|
|
epsilon = 0.00001
|
|
|
|
|
momentum = 0.9
|
|
|
|
|
|
|
|
|
|
# N, H, W, C: 12, 3, 4, 2
|
|
|
|
|
n, h, w, c = 2, 3, 4, 2
|
|
|
|
|
|
|
|
|
|
if data_format == "NHWC":
|
|
|
|
|
x_shape = [n, h, w, c]
|
|
|
|
|
elif data_format == "NCHW":
|
|
|
|
|
x_shape = [n, c, h, w]
|
|
|
|
|
if len(shape) == 2:
|
|
|
|
|
x_shape = shape
|
|
|
|
|
c = shape[1]
|
|
|
|
|
else:
|
|
|
|
|
raise ValueError("Unknown data type.")
|
|
|
|
|
# n, h, w, c = 2, 3, 4, 2
|
|
|
|
|
n, h, w, c = shape[0], shape[1], shape[2], shape[3]
|
|
|
|
|
if data_format == "NHWC":
|
|
|
|
|
x_shape = [n, h, w, c]
|
|
|
|
|
elif data_format == "NCHW":
|
|
|
|
|
x_shape = [n, c, h, w]
|
|
|
|
|
else:
|
|
|
|
|
raise ValueError("Unknown data type.")
|
|
|
|
|
scale_shape = [c]
|
|
|
|
|
|
|
|
|
|
x_val = np.random.random_sample(x_shape).astype(np.float32)
|
|
|
|
@ -219,7 +249,10 @@ class TestBatchNormOp(OpTest):
|
|
|
|
|
# for gradient test
|
|
|
|
|
# y_grad = np.ones(x_shape).astype(np.float32)
|
|
|
|
|
y_grad = np.zeros(x_shape).astype(np.float32)
|
|
|
|
|
y_grad[0, 0, 0, 0] = 1.
|
|
|
|
|
if len(y_grad.shape) == 2:
|
|
|
|
|
y_grad[0, 0] = 1.
|
|
|
|
|
else:
|
|
|
|
|
y_grad[0, 0, 0, 0] = 1.
|
|
|
|
|
# y_grad = np.random.random_sample(x_shape).astype(np.float32)
|
|
|
|
|
x_grad_ref, scale_grad_ref, bias_grad_ref = _reference_grad(
|
|
|
|
|
x_val, y_grad, scale_val, saved_mean, var_ref, epsilon,
|
|
|
|
@ -313,7 +346,8 @@ class TestBatchNormOp(OpTest):
|
|
|
|
|
places.append(core.GPUPlace(0))
|
|
|
|
|
for place in places:
|
|
|
|
|
for data_format in ["NCHW", "NHWC"]:
|
|
|
|
|
test_with_place(place, data_format)
|
|
|
|
|
test_with_place(place, data_format, [2, 3, 4, 5])
|
|
|
|
|
test_with_place(place, data_format, [2, 3])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
|
|