fix GPU heterogeneous execution

pull/6232/head
baihuawei 4 years ago
parent fc8bd0dd03
commit 09a3f2ff5e

@ -174,7 +174,9 @@ void GPUSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
} }
} }
if (need_sync) { if (need_sync) {
tensor->set_device_address(device_address); if (AnfAlgo::IsParameterWeight(input_node->cast<ParameterPtr>())) {
tensor->set_device_address(device_address);
}
MS_EXCEPTION_IF_NULL(device_address); MS_EXCEPTION_IF_NULL(device_address);
if (!device_address->SyncHostToDevice(trans::GetRuntimePaddingShape(pk_node, 0), if (!device_address->SyncHostToDevice(trans::GetRuntimePaddingShape(pk_node, 0),
LongToSize(tensor->data().nbytes()), tensor->data_type(), LongToSize(tensor->data().nbytes()), tensor->data_type(),

@ -19,6 +19,8 @@ import pytest
import mindspore.context as context import mindspore.context as context
import mindspore.nn as nn import mindspore.nn as nn
from mindspore import Tensor from mindspore import Tensor
from mindspore.common.parameter import Parameter
from mindspore.common.initializer import initializer
from mindspore.common.api import ms_function from mindspore.common.api import ms_function
from mindspore.ops.operations import _quant_ops as Q from mindspore.ops.operations import _quant_ops as Q
@ -26,13 +28,15 @@ context.set_context(device_target='GPU')
class Net(nn.Cell):
    """Cell wrapping Q.BatchNormFold.

    Post-commit form: `mean` and `variance` are passed to the constructor and
    stored on the cell (the callers wrap them as Parameters), so `construct`
    only takes the input tensor and the current step. This lets the GPU kernel
    update the running statistics in place instead of receiving them as plain
    inputs each call.
    """

    def __init__(self, mean, variance):
        super(Net, self).__init__()
        # Running statistics held as members; expected to be Parameters so the
        # op can write back into them — TODO confirm against caller.
        self.mean = mean
        self.variance = variance
        self.op = Q.BatchNormFold(momentum=0.9, freeze_bn=10)

    @ms_function
    def construct(self, x, current_step):
        a, b, c, d = self.op(x, self.mean, self.variance, current_step)
        return a, b, c, d
@ -52,16 +56,17 @@ def np_result(x, mean, var, momentum, epsilon):
@pytest.mark.platform_x86_gpu_training @pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard @pytest.mark.env_onecard
def test_batchnorm_fold(): def test_batchnorm_fold():
net = Net()
c = 64 c = 64
x = np.random.uniform(1, 10, size=[3, c, 32, 32]).astype('float32') x = np.random.uniform(1, 10, size=[3, c, 32, 32]).astype('float32')
mean = np.random.uniform(1, 10, size=[c]).astype('float32') mean = np.random.uniform(1, 10, size=[c]).astype('float32')
variance = np.random.uniform(1, 10, size=[c]).astype('float32') variance = np.random.uniform(1, 10, size=[c]).astype('float32')
current_step = np.array([0]).astype('int32') current_step = np.array([0]).astype('int32')
ms_mean = Tensor(mean) ms_mean_t = Tensor(mean)
ms_var = Tensor(variance) ms_var_t = Tensor(variance)
batch_mean, batch_var, delay_mean, delay_std = net(Tensor(x), ms_mean, ms_var, ms_mean = Parameter(initializer(ms_mean_t, ms_mean_t.shape), name='mean')
Tensor(current_step)) ms_var = Parameter(initializer(ms_var_t, ms_var_t.shape), name='var')
net = Net(ms_mean, ms_var)
batch_mean, batch_var, delay_mean, delay_std = net(Tensor(x), Tensor(current_step))
expect1, expect2, expect3, expect4, expect5, expect6 = np_result(x, mean, variance, 0.9, 1e-12) expect1, expect2, expect3, expect4, expect5, expect6 = np_result(x, mean, variance, 0.9, 1e-12)
assert np.allclose(batch_mean.asnumpy(), expect1, rtol=1.e-7, atol=1.e-5) assert np.allclose(batch_mean.asnumpy(), expect1, rtol=1.e-7, atol=1.e-5)
@ -76,16 +81,17 @@ def test_batchnorm_fold():
@pytest.mark.platform_x86_gpu_training @pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard @pytest.mark.env_onecard
def test_batchnorm_fold2(): def test_batchnorm_fold2():
net = Net()
c = 64 c = 64
x = np.random.uniform(1, 10, size=[3, c, 512, 512]).astype('float32') x = np.random.uniform(1, 10, size=[3, c, 512, 512]).astype('float32')
mean = np.random.uniform(1, 10, size=[c]).astype('float32') mean = np.random.uniform(1, 10, size=[c]).astype('float32')
variance = np.random.uniform(1, 10, size=[c]).astype('float32') variance = np.random.uniform(1, 10, size=[c]).astype('float32')
current_step = np.array([0]).astype('int32') current_step = np.array([0]).astype('int32')
ms_mean = Tensor(mean) ms_mean_t = Tensor(mean)
ms_var = Tensor(variance) ms_var_t = Tensor(variance)
batch_mean, batch_var, delay_mean, delay_std = net(Tensor(x), ms_mean, ms_var, ms_mean = Parameter(initializer(ms_mean_t, ms_mean_t.shape), name='mean')
Tensor(current_step)) ms_var = Parameter(initializer(ms_var_t, ms_var_t.shape), name='var')
net = Net(ms_mean, ms_var)
batch_mean, batch_var, delay_mean, delay_std = net(Tensor(x), Tensor(current_step))
expect1, expect2, expect3, _, expect5, expect6 = np_result(x, mean, variance, 0.9, 1e-12) expect1, expect2, expect3, _, expect5, expect6 = np_result(x, mean, variance, 0.9, 1e-12)
assert np.allclose(batch_mean.asnumpy(), expect1, rtol=1.e-7, atol=1.e-5) assert np.allclose(batch_mean.asnumpy(), expect1, rtol=1.e-7, atol=1.e-5)
assert np.allclose(batch_var.asnumpy(), expect2, rtol=1.e-7, atol=1.e-5) assert np.allclose(batch_var.asnumpy(), expect2, rtol=1.e-7, atol=1.e-5)
@ -98,16 +104,17 @@ def test_batchnorm_fold2():
@pytest.mark.platform_x86_gpu_training @pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard @pytest.mark.env_onecard
def test_batchnorm_fold_freeze(): def test_batchnorm_fold_freeze():
net = Net()
c = 64 c = 64
x = np.random.uniform(1, 10, size=[3, c, 32, 32]).astype('float32') x = np.random.uniform(1, 10, size=[3, c, 32, 32]).astype('float32')
mean = np.random.uniform(1, 10, size=[c]).astype('float32') mean = np.random.uniform(1, 10, size=[c]).astype('float32')
variance = np.random.uniform(1, 10, size=[c]).astype('float32') variance = np.random.uniform(1, 10, size=[c]).astype('float32')
current_step = np.array([10]).astype('int32') current_step = np.array([10]).astype('int32')
ms_mean = Tensor(mean) ms_mean_t = Tensor(mean)
ms_var = Tensor(variance) ms_var_t = Tensor(variance)
batch_mean, batch_var, delay_mean, delay_std = net(Tensor(x), ms_mean, ms_var, ms_mean = Parameter(initializer(ms_mean_t, ms_mean_t.shape), name='mean')
Tensor(current_step)) ms_var = Parameter(initializer(ms_var_t, ms_var_t.shape), name='var')
net = Net(ms_mean, ms_var)
batch_mean, batch_var, delay_mean, delay_std = net(Tensor(x), Tensor(current_step))
_, _, _, _, expect5, expect6 = np_result(x, mean, variance, 0.9, 1e-12) _, _, _, _, expect5, expect6 = np_result(x, mean, variance, 0.9, 1e-12)
assert np.allclose(batch_mean.asnumpy(), np.zeros_like(mean), rtol=1.e-7, atol=1.e-5) assert np.allclose(batch_mean.asnumpy(), np.zeros_like(mean), rtol=1.e-7, atol=1.e-5)
assert np.allclose(batch_var.asnumpy(), np.ones_like(mean), rtol=1.e-7, atol=1.e-5) assert np.allclose(batch_var.asnumpy(), np.ones_like(mean), rtol=1.e-7, atol=1.e-5)

@ -19,35 +19,48 @@ import pytest
import mindspore.context as context import mindspore.context as context
import mindspore.nn as nn import mindspore.nn as nn
from mindspore import Tensor from mindspore import Tensor
from mindspore.common.parameter import Parameter
from mindspore.common.initializer import initializer
from mindspore.ops import operations as P from mindspore.ops import operations as P
context.set_context(mode=context.GRAPH_MODE, device_target="GPU") context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
class NetCenteredRMSProp(nn.Cell):
    """Cell applying P.ApplyCenteredRMSProp.

    Post-commit form: the optimizer state tensors (var, g, mg, rms, mom) are
    constructor arguments stored on the cell (callers wrap them as
    Parameters), so `construct` takes no arguments and the op updates the
    state in place.
    """

    def __init__(self, lr, decay, momentum, epsilon, var, g, mg, rms, mom):
        super(NetCenteredRMSProp, self).__init__()
        self.rms_opt = P.ApplyCenteredRMSProp()
        # Scalar hyper-parameters.
        self.lr = lr
        self.decay = decay
        self.momentum = momentum
        self.epsilon = epsilon
        # State tensors; expected to be Parameters updated in place by the op
        # — TODO confirm against caller.
        self.var = var
        self.g = g
        self.mg = mg
        self.rms = rms
        self.mom = mom

    def construct(self):
        return self.rms_opt(self.var, self.mg, self.rms, self.mom, self.g, self.lr, self.decay, self.momentum,
                            self.epsilon)
class NetRMSProp(nn.Cell):
    """Cell applying (non-centered) P.ApplyRMSProp.

    Post-commit form: the optimizer state tensors are constructor arguments
    stored on the cell (callers wrap them as Parameters), so `construct`
    takes no arguments and the op updates the state in place. `mg` is stored
    for signature parity with NetCenteredRMSProp but is not used by the
    non-centered op.
    """

    def __init__(self, lr, decay, momentum, epsilon, var, g, mg, rms, mom):
        super(NetRMSProp, self).__init__()
        # Scalar hyper-parameters.
        self.lr = lr
        self.decay = decay
        self.momentum = momentum
        self.epsilon = epsilon
        # State tensors; expected to be Parameters updated in place by the op
        # — TODO confirm against caller.
        self.var = var
        self.g = g
        self.mg = mg  # unused by ApplyRMSProp; kept for a uniform interface
        self.rms = rms
        self.mom = mom
        self.rms_opt = P.ApplyRMSProp()

    def construct(self):
        return self.rms_opt(self.var, self.rms, self.mom, self.lr, self.g, self.decay, self.momentum, self.epsilon)
def rmsprop_numpy(variable, gradients, mean_square, moment, def rmsprop_numpy(variable, gradients, mean_square, moment,
@ -67,6 +80,7 @@ def rmspropcented_numpy(variable, gradients, mean_gradients, mean_square, moment
variable = variable - moment variable = variable - moment
return variable, gradients, mean_gradients, mean_square, moment return variable, gradients, mean_gradients, mean_square, moment
@pytest.mark.level0 @pytest.mark.level0
@pytest.mark.platform_x86_gpu_training @pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard @pytest.mark.env_onecard
@ -79,25 +93,33 @@ def test_rmsprop():
mean_square_np = np.array([epsilon, epsilon], dtype=np.float32) mean_square_np = np.array([epsilon, epsilon], dtype=np.float32)
moment_np = np.array([0.0, 0.0], dtype=np.float32) moment_np = np.array([0.0, 0.0], dtype=np.float32)
variable_ms = Tensor(variable_np) variable = Tensor(variable_np)
gradients_ms = Tensor(gradients_np) gradients = Tensor(gradients_np)
mean_gradients_ms = Tensor(mean_gradients_np) mean_gradients = Tensor(mean_gradients_np)
mean_square_ms = Tensor(mean_square_np) mean_square = Tensor(mean_square_np)
moment_ms = Tensor(moment_np) moment = Tensor(moment_np)
variable_ms = Parameter(initializer(variable, variable.shape), name='var')
gradients_ms = Parameter(initializer(gradients, gradients.shape), name='grad')
mean_gradients_ms = Parameter(initializer(mean_gradients, mean_gradients.shape), name='mg')
mean_square_ms = Parameter(initializer(mean_square, mean_square.shape), name='msr')
moment_ms = Parameter(initializer(moment, moment.shape), name='mom')
if centered: if centered:
variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np = \ variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np = \
rmspropcented_numpy(variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np, rmspropcented_numpy(variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np,
learning_rate, decay, momentum, epsilon) learning_rate, decay, momentum, epsilon)
net = NetCenteredRMSProp(learning_rate, decay, momentum, epsilon) net = NetCenteredRMSProp(learning_rate, decay, momentum, epsilon, variable_ms, gradients_ms, mean_gradients_ms,
_ = net(variable_ms, gradients_ms, mean_gradients_ms, mean_square_ms, moment_ms) mean_square_ms, moment_ms)
_ = net()
else: else:
variable_np, gradients_np, mean_square_np, moment_np = \ variable_np, gradients_np, mean_square_np, moment_np = \
rmsprop_numpy(variable_np, gradients_np, mean_square_np, moment_np, rmsprop_numpy(variable_np, gradients_np, mean_square_np, moment_np,
learning_rate, decay, momentum, epsilon) learning_rate, decay, momentum, epsilon)
net = NetRMSProp(learning_rate, decay, momentum, epsilon) net = NetRMSProp(learning_rate, decay, momentum, epsilon, variable_ms, gradients_ms, mean_gradients_ms,
_ = net(variable_ms, gradients_ms, mean_gradients_ms, mean_square_ms, moment_ms) mean_square_ms, moment_ms)
_ = net()
error = np.ones(shape=variable_np.shape) * 10e-6 error = np.ones(shape=variable_np.shape) * 10e-6
diff = variable_ms.asnumpy() - variable_np diff = variable_ms.asnumpy() - variable_np
@ -132,24 +154,32 @@ def test_rmspropcenter():
mean_square_np = np.array([epsilon, epsilon], dtype=np.float32) mean_square_np = np.array([epsilon, epsilon], dtype=np.float32)
moment_np = np.array([0.0, 0.0], dtype=np.float32) moment_np = np.array([0.0, 0.0], dtype=np.float32)
variable_ms = Tensor(variable_np) variable = Tensor(variable_np)
gradients_ms = Tensor(gradients_np) gradients = Tensor(gradients_np)
mean_gradients_ms = Tensor(mean_gradients_np) mean_gradients = Tensor(mean_gradients_np)
mean_square_ms = Tensor(mean_square_np) mean_square = Tensor(mean_square_np)
moment_ms = Tensor(moment_np) moment = Tensor(moment_np)
variable_ms = Parameter(initializer(variable, variable.shape), name='var')
gradients_ms = Parameter(initializer(gradients, gradients.shape), name='grad')
mean_gradients_ms = Parameter(initializer(mean_gradients, mean_gradients.shape), name='mg')
mean_square_ms = Parameter(initializer(mean_square, mean_square.shape), name='msr')
moment_ms = Parameter(initializer(moment, moment.shape), name='mom')
if centered: if centered:
variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np = \ variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np = \
rmspropcented_numpy(variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np, rmspropcented_numpy(variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np,
learning_rate, decay, momentum, epsilon) learning_rate, decay, momentum, epsilon)
net = NetCenteredRMSProp(learning_rate, decay, momentum, epsilon) net = NetCenteredRMSProp(learning_rate, decay, momentum, epsilon, variable_ms, gradients_ms, mean_gradients_ms,
_ = net(variable_ms, gradients_ms, mean_gradients_ms, mean_square_ms, moment_ms) mean_square_ms, moment_ms)
_ = net()
else: else:
variable_np, gradients_np, mean_square_np, moment_np = \ variable_np, gradients_np, mean_square_np, moment_np = \
rmsprop_numpy(variable_np, gradients_np, mean_square_np, moment_np, rmsprop_numpy(variable_np, gradients_np, mean_square_np, moment_np,
learning_rate, decay, momentum, epsilon) learning_rate, decay, momentum, epsilon)
net = NetRMSProp(learning_rate, decay, momentum, epsilon) net = NetRMSProp(learning_rate, decay, momentum, epsilon, variable_ms, gradients_ms, mean_gradients_ms,
_ = net(variable_ms, gradients_ms, mean_gradients_ms, mean_square_ms, moment_ms) mean_square_ms, moment_ms)
_ = net()
error = np.ones(shape=variable_np.shape) * 10e-6 error = np.ones(shape=variable_np.shape) * 10e-6
diff = variable_ms.asnumpy() - variable_np diff = variable_ms.asnumpy() - variable_np

Loading…
Cancel
Save