diff --git a/mindspore/ccsrc/backend/session/gpu_session.cc b/mindspore/ccsrc/backend/session/gpu_session.cc
index eb5332b486..ce39abdbb4 100644
--- a/mindspore/ccsrc/backend/session/gpu_session.cc
+++ b/mindspore/ccsrc/backend/session/gpu_session.cc
@@ -174,7 +174,9 @@ void GPUSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
       }
     }
     if (need_sync) {
-      tensor->set_device_address(device_address);
+      if (AnfAlgo::IsParameterWeight(input_node->cast<ParameterPtr>())) {
+        tensor->set_device_address(device_address);
+      }
       MS_EXCEPTION_IF_NULL(device_address);
       if (!device_address->SyncHostToDevice(trans::GetRuntimePaddingShape(pk_node, 0),
                                             LongToSize(tensor->data().nbytes()), tensor->data_type(),
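Note: with the guard above, LoadInputData hands the kernel graph's device address to the host tensor only when the bound input node is a weight Parameter; other inputs are still synced host-to-device on every launch but keep their own storage. The Python-side distinction the tests below rely on, as a minimal sketch (assumes a working MindSpore GPU build; the variable names are illustrative):

    # A plain Tensor input is re-synced from host each launch, so in-place
    # device updates to it would be lost; a Parameter is a graph weight and
    # keeps its device memory across calls.
    import numpy as np
    from mindspore import Tensor, Parameter
    from mindspore.common.initializer import initializer

    state_np = np.zeros([2], dtype=np.float32)
    plain_input = Tensor(state_np)  # re-synced on every call
    weight_state = Parameter(initializer(Tensor(state_np), state_np.shape), name='state')  # device-resident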
diff --git a/tests/st/ops/gpu/test_batchnorm_fold_op.py b/tests/st/ops/gpu/test_batchnorm_fold_op.py
index b5b09a24d4..01f81cb2ad 100644
--- a/tests/st/ops/gpu/test_batchnorm_fold_op.py
+++ b/tests/st/ops/gpu/test_batchnorm_fold_op.py
@@ -19,6 +19,8 @@ import pytest
 import mindspore.context as context
 import mindspore.nn as nn
 from mindspore import Tensor
+from mindspore.common.parameter import Parameter
+from mindspore.common.initializer import initializer
 from mindspore.common.api import ms_function
 from mindspore.ops.operations import _quant_ops as Q
 
@@ -26,13 +28,15 @@ context.set_context(device_target='GPU')
 
 
 class Net(nn.Cell):
-    def __init__(self):
+    def __init__(self, mean, variance):
         super(Net, self).__init__()
+        self.mean = mean
+        self.variance = variance
         self.op = Q.BatchNormFold(momentum=0.9, freeze_bn=10)
 
     @ms_function
-    def construct(self, x, mean, variance, current_step):
-        a, b, c, d = self.op(x, mean, variance, current_step)
+    def construct(self, x, current_step):
+        a, b, c, d = self.op(x, self.mean, self.variance, current_step)
         return a, b, c, d
 
 
@@ -52,16 +56,17 @@ def np_result(x, mean, var, momentum, epsilon):
 @pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_batchnorm_fold():
-    net = Net()
     c = 64
     x = np.random.uniform(1, 10, size=[3, c, 32, 32]).astype('float32')
     mean = np.random.uniform(1, 10, size=[c]).astype('float32')
     variance = np.random.uniform(1, 10, size=[c]).astype('float32')
     current_step = np.array([0]).astype('int32')
-    ms_mean = Tensor(mean)
-    ms_var = Tensor(variance)
-    batch_mean, batch_var, delay_mean, delay_std = net(Tensor(x), ms_mean, ms_var,
-                                                       Tensor(current_step))
+    ms_mean_t = Tensor(mean)
+    ms_var_t = Tensor(variance)
+    ms_mean = Parameter(initializer(ms_mean_t, ms_mean_t.shape), name='mean')
+    ms_var = Parameter(initializer(ms_var_t, ms_var_t.shape), name='var')
+    net = Net(ms_mean, ms_var)
+    batch_mean, batch_var, delay_mean, delay_std = net(Tensor(x), Tensor(current_step))
 
     expect1, expect2, expect3, expect4, expect5, expect6 = np_result(x, mean, variance, 0.9, 1e-12)
     assert np.allclose(batch_mean.asnumpy(), expect1, rtol=1.e-7, atol=1.e-5)
@@ -76,16 +81,17 @@
 @pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_batchnorm_fold2():
-    net = Net()
     c = 64
     x = np.random.uniform(1, 10, size=[3, c, 512, 512]).astype('float32')
     mean = np.random.uniform(1, 10, size=[c]).astype('float32')
     variance = np.random.uniform(1, 10, size=[c]).astype('float32')
     current_step = np.array([0]).astype('int32')
-    ms_mean = Tensor(mean)
-    ms_var = Tensor(variance)
-    batch_mean, batch_var, delay_mean, delay_std = net(Tensor(x), ms_mean, ms_var,
-                                                       Tensor(current_step))
+    ms_mean_t = Tensor(mean)
+    ms_var_t = Tensor(variance)
+    ms_mean = Parameter(initializer(ms_mean_t, ms_mean_t.shape), name='mean')
+    ms_var = Parameter(initializer(ms_var_t, ms_var_t.shape), name='var')
+    net = Net(ms_mean, ms_var)
+    batch_mean, batch_var, delay_mean, delay_std = net(Tensor(x), Tensor(current_step))
     expect1, expect2, expect3, _, expect5, expect6 = np_result(x, mean, variance, 0.9, 1e-12)
     assert np.allclose(batch_mean.asnumpy(), expect1, rtol=1.e-7, atol=1.e-5)
     assert np.allclose(batch_var.asnumpy(), expect2, rtol=1.e-7, atol=1.e-5)
@@ -98,16 +104,17 @@
 @pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_batchnorm_fold_freeze():
-    net = Net()
     c = 64
     x = np.random.uniform(1, 10, size=[3, c, 32, 32]).astype('float32')
     mean = np.random.uniform(1, 10, size=[c]).astype('float32')
     variance = np.random.uniform(1, 10, size=[c]).astype('float32')
     current_step = np.array([10]).astype('int32')
-    ms_mean = Tensor(mean)
-    ms_var = Tensor(variance)
-    batch_mean, batch_var, delay_mean, delay_std = net(Tensor(x), ms_mean, ms_var,
-                                                       Tensor(current_step))
+    ms_mean_t = Tensor(mean)
+    ms_var_t = Tensor(variance)
+    ms_mean = Parameter(initializer(ms_mean_t, ms_mean_t.shape), name='mean')
+    ms_var = Parameter(initializer(ms_var_t, ms_var_t.shape), name='var')
+    net = Net(ms_mean, ms_var)
+    batch_mean, batch_var, delay_mean, delay_std = net(Tensor(x), Tensor(current_step))
     _, _, _, _, expect5, expect6 = np_result(x, mean, variance, 0.9, 1e-12)
     assert np.allclose(batch_mean.asnumpy(), np.zeros_like(mean), rtol=1.e-7, atol=1.e-5)
    assert np.allclose(batch_var.asnumpy(), np.ones_like(mean), rtol=1.e-7, atol=1.e-5)
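Note: the three batchnorm tests above (and the RMSProp tests below) repeat the same Tensor-to-Parameter wrapping by hand. A small helper would keep them consistent; this is a hypothetical refactor sketch, not part of the patch, and as_param is an invented name:

    from mindspore import Tensor, Parameter
    from mindspore.common.initializer import initializer

    def as_param(arr, name):
        # wrap a numpy array as a named Parameter, the same way the tests do inline
        t = Tensor(arr)
        return Parameter(initializer(t, t.shape), name=name)

    # usage mirroring test_batchnorm_fold():
    # ms_mean = as_param(mean, 'mean')
    # ms_var = as_param(variance, 'var')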
diff --git a/tests/st/ops/gpu/test_rmsprop.py b/tests/st/ops/gpu/test_rmsprop.py
index f578fb82c8..4cad24ef53 100644
--- a/tests/st/ops/gpu/test_rmsprop.py
+++ b/tests/st/ops/gpu/test_rmsprop.py
@@ -19,35 +19,48 @@ import pytest
 import mindspore.context as context
 import mindspore.nn as nn
 from mindspore import Tensor
+from mindspore.common.parameter import Parameter
+from mindspore.common.initializer import initializer
 from mindspore.ops import operations as P
 
 context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
 
 
 class NetCenteredRMSProp(nn.Cell):
-    def __init__(self, lr, decay, momentum, epsilon):
+    def __init__(self, lr, decay, momentum, epsilon, var, g, mg, rms, mom):
         super(NetCenteredRMSProp, self).__init__()
         self.rms_opt = P.ApplyCenteredRMSProp()
         self.lr = lr
         self.decay = decay
         self.momentum = momentum
         self.epsilon = epsilon
+        self.var = var
+        self.g = g
+        self.mg = mg
+        self.rms = rms
+        self.mom = mom
 
-    def construct(self, var, g, mg, rms, mom):
-        return self.rms_opt(var, mg, rms, mom, g, self.lr, self.decay, self.momentum, self.epsilon)
+    def construct(self):
+        return self.rms_opt(self.var, self.mg, self.rms, self.mom, self.g, self.lr, self.decay, self.momentum,
+                            self.epsilon)
 
 
 class NetRMSProp(nn.Cell):
-    def __init__(self, lr, decay, momentum, epsilon):
+    def __init__(self, lr, decay, momentum, epsilon, var, g, mg, rms, mom):
         super(NetRMSProp, self).__init__()
         self.lr = lr
         self.decay = decay
         self.momentum = momentum
         self.epsilon = epsilon
+        self.var = var
+        self.g = g
+        self.mg = mg
+        self.rms = rms
+        self.mom = mom
         self.rms_opt = P.ApplyRMSProp()
 
-    def construct(self, var, g, mg, rms, mom):
-        return self.rms_opt(var, rms, mom, self.lr, g, self.decay, self.momentum, self.epsilon)
+    def construct(self):
+        return self.rms_opt(self.var, self.rms, self.mom, self.lr, self.g, self.decay, self.momentum, self.epsilon)
 
 
 def rmsprop_numpy(variable, gradients, mean_square, moment,
@@ -67,6 +80,7 @@ def rmspropcented_numpy(variable, gradients, mean_gradients, mean_square, moment
     variable = variable - moment
     return variable, gradients, mean_gradients, mean_square, moment
 
+
 @pytest.mark.level0
 @pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
@@ -79,25 +93,33 @@ def test_rmsprop():
     mean_square_np = np.array([epsilon, epsilon], dtype=np.float32)
     moment_np = np.array([0.0, 0.0], dtype=np.float32)
 
-    variable_ms = Tensor(variable_np)
-    gradients_ms = Tensor(gradients_np)
-    mean_gradients_ms = Tensor(mean_gradients_np)
-    mean_square_ms = Tensor(mean_square_np)
-    moment_ms = Tensor(moment_np)
+    variable = Tensor(variable_np)
+    gradients = Tensor(gradients_np)
+    mean_gradients = Tensor(mean_gradients_np)
+    mean_square = Tensor(mean_square_np)
+    moment = Tensor(moment_np)
+
+    variable_ms = Parameter(initializer(variable, variable.shape), name='var')
+    gradients_ms = Parameter(initializer(gradients, gradients.shape), name='grad')
+    mean_gradients_ms = Parameter(initializer(mean_gradients, mean_gradients.shape), name='mg')
+    mean_square_ms = Parameter(initializer(mean_square, mean_square.shape), name='msr')
+    moment_ms = Parameter(initializer(moment, moment.shape), name='mom')
 
     if centered:
         variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np = \
-        rmspropcented_numpy(variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np,
-                            learning_rate, decay, momentum, epsilon)
-        net = NetCenteredRMSProp(learning_rate, decay, momentum, epsilon)
-        _ = net(variable_ms, gradients_ms, mean_gradients_ms, mean_square_ms, moment_ms)
+            rmspropcented_numpy(variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np,
+                                learning_rate, decay, momentum, epsilon)
+        net = NetCenteredRMSProp(learning_rate, decay, momentum, epsilon, variable_ms, gradients_ms, mean_gradients_ms,
+                                 mean_square_ms, moment_ms)
+        _ = net()
     else:
         variable_np, gradients_np, mean_square_np, moment_np = \
-        rmsprop_numpy(variable_np, gradients_np, mean_square_np, moment_np,
-                      learning_rate, decay, momentum, epsilon)
-        net = NetRMSProp(learning_rate, decay, momentum, epsilon)
-        _ = net(variable_ms, gradients_ms, mean_gradients_ms, mean_square_ms, moment_ms)
+            rmsprop_numpy(variable_np, gradients_np, mean_square_np, moment_np,
+                          learning_rate, decay, momentum, epsilon)
+        net = NetRMSProp(learning_rate, decay, momentum, epsilon, variable_ms, gradients_ms, mean_gradients_ms,
+                         mean_square_ms, moment_ms)
+        _ = net()
 
     error = np.ones(shape=variable_np.shape) * 10e-6
 
     diff = variable_ms.asnumpy() - variable_np
@@ -132,24 +154,32 @@ def test_rmspropcenter():
     mean_square_np = np.array([epsilon, epsilon], dtype=np.float32)
     moment_np = np.array([0.0, 0.0], dtype=np.float32)
 
-    variable_ms = Tensor(variable_np)
-    gradients_ms = Tensor(gradients_np)
-    mean_gradients_ms = Tensor(mean_gradients_np)
-    mean_square_ms = Tensor(mean_square_np)
-    moment_ms = Tensor(moment_np)
+    variable = Tensor(variable_np)
+    gradients = Tensor(gradients_np)
+    mean_gradients = Tensor(mean_gradients_np)
+    mean_square = Tensor(mean_square_np)
+    moment = Tensor(moment_np)
+
+    variable_ms = Parameter(initializer(variable, variable.shape), name='var')
+    gradients_ms = Parameter(initializer(gradients, gradients.shape), name='grad')
+    mean_gradients_ms = Parameter(initializer(mean_gradients, mean_gradients.shape), name='mg')
+    mean_square_ms = Parameter(initializer(mean_square, mean_square.shape), name='msr')
+    moment_ms = Parameter(initializer(moment, moment.shape), name='mom')
 
     if centered:
         variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np = \
-        rmspropcented_numpy(variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np,
-                            learning_rate, decay, momentum, epsilon)
-        net = NetCenteredRMSProp(learning_rate, decay, momentum, epsilon)
-        _ = net(variable_ms, gradients_ms, mean_gradients_ms, mean_square_ms, moment_ms)
+            rmspropcented_numpy(variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np,
+                                learning_rate, decay, momentum, epsilon)
+        net = NetCenteredRMSProp(learning_rate, decay, momentum, epsilon, variable_ms, gradients_ms, mean_gradients_ms,
+                                 mean_square_ms, moment_ms)
+        _ = net()
     else:
         variable_np, gradients_np, mean_square_np, moment_np = \
-        rmsprop_numpy(variable_np, gradients_np, mean_square_np, moment_np,
-                      learning_rate, decay, momentum, epsilon)
-        net = NetRMSProp(learning_rate, decay, momentum, epsilon)
-        _ = net(variable_ms, gradients_ms, mean_gradients_ms, mean_square_ms, moment_ms)
+            rmsprop_numpy(variable_np, gradients_np, mean_square_np, moment_np,
+                          learning_rate, decay, momentum, epsilon)
+        net = NetRMSProp(learning_rate, decay, momentum, epsilon, variable_ms, gradients_ms, mean_gradients_ms,
+                         mean_square_ms, moment_ms)
+        _ = net()
 
     error = np.ones(shape=variable_np.shape) * 10e-6
     diff = variable_ms.asnumpy() - variable_np
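Note: both RMSProp tests read the result back from the Parameter (variable_ms.asnumpy()) rather than from the op's return value, because ApplyRMSProp and ApplyCenteredRMSProp update their Parameter inputs in place. For orientation, the non-centered recurrence that rmsprop_numpy is expected to compute is the standard RMSProp update; a sketch under that assumption, not copied from the test file:

    import numpy as np

    def rmsprop_step(var, g, ms, mom, lr, decay, momentum, eps):
        # decay the running average of squared gradients, form the momentum
        # step, then apply it to the variable
        ms = decay * ms + (1.0 - decay) * g * g
        mom = momentum * mom + lr * g / np.sqrt(ms + eps)
        return var - mom, ms, mom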