|
|
|
@@ -22,7 +22,7 @@ import mindspore.common.dtype as mstype
|
|
|
|
|
from mindspore._checkparam import check_bool
|
|
|
|
|
from mindspore._checkparam import Validator as validator
|
|
|
|
|
from mindspore.nn.optim.optimizer import Optimizer
|
|
|
|
|
from mindspore.parallel._utils import _get_device_num, _get_mirror_mean
|
|
|
|
|
from mindspore.parallel._utils import _get_device_num, _get_gradients_mean
|
|
|
|
|
from src.grad_reducer_thor import DistributedGradReducerThor
|
|
|
|
|
|
|
|
|
|
_momentum_opt = C.MultitypeFuncGraph("momentum_opt")
|
|
|
|
@@ -85,7 +85,7 @@ class THOR_GPU(Optimizer):
|
|
|
|
|
self.assign = P.Assign()
|
|
|
|
|
self.mul = P.Mul()
|
|
|
|
|
|
|
|
|
|
mean = _get_mirror_mean()
|
|
|
|
|
mean = _get_gradients_mean()
|
|
|
|
|
degree = _get_device_num()
|
|
|
|
|
|
|
|
|
|
parameter_length = len(self.feature_map)
|
|
|
|
@@ -193,7 +193,7 @@ class THOR(Optimizer):
|
|
|
|
|
1.0 / 196, 1.0 / 196, 1.0 / 196,
|
|
|
|
|
1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49,
|
|
|
|
|
1.0]
|
|
|
|
|
mean = _get_mirror_mean()
|
|
|
|
|
mean = _get_gradients_mean()
|
|
|
|
|
degree = _get_device_num()
|
|
|
|
|
parameter_length = len(self.feature_map)
|
|
|
|
|
self.grad_reducer_Amax = DistributedGradReducerThor(parameter_length, ((27,), 2), mean, degree)
|
|
|
|
|