From 1497ad11120bca059e0207c6359adf724309f6d9 Mon Sep 17 00:00:00 2001
From: gerayking <919179287@qq.com>
Date: Tue, 30 Mar 2021 15:28:34 +0800
Subject: [PATCH] Add optimizer formula to comments - Adagrad

---
 mindspore/nn/optim/ada_grad.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/mindspore/nn/optim/ada_grad.py b/mindspore/nn/optim/ada_grad.py
index 7ebb38788e..3b1253c32a 100644
--- a/mindspore/nn/optim/ada_grad.py
+++ b/mindspore/nn/optim/ada_grad.py
@@ -36,12 +36,22 @@ def _check_param_value(accum, update_slots, prim_name=None):
 
 
 class Adagrad(Optimizer):
-    """
+    r"""
     Implements the Adagrad algorithm with ApplyAdagrad Operator.
 
     Adagrad is an online Learning and Stochastic Optimization.
     Refer to paper `Efficient Learning using Forward-Backward Splitting `_.
+    The updating formulas are as follows,
+
+    .. math::
+        \begin{array}{ll} \\
+            h_{t} = h_{t-1} + g * g \\
+            w_{t} = w_{t-1} - lr * \frac{1}{\sqrt{h_{t}}} * g
+        \end{array}
+
+    :math:`h` represents the cumulative sum of gradient squared, :math:`g` represents `gradients`.
+    :math:`lr` represents `learning_rate`, :math:`w` represents `params`.
 
     Note:
         When separating parameter groups, the weight decay in each group will be applied on the parameters if the
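
For review convenience, here is a minimal NumPy sketch of the update rule the new docstring describes. It is illustrative only and not part of the patch: the function name `adagrad_step`, the toy objective, and the small `eps` guard (which the formula above omits) are assumptions of this sketch; the actual MindSpore implementation is the `ApplyAdagrad` operator.

```python
import numpy as np

def adagrad_step(w, h, g, lr):
    """One Adagrad update following the docstring formula.

    `eps` is a numerical-stability guard added for this sketch; the formula
    in the patch divides by sqrt(h_t) directly.
    """
    eps = 1e-10
    h = h + g * g                         # h_t = h_{t-1} + g * g
    w = w - lr * g / (np.sqrt(h) + eps)   # w_t = w_{t-1} - lr * g / sqrt(h_t)
    return w, h

# Toy usage: minimize f(w) = w^2, whose gradient is 2w.
# `h` plays the role of the optimizer's `accum` slot, started at a small positive value.
w = np.array([2.0])
h = np.full_like(w, 0.1)
for _ in range(200):
    g = 2.0 * w
    w, h = adagrad_step(w, h, g, lr=0.5)
print(w)  # moves toward the minimum at 0
```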