A better error message for gradient checker

* Report which parameter and which element are wrong, and what the max_diff is.
8 years ago · c957445c72
parent be9867f91b
commit c957445c72
2 changed files with 30 additions and 20 deletions
--- a/paddle/framework/pybind.cc
+++ b/paddle/framework/pybind.cc
@ -22,6 +22,7 @@ limitations under the License. */
 #include "paddle/operators/net_op.h"
 #include "paddle/platform/enforce.h"
 #include "paddle/platform/place.h"
 #include "paddle/string/to_string.h"
 #include "pybind11/numpy.h"
 #include "pybind11/pybind11.h"
 #include "pybind11/stl.h"
@ -205,9 +206,13 @@ All parameter, weight, gradient are variables in Paddle.
                  });
  // clang-format on
-  py::class_<paddle::platform::GPUPlace>(m, "GPUPlace").def(py::init<int>());
+  py::class_<platform::GPUPlace>(m, "GPUPlace")
      .def(py::init<int>())
      .def("__str__", string::to_string<const platform::GPUPlace &>);
-  py::class_<paddle::platform::CPUPlace>(m, "CPUPlace").def(py::init<>());
+  py::class_<paddle::platform::CPUPlace>(m, "CPUPlace")
      .def(py::init<>())
      .def("__str__", string::to_string<const platform::CPUPlace &>);
  py::class_<OperatorBase, std::shared_ptr<OperatorBase>> operator_base(
      m, "Operator");
--- a/python/paddle/v2/framework/tests/gradient_checker.py
+++ b/python/paddle/v2/framework/tests/gradient_checker.py
@ -92,15 +92,26 @@ def get_numeric_gradient(op,
 class GradientChecker(unittest.TestCase):
-    def __is_close(self, numeric_grads, scope, max_relative_error):
+    def __is_close(self, numeric_grads, scope, max_relative_error, msg_prefix):
        for name in numeric_grads:
-            op_grad = numpy.array(
+            b = numpy.array(scope.find_var(grad_var_name(name)).get_tensor())
-                scope.find_var(grad_var_name(name)).get_tensor())
+            a = numeric_grads[name]
-            is_close = numpy.allclose(
+
-                numeric_grads[name], op_grad, rtol=max_relative_error, atol=100)
+            abs_a = numpy.abs(a)
-            if not is_close:
+            # if abs_a is nearly zero, use the absolute error, not the relative
-                return False
+            # error.
-        return True
+            abs_a[abs_a < 1e-3] = 1
            diff_mat = numpy.abs(a - b) / abs_a
            max_diff = numpy.max(diff_mat)
            def err_msg():
                offset = numpy.argmax(diff_mat > max_relative_error)
                return "%s Variable %s max gradient diff %f over limit %f, the first " \
                       "error element is %d" % (
                       msg_prefix, name, max_diff, max_relative_error, offset)
            self.assertLessEqual(max_diff, max_relative_error, err_msg())
    def check_grad(self,
                   forward_op,
@ -145,7 +156,8 @@ class GradientChecker(unittest.TestCase):
        # get numeric gradient
        for check_name in inputs_to_check:
            numeric_grad[check_name] = \
-                get_numeric_gradient(forward_op, input_vars, output_name, check_name)
+                get_numeric_gradient(forward_op, input_vars, output_name,
                                     check_name)
        # get operator gradient according to different device
        for place in places:
@ -187,15 +199,8 @@ class GradientChecker(unittest.TestCase):
            backward_op.infer_shape(scope)
            backward_op.run(scope, ctx)
-            if isinstance(place, core.CPUPlace):
+            self.__is_close(numeric_grad, scope, max_relative_error,
-                msg = "CPU kernel gradient is not close to numeric gradient"
+                            "Gradient Check On %s" % str(place))
            else:
                if isinstance(place, core.GPUPlace):
                    msg = "GPU kernel gradient is not close to numeric gradient"
                else:
                    raise ValueError("unknown place " + type(place))
            self.assertTrue(
                self.__is_close(numeric_grad, scope, max_relative_error), msg)
 if __name__ == '__main__':