Call new cudnn batch norm API regardless of data type and data layout (#30157)

revert-31068-fix_conv3d_windows
AshburnLee 4 years ago committed by GitHub
parent 5c8455d6ea
commit 666efc2336

File diff suppressed because it is too large
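
For context before the hunks: the Python layers used to create a ReserveSpace output only when the data layout was NHWC and FLAGS_cudnn_batchnorm_spatial_persistent was set to a truthy value; this commit creates it unconditionally, so the newer cuDNN batch norm API can be used regardless of data type and data layout. A minimal Python sketch of the old gating versus the new behavior (the two function names are illustrative, not Paddle API):

import os

def has_reserve_space_old(data_layout):
    # Old gating, removed in this commit: NHWC only, and only when the
    # spatial-persistent flag was enabled in the environment.
    if data_layout != 'NHWC':
        return False
    flag = os.environ.get('FLAGS_cudnn_batchnorm_spatial_persistent')
    return flag is not None and flag.lower() in ['true', '1']

def has_reserve_space_new(data_layout):
    # New behavior: a ReserveSpace variable is always wired up, for any
    # layout and dtype, so the kernel can pick the new cuDNN API freely.
    return True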

@@ -178,6 +178,9 @@ class InplaceABNOpGradMaker : public framework::SingleGradOpMaker<T> {
op->SetInput("Bias", this->Input("Bias"));
op->SetInput("SavedMean", this->Output("SavedMean"));
op->SetInput("SavedVariance", this->Output("SavedVariance"));
if (this->HasOutput("ReserveSpace")) {
op->SetInput("ReserveSpace", this->Output("ReserveSpace"));
}
// used when setting use_global_stats True during training
if (BOOST_GET_CONST(bool, this->GetAttr("use_global_stats"))) {

@@ -1309,12 +1309,6 @@ class BatchNorm(layers.Layer):
dtype=self._dtype)
self._variance.stop_gradient = True
self._has_reserve_space = False
if data_layout == 'NHWC':
flag = os.environ.get('FLAGS_cudnn_batchnorm_spatial_persistent')
if flag is not None and flag.lower() in ['true', '1']:
self._has_reserve_space = True
self._in_place = in_place
self._data_layout = data_layout
self._momentum = momentum
@@ -1341,7 +1335,6 @@
batch_norm_out, _, _, _, _, _ = core.ops.batch_norm(
input, self.weight, self.bias, self._mean, self._variance,
mean_out, variance_out, *attrs)
return dygraph_utils._append_activation_in_dygraph(
batch_norm_out, act=self._act, use_mkldnn=self._use_mkldnn)
@@ -1371,11 +1364,8 @@
dtype=self._dtype, stop_gradient=True)
saved_variance = self._helper.create_variable_for_type_inference(
dtype=self._dtype, stop_gradient=True)
reserve_space = None
if self._has_reserve_space:
reserve_space = self._helper.create_variable_for_type_inference(
dtype=core.VarDesc.VarType.FP16, stop_gradient=True)
reserve_space = self._helper.create_variable_for_type_inference(
dtype=self._helper.input_dtype(input), stop_gradient=True)
batch_norm_out = input if self._in_place else self._helper.create_variable_for_type_inference(
self._dtype)
@@ -1388,7 +1378,7 @@
"SavedVariance": [saved_variance]
}
if reserve_space is not None:
outputs["ReserveSpace"] = reserve_space
outputs["ReserveSpace"] = [reserve_space]
self._helper.append_op(
type="batch_norm", inputs=inputs, outputs=outputs, attrs=attrs)

@@ -2792,12 +2792,6 @@ def batch_norm(input,
'batch_norm')
dtype = helper.input_dtype()
has_reserve_space = False
if data_layout == 'NHWC':
flag = os.environ.get('FLAGS_cudnn_batchnorm_spatial_persistent')
if flag is not None and flag.lower() in ['true', '1']:
has_reserve_space = True
# use fp32 for bn parameter
if dtype == core.VarDesc.VarType.FP16:
dtype = core.VarDesc.VarType.FP32
@@ -2845,17 +2839,16 @@
# create output
# mean and mean_out share the same memory
mean_out = mean
# variance and variance out share the same memory
# variance and variance_out share the same memory
variance_out = variance
saved_mean = helper.create_variable_for_type_inference(
dtype=dtype, stop_gradient=True)
saved_variance = helper.create_variable_for_type_inference(
dtype=dtype, stop_gradient=True)
reserve_space = None
if has_reserve_space:
if not is_test:
reserve_space = helper.create_variable_for_type_inference(
dtype=core.VarDesc.VarType.FP16, stop_gradient=True)
dtype=helper.input_dtype(), stop_gradient=True)
batch_norm_out = input if in_place else \
helper.create_variable_for_type_inference(dtype)
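
The static-graph counterpart, as a construction-only sketch (no executor run): fluid.layers.batch_norm now allocates the ReserveSpace variable internally, with the input's dtype, for a plain NCHW/float32 input as well:

import paddle
import paddle.fluid as fluid

paddle.enable_static()  # build a static Program rather than running eagerly
main, startup = fluid.Program(), fluid.Program()
with fluid.program_guard(main, startup):
    x = fluid.data(name='x', shape=[-1, 3, 8, 8], dtype='float32')
    # ReserveSpace is created inside batch_norm (see the hunk above) whenever
    # is_test is False; FLAGS_cudnn_batchnorm_spatial_persistent is not needed.
    y = fluid.layers.batch_norm(x, data_layout='NCHW')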
@@ -2998,12 +2991,6 @@ def inplace_abn(input,
'inplace_abn')
dtype = helper.input_dtype()
has_reserve_space = False
if data_layout == 'NHWC':
flag = os.environ.get('FLAGS_cudnn_batchnorm_spatial_persistent')
if flag is not None and flag.lower() in ['true', '1']:
has_reserve_space = True
input_shape = input.shape
if data_layout == 'NCHW':
channel_num = input_shape[1]
@@ -3053,12 +3040,8 @@
dtype=dtype, stop_gradient=True)
saved_variance = helper.create_variable_for_type_inference(
dtype=dtype, stop_gradient=True)
reserve_space = None
if has_reserve_space:
reserve_space = helper.create_variable_for_type_inference(
dtype=core.VarDesc.VarType.FP16, stop_gradient=True)
reserve_space = helper.create_variable_for_type_inference(
dtype=dtype, stop_gradient=True)
batch_norm_out = input
inputs = {
@@ -3082,7 +3065,6 @@
inputs['MomemtumTensor'] = momentum
else:
attrs['momentum'] = momentum
outputs = {
"Y": batch_norm_out,
"MeanOut": mean_out,

@@ -440,16 +440,8 @@ class TestBatchNormOpTraining(unittest.TestCase):
"SavedMean": block.var('saved_mean'),
"SavedVariance": block.var('saved_variance')
}
has_reserve_space = False
if data_format == 'NHWC':
flag = os.environ.get(
'FLAGS_cudnn_batchnorm_spatial_persistent')
if flag is not None and flag.lower() in ['true', '1']:
has_reserve_space = True
if has_reserve_space:
block.create_var(name="reserve_space", dtype='float16')
outputs["ReserveSpace"] = block.var('reserve_space')
del os.environ['FLAGS_cudnn_batchnorm_spatial_persistent']
block.create_var(name="reserve_space", dtype='float32')
outputs["ReserveSpace"] = block.var('reserve_space')
bn_op = block.append_op(
type="batch_norm",
inputs=inputs,

@@ -122,7 +122,7 @@ class TestSyncBatchNormOpTraining(unittest.TestCase):
if not only_forward:
others = [
'batch_norm_0.tmp_0', 'batch_norm_0.tmp_1', 'bn_scale@GRAD',
'bn_bias@GRAD', 'batch_norm_0.tmp_2@GRAD', 'conv2d_0.tmp_0@GRAD'
'bn_bias@GRAD', 'batch_norm_0.tmp_3@GRAD', 'conv2d_0.tmp_0@GRAD'
]
fetch_names += others
bn_fetches = exe.run(program=main,
@@ -142,7 +142,7 @@ class TestSyncBatchNormOpTraining(unittest.TestCase):
if not only_forward:
others = [
'batch_norm_0.tmp_0', 'batch_norm_0.tmp_1', 'bn_scale@GRAD',
'bn_bias@GRAD', 'batch_norm_0.tmp_2@GRAD', 'conv2d_0.tmp_0@GRAD'
'bn_bias@GRAD', 'batch_norm_0.tmp_3@GRAD', 'conv2d_0.tmp_0@GRAD'
]
fetch_names += others
for nm in fetch_names:
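
A note on the tmp_2 to tmp_3 renames in the two test hunks above: create_variable_for_type_inference numbers an op's temporaries in creation order, and the now-unconditional reserve_space variable is created just before the batch norm output, so that output (and therefore its @GRAD name) shifts by one index. A small sketch of that numbering under static graph; the printed names typically look like batch_norm_0.tmp_0 and batch_norm_0.tmp_1:

import paddle
paddle.enable_static()
from paddle.fluid.layer_helper import LayerHelper

helper = LayerHelper('batch_norm')
first = helper.create_variable_for_type_inference(dtype='float32')
second = helper.create_variable_for_type_inference(dtype='float32')
# Consecutive ...tmp_N indices: an extra variable created earlier pushes
# every later temporary (and its gradient name) one slot further.
print(first.name, second.name)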

@@ -166,7 +166,6 @@ def batch_norm(x,
batch_norm_out = paddle.nn.functional.batch_norm(x, rm, rv, w, b)
print(batch_norm_out)
"""
assert len(x.shape) >= 2, "input dim must be larger than 1"
# input and out must share the memory
@@ -196,7 +195,6 @@
batch_norm_out, _, _, _, _, _ = core.ops.batch_norm(
x, weight, bias, running_mean, running_var, mean_out, variance_out,
*attrs)
return dygraph_utils._append_activation_in_dygraph(
batch_norm_out, act=None)
@@ -230,13 +228,16 @@
saved_variance = helper.create_variable_for_type_inference(
dtype=dtype, stop_gradient=True)
batch_norm_out = helper.create_variable_for_type_inference(dtype)
reserve_space = helper.create_variable_for_type_inference(
dtype=x.dtype, stop_gradient=True)
outputs = {
"Y": [batch_norm_out],
"MeanOut": [running_mean],
"VarianceOut": [running_var],
"SavedMean": [saved_mean],
"SavedVariance": [saved_variance]
"SavedVariance": [saved_variance],
"ReserveSpace": [reserve_space]
}
helper.append_op(
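
Finally, a runnable dygraph usage sketch for the functional API in the last hunks, mirroring the docstring example above; the Python signature is unchanged, only the op's static-graph output list grows by ReserveSpace:

import numpy as np
import paddle

x = paddle.to_tensor(np.random.rand(2, 3, 4, 4).astype('float32'))
running_mean = paddle.to_tensor(np.zeros(3).astype('float32'))
running_var = paddle.to_tensor(np.ones(3).astype('float32'))
weight = paddle.to_tensor(np.ones(3).astype('float32'))
bias = paddle.to_tensor(np.zeros(3).astype('float32'))
out = paddle.nn.functional.batch_norm(
    x, running_mean, running_var, weight, bias)
print(out.shape)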
