flush denormals to zero, test=develop (#29924)
* flush denormals to zero, test=develop * add comments, test=developrevert-31562-mean
parent
14bd77f941
commit
181ea1870b
@ -0,0 +1,80 @@
|
||||
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "paddle/fluid/platform/denormal.h"
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
|
||||
// Refer to https://github.com/tensorflow/tensorflow/pull/17141
|
||||
|
||||
// If we're on gcc 4.8 or older, there's a known bug that prevents the use of
|
||||
// intrinsics when the architecture is not defined in the flags. See
|
||||
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57202
|
||||
#if !defined(__SSE3__) && !defined(__clang__) && \
|
||||
(defined(__GNUC__) && (__GNUC__ < 4) || \
|
||||
((__GNUC__ == 4) && (__GNUC_MINOR__ < 9)))
|
||||
#define GCC_WITHOUT_INTRINSICS
|
||||
#endif
|
||||
|
||||
#if !defined(GCC_WITHOUT_INTRINSICS)
|
||||
#define DENORM_USE_INTRINSICS
|
||||
#endif
|
||||
|
||||
#ifdef DENORM_USE_INTRINSICS
|
||||
#include <pmmintrin.h>
|
||||
#endif
|
||||
|
||||
namespace paddle {
|
||||
namespace platform {
|
||||
|
||||
static void SetDenormalState(bool flush_zero_mode, bool denormals_zero_mode) {
|
||||
#ifdef DENORM_USE_INTRINSICS
|
||||
#ifdef PADDLE_WITH_SSE3
|
||||
// Intel's C and Fortran compilers enable the denormals-are-zero (DAZ) and
|
||||
// flush-to-zero (FTZ) flags for SSE by default for optimization levels higher
|
||||
// than -O0.
|
||||
// AArch32 NEON (SIMD) FPU always uses a flush-to-zero mode.
|
||||
// Refer to https://en.wikipedia.org/wiki/Denormal_number
|
||||
// and https://software.intel.com/sites/landingpage/IntrinsicsGuide/
|
||||
_MM_SET_FLUSH_ZERO_MODE(flush_zero_mode ? _MM_FLUSH_ZERO_ON
|
||||
: _MM_FLUSH_ZERO_OFF);
|
||||
_MM_SET_DENORMALS_ZERO_MODE(denormals_zero_mode ? _MM_DENORMALS_ZERO_ON
|
||||
: _MM_DENORMALS_ZERO_OFF);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
static std::pair<bool, bool> GetDenormalState() {
|
||||
#ifdef DENORM_USE_INTRINSICS
|
||||
#ifdef PADDLE_WITH_SSE3
|
||||
bool flush_zero_mode = _MM_GET_FLUSH_ZERO_MODE() == _MM_FLUSH_ZERO_ON;
|
||||
bool denormals_zero_mode =
|
||||
_MM_GET_DENORMALS_ZERO_MODE() == _MM_DENORMALS_ZERO_ON;
|
||||
return {flush_zero_mode, denormals_zero_mode};
|
||||
#endif
|
||||
#endif
|
||||
return {false, false};
|
||||
}
|
||||
|
||||
ScopedRestoreFlushDenormalState::ScopedRestoreFlushDenormalState() {
|
||||
std::tie(flush_zero_mode_, denormals_zero_mode_) = GetDenormalState();
|
||||
}
|
||||
|
||||
ScopedRestoreFlushDenormalState::~ScopedRestoreFlushDenormalState() {
|
||||
SetDenormalState(flush_zero_mode_, denormals_zero_mode_);
|
||||
}
|
||||
|
||||
ScopedFlushDenormal::ScopedFlushDenormal() { SetDenormalState(true, true); }
|
||||
} // namespace platform
|
||||
} // namespace paddle
|
@ -0,0 +1,43 @@
|
||||
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "paddle/fluid/platform/macros.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace platform {
|
||||
|
||||
// Used to restore the initial value at the end of the scope.
|
||||
class ScopedRestoreFlushDenormalState {
|
||||
public:
|
||||
ScopedRestoreFlushDenormalState();
|
||||
~ScopedRestoreFlushDenormalState();
|
||||
|
||||
private:
|
||||
bool flush_zero_mode_;
|
||||
bool denormals_zero_mode_;
|
||||
DISABLE_COPY_AND_ASSIGN(ScopedRestoreFlushDenormalState);
|
||||
};
|
||||
|
||||
class ScopedFlushDenormal {
|
||||
public:
|
||||
ScopedFlushDenormal();
|
||||
|
||||
private:
|
||||
ScopedRestoreFlushDenormalState restore_;
|
||||
DISABLE_COPY_AND_ASSIGN(ScopedFlushDenormal);
|
||||
};
|
||||
} // namespace platform
|
||||
} // namespace paddle
|
Loading…
Reference in new issue