1/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16#include <tuple>
17
18#include "tensorflow/core/platform/denormal.h"
19#include "tensorflow/core/platform/cpu_info.h"
20#include "tensorflow/core/platform/logging.h"
21#include "tensorflow/core/platform/platform.h"
22// If we're on gcc 4.8 or older, there's a known bug that prevents the use of
23// intrinsics when the architecture is not defined in the flags. See
24// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57202
25#if !defined(__SSE3__) && !defined(__clang__) && \
26 (defined(__GNUC__) && (__GNUC__ < 4) || \
27 ((__GNUC__ == 4) && (__GNUC_MINOR__ < 9)))
28#define GCC_WITHOUT_INTRINSICS
29#endif
30// Only try to use SSE3 instructions if we're on an x86 platform, and it's not
31// mobile, and we're not on a known bad gcc version.
32#if defined(PLATFORM_IS_X86) && !defined(IS_MOBILE_PLATFORM) && \
33 !defined(GCC_WITHOUT_INTRINSICS)
34#define DENORM_USE_INTRINSICS
35#endif
36
37#ifdef DENORM_USE_INTRINSICS
38#include <pmmintrin.h>
39#endif
40
41namespace tensorflow {
42namespace port {
43
// Applies the requested flush-to-zero and denormals-are-zero settings.
//
// flush_zero_mode:     true selects _MM_FLUSH_ZERO_ON, false selects
//                      _MM_FLUSH_ZERO_OFF.
// denormals_zero_mode: true selects _MM_DENORMALS_ZERO_ON, false selects
//                      _MM_DENORMALS_ZERO_OFF.
//
// This is a no-op unless DENORM_USE_INTRINSICS is defined at compile time and
// TestCPUFeature(SSE3) succeeds at run time. Only SSE3 is handled for now;
// other architectures such as ARM can be added as needed.
static void SetDenormalState(bool flush_zero_mode, bool denormals_zero_mode) {
#ifdef DENORM_USE_INTRINSICS
  // Without SSE3 there is nothing we know how to configure.
  if (!TestCPUFeature(SSE3)) {
    return;
  }

  // Flush-to-zero setting.
  if (flush_zero_mode) {
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
  } else {
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_OFF);
  }

  // Denormals-are-zero setting.
  if (denormals_zero_mode) {
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
  } else {
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_OFF);
  }
#endif
}
58
// Reads the current denormal-handling settings.
//
// Returns a pair {flush_zero_mode, denormals_zero_mode}: each element is true
// when the corresponding mode currently equals its *_ON value. When
// DENORM_USE_INTRINSICS is not defined, or TestCPUFeature(SSE3) fails, the
// result is {false, false}. Only SSE3 is handled for now; other architectures
// such as ARM can be added as needed.
static std::pair<bool, bool> GetDernormalState() {
  // Default used whenever the hardware state cannot be queried.
  std::pair<bool, bool> state(false, false);

#ifdef DENORM_USE_INTRINSICS
  if (TestCPUFeature(SSE3)) {
    // Capture the flags as they are right now.
    state.first = (_MM_GET_FLUSH_ZERO_MODE() == _MM_FLUSH_ZERO_ON);
    state.second = (_MM_GET_DENORMALS_ZERO_MODE() == _MM_DENORMALS_ZERO_ON);
  }
#endif

  return state;
}
74
75ScopedRestoreFlushDenormalState::ScopedRestoreFlushDenormalState() {
76 std::tie(flush_zero_mode_, denormals_zero_mode_) = GetDernormalState();
77}
78
79ScopedRestoreFlushDenormalState::~ScopedRestoreFlushDenormalState() {
80 SetDenormalState(flush_zero_mode_, denormals_zero_mode_);
81}
82
83ScopedFlushDenormal::ScopedFlushDenormal() {
84 SetDenormalState(/*flush_zero_mode=*/true, /*denormals_zero_mode=*/true);
85}
86
87ScopedDontFlushDenormal::ScopedDontFlushDenormal() {
88 SetDenormalState(/*flush_zero_mode=*/false, /*denormals_zero_mode=*/false);
89}
90
91} // namespace port
92} // namespace tensorflow
93