1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #include <tuple> |
17 | |
18 | #include "tensorflow/core/platform/denormal.h" |
19 | #include "tensorflow/core/platform/cpu_info.h" |
20 | #include "tensorflow/core/platform/logging.h" |
21 | #include "tensorflow/core/platform/platform.h" |
// GCC 4.8 and older have a known bug that prevents the use of intrinsics when
// the target architecture is not enabled through compiler flags. See
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57202
//
// The GCC version comparison is explicitly guarded by defined(__GNUC__) and
// grouped with parentheses, so the result does not depend on `&&` binding
// tighter than `||` or on an undefined __GNUC__ evaluating to 0.
#if !defined(__SSE3__) && !defined(__clang__) && defined(__GNUC__) && \
    ((__GNUC__ < 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ < 9)))
#define GCC_WITHOUT_INTRINSICS
#endif
// Only try to use SSE3 instructions if we're on an x86 platform, it is not a
// mobile platform, and the compiler is not a known-bad GCC version.
#if defined(PLATFORM_IS_X86) && !defined(IS_MOBILE_PLATFORM) && \
    !defined(GCC_WITHOUT_INTRINSICS)
#define DENORM_USE_INTRINSICS
#endif
36 | |
37 | #ifdef DENORM_USE_INTRINSICS |
38 | #include <pmmintrin.h> |
39 | #endif |
40 | |
41 | namespace tensorflow { |
42 | namespace port { |
43 | |
// Applies the requested flush-to-zero and denormals-are-zero modes to the
// calling context using the SSE intrinsic macros.
//
// This is a no-op when DENORM_USE_INTRINSICS is not defined, or when
// TestCPUFeature(SSE3) reports that SSE3 is unavailable. Only SSE3 is handled
// for now; other architectures such as ARM can be added as needed.
static void SetDenormalState(bool flush_zero_mode, bool denormals_zero_mode) {
#ifdef DENORM_USE_INTRINSICS
  if (!TestCPUFeature(SSE3)) {
    return;
  }

  // Translate the boolean requests into the intrinsic mode constants.
  const auto flush_zero_value =
      flush_zero_mode ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF;
  const auto denormals_zero_value =
      denormals_zero_mode ? _MM_DENORMALS_ZERO_ON : _MM_DENORMALS_ZERO_OFF;

  _MM_SET_FLUSH_ZERO_MODE(flush_zero_value);
  _MM_SET_DENORMALS_ZERO_MODE(denormals_zero_value);
#endif
}
58 | |
59 | static std::pair<bool, bool> GetDernormalState() { |
60 | // For now, we flush denormals only on SSE 3. Other architectures such as ARM |
61 | // can be added as needed. |
62 | |
63 | #ifdef DENORM_USE_INTRINSICS |
64 | if (TestCPUFeature(SSE3)) { |
65 | // Save existing flags |
66 | bool flush_zero_mode = _MM_GET_FLUSH_ZERO_MODE() == _MM_FLUSH_ZERO_ON; |
67 | bool denormals_zero_mode = |
68 | _MM_GET_DENORMALS_ZERO_MODE() == _MM_DENORMALS_ZERO_ON; |
69 | return {flush_zero_mode, denormals_zero_mode}; |
70 | } |
71 | #endif |
72 | return {false, false}; |
73 | } |
74 | |
75 | ScopedRestoreFlushDenormalState::ScopedRestoreFlushDenormalState() { |
76 | std::tie(flush_zero_mode_, denormals_zero_mode_) = GetDernormalState(); |
77 | } |
78 | |
79 | ScopedRestoreFlushDenormalState::~ScopedRestoreFlushDenormalState() { |
80 | SetDenormalState(flush_zero_mode_, denormals_zero_mode_); |
81 | } |
82 | |
83 | ScopedFlushDenormal::ScopedFlushDenormal() { |
84 | SetDenormalState(/*flush_zero_mode=*/true, /*denormals_zero_mode=*/true); |
85 | } |
86 | |
87 | ScopedDontFlushDenormal::ScopedDontFlushDenormal() { |
88 | SetDenormalState(/*flush_zero_mode=*/false, /*denormals_zero_mode=*/false); |
89 | } |
90 | |
91 | } // namespace port |
92 | } // namespace tensorflow |
93 | |