| 1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. |
| 2 | |
| 3 | Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | you may not use this file except in compliance with the License. |
| 5 | You may obtain a copy of the License at |
| 6 | |
| 7 | http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | |
| 9 | Unless required by applicable law or agreed to in writing, software |
| 10 | distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | See the License for the specific language governing permissions and |
| 13 | limitations under the License. |
| 14 | ==============================================================================*/ |
| 15 | #include <algorithm> |
| 16 | #include <cmath> |
| 17 | #include <limits> |
| 18 | |
| 19 | #include "tensorflow/contrib/lite/kernels/internal/compatibility.h" |
| 20 | #include "tensorflow/contrib/lite/kernels/internal/quantization_util.h" |
| 21 | #include "tensorflow/contrib/lite/kernels/internal/round.h" |
| 22 | |
| 23 | namespace tflite { |
| 24 | |
| 25 | void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier, |
| 26 | int* shift) { |
| 27 | if (double_multiplier == 0.) { |
| 28 | *quantized_multiplier = 0; |
| 29 | *shift = 0; |
| 30 | return; |
| 31 | } |
| 32 | const double q = std::frexp(double_multiplier, shift); |
| 33 | auto q_fixed = static_cast<int64_t>(TfLiteRound(q * (1ll << 31))); |
| 34 | TFLITE_CHECK(q_fixed <= (1ll << 31)); |
| 35 | if (q_fixed == (1ll << 31)) { |
| 36 | q_fixed /= 2; |
| 37 | ++*shift; |
| 38 | } |
| 39 | TFLITE_CHECK_LE(q_fixed, std::numeric_limits<int32_t>::max()); |
| 40 | *quantized_multiplier = static_cast<int32_t>(q_fixed); |
| 41 | } |
| 42 | |
| 43 | void QuantizeMultiplierGreaterThanOne(double double_multiplier, |
| 44 | int32_t* quantized_multiplier, |
| 45 | int* left_shift) { |
| 46 | TFLITE_CHECK_GT(double_multiplier, 1.); |
| 47 | QuantizeMultiplier(double_multiplier, quantized_multiplier, left_shift); |
| 48 | TFLITE_CHECK_GE(*left_shift, 0); |
| 49 | } |
| 50 | |
| 51 | void QuantizeMultiplierSmallerThanOne(double double_multiplier, |
| 52 | int32_t* quantized_multiplier, |
| 53 | int* right_shift) { |
| 54 | TFLITE_CHECK_LT(double_multiplier, 1.); |
| 55 | TFLITE_CHECK_GT(double_multiplier, 0.); |
| 56 | int shift; |
| 57 | QuantizeMultiplier(double_multiplier, quantized_multiplier, &shift); |
| 58 | TFLITE_CHECK_LE(shift, 0); |
| 59 | *right_shift = -shift; |
| 60 | } |
| 61 | |
| 62 | void PreprocessSoftmaxScaling(double beta, double input_scale, |
| 63 | int input_integer_bits, |
| 64 | int32_t* quantized_multiplier, int* left_shift) { |
| 65 | // If the overall multiplier (input and beta) is large, then exp() of an |
| 66 | // input difference of 1 scaled by this will be large. In other words, we |
| 67 | // can cap the multiplier and know that, when it is used, the output will be |
| 68 | // (round to) zero wherever the input is not at the maximum value. |
| 69 | |
| 70 | // If the overall scale is less than one, and input_integer_bits=0, then the |
| 71 | // result is double equivalent of Q0.31 (actually with more precision). Thus |
| 72 | // this generates a Q(input_integer_bits).(31-input_integer_bits) |
| 73 | // representation. |
| 74 | const double input_beta_real_multiplier = std::min( |
| 75 | beta * input_scale * (1 << (31 - input_integer_bits)), (1ll << 31) - 1.0); |
| 76 | |
| 77 | QuantizeMultiplierGreaterThanOne(input_beta_real_multiplier, |
| 78 | quantized_multiplier, left_shift); |
| 79 | } |
| 80 | |
// Returns floor((2^input_integer_bits - 1) * 2^(31 - input_integer_bits) /
// 2^input_left_shift) as an int.
//
//   input_integer_bits: number of integer bits of the fixed-point format.
//   input_left_shift:   left shift that is divided back out of the bound.
int CalculateInputRadius(int input_integer_bits, int input_left_shift) {
  // All shifts are done in 64 bits. A 32-bit `1 << input_integer_bits` would
  // overflow for input_integer_bits == 31, and the other two factors were
  // already 64-bit.
  const long long max_integer_value = (1ll << input_integer_bits) - 1;
  const long long fractional_scale = 1ll << (31 - input_integer_bits);
  const long long shift_divisor = 1ll << input_left_shift;

  // The leading 1.0 promotes the whole expression to double before the
  // multiplication and division are carried out.
  const double max_input_rescaled =
      1.0 * max_integer_value * fractional_scale / shift_divisor;

  // Tighten bound using floor. Suppose that we could use the exact value.
  // After scaling the difference, the result would be at the maximum. Thus we
  // must ensure that our value has lower magnitude.
  return static_cast<int>(std::floor(max_input_rescaled));
}
| 90 | |
| 91 | } // namespace tflite |
| 92 | |