RMW desert 1.0
|
Main header file for half-precision functionality. More...
Go to the source code of this file.
Namespaces | |
namespace | half_float |
namespace | std |
Extensions to the C++ standard library. | |
Macros | |
#define | HALF_GCC_VERSION (__GNUC__*100+__GNUC_MINOR__) |
#define | HALF_ICC_VERSION 0 |
#define | HALF_ERRHANDLING (HALF_ERRHANDLING_FLAGS||HALF_ERRHANDLING_ERRNO||HALF_ERRHANDLING_FENV||HALF_ERRHANDLING_THROWS) |
#define | HALF_UNUSED_NOERR(name) |
#define | HALF_CONSTEXPR |
#define | HALF_CONSTEXPR_CONST const |
#define | HALF_CONSTEXPR_NOERR |
#define | HALF_NOEXCEPT |
#define | HALF_NOTHROW throw() |
#define | HALF_THREAD_LOCAL static |
#define | HALF_ENABLE_F16C_INTRINSICS __F16C__ |
#define | HALF_ERRHANDLING_OVERFLOW_TO_INEXACT 1 |
#define | HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT 1 |
#define | HALF_ROUND_STYLE 1 |
#define | HUGE_VALH std::numeric_limits<half_float::half>::infinity() |
#define | FP_FAST_FMAH 1 |
#define | HLF_ROUNDS HALF_ROUND_STYLE |
#define | FP_ILOGB0 INT_MIN |
#define | FP_ILOGBNAN INT_MAX |
#define | FP_SUBNORMAL 0 |
#define | FP_ZERO 1 |
#define | FP_NAN 2 |
#define | FP_INFINITE 3 |
#define | FP_NORMAL 4 |
#define | FE_INVALID 0x10 |
#define | FE_DIVBYZERO 0x08 |
#define | FE_OVERFLOW 0x04 |
#define | FE_UNDERFLOW 0x02 |
#define | FE_INEXACT 0x01 |
#define | FE_ALL_EXCEPT (FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT) |
Typedefs | |
typedef bool_type< true > | half_float::detail::true_type |
typedef bool_type< false > | half_float::detail::false_type |
typedef unsigned short | half_float::detail::uint16 |
Unsigned integer of (at least) 16 bits width. | |
typedef unsigned long | half_float::detail::uint32 |
Fastest unsigned integer of (at least) 32 bits width. | |
typedef long | half_float::detail::int32 |
Fastest signed integer of (at least) 32 bits width. | |
Functions | |
Implementation defined classification and arithmetic | |
template<typename T > | |
bool | half_float::detail::builtin_isinf (T arg) |
template<typename T > | |
bool | half_float::detail::builtin_isnan (T arg) |
template<typename T > | |
bool | half_float::detail::builtin_signbit (T arg) |
uint32 | half_float::detail::sign_mask (uint32 arg) |
uint32 | half_float::detail::arithmetic_shift (uint32 arg, int i) |
Error handling | |
int & | half_float::detail::errflags () |
void | half_float::detail::raise (int HALF_UNUSED_NOERR(flags), bool HALF_UNUSED_NOERR(cond)=true) |
HALF_CONSTEXPR_NOERR bool | half_float::detail::compsignal (unsigned int x, unsigned int y) |
HALF_CONSTEXPR_NOERR unsigned int | half_float::detail::signal (unsigned int nan) |
HALF_CONSTEXPR_NOERR unsigned int | half_float::detail::signal (unsigned int x, unsigned int y) |
HALF_CONSTEXPR_NOERR unsigned int | half_float::detail::signal (unsigned int x, unsigned int y, unsigned int z) |
HALF_CONSTEXPR_NOERR unsigned int | half_float::detail::select (unsigned int x, unsigned int HALF_UNUSED_NOERR(y)) |
HALF_CONSTEXPR_NOERR unsigned int | half_float::detail::invalid () |
HALF_CONSTEXPR_NOERR unsigned int | half_float::detail::pole (unsigned int sign=0) |
HALF_CONSTEXPR_NOERR unsigned int | half_float::detail::check_underflow (unsigned int arg) |
Conversion and rounding | |
template<std::float_round_style R> | |
HALF_CONSTEXPR_NOERR unsigned int | half_float::detail::overflow (unsigned int sign=0) |
template<std::float_round_style R> | |
HALF_CONSTEXPR_NOERR unsigned int | half_float::detail::underflow (unsigned int sign=0) |
template<std::float_round_style R, bool I> | |
HALF_CONSTEXPR_NOERR unsigned int | half_float::detail::rounded (unsigned int value, int g, int s) |
template<std::float_round_style R, bool E, bool I> | |
unsigned int | half_float::detail::integral (unsigned int value) |
template<std::float_round_style R, unsigned int F, bool S, bool N, bool I> | |
unsigned int | half_float::detail::fixed2half (uint32 m, int exp=14, unsigned int sign=0, int s=0) |
template<std::float_round_style R> | |
unsigned int | half_float::detail::float2half_impl (float value, true_type) |
template<std::float_round_style R> | |
unsigned int | half_float::detail::float2half_impl (double value, true_type) |
template<std::float_round_style R, typename T > | |
unsigned int | half_float::detail::float2half_impl (T value,...) |
template<std::float_round_style R, typename T > | |
unsigned int | half_float::detail::float2half (T value) |
template<std::float_round_style R, typename T > | |
unsigned int | half_float::detail::int2half (T value) |
float | half_float::detail::half2float_impl (unsigned int value, float, true_type) |
double | half_float::detail::half2float_impl (unsigned int value, double, true_type) |
template<typename T > | |
T | half_float::detail::half2float_impl (unsigned int value, T,...) |
template<typename T > | |
T | half_float::detail::half2float (unsigned int value) |
template<std::float_round_style R, bool E, bool I, typename T > | |
T | half_float::detail::half2int (unsigned int value) |
Mathematics | |
template<std::float_round_style R> | |
uint32 | half_float::detail::mulhi (uint32 x, uint32 y) |
uint32 | half_float::detail::multiply64 (uint32 x, uint32 y) |
uint32 | half_float::detail::divide64 (uint32 x, uint32 y, int &s) |
template<bool Q, bool R> | |
unsigned int | half_float::detail::mod (unsigned int x, unsigned int y, int *quo=NULL) |
template<unsigned int F> | |
uint32 | half_float::detail::sqrt (uint32 &r, int &exp) |
uint32 | half_float::detail::exp2 (uint32 m, unsigned int n=32) |
uint32 | half_float::detail::log2 (uint32 m, unsigned int n=32) |
std::pair< uint32, uint32 > | half_float::detail::sincos (uint32 mz, unsigned int n=31) |
uint32 | half_float::detail::atan2 (uint32 my, uint32 mx, unsigned int n=31) |
uint32 | half_float::detail::angle_arg (unsigned int abs, int &k) |
std::pair< uint32, uint32 > | half_float::detail::atan2_args (unsigned int abs) |
std::pair< uint32, uint32 > | half_float::detail::hyperbolic_args (unsigned int abs, int &exp, unsigned int n=32) |
template<std::float_round_style R> | |
unsigned int | half_float::detail::exp2_post (uint32 m, int exp, bool esign, unsigned int sign=0, unsigned int n=32) |
template<std::float_round_style R, uint32 L> | |
unsigned int | half_float::detail::log2_post (uint32 m, int ilog, int exp, unsigned int sign=0) |
template<std::float_round_style R> | |
unsigned int | half_float::detail::hypot_post (uint32 r, int exp) |
template<std::float_round_style R> | |
unsigned int | half_float::detail::tangent_post (uint32 my, uint32 mx, int exp, unsigned int sign=0) |
template<std::float_round_style R, bool S> | |
unsigned int | half_float::detail::area (unsigned int arg) |
template<std::float_round_style R, bool C> | |
unsigned int | half_float::detail::erf (unsigned int arg) |
template<std::float_round_style R, bool L> | |
unsigned int | half_float::detail::gamma (unsigned int arg) |
Comparison operators | |
HALF_CONSTEXPR_NOERR bool | half_float::operator== (half x, half y) |
HALF_CONSTEXPR_NOERR bool | half_float::operator!= (half x, half y) |
HALF_CONSTEXPR_NOERR bool | half_float::operator< (half x, half y) |
HALF_CONSTEXPR_NOERR bool | half_float::operator> (half x, half y) |
HALF_CONSTEXPR_NOERR bool | half_float::operator<= (half x, half y) |
HALF_CONSTEXPR_NOERR bool | half_float::operator>= (half x, half y) |
Arithmetic operators | |
HALF_CONSTEXPR half | half_float::operator+ (half arg) |
HALF_CONSTEXPR half | half_float::operator- (half arg) |
half | half_float::operator+ (half x, half y) |
half | half_float::operator- (half x, half y) |
half | half_float::operator* (half x, half y) |
half | half_float::operator/ (half x, half y) |
Input and output | |
template<typename charT , typename traits > | |
std::basic_ostream< charT, traits > & | half_float::operator<< (std::basic_ostream< charT, traits > &out, half arg) |
template<typename charT , typename traits > | |
std::basic_istream< charT, traits > & | half_float::operator>> (std::basic_istream< charT, traits > &in, half &arg) |
Basic mathematical operations | |
HALF_CONSTEXPR half | half_float::fabs (half arg) |
HALF_CONSTEXPR half | half_float::abs (half arg) |
half | half_float::fmod (half x, half y) |
half | half_float::remainder (half x, half y) |
half | half_float::remquo (half x, half y, int *quo) |
half | half_float::fma (half x, half y, half z) |
HALF_CONSTEXPR_NOERR half | half_float::fmax (half x, half y) |
HALF_CONSTEXPR_NOERR half | half_float::fmin (half x, half y) |
half | half_float::fdim (half x, half y) |
half | half_float::nanh (const char *arg) |
Exponential functions | |
half | half_float::exp (half arg) |
half | half_float::exp2 (half arg) |
half | half_float::expm1 (half arg) |
half | half_float::log (half arg) |
half | half_float::log10 (half arg) |
half | half_float::log2 (half arg) |
half | half_float::log1p (half arg) |
Power functions | |
half | half_float::sqrt (half arg) |
half | half_float::rsqrt (half arg) |
half | half_float::cbrt (half arg) |
half | half_float::hypot (half x, half y) |
half | half_float::hypot (half x, half y, half z) |
half | half_float::pow (half x, half y) |
Trigonometric functions | |
void | half_float::sincos (half arg, half *sin, half *cos) |
half | half_float::sin (half arg) |
half | half_float::cos (half arg) |
half | half_float::tan (half arg) |
half | half_float::asin (half arg) |
half | half_float::acos (half arg) |
half | half_float::atan (half arg) |
half | half_float::atan2 (half y, half x) |
Hyperbolic functions | |
half | half_float::sinh (half arg) |
half | half_float::cosh (half arg) |
half | half_float::tanh (half arg) |
half | half_float::asinh (half arg) |
half | half_float::acosh (half arg) |
half | half_float::atanh (half arg) |
Error and gamma functions | |
half | half_float::erf (half arg) |
half | half_float::erfc (half arg) |
half | half_float::lgamma (half arg) |
half | half_float::tgamma (half arg) |
Rounding | |
half | half_float::ceil (half arg) |
half | half_float::floor (half arg) |
half | half_float::trunc (half arg) |
half | half_float::round (half arg) |
long | half_float::lround (half arg) |
half | half_float::rint (half arg) |
long | half_float::lrint (half arg) |
half | half_float::nearbyint (half arg) |
Floating point manipulation | |
half | half_float::frexp (half arg, int *exp) |
half | half_float::scalbln (half arg, long exp) |
half | half_float::scalbn (half arg, int exp) |
half | half_float::ldexp (half arg, int exp) |
half | half_float::modf (half arg, half *iptr) |
int | half_float::ilogb (half arg) |
half | half_float::logb (half arg) |
half | half_float::nextafter (half from, half to) |
half | half_float::nexttoward (half from, long double to) |
HALF_CONSTEXPR half | half_float::copysign (half x, half y) |
Floating point classification | |
HALF_CONSTEXPR int | half_float::fpclassify (half arg) |
HALF_CONSTEXPR bool | half_float::isfinite (half arg) |
HALF_CONSTEXPR bool | half_float::isinf (half arg) |
HALF_CONSTEXPR bool | half_float::isnan (half arg) |
HALF_CONSTEXPR bool | half_float::isnormal (half arg) |
HALF_CONSTEXPR bool | half_float::signbit (half arg) |
Comparison | |
HALF_CONSTEXPR bool | half_float::isgreater (half x, half y) |
HALF_CONSTEXPR bool | half_float::isgreaterequal (half x, half y) |
HALF_CONSTEXPR bool | half_float::isless (half x, half y) |
HALF_CONSTEXPR bool | half_float::islessequal (half x, half y) |
HALF_CONSTEXPR bool | half_float::islessgreater (half x, half y) |
HALF_CONSTEXPR bool | half_float::isunordered (half x, half y) |
Casting | |
template<typename T , typename U > | |
T | half_float::half_cast (U arg) |
template<typename T , std::float_round_style R, typename U > | |
T | half_float::half_cast (U arg) |
Error handling | |
int | half_float::feclearexcept (int excepts) |
int | half_float::fetestexcept (int excepts) |
int | half_float::feraiseexcept (int excepts) |
int | half_float::fegetexceptflag (int *flagp, int excepts) |
int | half_float::fesetexceptflag (const int *flagp, int excepts) |
void | half_float::fethrowexcept (int excepts, const char *msg="") |
Variables | |
HALF_CONSTEXPR_CONST binary_t | half_float::detail::binary = binary_t() |
Tag for binary construction. | |
Main header file for half-precision functionality.
Main header file for half-precision functionality.
#define FP_FAST_FMAH 1 |
Fast half-precision fma function. This symbol is defined if the fma() function generally executes as fast as, or faster than, a separate half-precision multiplication followed by an addition, which is always the case.
See also: Documentation for FP_FAST_FMA
#define HALF_ENABLE_F16C_INTRINSICS __F16C__ |
Enable F16C instruction set intrinsics. Defining this to 1 enables the use of F16C compiler intrinsics for converting between half-precision and single-precision values, which may result in improved performance. This will not perform additional checks for support of the F16C instruction set, so an appropriate target platform is required when enabling this feature.
Unless predefined, it will be enabled automatically when the __F16C__ symbol is defined, which some compilers do on supporting platforms.
#define HALF_ERRHANDLING_OVERFLOW_TO_INEXACT 1 |
Raise INEXACT exception on overflow. Defining this to 1 (default) causes overflow errors to automatically raise inexact exceptions in addition. These will be raised after any possible handling of the overflow exception.
#define HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT 1 |
Raise INEXACT exception on underflow. Defining this to 1 (default) causes underflow errors to automatically raise inexact exceptions in addition. These will be raised after any possible handling of the underflow exception.
Note: This will actually cause underflow (and the accompanying inexact) exceptions to be raised only when the result is inexact, while if disabled bare underflow errors will be raised for any (possibly exact) subnormal result.
#define HALF_ROUND_STYLE 1 |
Default rounding mode. This specifies the rounding mode used for all conversions between halfs and more precise types (unless using half_cast() and specifying the rounding mode directly) as well as in arithmetic operations and mathematical functions. It can be redefined (before including half.hpp) to one of the standard rounding modes using their respective constants or the equivalent values of std::float_round_style:
std::float_round_style | value | rounding |
---|---|---|
std::round_indeterminate | -1 | fastest |
std::round_toward_zero | 0 | toward zero |
std::round_to_nearest | 1 | to nearest (default) |
std::round_toward_infinity | 2 | toward positive infinity |
std::round_toward_neg_infinity | 3 | toward negative infinity |
By default this is set to 1 (std::round_to_nearest), which rounds results to the nearest representable value. It can even be set to std::numeric_limits<float>::round_style to synchronize the rounding mode with that of the built-in single-precision implementation (which is likely std::round_to_nearest, though).
#define HLF_ROUNDS HALF_ROUND_STYLE |
Half rounding mode. In correspondence with FLT_ROUNDS from <cfloat>, this symbol expands to the rounding mode used for half-precision operations. It is an alias for HALF_ROUND_STYLE.
See also: Documentation for FLT_ROUNDS
#define HUGE_VALH std::numeric_limits<half_float::half>::infinity() |
Value signaling overflow. In correspondence with HUGE_VAL[F|L] from <cmath>, this symbol expands to a positive value signaling the overflow of an operation; in particular, it just evaluates to positive infinity.
See also: Documentation for HUGE_VAL
|
inline |
Reduce argument for trigonometric functions.
abs | half-precision floating-point value |
k | value to take quarter period |
unsigned int half_float::detail::area | ( | unsigned int | arg | ) |
Area function and postprocessing. This computes the value directly in Q2.30 using the representations asinh(x) = log(x+sqrt(x^2+1)) and acosh(x) = log(x+sqrt(x^2-1)).
R | rounding mode to use |
S | true for asinh, false for acosh |
arg | half-precision argument |
FE_OVERFLOW | on overflows |
FE_UNDERFLOW | on underflows |
FE_INEXACT | if no other exception occurred |
|
inline |
Platform-independent arithmetic right shift.
arg | integer value in two's complement |
i | shift amount (at most 31) |
Fixed point arc tangent. This uses the CORDIC algorithm in vectoring mode.
my | y coordinate as Q0.30 |
mx | x coordinate as Q0.30 |
n | number of iterations (at most 31) |
|
inline |
Get arguments for atan2 function.
abs | half-precision floating-point value |
bool half_float::detail::builtin_isinf | ( | T | arg | ) |
Check for infinity.
T | argument type (builtin floating-point type) |
arg | value to query |
true | if infinity |
false | else |
bool half_float::detail::builtin_isnan | ( | T | arg | ) |
Check for NaN.
T | argument type (builtin floating-point type) |
arg | value to query |
true | if not a number |
false | else |
bool half_float::detail::builtin_signbit | ( | T | arg | ) |
Check sign.
T | argument type (builtin floating-point type) |
arg | value to query |
true | if signbit set |
false | else |
|
inline |
Check value for underflow.
arg | non-zero half-precision value to check |
FE_UNDERFLOW | if arg is subnormal |
|
inline |
Check and signal for any NaN.
x | first half-precision value to check |
y | second half-precision value to check |
true | if either x or y is NaN |
false | else |
FE_INVALID | if x or y is NaN |
64-bit division.
x | upper 32 bit of dividend |
y | divisor |
s | variable to store sticky bit for rounding |
unsigned int half_float::detail::erf | ( | unsigned int | arg | ) |
Error function and postprocessing. This computes the value directly in Q1.31 using the approximations given here.
R | rounding mode to use |
C | true for complementary error function, false else |
arg | half-precision function argument |
FE_OVERFLOW | on overflows |
FE_UNDERFLOW | on underflows |
FE_INEXACT | if no other exception occurred |
|
inline |
Internal exception flags.
|
inline |
Fixed point binary exponential. This uses the BKM algorithm in E-mode.
m | exponent in [0,1) as Q0.31 |
n | number of iterations (at most 32) |
unsigned int half_float::detail::exp2_post | ( | uint32 | m, |
int | exp, | ||
bool | esign, | ||
unsigned int | sign = 0 , |
||
unsigned int | n = 32 |
||
) |
Postprocessing for binary exponential.
R | rounding mode to use |
m | fractional part of exponent as Q0.31 |
exp | absolute value of unbiased exponent |
esign | sign of actual exponent |
sign | sign bit of result |
n | number of BKM iterations (at most 32) |
FE_OVERFLOW | on overflows |
FE_UNDERFLOW | on underflows |
FE_INEXACT | if value had to be rounded or I is `true` |
unsigned int half_float::detail::fixed2half | ( | uint32 | m, |
int | exp = 14 , |
||
unsigned int | sign = 0 , |
||
int | s = 0 |
||
) |
Convert fixed point to half-precision floating-point.
R | rounding mode to use |
F | number of fractional bits in [11,31] |
S | true for signed, false for unsigned |
N | true for additional normalization step, false if already normalized to 1.F |
I | true to always raise INEXACT exception, false to raise only for rounded results |
m | mantissa in Q1.F fixed point format |
exp | biased exponent - 1 |
sign | half-precision value with sign bit only |
s | sticky bit (or of all but the most significant already discarded bits) |
FE_OVERFLOW | on overflows |
FE_UNDERFLOW | on underflows |
FE_INEXACT | if value had to be rounded or I is `true` |
unsigned int half_float::detail::float2half | ( | T | value | ) |
Convert floating-point to half-precision.
R | rounding mode to use |
T | source type (builtin floating-point type) |
value | floating-point value to convert |
FE_OVERFLOW | on overflows |
FE_UNDERFLOW | on underflows |
FE_INEXACT | if value had to be rounded |
unsigned int half_float::detail::float2half_impl | ( | double | value, |
true_type | |||
) |
Convert IEEE double-precision to half-precision.
R | rounding mode to use |
value | double-precision value to convert |
FE_OVERFLOW | on overflows |
FE_UNDERFLOW | on underflows |
FE_INEXACT | if value had to be rounded |
unsigned int half_float::detail::float2half_impl | ( | float | value, |
true_type | |||
) |
Convert IEEE single-precision to half-precision. Credit for this goes to Jeroen van der Zijp.
R | rounding mode to use |
value | single-precision value to convert |
FE_OVERFLOW | on overflows |
FE_UNDERFLOW | on underflows |
FE_INEXACT | if value had to be rounded |
unsigned int half_float::detail::float2half_impl | ( | T | value, |
... | |||
) |
Convert non-IEEE floating-point to half-precision.
R | rounding mode to use |
T | source type (builtin floating-point type) |
value | floating-point value to convert |
FE_OVERFLOW | on overflows |
FE_UNDERFLOW | on underflows |
FE_INEXACT | if value had to be rounded |
unsigned int half_float::detail::gamma | ( | unsigned int | arg | ) |
Gamma function and postprocessing. This approximates the value of either the gamma function or its logarithm directly in Q1.31.
R | rounding mode to use |
L | true for logarithm of gamma function, false for gamma function |
arg | half-precision floating-point value |
FE_OVERFLOW | on overflows |
FE_UNDERFLOW | on underflows |
FE_INEXACT | if arg is not a positive integer |
T half_float::detail::half2float | ( | unsigned int | value | ) |
Convert half-precision to floating-point.
T | type to convert to (builtin floating-point type) |
value | half-precision value to convert |
|
inline |
Convert half-precision to IEEE double-precision.
value | half-precision value to convert |
|
inline |
Convert half-precision to IEEE single-precision. Credit for this goes to Jeroen van der Zijp.
value | half-precision value to convert |
T half_float::detail::half2float_impl | ( | unsigned int | value, |
T | , | ||
... | |||
) |
Convert half-precision to non-IEEE floating-point.
T | type to convert to (builtin floating-point type) |
value | half-precision value to convert |
T half_float::detail::half2int | ( | unsigned int | value | ) |
Convert half-precision floating-point to integer.
R | rounding mode to use |
E | true for round to even, false for round away from zero |
I | true to raise INEXACT exception (if inexact), false to never raise it |
T | type to convert to (builtin integer type with at least 16 bits precision, excluding any implicit sign bits) |
value | half-precision value to convert |
FE_INVALID | if value is not representable in type T |
FE_INEXACT | if value had to be rounded and I is `true` |
|
inline |
Get exponentials for hyperbolic computation
abs | half-precision floating-point value |
exp | variable to take unbiased exponent of larger result |
n | number of BKM iterations (at most 32) |
unsigned int half_float::detail::hypot_post | ( | uint32 | r, |
int | exp | ||
) |
Hypotenuse square root and postprocessing.
R | rounding mode to use |
r | mantissa as Q2.30 |
exp | biased exponent |
FE_OVERFLOW | on overflows |
FE_UNDERFLOW | on underflows |
FE_INEXACT | if value had to be rounded |
unsigned int half_float::detail::int2half | ( | T | value | ) |
Convert integer to half-precision floating-point.
R | rounding mode to use |
T | type to convert (builtin integer type) |
value | integral value to convert |
FE_OVERFLOW | on overflows |
FE_INEXACT | if value had to be rounded |
unsigned int half_float::detail::integral | ( | unsigned int | value | ) |
Round half-precision number to nearest integer value.
R | rounding mode to use |
E | true for round to even, false for round away from zero |
I | true to raise INEXACT exception (if inexact), false to never raise it |
value | half-precision value to round |
FE_INVALID | for signaling NaN |
FE_INEXACT | if value had to be rounded and I is `true` |
|
inline |
Raise domain error and return NaN. Returns: quiet NaN
FE_INVALID |
|
inline |
Fixed point binary logarithm. This uses the BKM algorithm in L-mode.
m | mantissa in [1,2) as Q1.30 |
n | number of iterations (at most 32) |
unsigned int half_float::detail::log2_post | ( | uint32 | m, |
int | ilog, | ||
int | exp, | ||
unsigned int | sign = 0 |
||
) |
Postprocessing for binary logarithm.
R | rounding mode to use |
L | logarithm for base transformation as Q1.31 |
m | fractional part of logarithm as Q0.31 |
ilog | signed integer part of logarithm |
exp | biased exponent of result |
sign | sign bit of result |
FE_OVERFLOW | on overflows |
FE_UNDERFLOW | on underflows |
FE_INEXACT | if no other exception occurred |
unsigned int half_float::detail::mod | ( | unsigned int | x, |
unsigned int | y, | ||
int * | quo = NULL |
||
) |
Half precision positive modulus.
Q | true to compute full quotient, false else |
R | true to compute signed remainder, false for positive remainder |
x | first operand as positive finite half-precision value |
y | second operand as positive finite half-precision value |
quo | address to store quotient at, nullptr if Q false |
Upper part of 64-bit multiplication.
R | rounding mode to use |
x | first factor |
y | second factor |
64-bit multiplication.
x | first factor |
y | second factor |
HALF_CONSTEXPR_NOERR unsigned int half_float::detail::overflow | ( | unsigned int | sign = 0 | ) |
Half-precision overflow.
R | rounding mode to use |
sign | half-precision value with sign bit only |
FE_OVERFLOW |
|
inline |
Raise pole error and return infinity.
sign | half-precision value with sign bit only |
FE_DIVBYZERO |
|
inline |
Raise floating-point exception.
flags | exceptions to raise |
cond | condition to raise exceptions for |
HALF_CONSTEXPR_NOERR unsigned int half_float::detail::rounded | ( | unsigned int | value, |
int | g, | ||
int | s | ||
) |
Round half-precision number.
R | rounding mode to use |
I | true to always raise INEXACT exception, false to raise only for rounded results |
value | finite half-precision number to round |
g | guard bit (most significant discarded bit) |
s | sticky bit (or of all but the most significant discarded bits) |
FE_OVERFLOW | on overflows |
FE_UNDERFLOW | on underflows |
FE_INEXACT | if value had to be rounded or I is `true` |
|
inline |
Select value or signaling NaN.
x | preferred half-precision value |
y | ignored half-precision value except for signaling NaN |
FE_INVALID | if y is signaling NaN |
|
inline |
Platform-independent sign mask.
arg | integer value in two's complement |
-1 | if arg negative |
0 | if arg positive |
|
inline |
Signal and silence signaling NaN.
nan | half-precision NaN value |
FE_INVALID | if nan is signaling NaN |
|
inline |
Signal and silence signaling NaNs.
x | first half-precision value to check |
y | second half-precision value to check |
FE_INVALID | if x or y is signaling NaN |
|
inline |
Signal and silence signaling NaNs.
x | first half-precision value to check |
y | second half-precision value to check |
z | third half-precision value to check |
FE_INVALID | if x, y or z is signaling NaN |
|
inline |
Fixed point sine and cosine. This uses the CORDIC algorithm in rotation mode.
mz | angle in [-pi/2,pi/2] as Q1.30 |
n | number of iterations (at most 31) |
uint32 half_float::detail::sqrt | ( | uint32 & | r, |
int & | exp | ||
) |
Fixed point square root.
F | number of fractional bits |
r | radicand in Q1.F fixed point format |
exp | exponent |
unsigned int half_float::detail::tangent_post | ( | uint32 | my, |
uint32 | mx, | ||
int | exp, | ||
unsigned int | sign = 0 |
||
) |
Division and postprocessing for tangents.
R | rounding mode to use |
my | dividend as Q1.31 |
mx | divisor as Q1.31 |
exp | biased exponent of result |
sign | sign bit of result |
FE_OVERFLOW | on overflows |
FE_UNDERFLOW | on underflows |
FE_INEXACT | if no other exception occurred |
HALF_CONSTEXPR_NOERR unsigned int half_float::detail::underflow | ( | unsigned int | sign = 0 | ) |
Half-precision underflow.
R | rounding mode to use |
sign | half-precision value with sign bit only |
FE_UNDERFLOW |