RMW desert 1.0
Loading...
Searching...
No Matches
Classes | Namespaces | Macros | Typedefs | Variables
half.hpp File Reference

Main header file for half-precision functionality. More...

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

struct  half_float::detail::conditional< bool, T, typename >
 Conditional type. More...
 
struct  half_float::detail::conditional< false, T, F >
 
struct  half_float::detail::bool_type< bool >
 Helper for tag dispatching. More...
 
struct  half_float::detail::is_float< typename >
 Type traits for floating-point types. More...
 
struct  half_float::detail::is_float< const T >
 
struct  half_float::detail::is_float< volatile T >
 
struct  half_float::detail::is_float< const volatile T >
 
struct  half_float::detail::is_float< float >
 
struct  half_float::detail::is_float< double >
 
struct  half_float::detail::is_float< long double >
 
struct  half_float::detail::bits< T >
 Type traits for floating-point bits. More...
 
struct  half_float::detail::bits< const T >
 
struct  half_float::detail::bits< volatile T >
 
struct  half_float::detail::bits< const volatile T >
 
struct  half_float::detail::bits< float >
 Unsigned integer of (at least) 32 bits width. More...
 
struct  half_float::detail::bits< double >
 Unsigned integer of (at least) 64 bits width. More...
 
struct  half_float::detail::binary_t
 Tag type for binary construction. More...
 
struct  half_float::detail::f31
 Class for 1.31 unsigned floating-point computation. More...
 
class  half_float::half
 
struct  half_float::detail::half_caster< T, U, R >
 
class  std::numeric_limits< half_float::half >
 

Namespaces

namespace  half_float
 
namespace  std
 Extensions to the C++ standard library.
 

Macros

#define HALF_GCC_VERSION   (__GNUC__*100+__GNUC_MINOR__)
 
#define HALF_ICC_VERSION   0
 
#define HALF_ERRHANDLING   (HALF_ERRHANDLING_FLAGS||HALF_ERRHANDLING_ERRNO||HALF_ERRHANDLING_FENV||HALF_ERRHANDLING_THROWS)
 
#define HALF_UNUSED_NOERR(name)
 
#define HALF_CONSTEXPR
 
#define HALF_CONSTEXPR_CONST   const
 
#define HALF_CONSTEXPR_NOERR
 
#define HALF_NOEXCEPT
 
#define HALF_NOTHROW   throw()
 
#define HALF_THREAD_LOCAL   static
 
#define HALF_ENABLE_F16C_INTRINSICS   __F16C__
 
#define HALF_ERRHANDLING_OVERFLOW_TO_INEXACT   1
 
#define HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT   1
 
#define HALF_ROUND_STYLE   1
 
#define HUGE_VALH   std::numeric_limits<half_float::half>::infinity()
 
#define FP_FAST_FMAH   1
 
#define HLF_ROUNDS   HALF_ROUND_STYLE
 
#define FP_ILOGB0   INT_MIN
 
#define FP_ILOGBNAN   INT_MAX
 
#define FP_SUBNORMAL   0
 
#define FP_ZERO   1
 
#define FP_NAN   2
 
#define FP_INFINITE   3
 
#define FP_NORMAL   4
 
#define FE_INVALID   0x10
 
#define FE_DIVBYZERO   0x08
 
#define FE_OVERFLOW   0x04
 
#define FE_UNDERFLOW   0x02
 
#define FE_INEXACT   0x01
 
#define FE_ALL_EXCEPT   (FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT)
 

Typedefs

typedef bool_type< true > half_float::detail::true_type
 
typedef bool_type< false > half_float::detail::false_type
 
typedef unsigned short half_float::detail::uint16
 Unsigned integer of (at least) 16 bits width.
 
typedef unsigned long half_float::detail::uint32
 Fastest unsigned integer of (at least) 32 bits width.
 
typedef long half_float::detail::int32
 Fastest signed integer of (at least) 32 bits width.
 

Functions

Implementation defined classification and arithmetic
template<typename T >
bool half_float::detail::builtin_isinf (T arg)
 
template<typename T >
bool half_float::detail::builtin_isnan (T arg)
 
template<typename T >
bool half_float::detail::builtin_signbit (T arg)
 
uint32 half_float::detail::sign_mask (uint32 arg)
 
uint32 half_float::detail::arithmetic_shift (uint32 arg, int i)
 
Error handling
int & half_float::detail::errflags ()
 
void half_float::detail::raise (int HALF_UNUSED_NOERR(flags), bool HALF_UNUSED_NOERR(cond)=true)
 
HALF_CONSTEXPR_NOERR bool half_float::detail::compsignal (unsigned int x, unsigned int y)
 
HALF_CONSTEXPR_NOERR unsigned int half_float::detail::signal (unsigned int nan)
 
HALF_CONSTEXPR_NOERR unsigned int half_float::detail::signal (unsigned int x, unsigned int y)
 
HALF_CONSTEXPR_NOERR unsigned int half_float::detail::signal (unsigned int x, unsigned int y, unsigned int z)
 
HALF_CONSTEXPR_NOERR unsigned int half_float::detail::select (unsigned int x, unsigned int HALF_UNUSED_NOERR(y))
 
HALF_CONSTEXPR_NOERR unsigned int half_float::detail::invalid ()
 
HALF_CONSTEXPR_NOERR unsigned int half_float::detail::pole (unsigned int sign=0)
 
HALF_CONSTEXPR_NOERR unsigned int half_float::detail::check_underflow (unsigned int arg)
 
Conversion and rounding
template<std::float_round_style R>
HALF_CONSTEXPR_NOERR unsigned int half_float::detail::overflow (unsigned int sign=0)
 
template<std::float_round_style R>
HALF_CONSTEXPR_NOERR unsigned int half_float::detail::underflow (unsigned int sign=0)
 
template<std::float_round_style R, bool I>
HALF_CONSTEXPR_NOERR unsigned int half_float::detail::rounded (unsigned int value, int g, int s)
 
template<std::float_round_style R, bool E, bool I>
unsigned int half_float::detail::integral (unsigned int value)
 
template<std::float_round_style R, unsigned int F, bool S, bool N, bool I>
unsigned int half_float::detail::fixed2half (uint32 m, int exp=14, unsigned int sign=0, int s=0)
 
template<std::float_round_style R>
unsigned int half_float::detail::float2half_impl (float value, true_type)
 
template<std::float_round_style R>
unsigned int half_float::detail::float2half_impl (double value, true_type)
 
template<std::float_round_style R, typename T >
unsigned int half_float::detail::float2half_impl (T value,...)
 
template<std::float_round_style R, typename T >
unsigned int half_float::detail::float2half (T value)
 
template<std::float_round_style R, typename T >
unsigned int half_float::detail::int2half (T value)
 
float half_float::detail::half2float_impl (unsigned int value, float, true_type)
 
double half_float::detail::half2float_impl (unsigned int value, double, true_type)
 
template<typename T >
T half_float::detail::half2float_impl (unsigned int value, T,...)
 
template<typename T >
T half_float::detail::half2float (unsigned int value)
 
template<std::float_round_style R, bool E, bool I, typename T >
T half_float::detail::half2int (unsigned int value)
 
Mathematics
template<std::float_round_style R>
uint32 half_float::detail::mulhi (uint32 x, uint32 y)
 
uint32 half_float::detail::multiply64 (uint32 x, uint32 y)
 
uint32 half_float::detail::divide64 (uint32 x, uint32 y, int &s)
 
template<bool Q, bool R>
unsigned int half_float::detail::mod (unsigned int x, unsigned int y, int *quo=NULL)
 
template<unsigned int F>
uint32 half_float::detail::sqrt (uint32 &r, int &exp)
 
uint32 half_float::detail::exp2 (uint32 m, unsigned int n=32)
 
uint32 half_float::detail::log2 (uint32 m, unsigned int n=32)
 
std::pair< uint32, uint32 > half_float::detail::sincos (uint32 mz, unsigned int n=31)
 
uint32 half_float::detail::atan2 (uint32 my, uint32 mx, unsigned int n=31)
 
uint32 half_float::detail::angle_arg (unsigned int abs, int &k)
 
std::pair< uint32, uint32 > half_float::detail::atan2_args (unsigned int abs)
 
std::pair< uint32, uint32 > half_float::detail::hyperbolic_args (unsigned int abs, int &exp, unsigned int n=32)
 
template<std::float_round_style R>
unsigned int half_float::detail::exp2_post (uint32 m, int exp, bool esign, unsigned int sign=0, unsigned int n=32)
 
template<std::float_round_style R, uint32 L>
unsigned int half_float::detail::log2_post (uint32 m, int ilog, int exp, unsigned int sign=0)
 
template<std::float_round_style R>
unsigned int half_float::detail::hypot_post (uint32 r, int exp)
 
template<std::float_round_style R>
unsigned int half_float::detail::tangent_post (uint32 my, uint32 mx, int exp, unsigned int sign=0)
 
template<std::float_round_style R, bool S>
unsigned int half_float::detail::area (unsigned int arg)
 
template<std::float_round_style R, bool C>
unsigned int half_float::detail::erf (unsigned int arg)
 
template<std::float_round_style R, bool L>
unsigned int half_float::detail::gamma (unsigned int arg)
 
Comparison operators

HALF_CONSTEXPR_NOERR bool half_float::operator== (half x, half y)
 
HALF_CONSTEXPR_NOERR bool half_float::operator!= (half x, half y)
 
HALF_CONSTEXPR_NOERR bool half_float::operator< (half x, half y)
 
HALF_CONSTEXPR_NOERR bool half_float::operator> (half x, half y)
 
HALF_CONSTEXPR_NOERR bool half_float::operator<= (half x, half y)
 
HALF_CONSTEXPR_NOERR bool half_float::operator>= (half x, half y)
 
Arithmetic operators

HALF_CONSTEXPR half half_float::operator+ (half arg)
 
HALF_CONSTEXPR half half_float::operator- (half arg)
 
half half_float::operator+ (half x, half y)
 
half half_float::operator- (half x, half y)
 
half half_float::operator* (half x, half y)
 
half half_float::operator/ (half x, half y)
 
Input and output

template<typename charT , typename traits >
std::basic_ostream< charT, traits > & half_float::operator<< (std::basic_ostream< charT, traits > &out, half arg)
 
template<typename charT , typename traits >
std::basic_istream< charT, traits > & half_float::operator>> (std::basic_istream< charT, traits > &in, half &arg)
 
Basic mathematical operations

HALF_CONSTEXPR half half_float::fabs (half arg)
 
HALF_CONSTEXPR half half_float::abs (half arg)
 
half half_float::fmod (half x, half y)
 
half half_float::remainder (half x, half y)
 
half half_float::remquo (half x, half y, int *quo)
 
half half_float::fma (half x, half y, half z)
 
HALF_CONSTEXPR_NOERR half half_float::fmax (half x, half y)
 
HALF_CONSTEXPR_NOERR half half_float::fmin (half x, half y)
 
half half_float::fdim (half x, half y)
 
half half_float::nanh (const char *arg)
 
Exponential functions

half half_float::exp (half arg)
 
half half_float::exp2 (half arg)
 
half half_float::expm1 (half arg)
 
half half_float::log (half arg)
 
half half_float::log10 (half arg)
 
half half_float::log2 (half arg)
 
half half_float::log1p (half arg)
 
Power functions

half half_float::sqrt (half arg)
 
half half_float::rsqrt (half arg)
 
half half_float::cbrt (half arg)
 
half half_float::hypot (half x, half y)
 
half half_float::hypot (half x, half y, half z)
 
half half_float::pow (half x, half y)
 
Trigonometric functions

void half_float::sincos (half arg, half *sin, half *cos)
 
half half_float::sin (half arg)
 
half half_float::cos (half arg)
 
half half_float::tan (half arg)
 
half half_float::asin (half arg)
 
half half_float::acos (half arg)
 
half half_float::atan (half arg)
 
half half_float::atan2 (half y, half x)
 
Hyperbolic functions

half half_float::sinh (half arg)
 
half half_float::cosh (half arg)
 
half half_float::tanh (half arg)
 
half half_float::asinh (half arg)
 
half half_float::acosh (half arg)
 
half half_float::atanh (half arg)
 
Error and gamma functions

half half_float::erf (half arg)
 
half half_float::erfc (half arg)
 
half half_float::lgamma (half arg)
 
half half_float::tgamma (half arg)
 
Rounding

half half_float::ceil (half arg)
 
half half_float::floor (half arg)
 
half half_float::trunc (half arg)
 
half half_float::round (half arg)
 
long half_float::lround (half arg)
 
half half_float::rint (half arg)
 
long half_float::lrint (half arg)
 
half half_float::nearbyint (half arg)
 
Floating point manipulation

half half_float::frexp (half arg, int *exp)
 
half half_float::scalbln (half arg, long exp)
 
half half_float::scalbn (half arg, int exp)
 
half half_float::ldexp (half arg, int exp)
 
half half_float::modf (half arg, half *iptr)
 
int half_float::ilogb (half arg)
 
half half_float::logb (half arg)
 
half half_float::nextafter (half from, half to)
 
half half_float::nexttoward (half from, long double to)
 
HALF_CONSTEXPR half half_float::copysign (half x, half y)
 
Floating point classification

HALF_CONSTEXPR int half_float::fpclassify (half arg)
 
HALF_CONSTEXPR bool half_float::isfinite (half arg)
 
HALF_CONSTEXPR bool half_float::isinf (half arg)
 
HALF_CONSTEXPR bool half_float::isnan (half arg)
 
HALF_CONSTEXPR bool half_float::isnormal (half arg)
 
HALF_CONSTEXPR bool half_float::signbit (half arg)
 
Comparison

HALF_CONSTEXPR bool half_float::isgreater (half x, half y)
 
HALF_CONSTEXPR bool half_float::isgreaterequal (half x, half y)
 
HALF_CONSTEXPR bool half_float::isless (half x, half y)
 
HALF_CONSTEXPR bool half_float::islessequal (half x, half y)
 
HALF_CONSTEXPR bool half_float::islessgreater (half x, half y)
 
HALF_CONSTEXPR bool half_float::isunordered (half x, half y)
 
Casting

template<typename T , typename U >
T half_float::half_cast (U arg)
 
template<typename T , std::float_round_style R, typename U >
T half_float::half_cast (U arg)
 
Error handling

int half_float::feclearexcept (int excepts)
 
int half_float::fetestexcept (int excepts)
 
int half_float::feraiseexcept (int excepts)
 
int half_float::fegetexceptflag (int *flagp, int excepts)
 
int half_float::fesetexceptflag (const int *flagp, int excepts)
 
void half_float::fethrowexcept (int excepts, const char *msg="")
 

Variables

HALF_CONSTEXPR_CONST binary_t half_float::detail::binary = binary_t()
 Tag for binary construction.
 

Detailed Description

Main header file for half-precision functionality.

Main header file for half-precision functionality.

Macro Definition Documentation

◆ FP_FAST_FMAH

#define FP_FAST_FMAH   1

Fast half-precision fma function. This symbol is defined if the fma() function generally executes as fast as, or faster than, a separate half-precision multiplication followed by an addition, which is always the case.

See also: Documentation for FP_FAST_FMA

◆ HALF_ENABLE_F16C_INTRINSICS

#define HALF_ENABLE_F16C_INTRINSICS   __F16C__

Enable F16C instruction set intrinsics. Defining this to 1 enables the use of F16C compiler intrinsics for converting between half-precision and single-precision values which may result in improved performance. This will not perform additional checks for support of the F16C instruction set, so an appropriate target platform is required when enabling this feature.

Unless predefined it will be enabled automatically when the __F16C__ symbol is defined, which some compilers do on supporting platforms.

◆ HALF_ERRHANDLING_OVERFLOW_TO_INEXACT

#define HALF_ERRHANDLING_OVERFLOW_TO_INEXACT   1

Raise INEXACT exception on overflow. Defining this to 1 (default) causes overflow errors to automatically raise inexact exceptions in addition. These will be raised after any possible handling of the overflow exception.

◆ HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT

#define HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT   1

Raise INEXACT exception on underflow. Defining this to 1 (default) causes underflow errors to automatically raise inexact exceptions in addition. These will be raised after any possible handling of the underflow exception.

Note: This will actually cause underflow (and the accompanying inexact) exceptions to be raised only when the result is inexact, while if disabled bare underflow errors will be raised for any (possibly exact) subnormal result.

◆ HALF_ROUND_STYLE

#define HALF_ROUND_STYLE   1

Default rounding mode. This specifies the rounding mode used for all conversions between halfs and more precise types (unless using half_cast() and specifying the rounding mode directly) as well as in arithmetic operations and mathematical functions. It can be redefined (before including half.hpp) to one of the standard rounding modes using their respective constants or the equivalent values of std::float_round_style:

std::float_round_style value rounding
std::round_indeterminate -1 fastest
std::round_toward_zero 0 toward zero
std::round_to_nearest 1 to nearest (default)
std::round_toward_infinity 2 toward positive infinity
std::round_toward_neg_infinity 3 toward negative infinity

By default this is set to 1 (std::round_to_nearest), which rounds results to the nearest representable value. It can even be set to std::numeric_limits<float>::round_style to synchronize the rounding mode with that of the built-in single-precision implementation (which is likely std::round_to_nearest, though).

◆ HLF_ROUNDS

#define HLF_ROUNDS   HALF_ROUND_STYLE

Half rounding mode. In correspondence with FLT_ROUNDS from <cfloat> this symbol expands to the rounding mode used for half-precision operations. It is an alias for HALF_ROUND_STYLE.

See also: Documentation for FLT_ROUNDS

◆ HUGE_VALH

#define HUGE_VALH   std::numeric_limits<half_float::half>::infinity()

Value signaling overflow. In correspondence with HUGE_VAL[F|L] from <cmath> this symbol expands to a positive value signaling the overflow of an operation, in particular it just evaluates to positive infinity.

See also: Documentation for HUGE_VAL

Function Documentation

◆ angle_arg()

uint32 half_float::detail::angle_arg ( unsigned int  abs,
int &  k 
)
inline

Reduce argument for trigonometric functions.

Parameters
abs: half-precision floating-point value
k: value to take quarter period
Returns
abs reduced to [-pi/4,pi/4] as Q0.30

◆ area()

template<std::float_round_style R, bool S>
unsigned int half_float::detail::area ( unsigned int  arg)

Area function and postprocessing. This computes the value directly in Q2.30 using the representation asinh|acosh(x) = log(x+sqrt(x^2+|-1)).

Template Parameters
R: rounding mode to use
S: true for asinh, false for acosh
Parameters
arg: half-precision argument
Returns
asinh|acosh(arg) converted to half-precision
Exceptions
FE_OVERFLOW: on overflows
FE_UNDERFLOW: on underflows
FE_INEXACT: if no other exception occurred

◆ arithmetic_shift()

uint32 half_float::detail::arithmetic_shift ( uint32  arg,
int  i 
)
inline

Platform-independent arithmetic right shift.

Parameters
arg: integer value in two's complement
i: shift amount (at most 31)
Returns
arg right shifted for i bits with possible sign extension

◆ atan2()

uint32 half_float::detail::atan2 ( uint32  my,
uint32  mx,
unsigned int  n = 31 
)
inline

Fixed point arc tangent. This uses the CORDIC algorithm in vectoring mode.

Parameters
my: y coordinate as Q0.30
mx: x coordinate as Q0.30
n: number of iterations (at most 31)
Returns
arc tangent of my / mx as Q1.30

◆ atan2_args()

std::pair< uint32, uint32 > half_float::detail::atan2_args ( unsigned int  abs)
inline

Get arguments for atan2 function.

Parameters
abs: half-precision floating-point value
Returns
abs and sqrt(1 - abs^2) as Q0.30

◆ builtin_isinf()

template<typename T >
bool half_float::detail::builtin_isinf ( T  arg)

Check for infinity.

Template Parameters
T: argument type (builtin floating-point type)
Parameters
arg: value to query
Return values
true: if infinity
false: else

◆ builtin_isnan()

template<typename T >
bool half_float::detail::builtin_isnan ( T  arg)

Check for NaN.

Template Parameters
T: argument type (builtin floating-point type)
Parameters
arg: value to query
Return values
true: if not a number
false: else

◆ builtin_signbit()

template<typename T >
bool half_float::detail::builtin_signbit ( T  arg)

Check sign.

Template Parameters
T: argument type (builtin floating-point type)
Parameters
arg: value to query
Return values
true: if signbit set
false: else

◆ check_underflow()

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::check_underflow ( unsigned int  arg)
inline

Check value for underflow.

Parameters
arg: non-zero half-precision value to check
Returns
arg
Exceptions
FE_UNDERFLOW: if arg is subnormal

◆ compsignal()

HALF_CONSTEXPR_NOERR bool half_float::detail::compsignal ( unsigned int  x,
unsigned int  y 
)
inline

Check and signal for any NaN.

Parameters
x: first half-precision value to check
y: second half-precision value to check
Return values
true: if either x or y is NaN
false: else
Exceptions
FE_INVALID: if x or y is NaN

◆ divide64()

uint32 half_float::detail::divide64 ( uint32  x,
uint32  y,
int &  s 
)
inline

64-bit division.

Parameters
x: upper 32 bit of dividend
y: divisor
s: variable to store sticky bit for rounding
Returns
(x << 32) / y

◆ erf()

template<std::float_round_style R, bool C>
unsigned int half_float::detail::erf ( unsigned int  arg)

Error function and postprocessing. This computes the value directly in Q1.31 using the approximations given here.

Template Parameters
R: rounding mode to use
C: true for complementary error function, false else
Parameters
arg: half-precision function argument
Returns
approximated value of error function in half-precision
Exceptions
FE_OVERFLOW: on overflows
FE_UNDERFLOW: on underflows
FE_INEXACT: if no other exception occurred

◆ errflags()

int & half_float::detail::errflags ( )
inline

Internal exception flags.

Returns
reference to global exception flags

◆ exp2()

uint32 half_float::detail::exp2 ( uint32  m,
unsigned int  n = 32 
)
inline

Fixed point binary exponential. This uses the BKM algorithm in E-mode.

Parameters
m: exponent in [0,1) as Q0.31
n: number of iterations (at most 32)
Returns
2 ^ m as Q1.31

◆ exp2_post()

template<std::float_round_style R>
unsigned int half_float::detail::exp2_post ( uint32  m,
int  exp,
bool  esign,
unsigned int  sign = 0,
unsigned int  n = 32 
)

Postprocessing for binary exponential.

Template Parameters
R: rounding mode to use
Parameters
m: fractional part of the exponent as Q0.31
exp: absolute value of unbiased exponent
esign: sign of actual exponent
sign: sign bit of result
n: number of BKM iterations (at most 32)
Returns
value converted to half-precision
Exceptions
FE_OVERFLOW: on overflows
FE_UNDERFLOW: on underflows
FE_INEXACT: if value had to be rounded or I is `true`

◆ fixed2half()

template<std::float_round_style R, unsigned int F, bool S, bool N, bool I>
unsigned int half_float::detail::fixed2half ( uint32  m,
int  exp = 14,
unsigned int  sign = 0,
int  s = 0 
)

Convert fixed point to half-precision floating-point.

Template Parameters
R: rounding mode to use
F: number of fractional bits in [11,31]
S: true for signed, false for unsigned
N: true for additional normalization step, false if already normalized to 1.F
I: true to always raise INEXACT exception, false to raise only for rounded results
Parameters
m: mantissa in Q1.F fixed point format
exp: biased exponent - 1
sign: half-precision value with sign bit only
s: sticky bit (or of all but the most significant already discarded bits)
Returns
value converted to half-precision
Exceptions
FE_OVERFLOW: on overflows
FE_UNDERFLOW: on underflows
FE_INEXACT: if value had to be rounded or I is `true`

◆ float2half()

template<std::float_round_style R, typename T >
unsigned int half_float::detail::float2half ( T  value)

Convert floating-point to half-precision.

Template Parameters
R: rounding mode to use
T: source type (builtin floating-point type)
Parameters
value: floating-point value to convert
Returns
rounded half-precision value
Exceptions
FE_OVERFLOW: on overflows
FE_UNDERFLOW: on underflows
FE_INEXACT: if value had to be rounded

◆ float2half_impl() [1/3]

template<std::float_round_style R>
unsigned int half_float::detail::float2half_impl ( double  value,
true_type   
)

Convert IEEE double-precision to half-precision.

Template Parameters
R: rounding mode to use
Parameters
value: double-precision value to convert
Returns
rounded half-precision value
Exceptions
FE_OVERFLOW: on overflows
FE_UNDERFLOW: on underflows
FE_INEXACT: if value had to be rounded

◆ float2half_impl() [2/3]

template<std::float_round_style R>
unsigned int half_float::detail::float2half_impl ( float  value,
true_type   
)

Convert IEEE single-precision to half-precision. Credit for this goes to Jeroen van der Zijp.

Template Parameters
R: rounding mode to use
Parameters
value: single-precision value to convert
Returns
rounded half-precision value
Exceptions
FE_OVERFLOW: on overflows
FE_UNDERFLOW: on underflows
FE_INEXACT: if value had to be rounded

◆ float2half_impl() [3/3]

template<std::float_round_style R, typename T >
unsigned int half_float::detail::float2half_impl ( T  value,
  ... 
)

Convert non-IEEE floating-point to half-precision.

Template Parameters
R: rounding mode to use
T: source type (builtin floating-point type)
Parameters
value: floating-point value to convert
Returns
rounded half-precision value
Exceptions
FE_OVERFLOW: on overflows
FE_UNDERFLOW: on underflows
FE_INEXACT: if value had to be rounded

◆ gamma()

template<std::float_round_style R, bool L>
unsigned int half_float::detail::gamma ( unsigned int  arg)

Gamma function and postprocessing. This approximates the value of either the gamma function or its logarithm directly in Q1.31.

Template Parameters
R: rounding mode to use
L: true for logarithm of gamma function, false for gamma function
Parameters
arg: half-precision floating-point value
Returns
lgamma/tgamma(arg) in half-precision
Exceptions
FE_OVERFLOW: on overflows
FE_UNDERFLOW: on underflows
FE_INEXACT: if arg is not a positive integer

◆ half2float()

template<typename T >
T half_float::detail::half2float ( unsigned int  value)

Convert half-precision to floating-point.

Template Parameters
T: type to convert to (builtin floating-point type)
Parameters
value: half-precision value to convert
Returns
floating-point value

◆ half2float_impl() [1/3]

double half_float::detail::half2float_impl ( unsigned int  value,
double  ,
true_type   
)
inline

Convert half-precision to IEEE double-precision.

Parameters
value: half-precision value to convert
Returns
double-precision value

◆ half2float_impl() [2/3]

float half_float::detail::half2float_impl ( unsigned int  value,
float  ,
true_type   
)
inline

Convert half-precision to IEEE single-precision. Credit for this goes to Jeroen van der Zijp.

Parameters
value: half-precision value to convert
Returns
single-precision value

◆ half2float_impl() [3/3]

template<typename T >
T half_float::detail::half2float_impl ( unsigned int  value,
T  ,
  ... 
)

Convert half-precision to non-IEEE floating-point.

Template Parameters
T: type to convert to (builtin floating-point type)
Parameters
value: half-precision value to convert
Returns
floating-point value

◆ half2int()

template<std::float_round_style R, bool E, bool I, typename T >
T half_float::detail::half2int ( unsigned int  value)

Convert half-precision floating-point to integer.

Template Parameters
R: rounding mode to use
E: true for round to even, false for round away from zero
I: true to raise INEXACT exception (if inexact), false to never raise it
T: type to convert to (builtin integer type with at least 16 bits precision, excluding any implicit sign bits)
Parameters
value: half-precision value to convert
Returns
rounded integer value
Exceptions
FE_INVALID: if value is not representable in type T
FE_INEXACT: if value had to be rounded and I is `true`

◆ hyperbolic_args()

std::pair< uint32, uint32 > half_float::detail::hyperbolic_args ( unsigned int  abs,
int &  exp,
unsigned int  n = 32 
)
inline

Get exponentials for hyperbolic computation

Parameters
abs: half-precision floating-point value
exp: variable to take unbiased exponent of larger result
n: number of BKM iterations (at most 32)
Returns
exp(abs) and exp(-abs) as Q1.31 with same exponent

◆ hypot_post()

template<std::float_round_style R>
unsigned int half_float::detail::hypot_post ( uint32  r,
int  exp 
)

Hypotenuse square root and postprocessing.

Template Parameters
R: rounding mode to use
Parameters
r: mantissa as Q2.30
exp: biased exponent
Returns
square root converted to half-precision
Exceptions
FE_OVERFLOW: on overflows
FE_UNDERFLOW: on underflows
FE_INEXACT: if value had to be rounded

◆ int2half()

template<std::float_round_style R, typename T >
unsigned int half_float::detail::int2half ( T  value)

Convert integer to half-precision floating-point.

Template Parameters
R: rounding mode to use
T: type to convert (builtin integer type)
Parameters
value: integral value to convert
Returns
rounded half-precision value
Exceptions
FE_OVERFLOW: on overflows
FE_INEXACT: if value had to be rounded

◆ integral()

template<std::float_round_style R, bool E, bool I>
unsigned int half_float::detail::integral ( unsigned int  value)

Round half-precision number to nearest integer value.

Template Parameters
R: rounding mode to use
E: true for round to even, false for round away from zero
I: true to raise INEXACT exception (if inexact), false to never raise it
Parameters
value: half-precision value to round
Returns
half-precision bits for nearest integral value
Exceptions
FE_INVALID: for signaling NaN
FE_INEXACT: if value had to be rounded and I is `true`

◆ invalid()

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::invalid ( )
inline

Raise domain error and return NaN. Returns a quiet NaN.

Exceptions
FE_INVALID

◆ log2()

uint32 half_float::detail::log2 ( uint32  m,
unsigned int  n = 32 
)
inline

Fixed point binary logarithm. This uses the BKM algorithm in L-mode.

Parameters
m: mantissa in [1,2) as Q1.30
n: number of iterations (at most 32)
Returns
log2(m) as Q0.31

◆ log2_post()

template<std::float_round_style R, uint32 L>
unsigned int half_float::detail::log2_post ( uint32  m,
int  ilog,
int  exp,
unsigned int  sign = 0 
)

Postprocessing for binary logarithm.

Template Parameters
R: rounding mode to use
L: logarithm for base transformation as Q1.31
Parameters
m: fractional part of logarithm as Q0.31
ilog: signed integer part of logarithm
exp: biased exponent of result
sign: sign bit of result
Returns
value base-transformed and converted to half-precision
Exceptions
FE_OVERFLOW: on overflows
FE_UNDERFLOW: on underflows
FE_INEXACT: if no other exception occurred

◆ mod()

template<bool Q, bool R>
unsigned int half_float::detail::mod ( unsigned int  x,
unsigned int  y,
int *  quo = NULL 
)

Half precision positive modulus.

Template Parameters
Q: true to compute full quotient, false else
R: true to compute signed remainder, false for positive remainder
Parameters
x: first operand as positive finite half-precision value
y: second operand as positive finite half-precision value
quo: address to store quotient at, nullptr if Q is false
Returns
modulus of x / y

◆ mulhi()

template<std::float_round_style R>
uint32 half_float::detail::mulhi ( uint32  x,
uint32  y 
)

Upper part of 64-bit multiplication.

Template Parameters
R: rounding mode to use
Parameters
x: first factor
y: second factor
Returns
upper 32 bit of x * y

◆ multiply64()

uint32 half_float::detail::multiply64 ( uint32  x,
uint32  y 
)
inline

64-bit multiplication.

Parameters
x: first factor
y: second factor
Returns
upper 32 bit of x * y rounded to nearest

◆ overflow()

template<std::float_round_style R>
HALF_CONSTEXPR_NOERR unsigned int half_float::detail::overflow ( unsigned int  sign = 0)

Half-precision overflow.

Template Parameters
R: rounding mode to use
Parameters
sign: half-precision value with sign bit only
Returns
rounded overflowing half-precision value
Exceptions
FE_OVERFLOW

◆ pole()

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::pole ( unsigned int  sign = 0)
inline

Raise pole error and return infinity.

Parameters
sign: half-precision value with sign bit only
Returns
half-precision infinity with sign of sign
Exceptions
FE_DIVBYZERO

◆ raise()

void half_float::detail::raise ( int  HALF_UNUSED_NOERR(flags),
bool  HALF_UNUSED_NOERR(cond) = true 
)
inline

Raise floating-point exception.

Parameters
flags: exceptions to raise
cond: condition to raise exceptions for

◆ rounded()

template<std::float_round_style R, bool I>
HALF_CONSTEXPR_NOERR unsigned int half_float::detail::rounded ( unsigned int  value,
int  g,
int  s 
)

Round half-precision number.

Template Parameters
R: rounding mode to use
I: true to always raise INEXACT exception, false to raise only for rounded results
Parameters
value: finite half-precision number to round
g: guard bit (most significant discarded bit)
s: sticky bit (or of all but the most significant discarded bits)
Returns
rounded half-precision value
Exceptions
FE_OVERFLOW: on overflows
FE_UNDERFLOW: on underflows
FE_INEXACT: if value had to be rounded or I is `true`

◆ select()

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::select ( unsigned int  x,
unsigned int  HALF_UNUSED_NOERR(y) 
)
inline

Select value or signaling NaN.

Parameters
x: preferred half-precision value
y: ignored half-precision value except for signaling NaN
Returns
y if signaling NaN, x otherwise
Exceptions
FE_INVALID: if y is signaling NaN

◆ sign_mask()

uint32 half_float::detail::sign_mask ( uint32  arg)
inline

Platform-independent sign mask.

Parameters
arg: integer value in two's complement
Return values
-1: if arg negative
0: if arg positive

◆ signal() [1/3]

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::signal ( unsigned int  nan)
inline

Signal and silence signaling NaN.

Parameters
nan: half-precision NaN value
Returns
quiet NaN
Exceptions
FE_INVALID: if nan is signaling NaN

◆ signal() [2/3]

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::signal ( unsigned int  x,
unsigned int  y 
)
inline

Signal and silence signaling NaNs.

Parameters
x: first half-precision value to check
y: second half-precision value to check
Returns
quiet NaN
Exceptions
FE_INVALID: if x or y is signaling NaN

◆ signal() [3/3]

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::signal ( unsigned int  x,
unsigned int  y,
unsigned int  z 
)
inline

Signal and silence signaling NaNs.

Parameters
x: first half-precision value to check
y: second half-precision value to check
z: third half-precision value to check
Returns
quiet NaN
Exceptions
FE_INVALID: if x, y or z is signaling NaN

◆ sincos()

std::pair< uint32, uint32 > half_float::detail::sincos ( uint32  mz,
unsigned int  n = 31 
)
inline

Fixed point sine and cosine. This uses the CORDIC algorithm in rotation mode.

Parameters
mz: angle in [-pi/2,pi/2] as Q1.30
n: number of iterations (at most 31)
Returns
sine and cosine of mz as Q1.30

◆ sqrt()

template<unsigned int F>
uint32 half_float::detail::sqrt ( uint32 &  r,
int &  exp 
)

Fixed point square root.

Template Parameters
F: number of fractional bits
Parameters
r: radicand in Q1.F fixed point format
exp: exponent
Returns
square root as Q1.F/2

◆ tangent_post()

template<std::float_round_style R>
unsigned int half_float::detail::tangent_post ( uint32  my,
uint32  mx,
int  exp,
unsigned int  sign = 0 
)

Division and postprocessing for tangents.

Template Parameters
R: rounding mode to use
Parameters
my: dividend as Q1.31
mx: divisor as Q1.31
exp: biased exponent of result
sign: sign bit of result
Returns
quotient converted to half-precision
Exceptions
FE_OVERFLOW: on overflows
FE_UNDERFLOW: on underflows
FE_INEXACT: if no other exception occurred

◆ underflow()

template<std::float_round_style R>
HALF_CONSTEXPR_NOERR unsigned int half_float::detail::underflow ( unsigned int  sign = 0)

Half-precision underflow.

Template Parameters
R: rounding mode to use
Parameters
sign: half-precision value with sign bit only
Returns
rounded underflowing half-precision value
Exceptions
FE_UNDERFLOW