| Line | Exclusive | Inclusive | Code |
|---|---|---|---|
| 1 | # This file is a part of Julia. License is MIT: https://julialang.org/license | ||
| 2 | |||
| 3 | const IEEEFloat = Union{Float16, Float32, Float64} | ||
| 4 | |||
| 5 | ## floating point traits ## | ||
| 6 | |||
| 7 | """ | ||
| 8 | Inf16 | ||
| 9 | |||
| 10 | Positive infinity of type [`Float16`](@ref). | ||
| 11 | """ | ||
| 12 | const Inf16 = bitcast(Float16, 0x7c00) | ||
| 13 | """ | ||
| 14 | NaN16 | ||
| 15 | |||
| 16 | A not-a-number value of type [`Float16`](@ref). | ||
| 17 | """ | ||
| 18 | const NaN16 = bitcast(Float16, 0x7e00) | ||
| 19 | """ | ||
| 20 | Inf32 | ||
| 21 | |||
| 22 | Positive infinity of type [`Float32`](@ref). | ||
| 23 | """ | ||
| 24 | const Inf32 = bitcast(Float32, 0x7f800000) | ||
| 25 | """ | ||
| 26 | NaN32 | ||
| 27 | |||
| 28 | A not-a-number value of type [`Float32`](@ref). | ||
| 29 | """ | ||
| 30 | const NaN32 = bitcast(Float32, 0x7fc00000) | ||
| 31 | const Inf64 = bitcast(Float64, 0x7ff0000000000000) | ||
| 32 | const NaN64 = bitcast(Float64, 0x7ff8000000000000) | ||
| 33 | |||
| 34 | const Inf = Inf64 | ||
| 35 | """ | ||
| 36 | Inf, Inf64 | ||
| 37 | |||
| 38 | Positive infinity of type [`Float64`](@ref). | ||
| 39 | |||
| 40 | See also: [`isfinite`](@ref), [`typemax`](@ref), [`NaN`](@ref), [`Inf32`](@ref). | ||
| 41 | |||
| 42 | # Examples | ||
| 43 | ```jldoctest | ||
| 44 | julia> π/0 | ||
| 45 | Inf | ||
| 46 | |||
| 47 | julia> +1.0 / -0.0 | ||
| 48 | -Inf | ||
| 49 | |||
| 50 | julia> ℯ^-Inf | ||
| 51 | 0.0 | ||
| 52 | ``` | ||
| 53 | """ | ||
| 54 | Inf, Inf64 | ||
| 55 | |||
| 56 | const NaN = NaN64 | ||
| 57 | """ | ||
| 58 | NaN, NaN64 | ||
| 59 | |||
| 60 | A not-a-number value of type [`Float64`](@ref). | ||
| 61 | |||
| 62 | See also: [`isnan`](@ref), [`missing`](@ref), [`NaN32`](@ref), [`Inf`](@ref). | ||
| 63 | |||
| 64 | # Examples | ||
| 65 | ```jldoctest | ||
| 66 | julia> 0/0 | ||
| 67 | NaN | ||
| 68 | |||
| 69 | julia> Inf - Inf | ||
| 70 | NaN | ||
| 71 | |||
| 72 | julia> NaN == NaN, isequal(NaN, NaN), NaN === NaN | ||
| 73 | (false, true, true) | ||
| 74 | ``` | ||
| 75 | """ | ||
| 76 | NaN, NaN64 | ||
| 77 | |||
| 78 | # bit patterns | ||
| 79 | reinterpret(::Type{Unsigned}, x::Float64) = reinterpret(UInt64, x) | ||
| 80 | reinterpret(::Type{Unsigned}, x::Float32) = reinterpret(UInt32, x) | ||
| 81 | reinterpret(::Type{Unsigned}, x::Float16) = reinterpret(UInt16, x) | ||
| 82 | reinterpret(::Type{Signed}, x::Float64) = reinterpret(Int64, x) | ||
| 83 | reinterpret(::Type{Signed}, x::Float32) = reinterpret(Int32, x) | ||
| 84 | reinterpret(::Type{Signed}, x::Float16) = reinterpret(Int16, x) | ||
| 85 | |||
| 86 | sign_mask(::Type{Float64}) = 0x8000_0000_0000_0000 | ||
| 87 | exponent_mask(::Type{Float64}) = 0x7ff0_0000_0000_0000 | ||
| 88 | exponent_one(::Type{Float64}) = 0x3ff0_0000_0000_0000 | ||
| 89 | exponent_half(::Type{Float64}) = 0x3fe0_0000_0000_0000 | ||
| 90 | significand_mask(::Type{Float64}) = 0x000f_ffff_ffff_ffff | ||
| 91 | |||
| 92 | sign_mask(::Type{Float32}) = 0x8000_0000 | ||
| 93 | exponent_mask(::Type{Float32}) = 0x7f80_0000 | ||
| 94 | exponent_one(::Type{Float32}) = 0x3f80_0000 | ||
| 95 | exponent_half(::Type{Float32}) = 0x3f00_0000 | ||
| 96 | significand_mask(::Type{Float32}) = 0x007f_ffff | ||
| 97 | |||
| 98 | sign_mask(::Type{Float16}) = 0x8000 | ||
| 99 | exponent_mask(::Type{Float16}) = 0x7c00 | ||
| 100 | exponent_one(::Type{Float16}) = 0x3c00 | ||
| 101 | exponent_half(::Type{Float16}) = 0x3800 | ||
| 102 | significand_mask(::Type{Float16}) = 0x03ff | ||
| 103 | |||
| 104 | mantissa(x::T) where {T} = reinterpret(Unsigned, x) & significand_mask(T) | ||
| 105 | |||
| 106 | for T in (Float16, Float32, Float64) | ||
| 107 | @eval significand_bits(::Type{$T}) = $(trailing_ones(significand_mask(T))) | ||
| 108 | @eval exponent_bits(::Type{$T}) = $(sizeof(T)*8 - significand_bits(T) - 1) | ||
| 109 | @eval exponent_bias(::Type{$T}) = $(Int(exponent_one(T) >> significand_bits(T))) | ||
| 110 | # maximum float exponent | ||
| 111 | @eval exponent_max(::Type{$T}) = $(Int(exponent_mask(T) >> significand_bits(T)) - exponent_bias(T) - 1) | ||
| 112 | # maximum float exponent without bias | ||
| 113 | @eval exponent_raw_max(::Type{$T}) = $(Int(exponent_mask(T) >> significand_bits(T))) | ||
| 114 | end | ||
| 115 | |||
| 116 | """ | ||
| 117 | exponent_max(T) | ||
| 118 | |||
| 119 | Maximum [`exponent`](@ref) value for a floating point number of type `T`. | ||
| 120 | |||
| 121 | # Examples | ||
| 122 | ```jldoctest | ||
| 123 | julia> Base.exponent_max(Float64) | ||
| 124 | 1023 | ||
| 125 | ``` | ||
| 126 | |||
| 127 | Note, `exponent_max(T) + 1` is a possible value of the exponent field | ||
| 128 | with bias, which might be used as sentinel value for `Inf` or `NaN`. | ||
| 129 | """ | ||
| 130 | function exponent_max end | ||
| 131 | |||
| 132 | """ | ||
| 133 | exponent_raw_max(T) | ||
| 134 | |||
| 135 | Maximum value of the [`exponent`](@ref) field for a floating point number of type `T` without bias, | ||
| 136 | i.e. the maximum integer value representable by [`exponent_bits(T)`](@ref) bits. | ||
| 137 | """ | ||
| 138 | function exponent_raw_max end | ||
| 139 | |||
| 140 | """ | ||
| 141 | uabs(x::Integer) | ||
| 142 | |||
| 143 | Return the absolute value of `x`, possibly returning a different type should the | ||
| 144 | operation be susceptible to overflow. This typically arises when `x` is a two's complement | ||
| 145 | signed integer, so that `abs(typemin(x)) == typemin(x) < 0`, in which case the result of | ||
| 146 | `uabs(x)` will be an unsigned integer of the same size. | ||
| 147 | """ | ||
| 148 | uabs(x::Integer) = abs(x) | ||
| 149 | uabs(x::BitSigned) = unsigned(abs(x)) | ||
| 150 | |||
| 151 | ## conversions to floating-point ## | ||
| 152 | |||
| 153 | # TODO: deprecate in 2.0 | ||
| 154 | Float16(x::Integer) = convert(Float16, convert(Float32, x)::Float32) | ||
| 155 | |||
| 156 | for t1 in (Float16, Float32, Float64) | ||
| 157 | for st in (Int8, Int16, Int32, Int64) | ||
| 158 | @eval begin | ||
| 159 | (::Type{$t1})(x::($st)) = sitofp($t1, x) | ||
| 160 | promote_rule(::Type{$t1}, ::Type{$st}) = $t1 | ||
| 161 | end | ||
| 162 | end | ||
| 163 | for ut in (Bool, UInt8, UInt16, UInt32, UInt64) | ||
| 164 | @eval begin | ||
| 165 | (::Type{$t1})(x::($ut)) = uitofp($t1, x) | ||
| 166 | promote_rule(::Type{$t1}, ::Type{$ut}) = $t1 | ||
| 167 | end | ||
| 168 | end | ||
| 169 | end | ||
| 170 | |||
| 171 | Bool(x::Real) = x==0 ? false : x==1 ? true : throw(InexactError(:Bool, Bool, x)) | ||
| 172 | |||
| 173 | promote_rule(::Type{Float64}, ::Type{UInt128}) = Float64 | ||
| 174 | promote_rule(::Type{Float64}, ::Type{Int128}) = Float64 | ||
| 175 | promote_rule(::Type{Float32}, ::Type{UInt128}) = Float32 | ||
| 176 | promote_rule(::Type{Float32}, ::Type{Int128}) = Float32 | ||
| 177 | promote_rule(::Type{Float16}, ::Type{UInt128}) = Float16 | ||
| 178 | promote_rule(::Type{Float16}, ::Type{Int128}) = Float16 | ||
| 179 | |||
| 180 | function Float64(x::UInt128) | ||
| 181 | if x < UInt128(1) << 104 # Can fit it in two 52 bits mantissas | ||
| 182 | low_exp = 0x1p52 | ||
| 183 | high_exp = 0x1p104 | ||
| 184 | low_bits = (x % UInt64) & Base.significand_mask(Float64) | ||
| 185 | low_value = reinterpret(Float64, reinterpret(UInt64, low_exp) | low_bits) - low_exp | ||
| 186 | high_bits = ((x >> 52) % UInt64) | ||
| 187 | high_value = reinterpret(Float64, reinterpret(UInt64, high_exp) | high_bits) - high_exp | ||
| 188 | low_value + high_value | ||
| 189 | else # Large enough that low bits only affect rounding, pack low bits | ||
| 190 | low_exp = 0x1p76 | ||
| 191 | high_exp = 0x1p128 | ||
| 192 | low_bits = ((x >> 12) % UInt64) >> 12 | (x % UInt64) & 0xFFFFFF | ||
| 193 | low_value = reinterpret(Float64, reinterpret(UInt64, low_exp) | low_bits) - low_exp | ||
| 194 | high_bits = ((x >> 76) % UInt64) | ||
| 195 | high_value = reinterpret(Float64, reinterpret(UInt64, high_exp) | high_bits) - high_exp | ||
| 196 | low_value + high_value | ||
| 197 | end | ||
| 198 | end | ||
| 199 | |||
| 200 | function Float64(x::Int128) | ||
| 201 | sign_bit = ((x >> 127) % UInt64) << 63 | ||
| 202 | ux = uabs(x) | ||
| 203 | if ux < UInt128(1) << 104 # Can fit it in two 52 bits mantissas | ||
| 204 | low_exp = 0x1p52 | ||
| 205 | high_exp = 0x1p104 | ||
| 206 | low_bits = (ux % UInt64) & Base.significand_mask(Float64) | ||
| 207 | low_value = reinterpret(Float64, reinterpret(UInt64, low_exp) | low_bits) - low_exp | ||
| 208 | high_bits = ((ux >> 52) % UInt64) | ||
| 209 | high_value = reinterpret(Float64, reinterpret(UInt64, high_exp) | high_bits) - high_exp | ||
| 210 | reinterpret(Float64, sign_bit | reinterpret(UInt64, low_value + high_value)) | ||
| 211 | else # Large enough that low bits only affect rounding, pack low bits | ||
| 212 | low_exp = 0x1p76 | ||
| 213 | high_exp = 0x1p128 | ||
| 214 | low_bits = ((ux >> 12) % UInt64) >> 12 | (ux % UInt64) & 0xFFFFFF | ||
| 215 | low_value = reinterpret(Float64, reinterpret(UInt64, low_exp) | low_bits) - low_exp | ||
| 216 | high_bits = ((ux >> 76) % UInt64) | ||
| 217 | high_value = reinterpret(Float64, reinterpret(UInt64, high_exp) | high_bits) - high_exp | ||
| 218 | reinterpret(Float64, sign_bit | reinterpret(UInt64, low_value + high_value)) | ||
| 219 | end | ||
| 220 | end | ||
| 221 | |||
| 222 | function Float32(x::UInt128) | ||
| 223 | x == 0 && return 0f0 | ||
| 224 | n = top_set_bit(x) # ndigits0z(x,2) | ||
| 225 | if n <= 24 | ||
| 226 | y = ((x % UInt32) << (24-n)) & 0x007f_ffff | ||
| 227 | else | ||
| 228 | y = ((x >> (n-25)) % UInt32) & 0x00ff_ffff # keep 1 extra bit | ||
| 229 | y = (y+one(UInt32))>>1 # round, ties up (extra leading bit in case of next exponent) | ||
| 230 | y &= ~UInt32(trailing_zeros(x) == (n-25)) # fix last bit to round to even | ||
| 231 | end | ||
| 232 | d = ((n+126) % UInt32) << 23 | ||
| 233 | reinterpret(Float32, d + y) | ||
| 234 | end | ||
| 235 | |||
| 236 | function Float32(x::Int128) | ||
| 237 | x == 0 && return 0f0 | ||
| 238 | s = ((x >>> 96) % UInt32) & 0x8000_0000 # sign bit | ||
| 239 | x = abs(x) % UInt128 | ||
| 240 | n = top_set_bit(x) # ndigits0z(x,2) | ||
| 241 | if n <= 24 | ||
| 242 | y = ((x % UInt32) << (24-n)) & 0x007f_ffff | ||
| 243 | else | ||
| 244 | y = ((x >> (n-25)) % UInt32) & 0x00ff_ffff # keep 1 extra bit | ||
| 245 | y = (y+one(UInt32))>>1 # round, ties up (extra leading bit in case of next exponent) | ||
| 246 | y &= ~UInt32(trailing_zeros(x) == (n-25)) # fix last bit to round to even | ||
| 247 | end | ||
| 248 | d = ((n+126) % UInt32) << 23 | ||
| 249 | reinterpret(Float32, s | d + y) | ||
| 250 | end | ||
| 251 | |||
| 252 | # TODO: optimize | ||
| 253 | Float16(x::UInt128) = convert(Float16, Float64(x)) | ||
| 254 | Float16(x::Int128) = convert(Float16, Float64(x)) | ||
| 255 | |||
| 256 | Float16(x::Float32) = fptrunc(Float16, x) | ||
| 257 | Float16(x::Float64) = fptrunc(Float16, x) | ||
| 258 | Float32(x::Float64) = fptrunc(Float32, x) | ||
| 259 | |||
| 260 | Float32(x::Float16) = fpext(Float32, x) | ||
| 261 | Float64(x::Float32) = fpext(Float64, x) | ||
| 262 | Float64(x::Float16) = fpext(Float64, x) | ||
| 263 | |||
| 264 | AbstractFloat(x::Bool) = Float64(x) | ||
| 265 | AbstractFloat(x::Int8) = Float64(x) | ||
| 266 | AbstractFloat(x::Int16) = Float64(x) | ||
| 267 | AbstractFloat(x::Int32) = Float64(x) | ||
| 268 | AbstractFloat(x::Int64) = Float64(x) # LOSSY | ||
| 269 | AbstractFloat(x::Int128) = Float64(x) # LOSSY | ||
| 270 | AbstractFloat(x::UInt8) = Float64(x) | ||
| 271 | AbstractFloat(x::UInt16) = Float64(x) | ||
| 272 | AbstractFloat(x::UInt32) = Float64(x) | ||
| 273 | AbstractFloat(x::UInt64) = Float64(x) # LOSSY | ||
| 274 | AbstractFloat(x::UInt128) = Float64(x) # LOSSY | ||
| 275 | |||
| 276 | Bool(x::Float16) = x==0 ? false : x==1 ? true : throw(InexactError(:Bool, Bool, x)) | ||
| 277 | |||
| 278 | """ | ||
| 279 | float(x) | ||
| 280 | |||
| 281 | Convert a number or array to a floating point data type. | ||
| 282 | |||
| 283 | See also: [`complex`](@ref), [`oftype`](@ref), [`convert`](@ref). | ||
| 284 | |||
| 285 | # Examples | ||
| 286 | ```jldoctest | ||
| 287 | julia> float(1:1000) | ||
| 288 | 1.0:1.0:1000.0 | ||
| 289 | |||
| 290 | julia> float(typemax(Int32)) | ||
| 291 | 2.147483647e9 | ||
| 292 | ``` | ||
| 293 | """ | ||
| 294 | float(x) = AbstractFloat(x) | ||
| 295 | |||
| 296 | """ | ||
| 297 | float(T::Type) | ||
| 298 | |||
| 299 | Return an appropriate type to represent a value of type `T` as a floating point value. | ||
| 300 | Equivalent to `typeof(float(zero(T)))`. | ||
| 301 | |||
| 302 | # Examples | ||
| 303 | ```jldoctest | ||
| 304 | julia> float(Complex{Int}) | ||
| 305 | ComplexF64 (alias for Complex{Float64}) | ||
| 306 | |||
| 307 | julia> float(Int) | ||
| 308 | Float64 | ||
| 309 | ``` | ||
| 310 | """ | ||
| 311 | float(::Type{T}) where {T<:Number} = typeof(float(zero(T))) | ||
| 312 | float(::Type{T}) where {T<:AbstractFloat} = T | ||
| 313 | float(::Type{Union{}}, slurp...) = Union{}(0.0) | ||
| 314 | |||
| 315 | """ | ||
| 316 | unsafe_trunc(T, x) | ||
| 317 | |||
| 318 | Return the nearest integral value of type `T` whose absolute value is | ||
| 319 | less than or equal to the absolute value of `x`. If the value is not representable by `T`, | ||
| 320 | an arbitrary value will be returned. | ||
| 321 | See also [`trunc`](@ref). | ||
| 322 | |||
| 323 | # Examples | ||
| 324 | ```jldoctest | ||
| 325 | julia> unsafe_trunc(Int, -2.2) | ||
| 326 | -2 | ||
| 327 | |||
| 328 | julia> unsafe_trunc(Int, NaN) | ||
| 329 | -9223372036854775808 | ||
| 330 | ``` | ||
| 331 | """ | ||
| 332 | function unsafe_trunc end | ||
| 333 | |||
| 334 | for Ti in (Int8, Int16, Int32, Int64) | ||
| 335 | @eval begin | ||
| 336 | unsafe_trunc(::Type{$Ti}, x::IEEEFloat) = fptosi($Ti, x) | ||
| 337 | end | ||
| 338 | end | ||
| 339 | for Ti in (UInt8, UInt16, UInt32, UInt64) | ||
| 340 | @eval begin | ||
| 341 | unsafe_trunc(::Type{$Ti}, x::IEEEFloat) = fptoui($Ti, x) | ||
| 342 | end | ||
| 343 | end | ||
| 344 | |||
| 345 | function unsafe_trunc(::Type{UInt128}, x::Float64) | ||
| 346 | xu = reinterpret(UInt64,x) | ||
| 347 | k = Int(xu >> 52) & 0x07ff - 1075 | ||
| 348 | xu = (xu & 0x000f_ffff_ffff_ffff) | 0x0010_0000_0000_0000 | ||
| 349 | if k <= 0 | ||
| 350 | UInt128(xu >> -k) | ||
| 351 | else | ||
| 352 | UInt128(xu) << k | ||
| 353 | end | ||
| 354 | end | ||
| 355 | function unsafe_trunc(::Type{Int128}, x::Float64) | ||
| 356 | copysign(unsafe_trunc(UInt128,x) % Int128, x) | ||
| 357 | end | ||
| 358 | |||
| 359 | function unsafe_trunc(::Type{UInt128}, x::Float32) | ||
| 360 | xu = reinterpret(UInt32,x) | ||
| 361 | k = Int(xu >> 23) & 0x00ff - 150 | ||
| 362 | xu = (xu & 0x007f_ffff) | 0x0080_0000 | ||
| 363 | if k <= 0 | ||
| 364 | UInt128(xu >> -k) | ||
| 365 | else | ||
| 366 | UInt128(xu) << k | ||
| 367 | end | ||
| 368 | end | ||
| 369 | function unsafe_trunc(::Type{Int128}, x::Float32) | ||
| 370 | copysign(unsafe_trunc(UInt128,x) % Int128, x) | ||
| 371 | end | ||
| 372 | |||
| 373 | unsafe_trunc(::Type{UInt128}, x::Float16) = unsafe_trunc(UInt128, Float32(x)) | ||
| 374 | unsafe_trunc(::Type{Int128}, x::Float16) = unsafe_trunc(Int128, Float32(x)) | ||
| 375 | |||
| 376 | # matches convert methods | ||
| 377 | # also determines floor, ceil, round | ||
| 378 | trunc(::Type{Signed}, x::IEEEFloat) = trunc(Int,x) | ||
| 379 | trunc(::Type{Unsigned}, x::IEEEFloat) = trunc(UInt,x) | ||
| 380 | trunc(::Type{Integer}, x::IEEEFloat) = trunc(Int,x) | ||
| 381 | |||
| 382 | # fallbacks | ||
| 383 | floor(::Type{T}, x::AbstractFloat) where {T<:Integer} = trunc(T,round(x, RoundDown)) | ||
| 384 | ceil(::Type{T}, x::AbstractFloat) where {T<:Integer} = trunc(T,round(x, RoundUp)) | ||
| 385 | round(::Type{T}, x::AbstractFloat) where {T<:Integer} = trunc(T,round(x, RoundNearest)) | ||
| 386 | |||
| 387 | # Bool | ||
| 388 | trunc(::Type{Bool}, x::AbstractFloat) = (-1 < x < 2) ? 1 <= x : throw(InexactError(:trunc, Bool, x)) | ||
| 389 | floor(::Type{Bool}, x::AbstractFloat) = (0 <= x < 2) ? 1 <= x : throw(InexactError(:floor, Bool, x)) | ||
| 390 | ceil(::Type{Bool}, x::AbstractFloat) = (-1 < x <= 1) ? 0 < x : throw(InexactError(:ceil, Bool, x)) | ||
| 391 | round(::Type{Bool}, x::AbstractFloat) = (-0.5 <= x < 1.5) ? 0.5 < x : throw(InexactError(:round, Bool, x)) | ||
| 392 | |||
| 393 | round(x::IEEEFloat, r::RoundingMode{:ToZero}) = trunc_llvm(x) | ||
| 394 | round(x::IEEEFloat, r::RoundingMode{:Down}) = floor_llvm(x) | ||
| 395 | round(x::IEEEFloat, r::RoundingMode{:Up}) = ceil_llvm(x) | ||
| 396 | round(x::IEEEFloat, r::RoundingMode{:Nearest}) = rint_llvm(x) | ||
| 397 | |||
| 398 | ## floating point promotions ## | ||
| 399 | promote_rule(::Type{Float32}, ::Type{Float16}) = Float32 | ||
| 400 | promote_rule(::Type{Float64}, ::Type{Float16}) = Float64 | ||
| 401 | promote_rule(::Type{Float64}, ::Type{Float32}) = Float64 | ||
| 402 | |||
| 403 | widen(::Type{Float16}) = Float32 | ||
| 404 | widen(::Type{Float32}) = Float64 | ||
| 405 | |||
| 406 | ## floating point arithmetic ## | ||
| 407 | -(x::IEEEFloat) = neg_float(x) | ||
| 408 | |||
| 409 | +(x::T, y::T) where {T<:IEEEFloat} = add_float(x, y) | ||
| 410 | -(x::T, y::T) where {T<:IEEEFloat} = sub_float(x, y) | ||
| 411 | *(x::T, y::T) where {T<:IEEEFloat} = mul_float(x, y) | ||
| 412 | /(x::T, y::T) where {T<:IEEEFloat} = div_float(x, y) | ||
| 413 | |||
| 414 | 1 (2 %) | 1 (2 %) |
1 (2 %)
samples spent in muladd
muladd(x::T, y::T, z::T) where {T<:IEEEFloat} = muladd_float(x, y, z)
1 (100 %) (ex.), 1 (100 %) (incl.) when called from macro expansion line 187 |
| 415 | |||
| 416 | # TODO: faster floating point div? | ||
| 417 | # TODO: faster floating point fld? | ||
| 418 | # TODO: faster floating point mod? | ||
| 419 | |||
| 420 | function unbiased_exponent(x::T) where {T<:IEEEFloat} | ||
| 421 | return (reinterpret(Unsigned, x) & exponent_mask(T)) >> significand_bits(T) | ||
| 422 | end | ||
| 423 | |||
| 424 | function explicit_mantissa_noinfnan(x::T) where {T<:IEEEFloat} | ||
| 425 | m = mantissa(x) | ||
| 426 | issubnormal(x) || (m |= significand_mask(T) + uinttype(T)(1)) | ||
| 427 | return m | ||
| 428 | end | ||
| 429 | |||
| 430 | function _to_float(number::U, ep) where {U<:Unsigned} | ||
| 431 | F = floattype(U) | ||
| 432 | S = signed(U) | ||
| 433 | epint = unsafe_trunc(S,ep) | ||
| 434 | lz::signed(U) = unsafe_trunc(S, Core.Intrinsics.ctlz_int(number) - U(exponent_bits(F))) | ||
| 435 | number <<= lz | ||
| 436 | epint -= lz | ||
| 437 | bits = U(0) | ||
| 438 | if epint >= 0 | ||
| 439 | bits = number & significand_mask(F) | ||
| 440 | bits |= ((epint + S(1)) << significand_bits(F)) & exponent_mask(F) | ||
| 441 | else | ||
| 442 | bits = (number >> -epint) & significand_mask(F) | ||
| 443 | end | ||
| 444 | return reinterpret(F, bits) | ||
| 445 | end | ||
| 446 | |||
| 447 | @assume_effects :terminates_locally :nothrow function rem_internal(x::T, y::T) where {T<:IEEEFloat} | ||
| 448 | xuint = reinterpret(Unsigned, x) | ||
| 449 | yuint = reinterpret(Unsigned, y) | ||
| 450 | if xuint <= yuint | ||
| 451 | if xuint < yuint | ||
| 452 | return x | ||
| 453 | end | ||
| 454 | return zero(T) | ||
| 455 | end | ||
| 456 | |||
| 457 | e_x = unbiased_exponent(x) | ||
| 458 | e_y = unbiased_exponent(y) | ||
| 459 | # Most common case where |y| is "very normal" and |x/y| < 2^EXPONENT_WIDTH | ||
| 460 | if e_y > (significand_bits(T)) && (e_x - e_y) <= (exponent_bits(T)) | ||
| 461 | m_x = explicit_mantissa_noinfnan(x) | ||
| 462 | m_y = explicit_mantissa_noinfnan(y) | ||
| 463 | d = urem_int((m_x << (e_x - e_y)), m_y) | ||
| 464 | iszero(d) && return zero(T) | ||
| 465 | return _to_float(d, e_y - uinttype(T)(1)) | ||
| 466 | end | ||
| 467 | # Both are subnormals | ||
| 468 | if e_x == 0 && e_y == 0 | ||
| 469 | return reinterpret(T, urem_int(xuint, yuint) & significand_mask(T)) | ||
| 470 | end | ||
| 471 | |||
| 472 | m_x = explicit_mantissa_noinfnan(x) | ||
| 473 | e_x -= uinttype(T)(1) | ||
| 474 | m_y = explicit_mantissa_noinfnan(y) | ||
| 475 | lz_m_y = uinttype(T)(exponent_bits(T)) | ||
| 476 | if e_y > 0 | ||
| 477 | e_y -= uinttype(T)(1) | ||
| 478 | else | ||
| 479 | m_y = mantissa(y) | ||
| 480 | lz_m_y = Core.Intrinsics.ctlz_int(m_y) | ||
| 481 | end | ||
| 482 | |||
| 483 | tz_m_y = Core.Intrinsics.cttz_int(m_y) | ||
| 484 | sides_zeroes_cnt = lz_m_y + tz_m_y | ||
| 485 | |||
| 486 | # n>0 | ||
| 487 | exp_diff = e_x - e_y | ||
| 488 | # Shift hy right until the end or n = 0 | ||
| 489 | right_shift = min(exp_diff, tz_m_y) | ||
| 490 | m_y >>= right_shift | ||
| 491 | exp_diff -= right_shift | ||
| 492 | e_y += right_shift | ||
| 493 | # Shift hx left until the end or n = 0 | ||
| 494 | left_shift = min(exp_diff, uinttype(T)(exponent_bits(T))) | ||
| 495 | m_x <<= left_shift | ||
| 496 | exp_diff -= left_shift | ||
| 497 | |||
| 498 | m_x = urem_int(m_x, m_y) | ||
| 499 | iszero(m_x) && return zero(T) | ||
| 500 | iszero(exp_diff) && return _to_float(m_x, e_y) | ||
| 501 | |||
| 502 | while exp_diff > sides_zeroes_cnt | ||
| 503 | exp_diff -= sides_zeroes_cnt | ||
| 504 | m_x <<= sides_zeroes_cnt | ||
| 505 | m_x = urem_int(m_x, m_y) | ||
| 506 | end | ||
| 507 | m_x <<= exp_diff | ||
| 508 | m_x = urem_int(m_x, m_y) | ||
| 509 | return _to_float(m_x, e_y) | ||
| 510 | end | ||
| 511 | |||
| 512 | function rem(x::T, y::T) where {T<:IEEEFloat} | ||
| 513 | if isfinite(x) && !iszero(x) && isfinite(y) && !iszero(y) | ||
| 514 | return copysign(rem_internal(abs(x), abs(y)), x) | ||
| 515 | elseif isinf(x) || isnan(y) || iszero(y) # y can still be Inf | ||
| 516 | return T(NaN) | ||
| 517 | else | ||
| 518 | return x | ||
| 519 | end | ||
| 520 | end | ||
| 521 | |||
| 522 | function mod(x::T, y::T) where {T<:AbstractFloat} | ||
| 523 | r = rem(x,y) | ||
| 524 | if r == 0 | ||
| 525 | copysign(r,y) | ||
| 526 | elseif (r > 0) ⊻ (y > 0) | ||
| 527 | r+y | ||
| 528 | else | ||
| 529 | r | ||
| 530 | end | ||
| 531 | end | ||
| 532 | |||
| 533 | ## floating point comparisons ## | ||
| 534 | ==(x::T, y::T) where {T<:IEEEFloat} = eq_float(x, y) | ||
| 535 | !=(x::T, y::T) where {T<:IEEEFloat} = ne_float(x, y) | ||
| 536 | <( x::T, y::T) where {T<:IEEEFloat} = lt_float(x, y) | ||
| 537 | <=(x::T, y::T) where {T<:IEEEFloat} = le_float(x, y) | ||
| 538 | |||
| 539 | isequal(x::T, y::T) where {T<:IEEEFloat} = fpiseq(x, y) | ||
| 540 | |||
| 541 | # interpret as sign-magnitude integer | ||
| 542 | @inline function _fpint(x) | ||
| 543 | IntT = inttype(typeof(x)) | ||
| 544 | ix = reinterpret(IntT, x) | ||
| 545 | return ifelse(ix < zero(IntT), ix ⊻ typemax(IntT), ix) | ||
| 546 | end | ||
| 547 | |||
| 548 | @inline function isless(a::T, b::T) where T<:IEEEFloat | ||
| 549 | (isnan(a) || isnan(b)) && return !isnan(a) | ||
| 550 | |||
| 551 | return _fpint(a) < _fpint(b) | ||
| 552 | end | ||
| 553 | |||
| 554 | # Exact Float (Tf) vs Integer (Ti) comparisons | ||
| 555 | # Assumes: | ||
| 556 | # - typemax(Ti) == 2^n-1 | ||
| 557 | # - typemax(Ti) can't be exactly represented by Tf: | ||
| 558 | # => Tf(typemax(Ti)) == 2^n or Inf | ||
| 559 | # - typemin(Ti) can be exactly represented by Tf | ||
| 560 | # | ||
| 561 | # 1. convert y::Ti to float fy::Tf | ||
| 562 | # 2. perform Tf comparison x vs fy | ||
| 563 | # 3. if x == fy, check if (1) resulted in rounding: | ||
| 564 | # a. convert fy back to Ti and compare with original y | ||
| 565 | # b. unsafe_convert undefined behaviour if fy == Tf(typemax(Ti)) | ||
| 566 | # (but consequently x == fy > y) | ||
| 567 | for Ti in (Int64,UInt64,Int128,UInt128) | ||
| 568 | for Tf in (Float32,Float64) | ||
| 569 | @eval begin | ||
| 570 | function ==(x::$Tf, y::$Ti) | ||
| 571 | fy = ($Tf)(y) | ||
| 572 | (x == fy) & (fy != $(Tf(typemax(Ti)))) & (y == unsafe_trunc($Ti,fy)) | ||
| 573 | end | ||
| 574 | ==(y::$Ti, x::$Tf) = x==y | ||
| 575 | |||
| 576 | function <(x::$Ti, y::$Tf) | ||
| 577 | fx = ($Tf)(x) | ||
| 578 | (fx < y) | ((fx == y) & ((fx == $(Tf(typemax(Ti)))) | (x < unsafe_trunc($Ti,fx)) )) | ||
| 579 | end | ||
| 580 | function <=(x::$Ti, y::$Tf) | ||
| 581 | fx = ($Tf)(x) | ||
| 582 | (fx < y) | ((fx == y) & ((fx == $(Tf(typemax(Ti)))) | (x <= unsafe_trunc($Ti,fx)) )) | ||
| 583 | end | ||
| 584 | |||
| 585 | function <(x::$Tf, y::$Ti) | ||
| 586 | fy = ($Tf)(y) | ||
| 587 | (x < fy) | ((x == fy) & (fy < $(Tf(typemax(Ti)))) & (unsafe_trunc($Ti,fy) < y)) | ||
| 588 | end | ||
| 589 | function <=(x::$Tf, y::$Ti) | ||
| 590 | fy = ($Tf)(y) | ||
| 591 | (x < fy) | ((x == fy) & (fy < $(Tf(typemax(Ti)))) & (unsafe_trunc($Ti,fy) <= y)) | ||
| 592 | end | ||
| 593 | end | ||
| 594 | end | ||
| 595 | end | ||
| 596 | for op in (:(==), :<, :<=) | ||
| 597 | @eval begin | ||
| 598 | ($op)(x::Float16, y::Union{Int128,UInt128,Int64,UInt64}) = ($op)(Float64(x), Float64(y)) | ||
| 599 | ($op)(x::Union{Int128,UInt128,Int64,UInt64}, y::Float16) = ($op)(Float64(x), Float64(y)) | ||
| 600 | |||
| 601 | ($op)(x::Union{Float16,Float32}, y::Union{Int32,UInt32}) = ($op)(Float64(x), Float64(y)) | ||
| 602 | ($op)(x::Union{Int32,UInt32}, y::Union{Float16,Float32}) = ($op)(Float64(x), Float64(y)) | ||
| 603 | |||
| 604 | ($op)(x::Float16, y::Union{Int16,UInt16}) = ($op)(Float32(x), Float32(y)) | ||
| 605 | ($op)(x::Union{Int16,UInt16}, y::Float16) = ($op)(Float32(x), Float32(y)) | ||
| 606 | end | ||
| 607 | end | ||
| 608 | |||
| 609 | |||
| 610 | abs(x::IEEEFloat) = abs_float(x) | ||
| 611 | |||
| 612 | """ | ||
| 613 | isnan(f) -> Bool | ||
| 614 | |||
| 615 | Test whether a number value is a NaN, an indeterminate value which is neither an infinity | ||
| 616 | nor a finite number ("not a number"). | ||
| 617 | |||
| 618 | See also: [`iszero`](@ref), [`isone`](@ref), [`isinf`](@ref), [`ismissing`](@ref). | ||
| 619 | """ | ||
| 620 | isnan(x::AbstractFloat) = (x != x)::Bool | ||
| 621 | isnan(x::Number) = false | ||
| 622 | |||
| 623 | isfinite(x::AbstractFloat) = !isnan(x - x) | ||
| 624 | isfinite(x::Real) = decompose(x)[3] != 0 | ||
| 625 | isfinite(x::Integer) = true | ||
| 626 | |||
| 627 | """ | ||
| 628 | isinf(f) -> Bool | ||
| 629 | |||
| 630 | Test whether a number is infinite. | ||
| 631 | |||
| 632 | See also: [`Inf`](@ref), [`iszero`](@ref), [`isfinite`](@ref), [`isnan`](@ref). | ||
| 633 | """ | ||
| 634 | isinf(x::Real) = !isnan(x) & !isfinite(x) | ||
| 635 | isinf(x::IEEEFloat) = abs(x) === oftype(x, Inf) | ||
| 636 | |||
| 637 | const hx_NaN = hash_uint64(reinterpret(UInt64, NaN)) | ||
| 638 | function hash(x::Float64, h::UInt) | ||
| 639 | # see comments on trunc and hash(Real, UInt) | ||
| 640 | if typemin(Int64) <= x < typemax(Int64) | ||
| 641 | xi = fptosi(Int64, x) | ||
| 642 | if isequal(xi, x) | ||
| 643 | return hash(xi, h) | ||
| 644 | end | ||
| 645 | elseif typemin(UInt64) <= x < typemax(UInt64) | ||
| 646 | xu = fptoui(UInt64, x) | ||
| 647 | if isequal(xu, x) | ||
| 648 | return hash(xu, h) | ||
| 649 | end | ||
| 650 | elseif isnan(x) | ||
| 651 | return hx_NaN ⊻ h # NaN does not have a stable bit pattern | ||
| 652 | end | ||
| 653 | return hash_uint64(bitcast(UInt64, x)) - 3h | ||
| 654 | end | ||
| 655 | |||
| 656 | hash(x::Float32, h::UInt) = hash(Float64(x), h) | ||
| 657 | |||
| 658 | function hash(x::Float16, h::UInt) | ||
| 659 | # see comments on trunc and hash(Real, UInt) | ||
| 660 | if isfinite(x) # all finite Float16 fit in Int64 | ||
| 661 | xi = fptosi(Int64, x) | ||
| 662 | if isequal(xi, x) | ||
| 663 | return hash(xi, h) | ||
| 664 | end | ||
| 665 | elseif isnan(x) | ||
| 666 | return hx_NaN ⊻ h # NaN does not have a stable bit pattern | ||
| 667 | end | ||
| 668 | return hash_uint64(bitcast(UInt64, Float64(x))) - 3h | ||
| 669 | end | ||
| 670 | |||
| 671 | ## generic hashing for rational values ## | ||
| 672 | function hash(x::Real, h::UInt) | ||
| 673 | # decompose x as num*2^pow/den | ||
| 674 | num, pow, den = decompose(x) | ||
| 675 | |||
| 676 | # handle special values | ||
| 677 | num == 0 && den == 0 && return hash(NaN, h) | ||
| 678 | num == 0 && return hash(ifelse(den > 0, 0.0, -0.0), h) | ||
| 679 | den == 0 && return hash(ifelse(num > 0, Inf, -Inf), h) | ||
| 680 | |||
| 681 | # normalize decomposition | ||
| 682 | if den < 0 | ||
| 683 | num = -num | ||
| 684 | den = -den | ||
| 685 | end | ||
| 686 | num_z = trailing_zeros(num) | ||
| 687 | num >>= num_z | ||
| 688 | den_z = trailing_zeros(den) | ||
| 689 | den >>= den_z | ||
| 690 | pow += num_z - den_z | ||
| 691 | # If the real can be represented as an Int64, UInt64, or Float64, hash as those types. | ||
| 692 | # To be an Integer the denominator must be 1 and the power must be non-negative. | ||
| 693 | if den == 1 | ||
| 694 | # left = ceil(log2(num*2^pow)) | ||
| 695 | left = top_set_bit(abs(num)) + pow | ||
| 696 | # 2^-1074 is the minimum Float64 so if the power is smaller, not a Float64 | ||
| 697 | if -1074 <= pow | ||
| 698 | if 0 <= pow # if pow is non-negative, it is an integer | ||
| 699 | left <= 63 && return hash(Int64(num) << Int(pow), h) | ||
| 700 | left <= 64 && !signbit(num) && return hash(UInt64(num) << Int(pow), h) | ||
| 701 | end # typemin(Int64) handled by Float64 case | ||
| 702 | # 2^1024 is the maximum Float64 so if the power is greater, not a Float64 | ||
| 703 | # Float64s only have 53 mantisa bits (including implicit bit) | ||
| 704 | left <= 1024 && left - pow <= 53 && return hash(ldexp(Float64(num), pow), h) | ||
| 705 | end | ||
| 706 | else | ||
| 707 | h = hash_integer(den, h) | ||
| 708 | end | ||
| 709 | # handle generic rational values | ||
| 710 | h = hash_integer(pow, h) | ||
| 711 | h = hash_integer(num, h) | ||
| 712 | return h | ||
| 713 | end | ||
| 714 | |||
| 715 | #= | ||
| 716 | `decompose(x)`: non-canonical decomposition of rational values as `num*2^pow/den`. | ||
| 717 | |||
| 718 | The decompose function is the point where rational-valued numeric types that support | ||
| 719 | hashing hook into the hashing protocol. `decompose(x)` should return three integer | ||
| 720 | values `num, pow, den`, such that the value of `x` is mathematically equal to | ||
| 721 | |||
| 722 | num*2^pow/den | ||
| 723 | |||
| 724 | The decomposition need not be canonical in the sense that it just needs to be *some* | ||
| 725 | way to express `x` in this form, not any particular way – with the restriction that | ||
| 726 | `num` and `den` may not share any odd common factors. They may, however, have powers | ||
| 727 | of two in common – the generic hashing code will normalize those as necessary. | ||
| 728 | |||
| 729 | Special values: | ||
| 730 | |||
| 731 | - `x` is zero: `num` should be zero and `den` should have the same sign as `x` | ||
| 732 | - `x` is infinite: `den` should be zero and `num` should have the same sign as `x` | ||
| 733 | - `x` is not a number: `num` and `den` should both be zero | ||
| 734 | =# | ||
| 735 | |||
| 736 | decompose(x::Integer) = x, 0, 1 | ||
| 737 | |||
| 738 | function decompose(x::Float16)::NTuple{3,Int} | ||
| 739 | isnan(x) && return 0, 0, 0 | ||
| 740 | isinf(x) && return ifelse(x < 0, -1, 1), 0, 0 | ||
| 741 | n = reinterpret(UInt16, x) | ||
| 742 | s = (n & 0x03ff) % Int16 | ||
| 743 | e = ((n & 0x7c00) >> 10) % Int | ||
| 744 | s |= Int16(e != 0) << 10 | ||
| 745 | d = ifelse(signbit(x), -1, 1) | ||
| 746 | s, e - 25 + (e == 0), d | ||
| 747 | end | ||
| 748 | |||
| 749 | function decompose(x::Float32)::NTuple{3,Int} | ||
| 750 | isnan(x) && return 0, 0, 0 | ||
| 751 | isinf(x) && return ifelse(x < 0, -1, 1), 0, 0 | ||
| 752 | n = reinterpret(UInt32, x) | ||
| 753 | s = (n & 0x007fffff) % Int32 | ||
| 754 | e = ((n & 0x7f800000) >> 23) % Int | ||
| 755 | s |= Int32(e != 0) << 23 | ||
| 756 | d = ifelse(signbit(x), -1, 1) | ||
| 757 | s, e - 150 + (e == 0), d | ||
| 758 | end | ||
| 759 | |||
| 760 | function decompose(x::Float64)::Tuple{Int64, Int, Int} | ||
| 761 | isnan(x) && return 0, 0, 0 | ||
| 762 | isinf(x) && return ifelse(x < 0, -1, 1), 0, 0 | ||
| 763 | n = reinterpret(UInt64, x) | ||
| 764 | s = (n & 0x000fffffffffffff) % Int64 | ||
| 765 | e = ((n & 0x7ff0000000000000) >> 52) % Int | ||
| 766 | s |= Int64(e != 0) << 52 | ||
| 767 | d = ifelse(signbit(x), -1, 1) | ||
| 768 | s, e - 1075 + (e == 0), d | ||
| 769 | end | ||
| 770 | |||
| 771 | |||
| 772 | """ | ||
| 773 | precision(num::AbstractFloat; base::Integer=2) | ||
| 774 | precision(T::Type; base::Integer=2) | ||
| 775 | |||
| 776 | Get the precision of a floating point number, as defined by the effective number of bits in | ||
| 777 | the significand, or the precision of a floating-point type `T` (its current default, if | ||
| 778 | `T` is a variable-precision type like [`BigFloat`](@ref)). | ||
| 779 | |||
| 780 | If `base` is specified, then it returns the maximum corresponding | ||
| 781 | number of significand digits in that base. | ||
| 782 | |||
| 783 | !!! compat "Julia 1.8" | ||
| 784 | The `base` keyword requires at least Julia 1.8. | ||
| 785 | """ | ||
| 786 | function precision end | ||
| 787 | |||
| 788 | _precision(::Type{Float16}) = 11 | ||
| 789 | _precision(::Type{Float32}) = 24 | ||
| 790 | _precision(::Type{Float64}) = 53 | ||
| 791 | function _precision(x, base::Integer=2) | ||
| 792 | base > 1 || throw(DomainError(base, "`base` cannot be less than 2.")) | ||
| 793 | p = _precision(x) | ||
| 794 | return base == 2 ? Int(p) : floor(Int, p / log2(base)) | ||
| 795 | end | ||
| 796 | precision(::Type{T}; base::Integer=2) where {T<:AbstractFloat} = _precision(T, base) | ||
| 797 | precision(::T; base::Integer=2) where {T<:AbstractFloat} = precision(T; base) | ||
| 798 | |||
| 799 | |||
| 800 | """ | ||
| 801 | nextfloat(x::AbstractFloat, n::Integer) | ||
| 802 | |||
| 803 | The result of `n` iterative applications of `nextfloat` to `x` if `n >= 0`, or `-n` | ||
| 804 | applications of [`prevfloat`](@ref) if `n < 0`. | ||
| 805 | """ | ||
| 806 | function nextfloat(f::IEEEFloat, d::Integer) | ||
| 807 | F = typeof(f) | ||
| 808 | fumax = reinterpret(Unsigned, F(Inf)) | ||
| 809 | U = typeof(fumax) | ||
| 810 | |||
| 811 | isnan(f) && return f | ||
| 812 | fi = reinterpret(Signed, f) | ||
| 813 | fneg = fi < 0 | ||
| 814 | fu = unsigned(fi & typemax(fi)) | ||
| 815 | |||
| 816 | dneg = d < 0 | ||
| 817 | da = uabs(d) | ||
| 818 | if da > typemax(U) | ||
| 819 | fneg = dneg | ||
| 820 | fu = fumax | ||
| 821 | else | ||
| 822 | du = da % U | ||
| 823 | if fneg ⊻ dneg | ||
| 824 | if du > fu | ||
| 825 | fu = min(fumax, du - fu) | ||
| 826 | fneg = !fneg | ||
| 827 | else | ||
| 828 | fu = fu - du | ||
| 829 | end | ||
| 830 | else | ||
| 831 | if fumax - fu < du | ||
| 832 | fu = fumax | ||
| 833 | else | ||
| 834 | fu = fu + du | ||
| 835 | end | ||
| 836 | end | ||
| 837 | end | ||
| 838 | if fneg | ||
| 839 | fu |= sign_mask(F) | ||
| 840 | end | ||
| 841 | reinterpret(F, fu) | ||
| 842 | end | ||
| 843 | |||
| 844 | """ | ||
| 845 | nextfloat(x::AbstractFloat) | ||
| 846 | |||
| 847 | Return the smallest floating point number `y` of the same type as `x` such `x < y`. If no | ||
| 848 | such `y` exists (e.g. if `x` is `Inf` or `NaN`), then return `x`. | ||
| 849 | |||
| 850 | See also: [`prevfloat`](@ref), [`eps`](@ref), [`issubnormal`](@ref). | ||
| 851 | """ | ||
| 852 | nextfloat(x::AbstractFloat) = nextfloat(x,1) | ||
| 853 | |||
| 854 | """ | ||
| 855 | prevfloat(x::AbstractFloat, n::Integer) | ||
| 856 | |||
| 857 | The result of `n` iterative applications of `prevfloat` to `x` if `n >= 0`, or `-n` | ||
| 858 | applications of [`nextfloat`](@ref) if `n < 0`. | ||
| 859 | """ | ||
| 860 | prevfloat(x::AbstractFloat, d::Integer) = nextfloat(x, -d) | ||
| 861 | |||
| 862 | """ | ||
| 863 | prevfloat(x::AbstractFloat) | ||
| 864 | |||
| 865 | Return the largest floating point number `y` of the same type as `x` such `y < x`. If no | ||
| 866 | such `y` exists (e.g. if `x` is `-Inf` or `NaN`), then return `x`. | ||
| 867 | """ | ||
| 868 | prevfloat(x::AbstractFloat) = nextfloat(x,-1) | ||
| 869 | |||
| 870 | for Ti in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128) | ||
| 871 | for Tf in (Float16, Float32, Float64) | ||
| 872 | if Ti <: Unsigned || sizeof(Ti) < sizeof(Tf) | ||
| 873 | # Here `Tf(typemin(Ti))-1` is exact, so we can compare the lower-bound | ||
| 874 | # directly. `Tf(typemax(Ti))+1` is either always exactly representable, or | ||
| 875 | # rounded to `Inf` (e.g. when `Ti==UInt128 && Tf==Float32`). | ||
| 876 | @eval begin | ||
| 877 | function trunc(::Type{$Ti},x::$Tf) | ||
| 878 | if $(Tf(typemin(Ti))-one(Tf)) < x < $(Tf(typemax(Ti))+one(Tf)) | ||
| 879 | return unsafe_trunc($Ti,x) | ||
| 880 | else | ||
| 881 | throw(InexactError(:trunc, $Ti, x)) | ||
| 882 | end | ||
| 883 | end | ||
| 884 | function (::Type{$Ti})(x::$Tf) | ||
| 885 | # When typemax(Ti) is not representable by Tf but typemax(Ti) + 1 is, | ||
| 886 | # then < Tf(typemax(Ti) + 1) is stricter than <= Tf(typemax(Ti)). Using | ||
| 887 | # the former causes us to throw on UInt64(Float64(typemax(UInt64))+1) | ||
| 888 | if ($(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti))+one(Tf))) && isinteger(x) | ||
| 889 | return unsafe_trunc($Ti,x) | ||
| 890 | else | ||
| 891 | throw(InexactError($(Expr(:quote,Ti.name.name)), $Ti, x)) | ||
| 892 | end | ||
| 893 | end | ||
| 894 | end | ||
| 895 | else | ||
| 896 | # Here `eps(Tf(typemin(Ti))) > 1`, so the only value which can be truncated to | ||
| 897 | # `Tf(typemin(Ti)` is itself. Similarly, `Tf(typemax(Ti))` is inexact and will | ||
| 898 | # be rounded up. This assumes that `Tf(typemin(Ti)) > -Inf`, which is true for | ||
| 899 | # these types, but not for `Float16` or larger integer types. | ||
| 900 | @eval begin | ||
| 901 | function trunc(::Type{$Ti},x::$Tf) | ||
| 902 | if $(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti))) | ||
| 903 | return unsafe_trunc($Ti,x) | ||
| 904 | else | ||
| 905 | throw(InexactError(:trunc, $Ti, x)) | ||
| 906 | end | ||
| 907 | end | ||
| 908 | function (::Type{$Ti})(x::$Tf) | ||
| 909 | if ($(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti)))) && isinteger(x) | ||
| 910 | return unsafe_trunc($Ti,x) | ||
| 911 | else | ||
| 912 | throw(InexactError($(Expr(:quote,Ti.name.name)), $Ti, x)) | ||
| 913 | end | ||
| 914 | end | ||
| 915 | end | ||
| 916 | end | ||
| 917 | end | ||
| 918 | end | ||
| 919 | |||
| 920 | """ | ||
| 921 | issubnormal(f) -> Bool | ||
| 922 | |||
| 923 | Test whether a floating point number is subnormal. | ||
| 924 | |||
| 925 | An IEEE floating point number is [subnormal](https://en.wikipedia.org/wiki/Subnormal_number) | ||
| 926 | when its exponent bits are zero and its significand is not zero. | ||
| 927 | |||
| 928 | # Examples | ||
| 929 | ```jldoctest | ||
| 930 | julia> floatmin(Float32) | ||
| 931 | 1.1754944f-38 | ||
| 932 | |||
| 933 | julia> issubnormal(1.0f-37) | ||
| 934 | false | ||
| 935 | |||
| 936 | julia> issubnormal(1.0f-38) | ||
| 937 | true | ||
| 938 | ``` | ||
| 939 | """ | ||
| 940 | function issubnormal(x::T) where {T<:IEEEFloat} | ||
| 941 | y = reinterpret(Unsigned, x) | ||
| 942 | (y & exponent_mask(T) == 0) & (y & significand_mask(T) != 0) | ||
| 943 | end | ||
| 944 | |||
| 945 | ispow2(x::AbstractFloat) = !iszero(x) && frexp(x)[1] == 0.5 | ||
| 946 | iseven(x::AbstractFloat) = isinteger(x) && (abs(x) > maxintfloat(x) || iseven(Integer(x))) | ||
| 947 | isodd(x::AbstractFloat) = isinteger(x) && abs(x) ≤ maxintfloat(x) && isodd(Integer(x)) | ||
| 948 | |||
| 949 | @eval begin | ||
| 950 | typemin(::Type{Float16}) = $(bitcast(Float16, 0xfc00)) | ||
| 951 | typemax(::Type{Float16}) = $(Inf16) | ||
| 952 | typemin(::Type{Float32}) = $(-Inf32) | ||
| 953 | typemax(::Type{Float32}) = $(Inf32) | ||
| 954 | typemin(::Type{Float64}) = $(-Inf64) | ||
| 955 | typemax(::Type{Float64}) = $(Inf64) | ||
| 956 | typemin(x::T) where {T<:Real} = typemin(T) | ||
| 957 | typemax(x::T) where {T<:Real} = typemax(T) | ||
| 958 | |||
| 959 | floatmin(::Type{Float16}) = $(bitcast(Float16, 0x0400)) | ||
| 960 | floatmin(::Type{Float32}) = $(bitcast(Float32, 0x00800000)) | ||
| 961 | floatmin(::Type{Float64}) = $(bitcast(Float64, 0x0010000000000000)) | ||
| 962 | floatmax(::Type{Float16}) = $(bitcast(Float16, 0x7bff)) | ||
| 963 | floatmax(::Type{Float32}) = $(bitcast(Float32, 0x7f7fffff)) | ||
| 964 | floatmax(::Type{Float64}) = $(bitcast(Float64, 0x7fefffffffffffff)) | ||
| 965 | |||
| 966 | eps(x::AbstractFloat) = isfinite(x) ? abs(x) >= floatmin(x) ? ldexp(eps(typeof(x)), exponent(x)) : nextfloat(zero(x)) : oftype(x, NaN) | ||
| 967 | eps(::Type{Float16}) = $(bitcast(Float16, 0x1400)) | ||
| 968 | eps(::Type{Float32}) = $(bitcast(Float32, 0x34000000)) | ||
| 969 | eps(::Type{Float64}) = $(bitcast(Float64, 0x3cb0000000000000)) | ||
| 970 | eps() = eps(Float64) | ||
| 971 | end | ||
| 972 | |||
| 973 | """ | ||
| 974 | floatmin(T = Float64) | ||
| 975 | |||
| 976 | Return the smallest positive normal number representable by the floating-point | ||
| 977 | type `T`. | ||
| 978 | |||
| 979 | # Examples | ||
| 980 | ```jldoctest | ||
| 981 | julia> floatmin(Float16) | ||
| 982 | Float16(6.104e-5) | ||
| 983 | |||
| 984 | julia> floatmin(Float32) | ||
| 985 | 1.1754944f-38 | ||
| 986 | |||
| 987 | julia> floatmin() | ||
| 988 | 2.2250738585072014e-308 | ||
| 989 | ``` | ||
| 990 | """ | ||
| 991 | floatmin(x::T) where {T<:AbstractFloat} = floatmin(T) | ||
| 992 | |||
| 993 | """ | ||
| 994 | floatmax(T = Float64) | ||
| 995 | |||
| 996 | Return the largest finite number representable by the floating-point type `T`. | ||
| 997 | |||
| 998 | See also: [`typemax`](@ref), [`floatmin`](@ref), [`eps`](@ref). | ||
| 999 | |||
| 1000 | # Examples | ||
| 1001 | ```jldoctest | ||
| 1002 | julia> floatmax(Float16) | ||
| 1003 | Float16(6.55e4) | ||
| 1004 | |||
| 1005 | julia> floatmax(Float32) | ||
| 1006 | 3.4028235f38 | ||
| 1007 | |||
| 1008 | julia> floatmax() | ||
| 1009 | 1.7976931348623157e308 | ||
| 1010 | |||
| 1011 | julia> typemax(Float64) | ||
| 1012 | Inf | ||
| 1013 | ``` | ||
| 1014 | """ | ||
| 1015 | floatmax(x::T) where {T<:AbstractFloat} = floatmax(T) | ||
| 1016 | |||
| 1017 | floatmin() = floatmin(Float64) | ||
| 1018 | floatmax() = floatmax(Float64) | ||
| 1019 | |||
| 1020 | """ | ||
| 1021 | eps(::Type{T}) where T<:AbstractFloat | ||
| 1022 | eps() | ||
| 1023 | |||
| 1024 | Return the *machine epsilon* of the floating point type `T` (`T = Float64` by | ||
| 1025 | default). This is defined as the gap between 1 and the next largest value representable by | ||
| 1026 | `typeof(one(T))`, and is equivalent to `eps(one(T))`. (Since `eps(T)` is a | ||
| 1027 | bound on the *relative error* of `T`, it is a "dimensionless" quantity like [`one`](@ref).) | ||
| 1028 | |||
| 1029 | # Examples | ||
| 1030 | ```jldoctest | ||
| 1031 | julia> eps() | ||
| 1032 | 2.220446049250313e-16 | ||
| 1033 | |||
| 1034 | julia> eps(Float32) | ||
| 1035 | 1.1920929f-7 | ||
| 1036 | |||
| 1037 | julia> 1.0 + eps() | ||
| 1038 | 1.0000000000000002 | ||
| 1039 | |||
| 1040 | julia> 1.0 + eps()/2 | ||
| 1041 | 1.0 | ||
| 1042 | ``` | ||
| 1043 | """ | ||
| 1044 | eps(::Type{<:AbstractFloat}) | ||
| 1045 | |||
| 1046 | """ | ||
| 1047 | eps(x::AbstractFloat) | ||
| 1048 | |||
| 1049 | Return the *unit in last place* (ulp) of `x`. This is the distance between consecutive | ||
| 1050 | representable floating point values at `x`. In most cases, if the distance on either side | ||
| 1051 | of `x` is different, then the larger of the two is taken, that is | ||
| 1052 | |||
| 1053 | eps(x) == max(x-prevfloat(x), nextfloat(x)-x) | ||
| 1054 | |||
| 1055 | The exceptions to this rule are the smallest and largest finite values | ||
| 1056 | (e.g. `nextfloat(-Inf)` and `prevfloat(Inf)` for [`Float64`](@ref)), which round to the | ||
| 1057 | smaller of the values. | ||
| 1058 | |||
| 1059 | The rationale for this behavior is that `eps` bounds the floating point rounding | ||
| 1060 | error. Under the default `RoundNearest` rounding mode, if ``y`` is a real number and ``x`` | ||
| 1061 | is the nearest floating point number to ``y``, then | ||
| 1062 | |||
| 1063 | ```math | ||
| 1064 | |y-x| \\leq \\operatorname{eps}(x)/2. | ||
| 1065 | ``` | ||
| 1066 | |||
| 1067 | See also: [`nextfloat`](@ref), [`issubnormal`](@ref), [`floatmax`](@ref). | ||
| 1068 | |||
| 1069 | # Examples | ||
| 1070 | ```jldoctest | ||
| 1071 | julia> eps(1.0) | ||
| 1072 | 2.220446049250313e-16 | ||
| 1073 | |||
| 1074 | julia> eps(prevfloat(2.0)) | ||
| 1075 | 2.220446049250313e-16 | ||
| 1076 | |||
| 1077 | julia> eps(2.0) | ||
| 1078 | 4.440892098500626e-16 | ||
| 1079 | |||
| 1080 | julia> x = prevfloat(Inf) # largest finite Float64 | ||
| 1081 | 1.7976931348623157e308 | ||
| 1082 | |||
| 1083 | julia> x + eps(x)/2 # rounds up | ||
| 1084 | Inf | ||
| 1085 | |||
| 1086 | julia> x + prevfloat(eps(x)/2) # rounds down | ||
| 1087 | 1.7976931348623157e308 | ||
| 1088 | ``` | ||
| 1089 | """ | ||
| 1090 | eps(::AbstractFloat) | ||
| 1091 | |||
| 1092 | |||
| 1093 | ## byte order swaps for arbitrary-endianness serialization/deserialization ## | ||
| 1094 | bswap(x::IEEEFloat) = bswap_int(x) | ||
| 1095 | |||
| 1096 | # integer size of float | ||
| 1097 | uinttype(::Type{Float64}) = UInt64 | ||
| 1098 | uinttype(::Type{Float32}) = UInt32 | ||
| 1099 | uinttype(::Type{Float16}) = UInt16 | ||
| 1100 | inttype(::Type{Float64}) = Int64 | ||
| 1101 | inttype(::Type{Float32}) = Int32 | ||
| 1102 | inttype(::Type{Float16}) = Int16 | ||
| 1103 | # float size of integer | ||
| 1104 | floattype(::Type{UInt64}) = Float64 | ||
| 1105 | floattype(::Type{UInt32}) = Float32 | ||
| 1106 | floattype(::Type{UInt16}) = Float16 | ||
| 1107 | floattype(::Type{Int64}) = Float64 | ||
| 1108 | floattype(::Type{Int32}) = Float32 | ||
| 1109 | floattype(::Type{Int16}) = Float16 | ||
| 1110 | |||
| 1111 | |||
| 1112 | ## Array operations on floating point numbers ## | ||
| 1113 | |||
| 1114 | float(A::AbstractArray{<:AbstractFloat}) = A | ||
| 1115 | |||
| 1116 | function float(A::AbstractArray{T}) where T | ||
| 1117 | if !isconcretetype(T) | ||
| 1118 | error("`float` not defined on abstractly-typed arrays; please convert to a more specific type") | ||
| 1119 | end | ||
| 1120 | convert(AbstractArray{typeof(float(zero(T)))}, A) | ||
| 1121 | end | ||
| 1122 | |||
| 1123 | float(r::StepRange) = float(r.start):float(r.step):float(last(r)) | ||
| 1124 | float(r::UnitRange) = float(r.start):float(last(r)) | ||
| 1125 | float(r::StepRangeLen{T}) where {T} = | ||
| 1126 | StepRangeLen{typeof(float(T(r.ref)))}(float(r.ref), float(r.step), length(r), r.offset) | ||
| 1127 | function float(r::LinRange) | ||
| 1128 | LinRange(float(r.start), float(r.stop), length(r)) | ||
| 1129 | end |