1/* SPDX-License-Identifier: MIT OR Apache-2.0 */
2use crate::support::{CastFrom, CastInto, Float, HInt, Int, MinInt, NarrowingDiv};
34#[inline]
5pub fn fmod<F: Float>(x: F, y: F) -> F
6where
7F::Int: HInt,
8 <F::Int as HInt>::D: NarrowingDiv,
9{
10let _1 = F::Int::ONE;
11let sx = x.to_bits() & F::SIGN_MASK;
12let ux = x.to_bits() & !F::SIGN_MASK;
13let uy = y.to_bits() & !F::SIGN_MASK;
1415// Cases that return NaN:
16 // NaN % _
17 // Inf % _
18 // _ % NaN
19 // _ % 0
20let x_nan_or_inf = ux & F::EXP_MASK == F::EXP_MASK;
21let y_nan_or_zero = uy.wrapping_sub(_1) & F::EXP_MASK == F::EXP_MASK;
22if x_nan_or_inf | y_nan_or_zero {
23return (x * y) / (x * y);
24 }
2526if ux < uy {
27// |x| < |y|
28return x;
29 }
3031let (num, ex) = into_sig_exp::<F>(ux);
32let (div, ey) = into_sig_exp::<F>(uy);
3334// To compute `(num << ex) % (div << ey)`, first
35 // evaluate `rem = (num << (ex - ey)) % div` ...
36let rem = reduction::<F>(num, ex - ey, div);
37// ... so the result will be `rem << ey`
3839if rem.is_zero() {
40// Return zero with the sign of `x`
41return F::from_bits(sx);
42 };
4344// We would shift `rem` up by `ey`, but have to stop at `F::SIG_BITS`
45let shift = ey.min(F::SIG_BITS - rem.ilog2());
46// Anything past that is added to the exponent field
47let bits = (rem << shift) + (F::Int::cast_from(ey - shift) << F::SIG_BITS);
48 F::from_bits(sx + bits)
49}
5051/// Given the bits of a finite float, return a tuple of
52/// - the mantissa with the implicit bit (0 if subnormal, 1 otherwise)
53/// - the additional exponent past 1, (0 for subnormal, 0 or more otherwise)
54fn into_sig_exp<F: Float>(mut bits: F::Int) -> (F::Int, u32) {
55bits &= !F::SIGN_MASK;
56// Subtract 1 from the exponent, clamping at 0
57let sat = bits.checked_sub(F::IMPLICIT_BIT).unwrap_or(F::Int::ZERO);
58 (
59bits - (sat & F::EXP_MASK),
60u32::cast_from(sat >> F::SIG_BITS),
61 )
62}
6364/// Compute the remainder `(x * 2.pow(e)) % y` without overflow.
65fn reduction<F>(mut x: F::Int, e: u32, y: F::Int) -> F::Int
66where
67F: Float,
68 F::Int: HInt,
69 <<F as Float>::Intas HInt>::D: NarrowingDiv,
70{
71// `f16` only has 5 exponent bits, so even `f16::MAX = 65504.0` is only
72 // a 40-bit integer multiple of the smallest subnormal.
73if F::BITS == 16 {
74if true {
if !(F::EXP_MAX - F::EXP_MIN == 29) {
::core::panicking::panic("assertion failed: F::EXP_MAX - F::EXP_MIN == 29")
};
};debug_assert!(F::EXP_MAX - F::EXP_MIN == 29);
75if true {
if !(e <= 29) { ::core::panicking::panic("assertion failed: e <= 29") };
};debug_assert!(e <= 29);
76let u: u16 = x.cast();
77let v: u16 = y.cast();
78let u = (uas u64) << e;
79let v = vas u64;
80return F::Int::cast_from((u % v) as u16);
81 }
8283// Ensure `x < 2y` for later steps
84if x >= (y << 1) {
85// This case is only reached with subnormal divisors,
86 // but it might be better to just normalize all significands
87 // to make this unnecessary. The further calls could potentially
88 // benefit from assuming a specific fixed leading bit position.
89x %= y;
90 }
9192// The simple implementation seems to be fastest for a short reduction
93 // at this size. The limit here was chosen empirically on an Intel Nehalem.
94 // Less old CPUs that have faster `u64 * u64 -> u128` might not benefit,
95 // and 32-bit systems or architectures without hardware multipliers might
96 // want to do this in more cases.
97if F::BITS == 64 && e < 32 {
98// Assumes `x < 2y`
99for _ in 0..e {
100 x = x.checked_sub(y).unwrap_or(x);
101 x <<= 1;
102 }
103return x.checked_sub(y).unwrap_or(x);
104 }
105106// Fast path for short reductions
107if e < F::BITS {
108let w = x.widen() << e;
109if let Some((_, r)) = w.checked_narrowing_div_rem(y) {
110return r;
111 }
112 }
113114// Assumes `x < 2y`
115crate::support::linear_mul_reduction(x, e, y)
116}