Skip to main content

core\num\imp/
int_sqrt.rs

1//! These functions use the [Karatsuba square root algorithm][1] to compute the
2//! [integer square root](https://en.wikipedia.org/wiki/Integer_square_root)
3//! for the primitive integer types.
4//!
5//! The signed integer functions can only handle **nonnegative** inputs, so
6//! that must be checked before calling those.
7//!
8//! [1]: <https://web.archive.org/web/20230511212802/https://inria.hal.science/inria-00072854v1/file/RR-3805.pdf>
9//! "Paul Zimmermann. Karatsuba Square Root. \[Research Report\] RR-3805,
10//! INRIA. 1999, pp.8. (inria-00072854)"
11
12/// This array stores the [integer square roots](
13/// https://en.wikipedia.org/wiki/Integer_square_root) and remainders of each
14/// [`u8`](prim@u8) value. For example, `U8_ISQRT_WITH_REMAINDER[17]` will be
15/// `(4, 1)` because the integer square root of 17 is 4 and because 17 is 1
16/// higher than 4 squared.
17const U8_ISQRT_WITH_REMAINDER: [(u8, u8); 256] = {
18    let mut result = [(0, 0); 256];
19
20    let mut n: usize = 0;
21    let mut isqrt_n: usize = 0;
22    while n < result.len() {
23        result[n] = (isqrt_n as u8, (n - isqrt_n.pow(2)) as u8);
24
25        n += 1;
26        if n == (isqrt_n + 1).pow(2) {
27            isqrt_n += 1;
28        }
29    }
30
31    result
32};
33
34/// Returns the [integer square root](
35/// https://en.wikipedia.org/wiki/Integer_square_root) of any [`u8`](prim@u8)
36/// input.
37#[must_use = "this returns the result of the operation, \
38              without modifying the original"]
39#[inline]
40pub(in crate::num) const fn u8(n: u8) -> u8 {
41    U8_ISQRT_WITH_REMAINDER[n as usize].0
42}
43
44/// Generates a `u*` function that returns the [integer square root](
45/// https://en.wikipedia.org/wiki/Integer_square_root) of any input of
46/// a specific unsigned integer type.
47macro_rules! unsigned_fn {
48    ($UnsignedT:ident, $HalfBitsT:ident, $stages:ident) => {
49        /// Returns the [integer square root](
50        /// https://en.wikipedia.org/wiki/Integer_square_root) of any
51        #[doc = concat!("[`", stringify!($UnsignedT), "`](prim@", stringify!($UnsignedT), ")")]
52        /// input.
53        #[must_use = "this returns the result of the operation, \
54                      without modifying the original"]
55        #[inline]
56        pub(in crate::num) const fn $UnsignedT(mut n: $UnsignedT) -> $UnsignedT {
57            if n <= <$HalfBitsT>::MAX as $UnsignedT {
58                $HalfBitsT(n as $HalfBitsT) as $UnsignedT
59            } else {
60                // The normalization shift satisfies the Karatsuba square root
61                // algorithm precondition "a₃ ≥ b/4" where a₃ is the most
62                // significant quarter of `n`'s bits and b is the number of
63                // values that can be represented by that quarter of the bits.
64                //
65                // b/4 would then be all 0s except the second most significant
66                // bit (010...0) in binary. Since a₃ must be at least b/4, a₃'s
67                // most significant bit or its neighbor must be a 1. Since a₃'s
68                // most significant bits are `n`'s most significant bits, the
69                // same applies to `n`.
70                //
71                // The reason to shift by an even number of bits is because an
72                // even number of bits produces the square root shifted to the
73                // left by half of the normalization shift:
74                //
75                // sqrt(n << (2 * p))
76                // sqrt(2.pow(2 * p) * n)
77                // sqrt(2.pow(2 * p)) * sqrt(n)
78                // 2.pow(p) * sqrt(n)
79                // sqrt(n) << p
80                //
81                // Shifting by an odd number of bits leaves an ugly sqrt(2)
82                // multiplied in:
83                //
84                // sqrt(n << (2 * p + 1))
85                // sqrt(2.pow(2 * p + 1) * n)
86                // sqrt(2 * 2.pow(2 * p) * n)
87                // sqrt(2) * sqrt(2.pow(2 * p)) * sqrt(n)
88                // sqrt(2) * 2.pow(p) * sqrt(n)
89                // sqrt(2) * (sqrt(n) << p)
90                const EVEN_MAKING_BITMASK: u32 = !1;
91                let normalization_shift = n.leading_zeros() & EVEN_MAKING_BITMASK;
92                n <<= normalization_shift;
93
94                let s = $stages(n);
95
96                let denormalization_shift = normalization_shift >> 1;
97                s >> denormalization_shift
98            }
99        }
100    };
101}
102
103/// Generates the first stage of the computation after normalization.
104///
105/// # Safety
106///
107/// `$n` must be nonzero.
108macro_rules! first_stage {
109    ($original_bits:literal, $n:ident) => {{
110        debug_assert!($n != 0, "`$n` is  zero in `first_stage!`.");
111
112        const N_SHIFT: u32 = $original_bits - 8;
113        let n = $n >> N_SHIFT;
114
115        let (s, r) = U8_ISQRT_WITH_REMAINDER[n as usize];
116
117        // Inform the optimizer that `s` is nonzero. This will allow it to
118        // avoid generating code to handle division-by-zero panics in the next
119        // stage.
120        //
121        // SAFETY: If the original `$n` is zero, the top of the `unsigned_fn`
122        // macro recurses instead of continuing to this point, so the original
123        // `$n` wasn't a 0 if we've reached here.
124        //
125        // Then the `unsigned_fn` macro normalizes `$n` so that at least one of
126        // its two most-significant bits is a 1.
127        //
128        // Then this stage puts the eight most-significant bits of `$n` into
129        // `n`. This means that `n` here has at least one 1 bit in its two
130        // most-significant bits, making `n` nonzero.
131        //
132        // `U8_ISQRT_WITH_REMAINDER[n as usize]` will give a nonzero `s` when
133        // given a nonzero `n`.
134        unsafe { crate::hint::assert_unchecked(s != 0) };
135        (s, r)
136    }};
137}
138
139/// Generates a middle stage of the computation.
140///
141/// # Safety
142///
143/// `$s` must be nonzero.
144macro_rules! middle_stage {
145    ($original_bits:literal, $ty:ty, $n:ident, $s:ident, $r:ident) => {{
146        debug_assert!($s != 0, "`$s` is  zero in `middle_stage!`.");
147
148        const N_SHIFT: u32 = $original_bits - <$ty>::BITS;
149        let n = ($n >> N_SHIFT) as $ty;
150
151        const HALF_BITS: u32 = <$ty>::BITS >> 1;
152        const QUARTER_BITS: u32 = <$ty>::BITS >> 2;
153        const LOWER_HALF_1_BITS: $ty = (1 << HALF_BITS) - 1;
154        const LOWEST_QUARTER_1_BITS: $ty = (1 << QUARTER_BITS) - 1;
155
156        let lo = n & LOWER_HALF_1_BITS;
157        let numerator = (($r as $ty) << QUARTER_BITS) | (lo >> QUARTER_BITS);
158        let denominator = ($s as $ty) << 1;
159        let q = numerator / denominator;
160        let u = numerator % denominator;
161
162        let mut s = ($s << QUARTER_BITS) as $ty + q;
163        let (mut r, overflow) =
164            ((u << QUARTER_BITS) | (lo & LOWEST_QUARTER_1_BITS)).overflowing_sub(q * q);
165        if overflow {
166            r = r.wrapping_add(2 * s - 1);
167            s -= 1;
168        }
169
170        // Inform the optimizer that `s` is nonzero. This will allow it to
171        // avoid generating code to handle division-by-zero panics in the next
172        // stage.
173        //
174        // SAFETY: If the original `$n` is zero, the top of the `unsigned_fn`
175        // macro recurses instead of continuing to this point, so the original
176        // `$n` wasn't a 0 if we've reached here.
177        //
178        // Then the `unsigned_fn` macro normalizes `$n` so that at least one of
179        // its two most-significant bits is a 1.
180        //
181        // Then these stages take as many of the most-significant bits of `$n`
182        // as will fit in this stage's type. For example, the stage that
183        // handles `u32` deals with the 32 most-significant bits of `$n`. This
184        // means that each stage has at least one 1 bit in `n`'s two
185        // most-significant bits, making `n` nonzero.
186        //
187        // Then this stage will produce the correct integer square root for
188        // that `n` value. Since `n` is nonzero, `s` will also be nonzero.
189        unsafe { crate::hint::assert_unchecked(s != 0) };
190        (s, r)
191    }};
192}
193
194/// Generates the last stage of the computation before denormalization.
195///
196/// # Safety
197///
198/// `$s` must be nonzero.
199macro_rules! last_stage {
200    ($ty:ty, $n:ident, $s:ident, $r:ident) => {{
201        debug_assert!($s != 0, "`$s` is  zero in `last_stage!`.");
202
203        const HALF_BITS: u32 = <$ty>::BITS >> 1;
204        const QUARTER_BITS: u32 = <$ty>::BITS >> 2;
205        const LOWER_HALF_1_BITS: $ty = (1 << HALF_BITS) - 1;
206
207        let lo = $n & LOWER_HALF_1_BITS;
208        let numerator = (($r as $ty) << QUARTER_BITS) | (lo >> QUARTER_BITS);
209        let denominator = ($s as $ty) << 1;
210
211        let q = numerator / denominator;
212        let mut s = ($s << QUARTER_BITS) as $ty + q;
213        let (s_squared, overflow) = s.overflowing_mul(s);
214        if overflow || s_squared > $n {
215            s -= 1;
216        }
217        s
218    }};
219}
220
221/// Takes the normalized [`u16`](prim@u16) input and gets its normalized
222/// [integer square root](https://en.wikipedia.org/wiki/Integer_square_root).
223///
224/// # Safety
225///
226/// `n` must be nonzero.
227#[inline]
228const fn u16_stages(n: u16) -> u16 {
229    let (s, r) = first_stage!(16, n);
230    last_stage!(u16, n, s, r)
231}
232
233/// Takes the normalized [`u32`](prim@u32) input and gets its normalized
234/// [integer square root](https://en.wikipedia.org/wiki/Integer_square_root).
235///
236/// # Safety
237///
238/// `n` must be nonzero.
239#[inline]
240const fn u32_stages(n: u32) -> u32 {
241    let (s, r) = first_stage!(32, n);
242    let (s, r) = middle_stage!(32, u16, n, s, r);
243    last_stage!(u32, n, s, r)
244}
245
246/// Takes the normalized [`u64`](prim@u64) input and gets its normalized
247/// [integer square root](https://en.wikipedia.org/wiki/Integer_square_root).
248///
249/// # Safety
250///
251/// `n` must be nonzero.
252#[inline]
253const fn u64_stages(n: u64) -> u64 {
254    let (s, r) = first_stage!(64, n);
255    let (s, r) = middle_stage!(64, u16, n, s, r);
256    let (s, r) = middle_stage!(64, u32, n, s, r);
257    last_stage!(u64, n, s, r)
258}
259
260/// Takes the normalized [`u128`](prim@u128) input and gets its normalized
261/// [integer square root](https://en.wikipedia.org/wiki/Integer_square_root).
262///
263/// # Safety
264///
265/// `n` must be nonzero.
266#[inline]
267const fn u128_stages(n: u128) -> u128 {
268    let (s, r) = first_stage!(128, n);
269    let (s, r) = middle_stage!(128, u16, n, s, r);
270    let (s, r) = middle_stage!(128, u32, n, s, r);
271    let (s, r) = middle_stage!(128, u64, n, s, r);
272    last_stage!(u128, n, s, r)
273}
274
275unsigned_fn!(u16, u8, u16_stages);
276unsigned_fn!(u32, u16, u32_stages);
277unsigned_fn!(u64, u32, u64_stages);
278unsigned_fn!(u128, u64, u128_stages);
279
280/// Instantiate this panic logic once, rather than for all the isqrt methods
281/// on every single primitive type.
282#[cold]
283#[track_caller]
284pub(in crate::num) const fn panic_for_negative_argument() -> ! {
285    panic!("argument of integer square root cannot be negative")
286}