zmij/
lib.rs

1//! [![github]](https://github.com/dtolnay/zmij)&ensp;[![crates-io]](https://crates.io/crates/zmij)&ensp;[![docs-rs]](https://docs.rs/zmij)
2//!
3//! [github]: https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github
4//! [crates-io]: https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust
5//! [docs-rs]: https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs
6//!
7//! <br>
8//!
9//! A double-to-string conversion algorithm based on [Schubfach] and [yy].
10//!
11//! This Rust implementation is a line-by-line port of Victor Zverovich's
12//! implementation in C++, <https://github.com/vitaut/zmij>.
13//!
14//! [Schubfach]: https://fmt.dev/papers/Schubfach4.pdf
15//! [yy]: https://github.com/ibireme/c_numconv_benchmark/blob/master/vendor/yy_double/yy_double.c
16//!
17//! <br>
18//!
19//! # Example
20//!
21//! ```
22//! fn main() {
23//!     let mut buffer = zmij::Buffer::new();
24//!     let printed = buffer.format(1.234);
25//!     assert_eq!(printed, "1.234");
26//! }
27//! ```
28//!
29//! <br>
30//!
31//! ## Performance
32//!
33//! The [dtoa-benchmark] compares this library and other Rust floating point
34//! formatting implementations across a range of precisions. The vertical axis
35//! in this chart shows nanoseconds taken by a single execution of
36//! `zmij::Buffer::new().format_finite(value)` so a lower result indicates a
37//! faster library.
38//!
39//! [dtoa-benchmark]: https://github.com/dtolnay/dtoa-benchmark
40//!
41//! ![performance](https://raw.githubusercontent.com/dtolnay/zmij/master/dtoa-benchmark.png)
42
43#![no_std]
44#![doc(html_root_url = "https://docs.rs/zmij/1.0.21")]
45#![deny(unsafe_op_in_unsafe_fn)]
46#![allow(non_camel_case_types, non_snake_case)]
47#![allow(
48    clippy::blocks_in_conditions,
49    clippy::cast_possible_truncation,
50    clippy::cast_possible_wrap,
51    clippy::cast_ptr_alignment,
52    clippy::cast_sign_loss,
53    clippy::doc_markdown,
54    clippy::incompatible_msrv,
55    clippy::items_after_statements,
56    clippy::many_single_char_names,
57    clippy::modulo_one,
58    clippy::must_use_candidate,
59    clippy::needless_doctest_main,
60    clippy::never_loop,
61    clippy::redundant_else,
62    clippy::similar_names,
63    clippy::too_many_arguments,
64    clippy::too_many_lines,
65    clippy::unreadable_literal,
66    clippy::used_underscore_items,
67    clippy::while_immutable_condition,
68    clippy::wildcard_imports
69)]
70
71#[cfg(zmij_no_select_unpredictable)]
72mod hint;
73#[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
74mod stdarch_x86;
75#[cfg(test)]
76mod tests;
77mod traits;
78
79#[cfg(all(any(target_arch = "aarch64", target_arch = "x86_64"), not(miri)))]
80use core::arch::asm;
81#[cfg(not(zmij_no_select_unpredictable))]
82use core::hint;
83use core::mem::{self, MaybeUninit};
84use core::ptr;
85use core::slice;
86use core::str;
87#[cfg(feature = "no-panic")]
88use no_panic::no_panic;
89
// Capacity in bytes of the formatting buffer (its users are outside this part
// of the file).
const BUFFER_SIZE: usize = 24;
// Textual forms of the non-finite values.
const NAN: &str = "NaN";
const INFINITY: &str = "inf";
const NEG_INFINITY: &str = "-inf";
94
95// Returns true_value if lhs < rhs, else false_value, without branching.
96#[inline]
97fn select_if_less(lhs: u64, rhs: u64, true_value: i64, false_value: i64) -> i64 {
98    hint::select_unpredictable(lhs < rhs, true_value, false_value)
99}
100
// A 128-bit unsigned value stored as separate high and low 64-bit halves.
// Debug and PartialEq are only needed by the tests.
#[derive(Copy, Clone)]
#[cfg_attr(test, derive(Debug, PartialEq))]
struct uint128 {
    hi: u64,
    lo: u64,
}
107
// Use umul128_hi64 for division. Enabled only when compiling for an Apple
// target.
const USE_UMUL128_HI64: bool = cfg!(target_vendor = "apple");
110
// Computes 128-bit result of multiplication of two 64-bit unsigned integers.
// The product of two u64 values always fits in a u128, so this cannot overflow.
const fn umul128(x: u64, y: u64) -> u128 {
    x as u128 * y as u128
}
115
116const fn umul128_hi64(x: u64, y: u64) -> u64 {
117    (umul128(x, y) >> 64) as u64
118}
119
120#[cfg_attr(feature = "no-panic", no_panic)]
121fn umul192_hi128(x_hi: u64, x_lo: u64, y: u64) -> uint128 {
122    let p = umul128(x_hi, y);
123    let lo = (p as u64).wrapping_add((umul128(x_lo, y) >> 64) as u64);
124    uint128 {
125        hi: (p >> 64) as u64 + u64::from(lo < p as u64),
126        lo,
127    }
128}
129
130// Computes high 64 bits of multiplication of x and y, discards the least
131// significant bit and rounds to odd, where x = uint128_t(x_hi << 64) | x_lo.
132#[cfg_attr(feature = "no-panic", no_panic)]
133fn umulhi_inexact_to_odd<UInt>(x_hi: u64, x_lo: u64, y: UInt) -> UInt
134where
135    UInt: traits::UInt,
136{
137    let num_bits = mem::size_of::<UInt>() * 8;
138    if num_bits == 64 {
139        let p = umul192_hi128(x_hi, x_lo, y.into());
140        UInt::truncate(p.hi | u64::from((p.lo >> 1) != 0))
141    } else {
142        let p = (umul128(x_hi, y.into()) >> 32) as u64;
143        UInt::enlarge((p >> 32) as u32 | u32::from((p as u32 >> 1) != 0))
144    }
145}
146
147trait FloatTraits: traits::Float {
148    const NUM_BITS: i32;
149    const NUM_SIG_BITS: i32 = Self::MANTISSA_DIGITS as i32 - 1;
150    const NUM_EXP_BITS: i32 = Self::NUM_BITS - Self::NUM_SIG_BITS - 1;
151    const EXP_MASK: i32 = (1 << Self::NUM_EXP_BITS) - 1;
152    const EXP_BIAS: i32 = (1 << (Self::NUM_EXP_BITS - 1)) - 1;
153    const EXP_OFFSET: i32 = Self::EXP_BIAS + Self::NUM_SIG_BITS;
154
155    type SigType: traits::UInt;
156    const IMPLICIT_BIT: Self::SigType;
157
158    fn to_bits(self) -> Self::SigType;
159
160    fn is_negative(bits: Self::SigType) -> bool {
161        (bits >> (Self::NUM_BITS - 1)) != Self::SigType::from(0)
162    }
163
164    fn get_sig(bits: Self::SigType) -> Self::SigType {
165        bits & (Self::IMPLICIT_BIT - Self::SigType::from(1))
166    }
167
168    fn get_exp(bits: Self::SigType) -> i64 {
169        (bits << 1u8 >> (Self::NUM_SIG_BITS + 1)).into() as i64
170    }
171}
172
173impl FloatTraits for f32 {
174    const NUM_BITS: i32 = 32;
175    const IMPLICIT_BIT: u32 = 1 << Self::NUM_SIG_BITS;
176
177    type SigType = u32;
178
179    fn to_bits(self) -> Self::SigType {
180        self.to_bits()
181    }
182}
183
184impl FloatTraits for f64 {
185    const NUM_BITS: i32 = 64;
186    const IMPLICIT_BIT: u64 = 1 << Self::NUM_SIG_BITS;
187
188    type SigType = u64;
189
190    fn to_bits(self) -> Self::SigType {
191        self.to_bits()
192    }
193}
194
// 64-bit significands of 10**0 through 10**27, each normalized so that the
// most significant bit is set. Indexed by (i + 11) % 28 in
// Pow10SignificandsTable::compute.
#[rustfmt::skip]
const POW10S: [u64; 28] = [
    0x8000000000000000, 0xa000000000000000, 0xc800000000000000,
    0xfa00000000000000, 0x9c40000000000000, 0xc350000000000000,
    0xf424000000000000, 0x9896800000000000, 0xbebc200000000000,
    0xee6b280000000000, 0x9502f90000000000, 0xba43b74000000000,
    0xe8d4a51000000000, 0x9184e72a00000000, 0xb5e620f480000000,
    0xe35fa931a0000000, 0x8e1bc9bf04000000, 0xb1a2bc2ec5000000,
    0xde0b6b3a76400000, 0x8ac7230489e80000, 0xad78ebc5ac620000,
    0xd8d726b7177a8000, 0x878678326eac9000, 0xa968163f0a57b400,
    0xd3c21bcecceda100, 0x84595161401484a0, 0xa56fa5b99019a5c8,
    0xcecb8f27f4200f3a,
];
208
209#[rustfmt::skip]
210const HIGH_PARTS: [uint128; 23] = [
211    uint128 { hi: 0xaf8e5410288e1b6f, lo: 0x07ecf0ae5ee44dda },
212    uint128 { hi: 0xb1442798f49ffb4a, lo: 0x99cd11cfdf41779d },
213    uint128 { hi: 0xb2fe3f0b8599ef07, lo: 0x861fa7e6dcb4aa15 },
214    uint128 { hi: 0xb4bca50b065abe63, lo: 0x0fed077a756b53aa },
215    uint128 { hi: 0xb67f6455292cbf08, lo: 0x1a3bc84c17b1d543 },
216    uint128 { hi: 0xb84687c269ef3bfb, lo: 0x3d5d514f40eea742 },
217    uint128 { hi: 0xba121a4650e4ddeb, lo: 0x92f34d62616ce413 },
218    uint128 { hi: 0xbbe226efb628afea, lo: 0x890489f70a55368c },
219    uint128 { hi: 0xbdb6b8e905cb600f, lo: 0x5400e987bbc1c921 },
220    uint128 { hi: 0xbf8fdb78849a5f96, lo: 0xde98520472bdd034 },
221    uint128 { hi: 0xc16d9a0095928a27, lo: 0x75b7053c0f178294 },
222    uint128 { hi: 0xc350000000000000, lo: 0x0000000000000000 },
223    uint128 { hi: 0xc5371912364ce305, lo: 0x6c28000000000000 },
224    uint128 { hi: 0xc722f0ef9d80aad6, lo: 0x424d3ad2b7b97ef6 },
225    uint128 { hi: 0xc913936dd571c84c, lo: 0x03bc3a19cd1e38ea },
226    uint128 { hi: 0xcb090c8001ab551c, lo: 0x5cadf5bfd3072cc6 },
227    uint128 { hi: 0xcd036837130890a1, lo: 0x36dba887c37a8c10 },
228    uint128 { hi: 0xcf02b2c21207ef2e, lo: 0x94f967e45e03f4bc },
229    uint128 { hi: 0xd106f86e69d785c7, lo: 0xe13336d701beba52 },
230    uint128 { hi: 0xd31045a8341ca07c, lo: 0x1ede48111209a051 },
231    uint128 { hi: 0xd51ea6fa85785631, lo: 0x552a74227f3ea566 },
232    uint128 { hi: 0xd732290fbacaf133, lo: 0xa97c177947ad4096 },
233    uint128 { hi: 0xd94ad8b1c7380874, lo: 0x18375281ae7822bc },
234];
235
// Bitmap of per-entry corrections: bit (i & 31) of word (i >> 5) is
// subtracted from the low half of the significand computed for index i in
// Pow10SignificandsTable::compute.
#[rustfmt::skip]
const FIXUPS: [u32; 20] = [
    0x05271b1f, 0x00000c20, 0x00003200, 0x12100020,
    0x00000000, 0x06000000, 0xc16409c0, 0xaf26700f,
    0xeb987b07, 0x0000000d, 0x00000000, 0x66fbfffe,
    0xb74100ec, 0xa0669fe8, 0xedb21280, 0x00000686,
    0x0a021200, 0x29b89c20, 0x08bc0eda, 0x00000000,
];
244
245// 128-bit significands of powers of 10 rounded down.
246#[repr(C, align(64))]
247struct Pow10SignificandsTable {
248    data: [u64; if Self::COMPRESS {
249        0
250    } else {
251        Self::NUM_POW10 * 2
252    }],
253}
254
255impl Pow10SignificandsTable {
256    const COMPRESS: bool = false;
257    const SPLIT_TABLES: bool = !Self::COMPRESS && falsecfg!(target_arch = "aarch64");
258    const NUM_POW10: usize = 617;
259
260    // Computes the 128-bit significand of 10**i using method by Dougall Johnson.
261    const fn compute(i: u32) -> uint128 {
262        let m = unsafe { *POW10S.as_ptr().add(((i + 11) % 28) as usize) };
263        let h = unsafe { *HIGH_PARTS.as_ptr().add(((i + 11) / 28) as usize) };
264
265        let h1 = umul128_hi64(h.lo, m);
266
267        let c0 = h.lo.wrapping_mul(m);
268        let c1 = h1.wrapping_add(h.hi.wrapping_mul(m));
269        let c2 = (c1 < h1) as u64 + umul128_hi64(h.hi, m);
270
271        let mut result = if (c2 >> 63) != 0 {
272            uint128 { hi: c2, lo: c1 }
273        } else {
274            uint128 {
275                hi: (c2 << 1) | (c1 >> 63),
276                lo: (c1 << 1) | (c0 >> 63),
277            }
278        };
279        result.lo -= ((unsafe { *FIXUPS.as_ptr().add((i >> 5) as usize) } >> (i & 31)) & 1) as u64;
280        result
281    }
282
283    const fn new() -> Self {
284        let mut data = [0; if Self::COMPRESS {
285            0
286        } else {
287            Self::NUM_POW10 * 2
288        }];
289
290        let mut i = 0;
291        while i < Self::NUM_POW10 && !Self::COMPRESS {
292            let result = Self::compute(i as u32);
293            if Self::SPLIT_TABLES {
294                data[Self::NUM_POW10 - i - 1] = result.hi;
295                data[Self::NUM_POW10 * 2 - i - 1] = result.lo;
296            } else {
297                data[i * 2] = result.hi;
298                data[i * 2 + 1] = result.lo;
299            }
300            i += 1;
301        }
302
303        Pow10SignificandsTable { data }
304    }
305
306    unsafe fn get_unchecked(&self, dec_exp: i32) -> uint128 {
307        const DEC_EXP_MIN: i32 = -292;
308        if Self::COMPRESS {
309            return Self::compute((dec_exp - DEC_EXP_MIN) as u32);
310        }
311        if !Self::SPLIT_TABLES {
312            let index = ((dec_exp - DEC_EXP_MIN) * 2) as usize;
313            return uint128 {
314                hi: unsafe { *self.data.get_unchecked(index) },
315                lo: unsafe { *self.data.get_unchecked(index + 1) },
316            };
317        }
318
319        unsafe {
320            #[cfg_attr(
321                not(all(any(target_arch = "x86_64", target_arch = "aarch64"), not(miri))),
322                allow(unused_mut)
323            )]
324            let mut hi = self
325                .data
326                .as_ptr()
327                .offset(Self::NUM_POW10 as isize + DEC_EXP_MIN as isize - 1);
328            #[cfg_attr(
329                not(all(any(target_arch = "x86_64", target_arch = "aarch64"), not(miri))),
330                allow(unused_mut)
331            )]
332            let mut lo = hi.add(Self::NUM_POW10);
333
334            // Force indexed loads.
335            #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64"), not(miri)))]
336            asm!("/*{0}{1}*/", inout(reg) hi, inout(reg) lo);
337            uint128 {
338                hi: *hi.offset(-dec_exp as isize),
339                lo: *lo.offset(-dec_exp as isize),
340            }
341        }
342    }
343
344    #[cfg(test)]
345    fn get(&self, dec_exp: i32) -> uint128 {
346        const DEC_EXP_MIN: i32 = -292;
347        assert!((DEC_EXP_MIN..DEC_EXP_MIN + Self::NUM_POW10 as i32).contains(&dec_exp));
348        unsafe { self.get_unchecked(dec_exp) }
349    }
350}
351
352static POW10_SIGNIFICANDS: Pow10SignificandsTable = Pow10SignificandsTable::new();
353
// Computes the decimal exponent as floor(log10(2**bin_exp)) if regular or
// floor(log10(3/4 * 2**bin_exp)) otherwise, without branching.
//
// bin_exp must be in -1334..=2620 (checked in debug builds only).
const fn compute_dec_exp(bin_exp: i32, regular: bool) -> i32 {
    debug_assert!(bin_exp >= -1334 && bin_exp <= 2620);
    // log10_3_over_4_sig = -log10(3/4) * 2**log10_2_exp rounded to a power of 2
    const LOG10_3_OVER_4_SIG: i32 = 131_072;
    // log10_2_sig = round(log10(2) * 2**log10_2_exp)
    const LOG10_2_SIG: i32 = 315_653;
    const LOG10_2_EXP: i32 = 20;
    // The arithmetic right shift performs the floor, also for negative values.
    (bin_exp * LOG10_2_SIG - !regular as i32 * LOG10_3_OVER_4_SIG) >> LOG10_2_EXP
}
365
// Computes the shift for the given binary and decimal exponents as
// bin_exp + floor(log2(10**-dec_exp)) + 1, truncated to u8.
//
// dec_exp must be in -350..=350 (checked in debug builds only).
#[inline]
const fn do_compute_exp_shift(bin_exp: i32, dec_exp: i32) -> u8 {
    debug_assert!(dec_exp >= -350 && dec_exp <= 350);
    // log2_pow10_sig = round(log2(10) * 2**log2_pow10_exp) + 1
    const LOG2_POW10_SIG: i32 = 217_707;
    const LOG2_POW10_EXP: i32 = 16;
    // pow10_bin_exp = floor(log2(10**-dec_exp))
    let pow10_bin_exp = (-dec_exp * LOG2_POW10_SIG) >> LOG2_POW10_EXP;
    // pow10 = ((pow10_hi << 64) | pow10_lo) * 2**(pow10_bin_exp - 127)
    (bin_exp + pow10_bin_exp + 1) as u8
}
377
378struct ExpShiftTable {
379    data: [u8; if Self::ENABLE {
380        f64::EXP_MASK as usize + 1
381    } else {
382        1
383    }],
384}
385
386impl ExpShiftTable {
387    const ENABLE: bool = true;
388}
389
390static EXP_SHIFTS: ExpShiftTable = {
391    let mut data = [0u8; if ExpShiftTable::ENABLE {
392        f64::EXP_MASK as usize + 1
393    } else {
394        1
395    }];
396
397    let mut raw_exp = 0;
398    while raw_exp < data.len() && ExpShiftTable::ENABLE {
399        let mut bin_exp = raw_exp as i32 - f64::EXP_OFFSET;
400        if raw_exp == 0 {
401            bin_exp += 1;
402        }
403        let dec_exp = compute_dec_exp(bin_exp, true);
404        data[raw_exp] = do_compute_exp_shift(bin_exp, dec_exp) as u8;
405        raw_exp += 1;
406    }
407
408    ExpShiftTable { data }
409};
410
411// Computes a shift so that, after scaling by a power of 10, the intermediate
412// result always has a fixed 128-bit fractional part (for double).
413//
414// Different binary exponents can map to the same decimal exponent, but place
415// the decimal point at different bit positions. The shift compensates for this.
416//
417// For example, both 3 * 2**59 and 3 * 2**60 have dec_exp = 2, but dividing by
418// 10^dec_exp puts the decimal point in different bit positions:
419//   3 * 2**59 / 100 = 1.72...e+16  (needs shift = 1 + 1)
420//   3 * 2**60 / 100 = 3.45...e+16  (needs shift = 2 + 1)
421#[inline]
422unsafe fn compute_exp_shift<UInt, const ONLY_REGULAR: bool>(bin_exp: i32, dec_exp: i32) -> u8
423where
424    UInt: traits::UInt,
425{
426    let num_bits = mem::size_of::<UInt>() * 8;
427    if num_bits == 64 && ExpShiftTable::ENABLE && ONLY_REGULAR {
428        unsafe {
429            *EXP_SHIFTS
430                .data
431                .as_ptr()
432                .add((bin_exp + f64::EXP_OFFSET) as usize)
433        }
434    } else {
435        do_compute_exp_shift(bin_exp, dec_exp)
436    }
437}
438
// Returns the number of low-order bytes of x (taken in little-endian order)
// up to and including the most significant nonzero byte; 0 if x is zero.
// The top bit of x must be clear, since it is shifted out below.
#[cfg_attr(feature = "no-panic", no_panic)]
fn count_trailing_nonzeros(x: u64) -> usize {
    // We count the number of bytes until there are only zeros left.
    // The code is equivalent to
    //    8 - x.leading_zeros() / 8
    // but if the BSR instruction is emitted (as gcc on x64 does with default
    // settings), subtracting the constant before dividing allows the compiler
    // to combine it with the subtraction which it inserts due to BSR counting
    // in the opposite direction.
    //
    // Additionally, the BSR instruction requires a zero check. Since the high
    // bit is unused we can avoid the zero check by shifting the datum left by
    // one and inserting a sentinel bit at the end. This can be faster than the
    // automatically inserted range check.
    (70 - ((x.to_le() << 1) | 1).leading_zeros() as usize) / 8
}
455
// Align data since unaligned access may be slower when crossing a
// hardware-specific boundary. The 2-byte alignment also lets each digit pair
// be read as a u16.
#[repr(C, align(2))]
struct Digits2([u8; 200]);
460
461static DIGITS2: Digits2 = Digits2(
462    *b"0001020304050607080910111213141516171819\
463       2021222324252627282930313233343536373839\
464       4041424344454647484950515253545556575859\
465       6061626364656667686970717273747576777879\
466       8081828384858687888990919293949596979899",
467);
468
469// Converts value in the range [0, 100) to a string. GCC generates a bit better
470// code when value is pointer-size (https://www.godbolt.org/z/5fEPMT1cc).
471#[cfg_attr(feature = "no-panic", no_panic)]
472unsafe fn digits2(value: usize) -> &'static u16 {
473    if true {
    if !(value < 100) {
        ::core::panicking::panic("assertion failed: value < 100")
    };
};debug_assert!(value < 100);
474
475    #[allow(clippy::cast_ptr_alignment)]
476    unsafe {
477        &*DIGITS2.0.as_ptr().cast::<u16>().add(value)
478    }
479}
480
// Multiply-and-shift constants for division by 10000, 100 and 10:
// x / d is computed as (x * DIVd_SIG) >> DIVd_EXP. NEGd is 2**k - d, used to
// form quotient and remainder in one multiply-add (see to_bcd8).
const DIV10K_EXP: i32 = 40;
const DIV10K_SIG: u32 = ((1u64 << DIV10K_EXP) / 10000 + 1) as u32;
const NEG10K: u32 = ((1u64 << 32) - 10000) as u32;

const DIV100_EXP: i32 = 19;
const DIV100_SIG: u32 = (1 << DIV100_EXP) / 100 + 1;
const NEG100: u32 = (1 << 16) - 100;

const DIV10_EXP: i32 = 10;
const DIV10_SIG: u32 = (1 << DIV10_EXP) / 10 + 1;
const NEG10: u32 = (1 << 8) - 10;

// ASCII '0' replicated into every byte of a u64.
const ZEROS: u64 = 0x0101010101010101 * b'0' as u64;
494
495#[cfg_attr(feature = "no-panic", no_panic)]
496fn to_bcd8(abcdefgh: u64) -> u64 {
497    // An optimization from Xiang JunBo.
498    // Three steps BCD. Base 10000 -> base 100 -> base 10.
499    // div and mod are evaluated simultaneously as, e.g.
500    //   (abcdefgh / 10000) << 32 + (abcdefgh % 10000)
501    //      == abcdefgh + (2**32 - 10000) * (abcdefgh / 10000)))
502    // where the division on the RHS is implemented by the usual multiply + shift
503    // trick and the fractional bits are masked away.
504    let abcd_efgh =
505        abcdefgh + u64::from(NEG10K) * ((abcdefgh * u64::from(DIV10K_SIG)) >> DIV10K_EXP);
506    let ab_cd_ef_gh = abcd_efgh
507        + u64::from(NEG100) * (((abcd_efgh * u64::from(DIV100_SIG)) >> DIV100_EXP) & 0x7f0000007f);
508    let a_b_c_d_e_f_g_h = ab_cd_ef_gh
509        + u64::from(NEG10)
510            * (((ab_cd_ef_gh * u64::from(DIV10_SIG)) >> DIV10_EXP) & 0xf000f000f000f);
511    a_b_c_d_e_f_g_h.to_be()
512}
513
// Unconditionally stores the ASCII character for digit at buffer, then
// returns buffer advanced by one byte only if condition is true. When the
// condition is false the next write overwrites the stored byte.
//
// Safety: buffer must be valid for a one-byte write. digit is expected to be
// a single decimal digit (0..=9).
unsafe fn write_if(buffer: *mut u8, digit: u32, condition: bool) -> *mut u8 {
    // SAFETY: the caller guarantees buffer is writable; advancing by at most
    // one stays within or one past the written byte.
    unsafe {
        *buffer = b'0' + digit as u8;
        buffer.add(usize::from(condition))
    }
}
520
// Stores the eight bytes of value at buffer in native byte order; buffer does
// not need to be aligned.
//
// Safety: buffer must be valid for an 8-byte write.
unsafe fn write8(buffer: *mut u8, value: u64) {
    // SAFETY: the caller guarantees 8 writable bytes; write_unaligned has no
    // alignment requirement.
    unsafe {
        buffer.cast::<u64>().write_unaligned(value);
    }
}
526
527// Writes a significand and removes trailing zeros. value has up to 17 decimal
528// digits (16-17 for normals) for double (num_bits == 64) and up to 9 digits
529// (8-9 for normals) for float. The significant digits start from buffer[1].
530// buffer[0] may contain '0' after this function if the leading digit is zero.
531#[cfg_attr(feature = "no-panic", no_panic)]
532#[inline]
533unsafe fn write_significand<Float>(mut buffer: *mut u8, value: u64, extra_digit: bool) -> *mut u8
534where
535    Float: FloatTraits,
536{
537    if Float::NUM_BITS == 32 {
538        buffer = unsafe { write_if(buffer, (value / 100_000_000) as u32, extra_digit) };
539        let bcd = to_bcd8(value % 100_000_000);
540        unsafe {
541            write8(buffer, bcd + ZEROS);
542            return buffer.add(count_trailing_nonzeros(bcd));
543        }
544    }
545
546    #[cfg(not(any(
547        all(target_arch = "aarch64", target_feature = "neon", not(miri)),
548        all(target_arch = "x86_64", target_feature = "sse2", not(miri)),
549    )))]
550    {
551        // Digits/pairs of digits are denoted by letters: value = abbccddeeffgghhii.
552        let abbccddee = (value / 100_000_000) as u32;
553        let ffgghhii = (value % 100_000_000) as u32;
554        buffer = unsafe { write_if(buffer, abbccddee / 100_000_000, extra_digit) };
555        let bcd = to_bcd8(u64::from(abbccddee % 100_000_000));
556        unsafe {
557            write8(buffer, bcd + ZEROS);
558        }
559        if ffgghhii == 0 {
560            return unsafe { buffer.add(count_trailing_nonzeros(bcd)) };
561        }
562        let bcd = to_bcd8(u64::from(ffgghhii));
563        unsafe {
564            write8(buffer.add(8), bcd + ZEROS);
565            buffer.add(8).add(count_trailing_nonzeros(bcd))
566        }
567    }
568
569    #[cfg(all(target_arch = "aarch64", target_feature = "neon", not(miri)))]
570    {
571        // An optimized version for NEON by Dougall Johnson.
572
573        use core::arch::aarch64::*;
574
575        const NEG10K: i32 = -10000 + 0x10000;
576
577        #[repr(C, align(64))]
578        struct Consts {
579            mul_const: u64,
580            hundred_million: u64,
581            multipliers32: int32x4_t,
582            multipliers16: int16x8_t,
583        }
584
585        static CONSTS: Consts = Consts {
586            mul_const: 0xabcc77118461cefd,
587            hundred_million: 100000000,
588            multipliers32: unsafe {
589                mem::transmute::<[i32; 4], int32x4_t>([
590                    DIV10K_SIG as i32,
591                    NEG10K,
592                    (DIV100_SIG << 12) as i32,
593                    NEG100 as i32,
594                ])
595            },
596            multipliers16: unsafe {
597                mem::transmute::<[i16; 8], int16x8_t>([0xce0, NEG10 as i16, 0, 0, 0, 0, 0, 0])
598            },
599        };
600
601        let mut c = ptr::addr_of!(CONSTS);
602
603        // Compiler barrier, or clang doesn't load from memory and generates 15
604        // more instructions.
605        let c = unsafe {
606            asm!("/*{0}*/", inout(reg) c);
607            &*c
608        };
609
610        let mut hundred_million = c.hundred_million;
611
612        // Compiler barrier, or clang narrows the load to 32-bit and unpairs it.
613        unsafe {
614            asm!("/*{0}*/", inout(reg) hundred_million);
615        }
616
617        // Equivalent to abbccddee = value / 100000000, ffgghhii = value % 100000000.
618        let abbccddee = (umul128(value, c.mul_const) >> 90) as u64;
619        let ffgghhii = value - abbccddee * hundred_million;
620
621        // We could probably make this bit faster, but we're preferring to
622        // reuse the constants for now.
623        let a = (umul128(abbccddee, c.mul_const) >> 90) as u64;
624        let bbccddee = abbccddee - a * hundred_million;
625
626        buffer = unsafe { write_if(buffer, a as u32, extra_digit) };
627
628        unsafe {
629            let ffgghhii_bbccddee_64: uint64x1_t =
630                mem::transmute::<u64, uint64x1_t>((ffgghhii << 32) | bbccddee);
631            let bbccddee_ffgghhii: int32x2_t = vreinterpret_s32_u64(ffgghhii_bbccddee_64);
632
633            let bbcc_ffgg: int32x2_t = vreinterpret_s32_u32(vshr_n_u32(
634                vreinterpret_u32_s32(vqdmulh_n_s32(
635                    bbccddee_ffgghhii,
636                    mem::transmute::<int32x4_t, [i32; 4]>(c.multipliers32)[0],
637                )),
638                9,
639            ));
640            let ddee_bbcc_hhii_ffgg_32: int32x2_t = vmla_n_s32(
641                bbccddee_ffgghhii,
642                bbcc_ffgg,
643                mem::transmute::<int32x4_t, [i32; 4]>(c.multipliers32)[1],
644            );
645
646            let mut ddee_bbcc_hhii_ffgg: int32x4_t =
647                vreinterpretq_s32_u32(vshll_n_u16(vreinterpret_u16_s32(ddee_bbcc_hhii_ffgg_32), 0));
648
649            // Compiler barrier, or clang breaks the subsequent MLA into UADDW +
650            // MUL.
651            asm!("/*{:v}*/", inout(vreg) ddee_bbcc_hhii_ffgg);
652
653            let dd_bb_hh_ff: int32x4_t = vqdmulhq_n_s32(
654                ddee_bbcc_hhii_ffgg,
655                mem::transmute::<int32x4_t, [i32; 4]>(c.multipliers32)[2],
656            );
657            let ee_dd_cc_bb_ii_hh_gg_ff: int16x8_t = vreinterpretq_s16_s32(vmlaq_n_s32(
658                ddee_bbcc_hhii_ffgg,
659                dd_bb_hh_ff,
660                mem::transmute::<int32x4_t, [i32; 4]>(c.multipliers32)[3],
661            ));
662            let high_10s: int16x8_t = vqdmulhq_n_s16(
663                ee_dd_cc_bb_ii_hh_gg_ff,
664                mem::transmute::<int16x8_t, [i16; 8]>(c.multipliers16)[0],
665            );
666            let digits: uint8x16_t = vrev64q_u8(vreinterpretq_u8_s16(vmlaq_n_s16(
667                ee_dd_cc_bb_ii_hh_gg_ff,
668                high_10s,
669                mem::transmute::<int16x8_t, [i16; 8]>(c.multipliers16)[1],
670            )));
671            let str: uint16x8_t = vaddq_u16(
672                vreinterpretq_u16_u8(digits),
673                vreinterpretq_u16_s8(vdupq_n_s8(b'0' as i8)),
674            );
675
676            buffer.cast::<uint16x8_t>().write_unaligned(str);
677
678            let is_not_zero: uint16x8_t =
679                vreinterpretq_u16_u8(vcgtzq_s8(vreinterpretq_s8_u8(digits)));
680            let zeros: u64 = vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(is_not_zero, 4)), 0);
681
682            buffer.add(16 - (zeros.leading_zeros() as usize >> 2))
683        }
684    }
685
686    #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
687    {
688        use crate::stdarch_x86::*;
689
690        let abbccddee = (value / 100_000_000) as u32;
691        let ffgghhii = (value % 100_000_000) as u32;
692        let a = abbccddee / 100_000_000;
693        let bbccddee = abbccddee % 100_000_000;
694
695        buffer = unsafe { write_if(buffer, a, extra_digit) };
696
697        #[repr(C, align(64))]
698        struct Consts {
699            div10k: u128,
700            neg10k: u128,
701            div100: u128,
702            div10: u128,
703            #[cfg(target_feature = "sse4.1")]
704            neg100: u128,
705            #[cfg(target_feature = "sse4.1")]
706            neg10: u128,
707            #[cfg(target_feature = "sse4.1")]
708            bswap: u128,
709            #[cfg(not(target_feature = "sse4.1"))]
710            hundred: u128,
711            #[cfg(not(target_feature = "sse4.1"))]
712            moddiv10: u128,
713            zeros: u128,
714        }
715
716        impl Consts {
717            const fn splat64(x: u64) -> u128 {
718                ((x as u128) << 64) | x as u128
719            }
720
721            const fn splat32(x: u32) -> u128 {
722                Self::splat64(((x as u64) << 32) | x as u64)
723            }
724
725            const fn splat16(x: u16) -> u128 {
726                Self::splat32(((x as u32) << 16) | x as u32)
727            }
728
729            #[cfg(target_feature = "sse4.1")]
730            const fn pack8(a: u8, b: u8, c: u8, d: u8, e: u8, f: u8, g: u8, h: u8) -> u64 {
731                ((h as u64) << 56)
732                    | ((g as u64) << 48)
733                    | ((f as u64) << 40)
734                    | ((e as u64) << 32)
735                    | ((d as u64) << 24)
736                    | ((c as u64) << 16)
737                    | ((b as u64) << 8)
738                    | a as u64
739            }
740        }
741
742        static CONSTS: Consts = Consts {
743            div10k: Consts::splat64(DIV10K_SIG as u64),
744            neg10k: Consts::splat64(NEG10K as u64),
745            div100: Consts::splat32(DIV100_SIG),
746            div10: Consts::splat16(((1u32 << 16) / 10 + 1) as u16),
747            #[cfg(target_feature = "sse4.1")]
748            neg100: Consts::splat32(NEG100),
749            #[cfg(target_feature = "sse4.1")]
750            neg10: Consts::splat16((1 << 8) - 10),
751            #[cfg(target_feature = "sse4.1")]
752            bswap: Consts::pack8(15, 14, 13, 12, 11, 10, 9, 8) as u128
753                | (Consts::pack8(7, 6, 5, 4, 3, 2, 1, 0) as u128) << 64,
754            #[cfg(not(target_feature = "sse4.1"))]
755            hundred: Consts::splat32(100),
756            #[cfg(not(target_feature = "sse4.1"))]
757            moddiv10: Consts::splat16(10 * (1 << 8) - 1),
758            zeros: Consts::splat64(ZEROS),
759        };
760
761        let mut c = &raw const CONSTSptr::addr_of!(CONSTS);
762        // Load constants from memory.
763        unsafe {
764            asm!("/*{0}*/", inout(reg) c);
765        }
766
767        let div10k = unsafe { _mm_load_si128(&raw const (*c).div10kptr::addr_of!((*c).div10k).cast::<__m128i>()) };
768        let neg10k = unsafe { _mm_load_si128(&raw const (*c).neg10kptr::addr_of!((*c).neg10k).cast::<__m128i>()) };
769        let div100 = unsafe { _mm_load_si128(&raw const (*c).div100ptr::addr_of!((*c).div100).cast::<__m128i>()) };
770        let div10 = unsafe { _mm_load_si128(&raw const (*c).div10ptr::addr_of!((*c).div10).cast::<__m128i>()) };
771        #[cfg(target_feature = "sse4.1")]
772        let neg100 = unsafe { _mm_load_si128(ptr::addr_of!((*c).neg100).cast::<__m128i>()) };
773        #[cfg(target_feature = "sse4.1")]
774        let neg10 = unsafe { _mm_load_si128(ptr::addr_of!((*c).neg10).cast::<__m128i>()) };
775        #[cfg(target_feature = "sse4.1")]
776        let bswap = unsafe { _mm_load_si128(ptr::addr_of!((*c).bswap).cast::<__m128i>()) };
777        #[cfg(not(target_feature = "sse4.1"))]
778        let hundred = unsafe { _mm_load_si128(&raw const (*c).hundredptr::addr_of!((*c).hundred).cast::<__m128i>()) };
779        #[cfg(not(target_feature = "sse4.1"))]
780        let moddiv10 = unsafe { _mm_load_si128(&raw const (*c).moddiv10ptr::addr_of!((*c).moddiv10).cast::<__m128i>()) };
781        let zeros = unsafe { _mm_load_si128(&raw const (*c).zerosptr::addr_of!((*c).zeros).cast::<__m128i>()) };
782
783        // The BCD sequences are based on ones provided by Xiang JunBo.
784        unsafe {
785            let x: __m128i = _mm_set_epi64x(i64::from(bbccddee), i64::from(ffgghhii));
786            let y: __m128i = _mm_add_epi64(
787                x,
788                _mm_mul_epu32(neg10k, _mm_srli_epi64(_mm_mul_epu32(x, div10k), DIV10K_EXP)),
789            );
790
791            #[cfg(target_feature = "sse4.1")]
792            let bcd: __m128i = {
793                // _mm_mullo_epi32 is SSE 4.1
794                let z: __m128i = _mm_add_epi64(
795                    y,
796                    _mm_mullo_epi32(neg100, _mm_srli_epi32(_mm_mulhi_epu16(y, div100), 3)),
797                );
798                let big_endian_bcd: __m128i =
799                    _mm_add_epi64(z, _mm_mullo_epi16(neg10, _mm_mulhi_epu16(z, div10)));
800                // SSSE3
801                _mm_shuffle_epi8(big_endian_bcd, bswap)
802            };
803
804            #[cfg(not(target_feature = "sse4.1"))]
805            let bcd: __m128i = {
806                let y_div_100: __m128i = _mm_srli_epi16(_mm_mulhi_epu16(y, div100), 3);
807                let y_mod_100: __m128i = _mm_sub_epi16(y, _mm_mullo_epi16(y_div_100, hundred));
808                let z: __m128i = _mm_or_si128(_mm_slli_epi32(y_mod_100, 16), y_div_100);
809                let bcd_shuffled: __m128i = _mm_sub_epi16(
810                    _mm_slli_epi16(z, 8),
811                    _mm_mullo_epi16(moddiv10, _mm_mulhi_epu16(z, div10)),
812                );
813                _mm_shuffle_epi32(bcd_shuffled, _MM_SHUFFLE(0, 1, 2, 3))
814            };
815
816            let digits = _mm_or_si128(bcd, zeros);
817
818            // Count leading zeros.
819            let mask128: __m128i = _mm_cmpgt_epi8(bcd, _mm_setzero_si128());
820            let mask = _mm_movemask_epi8(mask128) as u32;
821            let len = 32 - mask.leading_zeros() as usize;
822
823            _mm_storeu_si128(buffer.cast::<__m128i>(), digits);
824            buffer.add(len)
825        }
826    }
827}
828
/// Decimal significand/exponent pair returned by `to_decimal_schubfach` and
/// `to_decimal_fast`.
struct ToDecimalResult {
    /// Decimal significand.
    sig: i64,
    /// Decimal exponent that accompanies `sig`.
    exp: i32,
}
833
834#[cfg_attr(feature = "no-panic", no_panic)]
835#[inline]
836fn to_decimal_schubfach<UInt>(bin_sig: UInt, bin_exp: i64, regular: bool) -> ToDecimalResult
837where
838    UInt: traits::UInt,
839{
840    let num_bits = mem::size_of::<UInt>() as i32 * 8;
841    let dec_exp = compute_dec_exp(bin_exp as i32, regular);
842    let exp_shift = unsafe { compute_exp_shift::<UInt, false>(bin_exp as i32, dec_exp) };
843    let mut pow10 = unsafe { POW10_SIGNIFICANDS.get_unchecked(-dec_exp) };
844
845    // Fallback to Schubfach to guarantee correctness in boundary cases. This
846    // requires switching to strict overestimates of powers of 10.
847    if num_bits == 64 {
848        pow10.lo += 1;
849    } else {
850        pow10.hi += 1;
851    }
852
853    // Shift the significand so that boundaries are integer.
854    const BOUND_SHIFT: u32 = 2;
855    let bin_sig_shifted = bin_sig << BOUND_SHIFT;
856
857    // Compute the estimates of lower and upper bounds of the rounding interval
858    // by multiplying them by the power of 10 and applying modified rounding.
859    let lsb = bin_sig & UInt::from(1);
860    let lower = (bin_sig_shifted - (UInt::from(regular) + UInt::from(1))) << exp_shift;
861    let lower = umulhi_inexact_to_odd(pow10.hi, pow10.lo, lower) + lsb;
862    let upper = (bin_sig_shifted + UInt::from(2)) << exp_shift;
863    let upper = umulhi_inexact_to_odd(pow10.hi, pow10.lo, upper) - lsb;
864
865    // The idea of using a single shorter candidate is by Cassio Neri.
866    // It is less or equal to the upper bound by construction.
867    let shorter = (upper >> BOUND_SHIFT) / UInt::from(10) * UInt::from(10);
868    if (shorter << BOUND_SHIFT) >= lower {
869        return ToDecimalResult {
870            sig: shorter.into() as i64,
871            exp: dec_exp,
872        };
873    }
874
875    let scaled_sig = umulhi_inexact_to_odd(pow10.hi, pow10.lo, bin_sig_shifted << exp_shift);
876    let longer_below = scaled_sig >> BOUND_SHIFT;
877    let longer_above = longer_below + UInt::from(1);
878
879    // Pick the closest of longer_below and longer_above and check if it's in
880    // the rounding interval.
881    let cmp = scaled_sig
882        .wrapping_sub((longer_below + longer_above) << 1)
883        .to_signed();
884    let below_closer = cmp < UInt::from(0).to_signed()
885        || (cmp == UInt::from(0).to_signed() && (longer_below & UInt::from(1)) == UInt::from(0));
886    let below_in = (longer_below << BOUND_SHIFT) >= lower;
887    let dec_sig = if below_closer & below_in {
888        longer_below
889    } else {
890        longer_above
891    };
892    ToDecimalResult {
893        sig: dec_sig.into() as i64,
894        exp: dec_exp,
895    }
896}
897
898// Here be 🐉s.
899// Converts a binary FP number bin_sig * 2**bin_exp to the shortest decimal
900// representation, where bin_exp = raw_exp - exp_offset.
901#[cfg_attr(feature = "no-panic", no_panic)]
902#[inline]
903fn to_decimal_fast<Float, UInt>(bin_sig: UInt, raw_exp: i64, regular: bool) -> ToDecimalResult
904where
905    Float: FloatTraits,
906    UInt: traits::UInt,
907{
908    let bin_exp = raw_exp - i64::from(Float::EXP_OFFSET);
909    let num_bits = mem::size_of::<UInt>() as i32 * 8;
910    // An optimization from yy by Yaoyuan Guo:
911    while regular {
912        let dec_exp = if USE_UMUL128_HI64 {
913            umul128_hi64(bin_exp as u64, 0x4d10500000000000) as i32
914        } else {
915            compute_dec_exp(bin_exp as i32, true)
916        };
917        let exp_shift = unsafe { compute_exp_shift::<UInt, true>(bin_exp as i32, dec_exp) };
918        let pow10 = unsafe { POW10_SIGNIFICANDS.get_unchecked(-dec_exp) };
919
920        let integral; // integral part of bin_sig * pow10
921        let fractional; // fractional part of bin_sig * pow10
922        if num_bits == 64 {
923            let p = umul192_hi128(pow10.hi, pow10.lo, (bin_sig << exp_shift).into());
924            integral = UInt::truncate(p.hi);
925            fractional = p.lo;
926        } else {
927            let p = umul128(pow10.hi, (bin_sig << exp_shift).into());
928            integral = UInt::truncate((p >> 64) as u64);
929            fractional = p as u64;
930        }
931        const HALF_ULP: u64 = 1 << 63;
932
933        // Exact half-ulp tie when rounding to nearest integer.
934        let cmp = fractional.wrapping_sub(HALF_ULP) as i64;
935        if cmp == 0 {
936            break;
937        }
938
939        // An optimization of integral % 10 by Dougall Johnson. Relies on range
940        // calculation: (max_bin_sig << max_exp_shift) * max_u128.
941        // (1 << 63) / 5 == (1 << 64) / 10 without an intermediate int128.
942        const DIV10_SIG64: u64 = (1 << 63) / 5 + 1;
943        let div10 = umul128_hi64(integral.into(), DIV10_SIG64);
944        #[allow(unused_mut)]
945        let mut digit = integral.into() - div10 * 10;
946        // or it narrows to 32-bit and doesn't use madd/msub
947        #[cfg(all(any(target_arch = "aarch64", target_arch = "x86_64"), not(miri)))]
948        unsafe {
949            asm!("/*{0}*/", inout(reg) digit);
950        }
951
952        // Switch to a fixed-point representation with the least significant
953        // integral digit in the upper bits and fractional digits in the lower
954        // bits.
955        let num_integral_bits = if num_bits == 64 { 4 } else { 32 };
956        let num_fractional_bits = 64 - num_integral_bits;
957        let ten = 10u64 << num_fractional_bits;
958        // Fixed-point remainder of the scaled significand modulo 10.
959        let scaled_sig_mod10 = (digit << num_fractional_bits) | (fractional >> num_integral_bits);
960
961        // scaled_half_ulp = 0.5 * pow10 in the fixed-point format.
962        // dec_exp is chosen so that 10**dec_exp <= 2**bin_exp < 10**(dec_exp + 1).
963        // Since 1ulp == 2**bin_exp it will be in the range [1, 10) after scaling
964        // by 10**dec_exp. Add 1 to combine the shift with division by two.
965        let scaled_half_ulp = pow10.hi >> (num_integral_bits - exp_shift + 1);
966        let upper = scaled_sig_mod10 + scaled_half_ulp;
967
968        // value = 5.0507837461e-27
969        // next  = 5.0507837461000010e-27
970        //
971        // c = integral.fractional' = 50507837461000003.153987... (value)
972        //                            50507837461000010.328635... (next)
973        //          scaled_half_ulp =                 3.587324...
974        //
975        // fractional' = fractional / 2**64, fractional = 2840565642863009226
976        //
977        //      50507837461000000       c               upper     50507837461000010
978        //              s              l|   L             |               S
979        // ───┬────┬────┼────┬────┬────┼*-──┼────┬────┬───*┬────┬────┬────┼-*--┬───
980        //    8    9    0    1    2    3    4    5    6    7    8    9    0 |  1
981        //            └─────────────────┼─────────────────┘                next
982        //                             1ulp
983        //
984        // s - shorter underestimate, S - shorter overestimate
985        // l - longer underestimate,  L - longer overestimate
986
987        // Check for boundary case when rounding down to nearest 10 and
988        // near-boundary case when rounding up to nearest 10.
989        // Case where upper == ten is insufficient: 1.342178e+08f.
990        if ten.wrapping_sub(upper) <= 1 // upper == ten || upper == ten - 1
991            || scaled_sig_mod10 == scaled_half_ulp
992        {
993            break;
994        }
995
996        let shorter = (integral.into() - digit) as i64;
997        let longer = (integral.into() + u64::from(cmp >= 0)) as i64;
998        let dec_sig = select_if_less(scaled_sig_mod10, scaled_half_ulp, shorter, longer);
999        return ToDecimalResult {
1000            sig: select_if_less(ten, upper, shorter + 10, dec_sig),
1001            exp: dec_exp,
1002        };
1003    }
1004    to_decimal_schubfach(bin_sig, bin_exp, regular)
1005}
1006
1007/// Writes the shortest correctly rounded decimal representation of `value` to
1008/// `buffer`. `buffer` should point to a buffer of size `buffer_size` or larger.
1009#[cfg_attr(feature = "no-panic", no_panic)]
1010unsafe fn write<Float>(value: Float, mut buffer: *mut u8) -> *mut u8
1011where
1012    Float: FloatTraits,
1013{
1014    let bits = value.to_bits();
1015    // It is beneficial to extract exponent and significand early.
1016    let bin_exp = Float::get_exp(bits); // binary exponent
1017    let bin_sig = Float::get_sig(bits); // binary significand
1018
1019    unsafe {
1020        *buffer = b'-';
1021    }
1022    buffer = unsafe { buffer.add(usize::from(Float::is_negative(bits))) };
1023
1024    let mut dec;
1025    let threshold = if Float::NUM_BITS == 64 {
1026        10_000_000_000_000_000
1027    } else {
1028        100_000_000
1029    };
1030    if bin_exp == 0 {
1031        if bin_sig == Float::SigType::from(0) {
1032            return unsafe {
1033                *buffer = b'0';
1034                *buffer.add(1) = b'.';
1035                *buffer.add(2) = b'0';
1036                buffer.add(3)
1037            };
1038        }
1039        dec = to_decimal_schubfach(bin_sig, i64::from(1 - Float::EXP_OFFSET), true);
1040        while dec.sig < threshold {
1041            dec.sig *= 10;
1042            dec.exp -= 1;
1043        }
1044    } else {
1045        dec = to_decimal_fast::<Float, Float::SigType>(
1046            bin_sig | Float::IMPLICIT_BIT,
1047            bin_exp,
1048            bin_sig != Float::SigType::from(0),
1049        );
1050    }
1051    let mut dec_exp = dec.exp;
1052    let extra_digit = dec.sig >= threshold;
1053    dec_exp += Float::MAX_DIGITS10 as i32 - 2 + i32::from(extra_digit);
1054    if Float::NUM_BITS == 32 && dec.sig < 10_000_000 {
1055        dec.sig *= 10;
1056        dec_exp -= 1;
1057    }
1058
1059    // Write significand.
1060    let end = unsafe { write_significand::<Float>(buffer.add(1), dec.sig as u64, extra_digit) };
1061
1062    let length = unsafe { end.offset_from(buffer.add(1)) } as usize;
1063
1064    if Float::NUM_BITS == 32 && (-6..=12).contains(&dec_exp)
1065        || Float::NUM_BITS == 64 && (-5..=15).contains(&dec_exp)
1066    {
1067        if length as i32 - 1 <= dec_exp {
1068            // 1234e7 -> 12340000000.0
1069            return unsafe {
1070                ptr::copy(buffer.add(1), buffer, length);
1071                ptr::write_bytes(buffer.add(length), b'0', dec_exp as usize + 3 - length);
1072                *buffer.add(dec_exp as usize + 1) = b'.';
1073                buffer.add(dec_exp as usize + 3)
1074            };
1075        } else if 0 <= dec_exp {
1076            // 1234e-2 -> 12.34
1077            return unsafe {
1078                ptr::copy(buffer.add(1), buffer, dec_exp as usize + 1);
1079                *buffer.add(dec_exp as usize + 1) = b'.';
1080                buffer.add(length + 1)
1081            };
1082        } else {
1083            // 1234e-6 -> 0.001234
1084            return unsafe {
1085                ptr::copy(buffer.add(1), buffer.add((1 - dec_exp) as usize), length);
1086                ptr::write_bytes(buffer, b'0', (1 - dec_exp) as usize);
1087                *buffer.add(1) = b'.';
1088                buffer.add((1 - dec_exp) as usize + length)
1089            };
1090        }
1091    }
1092
1093    unsafe {
1094        // 1234e30 -> 1.234e33
1095        *buffer = *buffer.add(1);
1096        *buffer.add(1) = b'.';
1097    }
1098    buffer = unsafe { buffer.add(length + usize::from(length > 1)) };
1099
1100    // Write exponent.
1101    let sign_ptr = buffer;
1102    let e_sign = if dec_exp >= 0 {
1103        (u16::from(b'+') << 8) | u16::from(b'e')
1104    } else {
1105        (u16::from(b'-') << 8) | u16::from(b'e')
1106    };
1107    buffer = unsafe { buffer.add(1) };
1108    dec_exp = if dec_exp >= 0 { dec_exp } else { -dec_exp };
1109    buffer = unsafe { buffer.add(usize::from(dec_exp >= 10)) };
1110    if Float::MIN_10_EXP > -100 && Float::MAX_10_EXP < 100 {
1111        unsafe {
1112            buffer
1113                .cast::<u16>()
1114                .write_unaligned(*digits2(dec_exp as usize));
1115            sign_ptr.cast::<u16>().write_unaligned(e_sign.to_le());
1116            return buffer.add(2);
1117        }
1118    }
1119    // digit = dec_exp / 100
1120    let digit = if USE_UMUL128_HI64 {
1121        umul128_hi64(dec_exp as u64, 0x290000000000000) as u32
1122    } else {
1123        (dec_exp as u32 * DIV100_SIG) >> DIV100_EXP
1124    };
1125    unsafe {
1126        *buffer = b'0' + digit as u8;
1127    }
1128    buffer = unsafe { buffer.add(usize::from(dec_exp >= 100)) };
1129    unsafe {
1130        buffer
1131            .cast::<u16>()
1132            .write_unaligned(*digits2((dec_exp as u32 - digit * 100) as usize));
1133        sign_ptr.cast::<u16>().write_unaligned(e_sign.to_le());
1134        buffer.add(2)
1135    }
1136}
1137
1138/// Safe API for formatting floating point numbers to text.
1139///
1140/// ## Example
1141///
1142/// ```
1143/// let mut buffer = zmij::Buffer::new();
1144/// let printed = buffer.format_finite(1.234);
1145/// assert_eq!(printed, "1.234");
1146/// ```
1147pub struct Buffer {
1148    bytes: [MaybeUninit<u8>; BUFFER_SIZE],
1149}
1150
1151impl Buffer {
1152    /// This is a cheap operation; you don't need to worry about reusing buffers
1153    /// for efficiency.
1154    #[inline]
1155    #[cfg_attr(feature = "no-panic", no_panic)]
1156    pub fn new() -> Self {
1157        let bytes = [MaybeUninit::<u8>::uninit(); BUFFER_SIZE];
1158        Buffer { bytes }
1159    }
1160
1161    /// Print a floating point number into this buffer and return a reference to
1162    /// its string representation within the buffer.
1163    ///
1164    /// # Special cases
1165    ///
1166    /// This function formats NaN as the string "NaN", positive infinity as
1167    /// "inf", and negative infinity as "-inf" to match std::fmt.
1168    ///
1169    /// If your input is known to be finite, you may get better performance by
1170    /// calling the `format_finite` method instead of `format` to avoid the
1171    /// checks for special cases.
1172    #[cfg_attr(feature = "no-panic", no_panic)]
1173    pub fn format<F: Float>(&mut self, f: F) -> &str {
1174        if f.is_nonfinite() {
1175            f.format_nonfinite()
1176        } else {
1177            self.format_finite(f)
1178        }
1179    }
1180
1181    /// Print a floating point number into this buffer and return a reference to
1182    /// its string representation within the buffer.
1183    ///
1184    /// # Special cases
1185    ///
1186    /// This function **does not** check for NaN or infinity. If the input
1187    /// number is not a finite float, the printed representation will be some
1188    /// correctly formatted but unspecified numerical value.
1189    ///
1190    /// Please check [`is_finite`] yourself before calling this function, or
1191    /// check [`is_nan`] and [`is_infinite`] and handle those cases yourself.
1192    ///
1193    /// [`is_finite`]: f64::is_finite
1194    /// [`is_nan`]: f64::is_nan
1195    /// [`is_infinite`]: f64::is_infinite
1196    #[cfg_attr(feature = "no-panic", no_panic)]
1197    pub fn format_finite<F: Float>(&mut self, f: F) -> &str {
1198        unsafe {
1199            let end = f.write_to_zmij_buffer(self.bytes.as_mut_ptr().cast::<u8>());
1200            let len = end.offset_from(self.bytes.as_ptr().cast::<u8>()) as usize;
1201            let slice = slice::from_raw_parts(self.bytes.as_ptr().cast::<u8>(), len);
1202            str::from_utf8_unchecked(slice)
1203        }
1204    }
1205}
1206
1207/// A floating point number, f32 or f64, that can be written into a
1208/// [`zmij::Buffer`][Buffer].
1209///
1210/// This trait is sealed and cannot be implemented for types outside of the
1211/// `zmij` crate.
1212#[allow(unknown_lints)] // rustc older than 1.74
1213#[allow(private_bounds)]
1214pub trait Float: private::Sealed {}
1215impl Float for f32 {}
1216impl Float for f64 {}
1217
1218mod private {
1219    pub trait Sealed: crate::traits::Float {
1220        fn is_nonfinite(self) -> bool;
1221        fn format_nonfinite(self) -> &'static str;
1222        unsafe fn write_to_zmij_buffer(self, buffer: *mut u8) -> *mut u8;
1223    }
1224
1225    impl Sealed for f32 {
1226        #[inline]
1227        fn is_nonfinite(self) -> bool {
1228            const EXP_MASK: u32 = 0x7f800000;
1229            let bits = self.to_bits();
1230            bits & EXP_MASK == EXP_MASK
1231        }
1232
1233        #[cold]
1234        #[cfg_attr(feature = "no-panic", inline)]
1235        fn format_nonfinite(self) -> &'static str {
1236            const MANTISSA_MASK: u32 = 0x007fffff;
1237            const SIGN_MASK: u32 = 0x80000000;
1238            let bits = self.to_bits();
1239            if bits & MANTISSA_MASK != 0 {
1240                crate::NAN
1241            } else if bits & SIGN_MASK != 0 {
1242                crate::NEG_INFINITY
1243            } else {
1244                crate::INFINITY
1245            }
1246        }
1247
1248        #[cfg_attr(feature = "no-panic", inline)]
1249        unsafe fn write_to_zmij_buffer(self, buffer: *mut u8) -> *mut u8 {
1250            unsafe { crate::write(self, buffer) }
1251        }
1252    }
1253
1254    impl Sealed for f64 {
1255        #[inline]
1256        fn is_nonfinite(self) -> bool {
1257            const EXP_MASK: u64 = 0x7ff0000000000000;
1258            let bits = self.to_bits();
1259            bits & EXP_MASK == EXP_MASK
1260        }
1261
1262        #[cold]
1263        #[cfg_attr(feature = "no-panic", inline)]
1264        fn format_nonfinite(self) -> &'static str {
1265            const MANTISSA_MASK: u64 = 0x000fffffffffffff;
1266            const SIGN_MASK: u64 = 0x8000000000000000;
1267            let bits = self.to_bits();
1268            if bits & MANTISSA_MASK != 0 {
1269                crate::NAN
1270            } else if bits & SIGN_MASK != 0 {
1271                crate::NEG_INFINITY
1272            } else {
1273                crate::INFINITY
1274            }
1275        }
1276
1277        #[cfg_attr(feature = "no-panic", inline)]
1278        unsafe fn write_to_zmij_buffer(self, buffer: *mut u8) -> *mut u8 {
1279            unsafe { crate::write(self, buffer) }
1280        }
1281    }
1282}
1283
1284impl Default for Buffer {
1285    #[inline]
1286    #[cfg_attr(feature = "no-panic", no_panic)]
1287    fn default() -> Self {
1288        Buffer::new()
1289    }
1290}
zmij/lib.rs

zmij/
lib.rs