1#![no_std]
44#![doc(html_root_url = "https://docs.rs/zmij/1.0.21")]
45#![deny(unsafe_op_in_unsafe_fn)]
46#![allow(non_camel_case_types, non_snake_case)]
47#![allow(
48 clippy::blocks_in_conditions,
49 clippy::cast_possible_truncation,
50 clippy::cast_possible_wrap,
51 clippy::cast_ptr_alignment,
52 clippy::cast_sign_loss,
53 clippy::doc_markdown,
54 clippy::incompatible_msrv,
55 clippy::items_after_statements,
56 clippy::many_single_char_names,
57 clippy::modulo_one,
58 clippy::must_use_candidate,
59 clippy::needless_doctest_main,
60 clippy::never_loop,
61 clippy::redundant_else,
62 clippy::similar_names,
63 clippy::too_many_arguments,
64 clippy::too_many_lines,
65 clippy::unreadable_literal,
66 clippy::used_underscore_items,
67 clippy::while_immutable_condition,
68 clippy::wildcard_imports
69)]
70
71#[cfg(zmij_no_select_unpredictable)]
72mod hint;
73#[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
74mod stdarch_x86;
75#[cfg(test)]
76mod tests;
77mod traits;
78
79#[cfg(all(any(target_arch = "aarch64", target_arch = "x86_64"), not(miri)))]
80use core::arch::asm;
81#[cfg(not(zmij_no_select_unpredictable))]
82use core::hint;
83use core::mem::{self, MaybeUninit};
84use core::ptr;
85use core::slice;
86use core::str;
87#[cfg(feature = "no-panic")]
88use no_panic::no_panic;
89
/// Length, in bytes, of the scratch array stored inside [`Buffer`].
const BUFFER_SIZE: usize = 24;
/// Text returned by `format_nonfinite` when any mantissa bit is set.
const NAN: &str = "NaN";
/// Text returned by `format_nonfinite` when the mantissa is zero and the sign bit is clear.
const INFINITY: &str = "inf";
/// Text returned by `format_nonfinite` when the mantissa is zero and the sign bit is set.
const NEG_INFINITY: &str = "-inf";
94
95#[inline]
97fn select_if_less(lhs: u64, rhs: u64, true_value: i64, false_value: i64) -> i64 {
98 hint::select_unpredictable(lhs < rhs, true_value, false_value)
99}
100
/// A 128-bit unsigned value stored as two explicit 64-bit halves.
///
/// `hi` holds the upper 64 bits and `lo` the lower 64 bits. `Debug` and
/// `PartialEq` are only derived for test builds.
#[derive(Copy, Clone)]
#[cfg_attr(test, derive(Debug, PartialEq))]
struct uint128 {
    hi: u64,
    lo: u64,
}
107
/// Selects the `umul128_hi64`-based division shortcuts in `to_decimal_fast`
/// and `write`; enabled only when compiling for an Apple target vendor.
const USE_UMUL128_HI64: bool = cfg!(target_vendor = "apple");
110
/// Full 64x64 -> 128-bit unsigned product of `x` and `y`.
///
/// Both operands are widened first, so the multiplication cannot overflow.
const fn umul128(x: u64, y: u64) -> u128 {
    let wide_x = x as u128;
    let wide_y = y as u128;
    wide_x * wide_y
}
115
116const fn umul128_hi64(x: u64, y: u64) -> u64 {
117 (umul128(x, y) >> 64) as u64
118}
119
120#[cfg_attr(feature = "no-panic", no_panic)]
121fn umul192_hi128(x_hi: u64, x_lo: u64, y: u64) -> uint128 {
122 let p = umul128(x_hi, y);
123 let lo = (p as u64).wrapping_add((umul128(x_lo, y) >> 64) as u64);
124 uint128 {
125 hi: (p >> 64) as u64 + u64::from(lo < p as u64),
126 lo,
127 }
128}
129
130#[cfg_attr(feature = "no-panic", no_panic)]
133fn umulhi_inexact_to_odd<UInt>(x_hi: u64, x_lo: u64, y: UInt) -> UInt
134where
135 UInt: traits::UInt,
136{
137 let num_bits = mem::size_of::<UInt>() * 8;
138 if num_bits == 64 {
139 let p = umul192_hi128(x_hi, x_lo, y.into());
140 UInt::truncate(p.hi | u64::from((p.lo >> 1) != 0))
141 } else {
142 let p = (umul128(x_hi, y.into()) >> 32) as u64;
143 UInt::enlarge((p >> 32) as u32 | u32::from((p as u32 >> 1) != 0))
144 }
145}
146
/// Bit-layout description of a binary floating-point type, derived from the
/// constants supplied by `traits::Float`.
trait FloatTraits: traits::Float {
    /// Total width of the type in bits.
    const NUM_BITS: i32;
    /// Number of explicitly stored significand bits (`MANTISSA_DIGITS - 1`).
    const NUM_SIG_BITS: i32 = Self::MANTISSA_DIGITS as i32 - 1;
    /// Number of exponent bits: everything that is neither sign nor stored significand.
    const NUM_EXP_BITS: i32 = Self::NUM_BITS - Self::NUM_SIG_BITS - 1;
    /// All-ones value of the exponent field.
    const EXP_MASK: i32 = (1 << Self::NUM_EXP_BITS) - 1;
    /// Exponent bias: `2^(NUM_EXP_BITS - 1) - 1`.
    const EXP_BIAS: i32 = (1 << (Self::NUM_EXP_BITS - 1)) - 1;
    /// Bias plus the number of stored significand bits; subtracted from the
    /// raw exponent field to obtain the binary exponent used by the conversion.
    const EXP_OFFSET: i32 = Self::EXP_BIAS + Self::NUM_SIG_BITS;

    /// Unsigned integer type wide enough to hold the raw bits of the float.
    type SigType: traits::UInt;
    /// The bit just above the stored significand (`1 << NUM_SIG_BITS`).
    const IMPLICIT_BIT: Self::SigType;

    /// Returns the raw bit pattern of the value.
    fn to_bits(self) -> Self::SigType;

    /// Returns `true` when the top (sign) bit of `bits` is set.
    fn is_negative(bits: Self::SigType) -> bool {
        (bits >> (Self::NUM_BITS - 1)) != Self::SigType::from(0)
    }

    /// Extracts the stored significand: all bits below `IMPLICIT_BIT`.
    fn get_sig(bits: Self::SigType) -> Self::SigType {
        bits & (Self::IMPLICIT_BIT - Self::SigType::from(1))
    }

    /// Extracts the raw (still biased) exponent field.
    ///
    /// The left shift by one discards the sign bit; the right shift then
    /// discards the significand bits.
    fn get_exp(bits: Self::SigType) -> i64 {
        (bits << 1u8 >> (Self::NUM_SIG_BITS + 1)).into() as i64
    }
}
172
173impl FloatTraits for f32 {
174 const NUM_BITS: i32 = 32;
175 const IMPLICIT_BIT: u32 = 1 << Self::NUM_SIG_BITS;
176
177 type SigType = u32;
178
179 fn to_bits(self) -> Self::SigType {
180 self.to_bits()
181 }
182}
183
184impl FloatTraits for f64 {
185 const NUM_BITS: i32 = 64;
186 const IMPLICIT_BIT: u64 = 1 << Self::NUM_SIG_BITS;
187
188 type SigType = u64;
189
190 fn to_bits(self) -> Self::SigType {
191 self.to_bits()
192 }
193}
194
// The 28 powers of ten 10^0 ..= 10^27, each shifted left so that its most
// significant bit is bit 63 (e.g. 0xa000000000000000 is 10 << 60). Indexed by
// `(i + 11) % 28` in `Pow10SignificandsTable::compute`.
#[rustfmt::skip]
const POW10S: [u64; 28] = [
    0x8000000000000000, 0xa000000000000000, 0xc800000000000000,
    0xfa00000000000000, 0x9c40000000000000, 0xc350000000000000,
    0xf424000000000000, 0x9896800000000000, 0xbebc200000000000,
    0xee6b280000000000, 0x9502f90000000000, 0xba43b74000000000,
    0xe8d4a51000000000, 0x9184e72a00000000, 0xb5e620f480000000,
    0xe35fa931a0000000, 0x8e1bc9bf04000000, 0xb1a2bc2ec5000000,
    0xde0b6b3a76400000, 0x8ac7230489e80000, 0xad78ebc5ac620000,
    0xd8d726b7177a8000, 0x878678326eac9000, 0xa968163f0a57b400,
    0xd3c21bcecceda100, 0x84595161401484a0, 0xa56fa5b99019a5c8,
    0xcecb8f27f4200f3a,
];
208
// 128-bit factors that `Pow10SignificandsTable::compute` multiplies with an
// entry of `POW10S`; indexed by `(i + 11) / 28`.
// NOTE(review): presumably the normalized significands of every 28th power of
// ten covered by the table — confirm against the table generator.
#[rustfmt::skip]
const HIGH_PARTS: [uint128; 23] = [
    uint128 { hi: 0xaf8e5410288e1b6f, lo: 0x07ecf0ae5ee44dda },
    uint128 { hi: 0xb1442798f49ffb4a, lo: 0x99cd11cfdf41779d },
    uint128 { hi: 0xb2fe3f0b8599ef07, lo: 0x861fa7e6dcb4aa15 },
    uint128 { hi: 0xb4bca50b065abe63, lo: 0x0fed077a756b53aa },
    uint128 { hi: 0xb67f6455292cbf08, lo: 0x1a3bc84c17b1d543 },
    uint128 { hi: 0xb84687c269ef3bfb, lo: 0x3d5d514f40eea742 },
    uint128 { hi: 0xba121a4650e4ddeb, lo: 0x92f34d62616ce413 },
    uint128 { hi: 0xbbe226efb628afea, lo: 0x890489f70a55368c },
    uint128 { hi: 0xbdb6b8e905cb600f, lo: 0x5400e987bbc1c921 },
    uint128 { hi: 0xbf8fdb78849a5f96, lo: 0xde98520472bdd034 },
    uint128 { hi: 0xc16d9a0095928a27, lo: 0x75b7053c0f178294 },
    uint128 { hi: 0xc350000000000000, lo: 0x0000000000000000 },
    uint128 { hi: 0xc5371912364ce305, lo: 0x6c28000000000000 },
    uint128 { hi: 0xc722f0ef9d80aad6, lo: 0x424d3ad2b7b97ef6 },
    uint128 { hi: 0xc913936dd571c84c, lo: 0x03bc3a19cd1e38ea },
    uint128 { hi: 0xcb090c8001ab551c, lo: 0x5cadf5bfd3072cc6 },
    uint128 { hi: 0xcd036837130890a1, lo: 0x36dba887c37a8c10 },
    uint128 { hi: 0xcf02b2c21207ef2e, lo: 0x94f967e45e03f4bc },
    uint128 { hi: 0xd106f86e69d785c7, lo: 0xe13336d701beba52 },
    uint128 { hi: 0xd31045a8341ca07c, lo: 0x1ede48111209a051 },
    uint128 { hi: 0xd51ea6fa85785631, lo: 0x552a74227f3ea566 },
    uint128 { hi: 0xd732290fbacaf133, lo: 0xa97c177947ad4096 },
    uint128 { hi: 0xd94ad8b1c7380874, lo: 0x18375281ae7822bc },
];
235
// Bit set of per-entry corrections used by `Pow10SignificandsTable::compute`:
// bit `i & 31` of word `i >> 5` is subtracted from the low word of the
// product computed for table index `i`.
#[rustfmt::skip]
const FIXUPS: [u32; 20] = [
    0x05271b1f, 0x00000c20, 0x00003200, 0x12100020,
    0x00000000, 0x06000000, 0xc16409c0, 0xaf26700f,
    0xeb987b07, 0x0000000d, 0x00000000, 0x66fbfffe,
    0xb74100ec, 0xa0669fe8, 0xedb21280, 0x00000686,
    0x0a021200, 0x29b89c20, 0x08bc0eda, 0x00000000,
];
244
/// Storage for the 128-bit power-of-ten significands, aligned to 64 bytes.
///
/// Holds two `u64` words per entry (`NUM_POW10 * 2` in total), or nothing at
/// all when `COMPRESS` is enabled and entries are recomputed on demand.
#[repr(C, align(64))]
struct Pow10SignificandsTable {
    data: [u64; if Self::COMPRESS {
        0
    } else {
        Self::NUM_POW10 * 2
    }],
}
254
255impl Pow10SignificandsTable {
256 const COMPRESS: bool = false;
257 const SPLIT_TABLES: bool = !Self::COMPRESS && falsecfg!(target_arch = "aarch64");
258 const NUM_POW10: usize = 617;
259
260 const fn compute(i: u32) -> uint128 {
262 let m = unsafe { *POW10S.as_ptr().add(((i + 11) % 28) as usize) };
263 let h = unsafe { *HIGH_PARTS.as_ptr().add(((i + 11) / 28) as usize) };
264
265 let h1 = umul128_hi64(h.lo, m);
266
267 let c0 = h.lo.wrapping_mul(m);
268 let c1 = h1.wrapping_add(h.hi.wrapping_mul(m));
269 let c2 = (c1 < h1) as u64 + umul128_hi64(h.hi, m);
270
271 let mut result = if (c2 >> 63) != 0 {
272 uint128 { hi: c2, lo: c1 }
273 } else {
274 uint128 {
275 hi: (c2 << 1) | (c1 >> 63),
276 lo: (c1 << 1) | (c0 >> 63),
277 }
278 };
279 result.lo -= ((unsafe { *FIXUPS.as_ptr().add((i >> 5) as usize) } >> (i & 31)) & 1) as u64;
280 result
281 }
282
283 const fn new() -> Self {
284 let mut data = [0; if Self::COMPRESS {
285 0
286 } else {
287 Self::NUM_POW10 * 2
288 }];
289
290 let mut i = 0;
291 while i < Self::NUM_POW10 && !Self::COMPRESS {
292 let result = Self::compute(i as u32);
293 if Self::SPLIT_TABLES {
294 data[Self::NUM_POW10 - i - 1] = result.hi;
295 data[Self::NUM_POW10 * 2 - i - 1] = result.lo;
296 } else {
297 data[i * 2] = result.hi;
298 data[i * 2 + 1] = result.lo;
299 }
300 i += 1;
301 }
302
303 Pow10SignificandsTable { data }
304 }
305
306 unsafe fn get_unchecked(&self, dec_exp: i32) -> uint128 {
307 const DEC_EXP_MIN: i32 = -292;
308 if Self::COMPRESS {
309 return Self::compute((dec_exp - DEC_EXP_MIN) as u32);
310 }
311 if !Self::SPLIT_TABLES {
312 let index = ((dec_exp - DEC_EXP_MIN) * 2) as usize;
313 return uint128 {
314 hi: unsafe { *self.data.get_unchecked(index) },
315 lo: unsafe { *self.data.get_unchecked(index + 1) },
316 };
317 }
318
319 unsafe {
320 #[cfg_attr(
321 not(all(any(target_arch = "x86_64", target_arch = "aarch64"), not(miri))),
322 allow(unused_mut)
323 )]
324 let mut hi = self
325 .data
326 .as_ptr()
327 .offset(Self::NUM_POW10 as isize + DEC_EXP_MIN as isize - 1);
328 #[cfg_attr(
329 not(all(any(target_arch = "x86_64", target_arch = "aarch64"), not(miri))),
330 allow(unused_mut)
331 )]
332 let mut lo = hi.add(Self::NUM_POW10);
333
334 #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64"), not(miri)))]
336 asm!("/*{0}{1}*/", inout(reg) hi, inout(reg) lo);
337 uint128 {
338 hi: *hi.offset(-dec_exp as isize),
339 lo: *lo.offset(-dec_exp as isize),
340 }
341 }
342 }
343
344 #[cfg(test)]
345 fn get(&self, dec_exp: i32) -> uint128 {
346 const DEC_EXP_MIN: i32 = -292;
347 assert!((DEC_EXP_MIN..DEC_EXP_MIN + Self::NUM_POW10 as i32).contains(&dec_exp));
348 unsafe { self.get_unchecked(dec_exp) }
349 }
350}
351
/// The power-of-ten significand table, built at compile time by
/// `Pow10SignificandsTable::new`.
static POW10_SIGNIFICANDS: Pow10SignificandsTable = Pow10SignificandsTable::new();
353
/// Estimates the decimal exponent for a binary exponent using fixed-point
/// arithmetic.
///
/// Returns `(bin_exp * LOG10_2_SIG - k) >> LOG10_2_EXP`, where `k` is `0`
/// when `regular` is `true` and `LOG10_3_OVER_4_SIG` otherwise. The shift is
/// arithmetic, so negative results round towards negative infinity.
///
/// Debug builds assert that `bin_exp` lies in `-1334..=2620`.
const fn compute_dec_exp(bin_exp: i32, regular: bool) -> i32 {
    debug_assert!(bin_exp >= -1334 && bin_exp <= 2620);
    // Correction subtracted for irregular inputs, scaled by 2^LOG10_2_EXP.
    const LOG10_3_OVER_4_SIG: i32 = 131_072;
    // log10(2) scaled by 2^LOG10_2_EXP (315_653 / 2^20 is about 0.30103).
    const LOG10_2_SIG: i32 = 315_653;
    const LOG10_2_EXP: i32 = 20;
    (bin_exp * LOG10_2_SIG - !regular as i32 * LOG10_3_OVER_4_SIG) >> LOG10_2_EXP
}
365
/// Computes the shift amount for a binary/decimal exponent pair.
///
/// `pow10_bin_exp` is `(-dec_exp * LOG2_POW10_SIG) >> LOG2_POW10_EXP`, a
/// fixed-point estimate of `-dec_exp * log2(10)` rounded towards negative
/// infinity. The result is `bin_exp + pow10_bin_exp + 1` truncated to `u8`.
///
/// Debug builds assert that `dec_exp` lies in `-350..=350`.
#[inline]
const fn do_compute_exp_shift(bin_exp: i32, dec_exp: i32) -> u8 {
    debug_assert!(dec_exp >= -350 && dec_exp <= 350);
    // log2(10) scaled by 2^LOG2_POW10_EXP (217_707 / 2^16 is about 3.3219).
    const LOG2_POW10_SIG: i32 = 217_707;
    const LOG2_POW10_EXP: i32 = 16;
    let pow10_bin_exp = (-dec_exp * LOG2_POW10_SIG) >> LOG2_POW10_EXP;
    (bin_exp + pow10_bin_exp + 1) as u8
}
377
/// Precomputed shift amounts, one per raw `f64` exponent value.
///
/// When `ENABLE` is `false` the array shrinks to a single placeholder element.
struct ExpShiftTable {
    data: [u8; if Self::ENABLE {
        f64::EXP_MASK as usize + 1
    } else {
        1
    }],
}
385
impl ExpShiftTable {
    /// Whether the table is populated and consulted by `compute_exp_shift`.
    const ENABLE: bool = true;
}
389
390static EXP_SHIFTS: ExpShiftTable = {
391 let mut data = [0u8; if ExpShiftTable::ENABLE {
392 f64::EXP_MASK as usize + 1
393 } else {
394 1
395 }];
396
397 let mut raw_exp = 0;
398 while raw_exp < data.len() && ExpShiftTable::ENABLE {
399 let mut bin_exp = raw_exp as i32 - f64::EXP_OFFSET;
400 if raw_exp == 0 {
401 bin_exp += 1;
402 }
403 let dec_exp = compute_dec_exp(bin_exp, true);
404 data[raw_exp] = do_compute_exp_shift(bin_exp, dec_exp) as u8;
405 raw_exp += 1;
406 }
407
408 ExpShiftTable { data }
409};
410
411#[inline]
422unsafe fn compute_exp_shift<UInt, const ONLY_REGULAR: bool>(bin_exp: i32, dec_exp: i32) -> u8
423where
424 UInt: traits::UInt,
425{
426 let num_bits = mem::size_of::<UInt>() * 8;
427 if num_bits == 64 && ExpShiftTable::ENABLE && ONLY_REGULAR {
428 unsafe {
429 *EXP_SHIFTS
430 .data
431 .as_ptr()
432 .add((bin_exp + f64::EXP_OFFSET) as usize)
433 }
434 } else {
435 do_compute_exp_shift(bin_exp, dec_exp)
436 }
437}
438
/// Returns the 1-based position of the last non-zero byte of `x` in
/// little-endian byte order, or `0` when no byte qualifies.
///
/// The value is shifted left by one before counting, so the topmost bit of
/// the little-endian value is discarded and never contributes; the low bit
/// that is OR-ed in keeps `leading_zeros` below 64.
#[cfg_attr(feature = "no-panic", no_panic)]
fn count_trailing_nonzeros(x: u64) -> usize {
    let marked = (x.to_le() << 1) | 1;
    // Number of bits up to and including the highest set bit: 1..=64.
    let used_bits = 64 - marked.leading_zeros() as usize;
    (used_bits + 6) / 8
}
455
/// 200 bytes of ASCII text, aligned to 2 bytes so that `digits2` can read
/// each two-character pair as a `u16`.
#[repr(C, align(2))]
struct Digits2([u8; 200]);
460
/// The two-character decimal representations of 0..=99, concatenated
/// ("00", "01", ..., "99").
static DIGITS2: Digits2 = Digits2(
    *b"0001020304050607080910111213141516171819\
       2021222324252627282930313233343536373839\
       4041424344454647484950515253545556575859\
       6061626364656667686970717273747576777879\
       8081828384858687888990919293949596979899",
);
468
469#[cfg_attr(feature = "no-panic", no_panic)]
472unsafe fn digits2(value: usize) -> &'static u16 {
473 if true {
if !(value < 100) {
::core::panicking::panic("assertion failed: value < 100")
};
};debug_assert!(value < 100);
474
475 #[allow(clippy::cast_ptr_alignment)]
476 unsafe {
477 &*DIGITS2.0.as_ptr().cast::<u16>().add(value)
478 }
479}
480
// Fixed-point reciprocals used for division by 10^4, 100 and 10 via
// multiply-and-shift: `DIVn_SIG` is `2^DIVn_EXP / n + 1`.
// `NEGn` is `2^k - n` for k = 32, 16 and 8 respectively; `to_bcd8` multiplies
// it by a quotient and adds the product to the dividend.
const DIV10K_EXP: i32 = 40;
const DIV10K_SIG: u32 = ((1u64 << DIV10K_EXP) / 10000 + 1) as u32;
const NEG10K: u32 = ((1u64 << 32) - 10000) as u32;

const DIV100_EXP: i32 = 19;
const DIV100_SIG: u32 = (1 << DIV100_EXP) / 100 + 1;
const NEG100: u32 = (1 << 16) - 100;

const DIV10_EXP: i32 = 10;
const DIV10_SIG: u32 = (1 << DIV10_EXP) / 10 + 1;
const NEG10: u32 = (1 << 8) - 10;

// The ASCII code of '0' replicated into each of the eight bytes.
const ZEROS: u64 = 0x0101010101010101 * b'0' as u64;
494
495#[cfg_attr(feature = "no-panic", no_panic)]
496fn to_bcd8(abcdefgh: u64) -> u64 {
497 let abcd_efgh =
505 abcdefgh + u64::from(NEG10K) * ((abcdefgh * u64::from(DIV10K_SIG)) >> DIV10K_EXP);
506 let ab_cd_ef_gh = abcd_efgh
507 + u64::from(NEG100) * (((abcd_efgh * u64::from(DIV100_SIG)) >> DIV100_EXP) & 0x7f0000007f);
508 let a_b_c_d_e_f_g_h = ab_cd_ef_gh
509 + u64::from(NEG10)
510 * (((ab_cd_ef_gh * u64::from(DIV10_SIG)) >> DIV10_EXP) & 0xf000f000f000f);
511 a_b_c_d_e_f_g_h.to_be()
512}
513
/// Stores the ASCII character for `digit` at `buffer` unconditionally and
/// returns `buffer` advanced by one byte only when `condition` is `true`.
///
/// # Safety
///
/// `buffer` must be valid for a one-byte write, and `buffer + 1` must stay
/// within (or one past the end of) the same allocation.
unsafe fn write_if(buffer: *mut u8, digit: u32, condition: bool) -> *mut u8 {
    let ascii = b'0' + digit as u8;
    let advance = usize::from(condition);
    // SAFETY: the caller guarantees the write and the offset are in bounds.
    unsafe {
        *buffer = ascii;
        buffer.add(advance)
    }
}
520
/// Stores the eight bytes of `value` (native byte order) at `buffer`, which
/// does not have to be aligned.
///
/// # Safety
///
/// `buffer` must be valid for an eight-byte write.
unsafe fn write8(buffer: *mut u8, value: u64) {
    let dest = buffer.cast::<u64>();
    // SAFETY: the caller guarantees eight writable bytes at `buffer`.
    unsafe { ptr::write_unaligned(dest, value) }
}
526
527#[cfg_attr(feature = "no-panic", no_panic)]
532#[inline]
533unsafe fn write_significand<Float>(mut buffer: *mut u8, value: u64, extra_digit: bool) -> *mut u8
534where
535 Float: FloatTraits,
536{
537 if Float::NUM_BITS == 32 {
538 buffer = unsafe { write_if(buffer, (value / 100_000_000) as u32, extra_digit) };
539 let bcd = to_bcd8(value % 100_000_000);
540 unsafe {
541 write8(buffer, bcd + ZEROS);
542 return buffer.add(count_trailing_nonzeros(bcd));
543 }
544 }
545
546 #[cfg(not(any(
547 all(target_arch = "aarch64", target_feature = "neon", not(miri)),
548 all(target_arch = "x86_64", target_feature = "sse2", not(miri)),
549 )))]
550 {
551 let abbccddee = (value / 100_000_000) as u32;
553 let ffgghhii = (value % 100_000_000) as u32;
554 buffer = unsafe { write_if(buffer, abbccddee / 100_000_000, extra_digit) };
555 let bcd = to_bcd8(u64::from(abbccddee % 100_000_000));
556 unsafe {
557 write8(buffer, bcd + ZEROS);
558 }
559 if ffgghhii == 0 {
560 return unsafe { buffer.add(count_trailing_nonzeros(bcd)) };
561 }
562 let bcd = to_bcd8(u64::from(ffgghhii));
563 unsafe {
564 write8(buffer.add(8), bcd + ZEROS);
565 buffer.add(8).add(count_trailing_nonzeros(bcd))
566 }
567 }
568
569 #[cfg(all(target_arch = "aarch64", target_feature = "neon", not(miri)))]
570 {
571 use core::arch::aarch64::*;
574
575 const NEG10K: i32 = -10000 + 0x10000;
576
577 #[repr(C, align(64))]
578 struct Consts {
579 mul_const: u64,
580 hundred_million: u64,
581 multipliers32: int32x4_t,
582 multipliers16: int16x8_t,
583 }
584
585 static CONSTS: Consts = Consts {
586 mul_const: 0xabcc77118461cefd,
587 hundred_million: 100000000,
588 multipliers32: unsafe {
589 mem::transmute::<[i32; 4], int32x4_t>([
590 DIV10K_SIG as i32,
591 NEG10K,
592 (DIV100_SIG << 12) as i32,
593 NEG100 as i32,
594 ])
595 },
596 multipliers16: unsafe {
597 mem::transmute::<[i16; 8], int16x8_t>([0xce0, NEG10 as i16, 0, 0, 0, 0, 0, 0])
598 },
599 };
600
601 let mut c = ptr::addr_of!(CONSTS);
602
603 let c = unsafe {
606 asm!("/*{0}*/", inout(reg) c);
607 &*c
608 };
609
610 let mut hundred_million = c.hundred_million;
611
612 unsafe {
614 asm!("/*{0}*/", inout(reg) hundred_million);
615 }
616
617 let abbccddee = (umul128(value, c.mul_const) >> 90) as u64;
619 let ffgghhii = value - abbccddee * hundred_million;
620
621 let a = (umul128(abbccddee, c.mul_const) >> 90) as u64;
624 let bbccddee = abbccddee - a * hundred_million;
625
626 buffer = unsafe { write_if(buffer, a as u32, extra_digit) };
627
628 unsafe {
629 let ffgghhii_bbccddee_64: uint64x1_t =
630 mem::transmute::<u64, uint64x1_t>((ffgghhii << 32) | bbccddee);
631 let bbccddee_ffgghhii: int32x2_t = vreinterpret_s32_u64(ffgghhii_bbccddee_64);
632
633 let bbcc_ffgg: int32x2_t = vreinterpret_s32_u32(vshr_n_u32(
634 vreinterpret_u32_s32(vqdmulh_n_s32(
635 bbccddee_ffgghhii,
636 mem::transmute::<int32x4_t, [i32; 4]>(c.multipliers32)[0],
637 )),
638 9,
639 ));
640 let ddee_bbcc_hhii_ffgg_32: int32x2_t = vmla_n_s32(
641 bbccddee_ffgghhii,
642 bbcc_ffgg,
643 mem::transmute::<int32x4_t, [i32; 4]>(c.multipliers32)[1],
644 );
645
646 let mut ddee_bbcc_hhii_ffgg: int32x4_t =
647 vreinterpretq_s32_u32(vshll_n_u16(vreinterpret_u16_s32(ddee_bbcc_hhii_ffgg_32), 0));
648
649 asm!("/*{:v}*/", inout(vreg) ddee_bbcc_hhii_ffgg);
652
653 let dd_bb_hh_ff: int32x4_t = vqdmulhq_n_s32(
654 ddee_bbcc_hhii_ffgg,
655 mem::transmute::<int32x4_t, [i32; 4]>(c.multipliers32)[2],
656 );
657 let ee_dd_cc_bb_ii_hh_gg_ff: int16x8_t = vreinterpretq_s16_s32(vmlaq_n_s32(
658 ddee_bbcc_hhii_ffgg,
659 dd_bb_hh_ff,
660 mem::transmute::<int32x4_t, [i32; 4]>(c.multipliers32)[3],
661 ));
662 let high_10s: int16x8_t = vqdmulhq_n_s16(
663 ee_dd_cc_bb_ii_hh_gg_ff,
664 mem::transmute::<int16x8_t, [i16; 8]>(c.multipliers16)[0],
665 );
666 let digits: uint8x16_t = vrev64q_u8(vreinterpretq_u8_s16(vmlaq_n_s16(
667 ee_dd_cc_bb_ii_hh_gg_ff,
668 high_10s,
669 mem::transmute::<int16x8_t, [i16; 8]>(c.multipliers16)[1],
670 )));
671 let str: uint16x8_t = vaddq_u16(
672 vreinterpretq_u16_u8(digits),
673 vreinterpretq_u16_s8(vdupq_n_s8(b'0' as i8)),
674 );
675
676 buffer.cast::<uint16x8_t>().write_unaligned(str);
677
678 let is_not_zero: uint16x8_t =
679 vreinterpretq_u16_u8(vcgtzq_s8(vreinterpretq_s8_u8(digits)));
680 let zeros: u64 = vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(is_not_zero, 4)), 0);
681
682 buffer.add(16 - (zeros.leading_zeros() as usize >> 2))
683 }
684 }
685
686 #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
687 {
688 use crate::stdarch_x86::*;
689
690 let abbccddee = (value / 100_000_000) as u32;
691 let ffgghhii = (value % 100_000_000) as u32;
692 let a = abbccddee / 100_000_000;
693 let bbccddee = abbccddee % 100_000_000;
694
695 buffer = unsafe { write_if(buffer, a, extra_digit) };
696
697 #[repr(C, align(64))]
698 struct Consts {
699 div10k: u128,
700 neg10k: u128,
701 div100: u128,
702 div10: u128,
703 #[cfg(target_feature = "sse4.1")]
704 neg100: u128,
705 #[cfg(target_feature = "sse4.1")]
706 neg10: u128,
707 #[cfg(target_feature = "sse4.1")]
708 bswap: u128,
709 #[cfg(not(target_feature = "sse4.1"))]
710 hundred: u128,
711 #[cfg(not(target_feature = "sse4.1"))]
712 moddiv10: u128,
713 zeros: u128,
714 }
715
716 impl Consts {
717 const fn splat64(x: u64) -> u128 {
718 ((x as u128) << 64) | x as u128
719 }
720
721 const fn splat32(x: u32) -> u128 {
722 Self::splat64(((x as u64) << 32) | x as u64)
723 }
724
725 const fn splat16(x: u16) -> u128 {
726 Self::splat32(((x as u32) << 16) | x as u32)
727 }
728
729 #[cfg(target_feature = "sse4.1")]
730 const fn pack8(a: u8, b: u8, c: u8, d: u8, e: u8, f: u8, g: u8, h: u8) -> u64 {
731 ((h as u64) << 56)
732 | ((g as u64) << 48)
733 | ((f as u64) << 40)
734 | ((e as u64) << 32)
735 | ((d as u64) << 24)
736 | ((c as u64) << 16)
737 | ((b as u64) << 8)
738 | a as u64
739 }
740 }
741
742 static CONSTS: Consts = Consts {
743 div10k: Consts::splat64(DIV10K_SIG as u64),
744 neg10k: Consts::splat64(NEG10K as u64),
745 div100: Consts::splat32(DIV100_SIG),
746 div10: Consts::splat16(((1u32 << 16) / 10 + 1) as u16),
747 #[cfg(target_feature = "sse4.1")]
748 neg100: Consts::splat32(NEG100),
749 #[cfg(target_feature = "sse4.1")]
750 neg10: Consts::splat16((1 << 8) - 10),
751 #[cfg(target_feature = "sse4.1")]
752 bswap: Consts::pack8(15, 14, 13, 12, 11, 10, 9, 8) as u128
753 | (Consts::pack8(7, 6, 5, 4, 3, 2, 1, 0) as u128) << 64,
754 #[cfg(not(target_feature = "sse4.1"))]
755 hundred: Consts::splat32(100),
756 #[cfg(not(target_feature = "sse4.1"))]
757 moddiv10: Consts::splat16(10 * (1 << 8) - 1),
758 zeros: Consts::splat64(ZEROS),
759 };
760
761 let mut c = &raw const CONSTSptr::addr_of!(CONSTS);
762 unsafe {
764 asm!("/*{0}*/", inout(reg) c);
765 }
766
767 let div10k = unsafe { _mm_load_si128(&raw const (*c).div10kptr::addr_of!((*c).div10k).cast::<__m128i>()) };
768 let neg10k = unsafe { _mm_load_si128(&raw const (*c).neg10kptr::addr_of!((*c).neg10k).cast::<__m128i>()) };
769 let div100 = unsafe { _mm_load_si128(&raw const (*c).div100ptr::addr_of!((*c).div100).cast::<__m128i>()) };
770 let div10 = unsafe { _mm_load_si128(&raw const (*c).div10ptr::addr_of!((*c).div10).cast::<__m128i>()) };
771 #[cfg(target_feature = "sse4.1")]
772 let neg100 = unsafe { _mm_load_si128(ptr::addr_of!((*c).neg100).cast::<__m128i>()) };
773 #[cfg(target_feature = "sse4.1")]
774 let neg10 = unsafe { _mm_load_si128(ptr::addr_of!((*c).neg10).cast::<__m128i>()) };
775 #[cfg(target_feature = "sse4.1")]
776 let bswap = unsafe { _mm_load_si128(ptr::addr_of!((*c).bswap).cast::<__m128i>()) };
777 #[cfg(not(target_feature = "sse4.1"))]
778 let hundred = unsafe { _mm_load_si128(&raw const (*c).hundredptr::addr_of!((*c).hundred).cast::<__m128i>()) };
779 #[cfg(not(target_feature = "sse4.1"))]
780 let moddiv10 = unsafe { _mm_load_si128(&raw const (*c).moddiv10ptr::addr_of!((*c).moddiv10).cast::<__m128i>()) };
781 let zeros = unsafe { _mm_load_si128(&raw const (*c).zerosptr::addr_of!((*c).zeros).cast::<__m128i>()) };
782
783 unsafe {
785 let x: __m128i = _mm_set_epi64x(i64::from(bbccddee), i64::from(ffgghhii));
786 let y: __m128i = _mm_add_epi64(
787 x,
788 _mm_mul_epu32(neg10k, _mm_srli_epi64(_mm_mul_epu32(x, div10k), DIV10K_EXP)),
789 );
790
791 #[cfg(target_feature = "sse4.1")]
792 let bcd: __m128i = {
793 let z: __m128i = _mm_add_epi64(
795 y,
796 _mm_mullo_epi32(neg100, _mm_srli_epi32(_mm_mulhi_epu16(y, div100), 3)),
797 );
798 let big_endian_bcd: __m128i =
799 _mm_add_epi64(z, _mm_mullo_epi16(neg10, _mm_mulhi_epu16(z, div10)));
800 _mm_shuffle_epi8(big_endian_bcd, bswap)
802 };
803
804 #[cfg(not(target_feature = "sse4.1"))]
805 let bcd: __m128i = {
806 let y_div_100: __m128i = _mm_srli_epi16(_mm_mulhi_epu16(y, div100), 3);
807 let y_mod_100: __m128i = _mm_sub_epi16(y, _mm_mullo_epi16(y_div_100, hundred));
808 let z: __m128i = _mm_or_si128(_mm_slli_epi32(y_mod_100, 16), y_div_100);
809 let bcd_shuffled: __m128i = _mm_sub_epi16(
810 _mm_slli_epi16(z, 8),
811 _mm_mullo_epi16(moddiv10, _mm_mulhi_epu16(z, div10)),
812 );
813 _mm_shuffle_epi32(bcd_shuffled, _MM_SHUFFLE(0, 1, 2, 3))
814 };
815
816 let digits = _mm_or_si128(bcd, zeros);
817
818 let mask128: __m128i = _mm_cmpgt_epi8(bcd, _mm_setzero_si128());
820 let mask = _mm_movemask_epi8(mask128) as u32;
821 let len = 32 - mask.leading_zeros() as usize;
822
823 _mm_storeu_si128(buffer.cast::<__m128i>(), digits);
824 buffer.add(len)
825 }
826 }
827}
828
/// Result of a binary-to-decimal conversion: a decimal significand and the
/// decimal exponent that goes with it.
struct ToDecimalResult {
    sig: i64,
    exp: i32,
}
833
/// General binary-to-decimal conversion, used for raw-exponent-zero inputs
/// and as the fallback of `to_decimal_fast`.
///
/// `bin_sig` is the binary significand, `bin_exp` the binary exponent and
/// `regular` selects between the two interval shapes handled by
/// `compute_dec_exp` and by the lower bound below.
///
/// NOTE(review): the unchecked table lookup relies on `-dec_exp` being within
/// the range of `POW10_SIGNIFICANDS`; callers are presumed to guarantee this.
#[cfg_attr(feature = "no-panic", no_panic)]
#[inline]
fn to_decimal_schubfach<UInt>(bin_sig: UInt, bin_exp: i64, regular: bool) -> ToDecimalResult
where
    UInt: traits::UInt,
{
    let num_bits = mem::size_of::<UInt>() as i32 * 8;
    let dec_exp = compute_dec_exp(bin_exp as i32, regular);
    let exp_shift = unsafe { compute_exp_shift::<UInt, false>(bin_exp as i32, dec_exp) };
    let mut pow10 = unsafe { POW10_SIGNIFICANDS.get_unchecked(-dec_exp) };

    // Bump the power-of-ten significand by one unit in the last word that
    // takes part in the multiplication (low word for 64-bit, high word
    // otherwise).
    if num_bits == 64 {
        pow10.lo += 1;
    } else {
        pow10.hi += 1;
    }

    // Work with two extra low bits so that the interval bounds are integers.
    const BOUND_SHIFT: u32 = 2;
    let bin_sig_shifted = bin_sig << BOUND_SHIFT;

    // Scaled lower and upper interval bounds; the parity of `bin_sig`
    // tightens both bounds by one when the significand is odd.
    let lsb = bin_sig & UInt::from(1);
    let lower = (bin_sig_shifted - (UInt::from(regular) + UInt::from(1))) << exp_shift;
    let lower = umulhi_inexact_to_odd(pow10.hi, pow10.lo, lower) + lsb;
    let upper = (bin_sig_shifted + UInt::from(2)) << exp_shift;
    let upper = umulhi_inexact_to_odd(pow10.hi, pow10.lo, upper) - lsb;

    // Largest multiple of ten not above the upper bound; use it if it also
    // clears the lower bound.
    let shorter = (upper >> BOUND_SHIFT) / UInt::from(10) * UInt::from(10);
    if (shorter << BOUND_SHIFT) >= lower {
        return ToDecimalResult {
            sig: shorter.into() as i64,
            exp: dec_exp,
        };
    }

    // Otherwise choose between the two integers surrounding the scaled value.
    let scaled_sig = umulhi_inexact_to_odd(pow10.hi, pow10.lo, bin_sig_shifted << exp_shift);
    let longer_below = scaled_sig >> BOUND_SHIFT;
    let longer_above = longer_below + UInt::from(1);

    // Signed distance of the scaled value from the midpoint of the two
    // candidates, in quarter units.
    let cmp = scaled_sig
        .wrapping_sub((longer_below + longer_above) << 1)
        .to_signed();
    // Below is closer, or it is an exact tie and the lower candidate is even.
    let below_closer = cmp < UInt::from(0).to_signed()
        || (cmp == UInt::from(0).to_signed() && (longer_below & UInt::from(1)) == UInt::from(0));
    let below_in = (longer_below << BOUND_SHIFT) >= lower;
    let dec_sig = if below_closer & below_in {
        longer_below
    } else {
        longer_above
    };
    ToDecimalResult {
        sig: dec_sig.into() as i64,
        exp: dec_exp,
    }
}
897
/// Fast-path binary-to-decimal conversion for inputs with a non-zero raw
/// exponent.
///
/// `bin_sig` is the significand including the implicit bit and `raw_exp` the
/// biased exponent field. When `regular` is `false`, or when one of the
/// checks inside the loop cannot decide the result, the function falls back
/// to `to_decimal_schubfach`.
#[cfg_attr(feature = "no-panic", no_panic)]
#[inline]
fn to_decimal_fast<Float, UInt>(bin_sig: UInt, raw_exp: i64, regular: bool) -> ToDecimalResult
where
    Float: FloatTraits,
    UInt: traits::UInt,
{
    let bin_exp = raw_exp - i64::from(Float::EXP_OFFSET);
    let num_bits = mem::size_of::<UInt>() as i32 * 8;
    // The `while` runs at most once: every path either returns or breaks out
    // to the fallback below.
    while regular {
        let dec_exp = if USE_UMUL128_HI64 {
            umul128_hi64(bin_exp as u64, 0x4d10500000000000) as i32
        } else {
            compute_dec_exp(bin_exp as i32, true)
        };
        let exp_shift = unsafe { compute_exp_shift::<UInt, true>(bin_exp as i32, dec_exp) };
        let pow10 = unsafe { POW10_SIGNIFICANDS.get_unchecked(-dec_exp) };

        // Scale the significand by the power of ten and split the product
        // into an integral part and a 64-bit fractional part.
        let integral;
        let fractional;
        if num_bits == 64 {
            let p = umul192_hi128(pow10.hi, pow10.lo, (bin_sig << exp_shift).into());
            integral = UInt::truncate(p.hi);
            fractional = p.lo;
        } else {
            let p = umul128(pow10.hi, (bin_sig << exp_shift).into());
            integral = UInt::truncate((p >> 64) as u64);
            fractional = p as u64;
        }
        const HALF_ULP: u64 = 1 << 63;

        // A fraction of exactly one half cannot be resolved here.
        let cmp = fractional.wrapping_sub(HALF_ULP) as i64;
        if cmp == 0 {
            break;
        }

        // Last decimal digit of the integral part via multiply-high division
        // by ten.
        const DIV10_SIG64: u64 = (1 << 63) / 5 + 1;
        let div10 = umul128_hi64(integral.into(), DIV10_SIG64);
        #[allow(unused_mut)]
        let mut digit = integral.into() - div10 * 10;
        // The asm template is only a comment; `digit` is an in/out operand.
        #[cfg(all(any(target_arch = "aarch64", target_arch = "x86_64"), not(miri)))]
        unsafe {
            asm!("/*{0}*/", inout(reg) digit);
        }

        // Fixed-point value of (last digit + fraction), with
        // `num_integral_bits` integer bits above the binary point.
        let num_integral_bits = if num_bits == 64 { 4 } else { 32 };
        let num_fractional_bits = 64 - num_integral_bits;
        let ten = 10u64 << num_fractional_bits;
        let scaled_sig_mod10 = (digit << num_fractional_bits) | (fractional >> num_integral_bits);

        let scaled_half_ulp = pow10.hi >> (num_integral_bits - exp_shift + 1);
        let upper = scaled_sig_mod10 + scaled_half_ulp;

        // Too close to a boundary to decide without the exact algorithm.
        if ten.wrapping_sub(upper) <= 1 || scaled_sig_mod10 == scaled_half_ulp
        {
            break;
        }

        // `shorter` drops the last digit; `longer` keeps it, rounded up when
        // the fraction is above one half.
        let shorter = (integral.into() - digit) as i64;
        let longer = (integral.into() + u64::from(cmp >= 0)) as i64;
        let dec_sig = select_if_less(scaled_sig_mod10, scaled_half_ulp, shorter, longer);
        return ToDecimalResult {
            sig: select_if_less(ten, upper, shorter + 10, dec_sig),
            exp: dec_exp,
        };
    }
    to_decimal_schubfach(bin_sig, bin_exp, regular)
}
1006
/// Formats `value` at `buffer` and returns a pointer just past the last byte
/// of the text.
///
/// Zero is written as `0.0`. Values whose decimal exponent lies in `-6..=12`
/// (32-bit) or `-5..=15` (64-bit) are written in fixed notation; all others
/// use exponent notation of the form `d.ddde+xx` / `d.ddde-xx`.
///
/// # Safety
///
/// `buffer` must be valid for writes of the longest possible output.
/// NOTE(review): `Buffer` passes a `BUFFER_SIZE`-byte array; the value is
/// presumably required to be finite, since non-finite inputs are handled
/// before this function is reached — confirm against callers.
#[cfg_attr(feature = "no-panic", no_panic)]
unsafe fn write<Float>(value: Float, mut buffer: *mut u8) -> *mut u8
where
    Float: FloatTraits,
{
    let bits = value.to_bits();
    let bin_exp = Float::get_exp(bits);
    let bin_sig = Float::get_sig(bits);
    // Always store the minus sign, but only step past it for negative values.
    unsafe {
        *buffer = b'-';
    }
    buffer = unsafe { buffer.add(usize::from(Float::is_negative(bits))) };

    let mut dec;
    // Smallest significand that has one digit more than the standard length.
    let threshold = if Float::NUM_BITS == 64 {
        10_000_000_000_000_000
    } else {
        100_000_000
    };
    if bin_exp == 0 {
        if bin_sig == Float::SigType::from(0) {
            return unsafe {
                *buffer = b'0';
                *buffer.add(1) = b'.';
                *buffer.add(2) = b'0';
                buffer.add(3)
            };
        }
        // Raw exponent zero with a non-zero significand: convert without the
        // implicit bit, then scale the significand up to the threshold.
        dec = to_decimal_schubfach(bin_sig, i64::from(1 - Float::EXP_OFFSET), true);
        while dec.sig < threshold {
            dec.sig *= 10;
            dec.exp -= 1;
        }
    } else {
        dec = to_decimal_fast::<Float, Float::SigType>(
            bin_sig | Float::IMPLICIT_BIT,
            bin_exp,
            bin_sig != Float::SigType::from(0),
        );
    }
    let mut dec_exp = dec.exp;
    let extra_digit = dec.sig >= threshold;
    // Convert to the exponent of the first digit.
    dec_exp += Float::MAX_DIGITS10 as i32 - 2 + i32::from(extra_digit);
    if Float::NUM_BITS == 32 && dec.sig < 10_000_000 {
        dec.sig *= 10;
        dec_exp -= 1;
    }

    // Digits are written one byte further in, leaving room for the point.
    let end = unsafe { write_significand::<Float>(buffer.add(1), dec.sig as u64, extra_digit) };

    let length = unsafe { end.offset_from(buffer.add(1)) } as usize;

    if Float::NUM_BITS == 32 && (-6..=12).contains(&dec_exp)
        || Float::NUM_BITS == 64 && (-5..=15).contains(&dec_exp)
    {
        if length as i32 - 1 <= dec_exp {
            // Integer value: digits, padding zeros, then ".0".
            return unsafe {
                ptr::copy(buffer.add(1), buffer, length);
                ptr::write_bytes(buffer.add(length), b'0', dec_exp as usize + 3 - length);
                *buffer.add(dec_exp as usize + 1) = b'.';
                buffer.add(dec_exp as usize + 3)
            };
        } else if 0 <= dec_exp {
            // The point falls inside the digits: shift the integer part left
            // by one and insert the point after it.
            return unsafe {
                ptr::copy(buffer.add(1), buffer, dec_exp as usize + 1);
                *buffer.add(dec_exp as usize + 1) = b'.';
                buffer.add(length + 1)
            };
        } else {
            // Value below one: move the digits right and prefix "0.00...".
            return unsafe {
                ptr::copy(buffer.add(1), buffer.add((1 - dec_exp) as usize), length);
                ptr::write_bytes(buffer, b'0', (1 - dec_exp) as usize);
                *buffer.add(1) = b'.';
                buffer.add((1 - dec_exp) as usize + length)
            };
        }
    }

    // Exponent notation: first digit, point, remaining digits.
    unsafe {
        *buffer = *buffer.add(1);
        *buffer.add(1) = b'.';
    }
    // With a single digit the point is not kept; it is overwritten by 'e'.
    buffer = unsafe { buffer.add(length + usize::from(length > 1)) };

    let sign_ptr = buffer;
    // 'e' in the low byte and the sign in the high byte; stored with `to_le`
    // so that 'e' comes first in memory.
    let e_sign = if dec_exp >= 0 {
        (u16::from(b'+') << 8) | u16::from(b'e')
    } else {
        (u16::from(b'-') << 8) | u16::from(b'e')
    };
    buffer = unsafe { buffer.add(1) };
    dec_exp = if dec_exp >= 0 { dec_exp } else { -dec_exp };
    // For a one-digit exponent the two-digit pair is written one byte early,
    // and its leading '0' is then overwritten by the sign.
    buffer = unsafe { buffer.add(usize::from(dec_exp >= 10)) };
    if Float::MIN_10_EXP > -100 && Float::MAX_10_EXP < 100 {
        unsafe {
            buffer
                .cast::<u16>()
                .write_unaligned(*digits2(dec_exp as usize));
            sign_ptr.cast::<u16>().write_unaligned(e_sign.to_le());
            return buffer.add(2);
        }
    }
    // Exponents that may need three digits: hundreds digit first.
    let digit = if USE_UMUL128_HI64 {
        umul128_hi64(dec_exp as u64, 0x290000000000000) as u32
    } else {
        (dec_exp as u32 * DIV100_SIG) >> DIV100_EXP
    };
    unsafe {
        *buffer = b'0' + digit as u8;
    }
    buffer = unsafe { buffer.add(usize::from(dec_exp >= 100)) };
    unsafe {
        buffer
            .cast::<u16>()
            .write_unaligned(*digits2((dec_exp as u32 - digit * 100) as usize));
        sign_ptr.cast::<u16>().write_unaligned(e_sign.to_le());
        buffer.add(2)
    }
}
1137
/// Reusable scratch space for formatting a floating-point number.
///
/// Wraps `BUFFER_SIZE` possibly-uninitialized bytes; the text produced by
/// [`Buffer::format`] and [`Buffer::format_finite`] borrows from it.
pub struct Buffer {
    bytes: [MaybeUninit<u8>; BUFFER_SIZE],
}
1150
1151impl Buffer {
1152 #[inline]
1155 #[cfg_attr(feature = "no-panic", no_panic)]
1156 pub fn new() -> Self {
1157 let bytes = [MaybeUninit::<u8>::uninit(); BUFFER_SIZE];
1158 Buffer { bytes }
1159 }
1160
1161 #[cfg_attr(feature = "no-panic", no_panic)]
1173 pub fn format<F: Float>(&mut self, f: F) -> &str {
1174 if f.is_nonfinite() {
1175 f.format_nonfinite()
1176 } else {
1177 self.format_finite(f)
1178 }
1179 }
1180
1181 #[cfg_attr(feature = "no-panic", no_panic)]
1197 pub fn format_finite<F: Float>(&mut self, f: F) -> &str {
1198 unsafe {
1199 let end = f.write_to_zmij_buffer(self.bytes.as_mut_ptr().cast::<u8>());
1200 let len = end.offset_from(self.bytes.as_ptr().cast::<u8>()) as usize;
1201 let slice = slice::from_raw_parts(self.bytes.as_ptr().cast::<u8>(), len);
1202 str::from_utf8_unchecked(slice)
1203 }
1204 }
1205}
1206
/// Floating-point types that can be formatted by [`Buffer`].
///
/// The trait is sealed through the private `Sealed` supertrait and is
/// implemented for `f32` and `f64` only.
#[allow(unknown_lints)]
#[allow(private_bounds)]
pub trait Float: private::Sealed {}
impl Float for f32 {}
impl Float for f64 {}
1217
1218mod private {
1219 pub trait Sealed: crate::traits::Float {
1220 fn is_nonfinite(self) -> bool;
1221 fn format_nonfinite(self) -> &'static str;
1222 unsafe fn write_to_zmij_buffer(self, buffer: *mut u8) -> *mut u8;
1223 }
1224
1225 impl Sealed for f32 {
1226 #[inline]
1227 fn is_nonfinite(self) -> bool {
1228 const EXP_MASK: u32 = 0x7f800000;
1229 let bits = self.to_bits();
1230 bits & EXP_MASK == EXP_MASK
1231 }
1232
1233 #[cold]
1234 #[cfg_attr(feature = "no-panic", inline)]
1235 fn format_nonfinite(self) -> &'static str {
1236 const MANTISSA_MASK: u32 = 0x007fffff;
1237 const SIGN_MASK: u32 = 0x80000000;
1238 let bits = self.to_bits();
1239 if bits & MANTISSA_MASK != 0 {
1240 crate::NAN
1241 } else if bits & SIGN_MASK != 0 {
1242 crate::NEG_INFINITY
1243 } else {
1244 crate::INFINITY
1245 }
1246 }
1247
1248 #[cfg_attr(feature = "no-panic", inline)]
1249 unsafe fn write_to_zmij_buffer(self, buffer: *mut u8) -> *mut u8 {
1250 unsafe { crate::write(self, buffer) }
1251 }
1252 }
1253
1254 impl Sealed for f64 {
1255 #[inline]
1256 fn is_nonfinite(self) -> bool {
1257 const EXP_MASK: u64 = 0x7ff0000000000000;
1258 let bits = self.to_bits();
1259 bits & EXP_MASK == EXP_MASK
1260 }
1261
1262 #[cold]
1263 #[cfg_attr(feature = "no-panic", inline)]
1264 fn format_nonfinite(self) -> &'static str {
1265 const MANTISSA_MASK: u64 = 0x000fffffffffffff;
1266 const SIGN_MASK: u64 = 0x8000000000000000;
1267 let bits = self.to_bits();
1268 if bits & MANTISSA_MASK != 0 {
1269 crate::NAN
1270 } else if bits & SIGN_MASK != 0 {
1271 crate::NEG_INFINITY
1272 } else {
1273 crate::INFINITY
1274 }
1275 }
1276
1277 #[cfg_attr(feature = "no-panic", inline)]
1278 unsafe fn write_to_zmij_buffer(self, buffer: *mut u8) -> *mut u8 {
1279 unsafe { crate::write(self, buffer) }
1280 }
1281 }
1282}
1283
1284impl Default for Buffer {
1285 #[inline]
1286 #[cfg_attr(feature = "no-panic", no_panic)]
1287 fn default() -> Self {
1288 Buffer::new()
1289 }
1290}