// Crate-level configuration: the crate is `no_std`, requires explicit `unsafe`
// blocks inside `unsafe fn`, and opts out of a set of naming and Clippy lints.
#![no_std]
#![doc(html_root_url = "https://docs.rs/zmij/1.0.17")]
#![deny(unsafe_op_in_unsafe_fn)]
#![allow(non_camel_case_types, non_snake_case)]
#![allow(
    clippy::blocks_in_conditions,
    clippy::cast_possible_truncation,
    clippy::cast_possible_wrap,
    clippy::cast_ptr_alignment,
    clippy::cast_sign_loss,
    clippy::doc_markdown,
    clippy::incompatible_msrv,
    clippy::items_after_statements,
    clippy::many_single_char_names,
    clippy::must_use_candidate,
    clippy::needless_doctest_main,
    clippy::never_loop,
    clippy::redundant_else,
    clippy::similar_names,
    clippy::too_many_arguments,
    clippy::too_many_lines,
    clippy::unreadable_literal,
    clippy::used_underscore_items,
    clippy::while_immutable_condition,
    clippy::wildcard_imports
)]
69
#[cfg(zmij_no_select_unpredictable)]
mod hint;
#[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
mod stdarch_x86;
#[cfg(test)]
mod tests;
mod traits;

#[cfg(all(any(target_arch = "aarch64", target_arch = "x86_64"), not(miri)))]
use core::arch::asm;
#[cfg(not(zmij_no_select_unpredictable))]
use core::hint;
use core::mem::{self, MaybeUninit};
use core::ptr;
use core::slice;
use core::str;
#[cfg(feature = "no-panic")]
use no_panic::no_panic;
88
/// Number of bytes of scratch space held by a `Buffer`.
const BUFFER_SIZE: usize = 24;
/// Text produced for a NaN input.
const NAN: &str = "NaN";
/// Text produced for positive infinity.
const INFINITY: &str = "inf";
/// Text produced for negative infinity.
const NEG_INFINITY: &str = "-inf";
93
/// A 128-bit unsigned integer stored as two 64-bit words.
#[cfg_attr(test, derive(Debug, PartialEq))]
struct uint128 {
    /// Most significant 64 bits.
    hi: u64,
    /// Least significant 64 bits.
    lo: u64,
}
99
/// Selects the `umul128_hi64`-based variants of a couple of small divisions
/// (see the uses in `to_decimal_fast` and `write`). Enabled only when
/// compiling for Apple targets.
const USE_UMUL128_HI64: bool = cfg!(target_vendor = "apple");
102
/// Returns the full 128-bit product of two 64-bit unsigned integers.
///
/// The widening multiplication cannot overflow.
const fn umul128(x: u64, y: u64) -> u128 {
    x as u128 * y as u128
}
107
108const fn umul128_hi64(x: u64, y: u64) -> u64 {
109 (umul128(x, y) >> 64) as u64
110}
111
112#[cfg_attr(feature = "no-panic", no_panic)]
113fn umul192_hi128(x_hi: u64, x_lo: u64, y: u64) -> uint128 {
114 let p = umul128(x_hi, y);
115 let lo = (p as u64).wrapping_add((umul128(x_lo, y) >> 64) as u64);
116 uint128 {
117 hi: (p >> 64) as u64 + u64::from(lo < p as u64),
118 lo,
119 }
120}
121
122#[cfg_attr(feature = "no-panic", no_panic)]
125fn umulhi_inexact_to_odd<UInt>(x_hi: u64, x_lo: u64, y: UInt) -> UInt
126where
127 UInt: traits::UInt,
128{
129 let num_bits = mem::size_of::<UInt>() * 8;
130 if num_bits == 64 {
131 let p = umul192_hi128(x_hi, x_lo, y.into());
132 UInt::truncate(p.hi | u64::from((p.lo >> 1) != 0))
133 } else {
134 let p = (umul128(x_hi, y.into()) >> 32) as u64;
135 UInt::enlarge((p >> 32) as u32 | u32::from((p as u32 >> 1) != 0))
136 }
137}
138
139trait FloatTraits: traits::Float {
140 const NUM_BITS: i32;
141 const NUM_SIG_BITS: i32 = Self::MANTISSA_DIGITS as i32 - 1;
142 const NUM_EXP_BITS: i32 = Self::NUM_BITS - Self::NUM_SIG_BITS - 1;
143 const EXP_MASK: i32 = (1 << Self::NUM_EXP_BITS) - 1;
144 const EXP_BIAS: i32 = (1 << (Self::NUM_EXP_BITS - 1)) - 1;
145 const EXP_OFFSET: i32 = Self::EXP_BIAS + Self::NUM_SIG_BITS;
146
147 type SigType: traits::UInt;
148 const IMPLICIT_BIT: Self::SigType;
149
150 fn to_bits(self) -> Self::SigType;
151
152 fn is_negative(bits: Self::SigType) -> bool {
153 (bits >> (Self::NUM_BITS - 1)) != Self::SigType::from(0)
154 }
155
156 fn get_sig(bits: Self::SigType) -> Self::SigType {
157 bits & (Self::IMPLICIT_BIT - Self::SigType::from(1))
158 }
159
160 fn get_exp(bits: Self::SigType) -> i64 {
161 (bits << 1u8 >> (Self::NUM_SIG_BITS + 1)).into() as i64
162 }
163}
164
165impl FloatTraits for f32 {
166 const NUM_BITS: i32 = 32;
167 const IMPLICIT_BIT: u32 = 1 << Self::NUM_SIG_BITS;
168
169 type SigType = u32;
170
171 fn to_bits(self) -> Self::SigType {
172 self.to_bits()
173 }
174}
175
176impl FloatTraits for f64 {
177 const NUM_BITS: i32 = 64;
178 const IMPLICIT_BIT: u64 = 1 << Self::NUM_SIG_BITS;
179
180 type SigType = u64;
181
182 fn to_bits(self) -> Self::SigType {
183 self.to_bits()
184 }
185}
186
187#[repr(C, align(64))]
188struct Pow10SignificandsTable {
189 data: [u64; Self::NUM_POW10 * 2],
190}
191
192impl Pow10SignificandsTable {
193 const SPLIT_TABLES: bool = falsecfg!(target_arch = "aarch64");
194 const NUM_POW10: usize = 617;
195
196 unsafe fn get_unchecked(&self, dec_exp: i32) -> uint128 {
197 const DEC_EXP_MIN: i32 = -292;
198 if !Self::SPLIT_TABLES {
199 let index = ((dec_exp - DEC_EXP_MIN) * 2) as usize;
200 return uint128 {
201 hi: unsafe { *self.data.get_unchecked(index) },
202 lo: unsafe { *self.data.get_unchecked(index + 1) },
203 };
204 }
205
206 unsafe {
207 #[cfg_attr(
208 not(all(any(target_arch = "x86_64", target_arch = "aarch64"), not(miri))),
209 allow(unused_mut)
210 )]
211 let mut hi = self
212 .data
213 .as_ptr()
214 .offset(Self::NUM_POW10 as isize + DEC_EXP_MIN as isize - 1);
215 #[cfg_attr(
216 not(all(any(target_arch = "x86_64", target_arch = "aarch64"), not(miri))),
217 allow(unused_mut)
218 )]
219 let mut lo = hi.add(Self::NUM_POW10);
220
221 #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64"), not(miri)))]
223 asm!("/*{0}{1}*/", inout(reg) hi, inout(reg) lo);
224 uint128 {
225 hi: *hi.offset(-dec_exp as isize),
226 lo: *lo.offset(-dec_exp as isize),
227 }
228 }
229 }
230
231 #[cfg(test)]
232 fn get(&self, dec_exp: i32) -> uint128 {
233 const DEC_EXP_MIN: i32 = -292;
234 assert!((DEC_EXP_MIN..DEC_EXP_MIN + Self::NUM_POW10 as i32).contains(&dec_exp));
235 unsafe { self.get_unchecked(dec_exp) }
236 }
237}
238
239static POW10_SIGNIFICANDS: Pow10SignificandsTable = {
242 let mut data = [0; Pow10SignificandsTable::NUM_POW10 * 2];
243
244 struct uint192 {
245 w0: u64, w1: u64,
247 w2: u64, }
249
250 let mut current = uint192 {
253 w0: 0xe000000000000000,
254 w1: 0x25e8e89c13bb0f7a,
255 w2: 0xff77b1fcbebcdc4f,
256 };
257 let ten = 0xa000000000000000;
258 let mut i = 0;
259 while i < Pow10SignificandsTable::NUM_POW10 {
260 if Pow10SignificandsTable::SPLIT_TABLES {
261 data[Pow10SignificandsTable::NUM_POW10 - i - 1] = current.w2;
262 data[Pow10SignificandsTable::NUM_POW10 * 2 - i - 1] = current.w1;
263 } else {
264 data[i * 2] = current.w2;
265 data[i * 2 + 1] = current.w1;
266 }
267
268 let h0: u64 = umul128_hi64(current.w0, ten);
269 let h1: u64 = umul128_hi64(current.w1, ten);
270
271 let c0: u64 = h0.wrapping_add(current.w1.wrapping_mul(ten));
272 let c1: u64 = ((c0 < h0) as u64 + h1).wrapping_add(current.w2.wrapping_mul(ten));
273 let c2: u64 = (c1 < h1) as u64 + umul128_hi64(current.w2, ten); if (c2 >> 63) != 0 {
277 current = uint192 {
278 w0: c0,
279 w1: c1,
280 w2: c2,
281 };
282 } else {
283 current = uint192 {
284 w0: c0 << 1,
285 w1: c1 << 1 | c0 >> 63,
286 w2: c2 << 1 | c1 >> 63,
287 };
288 }
289
290 i += 1;
291 }
292
293 Pow10SignificandsTable { data }
294};
295
/// Computes a decimal exponent from the binary exponent `bin_exp` using
/// fixed-point arithmetic.
///
/// The result is `floor(bin_exp * LOG10_2_SIG / 2^LOG10_2_EXP)`; when
/// `regular` is `false`, `LOG10_3_OVER_4_SIG` is subtracted before the shift.
///
/// In debug builds, asserts that `bin_exp` is within `-1334..=2620`.
const fn compute_dec_exp(bin_exp: i32, regular: bool) -> i32 {
    debug_assert!(bin_exp >= -1334 && bin_exp <= 2620);
    const LOG10_3_OVER_4_SIG: i32 = 131_072;
    const LOG10_2_SIG: i32 = 315_653;
    const LOG10_2_EXP: i32 = 20;
    // The arithmetic right shift rounds toward negative infinity.
    (bin_exp * LOG10_2_SIG - !regular as i32 * LOG10_3_OVER_4_SIG) >> LOG10_2_EXP
}
307
/// Computes the left shift applied to a binary significand before it is
/// multiplied by a power-of-ten significand.
///
/// The result is `bin_exp + floor(-dec_exp * LOG2_POW10_SIG / 2^16) + 1`,
/// truncated to `u8`.
///
/// In debug builds, asserts that `dec_exp` is within `-350..=350`.
const fn do_compute_exp_shift(bin_exp: i32, dec_exp: i32) -> u8 {
    debug_assert!(dec_exp >= -350 && dec_exp <= 350);
    const LOG2_POW10_SIG: i32 = 217_707;
    const LOG2_POW10_EXP: i32 = 16;
    // The arithmetic right shift rounds toward negative infinity.
    let pow10_bin_exp = (-dec_exp * LOG2_POW10_SIG) >> LOG2_POW10_EXP;
    (bin_exp + pow10_bin_exp + 1) as u8
}
318
319struct ExpShiftTable {
320 data: [u8; if Self::ENABLE {
321 f64::EXP_MASK as usize + 1
322 } else {
323 1
324 }],
325}
326
327impl ExpShiftTable {
328 const ENABLE: bool = true;
329}
330
331static EXP_SHIFTS: ExpShiftTable = {
332 let mut data = [0u8; if ExpShiftTable::ENABLE {
333 f64::EXP_MASK as usize + 1
334 } else {
335 1
336 }];
337
338 let mut raw_exp = 0;
339 while raw_exp < data.len() && ExpShiftTable::ENABLE {
340 let mut bin_exp = raw_exp as i32 - f64::EXP_OFFSET;
341 if raw_exp == 0 {
342 bin_exp += 1;
343 }
344 let dec_exp = compute_dec_exp(bin_exp, true);
345 data[raw_exp] = do_compute_exp_shift(bin_exp, dec_exp) as u8;
346 raw_exp += 1;
347 }
348
349 ExpShiftTable { data }
350};
351
352unsafe fn compute_exp_shift<UInt, const ONLY_REGULAR: bool>(bin_exp: i32, dec_exp: i32) -> u8
363where
364 UInt: traits::UInt,
365{
366 let num_bits = mem::size_of::<UInt>() * 8;
367 if num_bits == 64 && ExpShiftTable::ENABLE && ONLY_REGULAR {
368 unsafe {
369 *EXP_SHIFTS
370 .data
371 .as_ptr()
372 .add((bin_exp + f64::EXP_OFFSET) as usize)
373 }
374 } else {
375 do_compute_exp_shift(bin_exp, dec_exp)
376 }
377}
378
/// Returns the 1-based position of the last nonzero byte of `x` in memory
/// order, or 0 if `x` is zero.
///
/// Used on binary-coded-decimal words (one digit per byte) to find how many
/// digits remain once trailing zero digits are dropped.
#[cfg_attr(feature = "no-panic", no_panic)]
fn count_trailing_nonzeros(x: u64) -> usize {
    // `to_le` makes the first byte in memory the least significant one. The
    // `<< 1 | 1` keeps `leading_zeros` well defined for zero; the constant 70
    // compensates for it so that the division by 8 rounds up to whole bytes.
    (70 - ((x.to_le() << 1) | 1).leading_zeros() as usize) / 8
}
395
/// The 100 two-character strings "00" through "99", aligned so that each pair
/// can be read as a `u16`.
#[repr(C, align(2))]
struct Digits2([u8; 200]);

static DIGITS2: Digits2 = Digits2(
    *b"0001020304050607080910111213141516171819\
       2021222324252627282930313233343536373839\
       4041424344454647484950515253545556575859\
       6061626364656667686970717273747576777879\
       8081828384858687888990919293949596979899",
);

/// Returns the two ASCII digits of `value` as a `u16` holding them in memory
/// order (tens digit first).
///
/// # Safety
///
/// `value` must be less than 100; larger values read past the table. This is
/// only checked in debug builds.
#[cfg_attr(feature = "no-panic", no_panic)]
unsafe fn digits2(value: usize) -> &'static u16 {
    debug_assert!(value < 100);

    // The table is 2-byte aligned, so viewing it as `u16` elements is sound.
    #[allow(clippy::cast_ptr_alignment)]
    unsafe {
        &*DIGITS2.0.as_ptr().cast::<u16>().add(value)
    }
}
420
// Fixed-point reciprocals for multiply-and-shift division, together with the
// matching "negated divisor" constants `2^k - d`. Adding `NEGd * q` to a value
// both subtracts `d * q` from it and deposits the quotient `q` in the next
// lane `k` bits up.

/// Shift paired with `DIV10K_SIG`.
const DIV10K_EXP: i32 = 40;
/// `floor(2^40 / 10000) + 1`.
const DIV10K_SIG: u32 = ((1u64 << DIV10K_EXP) / 10000 + 1) as u32;
/// `2^32 - 10000`.
const NEG10K: u32 = ((1u64 << 32) - 10000) as u32;

/// Shift paired with `DIV100_SIG`.
const DIV100_EXP: i32 = 19;
/// `floor(2^19 / 100) + 1`.
const DIV100_SIG: u32 = (1 << DIV100_EXP) / 100 + 1;
/// `2^16 - 100`.
const NEG100: u32 = (1 << 16) - 100;

/// Shift paired with `DIV10_SIG`.
const DIV10_EXP: i32 = 10;
/// `floor(2^10 / 10) + 1`.
const DIV10_SIG: u32 = (1 << DIV10_EXP) / 10 + 1;
/// `2^8 - 10`.
const NEG10: u32 = (1 << 8) - 10;

/// Eight ASCII `'0'` bytes; OR-ing this into a BCD word yields ASCII digits.
const ZEROS: u64 = 0x0101010101010101 * b'0' as u64;
434
435#[cfg_attr(feature = "no-panic", no_panic)]
436fn to_bcd8(abcdefgh: u64) -> u64 {
437 let abcd_efgh =
445 abcdefgh + u64::from(NEG10K) * ((abcdefgh * u64::from(DIV10K_SIG)) >> DIV10K_EXP);
446 let ab_cd_ef_gh = abcd_efgh
447 + u64::from(NEG100) * (((abcd_efgh * u64::from(DIV100_SIG)) >> DIV100_EXP) & 0x7f0000007f);
448 let a_b_c_d_e_f_g_h = ab_cd_ef_gh
449 + u64::from(NEG10)
450 * (((ab_cd_ef_gh * u64::from(DIV10_SIG)) >> DIV10_EXP) & 0xf000f000f000f);
451 a_b_c_d_e_f_g_h.to_be()
452}
453
/// Writes `digit` as an ASCII character to `buffer` unconditionally, and
/// returns a pointer advanced by one byte only if `condition` is `true`.
///
/// When `condition` is `false` the written byte is left to be overwritten by
/// the caller's next write.
///
/// # Safety
///
/// `buffer` must be valid for a one-byte write. `digit` is expected to be a
/// single decimal digit.
unsafe fn write_if(buffer: *mut u8, digit: u32, condition: bool) -> *mut u8 {
    unsafe {
        *buffer = b'0' + digit as u8;
        buffer.add(usize::from(condition))
    }
}
460
/// Stores the eight bytes of `value` at `buffer` in native byte order, with no
/// alignment requirement.
///
/// # Safety
///
/// `buffer` must be valid for an eight-byte write.
unsafe fn write8(buffer: *mut u8, value: u64) {
    unsafe {
        buffer.cast::<u64>().write_unaligned(value);
    }
}
466
467#[cfg_attr(feature = "no-panic", no_panic)]
470unsafe fn write_significand9(mut buffer: *mut u8, value: u32, has9digits: bool) -> *mut u8 {
471 buffer = unsafe { write_if(buffer, value / 100_000_000, has9digits) };
472 let bcd = to_bcd8(u64::from(value % 100_000_000));
473 unsafe {
474 write8(buffer, bcd | ZEROS);
475 buffer.add(count_trailing_nonzeros(bcd))
476 }
477}
478
479#[cfg_attr(feature = "no-panic", no_panic)]
484unsafe fn write_significand17(
485 mut buffer: *mut u8,
486 value: u64,
487 has17digits: bool,
488 #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))] value_div10: i64,
489) -> *mut u8 {
490 #[cfg(not(any(
491 all(target_arch = "aarch64", target_feature = "neon", not(miri)),
492 all(target_arch = "x86_64", target_feature = "sse2", not(miri)),
493 )))]
494 {
495 let abbccddee = (value / 100_000_000) as u32;
497 let ffgghhii = (value % 100_000_000) as u32;
498 buffer = unsafe { write_if(buffer, abbccddee / 100_000_000, has17digits) };
499 let bcd = to_bcd8(u64::from(abbccddee % 100_000_000));
500 unsafe {
501 write8(buffer, bcd | ZEROS);
502 }
503 if ffgghhii == 0 {
504 return unsafe { buffer.add(count_trailing_nonzeros(bcd)) };
505 }
506 let bcd = to_bcd8(u64::from(ffgghhii));
507 unsafe {
508 write8(buffer.add(8), bcd | ZEROS);
509 buffer.add(8).add(count_trailing_nonzeros(bcd))
510 }
511 }
512
513 #[cfg(all(target_arch = "aarch64", target_feature = "neon", not(miri)))]
514 {
515 use core::arch::aarch64::*;
518
519 const NEG10K: i32 = -10000 + 0x10000;
520
521 struct MulConstants {
522 mul_const: u64,
523 hundred_million: u64,
524 multipliers32: int32x4_t,
525 multipliers16: int16x8_t,
526 }
527
528 static CONSTANTS: MulConstants = MulConstants {
529 mul_const: 0xabcc77118461cefd,
530 hundred_million: 100000000,
531 multipliers32: unsafe {
532 mem::transmute::<[i32; 4], int32x4_t>([
533 DIV10K_SIG as i32,
534 NEG10K,
535 (DIV100_SIG << 12) as i32,
536 NEG100 as i32,
537 ])
538 },
539 multipliers16: unsafe {
540 mem::transmute::<[i16; 8], int16x8_t>([0xce0, NEG10 as i16, 0, 0, 0, 0, 0, 0])
541 },
542 };
543
544 let mut c = ptr::addr_of!(CONSTANTS);
545
546 let c = unsafe {
549 asm!("/*{0}*/", inout(reg) c);
550 &*c
551 };
552
553 let mut hundred_million = c.hundred_million;
554
555 unsafe {
557 asm!("/*{0}*/", inout(reg) hundred_million);
558 }
559
560 let abbccddee = (umul128(value, c.mul_const) >> 90) as u64;
562 let ffgghhii = value - abbccddee * hundred_million;
563
564 let a = (umul128(abbccddee, c.mul_const) >> 90) as u64;
567 let bbccddee = abbccddee - a * hundred_million;
568
569 buffer = unsafe { write_if(buffer, a as u32, has17digits) };
570
571 unsafe {
572 let ffgghhii_bbccddee_64: uint64x1_t =
573 mem::transmute::<u64, uint64x1_t>((ffgghhii << 32) | bbccddee);
574 let bbccddee_ffgghhii: int32x2_t = vreinterpret_s32_u64(ffgghhii_bbccddee_64);
575
576 let bbcc_ffgg: int32x2_t = vreinterpret_s32_u32(vshr_n_u32(
577 vreinterpret_u32_s32(vqdmulh_n_s32(
578 bbccddee_ffgghhii,
579 mem::transmute::<int32x4_t, [i32; 4]>(c.multipliers32)[0],
580 )),
581 9,
582 ));
583 let ddee_bbcc_hhii_ffgg_32: int32x2_t = vmla_n_s32(
584 bbccddee_ffgghhii,
585 bbcc_ffgg,
586 mem::transmute::<int32x4_t, [i32; 4]>(c.multipliers32)[1],
587 );
588
589 let mut ddee_bbcc_hhii_ffgg: int32x4_t =
590 vreinterpretq_s32_u32(vshll_n_u16(vreinterpret_u16_s32(ddee_bbcc_hhii_ffgg_32), 0));
591
592 asm!("/*{:v}*/", inout(vreg) ddee_bbcc_hhii_ffgg);
595
596 let dd_bb_hh_ff: int32x4_t = vqdmulhq_n_s32(
597 ddee_bbcc_hhii_ffgg,
598 mem::transmute::<int32x4_t, [i32; 4]>(c.multipliers32)[2],
599 );
600 let ee_dd_cc_bb_ii_hh_gg_ff: int16x8_t = vreinterpretq_s16_s32(vmlaq_n_s32(
601 ddee_bbcc_hhii_ffgg,
602 dd_bb_hh_ff,
603 mem::transmute::<int32x4_t, [i32; 4]>(c.multipliers32)[3],
604 ));
605 let high_10s: int16x8_t = vqdmulhq_n_s16(
606 ee_dd_cc_bb_ii_hh_gg_ff,
607 mem::transmute::<int16x8_t, [i16; 8]>(c.multipliers16)[0],
608 );
609 let digits: uint8x16_t = vrev64q_u8(vreinterpretq_u8_s16(vmlaq_n_s16(
610 ee_dd_cc_bb_ii_hh_gg_ff,
611 high_10s,
612 mem::transmute::<int16x8_t, [i16; 8]>(c.multipliers16)[1],
613 )));
614 let str: uint16x8_t = vaddq_u16(
615 vreinterpretq_u16_u8(digits),
616 vreinterpretq_u16_s8(vdupq_n_s8(b'0' as i8)),
617 );
618
619 buffer.cast::<uint16x8_t>().write_unaligned(str);
620
621 let is_not_zero: uint16x8_t =
622 vreinterpretq_u16_u8(vcgtzq_s8(vreinterpretq_s8_u8(digits)));
623 let zeros: u64 = vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(is_not_zero, 4)), 0);
624
625 buffer.add(16 - (zeros.leading_zeros() as usize >> 2))
626 }
627 }
628
629 #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
630 {
631 use crate::stdarch_x86::*;
632
633 let last_digit = (value - value_div10 as u64 * 10) as u32;
634
635 buffer = unsafe { buffer.offset(isize::from(has17digits) - 1) };
640 unsafe {
641 *buffer.add(16) = last_digit as u8 + b'0';
642 }
643
644 let abcdefgh = (value_div10 / 100_000_000) as u32;
645 let ijklmnop = (value_div10 % 100_000_000) as u32;
646
647 #[repr(C, align(64))]
648 struct Constants {
649 div10k: u128,
650 neg10k: u128,
651 div100: u128,
652 div10: u128,
653 #[cfg(target_feature = "sse4.1")]
654 neg100: u128,
655 #[cfg(target_feature = "sse4.1")]
656 neg10: u128,
657 #[cfg(target_feature = "sse4.1")]
658 bswap: u128,
659 #[cfg(not(target_feature = "sse4.1"))]
660 hundred: u128,
661 #[cfg(not(target_feature = "sse4.1"))]
662 moddiv10: u128,
663 zeros: u128,
664 }
665
666 impl Constants {
667 const fn splat64(x: u64) -> u128 {
668 ((x as u128) << 64) | x as u128
669 }
670
671 const fn splat32(x: u32) -> u128 {
672 Self::splat64(((x as u64) << 32) | x as u64)
673 }
674
675 const fn splat16(x: u16) -> u128 {
676 Self::splat32(((x as u32) << 16) | x as u32)
677 }
678
679 #[cfg(target_feature = "sse4.1")]
680 const fn pack8(a: u8, b: u8, c: u8, d: u8, e: u8, f: u8, g: u8, h: u8) -> u64 {
681 ((h as u64) << 56)
682 | ((g as u64) << 48)
683 | ((f as u64) << 40)
684 | ((e as u64) << 32)
685 | ((d as u64) << 24)
686 | ((c as u64) << 16)
687 | ((b as u64) << 8)
688 | a as u64
689 }
690 }
691
692 static CONSTS: Constants = Constants {
693 div10k: Constants::splat64(DIV10K_SIG as u64),
694 neg10k: Constants::splat64(NEG10K as u64),
695 div100: Constants::splat32(DIV100_SIG),
696 div10: Constants::splat16(((1u32 << 16) / 10 + 1) as u16),
697 #[cfg(target_feature = "sse4.1")]
698 neg100: Constants::splat32(NEG100),
699 #[cfg(target_feature = "sse4.1")]
700 neg10: Constants::splat16((1 << 8) - 10),
701 #[cfg(target_feature = "sse4.1")]
702 bswap: Constants::pack8(15, 14, 13, 12, 11, 10, 9, 8) as u128
703 | (Constants::pack8(7, 6, 5, 4, 3, 2, 1, 0) as u128) << 64,
704 #[cfg(not(target_feature = "sse4.1"))]
705 hundred: Constants::splat32(100),
706 #[cfg(not(target_feature = "sse4.1"))]
707 moddiv10: Constants::splat16(10 * (1 << 8) - 1),
708 zeros: Constants::splat64(ZEROS),
709 };
710
711 let mut c = &raw const CONSTSptr::addr_of!(CONSTS);
712 unsafe {
715 asm!("/*{0}*/", inout(reg) c);
716 }
717
718 let div10k = unsafe { _mm_load_si128(&raw const (*c).div10kptr::addr_of!((*c).div10k).cast::<__m128i>()) };
719 let neg10k = unsafe { _mm_load_si128(&raw const (*c).neg10kptr::addr_of!((*c).neg10k).cast::<__m128i>()) };
720 let div100 = unsafe { _mm_load_si128(&raw const (*c).div100ptr::addr_of!((*c).div100).cast::<__m128i>()) };
721 let div10 = unsafe { _mm_load_si128(&raw const (*c).div10ptr::addr_of!((*c).div10).cast::<__m128i>()) };
722 #[cfg(target_feature = "sse4.1")]
723 let neg100 = unsafe { _mm_load_si128(ptr::addr_of!((*c).neg100).cast::<__m128i>()) };
724 #[cfg(target_feature = "sse4.1")]
725 let neg10 = unsafe { _mm_load_si128(ptr::addr_of!((*c).neg10).cast::<__m128i>()) };
726 #[cfg(target_feature = "sse4.1")]
727 let bswap = unsafe { _mm_load_si128(ptr::addr_of!((*c).bswap).cast::<__m128i>()) };
728 #[cfg(not(target_feature = "sse4.1"))]
729 let hundred = unsafe { _mm_load_si128(&raw const (*c).hundredptr::addr_of!((*c).hundred).cast::<__m128i>()) };
730 #[cfg(not(target_feature = "sse4.1"))]
731 let moddiv10 = unsafe { _mm_load_si128(&raw const (*c).moddiv10ptr::addr_of!((*c).moddiv10).cast::<__m128i>()) };
732 let zeros = unsafe { _mm_load_si128(&raw const (*c).zerosptr::addr_of!((*c).zeros).cast::<__m128i>()) };
733
734 unsafe {
736 let x: __m128i = _mm_set_epi64x(i64::from(abcdefgh), i64::from(ijklmnop));
737 let y: __m128i = _mm_add_epi64(
738 x,
739 _mm_mul_epu32(neg10k, _mm_srli_epi64(_mm_mul_epu32(x, div10k), DIV10K_EXP)),
740 );
741
742 #[cfg(target_feature = "sse4.1")]
743 let bcd: __m128i = {
744 let z: __m128i = _mm_add_epi64(
746 y,
747 _mm_mullo_epi32(neg100, _mm_srli_epi32(_mm_mulhi_epu16(y, div100), 3)),
748 );
749 let big_endian_bcd: __m128i =
750 _mm_add_epi64(z, _mm_mullo_epi16(neg10, _mm_mulhi_epu16(z, div10)));
751 _mm_shuffle_epi8(big_endian_bcd, bswap)
753 };
754
755 #[cfg(not(target_feature = "sse4.1"))]
756 let bcd: __m128i = {
757 let y_div_100: __m128i = _mm_srli_epi16(_mm_mulhi_epu16(y, div100), 3);
758 let y_mod_100: __m128i = _mm_sub_epi16(y, _mm_mullo_epi16(y_div_100, hundred));
759 let z: __m128i = _mm_or_si128(_mm_slli_epi32(y_mod_100, 16), y_div_100);
760 let bcd_shuffled: __m128i = _mm_sub_epi16(
761 _mm_slli_epi16(z, 8),
762 _mm_mullo_epi16(moddiv10, _mm_mulhi_epu16(z, div10)),
763 );
764 _mm_shuffle_epi32(bcd_shuffled, _MM_SHUFFLE(0, 1, 2, 3))
765 };
766
767 let digits = _mm_or_si128(bcd, zeros);
768
769 let mask128: __m128i = _mm_cmpgt_epi8(bcd, _mm_setzero_si128());
771 let mask = _mm_movemask_epi8(mask128) as u32;
772 let len = 32 - mask.leading_zeros() as usize;
778
779 _mm_storeu_si128(buffer.cast::<__m128i>(), digits);
780 buffer.add(if last_digit != 0 { 17 } else { len })
781 }
782 }
783}
784
/// Decimal significand/exponent pair produced by the conversion routines.
struct ToDecimalResult {
    /// Decimal significand.
    sig: i64,
    /// Decimal exponent paired with `sig`.
    exp: i32,
    /// `sig / 10`, passed on to the SSE digit writer. Only present when that
    /// writer is compiled in.
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
    sig_div10: i64,
}
791
792#[cfg_attr(feature = "no-panic", no_panic)]
793fn to_decimal_schubfach<UInt>(bin_sig: UInt, bin_exp: i64, regular: bool) -> ToDecimalResult
794where
795 UInt: traits::UInt,
796{
797 let num_bits = mem::size_of::<UInt>() as i32 * 8;
798 let dec_exp = compute_dec_exp(bin_exp as i32, regular);
799 let exp_shift = unsafe { compute_exp_shift::<UInt, false>(bin_exp as i32, dec_exp) };
800 let mut pow10 = unsafe { POW10_SIGNIFICANDS.get_unchecked(-dec_exp) };
801
802 if num_bits == 64 {
805 pow10.lo += 1;
806 } else {
807 pow10.hi += 1;
808 }
809
810 const BOUND_SHIFT: u32 = 2;
812 let bin_sig_shifted = bin_sig << BOUND_SHIFT;
813
814 let lsb = bin_sig & UInt::from(1);
817 let lower = (bin_sig_shifted - (UInt::from(regular) + UInt::from(1))) << exp_shift;
818 let lower = umulhi_inexact_to_odd(pow10.hi, pow10.lo, lower) + lsb;
819 let upper = (bin_sig_shifted + UInt::from(2)) << exp_shift;
820 let upper = umulhi_inexact_to_odd(pow10.hi, pow10.lo, upper) - lsb;
821
822 let div10 = (upper >> BOUND_SHIFT) / UInt::from(10);
825 let shorter = div10 * UInt::from(10);
826 if (shorter << BOUND_SHIFT) >= lower {
827 let result = ToDecimalResult {
828 sig: shorter.into() as i64,
829 exp: dec_exp,
830 #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
831 sig_div10: div10.into() as i64,
832 };
833 return result;
834 }
835
836 let scaled_sig = umulhi_inexact_to_odd(pow10.hi, pow10.lo, bin_sig_shifted << exp_shift);
837 let longer_below = scaled_sig >> BOUND_SHIFT;
838 let longer_above = longer_below + UInt::from(1);
839
840 let cmp = scaled_sig
843 .wrapping_sub((longer_below + longer_above) << 1)
844 .to_signed();
845 let below_closer = cmp < UInt::from(0).to_signed()
846 || (cmp == UInt::from(0).to_signed() && (longer_below & UInt::from(1)) == UInt::from(0));
847 let below_in = (longer_below << BOUND_SHIFT) >= lower;
848 let dec_sig = if below_closer & below_in {
849 longer_below
850 } else {
851 longer_above
852 };
853 ToDecimalResult {
854 sig: dec_sig.into() as i64,
855 exp: dec_exp,
856 #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
857 sig_div10: (dec_sig / UInt::from(10)).into() as i64,
858 }
859}
860
861#[cfg_attr(feature = "no-panic", no_panic)]
865fn to_decimal_fast<Float, UInt>(bin_sig: UInt, raw_exp: i64, regular: bool) -> ToDecimalResult
866where
867 Float: FloatTraits,
868 UInt: traits::UInt,
869{
870 let bin_exp = raw_exp - i64::from(Float::EXP_OFFSET);
871 let num_bits = mem::size_of::<UInt>() as i32 * 8;
872 while regular {
874 let dec_exp = if USE_UMUL128_HI64 {
875 umul128_hi64(bin_exp as u64, 0x4d10500000000000) as i32
876 } else {
877 compute_dec_exp(bin_exp as i32, true)
878 };
879 let exp_shift = unsafe { compute_exp_shift::<UInt, true>(bin_exp as i32, dec_exp) };
880 let pow10 = unsafe { POW10_SIGNIFICANDS.get_unchecked(-dec_exp) };
881
882 let integral; let fractional; if num_bits == 64 {
885 let p = umul192_hi128(pow10.hi, pow10.lo, (bin_sig << exp_shift).into());
886 integral = UInt::truncate(p.hi);
887 fractional = p.lo;
888 } else {
889 let p = umul128(pow10.hi, (bin_sig << exp_shift).into());
890 integral = UInt::truncate((p >> 64) as u64);
891 fractional = p as u64;
892 }
893 const HALF_ULP: u64 = 1 << 63;
894
895 let cmp = fractional.wrapping_sub(HALF_ULP) as i64;
897 if cmp == 0 {
898 break;
899 }
900
901 const DIV10_SIG64: u64 = (1 << 63) / 5 + 1;
905 let div10 = umul128_hi64(integral.into(), DIV10_SIG64);
906 #[allow(unused_mut)]
907 let mut digit = integral.into() - div10 * 10;
908 #[cfg(all(any(target_arch = "aarch64", target_arch = "x86_64"), not(miri)))]
910 unsafe {
911 asm!("/*{0}*/", inout(reg) digit);
912 }
913
914 let num_integral_bits = if num_bits == 64 { 4 } else { 32 };
918 let num_fractional_bits = 64 - num_integral_bits;
919 let ten = 10u64 << num_fractional_bits;
920 let scaled_sig_mod10 = (digit << num_fractional_bits) | (fractional >> num_integral_bits);
922
923 let scaled_half_ulp = pow10.hi >> (num_integral_bits - exp_shift + 1);
928 let upper = scaled_sig_mod10 + scaled_half_ulp;
929
930 if scaled_sig_mod10 == scaled_half_ulp
952 || ten.wrapping_sub(upper) <= 1
955 {
956 break;
957 }
958
959 let round_up = upper >= ten;
960 let mut shorter = (integral.into() - digit) as i64;
961 let longer = (integral.into() + u64::from(cmp >= 0)) as i64;
962 if falsecfg!(target_arch = "aarch64") {
963 let dec_sig =
965 hint::select_unpredictable(scaled_sig_mod10 < scaled_half_ulp, shorter, longer);
966 return ToDecimalResult {
967 sig: hint::select_unpredictable(round_up, shorter + 10, dec_sig),
968 exp: dec_exp,
969 #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
970 sig_div10: 0,
971 };
972 }
973 shorter += i64::from(round_up) * 10;
974 let use_shorter = scaled_sig_mod10 <= scaled_half_ulp || round_up;
975 return ToDecimalResult {
976 sig: hint::select_unpredictable(use_shorter, shorter, longer),
977 exp: dec_exp,
978 #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
979 sig_div10: div10 as i64 + i64::from(use_shorter) * i64::from(round_up),
980 };
981 }
982 to_decimal_schubfach(bin_sig, bin_exp, regular)
983}
984
985#[cfg_attr(feature = "no-panic", no_panic)]
988unsafe fn write<Float>(value: Float, mut buffer: *mut u8) -> *mut u8
989where
990 Float: FloatTraits,
991{
992 let bits = value.to_bits();
993 let bin_exp = Float::get_exp(bits); let bin_sig = Float::get_sig(bits); unsafe {
998 *buffer = b'-';
999 }
1000 buffer = unsafe { buffer.add(usize::from(Float::is_negative(bits))) };
1001
1002 let mut dec;
1003 let threshold = if Float::NUM_BITS == 64 {
1004 10_000_000_000_000_000
1005 } else {
1006 100_000_000
1007 };
1008 if bin_exp == 0 {
1009 if bin_sig == Float::SigType::from(0) {
1010 return unsafe {
1011 *buffer = b'0';
1012 *buffer.add(1) = b'.';
1013 *buffer.add(2) = b'0';
1014 buffer.add(3)
1015 };
1016 }
1017 dec = to_decimal_schubfach(bin_sig, i64::from(1 - Float::EXP_OFFSET), true);
1018 while dec.sig < threshold {
1019 dec.sig *= 10;
1020 dec.exp -= 1;
1021 }
1022 #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
1023 {
1024 dec.sig_div10 = dec.sig / 10;
1025 }
1026 } else {
1027 dec = to_decimal_fast::<Float, Float::SigType>(
1028 bin_sig | Float::IMPLICIT_BIT,
1029 bin_exp,
1030 bin_sig != Float::SigType::from(0),
1031 );
1032 }
1033 let mut dec_exp = dec.exp;
1034 let extra_digit = dec.sig >= threshold;
1035 dec_exp += Float::MAX_DIGITS10 as i32 - 2 + i32::from(extra_digit);
1036 if Float::NUM_BITS == 32 && dec.sig < 10_000_000 {
1037 dec.sig *= 10;
1038 dec_exp -= 1;
1039 }
1040
1041 let end = if Float::NUM_BITS == 64 {
1043 unsafe {
1044 write_significand17(
1045 buffer.add(1),
1046 dec.sig as u64,
1047 extra_digit,
1048 #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
1049 dec.sig_div10,
1050 )
1051 }
1052 } else {
1053 unsafe { write_significand9(buffer.add(1), dec.sig as u32, extra_digit) }
1054 };
1055
1056 let length = unsafe { end.offset_from(buffer.add(1)) } as usize;
1057
1058 if Float::NUM_BITS == 32 && (-6..=12).contains(&dec_exp)
1059 || Float::NUM_BITS == 64 && (-5..=15).contains(&dec_exp)
1060 {
1061 if length as i32 - 1 <= dec_exp {
1062 return unsafe {
1064 ptr::copy(buffer.add(1), buffer, length);
1065 ptr::write_bytes(buffer.add(length), b'0', dec_exp as usize + 3 - length);
1066 *buffer.add(dec_exp as usize + 1) = b'.';
1067 buffer.add(dec_exp as usize + 3)
1068 };
1069 } else if 0 <= dec_exp {
1070 return unsafe {
1072 ptr::copy(buffer.add(1), buffer, dec_exp as usize + 1);
1073 *buffer.add(dec_exp as usize + 1) = b'.';
1074 buffer.add(length + 1)
1075 };
1076 } else {
1077 return unsafe {
1079 ptr::copy(buffer.add(1), buffer.add((1 - dec_exp) as usize), length);
1080 ptr::write_bytes(buffer, b'0', (1 - dec_exp) as usize);
1081 *buffer.add(1) = b'.';
1082 buffer.add((1 - dec_exp) as usize + length)
1083 };
1084 }
1085 }
1086
1087 unsafe {
1088 *buffer = *buffer.add(1);
1090 *buffer.add(1) = b'.';
1091 }
1092 buffer = unsafe { buffer.add(length + usize::from(length > 1)) };
1093
1094 let sign_ptr = buffer;
1096 let e_sign = if dec_exp >= 0 {
1097 (u16::from(b'+') << 8) | u16::from(b'e')
1098 } else {
1099 (u16::from(b'-') << 8) | u16::from(b'e')
1100 };
1101 buffer = unsafe { buffer.add(1) };
1102 dec_exp = if dec_exp >= 0 { dec_exp } else { -dec_exp };
1103 buffer = unsafe { buffer.add(usize::from(dec_exp >= 10)) };
1104 if Float::MIN_10_EXP > -100 && Float::MAX_10_EXP < 100 {
1105 unsafe {
1106 buffer
1107 .cast::<u16>()
1108 .write_unaligned(*digits2(dec_exp as usize));
1109 sign_ptr.cast::<u16>().write_unaligned(e_sign.to_le());
1110 return buffer.add(2);
1111 }
1112 }
1113 let digit = if USE_UMUL128_HI64 {
1115 umul128_hi64(dec_exp as u64, 0x290000000000000) as u32
1116 } else {
1117 (dec_exp as u32 * DIV100_SIG) >> DIV100_EXP
1118 };
1119 unsafe {
1120 *buffer = b'0' + digit as u8;
1121 }
1122 buffer = unsafe { buffer.add(usize::from(dec_exp >= 100)) };
1123 unsafe {
1124 buffer
1125 .cast::<u16>()
1126 .write_unaligned(*digits2((dec_exp as u32 - digit * 100) as usize));
1127 sign_ptr.cast::<u16>().write_unaligned(e_sign.to_le());
1128 buffer.add(2)
1129 }
1130}
1131
1132pub struct Buffer {
1142 bytes: [MaybeUninit<u8>; BUFFER_SIZE],
1143}
1144
1145impl Buffer {
1146 #[inline]
1149 #[cfg_attr(feature = "no-panic", no_panic)]
1150 pub fn new() -> Self {
1151 let bytes = [MaybeUninit::<u8>::uninit(); BUFFER_SIZE];
1152 Buffer { bytes }
1153 }
1154
1155 #[cfg_attr(feature = "no-panic", no_panic)]
1167 pub fn format<F: Float>(&mut self, f: F) -> &str {
1168 if f.is_nonfinite() {
1169 f.format_nonfinite()
1170 } else {
1171 self.format_finite(f)
1172 }
1173 }
1174
1175 #[cfg_attr(feature = "no-panic", no_panic)]
1191 pub fn format_finite<F: Float>(&mut self, f: F) -> &str {
1192 unsafe {
1193 let end = f.write_to_zmij_buffer(self.bytes.as_mut_ptr().cast::<u8>());
1194 let len = end.offset_from(self.bytes.as_ptr().cast::<u8>()) as usize;
1195 let slice = slice::from_raw_parts(self.bytes.as_ptr().cast::<u8>(), len);
1196 str::from_utf8_unchecked(slice)
1197 }
1198 }
1199}
1200
1201#[allow(unknown_lints)] #[allow(private_bounds)]
1208pub trait Float: private::Sealed {}
1209impl Float for f32 {}
1210impl Float for f64 {}
1211
1212mod private {
1213 pub trait Sealed: crate::traits::Float {
1214 fn is_nonfinite(self) -> bool;
1215 fn format_nonfinite(self) -> &'static str;
1216 unsafe fn write_to_zmij_buffer(self, buffer: *mut u8) -> *mut u8;
1217 }
1218
1219 impl Sealed for f32 {
1220 #[inline]
1221 fn is_nonfinite(self) -> bool {
1222 const EXP_MASK: u32 = 0x7f800000;
1223 let bits = self.to_bits();
1224 bits & EXP_MASK == EXP_MASK
1225 }
1226
1227 #[cold]
1228 #[cfg_attr(feature = "no-panic", inline)]
1229 fn format_nonfinite(self) -> &'static str {
1230 const MANTISSA_MASK: u32 = 0x007fffff;
1231 const SIGN_MASK: u32 = 0x80000000;
1232 let bits = self.to_bits();
1233 if bits & MANTISSA_MASK != 0 {
1234 crate::NAN
1235 } else if bits & SIGN_MASK != 0 {
1236 crate::NEG_INFINITY
1237 } else {
1238 crate::INFINITY
1239 }
1240 }
1241
1242 #[cfg_attr(feature = "no-panic", inline)]
1243 unsafe fn write_to_zmij_buffer(self, buffer: *mut u8) -> *mut u8 {
1244 unsafe { crate::write(self, buffer) }
1245 }
1246 }
1247
1248 impl Sealed for f64 {
1249 #[inline]
1250 fn is_nonfinite(self) -> bool {
1251 const EXP_MASK: u64 = 0x7ff0000000000000;
1252 let bits = self.to_bits();
1253 bits & EXP_MASK == EXP_MASK
1254 }
1255
1256 #[cold]
1257 #[cfg_attr(feature = "no-panic", inline)]
1258 fn format_nonfinite(self) -> &'static str {
1259 const MANTISSA_MASK: u64 = 0x000fffffffffffff;
1260 const SIGN_MASK: u64 = 0x8000000000000000;
1261 let bits = self.to_bits();
1262 if bits & MANTISSA_MASK != 0 {
1263 crate::NAN
1264 } else if bits & SIGN_MASK != 0 {
1265 crate::NEG_INFINITY
1266 } else {
1267 crate::INFINITY
1268 }
1269 }
1270
1271 #[cfg_attr(feature = "no-panic", inline)]
1272 unsafe fn write_to_zmij_buffer(self, buffer: *mut u8) -> *mut u8 {
1273 unsafe { crate::write(self, buffer) }
1274 }
1275 }
1276}
1277
1278impl Default for Buffer {
1279 #[inline]
1280 #[cfg_attr(feature = "no-panic", no_panic)]
1281 fn default() -> Self {
1282 Buffer::new()
1283 }
1284}