#![no_std]
#![doc(html_root_url = "https://docs.rs/zmij/1.0.16")]
#![deny(unsafe_op_in_unsafe_fn)]
#![allow(non_camel_case_types, non_snake_case)]
#![allow(
    clippy::blocks_in_conditions,
    clippy::cast_possible_truncation,
    clippy::cast_possible_wrap,
    clippy::cast_ptr_alignment,
    clippy::cast_sign_loss,
    clippy::doc_markdown,
    clippy::incompatible_msrv,
    clippy::items_after_statements,
    clippy::many_single_char_names,
    clippy::must_use_candidate,
    clippy::needless_doctest_main,
    clippy::never_loop,
    clippy::redundant_else,
    clippy::similar_names,
    clippy::too_many_arguments,
    clippy::too_many_lines,
    clippy::unreadable_literal,
    clippy::used_underscore_items,
    clippy::while_immutable_condition,
    clippy::wildcard_imports
)]

#[cfg(zmij_no_select_unpredictable)]
mod hint;
#[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
mod stdarch_x86;
#[cfg(test)]
mod tests;
mod traits;

#[cfg(all(any(target_arch = "aarch64", target_arch = "x86_64"), not(miri)))]
use core::arch::asm;
#[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
use core::arch::x86_64::__m128i;
#[cfg(not(zmij_no_select_unpredictable))]
use core::hint;
use core::mem::{self, MaybeUninit};
use core::ptr;
use core::slice;
use core::str;
#[cfg(feature = "no-panic")]
use no_panic::no_panic;
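// 24 bytes covers the longest possible output: a sign, 17 significand digits,
// a decimal point, and an exponent suffix such as "e-308" (1 + 17 + 1 + 5 = 24).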
const BUFFER_SIZE: usize = 24;
const NAN: &str = "NaN";
const INFINITY: &str = "inf";
const NEG_INFINITY: &str = "-inf";

#[cfg_attr(test, derive(Debug, PartialEq))]
struct uint128 {
    hi: u64,
    lo: u64,
}

const USE_UMUL128_HI64: bool = cfg!(target_vendor = "apple");

const fn umul128(x: u64, y: u64) -> u128 {
    x as u128 * y as u128
}

const fn umul128_hi64(x: u64, y: u64) -> u64 {
    (umul128(x, y) >> 64) as u64
}
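// Upper 128 bits of the 192-bit product (x_hi * 2^64 + x_lo) * y.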
#[cfg_attr(feature = "no-panic", no_panic)]
fn umul192_hi128(x_hi: u64, x_lo: u64, y: u64) -> uint128 {
    let p = umul128(x_hi, y);
    let lo = (p as u64).wrapping_add((umul128(x_lo, y) >> 64) as u64);
    uint128 {
        hi: (p >> 64) as u64 + u64::from(lo < p as u64),
        lo,
    }
}
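// Multiplies by a 128-bit pow10 significand and keeps only the high half,
// folding any discarded nonzero low bits into the least significant kept bit
// (a "round to odd" / sticky-bit step, so later comparisons are not fooled by
// the truncation).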
#[cfg_attr(feature = "no-panic", no_panic)]
fn umulhi_inexact_to_odd<UInt>(x_hi: u64, x_lo: u64, y: UInt) -> UInt
where
    UInt: traits::UInt,
{
    let num_bits = mem::size_of::<UInt>() * 8;
    if num_bits == 64 {
        let p = umul192_hi128(x_hi, x_lo, y.into());
        UInt::truncate(p.hi | u64::from((p.lo >> 1) != 0))
    } else {
        let p = (umul128(x_hi, y.into()) >> 32) as u64;
        UInt::enlarge((p >> 32) as u32 | u32::from((p as u32 >> 1) != 0))
    }
}
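// Bit-layout constants shared by f32 and f64: significand width, exponent
// width, bias, and helpers for extracting the sign, exponent, and significand
// fields from the raw bit pattern.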
trait FloatTraits: traits::Float {
    const NUM_BITS: i32;
    const NUM_SIG_BITS: i32 = Self::MANTISSA_DIGITS as i32 - 1;
    const NUM_EXP_BITS: i32 = Self::NUM_BITS - Self::NUM_SIG_BITS - 1;
    const EXP_MASK: i32 = (1 << Self::NUM_EXP_BITS) - 1;
    const EXP_BIAS: i32 = (1 << (Self::NUM_EXP_BITS - 1)) - 1;
    const EXP_OFFSET: i32 = Self::EXP_BIAS + Self::NUM_SIG_BITS;

    type SigType: traits::UInt;
    const IMPLICIT_BIT: Self::SigType;

    fn to_bits(self) -> Self::SigType;

    fn is_negative(bits: Self::SigType) -> bool {
        (bits >> (Self::NUM_BITS - 1)) != Self::SigType::from(0)
    }

    fn get_sig(bits: Self::SigType) -> Self::SigType {
        bits & (Self::IMPLICIT_BIT - Self::SigType::from(1))
    }

    fn get_exp(bits: Self::SigType) -> i64 {
        (bits << 1u8 >> (Self::NUM_SIG_BITS + 1)).into() as i64
    }
}

impl FloatTraits for f32 {
    const NUM_BITS: i32 = 32;
    const IMPLICIT_BIT: u32 = 1 << Self::NUM_SIG_BITS;

    type SigType = u32;

    fn to_bits(self) -> Self::SigType {
        self.to_bits()
    }
}

impl FloatTraits for f64 {
    const NUM_BITS: i32 = 64;
    const IMPLICIT_BIT: u64 = 1 << Self::NUM_SIG_BITS;

    type SigType = u64;

    fn to_bits(self) -> Self::SigType {
        self.to_bits()
    }
}
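// Cache-line-aligned table of 128-bit significands of powers of ten. With
// SPLIT_TABLES the high and low halves live in two separate runs of the array,
// reached through negative offsets from a base pointer; otherwise each entry is
// stored as an interleaved (hi, lo) pair.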
#[repr(C, align(64))]
struct Pow10SignificandsTable {
    data: [u64; Self::NUM_POW10 * 2],
}

impl Pow10SignificandsTable {
    const SPLIT_TABLES: bool = cfg!(target_arch = "aarch64");
    const NUM_POW10: usize = 617;

    unsafe fn get_unchecked(&self, dec_exp: i32) -> uint128 {
        const DEC_EXP_MIN: i32 = -292;
        if !Self::SPLIT_TABLES {
            let index = ((dec_exp - DEC_EXP_MIN) * 2) as usize;
            return uint128 {
                hi: unsafe { *self.data.get_unchecked(index) },
                lo: unsafe { *self.data.get_unchecked(index + 1) },
            };
        }

        unsafe {
            #[cfg_attr(
                not(all(any(target_arch = "x86_64", target_arch = "aarch64"), not(miri))),
                allow(unused_mut)
            )]
            let mut hi = self
                .data
                .as_ptr()
                .offset(Self::NUM_POW10 as isize + DEC_EXP_MIN as isize - 1);
            #[cfg_attr(
                not(all(any(target_arch = "x86_64", target_arch = "aarch64"), not(miri))),
                allow(unused_mut)
            )]
            let mut lo = hi.add(Self::NUM_POW10);

            #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64"), not(miri)))]
            asm!("/*{0}{1}*/", inout(reg) hi, inout(reg) lo);
            uint128 {
                hi: *hi.offset(-dec_exp as isize),
                lo: *lo.offset(-dec_exp as isize),
            }
        }
    }

    #[cfg(test)]
    fn get(&self, dec_exp: i32) -> uint128 {
        const DEC_EXP_MIN: i32 = -292;
        assert!((DEC_EXP_MIN..DEC_EXP_MIN + Self::NUM_POW10 as i32).contains(&dec_exp));
        unsafe { self.get_unchecked(dec_exp) }
    }
}
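// Built in a const initializer: starting from the full 192-bit significand of
// the smallest required power of ten (dec_exp = -292), each iteration stores
// the top 128 bits and then multiplies by ten, renormalizing so the leading bit
// stays set. Entries are truncated rather than rounded up; to_decimal_schubfach
// compensates by adding one at the use site.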
static POW10_SIGNIFICANDS: Pow10SignificandsTable = {
    let mut data = [0; Pow10SignificandsTable::NUM_POW10 * 2];

    struct uint192 {
        w0: u64,
        w1: u64,
        w2: u64,
    }

    let mut current = uint192 {
        w0: 0xe000000000000000,
        w1: 0x25e8e89c13bb0f7a,
        w2: 0xff77b1fcbebcdc4f,
    };
    let ten = 0xa000000000000000;
    let mut i = 0;
    while i < Pow10SignificandsTable::NUM_POW10 {
        if Pow10SignificandsTable::SPLIT_TABLES {
            data[Pow10SignificandsTable::NUM_POW10 - i - 1] = current.w2;
            data[Pow10SignificandsTable::NUM_POW10 * 2 - i - 1] = current.w1;
        } else {
            data[i * 2] = current.w2;
            data[i * 2 + 1] = current.w1;
        }

        let h0: u64 = umul128_hi64(current.w0, ten);
        let h1: u64 = umul128_hi64(current.w1, ten);

        let c0: u64 = h0.wrapping_add(current.w1.wrapping_mul(ten));
        let c1: u64 = ((c0 < h0) as u64 + h1).wrapping_add(current.w2.wrapping_mul(ten));
        let c2: u64 = (c1 < h1) as u64 + umul128_hi64(current.w2, ten);
        if (c2 >> 63) != 0 {
            current = uint192 {
                w0: c0,
                w1: c1,
                w2: c2,
            };
        } else {
            current = uint192 {
                w0: c0 << 1,
                w1: c1 << 1 | c0 >> 63,
                w2: c2 << 1 | c1 >> 63,
            };
        }

        i += 1;
    }

    Pow10SignificandsTable { data }
};
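// Fixed-point computation of the target decimal exponent: 315_653 / 2^20
// approximates log10(2), and 131_072 / 2^20 approximates -log10(3/4), which is
// subtracted when `regular` is false.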
const fn compute_dec_exp(bin_exp: i32, regular: bool) -> i32 {
    debug_assert!(bin_exp >= -1334 && bin_exp <= 2620);
    const LOG10_3_OVER_4_SIG: i32 = 131_072;
    const LOG10_2_SIG: i32 = 315_653;
    const LOG10_2_EXP: i32 = 20;
    (bin_exp * LOG10_2_SIG - !regular as i32 * LOG10_3_OVER_4_SIG) >> LOG10_2_EXP
}
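// Computes how far the binary significand must be shifted left before it is
// multiplied by the cached significand of 10^-dec_exp; 217_707 / 2^16
// approximates log2(10).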
const fn do_compute_exp_shift(bin_exp: i32, dec_exp: i32) -> u8 {
    debug_assert!(dec_exp >= -350 && dec_exp <= 350);
    const LOG2_POW10_SIG: i32 = 217_707;
    const LOG2_POW10_EXP: i32 = 16;
    let pow10_bin_exp = (-dec_exp * LOG2_POW10_SIG) >> LOG2_POW10_EXP;
    (bin_exp + pow10_bin_exp + 1) as u8
}

struct ExpShiftTable {
    data: [u8; if Self::ENABLE {
        Self::NUM_EXPS as usize
    } else {
        1
    }],
}

impl ExpShiftTable {
    const ENABLE: bool = true;
    const NUM_EXPS: i32 = f64::EXP_MASK + 1;
}

static EXP_SHIFTS: ExpShiftTable = {
    let mut data = [0u8; if ExpShiftTable::ENABLE {
        ExpShiftTable::NUM_EXPS as usize
    } else {
        1
    }];

    if ExpShiftTable::ENABLE {
        let mut raw_exp = 0;
        while raw_exp < ExpShiftTable::NUM_EXPS {
            let mut bin_exp = raw_exp - f64::EXP_OFFSET;
            if raw_exp == 0 {
                bin_exp += 1;
            }
            let dec_exp = compute_dec_exp(bin_exp, true);
            data[raw_exp as usize] = do_compute_exp_shift(bin_exp, dec_exp) as u8;
            raw_exp += 1;
        }
    }

    ExpShiftTable { data }
};

unsafe fn compute_exp_shift<UInt, const ONLY_REGULAR: bool>(bin_exp: i32, dec_exp: i32) -> u8
where
    UInt: traits::UInt,
{
    let num_bits = mem::size_of::<UInt>() * 8;
    if num_bits == 64 && ExpShiftTable::ENABLE && ONLY_REGULAR {
        unsafe {
            *EXP_SHIFTS
                .data
                .as_ptr()
                .add((bin_exp + f64::EXP_OFFSET) as usize)
        }
    } else {
        do_compute_exp_shift(bin_exp, dec_exp)
    }
}
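// `x` holds eight BCD digit bytes with the first digit in the lowest byte
// (after `to_le`). Returns the 1-based position of the last nonzero digit,
// i.e. how many of the eight digits to keep once trailing zeros are trimmed
// (0 if all digits are zero).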
#[cfg_attr(feature = "no-panic", no_panic)]
fn count_trailing_nonzeros(x: u64) -> usize {
    (70 - ((x.to_le() << 1) | 1).leading_zeros() as usize) / 8
}
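// Lookup table of the two-digit ASCII strings "00" through "99", 2-byte aligned
// so a pair can be read as a single u16.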
#[repr(C, align(2))]
struct Digits2([u8; 200]);

static DIGITS2: Digits2 = Digits2(
    *b"0001020304050607080910111213141516171819\
       2021222324252627282930313233343536373839\
       4041424344454647484950515253545556575859\
       6061626364656667686970717273747576777879\
       8081828384858687888990919293949596979899",
);

#[cfg_attr(feature = "no-panic", no_panic)]
unsafe fn digits2(value: usize) -> &'static u16 {
    debug_assert!(value < 100);

    #[allow(clippy::cast_ptr_alignment)]
    unsafe {
        &*DIGITS2.0.as_ptr().cast::<u16>().add(value)
    }
}
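// "Multiply by a reciprocal, then shift" constants for dividing by 10_000, 100,
// and 10 without hardware division, together with the negated divisors (modulo
// a lane width) used to turn those quotients back into remainders.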
const DIV10K_EXP: i32 = 40;
const DIV10K_SIG: u32 = ((1u64 << DIV10K_EXP) / 10000 + 1) as u32;
const NEG10K: u32 = ((1u64 << 32) - 10000) as u32;
const DIV100_EXP: i32 = 19;
const DIV100_SIG: u32 = (1 << DIV100_EXP) / 100 + 1;
const NEG100: u32 = (1 << 16) - 100;
const DIV10_EXP: i32 = 10;
const DIV10_SIG: u32 = (1 << DIV10_EXP) / 10 + 1;
const NEG10: u32 = (1 << 8) - 10;

const ZEROS: u64 = 0x0101010101010101 * b'0' as u64;
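// Splits an eight-digit value into one decimal digit per byte. After the final
// byte swap the most significant digit comes first when the result is stored
// with `write8`; OR-ing with ZEROS turns the digits into ASCII.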
#[cfg_attr(feature = "no-panic", no_panic)]
fn to_bcd8(abcdefgh: u64) -> u64 {
    let abcd_efgh =
        abcdefgh + u64::from(NEG10K) * ((abcdefgh * u64::from(DIV10K_SIG)) >> DIV10K_EXP);
    let ab_cd_ef_gh = abcd_efgh
        + u64::from(NEG100) * (((abcd_efgh * u64::from(DIV100_SIG)) >> DIV100_EXP) & 0x7f0000007f);
    let a_b_c_d_e_f_g_h = ab_cd_ef_gh
        + u64::from(NEG10)
            * (((ab_cd_ef_gh * u64::from(DIV10_SIG)) >> DIV10_EXP) & 0xf000f000f000f);
    a_b_c_d_e_f_g_h.to_be()
}

unsafe fn write_if(buffer: *mut u8, digit: u32, condition: bool) -> *mut u8 {
    unsafe {
        *buffer = b'0' + digit as u8;
        buffer.add(usize::from(condition))
    }
}

unsafe fn write8(buffer: *mut u8, value: u64) {
    unsafe {
        buffer.cast::<u64>().write_unaligned(value);
    }
}

#[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
const fn splat64(x: u64) -> __m128i {
    unsafe { mem::transmute::<[u64; 2], __m128i>([x, x]) }
}

#[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
const fn splat32(x: u32) -> __m128i {
    splat64(((x as u64) << 32) | x as u64)
}

#[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
const fn splat16(x: u16) -> __m128i {
    splat32(((x as u32) << 16) | x as u32)
}

#[cfg(all(target_arch = "x86_64", target_feature = "sse4.1", not(miri)))]
const fn pack8(a: u8, b: u8, c: u8, d: u8, e: u8, f: u8, g: u8, h: u8) -> u64 {
    ((h as u64) << 56)
        | ((g as u64) << 48)
        | ((f as u64) << 40)
        | ((e as u64) << 32)
        | ((d as u64) << 24)
        | ((c as u64) << 16)
        | ((b as u64) << 8)
        | a as u64
}
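// Writes the 16- or 17-digit f64 significand with trailing zeros trimmed and
// returns a pointer just past the last digit kept. A portable scalar path is
// accompanied by NEON and SSE2 paths that convert the digits in parallel.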
#[cfg_attr(feature = "no-panic", no_panic)]
unsafe fn write_significand17(
    mut buffer: *mut u8,
    value: u64,
    has17digits: bool,
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))] value_div10: i64,
) -> *mut u8 {
    #[cfg(not(any(
        all(target_arch = "aarch64", target_feature = "neon", not(miri)),
        all(target_arch = "x86_64", target_feature = "sse2", not(miri)),
    )))]
    {
        let start = unsafe { buffer.add(1) };
        let abbccddee = (value / 100_000_000) as u32;
        let ffgghhii = (value % 100_000_000) as u32;
        buffer = unsafe { write_if(start, abbccddee / 100_000_000, has17digits) };
        let bcd = to_bcd8(u64::from(abbccddee % 100_000_000));
        unsafe {
            write8(buffer, bcd | ZEROS);
        }
        if ffgghhii == 0 {
            return unsafe { buffer.add(count_trailing_nonzeros(bcd)) };
        }
        let bcd = to_bcd8(u64::from(ffgghhii));
        unsafe {
            write8(buffer.add(8), bcd | ZEROS);
            buffer.add(8).add(count_trailing_nonzeros(bcd))
        }
    }
    #[cfg(all(target_arch = "aarch64", target_feature = "neon", not(miri)))]
    {
        use core::arch::aarch64::*;

        const NEG10K: i32 = -10000 + 0x10000;

        struct MulConstants {
            mul_const: u64,
            hundred_million: u64,
            multipliers32: int32x4_t,
            multipliers16: int16x8_t,
        }

        static CONSTANTS: MulConstants = MulConstants {
            mul_const: 0xabcc77118461cefd,
            hundred_million: 100000000,
            multipliers32: unsafe {
                mem::transmute::<[i32; 4], int32x4_t>([
                    DIV10K_SIG as i32,
                    NEG10K,
                    (DIV100_SIG << 12) as i32,
                    NEG100 as i32,
                ])
            },
            multipliers16: unsafe {
                mem::transmute::<[i16; 8], int16x8_t>([0xce0, NEG10 as i16, 0, 0, 0, 0, 0, 0])
            },
        };

        let mut c = ptr::addr_of!(CONSTANTS);

        let c = unsafe {
            asm!("/*{0}*/", inout(reg) c);
            &*c
        };

        let mut hundred_million = c.hundred_million;

        unsafe {
            asm!("/*{0}*/", inout(reg) hundred_million);
        }

        let abbccddee = (umul128(value, c.mul_const) >> 90) as u64;
        let ffgghhii = value - abbccddee * hundred_million;

        let a = (umul128(abbccddee, c.mul_const) >> 90) as u64;
        let bbccddee = abbccddee - a * hundred_million;

        let start = unsafe { buffer.add(1) };
        buffer = unsafe { write_if(start, a as u32, has17digits) };

        unsafe {
            let ffgghhii_bbccddee_64: uint64x1_t =
                mem::transmute::<u64, uint64x1_t>((ffgghhii << 32) | bbccddee);
            let bbccddee_ffgghhii: int32x2_t = vreinterpret_s32_u64(ffgghhii_bbccddee_64);

            let bbcc_ffgg: int32x2_t = vreinterpret_s32_u32(vshr_n_u32(
                vreinterpret_u32_s32(vqdmulh_n_s32(
                    bbccddee_ffgghhii,
                    mem::transmute::<int32x4_t, [i32; 4]>(c.multipliers32)[0],
                )),
                9,
            ));
            let ddee_bbcc_hhii_ffgg_32: int32x2_t = vmla_n_s32(
                bbccddee_ffgghhii,
                bbcc_ffgg,
                mem::transmute::<int32x4_t, [i32; 4]>(c.multipliers32)[1],
            );

            let mut ddee_bbcc_hhii_ffgg: int32x4_t =
                vreinterpretq_s32_u32(vshll_n_u16(vreinterpret_u16_s32(ddee_bbcc_hhii_ffgg_32), 0));

            asm!("/*{:v}*/", inout(vreg) ddee_bbcc_hhii_ffgg);

            let dd_bb_hh_ff: int32x4_t = vqdmulhq_n_s32(
                ddee_bbcc_hhii_ffgg,
                mem::transmute::<int32x4_t, [i32; 4]>(c.multipliers32)[2],
            );
            let ee_dd_cc_bb_ii_hh_gg_ff: int16x8_t = vreinterpretq_s16_s32(vmlaq_n_s32(
                ddee_bbcc_hhii_ffgg,
                dd_bb_hh_ff,
                mem::transmute::<int32x4_t, [i32; 4]>(c.multipliers32)[3],
            ));
            let high_10s: int16x8_t = vqdmulhq_n_s16(
                ee_dd_cc_bb_ii_hh_gg_ff,
                mem::transmute::<int16x8_t, [i16; 8]>(c.multipliers16)[0],
            );
            let digits: uint8x16_t = vrev64q_u8(vreinterpretq_u8_s16(vmlaq_n_s16(
                ee_dd_cc_bb_ii_hh_gg_ff,
                high_10s,
                mem::transmute::<int16x8_t, [i16; 8]>(c.multipliers16)[1],
            )));
            let str: uint16x8_t = vaddq_u16(
                vreinterpretq_u16_u8(digits),
                vreinterpretq_u16_s8(vdupq_n_s8(b'0' as i8)),
            );

            buffer.cast::<uint16x8_t>().write_unaligned(str);

            let is_not_zero: uint16x8_t =
                vreinterpretq_u16_u8(vcgtzq_s8(mem::transmute::<uint8x16_t, int8x16_t>(digits)));
            let zeros: u64 = vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(is_not_zero, 4)), 0);

            buffer.add(16 - (zeros.leading_zeros() as usize >> 2))
        }
    }
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
    {
        use crate::stdarch_x86::*;

        let last_digit = (value - value_div10 as u64 * 10) as u32;

        buffer = unsafe { buffer.add(usize::from(has17digits)) };
        unsafe {
            *buffer.add(16) = last_digit as u8 + b'0';
        }

        let abcdefgh = (value_div10 / 100_000_000) as u32;
        let ijklmnop = (value_div10 % 100_000_000) as u32;

        #[repr(C, align(64))]
        struct Consts {
            div10k: __m128i,
            neg10k: __m128i,
            div100: __m128i,
            div10: __m128i,
            #[cfg(target_feature = "sse4.1")]
            neg100: __m128i,
            #[cfg(target_feature = "sse4.1")]
            neg10: __m128i,
            #[cfg(target_feature = "sse4.1")]
            bswap: __m128i,
            #[cfg(not(target_feature = "sse4.1"))]
            hundred: __m128i,
            #[cfg(not(target_feature = "sse4.1"))]
            moddiv10: __m128i,
            zeros: __m128i,
        }

        static CONSTS: Consts = Consts {
            div10k: splat64(DIV10K_SIG as u64),
            neg10k: splat64(NEG10K as u64),
            div100: splat32(DIV100_SIG),
            div10: splat16(((1u32 << 16) / 10 + 1) as u16),
            #[cfg(target_feature = "sse4.1")]
            neg100: splat32(NEG100),
            #[cfg(target_feature = "sse4.1")]
            neg10: splat16((1 << 8) - 10),
            #[cfg(target_feature = "sse4.1")]
            bswap: unsafe {
                mem::transmute::<[u64; 2], __m128i>([
                    pack8(15, 14, 13, 12, 11, 10, 9, 8),
                    pack8(7, 6, 5, 4, 3, 2, 1, 0),
                ])
            },
            #[cfg(not(target_feature = "sse4.1"))]
            hundred: splat32(100),
            #[cfg(not(target_feature = "sse4.1"))]
            moddiv10: splat16(10 * (1 << 8) - 1),
            zeros: splat64(ZEROS),
        };

        let div10k = unsafe { _mm_load_si128(&CONSTS.div10k) };
        let neg10k = unsafe { _mm_load_si128(&CONSTS.neg10k) };
        let div100 = unsafe { _mm_load_si128(&CONSTS.div100) };
        let div10 = unsafe { _mm_load_si128(&CONSTS.div10) };
        #[cfg(target_feature = "sse4.1")]
        let neg100 = unsafe { _mm_load_si128(&CONSTS.neg100) };
        #[cfg(target_feature = "sse4.1")]
        let neg10 = unsafe { _mm_load_si128(&CONSTS.neg10) };
        #[cfg(target_feature = "sse4.1")]
        let bswap = unsafe { _mm_load_si128(&CONSTS.bswap) };
        #[cfg(not(target_feature = "sse4.1"))]
        let hundred = unsafe { _mm_load_si128(&CONSTS.hundred) };
        #[cfg(not(target_feature = "sse4.1"))]
        let moddiv10 = unsafe { _mm_load_si128(&CONSTS.moddiv10) };
        let zeros = unsafe { _mm_load_si128(&CONSTS.zeros) };

        unsafe {
            let x: __m128i = _mm_set_epi64x(i64::from(abcdefgh), i64::from(ijklmnop));
            let y: __m128i = _mm_add_epi64(
                x,
                _mm_mul_epu32(neg10k, _mm_srli_epi64(_mm_mul_epu32(x, div10k), DIV10K_EXP)),
            );

            #[cfg(target_feature = "sse4.1")]
            let bcd: __m128i = {
                let z: __m128i = _mm_add_epi64(
                    y,
                    _mm_mullo_epi32(neg100, _mm_srli_epi32(_mm_mulhi_epu16(y, div100), 3)),
                );
                let big_endian_bcd: __m128i =
                    _mm_add_epi64(z, _mm_mullo_epi16(neg10, _mm_mulhi_epu16(z, div10)));
                _mm_shuffle_epi8(big_endian_bcd, bswap)
            };

            #[cfg(not(target_feature = "sse4.1"))]
            let bcd: __m128i = {
                let y_div_100: __m128i = _mm_srli_epi16(_mm_mulhi_epu16(y, div100), 3);
                let y_mod_100: __m128i = _mm_sub_epi16(y, _mm_mullo_epi16(y_div_100, hundred));
                let z: __m128i = _mm_or_si128(_mm_slli_epi32(y_mod_100, 16), y_div_100);
                let bcd_shuffled: __m128i = _mm_sub_epi16(
                    _mm_slli_epi16(z, 8),
                    _mm_mullo_epi16(moddiv10, _mm_mulhi_epu16(z, div10)),
                );
                _mm_shuffle_epi32(bcd_shuffled, _MM_SHUFFLE(0, 1, 2, 3))
            };

            let digits = _mm_or_si128(bcd, zeros);

            let mask128: __m128i = _mm_cmpgt_epi8(bcd, _mm_setzero_si128());
            let mask = _mm_movemask_epi8(mask128) as u32;
            let len = 32 - mask.leading_zeros() as usize;

            _mm_storeu_si128(buffer.cast::<__m128i>(), digits);
            buffer.add(if last_digit != 0 { 17 } else { len })
        }
    }
}
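// f32 counterpart: writes the 8- or 9-digit significand with trailing zeros
// trimmed.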
#[cfg_attr(feature = "no-panic", no_panic)]
unsafe fn write_significand9(mut buffer: *mut u8, value: u32, has9digits: bool) -> *mut u8 {
    buffer = unsafe { write_if(buffer, value / 100_000_000, has9digits) };
    let bcd = to_bcd8(u64::from(value % 100_000_000));
    unsafe {
        write8(buffer, bcd | ZEROS);
        buffer.add(count_trailing_nonzeros(bcd))
    }
}
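// Decimal significand and exponent produced by the binary-to-decimal routines.
// On the SSE2 path, `sig / 10` is carried along as well so write_significand17
// does not have to recompute it.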
struct ToDecimalResult {
    sig: i64,
    exp: i32,
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
    sig_div10: i64,
}

fn normalize<UInt>(mut dec: ToDecimalResult, subnormal: bool) -> ToDecimalResult
where
    UInt: traits::UInt,
{
    if !subnormal {
        return dec;
    }
    let num_bits = mem::size_of::<UInt>() * 8;
    while dec.sig
        < if num_bits == 64 {
            10_000_000_000_000_000
        } else {
            100_000_000
        }
    {
        dec.sig *= 10;
        dec.exp -= 1;
    }
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
    {
        dec.sig_div10 = dec.sig / 10;
    }
    dec
}
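// Schubfach-style shortest-decimal search: scales the rounding interval by the
// cached power of ten (rounding to odd so the comparisons stay exact), prefers
// a significand ending in zero when one lies inside the interval, and otherwise
// picks the closer of the two remaining candidates.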
#[cfg_attr(feature = "no-panic", no_panic)]
fn to_decimal_schubfach<const SUBNORMAL: bool, UInt>(
    bin_sig: UInt,
    bin_exp: i64,
    regular: bool,
) -> ToDecimalResult
where
    UInt: traits::UInt,
{
    let num_bits = mem::size_of::<UInt>() as i32 * 8;
    let dec_exp = compute_dec_exp(bin_exp as i32, regular);
    let exp_shift = unsafe { compute_exp_shift::<UInt, false>(bin_exp as i32, dec_exp) };
    let mut pow10 = unsafe { POW10_SIGNIFICANDS.get_unchecked(-dec_exp) };

    if num_bits == 64 {
        pow10.lo += 1;
    } else {
        pow10.hi += 1;
    }

    const BOUND_SHIFT: u32 = 2;
    let bin_sig_shifted = bin_sig << BOUND_SHIFT;

    let lsb = bin_sig & UInt::from(1);
    let lower = (bin_sig_shifted - (UInt::from(regular) + UInt::from(1))) << exp_shift;
    let lower = umulhi_inexact_to_odd(pow10.hi, pow10.lo, lower) + lsb;
    let upper = (bin_sig_shifted + UInt::from(2)) << exp_shift;
    let upper = umulhi_inexact_to_odd(pow10.hi, pow10.lo, upper) - lsb;

    let div10 = (upper >> BOUND_SHIFT) / UInt::from(10);
    let shorter = div10 * UInt::from(10);
    if (shorter << BOUND_SHIFT) >= lower {
        let result = ToDecimalResult {
            sig: shorter.into() as i64,
            exp: dec_exp,
            #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
            sig_div10: div10.into() as i64,
        };
        return normalize::<UInt>(result, SUBNORMAL);
    }

    let scaled_sig = umulhi_inexact_to_odd(pow10.hi, pow10.lo, bin_sig_shifted << exp_shift);
    let longer_below = scaled_sig >> BOUND_SHIFT;
    let longer_above = longer_below + UInt::from(1);

    let cmp = scaled_sig
        .wrapping_sub((longer_below + longer_above) << 1)
        .to_signed();
    let below_closer = cmp < UInt::from(0).to_signed()
        || (cmp == UInt::from(0).to_signed() && (longer_below & UInt::from(1)) == UInt::from(0));
    let below_in = (longer_below << BOUND_SHIFT) >= lower;
    let dec_sig = if below_closer & below_in {
        longer_below
    } else {
        longer_above
    };
    let result = ToDecimalResult {
        sig: dec_sig.into() as i64,
        exp: dec_exp,
        #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
        sig_div10: (dec_sig / UInt::from(10)).into() as i64,
    };
    normalize::<UInt>(result, SUBNORMAL)
}
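// Fast path for normal values whose significand field is nonzero: scales by the
// cached power of ten, then checks whether the final decimal digit can be
// dropped (a multiple of ten lying within half an ulp). The loop body runs at
// most once; `break` falls through to the Schubfach routine for the borderline
// cases this shortcut cannot decide.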
#[cfg_attr(feature = "no-panic", no_panic)]
fn to_decimal_normal<Float, UInt>(bin_sig: UInt, raw_exp: i64, regular: bool) -> ToDecimalResult
where
    Float: FloatTraits,
    UInt: traits::UInt,
{
    let bin_exp = raw_exp - i64::from(Float::EXP_OFFSET);
    let num_bits = mem::size_of::<UInt>() as i32 * 8;
    while regular {
        let dec_exp = if USE_UMUL128_HI64 {
            umul128_hi64(bin_exp as u64, 0x4d10500000000000) as i32
        } else {
            compute_dec_exp(bin_exp as i32, true)
        };
        let exp_shift = unsafe { compute_exp_shift::<UInt, true>(bin_exp as i32, dec_exp) };
        let pow10 = unsafe { POW10_SIGNIFICANDS.get_unchecked(-dec_exp) };

        let integral;
        let fractional;
        if num_bits == 64 {
            let p = umul192_hi128(pow10.hi, pow10.lo, (bin_sig << exp_shift).into());
            integral = UInt::truncate(p.hi);
            fractional = p.lo;
        } else {
            let p = umul128(pow10.hi, (bin_sig << exp_shift).into());
            integral = UInt::truncate((p >> 64) as u64);
            fractional = p as u64;
        }
        const HALF_ULP: u64 = 1 << 63;

        let cmp = fractional.wrapping_sub(HALF_ULP) as i64;
        if cmp == 0 {
            break;
        }

        const DIV10_SIG64: u64 = (1 << 63) / 5 + 1;
        let div10 = umul128_hi64(integral.into(), DIV10_SIG64);
        #[allow(unused_mut)]
        let mut digit = integral.into() - div10 * 10;
        #[cfg(all(any(target_arch = "aarch64", target_arch = "x86_64"), not(miri)))]
        unsafe {
            asm!("/*{0}*/", inout(reg) digit);
        }

        let num_integral_bits = if num_bits == 64 { 4 } else { 32 };
        let num_fractional_bits = 64 - num_integral_bits;
        let ten = 10u64 << num_fractional_bits;
        let scaled_sig_mod10 = (digit << num_fractional_bits) | (fractional >> num_integral_bits);

        let scaled_half_ulp = pow10.hi >> (num_integral_bits - exp_shift + 1);
        let upper = scaled_sig_mod10 + scaled_half_ulp;

        if scaled_sig_mod10 == scaled_half_ulp || ten.wrapping_sub(upper) <= 1 {
            break;
        }

        let round_up = upper >= ten;
        let mut shorter = (integral.into() - digit) as i64;
        let longer = (integral.into() + u64::from(cmp >= 0)) as i64;
        if cfg!(target_arch = "aarch64") {
            let dec_sig =
                hint::select_unpredictable(scaled_sig_mod10 < scaled_half_ulp, shorter, longer);
            return ToDecimalResult {
                sig: hint::select_unpredictable(round_up, shorter + 10, dec_sig),
                exp: dec_exp,
                #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
                sig_div10: 0,
            };
        }
        shorter += i64::from(round_up) * 10;
        let use_shorter = scaled_sig_mod10 <= scaled_half_ulp || round_up;
        return ToDecimalResult {
            sig: hint::select_unpredictable(use_shorter, shorter, longer),
            exp: dec_exp,
            #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
            sig_div10: div10 as i64 + i64::from(use_shorter) * i64::from(round_up),
        };
    }
    to_decimal_schubfach::<false, UInt>(bin_sig, bin_exp, regular)
}
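// Top-level formatter: writes an optional sign, handles zero, converts the
// significand and exponent to decimal, then emits either fixed notation
// ("123.45") or exponent notation ("1e+16") depending on the decimal exponent.
// Returns a pointer one past the last byte written.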
#[cfg_attr(feature = "no-panic", no_panic)]
unsafe fn write<Float>(value: Float, mut buffer: *mut u8) -> *mut u8
where
    Float: FloatTraits,
{
    let bits = value.to_bits();
    let bin_exp = Float::get_exp(bits);
    let bin_sig = Float::get_sig(bits);
    unsafe {
        *buffer = b'-';
    }
    buffer = unsafe { buffer.add(usize::from(Float::is_negative(bits))) };

    let mut dec = if bin_exp == 0 {
        if bin_sig == Float::SigType::from(0) {
            return unsafe {
                *buffer = b'0';
                *buffer.add(1) = b'.';
                *buffer.add(2) = b'0';
                buffer.add(3)
            };
        }
        to_decimal_schubfach::<true, Float::SigType>(
            bin_sig,
            i64::from(1 - Float::EXP_OFFSET),
            true,
        )
    } else {
        to_decimal_normal::<Float, Float::SigType>(
            bin_sig | Float::IMPLICIT_BIT,
            bin_exp,
            bin_sig != Float::SigType::from(0),
        )
    };
    let mut dec_exp = dec.exp;

    let end = if Float::NUM_BITS == 64 {
        let has17digits = dec.sig >= 10_000_000_000_000_000;
        dec_exp += Float::MAX_DIGITS10 as i32 - 2 + i32::from(has17digits);
        unsafe {
            write_significand17(
                buffer,
                dec.sig as u64,
                has17digits,
                #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
                dec.sig_div10,
            )
        }
    } else {
        if dec.sig < 10_000_000 {
            dec.sig *= 10;
            dec_exp -= 1;
        }
        let has9digits = dec.sig >= 100_000_000;
        dec_exp += Float::MAX_DIGITS10 as i32 - 2 + i32::from(has9digits);
        unsafe { write_significand9(buffer.add(1), dec.sig as u32, has9digits) }
    };

    let length = unsafe { end.offset_from(buffer.add(1)) } as usize;

    if Float::NUM_BITS == 32 && (-6..=12).contains(&dec_exp)
        || Float::NUM_BITS == 64 && (-5..=15).contains(&dec_exp)
    {
        if length as i32 - 1 <= dec_exp {
            return unsafe {
                ptr::copy(buffer.add(1), buffer, length);
                ptr::write_bytes(buffer.add(length), b'0', dec_exp as usize + 3 - length);
                *buffer.add(dec_exp as usize + 1) = b'.';
                buffer.add(dec_exp as usize + 3)
            };
        } else if 0 <= dec_exp {
            return unsafe {
                ptr::copy(buffer.add(1), buffer, dec_exp as usize + 1);
                *buffer.add(dec_exp as usize + 1) = b'.';
                buffer.add(length + 1)
            };
        } else {
            return unsafe {
                ptr::copy(buffer.add(1), buffer.add((1 - dec_exp) as usize), length);
                ptr::write_bytes(buffer, b'0', (1 - dec_exp) as usize);
                *buffer.add(1) = b'.';
                buffer.add((1 - dec_exp) as usize + length)
            };
        }
    }
    unsafe {
        *buffer = *buffer.add(1);
        *buffer.add(1) = b'.';
    }
    buffer = unsafe { buffer.add(length + usize::from(length > 1)) };

    let sign_ptr = buffer;
    let e_sign = if dec_exp >= 0 {
        (u16::from(b'+') << 8) | u16::from(b'e')
    } else {
        (u16::from(b'-') << 8) | u16::from(b'e')
    };
    buffer = unsafe { buffer.add(1) };
    dec_exp = if dec_exp >= 0 { dec_exp } else { -dec_exp };
    buffer = unsafe { buffer.add(usize::from(dec_exp >= 10)) };
    if Float::MIN_10_EXP >= -99 && Float::MAX_10_EXP <= 99 {
        unsafe {
            buffer
                .cast::<u16>()
                .write_unaligned(*digits2(dec_exp as usize));
            sign_ptr.cast::<u16>().write_unaligned(e_sign.to_le());
            return buffer.add(2);
        }
    }

    let digit = if USE_UMUL128_HI64 {
        umul128_hi64(dec_exp as u64, 0x290000000000000) as u32
    } else {
        (dec_exp as u32 * DIV100_SIG) >> DIV100_EXP
    };
    unsafe {
        *buffer = b'0' + digit as u8;
    }
    buffer = unsafe { buffer.add(usize::from(dec_exp >= 100)) };
    unsafe {
        buffer
            .cast::<u16>()
            .write_unaligned(*digits2((dec_exp as u32 - digit * 100) as usize));
        sign_ptr.cast::<u16>().write_unaligned(e_sign.to_le());
        buffer.add(2)
    }
}
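/// A stack-allocated byte buffer, correctly sized to hold the formatted text of
/// any `f32` or `f64`.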
pub struct Buffer {
    bytes: [MaybeUninit<u8>; BUFFER_SIZE],
}

impl Buffer {
    #[inline]
    #[cfg_attr(feature = "no-panic", no_panic)]
    pub fn new() -> Self {
        let bytes = [MaybeUninit::<u8>::uninit(); BUFFER_SIZE];
        Buffer { bytes }
    }
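    /// Formats a float into this buffer and returns a reference to its string
    /// representation. NaN and the infinities come back as "NaN", "inf", and
    /// "-inf"; finite values are delegated to `format_finite`.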
    #[cfg_attr(feature = "no-panic", no_panic)]
    pub fn format<F: Float>(&mut self, f: F) -> &str {
        if f.is_nonfinite() {
            f.format_nonfinite()
        } else {
            self.format_finite(f)
        }
    }

    #[cfg_attr(feature = "no-panic", no_panic)]
    pub fn format_finite<F: Float>(&mut self, f: F) -> &str {
        unsafe {
            let end = f.write_to_zmij_buffer(self.bytes.as_mut_ptr().cast::<u8>());
            let len = end.offset_from(self.bytes.as_ptr().cast::<u8>()) as usize;
            let slice = slice::from_raw_parts(self.bytes.as_ptr().cast::<u8>(), len);
            str::from_utf8_unchecked(slice)
        }
    }
}

#[allow(unknown_lints)]
#[allow(private_bounds)]
pub trait Float: private::Sealed {}
impl Float for f32 {}
impl Float for f64 {}

mod private {
    pub trait Sealed: crate::traits::Float {
        fn is_nonfinite(self) -> bool;
        fn format_nonfinite(self) -> &'static str;
        unsafe fn write_to_zmij_buffer(self, buffer: *mut u8) -> *mut u8;
    }

    impl Sealed for f32 {
        #[inline]
        fn is_nonfinite(self) -> bool {
            const EXP_MASK: u32 = 0x7f800000;
            let bits = self.to_bits();
            bits & EXP_MASK == EXP_MASK
        }

        #[cold]
        #[cfg_attr(feature = "no-panic", inline)]
        fn format_nonfinite(self) -> &'static str {
            const MANTISSA_MASK: u32 = 0x007fffff;
            const SIGN_MASK: u32 = 0x80000000;
            let bits = self.to_bits();
            if bits & MANTISSA_MASK != 0 {
                crate::NAN
            } else if bits & SIGN_MASK != 0 {
                crate::NEG_INFINITY
            } else {
                crate::INFINITY
            }
        }

        #[cfg_attr(feature = "no-panic", inline)]
        unsafe fn write_to_zmij_buffer(self, buffer: *mut u8) -> *mut u8 {
            unsafe { crate::write(self, buffer) }
        }
    }

    impl Sealed for f64 {
        #[inline]
        fn is_nonfinite(self) -> bool {
            const EXP_MASK: u64 = 0x7ff0000000000000;
            let bits = self.to_bits();
            bits & EXP_MASK == EXP_MASK
        }

        #[cold]
        #[cfg_attr(feature = "no-panic", inline)]
        fn format_nonfinite(self) -> &'static str {
            const MANTISSA_MASK: u64 = 0x000fffffffffffff;
            const SIGN_MASK: u64 = 0x8000000000000000;
            let bits = self.to_bits();
            if bits & MANTISSA_MASK != 0 {
                crate::NAN
            } else if bits & SIGN_MASK != 0 {
                crate::NEG_INFINITY
            } else {
                crate::INFINITY
            }
        }

        #[cfg_attr(feature = "no-panic", inline)]
        unsafe fn write_to_zmij_buffer(self, buffer: *mut u8) -> *mut u8 {
            unsafe { crate::write(self, buffer) }
        }
    }
}

impl Default for Buffer {
    #[inline]
    #[cfg_attr(feature = "no-panic", no_panic)]
    fn default() -> Self {
        Buffer::new()
    }
}