1#[cfg(feature = "simd_support")]
12use core::simd::SimdElement;
13#[cfg(feature = "simd_support")]
14use core::simd::prelude::*;
15
16pub(crate) trait WideningMultiply<RHS = Self> {
17 type Output;
18
19 fn wmul(self, x: RHS) -> Self::Output;
20}
21
22macro_rules! wmul_impl {
23 ($ty:ty, $wide:ty, $shift:expr) => {
24 impl WideningMultiply for $ty {
25 type Output = ($ty, $ty);
26
27 #[inline(always)]
28 fn wmul(self, x: $ty) -> Self::Output {
29 let tmp = (self as $wide) * (x as $wide);
30 ((tmp >> $shift) as $ty, tmp as $ty)
31 }
32 }
33 };
34
35 ($(($ty:ident, $wide:ty),)+, $shift:expr) => {
37 $(
38 impl WideningMultiply for $ty {
39 type Output = ($ty, $ty);
40
41 #[inline(always)]
42 fn wmul(self, x: $ty) -> Self::Output {
43 let y: $wide = self.cast();
48 let x: $wide = x.cast();
49 let tmp = y * x;
50 let hi: $ty = (tmp >> Simd::splat($shift)).cast();
51 let lo: $ty = tmp.cast();
52 (hi, lo)
53 }
54 }
55 )+
56 };
57}
58impl WideningMultiply for u8 {
type Output = (u8, u8);
#[inline(always)]
fn wmul(self, x: u8) -> Self::Output {
let tmp = (self as u16) * (x as u16);
((tmp >> 8) as u8, tmp as u8)
}
}wmul_impl! { u8, u16, 8 }
59impl WideningMultiply for u16 {
type Output = (u16, u16);
#[inline(always)]
fn wmul(self, x: u16) -> Self::Output {
let tmp = (self as u32) * (x as u32);
((tmp >> 16) as u16, tmp as u16)
}
}wmul_impl! { u16, u32, 16 }
60impl WideningMultiply for u32 {
type Output = (u32, u32);
#[inline(always)]
fn wmul(self, x: u32) -> Self::Output {
let tmp = (self as u64) * (x as u64);
((tmp >> 32) as u32, tmp as u32)
}
}wmul_impl! { u32, u64, 32 }
61impl WideningMultiply for u64 {
type Output = (u64, u64);
#[inline(always)]
fn wmul(self, x: u64) -> Self::Output {
let tmp = (self as u128) * (x as u128);
((tmp >> 64) as u64, tmp as u64)
}
}wmul_impl! { u64, u128, 64 }
62
63macro_rules! wmul_impl_large {
70 ($ty:ty, $half:expr) => {
71 impl WideningMultiply for $ty {
72 type Output = ($ty, $ty);
73
74 #[inline(always)]
75 fn wmul(self, b: $ty) -> Self::Output {
76 const LOWER_MASK: $ty = !0 >> $half;
77 let mut low = (self & LOWER_MASK).wrapping_mul(b & LOWER_MASK);
78 let mut t = low >> $half;
79 low &= LOWER_MASK;
80 t += (self >> $half).wrapping_mul(b & LOWER_MASK);
81 low += (t & LOWER_MASK) << $half;
82 let mut high = t >> $half;
83 t = low >> $half;
84 low &= LOWER_MASK;
85 t += (b >> $half).wrapping_mul(self & LOWER_MASK);
86 low += (t & LOWER_MASK) << $half;
87 high += t >> $half;
88 high += (self >> $half).wrapping_mul(b >> $half);
89
90 (high, low)
91 }
92 }
93 };
94
95 (($($ty:ty,)+) $scalar:ty, $half:expr) => {
97 $(
98 impl WideningMultiply for $ty {
99 type Output = ($ty, $ty);
100
101 #[inline(always)]
102 fn wmul(self, b: $ty) -> Self::Output {
103 let lower_mask = <$ty>::splat(!0 >> $half);
105 let half = <$ty>::splat($half);
106 let mut low = (self & lower_mask) * (b & lower_mask);
107 let mut t = low >> half;
108 low &= lower_mask;
109 t += (self >> half) * (b & lower_mask);
110 low += (t & lower_mask) << half;
111 let mut high = t >> half;
112 t = low >> half;
113 low &= lower_mask;
114 t += (b >> half) * (self & lower_mask);
115 low += (t & lower_mask) << half;
116 high += t >> half;
117 high += (self >> half) * (b >> half);
118
119 (high, low)
120 }
121 }
122 )+
123 };
124}
125impl WideningMultiply for u128 {
type Output = (u128, u128);
#[inline(always)]
fn wmul(self, b: u128) -> Self::Output {
const LOWER_MASK: u128 = !0 >> 64;
let mut low = (self & LOWER_MASK).wrapping_mul(b & LOWER_MASK);
let mut t = low >> 64;
low &= LOWER_MASK;
t += (self >> 64).wrapping_mul(b & LOWER_MASK);
low += (t & LOWER_MASK) << 64;
let mut high = t >> 64;
t = low >> 64;
low &= LOWER_MASK;
t += (b >> 64).wrapping_mul(self & LOWER_MASK);
low += (t & LOWER_MASK) << 64;
high += t >> 64;
high += (self >> 64).wrapping_mul(b >> 64);
(high, low)
}
}wmul_impl_large! { u128, 64 }
126
127macro_rules! wmul_impl_usize {
128 ($ty:ty) => {
129 impl WideningMultiply for usize {
130 type Output = (usize, usize);
131
132 #[inline(always)]
133 fn wmul(self, x: usize) -> Self::Output {
134 let (high, low) = (self as $ty).wmul(x as $ty);
135 (high as usize, low as usize)
136 }
137 }
138 };
139}
140#[cfg(target_pointer_width = "16")]
141wmul_impl_usize! { u16 }
142#[cfg(target_pointer_width = "32")]
143wmul_impl_usize! { u32 }
144#[cfg(target_pointer_width = "64")]
145impl WideningMultiply for usize {
type Output = (usize, usize);
#[inline(always)]
fn wmul(self, x: usize) -> Self::Output {
let (high, low) = (self as u64).wmul(x as u64);
(high as usize, low as usize)
}
}wmul_impl_usize! { u64 }
146
147#[cfg(feature = "simd_support")]
148mod simd_wmul {
149 use super::*;
150 #[cfg(target_arch = "x86")]
151 use core::arch::x86::*;
152 #[cfg(target_arch = "x86_64")]
153 use core::arch::x86_64::*;
154
155 wmul_impl! {
156 (u8x4, u16x4),
157 (u8x8, u16x8),
158 (u8x16, u16x16),
159 (u8x32, u16x32),
160 (u8x64, Simd<u16, 64>),,
161 8
162 }
163
164 wmul_impl! { (u16x2, u32x2),, 16 }
165 wmul_impl! { (u16x4, u32x4),, 16 }
166 #[cfg(not(target_feature = "sse2"))]
167 wmul_impl! { (u16x8, u32x8),, 16 }
168 #[cfg(not(target_feature = "avx2"))]
169 wmul_impl! { (u16x16, u32x16),, 16 }
170 #[cfg(not(target_feature = "avx512bw"))]
171 wmul_impl! { (u16x32, Simd<u32, 32>),, 16 }
172
173 #[allow(unused_macros)]
176 macro_rules! wmul_impl_16 {
177 ($ty:ident, $mulhi:ident, $mullo:ident) => {
178 impl WideningMultiply for $ty {
179 type Output = ($ty, $ty);
180
181 #[inline(always)]
182 #[allow(clippy::undocumented_unsafe_blocks)]
183 fn wmul(self, x: $ty) -> Self::Output {
184 let hi = unsafe { $mulhi(self.into(), x.into()) }.into();
185 let lo = unsafe { $mullo(self.into(), x.into()) }.into();
186 (hi, lo)
187 }
188 }
189 };
190 }
191
192 #[cfg(target_feature = "sse2")]
193 wmul_impl_16! { u16x8, _mm_mulhi_epu16, _mm_mullo_epi16 }
194 #[cfg(target_feature = "avx2")]
195 wmul_impl_16! { u16x16, _mm256_mulhi_epu16, _mm256_mullo_epi16 }
196 #[cfg(target_feature = "avx512bw")]
197 wmul_impl_16! { u16x32, _mm512_mulhi_epu16, _mm512_mullo_epi16 }
198
199 wmul_impl! {
200 (u32x2, u64x2),
201 (u32x4, u64x4),
202 (u32x8, u64x8),
203 (u32x16, Simd<u64, 16>),,
204 32
205 }
206
207 wmul_impl_large! { (u64x2, u64x4, u64x8,) u64, 32 }
208}
209
210pub(crate) trait FloatSIMDUtils {
212 fn all_lt(self, other: Self) -> bool;
218 fn all_le(self, other: Self) -> bool;
219 fn all_finite(self) -> bool;
220
221 type Mask;
222 fn gt_mask(self, other: Self) -> Self::Mask;
223
224 fn decrease_masked(self, mask: Self::Mask) -> Self;
228
229 type UInt;
232 fn cast_from_int(i: Self::UInt) -> Self;
233}
234
235#[cfg(test)]
236pub(crate) trait FloatSIMDScalarUtils: FloatSIMDUtils {
237 type Scalar;
238
239 fn replace(self, index: usize, new_value: Self::Scalar) -> Self;
240 fn extract_lane(self, index: usize) -> Self::Scalar;
241}
242
243pub(crate) trait FloatAsSIMD: Sized {
245 #[cfg(test)]
246 const LEN: usize = 1;
247
248 #[inline(always)]
249 fn splat(scalar: Self) -> Self {
250 scalar
251 }
252}
253
254pub(crate) trait IntAsSIMD: Sized {
255 #[inline(always)]
256 fn splat(scalar: Self) -> Self {
257 scalar
258 }
259}
260
261impl IntAsSIMD for u32 {}
262impl IntAsSIMD for u64 {}
263
264pub(crate) trait BoolAsSIMD: Sized {
265 fn any(self) -> bool;
266}
267
268impl BoolAsSIMD for bool {
269 #[inline(always)]
270 fn any(self) -> bool {
271 self
272 }
273}
274
275macro_rules! scalar_float_impl {
276 ($ty:ident, $uty:ident) => {
277 impl FloatSIMDUtils for $ty {
278 type Mask = bool;
279 type UInt = $uty;
280
281 #[inline(always)]
282 fn all_lt(self, other: Self) -> bool {
283 self < other
284 }
285
286 #[inline(always)]
287 fn all_le(self, other: Self) -> bool {
288 self <= other
289 }
290
291 #[inline(always)]
292 fn all_finite(self) -> bool {
293 self.is_finite()
294 }
295
296 #[inline(always)]
297 fn gt_mask(self, other: Self) -> Self::Mask {
298 self > other
299 }
300
301 #[inline(always)]
302 fn decrease_masked(self, mask: Self::Mask) -> Self {
303 debug_assert!(mask, "At least one lane must be set");
304 <$ty>::from_bits(self.to_bits() - 1)
305 }
306
307 #[inline]
308 fn cast_from_int(i: Self::UInt) -> Self {
309 i as $ty
310 }
311 }
312
313 #[cfg(test)]
314 impl FloatSIMDScalarUtils for $ty {
315 type Scalar = $ty;
316
317 #[inline]
318 fn replace(self, index: usize, new_value: Self::Scalar) -> Self {
319 debug_assert_eq!(index, 0);
320 new_value
321 }
322
323 #[inline]
324 fn extract_lane(self, index: usize) -> Self::Scalar {
325 debug_assert_eq!(index, 0);
326 self
327 }
328 }
329
330 impl FloatAsSIMD for $ty {}
331 };
332}
333
334impl FloatSIMDUtils for f32 {
type Mask = bool;
type UInt = u32;
#[inline(always)]
fn all_lt(self, other: Self) -> bool { self < other }
#[inline(always)]
fn all_le(self, other: Self) -> bool { self <= other }
#[inline(always)]
fn all_finite(self) -> bool { self.is_finite() }
#[inline(always)]
fn gt_mask(self, other: Self) -> Self::Mask { self > other }
#[inline(always)]
fn decrease_masked(self, mask: Self::Mask) -> Self {
if true {
if !mask {
{
::core::panicking::panic_fmt(format_args!("At least one lane must be set"));
}
};
};
<f32>::from_bits(self.to_bits() - 1)
}
#[inline]
fn cast_from_int(i: Self::UInt) -> Self { i as f32 }
}
impl FloatAsSIMD for f32 {}scalar_float_impl!(f32, u32);
335impl FloatSIMDUtils for f64 {
type Mask = bool;
type UInt = u64;
#[inline(always)]
fn all_lt(self, other: Self) -> bool { self < other }
#[inline(always)]
fn all_le(self, other: Self) -> bool { self <= other }
#[inline(always)]
fn all_finite(self) -> bool { self.is_finite() }
#[inline(always)]
fn gt_mask(self, other: Self) -> Self::Mask { self > other }
#[inline(always)]
fn decrease_masked(self, mask: Self::Mask) -> Self {
if true {
if !mask {
{
::core::panicking::panic_fmt(format_args!("At least one lane must be set"));
}
};
};
<f64>::from_bits(self.to_bits() - 1)
}
#[inline]
fn cast_from_int(i: Self::UInt) -> Self { i as f64 }
}
impl FloatAsSIMD for f64 {}scalar_float_impl!(f64, u64);
336
337#[cfg(feature = "simd_support")]
338macro_rules! simd_impl {
339 ($fty:ident, $uty:ident) => {
340 impl<const LANES: usize> FloatSIMDUtils for Simd<$fty, LANES> {
341 type Mask = Mask<<$fty as SimdElement>::Mask, LANES>;
342 type UInt = Simd<$uty, LANES>;
343
344 #[inline(always)]
345 fn all_lt(self, other: Self) -> bool {
346 self.simd_lt(other).all()
347 }
348
349 #[inline(always)]
350 fn all_le(self, other: Self) -> bool {
351 self.simd_le(other).all()
352 }
353
354 #[inline(always)]
355 fn all_finite(self) -> bool {
356 self.is_finite().all()
357 }
358
359 #[inline(always)]
360 fn gt_mask(self, other: Self) -> Self::Mask {
361 self.simd_gt(other)
362 }
363
364 #[inline(always)]
365 fn decrease_masked(self, mask: Self::Mask) -> Self {
366 debug_assert!(mask.any(), "At least one lane must be set");
373 Self::from_bits(self.to_bits() + mask.to_simd().cast())
374 }
375
376 #[inline]
377 fn cast_from_int(i: Self::UInt) -> Self {
378 i.cast()
379 }
380 }
381
382 #[cfg(test)]
383 impl<const LANES: usize> FloatSIMDScalarUtils for Simd<$fty, LANES> {
384 type Scalar = $fty;
385
386 #[inline]
387 fn replace(mut self, index: usize, new_value: Self::Scalar) -> Self {
388 self.as_mut_array()[index] = new_value;
389 self
390 }
391
392 #[inline]
393 fn extract_lane(self, index: usize) -> Self::Scalar {
394 self.as_array()[index]
395 }
396 }
397 };
398}
399
400#[cfg(feature = "simd_support")]
401simd_impl!(f32, u32);
402#[cfg(feature = "simd_support")]
403simd_impl!(f64, u64);