//! Use assembly fma if the `fma` or `fma4` feature is detected at runtime.
23use core::arch::asm;
45use super::super::super::generic;
6use super::detect::{cpu_flags, get_cpu_features};
7use crate::support::Round;
8use crate::support::feature_detect::select_once;
910pub fn fma(x: f64, y: f64, z: f64) -> f64 {
11{
use core::mem;
use core::sync::atomic::{AtomicPtr, Ordering};
type Func = unsafe fn(x: f64, y: f64, z: f64) -> f64;
/// Stores a pointer that is immediately jumped to. By default it is an init function
/// that sets FUNC to something else.
static FUNC: AtomicPtr<()> =
AtomicPtr::new((initializer as Func) as *mut ());
/// Run once to set the function that will be used for all subsequent calls.
fn initializer(x: f64, y: f64, z: f64) -> f64 {
let fn_ptr: Func =
(||
{
let features = get_cpu_features();
if features.contains(cpu_flags::FMA) {
fma_with_fma
} else if features.contains(cpu_flags::FMA4) {
fma_with_fma4
} else { fma_fallback as Func }
})();
FUNC.store(fn_ptr as *mut (), Ordering::Relaxed);
(|fn_ptr: Func| unsafe { fn_ptr(x, y, z) })(fn_ptr)
}
let raw: *mut () = FUNC.load(Ordering::Relaxed);
let fn_ptr: Func = unsafe { mem::transmute::<*mut (), Func>(raw) };
(|fn_ptr: Func| unsafe { fn_ptr(x, y, z) })(fn_ptr)
}select_once! {
12 sig: fn(x: f64, y: f64, z: f64) -> f64,
13 init: || {
14let features = get_cpu_features();
15if features.contains(cpu_flags::FMA) {
16 fma_with_fma
17 } else if features.contains(cpu_flags::FMA4) {
18 fma_with_fma4
19 } else {
20 fma_fallback as Func
21 }
22 },
23// SAFETY: `fn_ptr` is the result of `init`, preconditions have been checked.
24call: |fn_ptr: Func| unsafe { fn_ptr(x, y, z) },
25 }26}
2728pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
29{
use core::mem;
use core::sync::atomic::{AtomicPtr, Ordering};
type Func = unsafe fn(x: f32, y: f32, z: f32) -> f32;
/// Stores a pointer that is immediately jumped to. By default it is an init function
/// that sets FUNC to something else.
static FUNC: AtomicPtr<()> =
AtomicPtr::new((initializer as Func) as *mut ());
/// Run once to set the function that will be used for all subsequent calls.
fn initializer(x: f32, y: f32, z: f32) -> f32 {
let fn_ptr: Func =
(||
{
let features = get_cpu_features();
if features.contains(cpu_flags::FMA) {
fmaf_with_fma
} else if features.contains(cpu_flags::FMA4) {
fmaf_with_fma4
} else { fmaf_fallback as Func }
})();
FUNC.store(fn_ptr as *mut (), Ordering::Relaxed);
(|fn_ptr: Func| unsafe { fn_ptr(x, y, z) })(fn_ptr)
}
let raw: *mut () = FUNC.load(Ordering::Relaxed);
let fn_ptr: Func = unsafe { mem::transmute::<*mut (), Func>(raw) };
(|fn_ptr: Func| unsafe { fn_ptr(x, y, z) })(fn_ptr)
}select_once! {
30 sig: fn(x: f32, y: f32, z: f32) -> f32,
31 init: || {
32let features = get_cpu_features();
33if features.contains(cpu_flags::FMA) {
34 fmaf_with_fma
35 } else if features.contains(cpu_flags::FMA4) {
36 fmaf_with_fma4
37 } else {
38 fmaf_fallback as Func
39 }
40 },
41// SAFETY: `fn_ptr` is the result of `init`, preconditions have been checked.
42call: |fn_ptr: Func| unsafe { fn_ptr(x, y, z) },
43 }44}
4546/// # Safety
47///
48/// Must have +fma available.
49unsafe fn fma_with_fma(mut x: f64, y: f64, z: f64) -> f64 {
50if true {
if !get_cpu_features().contains(cpu_flags::FMA) {
::core::panicking::panic("assertion failed: get_cpu_features().contains(cpu_flags::FMA)")
};
};debug_assert!(get_cpu_features().contains(cpu_flags::FMA));
5152// SAFETY: fma is asserted available by precondition, which provides the instruction. No
53 // memory access or side effects.
54unsafe {
55asm!(
56"vfmadd213sd {x}, {y}, {z}",
57 x = inout(xmm_reg) x,
58 y = in(xmm_reg) y,
59 z = in(xmm_reg) z,
60 options(nostack, nomem, pure),
61 );
62 }
63x64}
6566/// # Safety
67///
68/// Must have +fma available.
69unsafe fn fmaf_with_fma(mut x: f32, y: f32, z: f32) -> f32 {
70if true {
if !get_cpu_features().contains(cpu_flags::FMA) {
::core::panicking::panic("assertion failed: get_cpu_features().contains(cpu_flags::FMA)")
};
};debug_assert!(get_cpu_features().contains(cpu_flags::FMA));
7172// SAFETY: fma is asserted available by precondition, which provides the instruction. No
73 // memory access or side effects.
74unsafe {
75asm!(
76"vfmadd213ss {x}, {y}, {z}",
77 x = inout(xmm_reg) x,
78 y = in(xmm_reg) y,
79 z = in(xmm_reg) z,
80 options(nostack, nomem, pure),
81 );
82 }
83x84}
8586/// # Safety
87///
88/// Must have +fma4 available.
89unsafe fn fma_with_fma4(mut x: f64, y: f64, z: f64) -> f64 {
90if true {
if !get_cpu_features().contains(cpu_flags::FMA4) {
::core::panicking::panic("assertion failed: get_cpu_features().contains(cpu_flags::FMA4)")
};
};debug_assert!(get_cpu_features().contains(cpu_flags::FMA4));
9192// SAFETY: fma4 is asserted available by precondition, which provides the instruction. No
93 // memory access or side effects.
94unsafe {
95asm!(
96"vfmaddsd {x}, {x}, {y}, {z}",
97 x = inout(xmm_reg) x,
98 y = in(xmm_reg) y,
99 z = in(xmm_reg) z,
100 options(nostack, nomem, pure),
101 );
102 }
103x104}
105106/// # Safety
107///
108/// Must have +fma4 available.
109unsafe fn fmaf_with_fma4(mut x: f32, y: f32, z: f32) -> f32 {
110if true {
if !get_cpu_features().contains(cpu_flags::FMA4) {
::core::panicking::panic("assertion failed: get_cpu_features().contains(cpu_flags::FMA4)")
};
};debug_assert!(get_cpu_features().contains(cpu_flags::FMA4));
111112// SAFETY: fma4 is asserted available by precondition, which provides the instruction. No
113 // memory access or side effects.
114unsafe {
115asm!(
116"vfmaddss {x}, {x}, {y}, {z}",
117 x = inout(xmm_reg) x,
118 y = in(xmm_reg) y,
119 z = in(xmm_reg) z,
120 options(nostack, nomem, pure),
121 );
122 }
123x124}
125126// FIXME: the `select_implementation` macro should handle arch implementations that want
127// to use the fallback, so we don't need to recreate the body.
128129fn fma_fallback(x: f64, y: f64, z: f64) -> f64 {
130 generic::fma_round(x, y, z, Round::Nearest).val
131}
132133fn fmaf_fallback(x: f32, y: f32, z: f32) -> f32 {
134 generic::fma_wide_round(x, y, z, Round::Nearest).val
135}