// libm/math/arch/x86/fma.rs
use core::arch::asm;

use super::super::super::generic;
use super::detect::{cpu_flags, get_cpu_features};
use crate::support::Round;
use crate::support::feature_detect::select_once;
9
10pub fn fma(x: f64, y: f64, z: f64) -> f64 {
11    select_once! {
12        sig: fn(x: f64, y: f64, z: f64) -> f64,
13        init: || {
14            let features = get_cpu_features();
15            if features.contains(cpu_flags::FMA) {
16                fma_with_fma
17            } else if features.contains(cpu_flags::FMA4) {
18               fma_with_fma4
19            } else {
20                fma_fallback as Func
21            }
22        },
23        call: |fn_ptr: Func| unsafe { fn_ptr(x, y, z) },
25    }
26}
27
28pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
29    select_once! {
30        sig: fn(x: f32, y: f32, z: f32) -> f32,
31        init: || {
32            let features = get_cpu_features();
33            if features.contains(cpu_flags::FMA) {
34                fmaf_with_fma
35            } else if features.contains(cpu_flags::FMA4) {
36                fmaf_with_fma4
37            } else {
38                fmaf_fallback as Func
39            }
40        },
41        call: |fn_ptr: Func| unsafe { fn_ptr(x, y, z) },
43    }
44}
45
46unsafe fn fma_with_fma(mut x: f64, y: f64, z: f64) -> f64 {
50    debug_assert!(get_cpu_features().contains(cpu_flags::FMA));
51
52    unsafe {
55        asm!(
56            "vfmadd213sd {x}, {y}, {z}",
57            x = inout(xmm_reg) x,
58            y = in(xmm_reg) y,
59            z = in(xmm_reg) z,
60            options(nostack, nomem, pure),
61        );
62    }
63    x
64}
65
66unsafe fn fmaf_with_fma(mut x: f32, y: f32, z: f32) -> f32 {
70    debug_assert!(get_cpu_features().contains(cpu_flags::FMA));
71
72    unsafe {
75        asm!(
76            "vfmadd213ss {x}, {y}, {z}",
77            x = inout(xmm_reg) x,
78            y = in(xmm_reg) y,
79            z = in(xmm_reg) z,
80            options(nostack, nomem, pure),
81        );
82    }
83    x
84}
85
86unsafe fn fma_with_fma4(mut x: f64, y: f64, z: f64) -> f64 {
90    debug_assert!(get_cpu_features().contains(cpu_flags::FMA4));
91
92    unsafe {
95        asm!(
96            "vfmaddsd {x}, {x}, {y}, {z}",
97            x = inout(xmm_reg) x,
98            y = in(xmm_reg) y,
99            z = in(xmm_reg) z,
100            options(nostack, nomem, pure),
101        );
102    }
103    x
104}
105
106unsafe fn fmaf_with_fma4(mut x: f32, y: f32, z: f32) -> f32 {
110    debug_assert!(get_cpu_features().contains(cpu_flags::FMA4));
111
112    unsafe {
115        asm!(
116            "vfmaddss {x}, {x}, {y}, {z}",
117            x = inout(xmm_reg) x,
118            y = in(xmm_reg) y,
119            z = in(xmm_reg) z,
120            options(nostack, nomem, pure),
121        );
122    }
123    x
124}
125
126fn fma_fallback(x: f64, y: f64, z: f64) -> f64 {
130    generic::fma_round(x, y, z, Round::Nearest).val
131}
132
133fn fmaf_fallback(x: f32, y: f32, z: f32) -> f32 {
134    generic::fma_wide_round(x, y, z, Round::Nearest).val
135}