Skip to main content

core\stdarch\crates\core_arch\src\aarch64\sve/
mod.rs

1//! SVE intrinsics
2
3#![allow(non_camel_case_types)]
4
5// `generated.rs` does a `super::*` import, so this import exists to bring these items into scope for it
6use crate::intrinsics::{simd::*, *};
7
8#[rustfmt::skip]
9mod generated;
10#[rustfmt::skip]
11#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
12pub use self::generated::*;
13
14use crate::{marker::ConstParamTy, mem::transmute};
15
/// Reinterprets a signed value as its unsigned counterpart.
///
/// Implemented further down in this file by `impl_sign_conversions!` (integers and raw
/// pointers to integers) and `impl_sign_conversions_sv!` (scalable vectors and tuples of them).
///
/// # Safety
///
/// The implementations in this file are marked `#[target_feature(enable = "sve")]`, so callers
/// presumably must ensure SVE is available before calling - TODO confirm.
16pub(super) trait AsUnsigned {
    /// The unsigned type produced by the conversion.
17    type Unsigned;
    /// Converts `self` into `Self::Unsigned`.
18    unsafe fn as_unsigned(self) -> Self::Unsigned;
19}
20
/// Reinterprets an unsigned value as its signed counterpart.
///
/// Implemented further down in this file by `impl_sign_conversions!` (integers and raw
/// pointers to integers) and `impl_sign_conversions_sv!` (scalable vectors and tuples of them).
///
/// # Safety
///
/// The implementations in this file are marked `#[target_feature(enable = "sve")]`, so callers
/// presumably must ensure SVE is available before calling - TODO confirm.
21pub(super) trait AsSigned {
    /// The signed type produced by the conversion.
22    type Signed;
    /// Converts `self` into `Self::Signed`.
23    unsafe fn as_signed(self) -> Self::Signed;
24}
25
26/// Same as `Into`, but the conversion method is `unsafe` so that implementations can carry the required `target_feature` attribute
///
/// In this file it is implemented in both directions between `svbool_t` and the internal
/// predicate types generated by `impl_internal_sve_predicate!`.
27pub(super) trait SveInto<T>: Sized {
    /// Converts `self` into `T`.
28    unsafe fn sve_into(self) -> T;
29}
30
/// Declares one scalable-vector struct per `(visibility, element type, name, elt)` tuple.
///
/// Each generated item is a single-field tuple struct wrapping `$elem_type`. It derives
/// `Clone`, `Copy` and `Debug`, is marked unstable, and is annotated with
/// `#[rustc_scalable_vector($elt)]`.
// NOTE(review): `$elt` looks like the lane count of the corresponding LLVM `nxv<N>` type (the
// internal types declared in this file are named `nxv<N><ty>` with the same `N`) - confirm.
31macro_rules! impl_sve_type {
32    ($(($v:vis, $elem_type:ty, $name:ident, $elt:literal))*) => ($(
33        #[doc = concat!("Scalable vector of type ", stringify!($elem_type))]
34        #[derive(Clone, Copy, Debug)]
35        #[rustc_scalable_vector($elt)]
36        #[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
37        $v struct $name($elem_type);
38    )*)
39}
40
/// Declares tuple-of-vector structs from `(visibility, vector type, arity, name)` tuples.
///
/// The arity must be the literal `2`, `3` or `4`; it selects how many `$vec_type` fields the
/// generated tuple struct has. Each struct derives `Clone`, `Copy` and `Debug`, is marked
/// unstable, and carries `#[rustc_scalable_vector]` (without an element-count argument).
41macro_rules! impl_sve_tuple_type {
    // Entry point: forward every tuple to the internal `@` rule matching its arity.
42    ($(($v:vis, $vec_type:ty, $elt:tt, $name:ident))*) => ($(
43        impl_sve_tuple_type!(@ ($v, $vec_type, $elt, $name));
44    )*);
    // Arity 2.
45    (@ ($v:vis, $vec_type:ty, 2, $name:ident)) => (
46        #[doc = concat!("Two-element tuple of scalable vectors of type ", stringify!($vec_type))]
47        #[derive(Clone, Copy, Debug)]
48        #[rustc_scalable_vector]
49        #[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
50        $v struct $name($vec_type, $vec_type);
51    );
    // Arity 3.
52    (@ ($v:vis, $vec_type:ty, 3, $name:ident)) => (
53        #[doc = concat!("Three-element tuple of scalable vectors of type ", stringify!($vec_type))]
54        #[derive(Clone, Copy, Debug)]
55        #[rustc_scalable_vector]
56        #[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
57        $v struct $name($vec_type, $vec_type, $vec_type);
58    );
    // Arity 4.
59    (@ ($v:vis, $vec_type:ty, 4, $name:ident)) => (
60        #[doc = concat!("Four-element tuple of scalable vectors of type ", stringify!($vec_type))]
61        #[derive(Clone, Copy, Debug)]
62        #[rustc_scalable_vector]
63        #[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
64        $v struct $name($vec_type, $vec_type, $vec_type, $vec_type);
65    );
66}
67
/// Implements `AsUnsigned` for `$signed` and `AsSigned` for `$unsigned` for each
/// `(signed, unsigned)` pair of scalable-vector types.
///
/// Both directions are a plain reinterpretation via `transmute_unchecked`, compiled with the
/// `sve` target feature enabled.
// NOTE(review): this variant uses `transmute_unchecked` where `impl_sign_conversions!` uses
// `transmute`; presumably because `transmute`'s compile-time size check cannot be applied to
// scalable vector types - confirm.
68macro_rules! impl_sign_conversions_sv {
69    ($(($signed:ty, $unsigned:ty))*) => ($(
70        impl AsUnsigned for $signed {
71            type Unsigned = $unsigned;
72
73            #[inline]
74            #[target_feature(enable = "sve")]
75            unsafe fn as_unsigned(self) -> $unsigned {
76                transmute_unchecked(self)
77            }
78        }
79
80        impl AsSigned for $unsigned {
81            type Signed = $signed;
82
83            #[inline]
84            #[target_feature(enable = "sve")]
85            unsafe fn as_signed(self) -> $signed {
86                transmute_unchecked(self)
87            }
88        }
89    )*)
90}
91
/// Implements `AsUnsigned` for `$signed` and `AsSigned` for `$unsigned` for each
/// `(signed, unsigned)` pair of types.
///
/// Both directions are a plain reinterpretation via `transmute`, compiled with the `sve`
/// target feature enabled. In this file it is invoked with integer types and raw pointers to
/// them; scalable vectors go through `impl_sign_conversions_sv!` instead.
92macro_rules! impl_sign_conversions {
93    ($(($signed:ty, $unsigned:ty))*) => ($(
94        impl AsUnsigned for $signed {
95            type Unsigned = $unsigned;
96
97            #[inline]
98            #[target_feature(enable = "sve")]
99            unsafe fn as_unsigned(self) -> $unsigned {
100                transmute(self)
101            }
102        }
103
104        impl AsSigned for $unsigned {
105            type Signed = $signed;
106
107            #[inline]
108            #[target_feature(enable = "sve")]
109            unsafe fn as_signed(self) -> $signed {
110                transmute(self)
111            }
112        }
113    )*)
114}
115
116/// LLVM requires the lane count of a predicate to be the same as the lane count of the data
117/// it's working with. However, the ACLE only defines one bool type and the
118/// instruction set doesn't have this distinction. As a result, we have to
119/// create these internal types so we can match the LLVM signatures. Each of
120/// these internal types can be converted to the public `svbool_t` type, and
121/// the `svbool_t` type can be converted into each of them.
///
/// For every `(name, elt)` pair this macro declares a `pub(super)` predicate type via
/// `impl_sve_type!` and implements `SveInto` in both directions between it and `svbool_t`.
122macro_rules! impl_internal_sve_predicate {
123    ($(($name:ident, $elt:literal))*) => ($(
        // The internal predicate type itself: `bool` elements, `$elt` lanes.
124        impl_sve_type! {
125            (pub(super), bool, $name, $elt)
126        }
127
        // Internal predicate -> public `svbool_t`.
128        impl SveInto<svbool_t> for $name {
129            #[inline]
130            #[target_feature(enable = "sve")]
131            unsafe fn sve_into(self) -> svbool_t {
                // The conversion is delegated to an LLVM intrinsic whose name embeds the lane
                // count, e.g. `llvm.aarch64.sve.convert.to.svbool.nxv4i1` for `$elt == 4`.
132                #[allow(improper_ctypes)]
133                unsafe extern "C" {
134                    #[cfg_attr(
135                        target_arch = "aarch64",
136                        link_name = concat!("llvm.aarch64.sve.convert.to.svbool.nxv", $elt, "i1")
137                    )]
138                    fn convert_to_svbool(b: $name) -> svbool_t;
139                }
140                unsafe { convert_to_svbool(self) }
141            }
142        }
143
        // Public `svbool_t` -> internal predicate.
        // NOTE(review): only this impl carries `#[unstable]`; the reverse impl above does not -
        // confirm whether the asymmetry is intentional.
144        #[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
145        impl SveInto<$name> for svbool_t {
146            #[inline]
147            #[target_feature(enable = "sve")]
148            unsafe fn sve_into(self) -> $name {
                // Mirror image of the intrinsic above, e.g.
                // `llvm.aarch64.sve.convert.from.svbool.nxv4i1` for `$elt == 4`.
149                #[allow(improper_ctypes)]
150                unsafe extern "C" {
151                    #[cfg_attr(
152                        target_arch = "aarch64",
153                        link_name = concat!("llvm.aarch64.sve.convert.from.svbool.nxv", $elt, "i1")
154                    )]
155                    fn convert_from_svbool(b: svbool_t) -> $name;
156                }
157                unsafe { convert_from_svbool(self) }
158            }
159        }
160    )*)
161}
162
// Declares the single-vector types. Each row is `(visibility, element type, type name, elt)`,
// as expected by `impl_sve_type!`.
163impl_sve_type! {
    // Public predicate type.
164    (pub, bool, svbool_t, 16)
165
    // Public data vector types. For these, element width in bits times `elt` is always 128.
166    (pub, i8, svint8_t, 16)
167    (pub, u8, svuint8_t, 16)
168
169    (pub, i16, svint16_t, 8)
170    (pub, u16, svuint16_t, 8)
171    (pub, f32, svfloat32_t, 4)
172    (pub, i32, svint32_t, 4)
173    (pub, u32, svuint32_t, 4)
174    (pub, f64, svfloat64_t, 2)
175    (pub, i64, svint64_t, 2)
176    (pub, u64, svuint64_t, 2)
177
178    // Internal types (`pub(super)`), named `nxv<elt><element type>`; these have fewer lanes than the public type of the same element type:
179    (pub(super), i8, nxv2i8, 2)
180    (pub(super), i8, nxv4i8, 4)
181    (pub(super), i8, nxv8i8, 8)
182
183    (pub(super), i16, nxv2i16, 2)
184    (pub(super), i16, nxv4i16, 4)
185
186    (pub(super), i32, nxv2i32, 2)
187
188    (pub(super), u8, nxv2u8, 2)
189    (pub(super), u8, nxv4u8, 4)
190    (pub(super), u8, nxv8u8, 8)
191
192    (pub(super), u16, nxv2u16, 2)
193    (pub(super), u16, nxv4u16, 4)
194
195    (pub(super), u32, nxv2u32, 2)
196}
197
// Declares the public x2, x3 and x4 tuple types for every public data vector type. Each row is
// `(visibility, vector type, arity, type name)`, as expected by `impl_sve_tuple_type!`.
198impl_sve_tuple_type! {
    // Two-vector tuples.
199    (pub, svint8_t, 2, svint8x2_t)
200    (pub, svuint8_t, 2, svuint8x2_t)
201    (pub, svint16_t, 2, svint16x2_t)
202    (pub, svuint16_t, 2, svuint16x2_t)
203    (pub, svfloat32_t, 2, svfloat32x2_t)
204    (pub, svint32_t, 2, svint32x2_t)
205    (pub, svuint32_t, 2, svuint32x2_t)
206    (pub, svfloat64_t, 2, svfloat64x2_t)
207    (pub, svint64_t, 2, svint64x2_t)
208    (pub, svuint64_t, 2, svuint64x2_t)
209
    // Three-vector tuples.
210    (pub, svint8_t, 3, svint8x3_t)
211    (pub, svuint8_t, 3, svuint8x3_t)
212    (pub, svint16_t, 3, svint16x3_t)
213    (pub, svuint16_t, 3, svuint16x3_t)
214    (pub, svfloat32_t, 3, svfloat32x3_t)
215    (pub, svint32_t, 3, svint32x3_t)
216    (pub, svuint32_t, 3, svuint32x3_t)
217    (pub, svfloat64_t, 3, svfloat64x3_t)
218    (pub, svint64_t, 3, svint64x3_t)
219    (pub, svuint64_t, 3, svuint64x3_t)
220
    // Four-vector tuples.
221    (pub, svint8_t, 4, svint8x4_t)
222    (pub, svuint8_t, 4, svuint8x4_t)
223    (pub, svint16_t, 4, svint16x4_t)
224    (pub, svuint16_t, 4, svuint16x4_t)
225    (pub, svfloat32_t, 4, svfloat32x4_t)
226    (pub, svint32_t, 4, svint32x4_t)
227    (pub, svuint32_t, 4, svuint32x4_t)
228    (pub, svfloat64_t, 4, svfloat64x4_t)
229    (pub, svint64_t, 4, svint64x4_t)
230    (pub, svuint64_t, 4, svuint64x4_t)
231}
232
// Sign conversions for scalars and raw pointers. Each row is `(signed, unsigned)`.
233impl_sign_conversions! {
    // Integers.
234    (i8, u8)
235    (i16, u16)
236    (i32, u32)
237    (i64, u64)
    // `*const` pointers to integers.
238    (*const i8, *const u8)
239    (*const i16, *const u16)
240    (*const i32, *const u32)
241    (*const i64, *const u64)
    // `*mut` pointers to integers.
242    (*mut i8, *mut u8)
243    (*mut i16, *mut u16)
244    (*mut i32, *mut u32)
245    (*mut i64, *mut u64)
246}
247
// Sign conversions for scalable vector types. Each row is `(signed, unsigned)`.
248impl_sign_conversions_sv! {
    // Single vectors.
249    (svint8_t, svuint8_t)
250    (svint16_t, svuint16_t)
251    (svint32_t, svuint32_t)
252    (svint64_t, svuint64_t)
253
    // Two-vector tuples.
254    (svint8x2_t, svuint8x2_t)
255    (svint16x2_t, svuint16x2_t)
256    (svint32x2_t, svuint32x2_t)
257    (svint64x2_t, svuint64x2_t)
258
    // Three-vector tuples.
259    (svint8x3_t, svuint8x3_t)
260    (svint16x3_t, svuint16x3_t)
261    (svint32x3_t, svuint32x3_t)
262    (svint64x3_t, svuint64x3_t)
263
    // Four-vector tuples.
264    (svint8x4_t, svuint8x4_t)
265    (svint16x4_t, svuint16x4_t)
266    (svint32x4_t, svuint32x4_t)
267    (svint64x4_t, svuint64x4_t)
268
269    // Internal types (the `pub(super)` `nxv*` vectors declared via `impl_sve_type!`):
270    (nxv2i8, nxv2u8)
271    (nxv4i8, nxv4u8)
272    (nxv8i8, nxv8u8)
273
274    (nxv2i16, nxv2u16)
275    (nxv4i16, nxv4u16)
276
277    (nxv2i32, nxv2u32)
278}
279
// Internal predicate types with 2, 4 and 8 lanes; the 16-lane predicate is the public
// `svbool_t` declared via `impl_sve_type!`.
280impl_internal_sve_predicate! {
281    (svbool2_t, 2)
282    (svbool4_t, 4)
283    (svbool8_t, 8)
284}
285
286/// Patterns returned by a `PTRUE`
287#[repr(i32)]
288#[allow(non_camel_case_types)]
289#[derive(Clone, Copy, Debug, PartialEq, Eq, ConstParamTy)]
290#[non_exhaustive]
291#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
292pub enum svpattern {
293    /// Activate the largest power-of-two number of elements that is no greater than the number of elements in the vector
294    SV_POW2 = 0,
295    /// Activate the first element
296    SV_VL1 = 1,
297    /// Activate the first two elements
298    SV_VL2 = 2,
299    /// Activate the first three elements
300    SV_VL3 = 3,
301    /// Activate the first four elements
302    SV_VL4 = 4,
303    /// Activate the first five elements
304    SV_VL5 = 5,
305    /// Activate the first six elements
306    SV_VL6 = 6,
307    /// Activate the first seven elements
308    SV_VL7 = 7,
309    /// Activate the first eight elements
310    SV_VL8 = 8,
311    /// Activate the first sixteen elements
312    SV_VL16 = 9,
313    /// Activate the first thirty-two elements
314    SV_VL32 = 10,
315    /// Activate the first sixty-four elements
316    SV_VL64 = 11,
317    /// Activate the first one-hundred-and-twenty-eight elements
318    SV_VL128 = 12,
319    /// Activate the first two-hundred-and-fifty-six elements
320    SV_VL256 = 13,
321    /// Activate the largest multiple-of-four number of elements that is no greater than the number of elements in the vector
322    SV_MUL4 = 29,
323    /// Activate the largest multiple-of-three number of elements that is no greater than the
324    /// number of elements in the vector
325    SV_MUL3 = 30,
326    /// Activate all elements
327    SV_ALL = 31,
328}
329
330/// Addressing mode for prefetch intrinsics - allows the specification of the expected access
331/// kind (read or write), the cache level to load the data into, and the data retention policy
332/// (temporal or streaming)
333#[repr(i32)]
334#[allow(non_camel_case_types)]
335#[derive(Clone, Copy, Debug, PartialEq, Eq, ConstParamTy)]
336#[non_exhaustive]
337#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
338pub enum svprfop {
339    /// Temporal fetch of the addressed location for reading to the L1 cache (i.e. allocate in
340    /// cache normally)
341    SV_PLDL1KEEP = 0,
342    /// Streaming fetch of the addressed location for reading to the L1 cache (i.e. memory only
343    /// used once)
344    SV_PLDL1STRM = 1,
345    /// Temporal fetch of the addressed location for reading to the L2 cache (i.e. allocate in
346    /// cache normally)
347    SV_PLDL2KEEP = 2,
348    /// Streaming fetch of the addressed location for reading to the L2 cache (i.e. memory only
349    /// used once)
350    SV_PLDL2STRM = 3,
351    /// Temporal fetch of the addressed location for reading to the L3 cache (i.e. allocate in
352    /// cache normally)
353    SV_PLDL3KEEP = 4,
354    /// Streaming fetch of the addressed location for reading to the L3 cache (i.e. memory only
355    /// used once)
356    SV_PLDL3STRM = 5,
357    /// Temporal fetch of the addressed location for writing to the L1 cache (i.e. allocate in
358    /// cache normally)
359    SV_PSTL1KEEP = 8,
360    /// Streaming fetch of the addressed location for writing to the L1 cache (i.e. memory only
361    /// used once)
362    SV_PSTL1STRM = 9,
363    /// Temporal fetch of the addressed location for writing to the L2 cache (i.e. allocate in
364    /// cache normally)
365    SV_PSTL2KEEP = 10,
366    /// Streaming fetch of the addressed location for writing to the L2 cache (i.e. memory only
367    /// used once)
368    SV_PSTL2STRM = 11,
369    /// Temporal fetch of the addressed location for writing to the L3 cache (i.e. allocate in
370    /// cache normally)
371    SV_PSTL3KEEP = 12,
372    /// Streaming fetch of the addressed location for writing to the L3 cache (i.e. memory only
373    /// used once)
374    SV_PSTL3STRM = 13,
375}
376
377// FIXME(arm-maintainers): On MSVC targets, it seemed like spurious corruption of the FFR was being
378// observed non-deterministically on CI. Disabling these tests out of caution on that platform until
379// it is investigated.
380#[cfg(all(test, not(target_env = "msvc")))]
381#[path = "ld_st_tests_aarch64.rs"]
382mod ld_st_tests;