1#[cfg(test)]
4use stdarch_test::assert_instr;
5
6use crate::{
7 core_arch::{simd::*, x86::*},
8 intrinsics::simd::*,
9 intrinsics::sqrtf64,
10 mem, ptr,
11};
12
13#[inline]
20#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
21#[stable(feature = "simd_x86", since = "1.27.0")]
22pub fn _mm_pause() {
23 unsafe { pause() }
26}
27
28#[inline]
39#[target_feature(enable = "sse2")]
40#[cfg_attr(test, assert_instr(clflush))]
41#[stable(feature = "simd_x86", since = "1.27.0")]
42pub unsafe fn _mm_clflush(p: *const u8) {
43 clflush(p)
44}
45
46#[inline]
55#[target_feature(enable = "sse2")]
56#[cfg_attr(test, assert_instr(lfence))]
57#[stable(feature = "simd_x86", since = "1.27.0")]
58pub fn _mm_lfence() {
59 unsafe { lfence() }
60}
61
62#[inline]
71#[target_feature(enable = "sse2")]
72#[cfg_attr(test, assert_instr(mfence))]
73#[stable(feature = "simd_x86", since = "1.27.0")]
74pub fn _mm_mfence() {
75 unsafe { mfence() }
76}
77
78#[inline]
82#[target_feature(enable = "sse2")]
83#[cfg_attr(test, assert_instr(paddb))]
84#[stable(feature = "simd_x86", since = "1.27.0")]
85#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
86pub const fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
87 unsafe { transmute(simd_add(a.as_i8x16(), b.as_i8x16())) }
88}
89
90#[inline]
94#[target_feature(enable = "sse2")]
95#[cfg_attr(test, assert_instr(paddw))]
96#[stable(feature = "simd_x86", since = "1.27.0")]
97#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
98pub const fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
99 unsafe { transmute(simd_add(a.as_i16x8(), b.as_i16x8())) }
100}
101
102#[inline]
106#[target_feature(enable = "sse2")]
107#[cfg_attr(test, assert_instr(paddd))]
108#[stable(feature = "simd_x86", since = "1.27.0")]
109#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
110pub const fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
111 unsafe { transmute(simd_add(a.as_i32x4(), b.as_i32x4())) }
112}
113
114#[inline]
118#[target_feature(enable = "sse2")]
119#[cfg_attr(test, assert_instr(paddq))]
120#[stable(feature = "simd_x86", since = "1.27.0")]
121#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
122pub const fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
123 unsafe { transmute(simd_add(a.as_i64x2(), b.as_i64x2())) }
124}
125
126#[inline]
130#[target_feature(enable = "sse2")]
131#[cfg_attr(test, assert_instr(paddsb))]
132#[stable(feature = "simd_x86", since = "1.27.0")]
133#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
134pub const fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
135 unsafe { transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16())) }
136}
137
138#[inline]
142#[target_feature(enable = "sse2")]
143#[cfg_attr(test, assert_instr(paddsw))]
144#[stable(feature = "simd_x86", since = "1.27.0")]
145#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
146pub const fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
147 unsafe { transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8())) }
148}
149
150#[inline]
154#[target_feature(enable = "sse2")]
155#[cfg_attr(test, assert_instr(paddusb))]
156#[stable(feature = "simd_x86", since = "1.27.0")]
157#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
158pub const fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
159 unsafe { transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16())) }
160}
161
162#[inline]
166#[target_feature(enable = "sse2")]
167#[cfg_attr(test, assert_instr(paddusw))]
168#[stable(feature = "simd_x86", since = "1.27.0")]
169#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
170pub const fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
171 unsafe { transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8())) }
172}
173
174#[inline]
178#[target_feature(enable = "sse2")]
179#[cfg_attr(test, assert_instr(pavgb))]
180#[stable(feature = "simd_x86", since = "1.27.0")]
181#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
182pub const fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
183 unsafe {
184 let a = simd_cast::<_, u16x16>(a.as_u8x16());
185 let b = simd_cast::<_, u16x16>(b.as_u8x16());
186 let r = simd_shr(simd_add(simd_add(a, b), u16x16::splat(1)), u16x16::splat(1));
187 transmute(simd_cast::<_, u8x16>(r))
188 }
189}
190
191#[inline]
195#[target_feature(enable = "sse2")]
196#[cfg_attr(test, assert_instr(pavgw))]
197#[stable(feature = "simd_x86", since = "1.27.0")]
198#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
199pub const fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
200 unsafe {
201 let a = simd_cast::<_, u32x8>(a.as_u16x8());
202 let b = simd_cast::<_, u32x8>(b.as_u16x8());
203 let r = simd_shr(simd_add(simd_add(a, b), u32x8::splat(1)), u32x8::splat(1));
204 transmute(simd_cast::<_, u16x8>(r))
205 }
206}
207
208#[inline]
216#[target_feature(enable = "sse2")]
217#[cfg_attr(test, assert_instr(pmaddwd))]
218#[stable(feature = "simd_x86", since = "1.27.0")]
219pub fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
220 unsafe { transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) }
233}
234
235#[inline]
240#[target_feature(enable = "sse2")]
241#[cfg_attr(test, assert_instr(pmaxsw))]
242#[stable(feature = "simd_x86", since = "1.27.0")]
243#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
244pub const fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
245 unsafe { simd_imax(a.as_i16x8(), b.as_i16x8()).as_m128i() }
246}
247
248#[inline]
253#[target_feature(enable = "sse2")]
254#[cfg_attr(test, assert_instr(pmaxub))]
255#[stable(feature = "simd_x86", since = "1.27.0")]
256#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
257pub const fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
258 unsafe { simd_imax(a.as_u8x16(), b.as_u8x16()).as_m128i() }
259}
260
261#[inline]
266#[target_feature(enable = "sse2")]
267#[cfg_attr(test, assert_instr(pminsw))]
268#[stable(feature = "simd_x86", since = "1.27.0")]
269#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
270pub const fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
271 unsafe { simd_imin(a.as_i16x8(), b.as_i16x8()).as_m128i() }
272}
273
274#[inline]
279#[target_feature(enable = "sse2")]
280#[cfg_attr(test, assert_instr(pminub))]
281#[stable(feature = "simd_x86", since = "1.27.0")]
282#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
283pub const fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
284 unsafe { simd_imin(a.as_u8x16(), b.as_u8x16()).as_m128i() }
285}
286
287#[inline]
294#[target_feature(enable = "sse2")]
295#[cfg_attr(test, assert_instr(pmulhw))]
296#[stable(feature = "simd_x86", since = "1.27.0")]
297#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
298pub const fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
299 unsafe {
300 let a = simd_cast::<_, i32x8>(a.as_i16x8());
301 let b = simd_cast::<_, i32x8>(b.as_i16x8());
302 let r = simd_shr(simd_mul(a, b), i32x8::splat(16));
303 transmute(simd_cast::<i32x8, i16x8>(r))
304 }
305}
306
307#[inline]
314#[target_feature(enable = "sse2")]
315#[cfg_attr(test, assert_instr(pmulhuw))]
316#[stable(feature = "simd_x86", since = "1.27.0")]
317#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
318pub const fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
319 unsafe {
320 let a = simd_cast::<_, u32x8>(a.as_u16x8());
321 let b = simd_cast::<_, u32x8>(b.as_u16x8());
322 let r = simd_shr(simd_mul(a, b), u32x8::splat(16));
323 transmute(simd_cast::<u32x8, u16x8>(r))
324 }
325}
326
327#[inline]
334#[target_feature(enable = "sse2")]
335#[cfg_attr(test, assert_instr(pmullw))]
336#[stable(feature = "simd_x86", since = "1.27.0")]
337#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
338pub const fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
339 unsafe { transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) }
340}
341
342#[inline]
349#[target_feature(enable = "sse2")]
350#[cfg_attr(test, assert_instr(pmuludq))]
351#[stable(feature = "simd_x86", since = "1.27.0")]
352#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
353pub const fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
354 unsafe {
355 let a = a.as_u64x2();
356 let b = b.as_u64x2();
357 let mask = u64x2::splat(u32::MAX as u64);
358 transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
359 }
360}
361
362#[inline]
371#[target_feature(enable = "sse2")]
372#[cfg_attr(test, assert_instr(psadbw))]
373#[stable(feature = "simd_x86", since = "1.27.0")]
374pub fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
375 unsafe { transmute(psadbw(a.as_u8x16(), b.as_u8x16())) }
376}
377
378#[inline]
382#[target_feature(enable = "sse2")]
383#[cfg_attr(test, assert_instr(psubb))]
384#[stable(feature = "simd_x86", since = "1.27.0")]
385#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
386pub const fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
387 unsafe { transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) }
388}
389
390#[inline]
394#[target_feature(enable = "sse2")]
395#[cfg_attr(test, assert_instr(psubw))]
396#[stable(feature = "simd_x86", since = "1.27.0")]
397#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
398pub const fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
399 unsafe { transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) }
400}
401
402#[inline]
406#[target_feature(enable = "sse2")]
407#[cfg_attr(test, assert_instr(psubd))]
408#[stable(feature = "simd_x86", since = "1.27.0")]
409#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
410pub const fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
411 unsafe { transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) }
412}
413
414#[inline]
418#[target_feature(enable = "sse2")]
419#[cfg_attr(test, assert_instr(psubq))]
420#[stable(feature = "simd_x86", since = "1.27.0")]
421#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
422pub const fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
423 unsafe { transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) }
424}
425
426#[inline]
431#[target_feature(enable = "sse2")]
432#[cfg_attr(test, assert_instr(psubsb))]
433#[stable(feature = "simd_x86", since = "1.27.0")]
434#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
435pub const fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
436 unsafe { transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16())) }
437}
438
439#[inline]
444#[target_feature(enable = "sse2")]
445#[cfg_attr(test, assert_instr(psubsw))]
446#[stable(feature = "simd_x86", since = "1.27.0")]
447#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
448pub const fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
449 unsafe { transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8())) }
450}
451
452#[inline]
457#[target_feature(enable = "sse2")]
458#[cfg_attr(test, assert_instr(psubusb))]
459#[stable(feature = "simd_x86", since = "1.27.0")]
460#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
461pub const fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
462 unsafe { transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16())) }
463}
464
465#[inline]
470#[target_feature(enable = "sse2")]
471#[cfg_attr(test, assert_instr(psubusw))]
472#[stable(feature = "simd_x86", since = "1.27.0")]
473#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
474pub const fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
475 unsafe { transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8())) }
476}
477
478#[inline]
482#[target_feature(enable = "sse2")]
483#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
484#[rustc_legacy_const_generics(1)]
485#[stable(feature = "simd_x86", since = "1.27.0")]
486#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
487pub const fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
488 static_assert_uimm_bits!(IMM8, 8);
489 unsafe { _mm_slli_si128_impl::<IMM8>(a) }
490}
491
492#[inline]
495#[target_feature(enable = "sse2")]
496#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
497const unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
498 const fn mask(shift: i32, i: u32) -> u32 {
499 let shift = shift as u32 & 0xff;
500 if shift > 15 { i } else { 16 - shift + i }
501 }
502 transmute::<i8x16, _>(simd_shuffle!(
503 i8x16::ZERO,
504 a.as_i8x16(),
505 [
506 mask(IMM8, 0),
507 mask(IMM8, 1),
508 mask(IMM8, 2),
509 mask(IMM8, 3),
510 mask(IMM8, 4),
511 mask(IMM8, 5),
512 mask(IMM8, 6),
513 mask(IMM8, 7),
514 mask(IMM8, 8),
515 mask(IMM8, 9),
516 mask(IMM8, 10),
517 mask(IMM8, 11),
518 mask(IMM8, 12),
519 mask(IMM8, 13),
520 mask(IMM8, 14),
521 mask(IMM8, 15),
522 ],
523 ))
524}
525
526#[inline]
530#[target_feature(enable = "sse2")]
531#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
532#[rustc_legacy_const_generics(1)]
533#[stable(feature = "simd_x86", since = "1.27.0")]
534#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
535pub const fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
536 unsafe {
537 static_assert_uimm_bits!(IMM8, 8);
538 _mm_slli_si128_impl::<IMM8>(a)
539 }
540}
541
542#[inline]
546#[target_feature(enable = "sse2")]
547#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
548#[rustc_legacy_const_generics(1)]
549#[stable(feature = "simd_x86", since = "1.27.0")]
550#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
551pub const fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
552 unsafe {
553 static_assert_uimm_bits!(IMM8, 8);
554 _mm_srli_si128_impl::<IMM8>(a)
555 }
556}
557
558#[inline]
562#[target_feature(enable = "sse2")]
563#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
564#[rustc_legacy_const_generics(1)]
565#[stable(feature = "simd_x86", since = "1.27.0")]
566#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
567pub const fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
568 static_assert_uimm_bits!(IMM8, 8);
569 unsafe {
570 if IMM8 >= 16 {
571 _mm_setzero_si128()
572 } else {
573 transmute(simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
574 }
575 }
576}
577
578#[inline]
583#[target_feature(enable = "sse2")]
584#[cfg_attr(test, assert_instr(psllw))]
585#[stable(feature = "simd_x86", since = "1.27.0")]
586pub fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
587 unsafe { transmute(psllw(a.as_i16x8(), count.as_i16x8())) }
588}
589
590#[inline]
594#[target_feature(enable = "sse2")]
595#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
596#[rustc_legacy_const_generics(1)]
597#[stable(feature = "simd_x86", since = "1.27.0")]
598#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
599pub const fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
600 static_assert_uimm_bits!(IMM8, 8);
601 unsafe {
602 if IMM8 >= 32 {
603 _mm_setzero_si128()
604 } else {
605 transmute(simd_shl(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
606 }
607 }
608}
609
610#[inline]
615#[target_feature(enable = "sse2")]
616#[cfg_attr(test, assert_instr(pslld))]
617#[stable(feature = "simd_x86", since = "1.27.0")]
618pub fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
619 unsafe { transmute(pslld(a.as_i32x4(), count.as_i32x4())) }
620}
621
622#[inline]
626#[target_feature(enable = "sse2")]
627#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
628#[rustc_legacy_const_generics(1)]
629#[stable(feature = "simd_x86", since = "1.27.0")]
630#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
631pub const fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
632 static_assert_uimm_bits!(IMM8, 8);
633 unsafe {
634 if IMM8 >= 64 {
635 _mm_setzero_si128()
636 } else {
637 transmute(simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
638 }
639 }
640}
641
642#[inline]
647#[target_feature(enable = "sse2")]
648#[cfg_attr(test, assert_instr(psllq))]
649#[stable(feature = "simd_x86", since = "1.27.0")]
650pub fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
651 unsafe { transmute(psllq(a.as_i64x2(), count.as_i64x2())) }
652}
653
654#[inline]
659#[target_feature(enable = "sse2")]
660#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
661#[rustc_legacy_const_generics(1)]
662#[stable(feature = "simd_x86", since = "1.27.0")]
663#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
664pub const fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
665 static_assert_uimm_bits!(IMM8, 8);
666 unsafe { transmute(simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16))) }
667}
668
669#[inline]
674#[target_feature(enable = "sse2")]
675#[cfg_attr(test, assert_instr(psraw))]
676#[stable(feature = "simd_x86", since = "1.27.0")]
677pub fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
678 unsafe { transmute(psraw(a.as_i16x8(), count.as_i16x8())) }
679}
680
681#[inline]
686#[target_feature(enable = "sse2")]
687#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
688#[rustc_legacy_const_generics(1)]
689#[stable(feature = "simd_x86", since = "1.27.0")]
690#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
691pub const fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
692 static_assert_uimm_bits!(IMM8, 8);
693 unsafe { transmute(simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31)))) }
694}
695
696#[inline]
701#[target_feature(enable = "sse2")]
702#[cfg_attr(test, assert_instr(psrad))]
703#[stable(feature = "simd_x86", since = "1.27.0")]
704pub fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
705 unsafe { transmute(psrad(a.as_i32x4(), count.as_i32x4())) }
706}
707
708#[inline]
712#[target_feature(enable = "sse2")]
713#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
714#[rustc_legacy_const_generics(1)]
715#[stable(feature = "simd_x86", since = "1.27.0")]
716#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
717pub const fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
718 static_assert_uimm_bits!(IMM8, 8);
719 unsafe { _mm_srli_si128_impl::<IMM8>(a) }
720}
721
722#[inline]
725#[target_feature(enable = "sse2")]
726#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
727const unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
728 const fn mask(shift: i32, i: u32) -> u32 {
729 if (shift as u32) > 15 {
730 i + 16
731 } else {
732 i + (shift as u32)
733 }
734 }
735 let x: i8x16 = simd_shuffle!(
736 a.as_i8x16(),
737 i8x16::ZERO,
738 [
739 mask(IMM8, 0),
740 mask(IMM8, 1),
741 mask(IMM8, 2),
742 mask(IMM8, 3),
743 mask(IMM8, 4),
744 mask(IMM8, 5),
745 mask(IMM8, 6),
746 mask(IMM8, 7),
747 mask(IMM8, 8),
748 mask(IMM8, 9),
749 mask(IMM8, 10),
750 mask(IMM8, 11),
751 mask(IMM8, 12),
752 mask(IMM8, 13),
753 mask(IMM8, 14),
754 mask(IMM8, 15),
755 ],
756 );
757 transmute(x)
758}
759
760#[inline]
765#[target_feature(enable = "sse2")]
766#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
767#[rustc_legacy_const_generics(1)]
768#[stable(feature = "simd_x86", since = "1.27.0")]
769#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
770pub const fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
771 static_assert_uimm_bits!(IMM8, 8);
772 unsafe {
773 if IMM8 >= 16 {
774 _mm_setzero_si128()
775 } else {
776 transmute(simd_shr(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
777 }
778 }
779}
780
781#[inline]
786#[target_feature(enable = "sse2")]
787#[cfg_attr(test, assert_instr(psrlw))]
788#[stable(feature = "simd_x86", since = "1.27.0")]
789pub fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
790 unsafe { transmute(psrlw(a.as_i16x8(), count.as_i16x8())) }
791}
792
793#[inline]
798#[target_feature(enable = "sse2")]
799#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
800#[rustc_legacy_const_generics(1)]
801#[stable(feature = "simd_x86", since = "1.27.0")]
802#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
803pub const fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
804 static_assert_uimm_bits!(IMM8, 8);
805 unsafe {
806 if IMM8 >= 32 {
807 _mm_setzero_si128()
808 } else {
809 transmute(simd_shr(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
810 }
811 }
812}
813
814#[inline]
819#[target_feature(enable = "sse2")]
820#[cfg_attr(test, assert_instr(psrld))]
821#[stable(feature = "simd_x86", since = "1.27.0")]
822pub fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
823 unsafe { transmute(psrld(a.as_i32x4(), count.as_i32x4())) }
824}
825
826#[inline]
831#[target_feature(enable = "sse2")]
832#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
833#[rustc_legacy_const_generics(1)]
834#[stable(feature = "simd_x86", since = "1.27.0")]
835#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
836pub const fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
837 static_assert_uimm_bits!(IMM8, 8);
838 unsafe {
839 if IMM8 >= 64 {
840 _mm_setzero_si128()
841 } else {
842 transmute(simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
843 }
844 }
845}
846
847#[inline]
852#[target_feature(enable = "sse2")]
853#[cfg_attr(test, assert_instr(psrlq))]
854#[stable(feature = "simd_x86", since = "1.27.0")]
855pub fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
856 unsafe { transmute(psrlq(a.as_i64x2(), count.as_i64x2())) }
857}
858
859#[inline]
864#[target_feature(enable = "sse2")]
865#[cfg_attr(test, assert_instr(andps))]
866#[stable(feature = "simd_x86", since = "1.27.0")]
867#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
868pub const fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
869 unsafe { simd_and(a, b) }
870}
871
872#[inline]
877#[target_feature(enable = "sse2")]
878#[cfg_attr(test, assert_instr(andnps))]
879#[stable(feature = "simd_x86", since = "1.27.0")]
880#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
881pub const fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
882 unsafe { simd_and(simd_xor(_mm_set1_epi8(-1), a), b) }
883}
884
885#[inline]
890#[target_feature(enable = "sse2")]
891#[cfg_attr(test, assert_instr(orps))]
892#[stable(feature = "simd_x86", since = "1.27.0")]
893#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
894pub const fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
895 unsafe { simd_or(a, b) }
896}
897
898#[inline]
903#[target_feature(enable = "sse2")]
904#[cfg_attr(test, assert_instr(xorps))]
905#[stable(feature = "simd_x86", since = "1.27.0")]
906#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
907pub const fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
908 unsafe { simd_xor(a, b) }
909}
910
911#[inline]
915#[target_feature(enable = "sse2")]
916#[cfg_attr(test, assert_instr(pcmpeqb))]
917#[stable(feature = "simd_x86", since = "1.27.0")]
918#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
919pub const fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
920 unsafe { transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
921}
922
923#[inline]
927#[target_feature(enable = "sse2")]
928#[cfg_attr(test, assert_instr(pcmpeqw))]
929#[stable(feature = "simd_x86", since = "1.27.0")]
930#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
931pub const fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
932 unsafe { transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
933}
934
935#[inline]
939#[target_feature(enable = "sse2")]
940#[cfg_attr(test, assert_instr(pcmpeqd))]
941#[stable(feature = "simd_x86", since = "1.27.0")]
942#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
943pub const fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
944 unsafe { transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
945}
946
947#[inline]
951#[target_feature(enable = "sse2")]
952#[cfg_attr(test, assert_instr(pcmpgtb))]
953#[stable(feature = "simd_x86", since = "1.27.0")]
954#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
955pub const fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
956 unsafe { transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
957}
958
959#[inline]
963#[target_feature(enable = "sse2")]
964#[cfg_attr(test, assert_instr(pcmpgtw))]
965#[stable(feature = "simd_x86", since = "1.27.0")]
966#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
967pub const fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
968 unsafe { transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
969}
970
971#[inline]
975#[target_feature(enable = "sse2")]
976#[cfg_attr(test, assert_instr(pcmpgtd))]
977#[stable(feature = "simd_x86", since = "1.27.0")]
978#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
979pub const fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
980 unsafe { transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
981}
982
983#[inline]
987#[target_feature(enable = "sse2")]
988#[cfg_attr(test, assert_instr(pcmpgtb))]
989#[stable(feature = "simd_x86", since = "1.27.0")]
990#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
991pub const fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
992 unsafe { transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
993}
994
995#[inline]
999#[target_feature(enable = "sse2")]
1000#[cfg_attr(test, assert_instr(pcmpgtw))]
1001#[stable(feature = "simd_x86", since = "1.27.0")]
1002#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1003pub const fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
1004 unsafe { transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
1005}
1006
1007#[inline]
1011#[target_feature(enable = "sse2")]
1012#[cfg_attr(test, assert_instr(pcmpgtd))]
1013#[stable(feature = "simd_x86", since = "1.27.0")]
1014#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1015pub const fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
1016 unsafe { transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
1017}
1018
1019#[inline]
1024#[target_feature(enable = "sse2")]
1025#[cfg_attr(test, assert_instr(cvtdq2pd))]
1026#[stable(feature = "simd_x86", since = "1.27.0")]
1027#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1028pub const fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
1029 unsafe {
1030 let a = a.as_i32x4();
1031 simd_cast::<i32x2, __m128d>(simd_shuffle!(a, a, [0, 1]))
1032 }
1033}
1034
1035#[inline]
1040#[target_feature(enable = "sse2")]
1041#[cfg_attr(test, assert_instr(cvtsi2sd))]
1042#[stable(feature = "simd_x86", since = "1.27.0")]
1043#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1044pub const fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
1045 unsafe { simd_insert!(a, 0, b as f64) }
1046}
1047
1048#[inline]
1053#[target_feature(enable = "sse2")]
1054#[cfg_attr(test, assert_instr(cvtdq2ps))]
1055#[stable(feature = "simd_x86", since = "1.27.0")]
1056#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1057pub const fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
1058 unsafe { transmute(simd_cast::<_, f32x4>(a.as_i32x4())) }
1059}
1060
1061#[inline]
1066#[target_feature(enable = "sse2")]
1067#[cfg_attr(test, assert_instr(cvtps2dq))]
1068#[stable(feature = "simd_x86", since = "1.27.0")]
1069pub fn _mm_cvtps_epi32(a: __m128) -> __m128i {
1070 unsafe { transmute(cvtps2dq(a)) }
1071}
1072
1073#[inline]
1078#[target_feature(enable = "sse2")]
1079#[stable(feature = "simd_x86", since = "1.27.0")]
1080#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1081pub const fn _mm_cvtsi32_si128(a: i32) -> __m128i {
1082 unsafe { transmute(i32x4::new(a, 0, 0, 0)) }
1083}
1084
1085#[inline]
1089#[target_feature(enable = "sse2")]
1090#[stable(feature = "simd_x86", since = "1.27.0")]
1091#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1092pub const fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
1093 unsafe { simd_extract!(a.as_i32x4(), 0) }
1094}
1095
1096#[inline]
1101#[target_feature(enable = "sse2")]
1102#[stable(feature = "simd_x86", since = "1.27.0")]
1104#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1105pub const fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
1106 unsafe { transmute(i64x2::new(e0, e1)) }
1107}
1108
1109#[inline]
1113#[target_feature(enable = "sse2")]
1114#[stable(feature = "simd_x86", since = "1.27.0")]
1116#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1117pub const fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
1118 unsafe { transmute(i32x4::new(e0, e1, e2, e3)) }
1119}
1120
1121#[inline]
1125#[target_feature(enable = "sse2")]
1126#[stable(feature = "simd_x86", since = "1.27.0")]
1128#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1129pub const fn _mm_set_epi16(
1130 e7: i16,
1131 e6: i16,
1132 e5: i16,
1133 e4: i16,
1134 e3: i16,
1135 e2: i16,
1136 e1: i16,
1137 e0: i16,
1138) -> __m128i {
1139 unsafe { transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) }
1140}
1141
1142#[inline]
1146#[target_feature(enable = "sse2")]
1147#[stable(feature = "simd_x86", since = "1.27.0")]
1149#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1150pub const fn _mm_set_epi8(
1151 e15: i8,
1152 e14: i8,
1153 e13: i8,
1154 e12: i8,
1155 e11: i8,
1156 e10: i8,
1157 e9: i8,
1158 e8: i8,
1159 e7: i8,
1160 e6: i8,
1161 e5: i8,
1162 e4: i8,
1163 e3: i8,
1164 e2: i8,
1165 e1: i8,
1166 e0: i8,
1167) -> __m128i {
1168 unsafe {
1169 #[rustfmt::skip]
1170 transmute(i8x16::new(
1171 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
1172 ))
1173 }
1174}
1175
1176#[inline]
1180#[target_feature(enable = "sse2")]
1181#[stable(feature = "simd_x86", since = "1.27.0")]
1183#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1184pub const fn _mm_set1_epi64x(a: i64) -> __m128i {
1185 i64x2::splat(a).as_m128i()
1186}
1187
1188#[inline]
1192#[target_feature(enable = "sse2")]
1193#[stable(feature = "simd_x86", since = "1.27.0")]
1195#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1196pub const fn _mm_set1_epi32(a: i32) -> __m128i {
1197 i32x4::splat(a).as_m128i()
1198}
1199
1200#[inline]
1204#[target_feature(enable = "sse2")]
1205#[stable(feature = "simd_x86", since = "1.27.0")]
1207#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1208pub const fn _mm_set1_epi16(a: i16) -> __m128i {
1209 i16x8::splat(a).as_m128i()
1210}
1211
1212#[inline]
1216#[target_feature(enable = "sse2")]
1217#[stable(feature = "simd_x86", since = "1.27.0")]
1219#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1220pub const fn _mm_set1_epi8(a: i8) -> __m128i {
1221 i8x16::splat(a).as_m128i()
1222}
1223
1224#[inline]
1228#[target_feature(enable = "sse2")]
1229#[stable(feature = "simd_x86", since = "1.27.0")]
1231#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1232pub const fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
1233 _mm_set_epi32(e0, e1, e2, e3)
1234}
1235
1236#[inline]
1240#[target_feature(enable = "sse2")]
1241#[stable(feature = "simd_x86", since = "1.27.0")]
1243#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1244pub const fn _mm_setr_epi16(
1245 e7: i16,
1246 e6: i16,
1247 e5: i16,
1248 e4: i16,
1249 e3: i16,
1250 e2: i16,
1251 e1: i16,
1252 e0: i16,
1253) -> __m128i {
1254 _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
1255}
1256
1257#[inline]
1261#[target_feature(enable = "sse2")]
1262#[stable(feature = "simd_x86", since = "1.27.0")]
1264#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1265pub const fn _mm_setr_epi8(
1266 e15: i8,
1267 e14: i8,
1268 e13: i8,
1269 e12: i8,
1270 e11: i8,
1271 e10: i8,
1272 e9: i8,
1273 e8: i8,
1274 e7: i8,
1275 e6: i8,
1276 e5: i8,
1277 e4: i8,
1278 e3: i8,
1279 e2: i8,
1280 e1: i8,
1281 e0: i8,
1282) -> __m128i {
1283 #[rustfmt::skip]
1284 _mm_set_epi8(
1285 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
1286 )
1287}
1288
1289#[inline]
1293#[target_feature(enable = "sse2")]
1294#[cfg_attr(test, assert_instr(xorps))]
1295#[stable(feature = "simd_x86", since = "1.27.0")]
1296#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1297pub const fn _mm_setzero_si128() -> __m128i {
1298 const { unsafe { mem::zeroed() } }
1299}
1300
1301#[inline]
1305#[target_feature(enable = "sse2")]
1306#[stable(feature = "simd_x86", since = "1.27.0")]
1307#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1308pub const unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
1309 _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
1310}
1311
1312#[inline]
1318#[target_feature(enable = "sse2")]
1319#[cfg_attr(
1320 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1321 assert_instr(movaps)
1322)]
1323#[stable(feature = "simd_x86", since = "1.27.0")]
1324#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1325pub const unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
1326 *mem_addr
1327}
1328
1329#[inline]
1335#[target_feature(enable = "sse2")]
1336#[cfg_attr(test, assert_instr(movups))]
1337#[stable(feature = "simd_x86", since = "1.27.0")]
1338#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1339pub const unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
1340 let mut dst: __m128i = _mm_undefined_si128();
1341 ptr::copy_nonoverlapping(
1342 mem_addr as *const u8,
1343 ptr::addr_of_mut!(dst) as *mut u8,
1344 mem::size_of::<__m128i>(),
1345 );
1346 dst
1347}
1348
1349#[inline]
1369#[target_feature(enable = "sse2")]
1370#[cfg_attr(test, assert_instr(maskmovdqu))]
1371#[stable(feature = "simd_x86", since = "1.27.0")]
1372pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) {
1373 maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
1374}
1375
1376#[inline]
1382#[target_feature(enable = "sse2")]
1383#[cfg_attr(
1384 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1385 assert_instr(movaps)
1386)]
1387#[stable(feature = "simd_x86", since = "1.27.0")]
1388#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1389pub const unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
1390 *mem_addr = a;
1391}
1392
1393#[inline]
1399#[target_feature(enable = "sse2")]
1400#[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")]
1402#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1403pub const unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
1404 mem_addr.write_unaligned(a);
1405}
1406
1407#[inline]
1413#[target_feature(enable = "sse2")]
1414#[stable(feature = "simd_x86", since = "1.27.0")]
1415#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1416pub const unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
1417 ptr::copy_nonoverlapping(ptr::addr_of!(a) as *const u8, mem_addr as *mut u8, 8);
1418}
1419
/// Stores the 128-bit vector `a` at `mem_addr` by executing `movntdq` through inline
/// assembly.
///
/// # Safety
///
/// NOTE(review): `mem_addr` is used as the instruction's memory operand, so it presumably
/// must be valid for a 16-byte write and 16-byte aligned. Confirm against the Intel
/// manual. Intel documents `movntdq` as a non-temporal store; callers likely need a store
/// fence before other code relies on the written data. TODO confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
    crate::arch::asm!(
        // `vps!` is defined elsewhere; it presumably builds the template string from the
        // mnemonic and the operand suffix, using the `p` operand as the address.
        vps!("movntdq", ",{a}"),
        p = in(reg) mem_addr,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}
1447
/// Stores the 32-bit integer `a` at `mem_addr` by executing `movnti` through inline
/// assembly.
///
/// # Safety
///
/// NOTE(review): `mem_addr` is used as the instruction's memory operand, so it presumably
/// must be valid for a 4-byte write. Intel documents `movnti` as a non-temporal store;
/// callers likely need a store fence before other code relies on the written data.
/// TODO confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movnti))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
    crate::arch::asm!(
        // `{a:e}` selects the 32-bit name of the register holding `a`. `vps!` is defined
        // elsewhere and presumably supplies the address operand `p`.
        vps!("movnti", ",{a:e}"), p = in(reg) mem_addr,
        a = in(reg) a,
        options(nostack, preserves_flags),
    );
}
1475
1476#[inline]
1481#[target_feature(enable = "sse2")]
1482#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movq))]
1484#[stable(feature = "simd_x86", since = "1.27.0")]
1485#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1486pub const fn _mm_move_epi64(a: __m128i) -> __m128i {
1487 unsafe {
1488 let r: i64x2 = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 2]);
1489 transmute(r)
1490 }
1491}
1492
1493#[inline]
1498#[target_feature(enable = "sse2")]
1499#[cfg_attr(test, assert_instr(packsswb))]
1500#[stable(feature = "simd_x86", since = "1.27.0")]
1501#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1502pub const fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
1503 unsafe {
1504 let max = simd_splat(i8::MAX as i16);
1505 let min = simd_splat(i8::MIN as i16);
1506
1507 let clamped_a = simd_imax(simd_imin(a.as_i16x8(), max), min)
1508 .as_m128i()
1509 .as_i8x16();
1510 let clamped_b = simd_imax(simd_imin(b.as_i16x8(), max), min)
1511 .as_m128i()
1512 .as_i8x16();
1513
1514 const IDXS: [u32; 16] = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30];
1517 let result: i8x16 = simd_shuffle!(clamped_a, clamped_b, IDXS);
1518
1519 result.as_m128i()
1520 }
1521}
1522
1523#[inline]
1528#[target_feature(enable = "sse2")]
1529#[cfg_attr(test, assert_instr(packssdw))]
1530#[stable(feature = "simd_x86", since = "1.27.0")]
1531#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1532pub const fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
1533 unsafe {
1534 let max = simd_splat(i16::MAX as i32);
1535 let min = simd_splat(i16::MIN as i32);
1536
1537 let clamped_a = simd_imax(simd_imin(a.as_i32x4(), max), min);
1538 let clamped_b = simd_imax(simd_imin(b.as_i32x4(), max), min);
1539
1540 let clamped_a: i16x4 = simd_cast(clamped_a);
1541 let clamped_b: i16x4 = simd_cast(clamped_b);
1542
1543 let a: i64 = transmute(clamped_a);
1544 let b: i64 = transmute(clamped_b);
1545 i64x2::new(a, b).as_m128i()
1546 }
1547}
1548
1549#[inline]
1554#[target_feature(enable = "sse2")]
1555#[cfg_attr(test, assert_instr(packuswb))]
1556#[stable(feature = "simd_x86", since = "1.27.0")]
1557#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1558pub const fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
1559 unsafe {
1560 let max = simd_splat(u8::MAX as i16);
1561 let min = simd_splat(u8::MIN as i16);
1562
1563 let clamped_a = simd_imax(simd_imin(a.as_i16x8(), max), min)
1564 .as_m128i()
1565 .as_i8x16();
1566 let clamped_b = simd_imax(simd_imin(b.as_i16x8(), max), min)
1567 .as_m128i()
1568 .as_i8x16();
1569
1570 const IDXS: [u32; 16] = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30];
1575 let result: i8x16 = simd_shuffle!(clamped_a, clamped_b, IDXS);
1576
1577 result.as_m128i()
1578 }
1579}
1580
1581#[inline]
1585#[target_feature(enable = "sse2")]
1586#[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))]
1587#[rustc_legacy_const_generics(1)]
1588#[stable(feature = "simd_x86", since = "1.27.0")]
1589#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1590pub const fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 {
1591 static_assert_uimm_bits!(IMM8, 3);
1592 unsafe { simd_extract!(a.as_u16x8(), IMM8 as u32, u16) as i32 }
1593}
1594
1595#[inline]
1599#[target_feature(enable = "sse2")]
1600#[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))]
1601#[rustc_legacy_const_generics(2)]
1602#[stable(feature = "simd_x86", since = "1.27.0")]
1603#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1604pub const fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
1605 static_assert_uimm_bits!(IMM8, 3);
1606 unsafe { transmute(simd_insert!(a.as_i16x8(), IMM8 as u32, i as i16)) }
1607}
1608
1609#[inline]
1613#[target_feature(enable = "sse2")]
1614#[cfg_attr(test, assert_instr(pmovmskb))]
1615#[stable(feature = "simd_x86", since = "1.27.0")]
1616#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1617pub const fn _mm_movemask_epi8(a: __m128i) -> i32 {
1618 unsafe {
1619 let z = i8x16::ZERO;
1620 let m: i8x16 = simd_lt(a.as_i8x16(), z);
1621 simd_bitmask::<_, u16>(m) as u32 as i32
1622 }
1623}
1624
1625#[inline]
1629#[target_feature(enable = "sse2")]
1630#[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))]
1631#[rustc_legacy_const_generics(1)]
1632#[stable(feature = "simd_x86", since = "1.27.0")]
1633#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1634pub const fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
1635 static_assert_uimm_bits!(IMM8, 8);
1636 unsafe {
1637 let a = a.as_i32x4();
1638 let x: i32x4 = simd_shuffle!(
1639 a,
1640 a,
1641 [
1642 IMM8 as u32 & 0b11,
1643 (IMM8 as u32 >> 2) & 0b11,
1644 (IMM8 as u32 >> 4) & 0b11,
1645 (IMM8 as u32 >> 6) & 0b11,
1646 ],
1647 );
1648 transmute(x)
1649 }
1650}
1651
1652#[inline]
1660#[target_feature(enable = "sse2")]
1661#[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))]
1662#[rustc_legacy_const_generics(1)]
1663#[stable(feature = "simd_x86", since = "1.27.0")]
1664#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1665pub const fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1666 static_assert_uimm_bits!(IMM8, 8);
1667 unsafe {
1668 let a = a.as_i16x8();
1669 let x: i16x8 = simd_shuffle!(
1670 a,
1671 a,
1672 [
1673 0,
1674 1,
1675 2,
1676 3,
1677 (IMM8 as u32 & 0b11) + 4,
1678 ((IMM8 as u32 >> 2) & 0b11) + 4,
1679 ((IMM8 as u32 >> 4) & 0b11) + 4,
1680 ((IMM8 as u32 >> 6) & 0b11) + 4,
1681 ],
1682 );
1683 transmute(x)
1684 }
1685}
1686
1687#[inline]
1695#[target_feature(enable = "sse2")]
1696#[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))]
1697#[rustc_legacy_const_generics(1)]
1698#[stable(feature = "simd_x86", since = "1.27.0")]
1699#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1700pub const fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1701 static_assert_uimm_bits!(IMM8, 8);
1702 unsafe {
1703 let a = a.as_i16x8();
1704 let x: i16x8 = simd_shuffle!(
1705 a,
1706 a,
1707 [
1708 IMM8 as u32 & 0b11,
1709 (IMM8 as u32 >> 2) & 0b11,
1710 (IMM8 as u32 >> 4) & 0b11,
1711 (IMM8 as u32 >> 6) & 0b11,
1712 4,
1713 5,
1714 6,
1715 7,
1716 ],
1717 );
1718 transmute(x)
1719 }
1720}
1721
1722#[inline]
1726#[target_feature(enable = "sse2")]
1727#[cfg_attr(test, assert_instr(punpckhbw))]
1728#[stable(feature = "simd_x86", since = "1.27.0")]
1729#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1730pub const fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
1731 unsafe {
1732 transmute::<i8x16, _>(simd_shuffle!(
1733 a.as_i8x16(),
1734 b.as_i8x16(),
1735 [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
1736 ))
1737 }
1738}
1739
1740#[inline]
1744#[target_feature(enable = "sse2")]
1745#[cfg_attr(test, assert_instr(punpckhwd))]
1746#[stable(feature = "simd_x86", since = "1.27.0")]
1747#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1748pub const fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
1749 unsafe {
1750 let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
1751 transmute::<i16x8, _>(x)
1752 }
1753}
1754
1755#[inline]
1759#[target_feature(enable = "sse2")]
1760#[cfg_attr(test, assert_instr(unpckhps))]
1761#[stable(feature = "simd_x86", since = "1.27.0")]
1762#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1763pub const fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
1764 unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) }
1765}
1766
1767#[inline]
1771#[target_feature(enable = "sse2")]
1772#[cfg_attr(test, assert_instr(unpckhpd))]
1773#[stable(feature = "simd_x86", since = "1.27.0")]
1774#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1775pub const fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
1776 unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [1, 3])) }
1777}
1778
1779#[inline]
1783#[target_feature(enable = "sse2")]
1784#[cfg_attr(test, assert_instr(punpcklbw))]
1785#[stable(feature = "simd_x86", since = "1.27.0")]
1786#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1787pub const fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
1788 unsafe {
1789 transmute::<i8x16, _>(simd_shuffle!(
1790 a.as_i8x16(),
1791 b.as_i8x16(),
1792 [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
1793 ))
1794 }
1795}
1796
1797#[inline]
1801#[target_feature(enable = "sse2")]
1802#[cfg_attr(test, assert_instr(punpcklwd))]
1803#[stable(feature = "simd_x86", since = "1.27.0")]
1804#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1805pub const fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
1806 unsafe {
1807 let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
1808 transmute::<i16x8, _>(x)
1809 }
1810}
1811
1812#[inline]
1816#[target_feature(enable = "sse2")]
1817#[cfg_attr(test, assert_instr(unpcklps))]
1818#[stable(feature = "simd_x86", since = "1.27.0")]
1819#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1820pub const fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
1821 unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) }
1822}
1823
1824#[inline]
1828#[target_feature(enable = "sse2")]
1829#[cfg_attr(test, assert_instr(movlhps))]
1830#[stable(feature = "simd_x86", since = "1.27.0")]
1831#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1832pub const fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
1833 unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [0, 2])) }
1834}
1835
1836#[inline]
1841#[target_feature(enable = "sse2")]
1842#[cfg_attr(test, assert_instr(addsd))]
1843#[stable(feature = "simd_x86", since = "1.27.0")]
1844#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1845pub const fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
1846 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) }
1847}
1848
1849#[inline]
1854#[target_feature(enable = "sse2")]
1855#[cfg_attr(test, assert_instr(addpd))]
1856#[stable(feature = "simd_x86", since = "1.27.0")]
1857#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1858pub const fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
1859 unsafe { simd_add(a, b) }
1860}
1861
1862#[inline]
1867#[target_feature(enable = "sse2")]
1868#[cfg_attr(test, assert_instr(divsd))]
1869#[stable(feature = "simd_x86", since = "1.27.0")]
1870#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1871pub const fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d {
1872 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) }
1873}
1874
1875#[inline]
1880#[target_feature(enable = "sse2")]
1881#[cfg_attr(test, assert_instr(divpd))]
1882#[stable(feature = "simd_x86", since = "1.27.0")]
1883#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1884pub const fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d {
1885 unsafe { simd_div(a, b) }
1886}
1887
1888#[inline]
1893#[target_feature(enable = "sse2")]
1894#[cfg_attr(test, assert_instr(maxsd))]
1895#[stable(feature = "simd_x86", since = "1.27.0")]
1896pub fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d {
1897 unsafe { maxsd(a, b) }
1898}
1899
1900#[inline]
1905#[target_feature(enable = "sse2")]
1906#[cfg_attr(test, assert_instr(maxpd))]
1907#[stable(feature = "simd_x86", since = "1.27.0")]
1908pub fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d {
1909 unsafe { maxpd(a, b) }
1910}
1911
1912#[inline]
1917#[target_feature(enable = "sse2")]
1918#[cfg_attr(test, assert_instr(minsd))]
1919#[stable(feature = "simd_x86", since = "1.27.0")]
1920pub fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d {
1921 unsafe { minsd(a, b) }
1922}
1923
1924#[inline]
1929#[target_feature(enable = "sse2")]
1930#[cfg_attr(test, assert_instr(minpd))]
1931#[stable(feature = "simd_x86", since = "1.27.0")]
1932pub fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d {
1933 unsafe { minpd(a, b) }
1934}
1935
1936#[inline]
1941#[target_feature(enable = "sse2")]
1942#[cfg_attr(test, assert_instr(mulsd))]
1943#[stable(feature = "simd_x86", since = "1.27.0")]
1944#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1945pub const fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d {
1946 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) }
1947}
1948
1949#[inline]
1954#[target_feature(enable = "sse2")]
1955#[cfg_attr(test, assert_instr(mulpd))]
1956#[stable(feature = "simd_x86", since = "1.27.0")]
1957#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1958pub const fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d {
1959 unsafe { simd_mul(a, b) }
1960}
1961
1962#[inline]
1967#[target_feature(enable = "sse2")]
1968#[cfg_attr(test, assert_instr(sqrtsd))]
1969#[stable(feature = "simd_x86", since = "1.27.0")]
1970pub fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d {
1971 unsafe { simd_insert!(a, 0, sqrtf64(_mm_cvtsd_f64(b))) }
1972}
1973
1974#[inline]
1978#[target_feature(enable = "sse2")]
1979#[cfg_attr(test, assert_instr(sqrtpd))]
1980#[stable(feature = "simd_x86", since = "1.27.0")]
1981pub fn _mm_sqrt_pd(a: __m128d) -> __m128d {
1982 unsafe { simd_fsqrt(a) }
1983}
1984
1985#[inline]
1990#[target_feature(enable = "sse2")]
1991#[cfg_attr(test, assert_instr(subsd))]
1992#[stable(feature = "simd_x86", since = "1.27.0")]
1993#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1994pub const fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d {
1995 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) }
1996}
1997
1998#[inline]
2003#[target_feature(enable = "sse2")]
2004#[cfg_attr(test, assert_instr(subpd))]
2005#[stable(feature = "simd_x86", since = "1.27.0")]
2006#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2007pub const fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d {
2008 unsafe { simd_sub(a, b) }
2009}
2010
2011#[inline]
2016#[target_feature(enable = "sse2")]
2017#[cfg_attr(test, assert_instr(andps))]
2018#[stable(feature = "simd_x86", since = "1.27.0")]
2019#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2020pub const fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d {
2021 unsafe {
2022 let a: __m128i = transmute(a);
2023 let b: __m128i = transmute(b);
2024 transmute(_mm_and_si128(a, b))
2025 }
2026}
2027
2028#[inline]
2032#[target_feature(enable = "sse2")]
2033#[cfg_attr(test, assert_instr(andnps))]
2034#[stable(feature = "simd_x86", since = "1.27.0")]
2035#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2036pub const fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d {
2037 unsafe {
2038 let a: __m128i = transmute(a);
2039 let b: __m128i = transmute(b);
2040 transmute(_mm_andnot_si128(a, b))
2041 }
2042}
2043
2044#[inline]
2048#[target_feature(enable = "sse2")]
2049#[cfg_attr(test, assert_instr(orps))]
2050#[stable(feature = "simd_x86", since = "1.27.0")]
2051#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2052pub const fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d {
2053 unsafe {
2054 let a: __m128i = transmute(a);
2055 let b: __m128i = transmute(b);
2056 transmute(_mm_or_si128(a, b))
2057 }
2058}
2059
2060#[inline]
2064#[target_feature(enable = "sse2")]
2065#[cfg_attr(test, assert_instr(xorps))]
2066#[stable(feature = "simd_x86", since = "1.27.0")]
2067#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2068pub const fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d {
2069 unsafe {
2070 let a: __m128i = transmute(a);
2071 let b: __m128i = transmute(b);
2072 transmute(_mm_xor_si128(a, b))
2073 }
2074}
2075
2076#[inline]
2081#[target_feature(enable = "sse2")]
2082#[cfg_attr(test, assert_instr(cmpeqsd))]
2083#[stable(feature = "simd_x86", since = "1.27.0")]
2084pub fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d {
2085 unsafe { cmpsd(a, b, 0) }
2086}
2087
2088#[inline]
2093#[target_feature(enable = "sse2")]
2094#[cfg_attr(test, assert_instr(cmpltsd))]
2095#[stable(feature = "simd_x86", since = "1.27.0")]
2096pub fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d {
2097 unsafe { cmpsd(a, b, 1) }
2098}
2099
2100#[inline]
2105#[target_feature(enable = "sse2")]
2106#[cfg_attr(test, assert_instr(cmplesd))]
2107#[stable(feature = "simd_x86", since = "1.27.0")]
2108pub fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d {
2109 unsafe { cmpsd(a, b, 2) }
2110}
2111
2112#[inline]
2117#[target_feature(enable = "sse2")]
2118#[cfg_attr(test, assert_instr(cmpltsd))]
2119#[stable(feature = "simd_x86", since = "1.27.0")]
2120pub fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
2121 unsafe { simd_insert!(_mm_cmplt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2122}
2123
2124#[inline]
2129#[target_feature(enable = "sse2")]
2130#[cfg_attr(test, assert_instr(cmplesd))]
2131#[stable(feature = "simd_x86", since = "1.27.0")]
2132pub fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d {
2133 unsafe { simd_insert!(_mm_cmple_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2134}
2135
2136#[inline]
2143#[target_feature(enable = "sse2")]
2144#[cfg_attr(test, assert_instr(cmpordsd))]
2145#[stable(feature = "simd_x86", since = "1.27.0")]
2146pub fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d {
2147 unsafe { cmpsd(a, b, 7) }
2148}
2149
2150#[inline]
2156#[target_feature(enable = "sse2")]
2157#[cfg_attr(test, assert_instr(cmpunordsd))]
2158#[stable(feature = "simd_x86", since = "1.27.0")]
2159pub fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d {
2160 unsafe { cmpsd(a, b, 3) }
2161}
2162
2163#[inline]
2168#[target_feature(enable = "sse2")]
2169#[cfg_attr(test, assert_instr(cmpneqsd))]
2170#[stable(feature = "simd_x86", since = "1.27.0")]
2171pub fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d {
2172 unsafe { cmpsd(a, b, 4) }
2173}
2174
2175#[inline]
2180#[target_feature(enable = "sse2")]
2181#[cfg_attr(test, assert_instr(cmpnltsd))]
2182#[stable(feature = "simd_x86", since = "1.27.0")]
2183pub fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d {
2184 unsafe { cmpsd(a, b, 5) }
2185}
2186
2187#[inline]
2192#[target_feature(enable = "sse2")]
2193#[cfg_attr(test, assert_instr(cmpnlesd))]
2194#[stable(feature = "simd_x86", since = "1.27.0")]
2195pub fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d {
2196 unsafe { cmpsd(a, b, 6) }
2197}
2198
2199#[inline]
2204#[target_feature(enable = "sse2")]
2205#[cfg_attr(test, assert_instr(cmpnltsd))]
2206#[stable(feature = "simd_x86", since = "1.27.0")]
2207pub fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d {
2208 unsafe { simd_insert!(_mm_cmpnlt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2209}
2210
2211#[inline]
2216#[target_feature(enable = "sse2")]
2217#[cfg_attr(test, assert_instr(cmpnlesd))]
2218#[stable(feature = "simd_x86", since = "1.27.0")]
2219pub fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d {
2220 unsafe { simd_insert!(_mm_cmpnle_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2221}
2222
2223#[inline]
2227#[target_feature(enable = "sse2")]
2228#[cfg_attr(test, assert_instr(cmpeqpd))]
2229#[stable(feature = "simd_x86", since = "1.27.0")]
2230pub fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d {
2231 unsafe { cmppd(a, b, 0) }
2232}
2233
2234#[inline]
2238#[target_feature(enable = "sse2")]
2239#[cfg_attr(test, assert_instr(cmpltpd))]
2240#[stable(feature = "simd_x86", since = "1.27.0")]
2241pub fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d {
2242 unsafe { cmppd(a, b, 1) }
2243}
2244
2245#[inline]
2249#[target_feature(enable = "sse2")]
2250#[cfg_attr(test, assert_instr(cmplepd))]
2251#[stable(feature = "simd_x86", since = "1.27.0")]
2252pub fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d {
2253 unsafe { cmppd(a, b, 2) }
2254}
2255
2256#[inline]
2260#[target_feature(enable = "sse2")]
2261#[cfg_attr(test, assert_instr(cmpltpd))]
2262#[stable(feature = "simd_x86", since = "1.27.0")]
2263pub fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d {
2264 _mm_cmplt_pd(b, a)
2265}
2266
2267#[inline]
2271#[target_feature(enable = "sse2")]
2272#[cfg_attr(test, assert_instr(cmplepd))]
2273#[stable(feature = "simd_x86", since = "1.27.0")]
2274pub fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d {
2275 _mm_cmple_pd(b, a)
2276}
2277
2278#[inline]
2282#[target_feature(enable = "sse2")]
2283#[cfg_attr(test, assert_instr(cmpordpd))]
2284#[stable(feature = "simd_x86", since = "1.27.0")]
2285pub fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d {
2286 unsafe { cmppd(a, b, 7) }
2287}
2288
2289#[inline]
2293#[target_feature(enable = "sse2")]
2294#[cfg_attr(test, assert_instr(cmpunordpd))]
2295#[stable(feature = "simd_x86", since = "1.27.0")]
2296pub fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d {
2297 unsafe { cmppd(a, b, 3) }
2298}
2299
2300#[inline]
2304#[target_feature(enable = "sse2")]
2305#[cfg_attr(test, assert_instr(cmpneqpd))]
2306#[stable(feature = "simd_x86", since = "1.27.0")]
2307pub fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d {
2308 unsafe { cmppd(a, b, 4) }
2309}
2310
2311#[inline]
2315#[target_feature(enable = "sse2")]
2316#[cfg_attr(test, assert_instr(cmpnltpd))]
2317#[stable(feature = "simd_x86", since = "1.27.0")]
2318pub fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d {
2319 unsafe { cmppd(a, b, 5) }
2320}
2321
2322#[inline]
2326#[target_feature(enable = "sse2")]
2327#[cfg_attr(test, assert_instr(cmpnlepd))]
2328#[stable(feature = "simd_x86", since = "1.27.0")]
2329pub fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d {
2330 unsafe { cmppd(a, b, 6) }
2331}
2332
2333#[inline]
2337#[target_feature(enable = "sse2")]
2338#[cfg_attr(test, assert_instr(cmpnltpd))]
2339#[stable(feature = "simd_x86", since = "1.27.0")]
2340pub fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d {
2341 _mm_cmpnlt_pd(b, a)
2342}
2343
2344#[inline]
2349#[target_feature(enable = "sse2")]
2350#[cfg_attr(test, assert_instr(cmpnlepd))]
2351#[stable(feature = "simd_x86", since = "1.27.0")]
2352pub fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d {
2353 _mm_cmpnle_pd(b, a)
2354}
2355
2356#[inline]
2360#[target_feature(enable = "sse2")]
2361#[cfg_attr(test, assert_instr(comisd))]
2362#[stable(feature = "simd_x86", since = "1.27.0")]
2363pub fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 {
2364 unsafe { comieqsd(a, b) }
2365}
2366
2367#[inline]
2371#[target_feature(enable = "sse2")]
2372#[cfg_attr(test, assert_instr(comisd))]
2373#[stable(feature = "simd_x86", since = "1.27.0")]
2374pub fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 {
2375 unsafe { comiltsd(a, b) }
2376}
2377
2378#[inline]
2382#[target_feature(enable = "sse2")]
2383#[cfg_attr(test, assert_instr(comisd))]
2384#[stable(feature = "simd_x86", since = "1.27.0")]
2385pub fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 {
2386 unsafe { comilesd(a, b) }
2387}
2388
2389#[inline]
2393#[target_feature(enable = "sse2")]
2394#[cfg_attr(test, assert_instr(comisd))]
2395#[stable(feature = "simd_x86", since = "1.27.0")]
2396pub fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 {
2397 unsafe { comigtsd(a, b) }
2398}
2399
2400#[inline]
2404#[target_feature(enable = "sse2")]
2405#[cfg_attr(test, assert_instr(comisd))]
2406#[stable(feature = "simd_x86", since = "1.27.0")]
2407pub fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 {
2408 unsafe { comigesd(a, b) }
2409}
2410
2411#[inline]
2415#[target_feature(enable = "sse2")]
2416#[cfg_attr(test, assert_instr(comisd))]
2417#[stable(feature = "simd_x86", since = "1.27.0")]
2418pub fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 {
2419 unsafe { comineqsd(a, b) }
2420}
2421
2422#[inline]
2426#[target_feature(enable = "sse2")]
2427#[cfg_attr(test, assert_instr(ucomisd))]
2428#[stable(feature = "simd_x86", since = "1.27.0")]
2429pub fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 {
2430 unsafe { ucomieqsd(a, b) }
2431}
2432
2433#[inline]
2437#[target_feature(enable = "sse2")]
2438#[cfg_attr(test, assert_instr(ucomisd))]
2439#[stable(feature = "simd_x86", since = "1.27.0")]
2440pub fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 {
2441 unsafe { ucomiltsd(a, b) }
2442}
2443
2444#[inline]
2448#[target_feature(enable = "sse2")]
2449#[cfg_attr(test, assert_instr(ucomisd))]
2450#[stable(feature = "simd_x86", since = "1.27.0")]
2451pub fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 {
2452 unsafe { ucomilesd(a, b) }
2453}
2454
2455#[inline]
2459#[target_feature(enable = "sse2")]
2460#[cfg_attr(test, assert_instr(ucomisd))]
2461#[stable(feature = "simd_x86", since = "1.27.0")]
2462pub fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 {
2463 unsafe { ucomigtsd(a, b) }
2464}
2465
2466#[inline]
2470#[target_feature(enable = "sse2")]
2471#[cfg_attr(test, assert_instr(ucomisd))]
2472#[stable(feature = "simd_x86", since = "1.27.0")]
2473pub fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 {
2474 unsafe { ucomigesd(a, b) }
2475}
2476
2477#[inline]
2481#[target_feature(enable = "sse2")]
2482#[cfg_attr(test, assert_instr(ucomisd))]
2483#[stable(feature = "simd_x86", since = "1.27.0")]
2484pub fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 {
2485 unsafe { ucomineqsd(a, b) }
2486}
2487
2488#[inline]
2493#[target_feature(enable = "sse2")]
2494#[cfg_attr(test, assert_instr(cvtpd2ps))]
2495#[stable(feature = "simd_x86", since = "1.27.0")]
2496#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2497pub const fn _mm_cvtpd_ps(a: __m128d) -> __m128 {
2498 unsafe {
2499 let r = simd_cast::<_, f32x2>(a.as_f64x2());
2500 let zero = f32x2::ZERO;
2501 transmute::<f32x4, _>(simd_shuffle!(r, zero, [0, 1, 2, 3]))
2502 }
2503}
2504
2505#[inline]
2511#[target_feature(enable = "sse2")]
2512#[cfg_attr(test, assert_instr(cvtps2pd))]
2513#[stable(feature = "simd_x86", since = "1.27.0")]
2514#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2515pub const fn _mm_cvtps_pd(a: __m128) -> __m128d {
2516 unsafe {
2517 let a = a.as_f32x4();
2518 transmute(simd_cast::<f32x2, f64x2>(simd_shuffle!(a, a, [0, 1])))
2519 }
2520}
2521
2522#[inline]
2527#[target_feature(enable = "sse2")]
2528#[cfg_attr(test, assert_instr(cvtpd2dq))]
2529#[stable(feature = "simd_x86", since = "1.27.0")]
2530pub fn _mm_cvtpd_epi32(a: __m128d) -> __m128i {
2531 unsafe { transmute(cvtpd2dq(a)) }
2532}
2533
2534#[inline]
2539#[target_feature(enable = "sse2")]
2540#[cfg_attr(test, assert_instr(cvtsd2si))]
2541#[stable(feature = "simd_x86", since = "1.27.0")]
2542pub fn _mm_cvtsd_si32(a: __m128d) -> i32 {
2543 unsafe { cvtsd2si(a) }
2544}
2545
2546#[inline]
2553#[target_feature(enable = "sse2")]
2554#[cfg_attr(test, assert_instr(cvtsd2ss))]
2555#[stable(feature = "simd_x86", since = "1.27.0")]
2556pub fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 {
2557 unsafe { cvtsd2ss(a, b) }
2558}
2559
2560#[inline]
2564#[target_feature(enable = "sse2")]
2565#[stable(feature = "simd_x86", since = "1.27.0")]
2566#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2567pub const fn _mm_cvtsd_f64(a: __m128d) -> f64 {
2568 unsafe { simd_extract!(a, 0) }
2569}
2570
2571#[inline]
2578#[target_feature(enable = "sse2")]
2579#[cfg_attr(test, assert_instr(cvtss2sd))]
2580#[stable(feature = "simd_x86", since = "1.27.0")]
2581#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2582pub const fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d {
2583 unsafe {
2584 let elt: f32 = simd_extract!(b, 0);
2585 simd_insert!(a, 0, elt as f64)
2586 }
2587}
2588
2589#[inline]
2594#[target_feature(enable = "sse2")]
2595#[cfg_attr(test, assert_instr(cvttpd2dq))]
2596#[stable(feature = "simd_x86", since = "1.27.0")]
2597pub fn _mm_cvttpd_epi32(a: __m128d) -> __m128i {
2598 unsafe { transmute(cvttpd2dq(a)) }
2599}
2600
2601#[inline]
2606#[target_feature(enable = "sse2")]
2607#[cfg_attr(test, assert_instr(cvttsd2si))]
2608#[stable(feature = "simd_x86", since = "1.27.0")]
2609pub fn _mm_cvttsd_si32(a: __m128d) -> i32 {
2610 unsafe { cvttsd2si(a) }
2611}
2612
2613#[inline]
2618#[target_feature(enable = "sse2")]
2619#[cfg_attr(test, assert_instr(cvttps2dq))]
2620#[stable(feature = "simd_x86", since = "1.27.0")]
2621pub fn _mm_cvttps_epi32(a: __m128) -> __m128i {
2622 unsafe { transmute(cvttps2dq(a)) }
2623}
2624
2625#[inline]
2630#[target_feature(enable = "sse2")]
2631#[stable(feature = "simd_x86", since = "1.27.0")]
2632#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2633pub const fn _mm_set_sd(a: f64) -> __m128d {
2634 _mm_set_pd(0.0, a)
2635}
2636
2637#[inline]
2642#[target_feature(enable = "sse2")]
2643#[stable(feature = "simd_x86", since = "1.27.0")]
2644#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2645pub const fn _mm_set1_pd(a: f64) -> __m128d {
2646 _mm_set_pd(a, a)
2647}
2648
2649#[inline]
2654#[target_feature(enable = "sse2")]
2655#[stable(feature = "simd_x86", since = "1.27.0")]
2656#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2657pub const fn _mm_set_pd1(a: f64) -> __m128d {
2658 _mm_set_pd(a, a)
2659}
2660
2661#[inline]
2666#[target_feature(enable = "sse2")]
2667#[stable(feature = "simd_x86", since = "1.27.0")]
2668#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2669pub const fn _mm_set_pd(a: f64, b: f64) -> __m128d {
2670 __m128d([b, a])
2671}
2672
2673#[inline]
2678#[target_feature(enable = "sse2")]
2679#[stable(feature = "simd_x86", since = "1.27.0")]
2680#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2681pub const fn _mm_setr_pd(a: f64, b: f64) -> __m128d {
2682 _mm_set_pd(b, a)
2683}
2684
2685#[inline]
2690#[target_feature(enable = "sse2")]
2691#[cfg_attr(test, assert_instr(xorp))]
2692#[stable(feature = "simd_x86", since = "1.27.0")]
2693#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2694pub const fn _mm_setzero_pd() -> __m128d {
2695 const { unsafe { mem::zeroed() } }
2696}
2697
2698#[inline]
2705#[target_feature(enable = "sse2")]
2706#[cfg_attr(test, assert_instr(movmskpd))]
2707#[stable(feature = "simd_x86", since = "1.27.0")]
2708#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2709pub const fn _mm_movemask_pd(a: __m128d) -> i32 {
2710 unsafe {
2713 let mask: i64x2 = simd_lt(transmute(a), i64x2::ZERO);
2714 simd_bitmask::<i64x2, u8>(mask) as i32
2715 }
2716}
2717
2718#[inline]
2725#[target_feature(enable = "sse2")]
2726#[cfg_attr(
2727 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
2728 assert_instr(movaps)
2729)]
2730#[stable(feature = "simd_x86", since = "1.27.0")]
2731#[allow(clippy::cast_ptr_alignment)]
2732#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2733pub const unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
2734 *(mem_addr as *const __m128d)
2735}
2736
2737#[inline]
2742#[target_feature(enable = "sse2")]
2743#[cfg_attr(test, assert_instr(movsd))]
2744#[stable(feature = "simd_x86", since = "1.27.0")]
2745#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2746pub const unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
2747 _mm_setr_pd(*mem_addr, 0.)
2748}
2749
2750#[inline]
2756#[target_feature(enable = "sse2")]
2757#[cfg_attr(test, assert_instr(movhps))]
2758#[stable(feature = "simd_x86", since = "1.27.0")]
2759#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2760pub const unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2761 _mm_setr_pd(simd_extract!(a, 0), *mem_addr)
2762}
2763
2764#[inline]
2770#[target_feature(enable = "sse2")]
2771#[cfg_attr(test, assert_instr(movlps))]
2772#[stable(feature = "simd_x86", since = "1.27.0")]
2773#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2774pub const unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2775 _mm_setr_pd(*mem_addr, simd_extract!(a, 1))
2776}
2777
/// Stores the 128-bit vector `a` to `mem_addr` using the `movntpd`
/// instruction, which Intel documents as a non-temporal (cache-bypassing)
/// store.
///
/// The instruction is emitted through inline assembly rather than through a
/// compiler intrinsic.
///
/// # Safety
///
/// `mem_addr` must be valid for a 16-byte write. Intel's documentation for
/// `movntpd` additionally requires the address to be 16-byte aligned.
/// NOTE(review): non-temporal stores are weakly ordered on x86; callers
/// presumably need a store fence (e.g. `_mm_sfence`) before other code relies
/// on the written data. Confirm against the upstream documentation.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
    crate::arch::asm!(
        // `vps!` builds the instruction text from the mnemonic and the
        // trailing operand list; the destination uses the `{p}` operand.
        vps!("movntpd", ",{a}"),
        p = in(reg) mem_addr,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}
2807
2808#[inline]
2813#[target_feature(enable = "sse2")]
2814#[cfg_attr(test, assert_instr(movlps))]
2815#[stable(feature = "simd_x86", since = "1.27.0")]
2816#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2817pub const unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
2818 *mem_addr = simd_extract!(a, 0)
2819}
2820
2821#[inline]
2827#[target_feature(enable = "sse2")]
2828#[cfg_attr(
2829 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
2830 assert_instr(movaps)
2831)]
2832#[stable(feature = "simd_x86", since = "1.27.0")]
2833#[allow(clippy::cast_ptr_alignment)]
2834#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2835pub const unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
2836 *(mem_addr as *mut __m128d) = a;
2837}
2838
2839#[inline]
2845#[target_feature(enable = "sse2")]
2846#[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")]
2848#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2849pub const unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
2850 mem_addr.cast::<__m128d>().write_unaligned(a);
2851}
2852
2853#[inline]
2859#[target_feature(enable = "sse2")]
2860#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2861#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2862pub const unsafe fn _mm_storeu_si16(mem_addr: *mut u8, a: __m128i) {
2863 ptr::write_unaligned(mem_addr as *mut i16, simd_extract(a.as_i16x8(), 0))
2864}
2865
2866#[inline]
2872#[target_feature(enable = "sse2")]
2873#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2874#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2875pub const unsafe fn _mm_storeu_si32(mem_addr: *mut u8, a: __m128i) {
2876 ptr::write_unaligned(mem_addr as *mut i32, simd_extract(a.as_i32x4(), 0))
2877}
2878
2879#[inline]
2885#[target_feature(enable = "sse2")]
2886#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2887#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2888pub const unsafe fn _mm_storeu_si64(mem_addr: *mut u8, a: __m128i) {
2889 ptr::write_unaligned(mem_addr as *mut i64, simd_extract(a.as_i64x2(), 0))
2890}
2891
2892#[inline]
2898#[target_feature(enable = "sse2")]
2899#[stable(feature = "simd_x86", since = "1.27.0")]
2900#[allow(clippy::cast_ptr_alignment)]
2901#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2902pub const unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
2903 let b: __m128d = simd_shuffle!(a, a, [0, 0]);
2904 *(mem_addr as *mut __m128d) = b;
2905}
2906
2907#[inline]
2913#[target_feature(enable = "sse2")]
2914#[stable(feature = "simd_x86", since = "1.27.0")]
2915#[allow(clippy::cast_ptr_alignment)]
2916#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2917pub const unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
2918 let b: __m128d = simd_shuffle!(a, a, [0, 0]);
2919 *(mem_addr as *mut __m128d) = b;
2920}
2921
2922#[inline]
2929#[target_feature(enable = "sse2")]
2930#[stable(feature = "simd_x86", since = "1.27.0")]
2931#[allow(clippy::cast_ptr_alignment)]
2932#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2933pub const unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
2934 let b: __m128d = simd_shuffle!(a, a, [1, 0]);
2935 *(mem_addr as *mut __m128d) = b;
2936}
2937
2938#[inline]
2943#[target_feature(enable = "sse2")]
2944#[cfg_attr(test, assert_instr(movhps))]
2945#[stable(feature = "simd_x86", since = "1.27.0")]
2946#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2947pub const unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
2948 *mem_addr = simd_extract!(a, 1);
2949}
2950
2951#[inline]
2956#[target_feature(enable = "sse2")]
2957#[cfg_attr(test, assert_instr(movlps))]
2958#[stable(feature = "simd_x86", since = "1.27.0")]
2959#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2960pub const unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
2961 *mem_addr = simd_extract!(a, 0);
2962}
2963
2964#[inline]
2969#[target_feature(enable = "sse2")]
2970#[stable(feature = "simd_x86", since = "1.27.0")]
2972#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2973pub const unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
2974 let d = *mem_addr;
2975 _mm_setr_pd(d, d)
2976}
2977
2978#[inline]
2983#[target_feature(enable = "sse2")]
2984#[stable(feature = "simd_x86", since = "1.27.0")]
2986#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2987pub const unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
2988 _mm_load1_pd(mem_addr)
2989}
2990
2991#[inline]
2997#[target_feature(enable = "sse2")]
2998#[cfg_attr(
2999 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
3000 assert_instr(movaps)
3001)]
3002#[stable(feature = "simd_x86", since = "1.27.0")]
3003#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3004pub const unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
3005 let a = _mm_load_pd(mem_addr);
3006 simd_shuffle!(a, a, [1, 0])
3007}
3008
3009#[inline]
3015#[target_feature(enable = "sse2")]
3016#[cfg_attr(test, assert_instr(movups))]
3017#[stable(feature = "simd_x86", since = "1.27.0")]
3018#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3019pub const unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
3020 let mut dst = _mm_undefined_pd();
3021 ptr::copy_nonoverlapping(
3022 mem_addr as *const u8,
3023 ptr::addr_of_mut!(dst) as *mut u8,
3024 mem::size_of::<__m128d>(),
3025 );
3026 dst
3027}
3028
3029#[inline]
3035#[target_feature(enable = "sse2")]
3036#[stable(feature = "simd_x86_updates", since = "1.82.0")]
3037#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3038pub const unsafe fn _mm_loadu_si16(mem_addr: *const u8) -> __m128i {
3039 transmute(i16x8::new(
3040 ptr::read_unaligned(mem_addr as *const i16),
3041 0,
3042 0,
3043 0,
3044 0,
3045 0,
3046 0,
3047 0,
3048 ))
3049}
3050
3051#[inline]
3057#[target_feature(enable = "sse2")]
3058#[stable(feature = "simd_x86_updates", since = "1.82.0")]
3059#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3060pub const unsafe fn _mm_loadu_si32(mem_addr: *const u8) -> __m128i {
3061 transmute(i32x4::new(
3062 ptr::read_unaligned(mem_addr as *const i32),
3063 0,
3064 0,
3065 0,
3066 ))
3067}
3068
3069#[inline]
3075#[target_feature(enable = "sse2")]
3076#[stable(feature = "simd_x86_mm_loadu_si64", since = "1.46.0")]
3077#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3078pub const unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i {
3079 transmute(i64x2::new(ptr::read_unaligned(mem_addr as *const i64), 0))
3080}
3081
3082#[inline]
3088#[target_feature(enable = "sse2")]
3089#[cfg_attr(test, assert_instr(shufps, MASK = 2))]
3090#[rustc_legacy_const_generics(2)]
3091#[stable(feature = "simd_x86", since = "1.27.0")]
3092#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3093pub const fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
3094 static_assert_uimm_bits!(MASK, 8);
3095 unsafe { simd_shuffle!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) }
3096}
3097
3098#[inline]
3104#[target_feature(enable = "sse2")]
3105#[cfg_attr(test, assert_instr(movsd))]
3106#[stable(feature = "simd_x86", since = "1.27.0")]
3107#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3108pub const fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
3109 unsafe { _mm_setr_pd(simd_extract!(b, 0), simd_extract!(a, 1)) }
3110}
3111
3112#[inline]
3117#[target_feature(enable = "sse2")]
3118#[stable(feature = "simd_x86", since = "1.27.0")]
3119#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3120pub const fn _mm_castpd_ps(a: __m128d) -> __m128 {
3121 unsafe { transmute(a) }
3122}
3123
3124#[inline]
3129#[target_feature(enable = "sse2")]
3130#[stable(feature = "simd_x86", since = "1.27.0")]
3131#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3132pub const fn _mm_castpd_si128(a: __m128d) -> __m128i {
3133 unsafe { transmute(a) }
3134}
3135
3136#[inline]
3141#[target_feature(enable = "sse2")]
3142#[stable(feature = "simd_x86", since = "1.27.0")]
3143#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3144pub const fn _mm_castps_pd(a: __m128) -> __m128d {
3145 unsafe { transmute(a) }
3146}
3147
3148#[inline]
3153#[target_feature(enable = "sse2")]
3154#[stable(feature = "simd_x86", since = "1.27.0")]
3155#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3156pub const fn _mm_castps_si128(a: __m128) -> __m128i {
3157 unsafe { transmute(a) }
3158}
3159
3160#[inline]
3165#[target_feature(enable = "sse2")]
3166#[stable(feature = "simd_x86", since = "1.27.0")]
3167#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3168pub const fn _mm_castsi128_pd(a: __m128i) -> __m128d {
3169 unsafe { transmute(a) }
3170}
3171
3172#[inline]
3177#[target_feature(enable = "sse2")]
3178#[stable(feature = "simd_x86", since = "1.27.0")]
3179#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3180pub const fn _mm_castsi128_ps(a: __m128i) -> __m128 {
3181 unsafe { transmute(a) }
3182}
3183
3184#[inline]
3191#[target_feature(enable = "sse2")]
3192#[stable(feature = "simd_x86", since = "1.27.0")]
3193#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3194pub const fn _mm_undefined_pd() -> __m128d {
3195 const { unsafe { mem::zeroed() } }
3196}
3197
3198#[inline]
3205#[target_feature(enable = "sse2")]
3206#[stable(feature = "simd_x86", since = "1.27.0")]
3207#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3208pub const fn _mm_undefined_si128() -> __m128i {
3209 const { unsafe { mem::zeroed() } }
3210}
3211
3212#[inline]
3220#[target_feature(enable = "sse2")]
3221#[cfg_attr(test, assert_instr(unpckhpd))]
3222#[stable(feature = "simd_x86", since = "1.27.0")]
3223#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3224pub const fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
3225 unsafe { simd_shuffle!(a, b, [1, 3]) }
3226}
3227
3228#[inline]
3236#[target_feature(enable = "sse2")]
3237#[cfg_attr(test, assert_instr(movlhps))]
3238#[stable(feature = "simd_x86", since = "1.27.0")]
3239#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3240pub const fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
3241 unsafe { simd_shuffle!(a, b, [0, 2]) }
3242}
3243
// Declarations of the LLVM `llvm.x86.sse2.*` intrinsics used by the wrappers
// in this file. Each `link_name` selects the LLVM intrinsic; the Rust-side
// name is the short mnemonic the public `_mm_*` functions call.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // Hints, cache control and fences.
    #[link_name = "llvm.x86.sse2.pause"]
    fn pause();
    #[link_name = "llvm.x86.sse2.clflush"]
    fn clflush(p: *const u8);
    #[link_name = "llvm.x86.sse2.lfence"]
    fn lfence();
    #[link_name = "llvm.x86.sse2.mfence"]
    fn mfence();
    // Integer multiply-add and sum of absolute differences.
    #[link_name = "llvm.x86.sse2.pmadd.wd"]
    fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
    #[link_name = "llvm.x86.sse2.psad.bw"]
    fn psadbw(a: u8x16, b: u8x16) -> u64x2;
    // Integer shifts whose count is supplied in a vector register.
    #[link_name = "llvm.x86.sse2.psll.w"]
    fn psllw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psll.d"]
    fn pslld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psll.q"]
    fn psllq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.sse2.psra.w"]
    fn psraw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psra.d"]
    fn psrad(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrl.w"]
    fn psrlw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psrl.d"]
    fn psrld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrl.q"]
    fn psrlq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.sse2.cvtps2dq"]
    fn cvtps2dq(a: __m128) -> i32x4;
    #[link_name = "llvm.x86.sse2.maskmov.dqu"]
    fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);
    // Double-precision min/max, scalar (`sd`) and packed (`pd`).
    #[link_name = "llvm.x86.sse2.max.sd"]
    fn maxsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.max.pd"]
    fn maxpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.sd"]
    fn minsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.pd"]
    fn minpd(a: __m128d, b: __m128d) -> __m128d;
    // Double-precision compares; `imm8` selects the comparison predicate.
    #[link_name = "llvm.x86.sse2.cmp.sd"]
    fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    #[link_name = "llvm.x86.sse2.cmp.pd"]
    fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    // Scalar compares returning an `i32` result (`comi*` family).
    #[link_name = "llvm.x86.sse2.comieq.sd"]
    fn comieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comilt.sd"]
    fn comiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comile.sd"]
    fn comilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comigt.sd"]
    fn comigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comige.sd"]
    fn comigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comineq.sd"]
    fn comineqsd(a: __m128d, b: __m128d) -> i32;
    // Scalar compares returning an `i32` result (`ucomi*` family).
    #[link_name = "llvm.x86.sse2.ucomieq.sd"]
    fn ucomieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomilt.sd"]
    fn ucomiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomile.sd"]
    fn ucomilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomigt.sd"]
    fn ucomigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomige.sd"]
    fn ucomigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomineq.sd"]
    fn ucomineqsd(a: __m128d, b: __m128d) -> i32;
    // Floating-point conversions; the `cvtt*` forms are the truncating ones.
    #[link_name = "llvm.x86.sse2.cvtpd2dq"]
    fn cvtpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvtsd2si"]
    fn cvtsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvtsd2ss"]
    fn cvtsd2ss(a: __m128, b: __m128d) -> __m128;
    #[link_name = "llvm.x86.sse2.cvttpd2dq"]
    fn cvttpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvttsd2si"]
    fn cvttsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvttps2dq"]
    fn cvttps2dq(a: __m128) -> i32x4;
}
3327
3328#[cfg(test)]
3329mod tests {
3330 use crate::core_arch::assert_eq_const as assert_eq;
3331 use crate::{
3332 core_arch::{simd::*, x86::*},
3333 hint::black_box,
3334 };
3335 use std::{boxed, f32, f64, mem, ptr};
3336 use stdarch_test::simd_test;
3337
3338 const NAN: f64 = f64::NAN;
3339
3340 #[test]
3341 fn test_mm_pause() {
3342 _mm_pause()
3343 }
3344
3345 #[simd_test(enable = "sse2")]
3346 fn test_mm_clflush() {
3347 let x = 0_u8;
3348 unsafe {
3349 _mm_clflush(ptr::addr_of!(x));
3350 }
3351 }
3352
    // Smoke test: `_mm_lfence` only has to be callable. Skipped under Miri.
    #[simd_test(enable = "sse2")]
    #[cfg_attr(miri, ignore)]
    fn test_mm_lfence() {
        _mm_lfence();
    }
3359
    // Smoke test: `_mm_mfence` only has to be callable. Skipped under Miri.
    #[simd_test(enable = "sse2")]
    #[cfg_attr(miri, ignore)]
    fn test_mm_mfence() {
        _mm_mfence();
    }
3366
3367 #[simd_test(enable = "sse2")]
3368 const fn test_mm_add_epi8() {
3369 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3370 #[rustfmt::skip]
3371 let b = _mm_setr_epi8(
3372 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3373 );
3374 let r = _mm_add_epi8(a, b);
3375 #[rustfmt::skip]
3376 let e = _mm_setr_epi8(
3377 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3378 );
3379 assert_eq_m128i(r, e);
3380 }
3381
3382 #[simd_test(enable = "sse2")]
3383 fn test_mm_add_epi8_overflow() {
3384 let a = _mm_set1_epi8(0x7F);
3385 let b = _mm_set1_epi8(1);
3386 let r = _mm_add_epi8(a, b);
3387 assert_eq_m128i(r, _mm_set1_epi8(-128));
3388 }
3389
3390 #[simd_test(enable = "sse2")]
3391 const fn test_mm_add_epi16() {
3392 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3393 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3394 let r = _mm_add_epi16(a, b);
3395 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3396 assert_eq_m128i(r, e);
3397 }
3398
3399 #[simd_test(enable = "sse2")]
3400 const fn test_mm_add_epi32() {
3401 let a = _mm_setr_epi32(0, 1, 2, 3);
3402 let b = _mm_setr_epi32(4, 5, 6, 7);
3403 let r = _mm_add_epi32(a, b);
3404 let e = _mm_setr_epi32(4, 6, 8, 10);
3405 assert_eq_m128i(r, e);
3406 }
3407
3408 #[simd_test(enable = "sse2")]
3409 const fn test_mm_add_epi64() {
3410 let a = _mm_setr_epi64x(0, 1);
3411 let b = _mm_setr_epi64x(2, 3);
3412 let r = _mm_add_epi64(a, b);
3413 let e = _mm_setr_epi64x(2, 4);
3414 assert_eq_m128i(r, e);
3415 }
3416
3417 #[simd_test(enable = "sse2")]
3418 const fn test_mm_adds_epi8() {
3419 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3420 #[rustfmt::skip]
3421 let b = _mm_setr_epi8(
3422 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3423 );
3424 let r = _mm_adds_epi8(a, b);
3425 #[rustfmt::skip]
3426 let e = _mm_setr_epi8(
3427 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3428 );
3429 assert_eq_m128i(r, e);
3430 }
3431
3432 #[simd_test(enable = "sse2")]
3433 fn test_mm_adds_epi8_saturate_positive() {
3434 let a = _mm_set1_epi8(0x7F);
3435 let b = _mm_set1_epi8(1);
3436 let r = _mm_adds_epi8(a, b);
3437 assert_eq_m128i(r, a);
3438 }
3439
3440 #[simd_test(enable = "sse2")]
3441 fn test_mm_adds_epi8_saturate_negative() {
3442 let a = _mm_set1_epi8(-0x80);
3443 let b = _mm_set1_epi8(-1);
3444 let r = _mm_adds_epi8(a, b);
3445 assert_eq_m128i(r, a);
3446 }
3447
3448 #[simd_test(enable = "sse2")]
3449 const fn test_mm_adds_epi16() {
3450 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3451 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3452 let r = _mm_adds_epi16(a, b);
3453 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3454 assert_eq_m128i(r, e);
3455 }
3456
3457 #[simd_test(enable = "sse2")]
3458 fn test_mm_adds_epi16_saturate_positive() {
3459 let a = _mm_set1_epi16(0x7FFF);
3460 let b = _mm_set1_epi16(1);
3461 let r = _mm_adds_epi16(a, b);
3462 assert_eq_m128i(r, a);
3463 }
3464
3465 #[simd_test(enable = "sse2")]
3466 fn test_mm_adds_epi16_saturate_negative() {
3467 let a = _mm_set1_epi16(-0x8000);
3468 let b = _mm_set1_epi16(-1);
3469 let r = _mm_adds_epi16(a, b);
3470 assert_eq_m128i(r, a);
3471 }
3472
3473 #[simd_test(enable = "sse2")]
3474 const fn test_mm_adds_epu8() {
3475 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3476 #[rustfmt::skip]
3477 let b = _mm_setr_epi8(
3478 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3479 );
3480 let r = _mm_adds_epu8(a, b);
3481 #[rustfmt::skip]
3482 let e = _mm_setr_epi8(
3483 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3484 );
3485 assert_eq_m128i(r, e);
3486 }
3487
3488 #[simd_test(enable = "sse2")]
3489 fn test_mm_adds_epu8_saturate() {
3490 let a = _mm_set1_epi8(!0);
3491 let b = _mm_set1_epi8(1);
3492 let r = _mm_adds_epu8(a, b);
3493 assert_eq_m128i(r, a);
3494 }
3495
3496 #[simd_test(enable = "sse2")]
3497 const fn test_mm_adds_epu16() {
3498 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3499 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3500 let r = _mm_adds_epu16(a, b);
3501 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3502 assert_eq_m128i(r, e);
3503 }
3504
3505 #[simd_test(enable = "sse2")]
3506 fn test_mm_adds_epu16_saturate() {
3507 let a = _mm_set1_epi16(!0);
3508 let b = _mm_set1_epi16(1);
3509 let r = _mm_adds_epu16(a, b);
3510 assert_eq_m128i(r, a);
3511 }
3512
3513 #[simd_test(enable = "sse2")]
3514 const fn test_mm_avg_epu8() {
3515 let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
3516 let r = _mm_avg_epu8(a, b);
3517 assert_eq_m128i(r, _mm_set1_epi8(6));
3518 }
3519
3520 #[simd_test(enable = "sse2")]
3521 const fn test_mm_avg_epu16() {
3522 let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
3523 let r = _mm_avg_epu16(a, b);
3524 assert_eq_m128i(r, _mm_set1_epi16(6));
3525 }
3526
3527 #[simd_test(enable = "sse2")]
3528 fn test_mm_madd_epi16() {
3529 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
3530 let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
3531 let r = _mm_madd_epi16(a, b);
3532 let e = _mm_setr_epi32(29, 81, 149, 233);
3533 assert_eq_m128i(r, e);
3534
3535 let a = _mm_setr_epi16(
3538 i16::MAX,
3539 i16::MAX,
3540 i16::MIN,
3541 i16::MIN,
3542 i16::MIN,
3543 i16::MAX,
3544 0,
3545 0,
3546 );
3547 let b = _mm_setr_epi16(
3548 i16::MAX,
3549 i16::MAX,
3550 i16::MIN,
3551 i16::MIN,
3552 i16::MAX,
3553 i16::MIN,
3554 0,
3555 0,
3556 );
3557 let r = _mm_madd_epi16(a, b);
3558 let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0);
3559 assert_eq_m128i(r, e);
3560 }
3561
3562 #[simd_test(enable = "sse2")]
3563 const fn test_mm_max_epi16() {
3564 let a = _mm_set1_epi16(1);
3565 let b = _mm_set1_epi16(-1);
3566 let r = _mm_max_epi16(a, b);
3567 assert_eq_m128i(r, a);
3568 }
3569
3570 #[simd_test(enable = "sse2")]
3571 const fn test_mm_max_epu8() {
3572 let a = _mm_set1_epi8(1);
3573 let b = _mm_set1_epi8(!0);
3574 let r = _mm_max_epu8(a, b);
3575 assert_eq_m128i(r, b);
3576 }
3577
3578 #[simd_test(enable = "sse2")]
3579 const fn test_mm_min_epi16() {
3580 let a = _mm_set1_epi16(1);
3581 let b = _mm_set1_epi16(-1);
3582 let r = _mm_min_epi16(a, b);
3583 assert_eq_m128i(r, b);
3584 }
3585
3586 #[simd_test(enable = "sse2")]
3587 const fn test_mm_min_epu8() {
3588 let a = _mm_set1_epi8(1);
3589 let b = _mm_set1_epi8(!0);
3590 let r = _mm_min_epu8(a, b);
3591 assert_eq_m128i(r, a);
3592 }
3593
3594 #[simd_test(enable = "sse2")]
3595 const fn test_mm_mulhi_epi16() {
3596 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3597 let r = _mm_mulhi_epi16(a, b);
3598 assert_eq_m128i(r, _mm_set1_epi16(-16));
3599 }
3600
3601 #[simd_test(enable = "sse2")]
3602 const fn test_mm_mulhi_epu16() {
3603 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
3604 let r = _mm_mulhi_epu16(a, b);
3605 assert_eq_m128i(r, _mm_set1_epi16(15));
3606 }
3607
3608 #[simd_test(enable = "sse2")]
3609 const fn test_mm_mullo_epi16() {
3610 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3611 let r = _mm_mullo_epi16(a, b);
3612 assert_eq_m128i(r, _mm_set1_epi16(-17960));
3613 }
3614
3615 #[simd_test(enable = "sse2")]
3616 const fn test_mm_mul_epu32() {
3617 let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
3618 let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
3619 let r = _mm_mul_epu32(a, b);
3620 let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
3621 assert_eq_m128i(r, e);
3622 }
3623
3624 #[simd_test(enable = "sse2")]
3625 fn test_mm_sad_epu8() {
3626 #[rustfmt::skip]
3627 let a = _mm_setr_epi8(
3628 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
3629 1, 2, 3, 4,
3630 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
3631 1, 2, 3, 4,
3632 );
3633 let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
3634 let r = _mm_sad_epu8(a, b);
3635 let e = _mm_setr_epi64x(1020, 614);
3636 assert_eq_m128i(r, e);
3637 }
3638
3639 #[simd_test(enable = "sse2")]
3640 const fn test_mm_sub_epi8() {
3641 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6));
3642 let r = _mm_sub_epi8(a, b);
3643 assert_eq_m128i(r, _mm_set1_epi8(-1));
3644 }
3645
3646 #[simd_test(enable = "sse2")]
3647 const fn test_mm_sub_epi16() {
3648 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6));
3649 let r = _mm_sub_epi16(a, b);
3650 assert_eq_m128i(r, _mm_set1_epi16(-1));
3651 }
3652
3653 #[simd_test(enable = "sse2")]
3654 const fn test_mm_sub_epi32() {
3655 let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6));
3656 let r = _mm_sub_epi32(a, b);
3657 assert_eq_m128i(r, _mm_set1_epi32(-1));
3658 }
3659
3660 #[simd_test(enable = "sse2")]
3661 const fn test_mm_sub_epi64() {
3662 let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6));
3663 let r = _mm_sub_epi64(a, b);
3664 assert_eq_m128i(r, _mm_set1_epi64x(-1));
3665 }
3666
3667 #[simd_test(enable = "sse2")]
3668 const fn test_mm_subs_epi8() {
3669 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3670 let r = _mm_subs_epi8(a, b);
3671 assert_eq_m128i(r, _mm_set1_epi8(3));
3672 }
3673
3674 #[simd_test(enable = "sse2")]
3675 fn test_mm_subs_epi8_saturate_positive() {
3676 let a = _mm_set1_epi8(0x7F);
3677 let b = _mm_set1_epi8(-1);
3678 let r = _mm_subs_epi8(a, b);
3679 assert_eq_m128i(r, a);
3680 }
3681
3682 #[simd_test(enable = "sse2")]
3683 fn test_mm_subs_epi8_saturate_negative() {
3684 let a = _mm_set1_epi8(-0x80);
3685 let b = _mm_set1_epi8(1);
3686 let r = _mm_subs_epi8(a, b);
3687 assert_eq_m128i(r, a);
3688 }
3689
3690 #[simd_test(enable = "sse2")]
3691 const fn test_mm_subs_epi16() {
3692 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3693 let r = _mm_subs_epi16(a, b);
3694 assert_eq_m128i(r, _mm_set1_epi16(3));
3695 }
3696
3697 #[simd_test(enable = "sse2")]
3698 fn test_mm_subs_epi16_saturate_positive() {
3699 let a = _mm_set1_epi16(0x7FFF);
3700 let b = _mm_set1_epi16(-1);
3701 let r = _mm_subs_epi16(a, b);
3702 assert_eq_m128i(r, a);
3703 }
3704
3705 #[simd_test(enable = "sse2")]
3706 fn test_mm_subs_epi16_saturate_negative() {
3707 let a = _mm_set1_epi16(-0x8000);
3708 let b = _mm_set1_epi16(1);
3709 let r = _mm_subs_epi16(a, b);
3710 assert_eq_m128i(r, a);
3711 }
3712
3713 #[simd_test(enable = "sse2")]
3714 const fn test_mm_subs_epu8() {
3715 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3716 let r = _mm_subs_epu8(a, b);
3717 assert_eq_m128i(r, _mm_set1_epi8(3));
3718 }
3719
3720 #[simd_test(enable = "sse2")]
3721 fn test_mm_subs_epu8_saturate() {
3722 let a = _mm_set1_epi8(0);
3723 let b = _mm_set1_epi8(1);
3724 let r = _mm_subs_epu8(a, b);
3725 assert_eq_m128i(r, a);
3726 }
3727
3728 #[simd_test(enable = "sse2")]
3729 const fn test_mm_subs_epu16() {
3730 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3731 let r = _mm_subs_epu16(a, b);
3732 assert_eq_m128i(r, _mm_set1_epi16(3));
3733 }
3734
3735 #[simd_test(enable = "sse2")]
3736 fn test_mm_subs_epu16_saturate() {
3737 let a = _mm_set1_epi16(0);
3738 let b = _mm_set1_epi16(1);
3739 let r = _mm_subs_epu16(a, b);
3740 assert_eq_m128i(r, a);
3741 }
3742
3743 #[simd_test(enable = "sse2")]
3744 const fn test_mm_slli_si128() {
3745 #[rustfmt::skip]
3746 let a = _mm_setr_epi8(
3747 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3748 );
3749 let r = _mm_slli_si128::<1>(a);
3750 let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3751 assert_eq_m128i(r, e);
3752
3753 #[rustfmt::skip]
3754 let a = _mm_setr_epi8(
3755 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3756 );
3757 let r = _mm_slli_si128::<15>(a);
3758 let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
3759 assert_eq_m128i(r, e);
3760
3761 #[rustfmt::skip]
3762 let a = _mm_setr_epi8(
3763 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3764 );
3765 let r = _mm_slli_si128::<16>(a);
3766 assert_eq_m128i(r, _mm_set1_epi8(0));
3767 }
3768
3769 #[simd_test(enable = "sse2")]
3770 const fn test_mm_slli_epi16() {
3771 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3772 let r = _mm_slli_epi16::<4>(a);
3773 assert_eq_m128i(
3774 r,
3775 _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
3776 );
3777 let r = _mm_slli_epi16::<16>(a);
3778 assert_eq_m128i(r, _mm_set1_epi16(0));
3779 }
3780
3781 #[simd_test(enable = "sse2")]
3782 fn test_mm_sll_epi16() {
3783 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3784 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
3785 assert_eq_m128i(
3786 r,
3787 _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
3788 );
3789 let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0));
3790 assert_eq_m128i(r, a);
3791 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16));
3792 assert_eq_m128i(r, _mm_set1_epi16(0));
3793 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX));
3794 assert_eq_m128i(r, _mm_set1_epi16(0));
3795 }
3796
3797 #[simd_test(enable = "sse2")]
3798 const fn test_mm_slli_epi32() {
3799 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3800 let r = _mm_slli_epi32::<4>(a);
3801 assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3802 let r = _mm_slli_epi32::<32>(a);
3803 assert_eq_m128i(r, _mm_set1_epi32(0));
3804 }
3805
3806 #[simd_test(enable = "sse2")]
3807 fn test_mm_sll_epi32() {
3808 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3809 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4));
3810 assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3811 let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0));
3812 assert_eq_m128i(r, a);
3813 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32));
3814 assert_eq_m128i(r, _mm_set1_epi32(0));
3815 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX));
3816 assert_eq_m128i(r, _mm_set1_epi32(0));
3817 }
3818
3819 #[simd_test(enable = "sse2")]
3820 const fn test_mm_slli_epi64() {
3821 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3822 let r = _mm_slli_epi64::<4>(a);
3823 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3824 let r = _mm_slli_epi64::<64>(a);
3825 assert_eq_m128i(r, _mm_set1_epi64x(0));
3826 }
3827
3828 #[simd_test(enable = "sse2")]
3829 fn test_mm_sll_epi64() {
3830 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3831 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4));
3832 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3833 let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0));
3834 assert_eq_m128i(r, a);
3835 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64));
3836 assert_eq_m128i(r, _mm_set1_epi64x(0));
3837 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX));
3838 assert_eq_m128i(r, _mm_set1_epi64x(0));
3839 }
3840
3841 #[simd_test(enable = "sse2")]
3842 const fn test_mm_srai_epi16() {
3843 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3844 let r = _mm_srai_epi16::<4>(a);
3845 assert_eq_m128i(
3846 r,
3847 _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3848 );
3849 let r = _mm_srai_epi16::<16>(a);
3850 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3851 }
3852
3853 #[simd_test(enable = "sse2")]
3854 fn test_mm_sra_epi16() {
3855 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3856 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4));
3857 assert_eq_m128i(
3858 r,
3859 _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3860 );
3861 let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0));
3862 assert_eq_m128i(r, a);
3863 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16));
3864 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3865 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX));
3866 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3867 }
3868
3869 #[simd_test(enable = "sse2")]
3870 const fn test_mm_srai_epi32() {
3871 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3872 let r = _mm_srai_epi32::<4>(a);
3873 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3874 let r = _mm_srai_epi32::<32>(a);
3875 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3876 }
3877
3878 #[simd_test(enable = "sse2")]
3879 fn test_mm_sra_epi32() {
3880 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3881 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4));
3882 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3883 let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0));
3884 assert_eq_m128i(r, a);
3885 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32));
3886 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3887 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX));
3888 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3889 }
3890
3891 #[simd_test(enable = "sse2")]
3892 const fn test_mm_srli_si128() {
3893 #[rustfmt::skip]
3894 let a = _mm_setr_epi8(
3895 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3896 );
3897 let r = _mm_srli_si128::<1>(a);
3898 #[rustfmt::skip]
3899 let e = _mm_setr_epi8(
3900 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
3901 );
3902 assert_eq_m128i(r, e);
3903
3904 #[rustfmt::skip]
3905 let a = _mm_setr_epi8(
3906 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3907 );
3908 let r = _mm_srli_si128::<15>(a);
3909 let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3910 assert_eq_m128i(r, e);
3911
3912 #[rustfmt::skip]
3913 let a = _mm_setr_epi8(
3914 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3915 );
3916 let r = _mm_srli_si128::<16>(a);
3917 assert_eq_m128i(r, _mm_set1_epi8(0));
3918 }
3919
3920 #[simd_test(enable = "sse2")]
3921 const fn test_mm_srli_epi16() {
3922 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3923 let r = _mm_srli_epi16::<4>(a);
3924 assert_eq_m128i(
3925 r,
3926 _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3927 );
3928 let r = _mm_srli_epi16::<16>(a);
3929 assert_eq_m128i(r, _mm_set1_epi16(0));
3930 }
3931
3932 #[simd_test(enable = "sse2")]
3933 fn test_mm_srl_epi16() {
3934 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3935 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4));
3936 assert_eq_m128i(
3937 r,
3938 _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3939 );
3940 let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0));
3941 assert_eq_m128i(r, a);
3942 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16));
3943 assert_eq_m128i(r, _mm_set1_epi16(0));
3944 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX));
3945 assert_eq_m128i(r, _mm_set1_epi16(0));
3946 }
3947
3948 #[simd_test(enable = "sse2")]
3949 const fn test_mm_srli_epi32() {
3950 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3951 let r = _mm_srli_epi32::<4>(a);
3952 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3953 let r = _mm_srli_epi32::<32>(a);
3954 assert_eq_m128i(r, _mm_set1_epi32(0));
3955 }
3956
3957 #[simd_test(enable = "sse2")]
3958 fn test_mm_srl_epi32() {
3959 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3960 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4));
3961 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3962 let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0));
3963 assert_eq_m128i(r, a);
3964 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32));
3965 assert_eq_m128i(r, _mm_set1_epi32(0));
3966 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX));
3967 assert_eq_m128i(r, _mm_set1_epi32(0));
3968 }
3969
3970 #[simd_test(enable = "sse2")]
3971 const fn test_mm_srli_epi64() {
3972 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3973 let r = _mm_srli_epi64::<4>(a);
3974 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3975 let r = _mm_srli_epi64::<64>(a);
3976 assert_eq_m128i(r, _mm_set1_epi64x(0));
3977 }
3978
3979 #[simd_test(enable = "sse2")]
3980 fn test_mm_srl_epi64() {
3981 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3982 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4));
3983 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3984 let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0));
3985 assert_eq_m128i(r, a);
3986 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64));
3987 assert_eq_m128i(r, _mm_set1_epi64x(0));
3988 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX));
3989 assert_eq_m128i(r, _mm_set1_epi64x(0));
3990 }
3991
3992 #[simd_test(enable = "sse2")]
3993 const fn test_mm_and_si128() {
3994 let a = _mm_set1_epi8(5);
3995 let b = _mm_set1_epi8(3);
3996 let r = _mm_and_si128(a, b);
3997 assert_eq_m128i(r, _mm_set1_epi8(1));
3998 }
3999
4000 #[simd_test(enable = "sse2")]
4001 const fn test_mm_andnot_si128() {
4002 let a = _mm_set1_epi8(5);
4003 let b = _mm_set1_epi8(3);
4004 let r = _mm_andnot_si128(a, b);
4005 assert_eq_m128i(r, _mm_set1_epi8(2));
4006 }
4007
4008 #[simd_test(enable = "sse2")]
4009 const fn test_mm_or_si128() {
4010 let a = _mm_set1_epi8(5);
4011 let b = _mm_set1_epi8(3);
4012 let r = _mm_or_si128(a, b);
4013 assert_eq_m128i(r, _mm_set1_epi8(7));
4014 }
4015
4016 #[simd_test(enable = "sse2")]
4017 const fn test_mm_xor_si128() {
4018 let a = _mm_set1_epi8(5);
4019 let b = _mm_set1_epi8(3);
4020 let r = _mm_xor_si128(a, b);
4021 assert_eq_m128i(r, _mm_set1_epi8(6));
4022 }
4023
4024 #[simd_test(enable = "sse2")]
4025 const fn test_mm_cmpeq_epi8() {
4026 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
4027 let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
4028 let r = _mm_cmpeq_epi8(a, b);
4029 #[rustfmt::skip]
4030 assert_eq_m128i(
4031 r,
4032 _mm_setr_epi8(
4033 0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4034 )
4035 );
4036 }
4037
4038 #[simd_test(enable = "sse2")]
4039 const fn test_mm_cmpeq_epi16() {
4040 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4041 let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0);
4042 let r = _mm_cmpeq_epi16(a, b);
4043 assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0));
4044 }
4045
4046 #[simd_test(enable = "sse2")]
4047 const fn test_mm_cmpeq_epi32() {
4048 let a = _mm_setr_epi32(0, 1, 2, 3);
4049 let b = _mm_setr_epi32(3, 2, 2, 0);
4050 let r = _mm_cmpeq_epi32(a, b);
4051 assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0));
4052 }
4053
4054 #[simd_test(enable = "sse2")]
4055 const fn test_mm_cmpgt_epi8() {
4056 let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4057 let b = _mm_set1_epi8(0);
4058 let r = _mm_cmpgt_epi8(a, b);
4059 let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4060 assert_eq_m128i(r, e);
4061 }
4062
4063 #[simd_test(enable = "sse2")]
4064 const fn test_mm_cmpgt_epi16() {
4065 let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
4066 let b = _mm_set1_epi16(0);
4067 let r = _mm_cmpgt_epi16(a, b);
4068 let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
4069 assert_eq_m128i(r, e);
4070 }
4071
4072 #[simd_test(enable = "sse2")]
4073 const fn test_mm_cmpgt_epi32() {
4074 let a = _mm_set_epi32(5, 0, 0, 0);
4075 let b = _mm_set1_epi32(0);
4076 let r = _mm_cmpgt_epi32(a, b);
4077 assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
4078 }
4079
4080 #[simd_test(enable = "sse2")]
4081 const fn test_mm_cmplt_epi8() {
4082 let a = _mm_set1_epi8(0);
4083 let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4084 let r = _mm_cmplt_epi8(a, b);
4085 let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4086 assert_eq_m128i(r, e);
4087 }
4088
4089 #[simd_test(enable = "sse2")]
4090 const fn test_mm_cmplt_epi16() {
4091 let a = _mm_set1_epi16(0);
4092 let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
4093 let r = _mm_cmplt_epi16(a, b);
4094 let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
4095 assert_eq_m128i(r, e);
4096 }
4097
4098 #[simd_test(enable = "sse2")]
4099 const fn test_mm_cmplt_epi32() {
4100 let a = _mm_set1_epi32(0);
4101 let b = _mm_set_epi32(5, 0, 0, 0);
4102 let r = _mm_cmplt_epi32(a, b);
4103 assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
4104 }
4105
4106 #[simd_test(enable = "sse2")]
4107 const fn test_mm_cvtepi32_pd() {
4108 let a = _mm_set_epi32(35, 25, 15, 5);
4109 let r = _mm_cvtepi32_pd(a);
4110 assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0));
4111 }
4112
4113 #[simd_test(enable = "sse2")]
4114 const fn test_mm_cvtsi32_sd() {
4115 let a = _mm_set1_pd(3.5);
4116 let r = _mm_cvtsi32_sd(a, 5);
4117 assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5));
4118 }
4119
4120 #[simd_test(enable = "sse2")]
4121 const fn test_mm_cvtepi32_ps() {
4122 let a = _mm_setr_epi32(1, 2, 3, 4);
4123 let r = _mm_cvtepi32_ps(a);
4124 assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
4125 }
4126
4127 #[simd_test(enable = "sse2")]
4128 fn test_mm_cvtps_epi32() {
4129 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
4130 let r = _mm_cvtps_epi32(a);
4131 assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
4132 }
4133
4134 #[simd_test(enable = "sse2")]
4135 const fn test_mm_cvtsi32_si128() {
4136 let r = _mm_cvtsi32_si128(5);
4137 assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0));
4138 }
4139
4140 #[simd_test(enable = "sse2")]
4141 const fn test_mm_cvtsi128_si32() {
4142 let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0));
4143 assert_eq!(r, 5);
4144 }
4145
4146 #[simd_test(enable = "sse2")]
4147 const fn test_mm_set_epi64x() {
4148 let r = _mm_set_epi64x(0, 1);
4149 assert_eq_m128i(r, _mm_setr_epi64x(1, 0));
4150 }
4151
4152 #[simd_test(enable = "sse2")]
4153 const fn test_mm_set_epi32() {
4154 let r = _mm_set_epi32(0, 1, 2, 3);
4155 assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0));
4156 }
4157
4158 #[simd_test(enable = "sse2")]
4159 const fn test_mm_set_epi16() {
4160 let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4161 assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0));
4162 }
4163
4164 #[simd_test(enable = "sse2")]
4165 const fn test_mm_set_epi8() {
4166 #[rustfmt::skip]
4167 let r = _mm_set_epi8(
4168 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
4169 );
4170 #[rustfmt::skip]
4171 let e = _mm_setr_epi8(
4172 15, 14, 13, 12, 11, 10, 9, 8,
4173 7, 6, 5, 4, 3, 2, 1, 0,
4174 );
4175 assert_eq_m128i(r, e);
4176 }
4177
4178 #[simd_test(enable = "sse2")]
4179 const fn test_mm_set1_epi64x() {
4180 let r = _mm_set1_epi64x(1);
4181 assert_eq_m128i(r, _mm_set1_epi64x(1));
4182 }
4183
4184 #[simd_test(enable = "sse2")]
4185 const fn test_mm_set1_epi32() {
4186 let r = _mm_set1_epi32(1);
4187 assert_eq_m128i(r, _mm_set1_epi32(1));
4188 }
4189
4190 #[simd_test(enable = "sse2")]
4191 const fn test_mm_set1_epi16() {
4192 let r = _mm_set1_epi16(1);
4193 assert_eq_m128i(r, _mm_set1_epi16(1));
4194 }
4195
4196 #[simd_test(enable = "sse2")]
4197 const fn test_mm_set1_epi8() {
4198 let r = _mm_set1_epi8(1);
4199 assert_eq_m128i(r, _mm_set1_epi8(1));
4200 }
4201
4202 #[simd_test(enable = "sse2")]
4203 const fn test_mm_setr_epi32() {
4204 let r = _mm_setr_epi32(0, 1, 2, 3);
4205 assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3));
4206 }
4207
4208 #[simd_test(enable = "sse2")]
4209 const fn test_mm_setr_epi16() {
4210 let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4211 assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7));
4212 }
4213
4214 #[simd_test(enable = "sse2")]
4215 const fn test_mm_setr_epi8() {
4216 #[rustfmt::skip]
4217 let r = _mm_setr_epi8(
4218 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
4219 );
4220 #[rustfmt::skip]
4221 let e = _mm_setr_epi8(
4222 0, 1, 2, 3, 4, 5, 6, 7,
4223 8, 9, 10, 11, 12, 13, 14, 15,
4224 );
4225 assert_eq_m128i(r, e);
4226 }
4227
4228 #[simd_test(enable = "sse2")]
4229 const fn test_mm_setzero_si128() {
4230 let r = _mm_setzero_si128();
4231 assert_eq_m128i(r, _mm_set1_epi64x(0));
4232 }
4233
4234 #[simd_test(enable = "sse2")]
4235 const fn test_mm_loadl_epi64() {
4236 let a = _mm_setr_epi64x(6, 5);
4237 let r = unsafe { _mm_loadl_epi64(ptr::addr_of!(a)) };
4238 assert_eq_m128i(r, _mm_setr_epi64x(6, 0));
4239 }
4240
4241 #[simd_test(enable = "sse2")]
4242 const fn test_mm_load_si128() {
4243 let a = _mm_set_epi64x(5, 6);
4244 let r = unsafe { _mm_load_si128(ptr::addr_of!(a) as *const _) };
4245 assert_eq_m128i(a, r);
4246 }
4247
4248 #[simd_test(enable = "sse2")]
4249 const fn test_mm_loadu_si128() {
4250 let a = _mm_set_epi64x(5, 6);
4251 let r = unsafe { _mm_loadu_si128(ptr::addr_of!(a) as *const _) };
4252 assert_eq_m128i(a, r);
4253 }
4254
4255 #[simd_test(enable = "sse2")]
4256 #[cfg_attr(miri, ignore)]
4259 fn test_mm_maskmoveu_si128() {
4260 let a = _mm_set1_epi8(9);
4261 #[rustfmt::skip]
4262 let mask = _mm_set_epi8(
4263 0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0,
4264 0, 0, 0, 0, 0, 0, 0, 0,
4265 );
4266 let mut r = _mm_set1_epi8(0);
4267 unsafe {
4268 _mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8);
4269 }
4270 _mm_sfence();
4271 let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4272 assert_eq_m128i(r, e);
4273 }
4274
4275 #[simd_test(enable = "sse2")]
4276 const fn test_mm_store_si128() {
4277 let a = _mm_set1_epi8(9);
4278 let mut r = _mm_set1_epi8(0);
4279 unsafe {
4280 _mm_store_si128(&mut r, a);
4281 }
4282 assert_eq_m128i(r, a);
4283 }
4284
4285 #[simd_test(enable = "sse2")]
4286 const fn test_mm_storeu_si128() {
4287 let a = _mm_set1_epi8(9);
4288 let mut r = _mm_set1_epi8(0);
4289 unsafe {
4290 _mm_storeu_si128(&mut r, a);
4291 }
4292 assert_eq_m128i(r, a);
4293 }
4294
4295 #[simd_test(enable = "sse2")]
4296 const fn test_mm_storel_epi64() {
4297 let a = _mm_setr_epi64x(2, 9);
4298 let mut r = _mm_set1_epi8(0);
4299 unsafe {
4300 _mm_storel_epi64(&mut r, a);
4301 }
4302 assert_eq_m128i(r, _mm_setr_epi64x(2, 0));
4303 }
4304
4305 #[simd_test(enable = "sse2")]
4306 #[cfg_attr(miri, ignore)]
4309 fn test_mm_stream_si128() {
4310 let a = _mm_setr_epi32(1, 2, 3, 4);
4311 let mut r = _mm_undefined_si128();
4312 unsafe {
4313 _mm_stream_si128(ptr::addr_of_mut!(r), a);
4314 }
4315 _mm_sfence();
4316 assert_eq_m128i(r, a);
4317 }
4318
4319 #[simd_test(enable = "sse2")]
4320 #[cfg_attr(miri, ignore)]
4323 fn test_mm_stream_si32() {
4324 let a: i32 = 7;
4325 let mut mem = boxed::Box::<i32>::new(-1);
4326 unsafe {
4327 _mm_stream_si32(ptr::addr_of_mut!(*mem), a);
4328 }
4329 _mm_sfence();
4330 assert_eq!(a, *mem);
4331 }
4332
4333 #[simd_test(enable = "sse2")]
4334 const fn test_mm_move_epi64() {
4335 let a = _mm_setr_epi64x(5, 6);
4336 let r = _mm_move_epi64(a);
4337 assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
4338 }
4339
4340 #[simd_test(enable = "sse2")]
4341 const fn test_mm_packs_epi16() {
4342 let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
4343 let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
4344 let r = _mm_packs_epi16(a, b);
4345 #[rustfmt::skip]
4346 assert_eq_m128i(
4347 r,
4348 _mm_setr_epi8(
4349 0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F
4350 )
4351 );
4352 }
4353
4354 #[simd_test(enable = "sse2")]
4355 const fn test_mm_packs_epi32() {
4356 let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
4357 let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
4358 let r = _mm_packs_epi32(a, b);
4359 assert_eq_m128i(
4360 r,
4361 _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF),
4362 );
4363 }
4364
4365 #[simd_test(enable = "sse2")]
4366 const fn test_mm_packus_epi16() {
4367 let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
4368 let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
4369 let r = _mm_packus_epi16(a, b);
4370 assert_eq_m128i(
4371 r,
4372 _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0),
4373 );
4374 }
4375
4376 #[simd_test(enable = "sse2")]
4377 const fn test_mm_extract_epi16() {
4378 let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7);
4379 let r1 = _mm_extract_epi16::<0>(a);
4380 let r2 = _mm_extract_epi16::<3>(a);
4381 assert_eq!(r1, 0xFFFF);
4382 assert_eq!(r2, 3);
4383 }
4384
4385 #[simd_test(enable = "sse2")]
4386 const fn test_mm_insert_epi16() {
4387 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4388 let r = _mm_insert_epi16::<0>(a, 9);
4389 let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7);
4390 assert_eq_m128i(r, e);
4391 }
4392
4393 #[simd_test(enable = "sse2")]
4394 const fn test_mm_movemask_epi8() {
4395 #[rustfmt::skip]
4396 let a = _mm_setr_epi8(
4397 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
4398 0b0101, 0b1111_0000u8 as i8, 0, 0,
4399 0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101,
4400 0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
4401 );
4402 let r = _mm_movemask_epi8(a);
4403 assert_eq!(r, 0b10100110_00100101);
4404 }
4405
4406 #[simd_test(enable = "sse2")]
4407 const fn test_mm_shuffle_epi32() {
4408 let a = _mm_setr_epi32(5, 10, 15, 20);
4409 let r = _mm_shuffle_epi32::<0b00_01_01_11>(a);
4410 let e = _mm_setr_epi32(20, 10, 10, 5);
4411 assert_eq_m128i(r, e);
4412 }
4413
4414 #[simd_test(enable = "sse2")]
4415 const fn test_mm_shufflehi_epi16() {
4416 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20);
4417 let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a);
4418 let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5);
4419 assert_eq_m128i(r, e);
4420 }
4421
4422 #[simd_test(enable = "sse2")]
4423 const fn test_mm_shufflelo_epi16() {
4424 let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4);
4425 let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a);
4426 let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4);
4427 assert_eq_m128i(r, e);
4428 }
4429
4430 #[simd_test(enable = "sse2")]
4431 const fn test_mm_unpackhi_epi8() {
4432 #[rustfmt::skip]
4433 let a = _mm_setr_epi8(
4434 0, 1, 2, 3, 4, 5, 6, 7,
4435 8, 9, 10, 11, 12, 13, 14, 15,
4436 );
4437 #[rustfmt::skip]
4438 let b = _mm_setr_epi8(
4439 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4440 );
4441 let r = _mm_unpackhi_epi8(a, b);
4442 #[rustfmt::skip]
4443 let e = _mm_setr_epi8(
4444 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
4445 );
4446 assert_eq_m128i(r, e);
4447 }
4448
4449 #[simd_test(enable = "sse2")]
4450 const fn test_mm_unpackhi_epi16() {
4451 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4452 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4453 let r = _mm_unpackhi_epi16(a, b);
4454 let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15);
4455 assert_eq_m128i(r, e);
4456 }
4457
4458 #[simd_test(enable = "sse2")]
4459 const fn test_mm_unpackhi_epi32() {
4460 let a = _mm_setr_epi32(0, 1, 2, 3);
4461 let b = _mm_setr_epi32(4, 5, 6, 7);
4462 let r = _mm_unpackhi_epi32(a, b);
4463 let e = _mm_setr_epi32(2, 6, 3, 7);
4464 assert_eq_m128i(r, e);
4465 }
4466
4467 #[simd_test(enable = "sse2")]
4468 const fn test_mm_unpackhi_epi64() {
4469 let a = _mm_setr_epi64x(0, 1);
4470 let b = _mm_setr_epi64x(2, 3);
4471 let r = _mm_unpackhi_epi64(a, b);
4472 let e = _mm_setr_epi64x(1, 3);
4473 assert_eq_m128i(r, e);
4474 }
4475
4476 #[simd_test(enable = "sse2")]
4477 const fn test_mm_unpacklo_epi8() {
4478 #[rustfmt::skip]
4479 let a = _mm_setr_epi8(
4480 0, 1, 2, 3, 4, 5, 6, 7,
4481 8, 9, 10, 11, 12, 13, 14, 15,
4482 );
4483 #[rustfmt::skip]
4484 let b = _mm_setr_epi8(
4485 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4486 );
4487 let r = _mm_unpacklo_epi8(a, b);
4488 #[rustfmt::skip]
4489 let e = _mm_setr_epi8(
4490 0, 16, 1, 17, 2, 18, 3, 19,
4491 4, 20, 5, 21, 6, 22, 7, 23,
4492 );
4493 assert_eq_m128i(r, e);
4494 }
4495
4496 #[simd_test(enable = "sse2")]
4497 const fn test_mm_unpacklo_epi16() {
4498 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4499 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4500 let r = _mm_unpacklo_epi16(a, b);
4501 let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
4502 assert_eq_m128i(r, e);
4503 }
4504
4505 #[simd_test(enable = "sse2")]
4506 const fn test_mm_unpacklo_epi32() {
4507 let a = _mm_setr_epi32(0, 1, 2, 3);
4508 let b = _mm_setr_epi32(4, 5, 6, 7);
4509 let r = _mm_unpacklo_epi32(a, b);
4510 let e = _mm_setr_epi32(0, 4, 1, 5);
4511 assert_eq_m128i(r, e);
4512 }
4513
4514 #[simd_test(enable = "sse2")]
4515 const fn test_mm_unpacklo_epi64() {
4516 let a = _mm_setr_epi64x(0, 1);
4517 let b = _mm_setr_epi64x(2, 3);
4518 let r = _mm_unpacklo_epi64(a, b);
4519 let e = _mm_setr_epi64x(0, 2);
4520 assert_eq_m128i(r, e);
4521 }
4522
4523 #[simd_test(enable = "sse2")]
4524 const fn test_mm_add_sd() {
4525 let a = _mm_setr_pd(1.0, 2.0);
4526 let b = _mm_setr_pd(5.0, 10.0);
4527 let r = _mm_add_sd(a, b);
4528 assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0));
4529 }
4530
4531 #[simd_test(enable = "sse2")]
4532 const fn test_mm_add_pd() {
4533 let a = _mm_setr_pd(1.0, 2.0);
4534 let b = _mm_setr_pd(5.0, 10.0);
4535 let r = _mm_add_pd(a, b);
4536 assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0));
4537 }
4538
4539 #[simd_test(enable = "sse2")]
4540 const fn test_mm_div_sd() {
4541 let a = _mm_setr_pd(1.0, 2.0);
4542 let b = _mm_setr_pd(5.0, 10.0);
4543 let r = _mm_div_sd(a, b);
4544 assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0));
4545 }
4546
4547 #[simd_test(enable = "sse2")]
4548 const fn test_mm_div_pd() {
4549 let a = _mm_setr_pd(1.0, 2.0);
4550 let b = _mm_setr_pd(5.0, 10.0);
4551 let r = _mm_div_pd(a, b);
4552 assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2));
4553 }
4554
4555 #[simd_test(enable = "sse2")]
4556 fn test_mm_max_sd() {
4557 let a = _mm_setr_pd(1.0, 2.0);
4558 let b = _mm_setr_pd(5.0, 10.0);
4559 let r = _mm_max_sd(a, b);
4560 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4561 }
4562
4563 #[simd_test(enable = "sse2")]
4564 fn test_mm_max_pd() {
4565 let a = _mm_setr_pd(1.0, 2.0);
4566 let b = _mm_setr_pd(5.0, 10.0);
4567 let r = _mm_max_pd(a, b);
4568 assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0));
4569
4570 let a = _mm_setr_pd(-0.0, 0.0);
4572 let b = _mm_setr_pd(0.0, 0.0);
4573 let r1 = _mm_castpd_si128(_mm_max_pd(a, b));
4575 let r2 = _mm_castpd_si128(_mm_max_pd(b, a));
4576 let a = _mm_castpd_si128(a);
4577 let b = _mm_castpd_si128(b);
4578 assert_eq_m128i(r1, b);
4579 assert_eq_m128i(r2, a);
4580 assert_ne!(a.as_u8x16(), b.as_u8x16()); }
4582
4583 #[simd_test(enable = "sse2")]
4584 fn test_mm_min_sd() {
4585 let a = _mm_setr_pd(1.0, 2.0);
4586 let b = _mm_setr_pd(5.0, 10.0);
4587 let r = _mm_min_sd(a, b);
4588 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4589 }
4590
4591 #[simd_test(enable = "sse2")]
4592 fn test_mm_min_pd() {
4593 let a = _mm_setr_pd(1.0, 2.0);
4594 let b = _mm_setr_pd(5.0, 10.0);
4595 let r = _mm_min_pd(a, b);
4596 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4597
4598 let a = _mm_setr_pd(-0.0, 0.0);
4600 let b = _mm_setr_pd(0.0, 0.0);
4601 let r1 = _mm_castpd_si128(_mm_min_pd(a, b));
4603 let r2 = _mm_castpd_si128(_mm_min_pd(b, a));
4604 let a = _mm_castpd_si128(a);
4605 let b = _mm_castpd_si128(b);
4606 assert_eq_m128i(r1, b);
4607 assert_eq_m128i(r2, a);
4608 assert_ne!(a.as_u8x16(), b.as_u8x16()); }
4610
4611 #[simd_test(enable = "sse2")]
4612 const fn test_mm_mul_sd() {
4613 let a = _mm_setr_pd(1.0, 2.0);
4614 let b = _mm_setr_pd(5.0, 10.0);
4615 let r = _mm_mul_sd(a, b);
4616 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4617 }
4618
4619 #[simd_test(enable = "sse2")]
4620 const fn test_mm_mul_pd() {
4621 let a = _mm_setr_pd(1.0, 2.0);
4622 let b = _mm_setr_pd(5.0, 10.0);
4623 let r = _mm_mul_pd(a, b);
4624 assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0));
4625 }
4626
4627 #[simd_test(enable = "sse2")]
4628 fn test_mm_sqrt_sd() {
4629 let a = _mm_setr_pd(1.0, 2.0);
4630 let b = _mm_setr_pd(5.0, 10.0);
4631 let r = _mm_sqrt_sd(a, b);
4632 assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0));
4633 }
4634
4635 #[simd_test(enable = "sse2")]
4636 fn test_mm_sqrt_pd() {
4637 let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
4638 assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
4639 }
4640
4641 #[simd_test(enable = "sse2")]
4642 const fn test_mm_sub_sd() {
4643 let a = _mm_setr_pd(1.0, 2.0);
4644 let b = _mm_setr_pd(5.0, 10.0);
4645 let r = _mm_sub_sd(a, b);
4646 assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0));
4647 }
4648
4649 #[simd_test(enable = "sse2")]
4650 const fn test_mm_sub_pd() {
4651 let a = _mm_setr_pd(1.0, 2.0);
4652 let b = _mm_setr_pd(5.0, 10.0);
4653 let r = _mm_sub_pd(a, b);
4654 assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0));
4655 }
4656
4657 #[simd_test(enable = "sse2")]
4658 const fn test_mm_and_pd() {
4659 let a = f64x2::from_bits(u64x2::splat(5)).as_m128d();
4660 let b = f64x2::from_bits(u64x2::splat(3)).as_m128d();
4661 let r = _mm_and_pd(a, b);
4662 let e = f64x2::from_bits(u64x2::splat(1)).as_m128d();
4663 assert_eq_m128d(r, e);
4664 }
4665
4666 #[simd_test(enable = "sse2")]
4667 const fn test_mm_andnot_pd() {
4668 let a = f64x2::from_bits(u64x2::splat(5)).as_m128d();
4669 let b = f64x2::from_bits(u64x2::splat(3)).as_m128d();
4670 let r = _mm_andnot_pd(a, b);
4671 let e = f64x2::from_bits(u64x2::splat(2)).as_m128d();
4672 assert_eq_m128d(r, e);
4673 }
4674
4675 #[simd_test(enable = "sse2")]
4676 const fn test_mm_or_pd() {
4677 let a = f64x2::from_bits(u64x2::splat(5)).as_m128d();
4678 let b = f64x2::from_bits(u64x2::splat(3)).as_m128d();
4679 let r = _mm_or_pd(a, b);
4680 let e = f64x2::from_bits(u64x2::splat(7)).as_m128d();
4681 assert_eq_m128d(r, e);
4682 }
4683
4684 #[simd_test(enable = "sse2")]
4685 const fn test_mm_xor_pd() {
4686 let a = f64x2::from_bits(u64x2::splat(5)).as_m128d();
4687 let b = f64x2::from_bits(u64x2::splat(3)).as_m128d();
4688 let r = _mm_xor_pd(a, b);
4689 let e = f64x2::from_bits(u64x2::splat(6)).as_m128d();
4690 assert_eq_m128d(r, e);
4691 }
4692
4693 #[simd_test(enable = "sse2")]
4694 fn test_mm_cmpeq_sd() {
4695 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4696 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4697 let r = _mm_castpd_si128(_mm_cmpeq_sd(a, b));
4698 assert_eq_m128i(r, e);
4699 }
4700
4701 #[simd_test(enable = "sse2")]
4702 fn test_mm_cmplt_sd() {
4703 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4704 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4705 let r = _mm_castpd_si128(_mm_cmplt_sd(a, b));
4706 assert_eq_m128i(r, e);
4707 }
4708
4709 #[simd_test(enable = "sse2")]
4710 fn test_mm_cmple_sd() {
4711 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4712 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4713 let r = _mm_castpd_si128(_mm_cmple_sd(a, b));
4714 assert_eq_m128i(r, e);
4715 }
4716
4717 #[simd_test(enable = "sse2")]
4718 fn test_mm_cmpgt_sd() {
4719 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4720 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4721 let r = _mm_castpd_si128(_mm_cmpgt_sd(a, b));
4722 assert_eq_m128i(r, e);
4723 }
4724
4725 #[simd_test(enable = "sse2")]
4726 fn test_mm_cmpge_sd() {
4727 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4728 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4729 let r = _mm_castpd_si128(_mm_cmpge_sd(a, b));
4730 assert_eq_m128i(r, e);
4731 }
4732
4733 #[simd_test(enable = "sse2")]
4734 fn test_mm_cmpord_sd() {
4735 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4736 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4737 let r = _mm_castpd_si128(_mm_cmpord_sd(a, b));
4738 assert_eq_m128i(r, e);
4739 }
4740
4741 #[simd_test(enable = "sse2")]
4742 fn test_mm_cmpunord_sd() {
4743 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4744 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4745 let r = _mm_castpd_si128(_mm_cmpunord_sd(a, b));
4746 assert_eq_m128i(r, e);
4747 }
4748
4749 #[simd_test(enable = "sse2")]
4750 fn test_mm_cmpneq_sd() {
4751 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4752 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4753 let r = _mm_castpd_si128(_mm_cmpneq_sd(a, b));
4754 assert_eq_m128i(r, e);
4755 }
4756
4757 #[simd_test(enable = "sse2")]
4758 fn test_mm_cmpnlt_sd() {
4759 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4760 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4761 let r = _mm_castpd_si128(_mm_cmpnlt_sd(a, b));
4762 assert_eq_m128i(r, e);
4763 }
4764
4765 #[simd_test(enable = "sse2")]
4766 fn test_mm_cmpnle_sd() {
4767 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4768 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4769 let r = _mm_castpd_si128(_mm_cmpnle_sd(a, b));
4770 assert_eq_m128i(r, e);
4771 }
4772
4773 #[simd_test(enable = "sse2")]
4774 fn test_mm_cmpngt_sd() {
4775 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4776 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4777 let r = _mm_castpd_si128(_mm_cmpngt_sd(a, b));
4778 assert_eq_m128i(r, e);
4779 }
4780
4781 #[simd_test(enable = "sse2")]
4782 fn test_mm_cmpnge_sd() {
4783 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4784 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4785 let r = _mm_castpd_si128(_mm_cmpnge_sd(a, b));
4786 assert_eq_m128i(r, e);
4787 }
4788
4789 #[simd_test(enable = "sse2")]
4790 fn test_mm_cmpeq_pd() {
4791 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4792 let e = _mm_setr_epi64x(!0, 0);
4793 let r = _mm_castpd_si128(_mm_cmpeq_pd(a, b));
4794 assert_eq_m128i(r, e);
4795 }
4796
4797 #[simd_test(enable = "sse2")]
4798 fn test_mm_cmplt_pd() {
4799 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4800 let e = _mm_setr_epi64x(0, !0);
4801 let r = _mm_castpd_si128(_mm_cmplt_pd(a, b));
4802 assert_eq_m128i(r, e);
4803 }
4804
4805 #[simd_test(enable = "sse2")]
4806 fn test_mm_cmple_pd() {
4807 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4808 let e = _mm_setr_epi64x(!0, !0);
4809 let r = _mm_castpd_si128(_mm_cmple_pd(a, b));
4810 assert_eq_m128i(r, e);
4811 }
4812
4813 #[simd_test(enable = "sse2")]
4814 fn test_mm_cmpgt_pd() {
4815 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4816 let e = _mm_setr_epi64x(0, 0);
4817 let r = _mm_castpd_si128(_mm_cmpgt_pd(a, b));
4818 assert_eq_m128i(r, e);
4819 }
4820
4821 #[simd_test(enable = "sse2")]
4822 fn test_mm_cmpge_pd() {
4823 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4824 let e = _mm_setr_epi64x(!0, 0);
4825 let r = _mm_castpd_si128(_mm_cmpge_pd(a, b));
4826 assert_eq_m128i(r, e);
4827 }
4828
4829 #[simd_test(enable = "sse2")]
4830 fn test_mm_cmpord_pd() {
4831 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4832 let e = _mm_setr_epi64x(0, !0);
4833 let r = _mm_castpd_si128(_mm_cmpord_pd(a, b));
4834 assert_eq_m128i(r, e);
4835 }
4836
4837 #[simd_test(enable = "sse2")]
4838 fn test_mm_cmpunord_pd() {
4839 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4840 let e = _mm_setr_epi64x(!0, 0);
4841 let r = _mm_castpd_si128(_mm_cmpunord_pd(a, b));
4842 assert_eq_m128i(r, e);
4843 }
4844
4845 #[simd_test(enable = "sse2")]
4846 fn test_mm_cmpneq_pd() {
4847 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4848 let e = _mm_setr_epi64x(!0, !0);
4849 let r = _mm_castpd_si128(_mm_cmpneq_pd(a, b));
4850 assert_eq_m128i(r, e);
4851 }
4852
4853 #[simd_test(enable = "sse2")]
4854 fn test_mm_cmpnlt_pd() {
4855 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4856 let e = _mm_setr_epi64x(0, 0);
4857 let r = _mm_castpd_si128(_mm_cmpnlt_pd(a, b));
4858 assert_eq_m128i(r, e);
4859 }
4860
4861 #[simd_test(enable = "sse2")]
4862 fn test_mm_cmpnle_pd() {
4863 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4864 let e = _mm_setr_epi64x(0, 0);
4865 let r = _mm_castpd_si128(_mm_cmpnle_pd(a, b));
4866 assert_eq_m128i(r, e);
4867 }
4868
4869 #[simd_test(enable = "sse2")]
4870 fn test_mm_cmpngt_pd() {
4871 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4872 let e = _mm_setr_epi64x(0, !0);
4873 let r = _mm_castpd_si128(_mm_cmpngt_pd(a, b));
4874 assert_eq_m128i(r, e);
4875 }
4876
4877 #[simd_test(enable = "sse2")]
4878 fn test_mm_cmpnge_pd() {
4879 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4880 let e = _mm_setr_epi64x(0, !0);
4881 let r = _mm_castpd_si128(_mm_cmpnge_pd(a, b));
4882 assert_eq_m128i(r, e);
4883 }
4884
4885 #[simd_test(enable = "sse2")]
4886 fn test_mm_comieq_sd() {
4887 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4888 assert!(_mm_comieq_sd(a, b) != 0);
4889
4890 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0));
4891 assert!(_mm_comieq_sd(a, b) == 0);
4892 }
4893
4894 #[simd_test(enable = "sse2")]
4895 fn test_mm_comilt_sd() {
4896 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4897 assert!(_mm_comilt_sd(a, b) == 0);
4898 }
4899
4900 #[simd_test(enable = "sse2")]
4901 fn test_mm_comile_sd() {
4902 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4903 assert!(_mm_comile_sd(a, b) != 0);
4904 }
4905
4906 #[simd_test(enable = "sse2")]
4907 fn test_mm_comigt_sd() {
4908 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4909 assert!(_mm_comigt_sd(a, b) == 0);
4910 }
4911
4912 #[simd_test(enable = "sse2")]
4913 fn test_mm_comige_sd() {
4914 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4915 assert!(_mm_comige_sd(a, b) != 0);
4916 }
4917
4918 #[simd_test(enable = "sse2")]
4919 fn test_mm_comineq_sd() {
4920 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4921 assert!(_mm_comineq_sd(a, b) == 0);
4922 }
4923
4924 #[simd_test(enable = "sse2")]
4925 fn test_mm_ucomieq_sd() {
4926 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4927 assert!(_mm_ucomieq_sd(a, b) != 0);
4928
4929 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0));
4930 assert!(_mm_ucomieq_sd(a, b) == 0);
4931 }
4932
4933 #[simd_test(enable = "sse2")]
4934 fn test_mm_ucomilt_sd() {
4935 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4936 assert!(_mm_ucomilt_sd(a, b) == 0);
4937 }
4938
4939 #[simd_test(enable = "sse2")]
4940 fn test_mm_ucomile_sd() {
4941 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4942 assert!(_mm_ucomile_sd(a, b) != 0);
4943 }
4944
4945 #[simd_test(enable = "sse2")]
4946 fn test_mm_ucomigt_sd() {
4947 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4948 assert!(_mm_ucomigt_sd(a, b) == 0);
4949 }
4950
4951 #[simd_test(enable = "sse2")]
4952 fn test_mm_ucomige_sd() {
4953 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4954 assert!(_mm_ucomige_sd(a, b) != 0);
4955 }
4956
4957 #[simd_test(enable = "sse2")]
4958 fn test_mm_ucomineq_sd() {
4959 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4960 assert!(_mm_ucomineq_sd(a, b) == 0);
4961 }
4962
4963 #[simd_test(enable = "sse2")]
4964 const fn test_mm_movemask_pd() {
4965 let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
4966 assert_eq!(r, 0b01);
4967
4968 let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
4969 assert_eq!(r, 0b11);
4970 }
4971
    // 16-byte-aligned backing storage of four `f64`s, shared by the load/store
    // tests in this module that need an aligned (or deliberately offset)
    // pointer.
    #[repr(align(16))]
    struct Memory {
        data: [f64; 4],
    }
4976
4977 #[simd_test(enable = "sse2")]
4978 const fn test_mm_load_pd() {
4979 let mem = Memory {
4980 data: [1.0f64, 2.0, 3.0, 4.0],
4981 };
4982 let vals = &mem.data;
4983 let d = vals.as_ptr();
4984
4985 let r = unsafe { _mm_load_pd(d) };
4986 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4987 }
4988
4989 #[simd_test(enable = "sse2")]
4990 const fn test_mm_load_sd() {
4991 let a = 1.;
4992 let expected = _mm_setr_pd(a, 0.);
4993 let r = unsafe { _mm_load_sd(&a) };
4994 assert_eq_m128d(r, expected);
4995 }
4996
4997 #[simd_test(enable = "sse2")]
4998 const fn test_mm_loadh_pd() {
4999 let a = _mm_setr_pd(1., 2.);
5000 let b = 3.;
5001 let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.);
5002 let r = unsafe { _mm_loadh_pd(a, &b) };
5003 assert_eq_m128d(r, expected);
5004 }
5005
5006 #[simd_test(enable = "sse2")]
5007 const fn test_mm_loadl_pd() {
5008 let a = _mm_setr_pd(1., 2.);
5009 let b = 3.;
5010 let expected = _mm_setr_pd(3., get_m128d(a, 1));
5011 let r = unsafe { _mm_loadl_pd(a, &b) };
5012 assert_eq_m128d(r, expected);
5013 }
5014
5015 #[simd_test(enable = "sse2")]
5016 #[cfg_attr(miri, ignore)]
5019 fn test_mm_stream_pd() {
5020 #[repr(align(128))]
5021 struct Memory {
5022 pub data: [f64; 2],
5023 }
5024 let a = _mm_set1_pd(7.0);
5025 let mut mem = Memory { data: [-1.0; 2] };
5026
5027 unsafe {
5028 _mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
5029 }
5030 _mm_sfence();
5031 for i in 0..2 {
5032 assert_eq!(mem.data[i], get_m128d(a, i));
5033 }
5034 }
5035
5036 #[simd_test(enable = "sse2")]
5037 const fn test_mm_store_sd() {
5038 let mut dest = 0.;
5039 let a = _mm_setr_pd(1., 2.);
5040 unsafe {
5041 _mm_store_sd(&mut dest, a);
5042 }
5043 assert_eq!(dest, _mm_cvtsd_f64(a));
5044 }
5045
5046 #[simd_test(enable = "sse2")]
5047 const fn test_mm_store_pd() {
5048 let mut mem = Memory { data: [0.0f64; 4] };
5049 let vals = &mut mem.data;
5050 let a = _mm_setr_pd(1.0, 2.0);
5051 let d = vals.as_mut_ptr();
5052
5053 unsafe {
5054 _mm_store_pd(d, *black_box(&a));
5055 }
5056 assert_eq!(vals[0], 1.0);
5057 assert_eq!(vals[1], 2.0);
5058 }
5059
5060 #[simd_test(enable = "sse2")]
5061 const fn test_mm_storeu_pd() {
5062 let mut mem = Memory { data: [0.0f64; 4] };
5064 let vals = &mut mem.data;
5065 let a = _mm_setr_pd(1.0, 2.0);
5066
5067 unsafe {
5069 let p = vals.as_mut_ptr().offset(1);
5070 _mm_storeu_pd(p, *black_box(&a));
5071 }
5072
5073 assert_eq!(*vals, [0.0, 1.0, 2.0, 0.0]);
5074 }
5075
5076 #[simd_test(enable = "sse2")]
5077 const fn test_mm_storeu_si16() {
5078 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
5079 let mut r = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
5080 unsafe {
5081 _mm_storeu_si16(ptr::addr_of_mut!(r).cast(), a);
5082 }
5083 let e = _mm_setr_epi16(1, 10, 11, 12, 13, 14, 15, 16);
5084 assert_eq_m128i(r, e);
5085 }
5086
5087 #[simd_test(enable = "sse2")]
5088 const fn test_mm_storeu_si32() {
5089 let a = _mm_setr_epi32(1, 2, 3, 4);
5090 let mut r = _mm_setr_epi32(5, 6, 7, 8);
5091 unsafe {
5092 _mm_storeu_si32(ptr::addr_of_mut!(r).cast(), a);
5093 }
5094 let e = _mm_setr_epi32(1, 6, 7, 8);
5095 assert_eq_m128i(r, e);
5096 }
5097
5098 #[simd_test(enable = "sse2")]
5099 const fn test_mm_storeu_si64() {
5100 let a = _mm_setr_epi64x(1, 2);
5101 let mut r = _mm_setr_epi64x(3, 4);
5102 unsafe {
5103 _mm_storeu_si64(ptr::addr_of_mut!(r).cast(), a);
5104 }
5105 let e = _mm_setr_epi64x(1, 4);
5106 assert_eq_m128i(r, e);
5107 }
5108
5109 #[simd_test(enable = "sse2")]
5110 const fn test_mm_store1_pd() {
5111 let mut mem = Memory { data: [0.0f64; 4] };
5112 let vals = &mut mem.data;
5113 let a = _mm_setr_pd(1.0, 2.0);
5114 let d = vals.as_mut_ptr();
5115
5116 unsafe {
5117 _mm_store1_pd(d, *black_box(&a));
5118 }
5119 assert_eq!(vals[0], 1.0);
5120 assert_eq!(vals[1], 1.0);
5121 }
5122
5123 #[simd_test(enable = "sse2")]
5124 const fn test_mm_store_pd1() {
5125 let mut mem = Memory { data: [0.0f64; 4] };
5126 let vals = &mut mem.data;
5127 let a = _mm_setr_pd(1.0, 2.0);
5128 let d = vals.as_mut_ptr();
5129
5130 unsafe {
5131 _mm_store_pd1(d, *black_box(&a));
5132 }
5133 assert_eq!(vals[0], 1.0);
5134 assert_eq!(vals[1], 1.0);
5135 }
5136
5137 #[simd_test(enable = "sse2")]
5138 const fn test_mm_storer_pd() {
5139 let mut mem = Memory { data: [0.0f64; 4] };
5140 let vals = &mut mem.data;
5141 let a = _mm_setr_pd(1.0, 2.0);
5142 let d = vals.as_mut_ptr();
5143
5144 unsafe {
5145 _mm_storer_pd(d, *black_box(&a));
5146 }
5147 assert_eq!(vals[0], 2.0);
5148 assert_eq!(vals[1], 1.0);
5149 }
5150
5151 #[simd_test(enable = "sse2")]
5152 const fn test_mm_storeh_pd() {
5153 let mut dest = 0.;
5154 let a = _mm_setr_pd(1., 2.);
5155 unsafe {
5156 _mm_storeh_pd(&mut dest, a);
5157 }
5158 assert_eq!(dest, get_m128d(a, 1));
5159 }
5160
5161 #[simd_test(enable = "sse2")]
5162 const fn test_mm_storel_pd() {
5163 let mut dest = 0.;
5164 let a = _mm_setr_pd(1., 2.);
5165 unsafe {
5166 _mm_storel_pd(&mut dest, a);
5167 }
5168 assert_eq!(dest, _mm_cvtsd_f64(a));
5169 }
5170
5171 #[simd_test(enable = "sse2")]
5172 const fn test_mm_loadr_pd() {
5173 let mut mem = Memory {
5174 data: [1.0f64, 2.0, 3.0, 4.0],
5175 };
5176 let vals = &mut mem.data;
5177 let d = vals.as_ptr();
5178
5179 let r = unsafe { _mm_loadr_pd(d) };
5180 assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0));
5181 }
5182
5183 #[simd_test(enable = "sse2")]
5184 const fn test_mm_loadu_pd() {
5185 let mut mem = Memory {
5187 data: [1.0f64, 2.0, 3.0, 4.0],
5188 };
5189 let vals = &mut mem.data;
5190
5191 let d = unsafe { vals.as_ptr().offset(1) };
5193
5194 let r = unsafe { _mm_loadu_pd(d) };
5195 let e = _mm_setr_pd(2.0, 3.0);
5196 assert_eq_m128d(r, e);
5197 }
5198
5199 #[simd_test(enable = "sse2")]
5200 const fn test_mm_loadu_si16() {
5201 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
5202 let r = unsafe { _mm_loadu_si16(ptr::addr_of!(a) as *const _) };
5203 assert_eq_m128i(r, _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0));
5204 }
5205
5206 #[simd_test(enable = "sse2")]
5207 const fn test_mm_loadu_si32() {
5208 let a = _mm_setr_epi32(1, 2, 3, 4);
5209 let r = unsafe { _mm_loadu_si32(ptr::addr_of!(a) as *const _) };
5210 assert_eq_m128i(r, _mm_setr_epi32(1, 0, 0, 0));
5211 }
5212
5213 #[simd_test(enable = "sse2")]
5214 const fn test_mm_loadu_si64() {
5215 let a = _mm_setr_epi64x(5, 6);
5216 let r = unsafe { _mm_loadu_si64(ptr::addr_of!(a) as *const _) };
5217 assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
5218 }
5219
5220 #[simd_test(enable = "sse2")]
5221 const fn test_mm_cvtpd_ps() {
5222 let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
5223 assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));
5224
5225 let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
5226 assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));
5227
5228 let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
5229 assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));
5230
5231 let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
5232 assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
5233 }
5234
5235 #[simd_test(enable = "sse2")]
5236 const fn test_mm_cvtps_pd() {
5237 let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
5238 assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));
5239
5240 let r = _mm_cvtps_pd(_mm_setr_ps(
5241 f32::MAX,
5242 f32::INFINITY,
5243 f32::NEG_INFINITY,
5244 f32::MIN,
5245 ));
5246 assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
5247 }
5248
5249 #[simd_test(enable = "sse2")]
5250 fn test_mm_cvtpd_epi32() {
5251 let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
5252 assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));
5253
5254 let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
5255 assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0));
5256
5257 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
5258 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5259
5260 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY));
5261 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5262
5263 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
5264 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5265 }
5266
5267 #[simd_test(enable = "sse2")]
5268 fn test_mm_cvtsd_si32() {
5269 let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
5270 assert_eq!(r, -2);
5271
5272 let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN));
5273 assert_eq!(r, i32::MIN);
5274
5275 let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN));
5276 assert_eq!(r, i32::MIN);
5277 }
5278
5279 #[simd_test(enable = "sse2")]
5280 fn test_mm_cvtsd_ss() {
5281 let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
5282 let b = _mm_setr_pd(2.0, -5.0);
5283
5284 let r = _mm_cvtsd_ss(a, b);
5285
5286 assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));
5287
5288 let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
5289 let b = _mm_setr_pd(f64::INFINITY, -5.0);
5290
5291 let r = _mm_cvtsd_ss(a, b);
5292
5293 assert_eq_m128(
5294 r,
5295 _mm_setr_ps(
5296 f32::INFINITY,
5297 f32::NEG_INFINITY,
5298 f32::MAX,
5299 f32::NEG_INFINITY,
5300 ),
5301 );
5302 }
5303
5304 #[simd_test(enable = "sse2")]
5305 const fn test_mm_cvtsd_f64() {
5306 let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2));
5307 assert_eq!(r, -1.1);
5308 }
5309
5310 #[simd_test(enable = "sse2")]
5311 const fn test_mm_cvtss_sd() {
5312 let a = _mm_setr_pd(-1.1, 2.2);
5313 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
5314
5315 let r = _mm_cvtss_sd(a, b);
5316 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));
5317
5318 let a = _mm_setr_pd(-1.1, f64::INFINITY);
5319 let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);
5320
5321 let r = _mm_cvtss_sd(a, b);
5322 assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
5323 }
5324
5325 #[simd_test(enable = "sse2")]
5326 fn test_mm_cvttpd_epi32() {
5327 let a = _mm_setr_pd(-1.1, 2.2);
5328 let r = _mm_cvttpd_epi32(a);
5329 assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));
5330
5331 let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
5332 let r = _mm_cvttpd_epi32(a);
5333 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5334 }
5335
5336 #[simd_test(enable = "sse2")]
5337 fn test_mm_cvttsd_si32() {
5338 let a = _mm_setr_pd(-1.1, 2.2);
5339 let r = _mm_cvttsd_si32(a);
5340 assert_eq!(r, -1);
5341
5342 let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
5343 let r = _mm_cvttsd_si32(a);
5344 assert_eq!(r, i32::MIN);
5345 }
5346
5347 #[simd_test(enable = "sse2")]
5348 fn test_mm_cvttps_epi32() {
5349 let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
5350 let r = _mm_cvttps_epi32(a);
5351 assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));
5352
5353 let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
5354 let r = _mm_cvttps_epi32(a);
5355 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
5356 }
5357
5358 #[simd_test(enable = "sse2")]
5359 const fn test_mm_set_sd() {
5360 let r = _mm_set_sd(-1.0_f64);
5361 assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64));
5362 }
5363
5364 #[simd_test(enable = "sse2")]
5365 const fn test_mm_set1_pd() {
5366 let r = _mm_set1_pd(-1.0_f64);
5367 assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64));
5368 }
5369
5370 #[simd_test(enable = "sse2")]
5371 const fn test_mm_set_pd1() {
5372 let r = _mm_set_pd1(-2.0_f64);
5373 assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64));
5374 }
5375
5376 #[simd_test(enable = "sse2")]
5377 const fn test_mm_set_pd() {
5378 let r = _mm_set_pd(1.0_f64, 5.0_f64);
5379 assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64));
5380 }
5381
5382 #[simd_test(enable = "sse2")]
5383 const fn test_mm_setr_pd() {
5384 let r = _mm_setr_pd(1.0_f64, -5.0_f64);
5385 assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64));
5386 }
5387
5388 #[simd_test(enable = "sse2")]
5389 const fn test_mm_setzero_pd() {
5390 let r = _mm_setzero_pd();
5391 assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64));
5392 }
5393
5394 #[simd_test(enable = "sse2")]
5395 const fn test_mm_load1_pd() {
5396 let d = -5.0;
5397 let r = unsafe { _mm_load1_pd(&d) };
5398 assert_eq_m128d(r, _mm_setr_pd(d, d));
5399 }
5400
5401 #[simd_test(enable = "sse2")]
5402 const fn test_mm_load_pd1() {
5403 let d = -5.0;
5404 let r = unsafe { _mm_load_pd1(&d) };
5405 assert_eq_m128d(r, _mm_setr_pd(d, d));
5406 }
5407
5408 #[simd_test(enable = "sse2")]
5409 const fn test_mm_unpackhi_pd() {
5410 let a = _mm_setr_pd(1.0, 2.0);
5411 let b = _mm_setr_pd(3.0, 4.0);
5412 let r = _mm_unpackhi_pd(a, b);
5413 assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0));
5414 }
5415
5416 #[simd_test(enable = "sse2")]
5417 const fn test_mm_unpacklo_pd() {
5418 let a = _mm_setr_pd(1.0, 2.0);
5419 let b = _mm_setr_pd(3.0, 4.0);
5420 let r = _mm_unpacklo_pd(a, b);
5421 assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0));
5422 }
5423
5424 #[simd_test(enable = "sse2")]
5425 const fn test_mm_shuffle_pd() {
5426 let a = _mm_setr_pd(1., 2.);
5427 let b = _mm_setr_pd(3., 4.);
5428 let expected = _mm_setr_pd(1., 3.);
5429 let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b);
5430 assert_eq_m128d(r, expected);
5431 }
5432
5433 #[simd_test(enable = "sse2")]
5434 const fn test_mm_move_sd() {
5435 let a = _mm_setr_pd(1., 2.);
5436 let b = _mm_setr_pd(3., 4.);
5437 let expected = _mm_setr_pd(3., 2.);
5438 let r = _mm_move_sd(a, b);
5439 assert_eq_m128d(r, expected);
5440 }
5441
5442 #[simd_test(enable = "sse2")]
5443 const fn test_mm_castpd_ps() {
5444 let a = _mm_set1_pd(0.);
5445 let expected = _mm_set1_ps(0.);
5446 let r = _mm_castpd_ps(a);
5447 assert_eq_m128(r, expected);
5448 }
5449
5450 #[simd_test(enable = "sse2")]
5451 const fn test_mm_castpd_si128() {
5452 let a = _mm_set1_pd(0.);
5453 let expected = _mm_set1_epi64x(0);
5454 let r = _mm_castpd_si128(a);
5455 assert_eq_m128i(r, expected);
5456 }
5457
5458 #[simd_test(enable = "sse2")]
5459 const fn test_mm_castps_pd() {
5460 let a = _mm_set1_ps(0.);
5461 let expected = _mm_set1_pd(0.);
5462 let r = _mm_castps_pd(a);
5463 assert_eq_m128d(r, expected);
5464 }
5465
5466 #[simd_test(enable = "sse2")]
5467 const fn test_mm_castps_si128() {
5468 let a = _mm_set1_ps(0.);
5469 let expected = _mm_set1_epi32(0);
5470 let r = _mm_castps_si128(a);
5471 assert_eq_m128i(r, expected);
5472 }
5473
5474 #[simd_test(enable = "sse2")]
5475 const fn test_mm_castsi128_pd() {
5476 let a = _mm_set1_epi64x(0);
5477 let expected = _mm_set1_pd(0.);
5478 let r = _mm_castsi128_pd(a);
5479 assert_eq_m128d(r, expected);
5480 }
5481
5482 #[simd_test(enable = "sse2")]
5483 const fn test_mm_castsi128_ps() {
5484 let a = _mm_set1_epi32(0);
5485 let expected = _mm_set1_ps(0.);
5486 let r = _mm_castsi128_ps(a);
5487 assert_eq_m128(r, expected);
5488 }
5489}