1use crate::core_arch::x86_64::{__tile1024i, Tile};
2use crate::core_arch::{simd::*, x86::*};
3
4#[cfg(test)]
5use stdarch_test::assert_instr;
6
/// Loads a tile configuration from the memory pointed to by `mem_addr`.
///
/// Forwards `mem_addr` unchanged to the `llvm.x86.ldtilecfg` intrinsic; under
/// test the compiled function is asserted to contain an `ldtilecfg`
/// instruction.
///
/// # Safety
///
/// The executing CPU must support the `amx-tile` target feature. `mem_addr`
/// is handed to the instruction without any validation.
/// NOTE(review): the unit tests in this file pass a pointer to a 64-byte
/// packed structure — confirm the required size and layout against Intel's
/// documentation of `LDTILECFG`.
#[inline]
#[target_feature(enable = "amx-tile")]
#[cfg_attr(test, assert_instr(ldtilecfg))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_loadconfig(mem_addr: *const u8) {
    ldtilecfg(mem_addr);
}
40
/// Stores the current tile configuration to the memory pointed to by
/// `mem_addr`.
///
/// Forwards `mem_addr` unchanged to the `llvm.x86.sttilecfg` intrinsic; under
/// test the compiled function is asserted to contain an `sttilecfg`
/// instruction.
///
/// # Safety
///
/// The executing CPU must support the `amx-tile` target feature. `mem_addr`
/// is written through without any validation.
/// NOTE(review): the unit tests in this file store into a 64-byte packed
/// structure — confirm the number of bytes written against Intel's
/// documentation of `STTILECFG`.
#[inline]
#[target_feature(enable = "amx-tile")]
#[cfg_attr(test, assert_instr(sttilecfg))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_storeconfig(mem_addr: *mut u8) {
    sttilecfg(mem_addr);
}
53
/// Loads tile register `DST` from memory with the `tileloadd` instruction.
///
/// `DST` is checked at compile time to fit in 3 bits (`0..=7`) and is passed,
/// together with `base` and `stride` (converted to `u64`), to the
/// `llvm.x86.tileloadd64` intrinsic. `#[rustc_legacy_const_generics(0)]` lets
/// callers write `DST` as an ordinary leading argument.
///
/// # Safety
///
/// The executing CPU must support the `amx-tile` target feature. `base` and
/// `stride` are forwarded without validation.
/// NOTE(review): presumably a tile configuration must already be loaded and
/// `base`/`stride` must describe readable memory for the configured tile
/// shape — confirm against Intel's documentation of `TILELOADD`.
#[inline]
#[rustc_legacy_const_generics(0)]
#[target_feature(enable = "amx-tile")]
#[cfg_attr(test, assert_instr(tileloadd, DST = 0))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_loadd<const DST: i32>(base: *const u8, stride: usize) {
    static_assert_uimm_bits!(DST, 3);
    tileloadd64(DST as i8, base, stride as u64);
}
66
/// Loads a tile from memory into `(*dst).tile`, using the shape stored in
/// `*dst`.
///
/// Reads `(*dst).rows` and `(*dst).colsb`, passes them with `base` and
/// `stride` (converted to `u64`) to `llvm.x86.tileloadd64.internal`, and
/// writes the returned tile to `(*dst).tile`. `rows` and `colsb` are left
/// unchanged.
///
/// # Safety
///
/// The executing CPU must support the `amx-tile` target feature. `dst` is
/// dereferenced without checks, so it must point to a valid `__tile1024i`;
/// `base` and `stride` are forwarded to the intrinsic without validation.
#[inline]
#[target_feature(enable = "amx-tile")]
#[cfg_attr(test, assert_instr(tileloadd))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn __tile_loadd(dst: *mut __tile1024i, base: *const u8, stride: usize) {
    (*dst).tile = tileloadd64_internal((*dst).rows, (*dst).colsb, base, stride as u64);
}
78
/// Executes the `tilerelease` instruction.
///
/// Calls the argument-less `llvm.x86.tilerelease` intrinsic.
/// NOTE(review): per its name this releases the tile state; the tests in this
/// file call it after every `_tile_loadconfig` — confirm exact semantics
/// against Intel's documentation of `TILERELEASE`.
///
/// # Safety
///
/// The executing CPU must support the `amx-tile` target feature.
#[inline]
#[target_feature(enable = "amx-tile")]
#[cfg_attr(test, assert_instr(tilerelease))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_release() {
    tilerelease();
}
89
/// Stores tile register `DST` to memory with the `tilestored` instruction.
///
/// `DST` is checked at compile time to fit in 3 bits (`0..=7`) and is passed,
/// together with `base` and `stride` (converted to `u64`), to the
/// `llvm.x86.tilestored64` intrinsic. Despite its name, `DST` selects the
/// tile register that is handed to the store intrinsic.
/// `#[rustc_legacy_const_generics(0)]` lets callers write `DST` as an ordinary
/// leading argument.
///
/// # Safety
///
/// The executing CPU must support the `amx-tile` target feature. `base` and
/// `stride` are forwarded without validation.
/// NOTE(review): presumably `base`/`stride` must describe writable memory for
/// the configured tile shape — confirm against Intel's documentation of
/// `TILESTORED`.
#[inline]
#[rustc_legacy_const_generics(0)]
#[target_feature(enable = "amx-tile")]
#[cfg_attr(test, assert_instr(tilestored, DST = 0))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_stored<const DST: i32>(base: *mut u8, stride: usize) {
    static_assert_uimm_bits!(DST, 3);
    tilestored64(DST as i8, base, stride as u64);
}
102
/// Stores the tile carried in `src` to memory.
///
/// Passes `src.rows`, `src.colsb`, `base`, `stride` (converted to `u64`) and
/// `src.tile` to `llvm.x86.tilestored64.internal`. Nothing is returned.
///
/// # Safety
///
/// The executing CPU must support the `amx-tile` target feature. `base` and
/// `stride` are forwarded to the intrinsic without validation.
/// NOTE(review): presumably the destination must be writable for
/// `src.rows` rows of `src.colsb` bytes spaced `stride` bytes apart — confirm
/// against Intel's documentation.
#[inline]
#[target_feature(enable = "amx-tile")]
#[cfg_attr(test, assert_instr(tilestored))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn __tile_stored(base: *mut u8, stride: usize, src: __tile1024i) {
    tilestored64_internal(src.rows, src.colsb, base, stride as u64, src.tile);
}
114
/// Loads tile register `DST` from memory with the `tileloaddt1` instruction.
///
/// `DST` is checked at compile time to fit in 3 bits (`0..=7`) and is passed,
/// together with `base` and `stride` (converted to `u64`), to the
/// `llvm.x86.tileloaddt164` intrinsic. `#[rustc_legacy_const_generics(0)]`
/// lets callers write `DST` as an ordinary leading argument.
/// NOTE(review): per Intel's documentation `TILELOADDT1` is the variant of
/// `TILELOADD` carrying a non-temporal (streaming) hint — confirm there.
///
/// # Safety
///
/// The executing CPU must support the `amx-tile` target feature. `base` and
/// `stride` are forwarded without validation.
#[inline]
#[rustc_legacy_const_generics(0)]
#[target_feature(enable = "amx-tile")]
#[cfg_attr(test, assert_instr(tileloaddt1, DST = 0))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_stream_loadd<const DST: i32>(base: *const u8, stride: usize) {
    static_assert_uimm_bits!(DST, 3);
    tileloaddt164(DST as i8, base, stride as u64);
}
129
/// Loads a tile from memory into `(*dst).tile` through the `tileloaddt1`
/// path, using the shape stored in `*dst`.
///
/// Reads `(*dst).rows` and `(*dst).colsb`, passes them with `base` and
/// `stride` (converted to `u64`) to `llvm.x86.tileloaddt164.internal`, and
/// writes the returned tile to `(*dst).tile`. `rows` and `colsb` are left
/// unchanged.
///
/// # Safety
///
/// The executing CPU must support the `amx-tile` target feature. `dst` is
/// dereferenced without checks, so it must point to a valid `__tile1024i`;
/// `base` and `stride` are forwarded to the intrinsic without validation.
#[inline]
#[target_feature(enable = "amx-tile")]
#[cfg_attr(test, assert_instr(tileloaddt1))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn __tile_stream_loadd(dst: *mut __tile1024i, base: *const u8, stride: usize) {
    (*dst).tile = tileloaddt164_internal((*dst).rows, (*dst).colsb, base, stride as u64);
}
143
/// Executes the `tilezero` instruction on tile register `DST`.
///
/// `DST` is checked at compile time to fit in 3 bits (`0..=7`) and is passed
/// to the `llvm.x86.tilezero` intrinsic. `#[rustc_legacy_const_generics(0)]`
/// lets callers write `DST` as an ordinary argument.
///
/// # Safety
///
/// The executing CPU must support the `amx-tile` target feature. No run-time
/// checks are performed here.
#[inline]
#[rustc_legacy_const_generics(0)]
#[target_feature(enable = "amx-tile")]
#[cfg_attr(test, assert_instr(tilezero, DST = 0))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_zero<const DST: i32>() {
    static_assert_uimm_bits!(DST, 3);
    tilezero(DST as i8);
}
156
/// Replaces `(*dst).tile` with the tile produced by the `tilezero` intrinsic
/// for the shape stored in `*dst`.
///
/// Reads `(*dst).rows` and `(*dst).colsb`, passes them to
/// `llvm.x86.tilezero.internal`, and writes the returned tile to
/// `(*dst).tile`. `rows` and `colsb` are left unchanged.
///
/// # Safety
///
/// The executing CPU must support the `amx-tile` target feature. `dst` is
/// dereferenced without checks, so it must point to a valid `__tile1024i`.
#[inline]
#[target_feature(enable = "amx-tile")]
#[cfg_attr(test, assert_instr(tilezero))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn __tile_zero(dst: *mut __tile1024i) {
    (*dst).tile = tilezero_internal((*dst).rows, (*dst).colsb);
}
168
/// Executes the `tdpbf16ps` instruction on three tile registers chosen by
/// immediates.
///
/// `DST`, `A` and `B` are each checked at compile time to fit in 3 bits
/// (`0..=7`) and are passed, in that order, to the `llvm.x86.tdpbf16ps`
/// intrinsic. `#[rustc_legacy_const_generics(0, 1, 2)]` lets callers write
/// them as ordinary leading arguments.
/// NOTE(review): per Intel's documentation this is a dot-product of BF16
/// pairs accumulated into single-precision elements of `DST` — confirm there.
///
/// # Safety
///
/// The executing CPU must support the `amx-bf16` target feature. No run-time
/// checks are performed here.
#[inline]
#[rustc_legacy_const_generics(0, 1, 2)]
#[target_feature(enable = "amx-bf16")]
#[cfg_attr(test, assert_instr(tdpbf16ps, DST = 0, A = 1, B = 2))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_dpbf16ps<const DST: i32, const A: i32, const B: i32>() {
    static_assert_uimm_bits!(DST, 3);
    static_assert_uimm_bits!(A, 3);
    static_assert_uimm_bits!(B, 3);
    tdpbf16ps(DST as i8, A as i8, B as i8);
}
185
/// Applies the `tdpbf16ps` operation to tiles carried in [`__tile1024i`]
/// values, updating `(*dst).tile`.
///
/// Calls `llvm.x86.tdpbf16ps.internal` with the shape triple
/// `(a.rows, b.colsb, a.colsb)` followed by the tiles `(*dst).tile`, `a.tile`
/// and `b.tile`, then stores the returned tile in `(*dst).tile`. The `rows`
/// and `colsb` fields of `*dst` are neither read nor written.
///
/// # Safety
///
/// The executing CPU must support the `amx-bf16` target feature. `dst` is
/// dereferenced without checks, so it must point to a valid `__tile1024i`.
#[inline]
#[target_feature(enable = "amx-bf16")]
#[cfg_attr(test, assert_instr(tdpbf16ps))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn __tile_dpbf16ps(dst: *mut __tile1024i, a: __tile1024i, b: __tile1024i) {
    (*dst).tile = tdpbf16ps_internal(a.rows, b.colsb, a.colsb, (*dst).tile, a.tile, b.tile);
}
199
/// Executes the `tdpbssd` instruction on three tile registers chosen by
/// immediates.
///
/// `DST`, `A` and `B` are each checked at compile time to fit in 3 bits
/// (`0..=7`) and are passed, in that order, to the `llvm.x86.tdpbssd`
/// intrinsic. `#[rustc_legacy_const_generics(0, 1, 2)]` lets callers write
/// them as ordinary leading arguments.
/// NOTE(review): per Intel's documentation this is a dot-product of signed
/// bytes in `A` with signed bytes in `B`, accumulated into 32-bit integers in
/// `DST` — confirm there.
///
/// # Safety
///
/// The executing CPU must support the `amx-int8` target feature. No run-time
/// checks are performed here.
#[inline]
#[rustc_legacy_const_generics(0, 1, 2)]
#[target_feature(enable = "amx-int8")]
#[cfg_attr(test, assert_instr(tdpbssd, DST = 0, A = 1, B = 2))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_dpbssd<const DST: i32, const A: i32, const B: i32>() {
    static_assert_uimm_bits!(DST, 3);
    static_assert_uimm_bits!(A, 3);
    static_assert_uimm_bits!(B, 3);
    tdpbssd(DST as i8, A as i8, B as i8);
}
217
/// Applies the `tdpbssd` operation to tiles carried in [`__tile1024i`]
/// values, updating `(*dst).tile`.
///
/// Calls `llvm.x86.tdpbssd.internal` with the shape triple
/// `(a.rows, b.colsb, a.colsb)` followed by the tiles `(*dst).tile`, `a.tile`
/// and `b.tile`, then stores the returned tile in `(*dst).tile`. The `rows`
/// and `colsb` fields of `*dst` are neither read nor written.
///
/// # Safety
///
/// The executing CPU must support the `amx-int8` target feature. `dst` is
/// dereferenced without checks, so it must point to a valid `__tile1024i`.
#[inline]
#[target_feature(enable = "amx-int8")]
#[cfg_attr(test, assert_instr(tdpbssd))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn __tile_dpbssd(dst: *mut __tile1024i, a: __tile1024i, b: __tile1024i) {
    (*dst).tile = tdpbssd_internal(a.rows, b.colsb, a.colsb, (*dst).tile, a.tile, b.tile);
}
232
/// Executes the `tdpbsud` instruction on three tile registers chosen by
/// immediates.
///
/// `DST`, `A` and `B` are each checked at compile time to fit in 3 bits
/// (`0..=7`) and are passed, in that order, to the `llvm.x86.tdpbsud`
/// intrinsic. `#[rustc_legacy_const_generics(0, 1, 2)]` lets callers write
/// them as ordinary leading arguments.
/// NOTE(review): per Intel's documentation this is a dot-product of signed
/// bytes in `A` with unsigned bytes in `B`, accumulated into 32-bit integers
/// in `DST` — confirm there.
///
/// # Safety
///
/// The executing CPU must support the `amx-int8` target feature. No run-time
/// checks are performed here.
#[inline]
#[rustc_legacy_const_generics(0, 1, 2)]
#[target_feature(enable = "amx-int8")]
#[cfg_attr(test, assert_instr(tdpbsud, DST = 0, A = 1, B = 2))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_dpbsud<const DST: i32, const A: i32, const B: i32>() {
    static_assert_uimm_bits!(DST, 3);
    static_assert_uimm_bits!(A, 3);
    static_assert_uimm_bits!(B, 3);
    tdpbsud(DST as i8, A as i8, B as i8);
}
250
/// Applies the `tdpbsud` operation to tiles carried in [`__tile1024i`]
/// values, updating `(*dst).tile`.
///
/// Calls `llvm.x86.tdpbsud.internal` with the shape triple
/// `(a.rows, b.colsb, a.colsb)` followed by the tiles `(*dst).tile`, `a.tile`
/// and `b.tile`, then stores the returned tile in `(*dst).tile`. The `rows`
/// and `colsb` fields of `*dst` are neither read nor written.
///
/// # Safety
///
/// The executing CPU must support the `amx-int8` target feature. `dst` is
/// dereferenced without checks, so it must point to a valid `__tile1024i`.
#[inline]
#[target_feature(enable = "amx-int8")]
#[cfg_attr(test, assert_instr(tdpbsud))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn __tile_dpbsud(dst: *mut __tile1024i, a: __tile1024i, b: __tile1024i) {
    (*dst).tile = tdpbsud_internal(a.rows, b.colsb, a.colsb, (*dst).tile, a.tile, b.tile);
}
265
/// Executes the `tdpbusd` instruction on three tile registers chosen by
/// immediates.
///
/// `DST`, `A` and `B` are each checked at compile time to fit in 3 bits
/// (`0..=7`) and are passed, in that order, to the `llvm.x86.tdpbusd`
/// intrinsic. `#[rustc_legacy_const_generics(0, 1, 2)]` lets callers write
/// them as ordinary leading arguments.
/// NOTE(review): per Intel's documentation this is a dot-product of unsigned
/// bytes in `A` with signed bytes in `B`, accumulated into 32-bit integers in
/// `DST` — confirm there.
///
/// # Safety
///
/// The executing CPU must support the `amx-int8` target feature. No run-time
/// checks are performed here.
#[inline]
#[rustc_legacy_const_generics(0, 1, 2)]
#[target_feature(enable = "amx-int8")]
#[cfg_attr(test, assert_instr(tdpbusd, DST = 0, A = 1, B = 2))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_dpbusd<const DST: i32, const A: i32, const B: i32>() {
    static_assert_uimm_bits!(DST, 3);
    static_assert_uimm_bits!(A, 3);
    static_assert_uimm_bits!(B, 3);
    tdpbusd(DST as i8, A as i8, B as i8);
}
283
/// Applies the `tdpbusd` operation to tiles carried in [`__tile1024i`]
/// values, updating `(*dst).tile`.
///
/// Calls `llvm.x86.tdpbusd.internal` with the shape triple
/// `(a.rows, b.colsb, a.colsb)` followed by the tiles `(*dst).tile`, `a.tile`
/// and `b.tile`, then stores the returned tile in `(*dst).tile`. The `rows`
/// and `colsb` fields of `*dst` are neither read nor written.
///
/// # Safety
///
/// The executing CPU must support the `amx-int8` target feature. `dst` is
/// dereferenced without checks, so it must point to a valid `__tile1024i`.
#[inline]
#[target_feature(enable = "amx-int8")]
#[cfg_attr(test, assert_instr(tdpbusd))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn __tile_dpbusd(dst: *mut __tile1024i, a: __tile1024i, b: __tile1024i) {
    (*dst).tile = tdpbusd_internal(a.rows, b.colsb, a.colsb, (*dst).tile, a.tile, b.tile);
}
298
/// Executes the `tdpbuud` instruction on three tile registers chosen by
/// immediates.
///
/// `DST`, `A` and `B` are each checked at compile time to fit in 3 bits
/// (`0..=7`) and are passed, in that order, to the `llvm.x86.tdpbuud`
/// intrinsic. `#[rustc_legacy_const_generics(0, 1, 2)]` lets callers write
/// them as ordinary leading arguments.
/// NOTE(review): per Intel's documentation this is a dot-product of unsigned
/// bytes in `A` with unsigned bytes in `B`, accumulated into 32-bit integers
/// in `DST` — confirm there.
///
/// # Safety
///
/// The executing CPU must support the `amx-int8` target feature. No run-time
/// checks are performed here.
#[inline]
#[rustc_legacy_const_generics(0, 1, 2)]
#[target_feature(enable = "amx-int8")]
#[cfg_attr(test, assert_instr(tdpbuud, DST = 0, A = 1, B = 2))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_dpbuud<const DST: i32, const A: i32, const B: i32>() {
    static_assert_uimm_bits!(DST, 3);
    static_assert_uimm_bits!(A, 3);
    static_assert_uimm_bits!(B, 3);
    tdpbuud(DST as i8, A as i8, B as i8);
}
316
/// Applies the `tdpbuud` operation to tiles carried in [`__tile1024i`]
/// values, updating `(*dst).tile`.
///
/// Calls `llvm.x86.tdpbuud.internal` with the shape triple
/// `(a.rows, b.colsb, a.colsb)` followed by the tiles `(*dst).tile`, `a.tile`
/// and `b.tile`, then stores the returned tile in `(*dst).tile`. The `rows`
/// and `colsb` fields of `*dst` are neither read nor written.
///
/// # Safety
///
/// The executing CPU must support the `amx-int8` target feature. `dst` is
/// dereferenced without checks, so it must point to a valid `__tile1024i`.
#[inline]
#[target_feature(enable = "amx-int8")]
#[cfg_attr(test, assert_instr(tdpbuud))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn __tile_dpbuud(dst: *mut __tile1024i, a: __tile1024i, b: __tile1024i) {
    (*dst).tile = tdpbuud_internal(a.rows, b.colsb, a.colsb, (*dst).tile, a.tile, b.tile);
}
331
/// Executes the `tdpfp16ps` instruction on three tile registers chosen by
/// immediates.
///
/// `DST`, `A` and `B` are each checked at compile time to fit in 3 bits
/// (`0..=7`) and are passed, in that order, to the `llvm.x86.tdpfp16ps`
/// intrinsic. `#[rustc_legacy_const_generics(0, 1, 2)]` lets callers write
/// them as ordinary leading arguments.
/// NOTE(review): per Intel's documentation this is a dot-product of FP16
/// pairs accumulated into single-precision elements of `DST` — confirm there.
///
/// # Safety
///
/// The executing CPU must support the `amx-fp16` target feature. No run-time
/// checks are performed here.
#[inline]
#[rustc_legacy_const_generics(0, 1, 2)]
#[target_feature(enable = "amx-fp16")]
#[cfg_attr(test, assert_instr(tdpfp16ps, DST = 0, A = 1, B = 2))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_dpfp16ps<const DST: i32, const A: i32, const B: i32>() {
    static_assert_uimm_bits!(DST, 3);
    static_assert_uimm_bits!(A, 3);
    static_assert_uimm_bits!(B, 3);
    tdpfp16ps(DST as i8, A as i8, B as i8);
}
348
/// Applies the `tdpfp16ps` operation to tiles carried in [`__tile1024i`]
/// values, updating `(*dst).tile`.
///
/// Calls `llvm.x86.tdpfp16ps.internal` with the shape triple
/// `(a.rows, b.colsb, a.colsb)` followed by the tiles `(*dst).tile`, `a.tile`
/// and `b.tile`, then stores the returned tile in `(*dst).tile`. The `rows`
/// and `colsb` fields of `*dst` are neither read nor written.
///
/// # Safety
///
/// The executing CPU must support the `amx-fp16` target feature. `dst` is
/// dereferenced without checks, so it must point to a valid `__tile1024i`.
#[inline]
#[target_feature(enable = "amx-fp16")]
#[cfg_attr(test, assert_instr(tdpfp16ps))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn __tile_dpfp16ps(dst: *mut __tile1024i, a: __tile1024i, b: __tile1024i) {
    (*dst).tile = tdpfp16ps_internal(a.rows, b.colsb, a.colsb, (*dst).tile, a.tile, b.tile);
}
362
/// Executes the `tcmmimfp16ps` instruction on three tile registers chosen by
/// immediates.
///
/// `DST`, `A` and `B` are each checked at compile time to fit in 3 bits
/// (`0..=7`) and are passed, in that order, to the `llvm.x86.tcmmimfp16ps`
/// intrinsic. `#[rustc_legacy_const_generics(0, 1, 2)]` lets callers write
/// them as ordinary leading arguments.
/// NOTE(review): per Intel's documentation this computes the imaginary part
/// of a complex FP16 matrix multiply, accumulated in single precision —
/// confirm there.
///
/// # Safety
///
/// The executing CPU must support the `amx-complex` target feature. No
/// run-time checks are performed here.
#[inline]
#[rustc_legacy_const_generics(0, 1, 2)]
#[target_feature(enable = "amx-complex")]
#[cfg_attr(test, assert_instr(tcmmimfp16ps, DST = 0, A = 1, B = 2))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_cmmimfp16ps<const DST: i32, const A: i32, const B: i32>() {
    static_assert_uimm_bits!(DST, 3);
    static_assert_uimm_bits!(A, 3);
    static_assert_uimm_bits!(B, 3);
    tcmmimfp16ps(DST as i8, A as i8, B as i8);
}
383
/// Applies the `tcmmimfp16ps` operation to tiles carried in [`__tile1024i`]
/// values, updating `(*dst).tile`.
///
/// Calls `llvm.x86.tcmmimfp16ps.internal` with the shape triple
/// `(a.rows, b.colsb, a.colsb)` followed by the tiles `(*dst).tile`, `a.tile`
/// and `b.tile`, then stores the returned tile in `(*dst).tile`. The `rows`
/// and `colsb` fields of `*dst` are neither read nor written.
///
/// # Safety
///
/// The executing CPU must support the `amx-complex` target feature. `dst` is
/// dereferenced without checks, so it must point to a valid `__tile1024i`.
#[inline]
#[target_feature(enable = "amx-complex")]
#[cfg_attr(test, assert_instr(tcmmimfp16ps))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn __tile_cmmimfp16ps(dst: *mut __tile1024i, a: __tile1024i, b: __tile1024i) {
    (*dst).tile = tcmmimfp16ps_internal(a.rows, b.colsb, a.colsb, (*dst).tile, a.tile, b.tile);
}
401
/// Executes the `tcmmrlfp16ps` instruction on three tile registers chosen by
/// immediates.
///
/// `DST`, `A` and `B` are each checked at compile time to fit in 3 bits
/// (`0..=7`) and are passed, in that order, to the `llvm.x86.tcmmrlfp16ps`
/// intrinsic. `#[rustc_legacy_const_generics(0, 1, 2)]` lets callers write
/// them as ordinary leading arguments.
/// NOTE(review): per Intel's documentation this computes the real part of a
/// complex FP16 matrix multiply, accumulated in single precision — confirm
/// there.
///
/// # Safety
///
/// The executing CPU must support the `amx-complex` target feature. No
/// run-time checks are performed here.
#[inline]
#[rustc_legacy_const_generics(0, 1, 2)]
#[target_feature(enable = "amx-complex")]
#[cfg_attr(test, assert_instr(tcmmrlfp16ps, DST = 0, A = 1, B = 2))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_cmmrlfp16ps<const DST: i32, const A: i32, const B: i32>() {
    static_assert_uimm_bits!(DST, 3);
    static_assert_uimm_bits!(A, 3);
    static_assert_uimm_bits!(B, 3);
    tcmmrlfp16ps(DST as i8, A as i8, B as i8);
}
422
/// Applies the `tcmmrlfp16ps` operation to tiles carried in [`__tile1024i`]
/// values, updating `(*dst).tile`.
///
/// Calls `llvm.x86.tcmmrlfp16ps.internal` with the shape triple
/// `(a.rows, b.colsb, a.colsb)` followed by the tiles `(*dst).tile`, `a.tile`
/// and `b.tile`, then stores the returned tile in `(*dst).tile`. The `rows`
/// and `colsb` fields of `*dst` are neither read nor written.
///
/// # Safety
///
/// The executing CPU must support the `amx-complex` target feature. `dst` is
/// dereferenced without checks, so it must point to a valid `__tile1024i`.
#[inline]
#[target_feature(enable = "amx-complex")]
#[cfg_attr(test, assert_instr(tcmmrlfp16ps))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn __tile_cmmrlfp16ps(dst: *mut __tile1024i, a: __tile1024i, b: __tile1024i) {
    (*dst).tile = tcmmrlfp16ps_internal(a.rows, b.colsb, a.colsb, (*dst).tile, a.tile, b.tile);
}
440
/// Executes the `tdpbf8ps` instruction on three tile registers chosen by
/// immediates.
///
/// `DST`, `A` and `B` are each checked at compile time to fit in 3 bits
/// (`0..=7`) and are passed, in that order, to the `llvm.x86.tdpbf8ps`
/// intrinsic. `#[rustc_legacy_const_generics(0, 1, 2)]` lets callers write
/// them as ordinary leading arguments. The instruction assertion is skipped
/// when the target vendor is `apple`.
/// NOTE(review): per Intel's documentation this is a dot-product of BF8
/// elements from `A` and `B` accumulated in single precision — confirm there.
///
/// # Safety
///
/// The executing CPU must support the `amx-fp8` target feature. No run-time
/// checks are performed here.
#[inline]
#[rustc_legacy_const_generics(0, 1, 2)]
#[target_feature(enable = "amx-fp8")]
#[cfg_attr(
    all(test, not(target_vendor = "apple")),
    assert_instr(tdpbf8ps, DST = 0, A = 1, B = 2)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_dpbf8ps<const DST: i32, const A: i32, const B: i32>() {
    static_assert_uimm_bits!(DST, 3);
    static_assert_uimm_bits!(A, 3);
    static_assert_uimm_bits!(B, 3);
    tdpbf8ps(DST as i8, A as i8, B as i8);
}
459
/// Applies the `tdpbf8ps` operation to tiles carried in [`__tile1024i`]
/// values, updating `(*dst).tile`.
///
/// Calls `llvm.x86.tdpbf8ps.internal` with the shape triple
/// `(a.rows, b.colsb, a.colsb)` followed by the tiles `(*dst).tile`, `a.tile`
/// and `b.tile`, then stores the returned tile in `(*dst).tile`. The `rows`
/// and `colsb` fields of `*dst` are neither read nor written.
///
/// # Safety
///
/// The executing CPU must support the `amx-fp8` target feature. `dst` is
/// dereferenced without checks, so it must point to a valid `__tile1024i`.
#[inline]
#[target_feature(enable = "amx-fp8")]
#[cfg_attr(all(test, not(target_vendor = "apple")), assert_instr(tdpbf8ps))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn __tile_dpbf8ps(dst: *mut __tile1024i, a: __tile1024i, b: __tile1024i) {
    (*dst).tile = tdpbf8ps_internal(a.rows, b.colsb, a.colsb, (*dst).tile, a.tile, b.tile);
}
472
/// Executes the `tdpbhf8ps` instruction on three tile registers chosen by
/// immediates.
///
/// `DST`, `A` and `B` are each checked at compile time to fit in 3 bits
/// (`0..=7`) and are passed, in that order, to the `llvm.x86.tdpbhf8ps`
/// intrinsic. `#[rustc_legacy_const_generics(0, 1, 2)]` lets callers write
/// them as ordinary leading arguments. The instruction assertion is skipped
/// when the target vendor is `apple`.
/// NOTE(review): per Intel's documentation this is a dot-product of BF8
/// elements from `A` with HF8 elements from `B`, accumulated in single
/// precision — confirm there.
///
/// # Safety
///
/// The executing CPU must support the `amx-fp8` target feature. No run-time
/// checks are performed here.
#[inline]
#[rustc_legacy_const_generics(0, 1, 2)]
#[target_feature(enable = "amx-fp8")]
#[cfg_attr(
    all(test, not(target_vendor = "apple")),
    assert_instr(tdpbhf8ps, DST = 0, A = 1, B = 2)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_dpbhf8ps<const DST: i32, const A: i32, const B: i32>() {
    static_assert_uimm_bits!(DST, 3);
    static_assert_uimm_bits!(A, 3);
    static_assert_uimm_bits!(B, 3);
    tdpbhf8ps(DST as i8, A as i8, B as i8);
}
491
/// Applies the `tdpbhf8ps` operation to tiles carried in [`__tile1024i`]
/// values, updating `(*dst).tile`.
///
/// Calls `llvm.x86.tdpbhf8ps.internal` with the shape triple
/// `(a.rows, b.colsb, a.colsb)` followed by the tiles `(*dst).tile`, `a.tile`
/// and `b.tile`, then stores the returned tile in `(*dst).tile`. The `rows`
/// and `colsb` fields of `*dst` are neither read nor written.
///
/// # Safety
///
/// The executing CPU must support the `amx-fp8` target feature. `dst` is
/// dereferenced without checks, so it must point to a valid `__tile1024i`.
#[inline]
#[target_feature(enable = "amx-fp8")]
#[cfg_attr(all(test, not(target_vendor = "apple")), assert_instr(tdpbhf8ps))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn __tile_dpbhf8ps(dst: *mut __tile1024i, a: __tile1024i, b: __tile1024i) {
    (*dst).tile = tdpbhf8ps_internal(a.rows, b.colsb, a.colsb, (*dst).tile, a.tile, b.tile);
}
504
/// Executes the `tdphbf8ps` instruction on three tile registers chosen by
/// immediates.
///
/// `DST`, `A` and `B` are each checked at compile time to fit in 3 bits
/// (`0..=7`) and are passed, in that order, to the `llvm.x86.tdphbf8ps`
/// intrinsic. `#[rustc_legacy_const_generics(0, 1, 2)]` lets callers write
/// them as ordinary leading arguments. The instruction assertion is skipped
/// when the target vendor is `apple`.
/// NOTE(review): per Intel's documentation this is a dot-product of HF8
/// elements from `A` with BF8 elements from `B`, accumulated in single
/// precision — confirm there.
///
/// # Safety
///
/// The executing CPU must support the `amx-fp8` target feature. No run-time
/// checks are performed here.
#[inline]
#[rustc_legacy_const_generics(0, 1, 2)]
#[target_feature(enable = "amx-fp8")]
#[cfg_attr(
    all(test, not(target_vendor = "apple")),
    assert_instr(tdphbf8ps, DST = 0, A = 1, B = 2)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_dphbf8ps<const DST: i32, const A: i32, const B: i32>() {
    static_assert_uimm_bits!(DST, 3);
    static_assert_uimm_bits!(A, 3);
    static_assert_uimm_bits!(B, 3);
    tdphbf8ps(DST as i8, A as i8, B as i8);
}
523
/// Applies the `tdphbf8ps` operation to tiles carried in [`__tile1024i`]
/// values, updating `(*dst).tile`.
///
/// Calls `llvm.x86.tdphbf8ps.internal` with the shape triple
/// `(a.rows, b.colsb, a.colsb)` followed by the tiles `(*dst).tile`, `a.tile`
/// and `b.tile`, then stores the returned tile in `(*dst).tile`. The `rows`
/// and `colsb` fields of `*dst` are neither read nor written.
///
/// # Safety
///
/// The executing CPU must support the `amx-fp8` target feature. `dst` is
/// dereferenced without checks, so it must point to a valid `__tile1024i`.
#[inline]
#[target_feature(enable = "amx-fp8")]
#[cfg_attr(all(test, not(target_vendor = "apple")), assert_instr(tdphbf8ps))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn __tile_dphbf8ps(dst: *mut __tile1024i, a: __tile1024i, b: __tile1024i) {
    (*dst).tile = tdphbf8ps_internal(a.rows, b.colsb, a.colsb, (*dst).tile, a.tile, b.tile);
}
536
/// Executes the `tdphf8ps` instruction on three tile registers chosen by
/// immediates.
///
/// `DST`, `A` and `B` are each checked at compile time to fit in 3 bits
/// (`0..=7`) and are passed, in that order, to the `llvm.x86.tdphf8ps`
/// intrinsic. `#[rustc_legacy_const_generics(0, 1, 2)]` lets callers write
/// them as ordinary leading arguments. The instruction assertion is skipped
/// when the target vendor is `apple`.
/// NOTE(review): per Intel's documentation this is a dot-product of HF8
/// elements from `A` and `B` accumulated in single precision — confirm there.
///
/// # Safety
///
/// The executing CPU must support the `amx-fp8` target feature. No run-time
/// checks are performed here.
#[inline]
#[rustc_legacy_const_generics(0, 1, 2)]
#[target_feature(enable = "amx-fp8")]
#[cfg_attr(
    all(test, not(target_vendor = "apple")),
    assert_instr(tdphf8ps, DST = 0, A = 1, B = 2)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_dphf8ps<const DST: i32, const A: i32, const B: i32>() {
    static_assert_uimm_bits!(DST, 3);
    static_assert_uimm_bits!(A, 3);
    static_assert_uimm_bits!(B, 3);
    tdphf8ps(DST as i8, A as i8, B as i8);
}
555
/// Applies the `tdphf8ps` operation to tiles carried in [`__tile1024i`]
/// values, updating `(*dst).tile`.
///
/// Calls `llvm.x86.tdphf8ps.internal` with the shape triple
/// `(a.rows, b.colsb, a.colsb)` followed by the tiles `(*dst).tile`, `a.tile`
/// and `b.tile`, then stores the returned tile in `(*dst).tile`. The `rows`
/// and `colsb` fields of `*dst` are neither read nor written.
///
/// # Safety
///
/// The executing CPU must support the `amx-fp8` target feature. `dst` is
/// dereferenced without checks, so it must point to a valid `__tile1024i`.
#[inline]
#[target_feature(enable = "amx-fp8")]
#[cfg_attr(all(test, not(target_vendor = "apple")), assert_instr(tdphf8ps))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn __tile_dphf8ps(dst: *mut __tile1024i, a: __tile1024i, b: __tile1024i) {
    (*dst).tile = tdphf8ps_internal(a.rows, b.colsb, a.colsb, (*dst).tile, a.tile, b.tile);
}
568
/// Loads tile register `DST` from memory with the `tileloaddrs` instruction.
///
/// `DST` is checked at compile time to fit in 3 bits (`0..=7`) and is passed,
/// together with `base` and `stride` (converted to `u64`), to the
/// `llvm.x86.tileloaddrs64` intrinsic. `#[rustc_legacy_const_generics(0)]`
/// lets callers write `DST` as an ordinary leading argument. The instruction
/// assertion is skipped when the target vendor is `apple`.
/// NOTE(review): per Intel's documentation this is a tile load carrying a
/// read-shared hint — confirm there.
///
/// # Safety
///
/// The executing CPU must support the `amx-movrs` target feature. `base` and
/// `stride` are forwarded without validation.
#[inline]
#[rustc_legacy_const_generics(0)]
#[target_feature(enable = "amx-movrs")]
#[cfg_attr(
    all(test, not(target_vendor = "apple")),
    assert_instr(tileloaddrs, DST = 0)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_loaddrs<const DST: i32>(base: *const u8, stride: usize) {
    static_assert_uimm_bits!(DST, 3);
    tileloaddrs64(DST as i8, base, stride as u64);
}
586
/// Loads a tile from memory into `(*dst).tile` through the `tileloaddrs`
/// path, using the shape stored in `*dst`.
///
/// Reads `(*dst).rows` and `(*dst).colsb`, passes them with `base` and
/// `stride` (converted to `u64`) to `llvm.x86.tileloaddrs64.internal`, and
/// writes the returned tile to `(*dst).tile`. `rows` and `colsb` are left
/// unchanged.
///
/// # Safety
///
/// The executing CPU must support the `amx-movrs` target feature. `dst` is
/// dereferenced without checks, so it must point to a valid `__tile1024i`;
/// `base` and `stride` are forwarded to the intrinsic without validation.
#[inline]
#[target_feature(enable = "amx-movrs")]
#[cfg_attr(all(test, not(target_vendor = "apple")), assert_instr(tileloaddrs))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn __tile_loaddrs(dst: *mut __tile1024i, base: *const u8, stride: usize) {
    (*dst).tile = tileloaddrs64_internal((*dst).rows, (*dst).colsb, base, stride as u64);
}
599
/// Loads tile register `DST` from memory with the `tileloaddrst1`
/// instruction.
///
/// `DST` is checked at compile time to fit in 3 bits (`0..=7`) and is passed,
/// together with `base` and `stride` (converted to `u64`), to the
/// `llvm.x86.tileloaddrst164` intrinsic. `#[rustc_legacy_const_generics(0)]`
/// lets callers write `DST` as an ordinary leading argument. The instruction
/// assertion is skipped when the target vendor is `apple`.
/// NOTE(review): per Intel's documentation this combines the read-shared hint
/// of `TILELOADDRS` with a non-temporal (streaming) hint — confirm there.
///
/// # Safety
///
/// The executing CPU must support the `amx-movrs` target feature. `base` and
/// `stride` are forwarded without validation.
#[inline]
#[rustc_legacy_const_generics(0)]
#[target_feature(enable = "amx-movrs")]
#[cfg_attr(
    all(test, not(target_vendor = "apple")),
    assert_instr(tileloaddrst1, DST = 0)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_stream_loaddrs<const DST: i32>(base: *const u8, stride: usize) {
    static_assert_uimm_bits!(DST, 3);
    tileloaddrst164(DST as i8, base, stride as u64);
}
619
/// Loads a tile from memory into `(*dst).tile` through the `tileloaddrst1`
/// path, using the shape stored in `*dst`.
///
/// Reads `(*dst).rows` and `(*dst).colsb`, passes them with `base` and
/// `stride` (converted to `u64`) to `llvm.x86.tileloaddrst164.internal`, and
/// writes the returned tile to `(*dst).tile`. `rows` and `colsb` are left
/// unchanged.
///
/// # Safety
///
/// The executing CPU must support the `amx-movrs` target feature. `dst` is
/// dereferenced without checks, so it must point to a valid `__tile1024i`;
/// `base` and `stride` are forwarded to the intrinsic without validation.
#[inline]
#[target_feature(enable = "amx-movrs")]
#[cfg_attr(all(test, not(target_vendor = "apple")), assert_instr(tileloaddrst1))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn __tile_stream_loaddrs(dst: *mut __tile1024i, base: *const u8, stride: usize) {
    (*dst).tile = tileloaddrst164_internal((*dst).rows, (*dst).colsb, base, stride as u64);
}
634
/// Executes the `tmmultf32ps` instruction on three tile registers chosen by
/// immediates.
///
/// `DST`, `A` and `B` are each checked at compile time to fit in 3 bits
/// (`0..=7`) and are passed, in that order, to the `llvm.x86.tmmultf32ps`
/// intrinsic. `#[rustc_legacy_const_generics(0, 1, 2)]` lets callers write
/// them as ordinary leading arguments. The instruction assertion is skipped
/// when the target vendor is `apple`.
/// NOTE(review): per Intel's documentation this is a matrix multiply of
/// single-precision inputs treated as TF32, accumulated in single precision —
/// confirm there.
///
/// # Safety
///
/// The executing CPU must support the `amx-tf32` target feature. No run-time
/// checks are performed here.
#[inline]
#[rustc_legacy_const_generics(0, 1, 2)]
#[target_feature(enable = "amx-tf32")]
#[cfg_attr(
    all(test, not(target_vendor = "apple")),
    assert_instr(tmmultf32ps, DST = 0, A = 1, B = 2)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_mmultf32ps<const DST: i32, const A: i32, const B: i32>() {
    static_assert_uimm_bits!(DST, 3);
    static_assert_uimm_bits!(A, 3);
    static_assert_uimm_bits!(B, 3);
    tmmultf32ps(DST as i8, A as i8, B as i8);
}
659
/// Applies the `tmmultf32ps` operation to tiles carried in [`__tile1024i`]
/// values, updating `(*dst).tile`.
///
/// Calls `llvm.x86.tmmultf32ps.internal` with the shape triple
/// `(a.rows, b.colsb, a.colsb)` followed by the tiles `(*dst).tile`, `a.tile`
/// and `b.tile`, then stores the returned tile in `(*dst).tile`. The `rows`
/// and `colsb` fields of `*dst` are neither read nor written.
///
/// # Safety
///
/// The executing CPU must support the `amx-tf32` target feature. `dst` is
/// dereferenced without checks, so it must point to a valid `__tile1024i`.
#[inline]
#[target_feature(enable = "amx-tf32")]
#[cfg_attr(all(test, not(target_vendor = "apple")), assert_instr(tmmultf32ps))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn __tile_mmultf32ps(dst: *mut __tile1024i, a: __tile1024i, b: __tile1024i) {
    (*dst).tile = tmmultf32ps_internal(a.rows, b.colsb, a.colsb, (*dst).tile, a.tile, b.tile);
}
678
/// Executes `tcvtrowd2ps` on row `row` of tile register `TILE` and returns the
/// result as a 512-bit vector.
///
/// `TILE` is checked at compile time to fit in 3 bits (`0..=7`); `row` is a
/// run-time value. Both are passed to `llvm.x86.tcvtrowd2ps`, and the
/// returned `f32x16` is converted to `__m512` with `as_m512()`.
/// NOTE(review): per Intel's documentation the selected row's 32-bit integers
/// are converted to single precision — confirm there.
///
/// # Safety
///
/// The executing CPU must support the `amx-avx512` and `avx10.2` target
/// features. `row` is not range-checked here.
#[inline]
#[rustc_legacy_const_generics(0)]
#[target_feature(enable = "amx-avx512,avx10.2")]
#[cfg_attr(
    all(test, not(target_vendor = "apple")),
    assert_instr(tcvtrowd2ps, TILE = 0)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_cvtrowd2ps<const TILE: i32>(row: u32) -> __m512 {
    static_assert_uimm_bits!(TILE, 3);
    tcvtrowd2ps(TILE as i8, row).as_m512()
}
693
/// Immediate-row form of [`_tile_cvtrowd2ps`]: both the tile register and the
/// row index are compile-time constants.
///
/// `TILE` is checked to fit in 3 bits (`0..=7`) and `ROW` in 6 bits
/// (`0..=63`). Both are passed to `llvm.x86.tcvtrowd2psi`, and the returned
/// `f32x16` is converted to `__m512` with `as_m512()`. Under test the same
/// `tcvtrowd2ps` mnemonic is asserted as for the run-time-row form.
///
/// # Safety
///
/// The executing CPU must support the `amx-avx512` and `avx10.2` target
/// features. No run-time checks are performed here.
#[inline]
#[rustc_legacy_const_generics(0, 1)]
#[target_feature(enable = "amx-avx512,avx10.2")]
#[cfg_attr(
    all(test, not(target_vendor = "apple")),
    assert_instr(tcvtrowd2ps, TILE = 0, ROW = 0)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_cvtrowd2psi<const TILE: i32, const ROW: i32>() -> __m512 {
    static_assert_uimm_bits!(TILE, 3);
    static_assert_uimm_bits!(ROW, 6);
    tcvtrowd2psi(TILE as i8, ROW as u32).as_m512()
}
709
/// Applies `tcvtrowd2ps` to row `row` of the tile carried in `src`.
///
/// Passes `src.rows`, `src.colsb`, `src.tile` and `row` to
/// `llvm.x86.tcvtrowd2ps.internal` and converts the returned `f32x16` to
/// `__m512` with `as_m512()`.
///
/// # Safety
///
/// The executing CPU must support the `amx-avx512` and `avx10.2` target
/// features. `row` is not range-checked here.
#[inline]
#[target_feature(enable = "amx-avx512,avx10.2")]
#[cfg_attr(all(test, not(target_vendor = "apple")), assert_instr(tcvtrowd2ps))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn __tile_cvtrowd2ps(src: __tile1024i, row: u32) -> __m512 {
    tcvtrowd2ps_internal(src.rows, src.colsb, src.tile, row).as_m512()
}
720
/// Executes `tcvtrowps2phh` on row `row` of tile register `TILE` and returns
/// the result as a 512-bit vector of half-precision lanes.
///
/// `TILE` is checked at compile time to fit in 3 bits (`0..=7`); `row` is a
/// run-time value. Both are passed to `llvm.x86.tcvtrowps2phh`, and the
/// returned `f16x32` is converted to `__m512h` with `as_m512h()`.
/// NOTE(review): per Intel's documentation the row's single-precision values
/// are converted to FP16 and placed in the high half of each 32-bit lane —
/// confirm there.
///
/// # Safety
///
/// The executing CPU must support the `amx-avx512` and `avx10.2` target
/// features. `row` is not range-checked here.
#[inline]
#[rustc_legacy_const_generics(0)]
#[target_feature(enable = "amx-avx512,avx10.2")]
#[cfg_attr(
    all(test, not(target_vendor = "apple")),
    assert_instr(tcvtrowps2phh, TILE = 0)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_cvtrowps2phh<const TILE: i32>(row: u32) -> __m512h {
    static_assert_uimm_bits!(TILE, 3);
    tcvtrowps2phh(TILE as i8, row).as_m512h()
}
736
/// Immediate-row form of [`_tile_cvtrowps2phh`]: both the tile register and
/// the row index are compile-time constants.
///
/// `TILE` is checked to fit in 3 bits (`0..=7`) and `ROW` in 6 bits
/// (`0..=63`). Both are passed to `llvm.x86.tcvtrowps2phhi`, and the returned
/// `f16x32` is converted to `__m512h` with `as_m512h()`. Under test the same
/// `tcvtrowps2phh` mnemonic is asserted as for the run-time-row form.
///
/// # Safety
///
/// The executing CPU must support the `amx-avx512` and `avx10.2` target
/// features. No run-time checks are performed here.
#[inline]
#[rustc_legacy_const_generics(0, 1)]
#[target_feature(enable = "amx-avx512,avx10.2")]
#[cfg_attr(
    all(test, not(target_vendor = "apple")),
    assert_instr(tcvtrowps2phh, TILE = 0, ROW = 0)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_cvtrowps2phhi<const TILE: i32, const ROW: i32>() -> __m512h {
    static_assert_uimm_bits!(TILE, 3);
    static_assert_uimm_bits!(ROW, 6);
    tcvtrowps2phhi(TILE as i8, ROW as u32).as_m512h()
}
753
/// Applies `tcvtrowps2phh` to row `row` of the tile carried in `src`.
///
/// Passes `src.rows`, `src.colsb`, `src.tile` and `row` to
/// `llvm.x86.tcvtrowps2phh.internal` and converts the returned `f16x32` to
/// `__m512h` with `as_m512h()`.
///
/// # Safety
///
/// The executing CPU must support the `amx-avx512` and `avx10.2` target
/// features. `row` is not range-checked here.
#[inline]
#[target_feature(enable = "amx-avx512,avx10.2")]
#[cfg_attr(all(test, not(target_vendor = "apple")), assert_instr(tcvtrowps2phh))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn __tile_cvtrowps2phh(src: __tile1024i, row: u32) -> __m512h {
    tcvtrowps2phh_internal(src.rows, src.colsb, src.tile, row).as_m512h()
}
765
/// Executes `tcvtrowps2phl` on row `row` of tile register `TILE` and returns
/// the result as a 512-bit vector of half-precision lanes.
///
/// `TILE` is checked at compile time to fit in 3 bits (`0..=7`); `row` is a
/// run-time value. Both are passed to `llvm.x86.tcvtrowps2phl`, and the
/// returned `f16x32` is converted to `__m512h` with `as_m512h()`.
/// NOTE(review): per Intel's documentation the row's single-precision values
/// are converted to FP16 and placed in the low half of each 32-bit lane —
/// confirm there.
///
/// # Safety
///
/// The executing CPU must support the `amx-avx512` and `avx10.2` target
/// features. `row` is not range-checked here.
#[inline]
#[rustc_legacy_const_generics(0)]
#[target_feature(enable = "amx-avx512,avx10.2")]
#[cfg_attr(
    all(test, not(target_vendor = "apple")),
    assert_instr(tcvtrowps2phl, TILE = 0)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_cvtrowps2phl<const TILE: i32>(row: u32) -> __m512h {
    static_assert_uimm_bits!(TILE, 3);
    tcvtrowps2phl(TILE as i8, row).as_m512h()
}
781
/// Immediate-row form of [`_tile_cvtrowps2phl`]: both the tile register and
/// the row index are compile-time constants.
///
/// `TILE` is checked to fit in 3 bits (`0..=7`) and `ROW` in 6 bits
/// (`0..=63`). Both are passed to `llvm.x86.tcvtrowps2phli`, and the returned
/// `f16x32` is converted to `__m512h` with `as_m512h()`. Under test the same
/// `tcvtrowps2phl` mnemonic is asserted as for the run-time-row form.
///
/// # Safety
///
/// The executing CPU must support the `amx-avx512` and `avx10.2` target
/// features. No run-time checks are performed here.
#[inline]
#[rustc_legacy_const_generics(0, 1)]
#[target_feature(enable = "amx-avx512,avx10.2")]
#[cfg_attr(
    all(test, not(target_vendor = "apple")),
    assert_instr(tcvtrowps2phl, TILE = 0, ROW = 0)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_cvtrowps2phli<const TILE: i32, const ROW: i32>() -> __m512h {
    static_assert_uimm_bits!(TILE, 3);
    static_assert_uimm_bits!(ROW, 6);
    tcvtrowps2phli(TILE as i8, ROW as u32).as_m512h()
}
798
/// Applies `tcvtrowps2phl` to row `row` of the tile carried in `src`.
///
/// Passes `src.rows`, `src.colsb`, `src.tile` and `row` to
/// `llvm.x86.tcvtrowps2phl.internal` and converts the returned `f16x32` to
/// `__m512h` with `as_m512h()`.
///
/// # Safety
///
/// The executing CPU must support the `amx-avx512` and `avx10.2` target
/// features. `row` is not range-checked here.
#[inline]
#[target_feature(enable = "amx-avx512,avx10.2")]
#[cfg_attr(all(test, not(target_vendor = "apple")), assert_instr(tcvtrowps2phl))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn __tile_cvtrowps2phl(src: __tile1024i, row: u32) -> __m512h {
    tcvtrowps2phl_internal(src.rows, src.colsb, src.tile, row).as_m512h()
}
810
/// Executes `tcvtrowps2bf16h` on row `row` of tile register `TILE` and returns
/// the result as a 512-bit vector of BF16 lanes.
///
/// `TILE` is checked at compile time to fit in 3 bits (`0..=7`); `row` is a
/// run-time value. Both are passed to `llvm.x86.tcvtrowps2bf16h`, and the
/// returned `u16x32` is converted to `__m512bh` with `as_m512bh()`.
/// NOTE(review): per Intel's documentation the row's single-precision values
/// are converted to BF16 and placed in the high half of each 32-bit lane —
/// confirm there.
///
/// # Safety
///
/// The executing CPU must support the `amx-avx512` and `avx10.2` target
/// features. `row` is not range-checked here.
#[inline]
#[rustc_legacy_const_generics(0)]
#[target_feature(enable = "amx-avx512,avx10.2")]
#[cfg_attr(
    all(test, not(target_vendor = "apple")),
    assert_instr(tcvtrowps2bf16h, TILE = 0)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_cvtrowps2bf16h<const TILE: i32>(row: u32) -> __m512bh {
    static_assert_uimm_bits!(TILE, 3);
    tcvtrowps2bf16h(TILE as i8, row).as_m512bh()
}
826
/// Immediate-row form of [`_tile_cvtrowps2bf16h`]: both the tile register and
/// the row index are compile-time constants.
///
/// `TILE` is checked to fit in 3 bits (`0..=7`) and `ROW` in 6 bits
/// (`0..=63`). Both are passed to `llvm.x86.tcvtrowps2bf16hi`, and the
/// returned `u16x32` is converted to `__m512bh` with `as_m512bh()`. Under test
/// the same `tcvtrowps2bf16h` mnemonic is asserted as for the run-time-row
/// form.
///
/// # Safety
///
/// The executing CPU must support the `amx-avx512` and `avx10.2` target
/// features. No run-time checks are performed here.
#[inline]
#[rustc_legacy_const_generics(0, 1)]
#[target_feature(enable = "amx-avx512,avx10.2")]
#[cfg_attr(
    all(test, not(target_vendor = "apple")),
    assert_instr(tcvtrowps2bf16h, TILE = 0, ROW = 0)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_cvtrowps2bf16hi<const TILE: i32, const ROW: i32>() -> __m512bh {
    static_assert_uimm_bits!(TILE, 3);
    static_assert_uimm_bits!(ROW, 6);
    tcvtrowps2bf16hi(TILE as i8, ROW as u32).as_m512bh()
}
843
/// Applies `tcvtrowps2bf16h` to row `row` of the tile carried in `src`.
///
/// Passes `src.rows`, `src.colsb`, `src.tile` and `row` to
/// `llvm.x86.tcvtrowps2bf16h.internal` and converts the returned `u16x32` to
/// `__m512bh` with `as_m512bh()`.
///
/// # Safety
///
/// The executing CPU must support the `amx-avx512` and `avx10.2` target
/// features. `row` is not range-checked here.
#[inline]
#[target_feature(enable = "amx-avx512,avx10.2")]
#[cfg_attr(all(test, not(target_vendor = "apple")), assert_instr(tcvtrowps2bf16h))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn __tile_cvtrowps2bf16h(src: __tile1024i, row: u32) -> __m512bh {
    tcvtrowps2bf16h_internal(src.rows, src.colsb, src.tile, row).as_m512bh()
}
855
/// Executes `tcvtrowps2bf16l` on row `row` of tile register `TILE` and returns
/// the result as a 512-bit vector of BF16 lanes.
///
/// `TILE` is checked at compile time to fit in 3 bits (`0..=7`); `row` is a
/// run-time value. Both are passed to `llvm.x86.tcvtrowps2bf16l`, and the
/// returned `u16x32` is converted to `__m512bh` with `as_m512bh()`.
/// NOTE(review): per Intel's documentation the row's single-precision values
/// are converted to BF16 and placed in the low half of each 32-bit lane —
/// confirm there.
///
/// # Safety
///
/// The executing CPU must support the `amx-avx512` and `avx10.2` target
/// features. `row` is not range-checked here.
#[inline]
#[rustc_legacy_const_generics(0)]
#[target_feature(enable = "amx-avx512,avx10.2")]
#[cfg_attr(
    all(test, not(target_vendor = "apple")),
    assert_instr(tcvtrowps2bf16l, TILE = 0)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_cvtrowps2bf16l<const TILE: i32>(row: u32) -> __m512bh {
    static_assert_uimm_bits!(TILE, 3);
    tcvtrowps2bf16l(TILE as i8, row).as_m512bh()
}
871
/// Immediate-row form of [`_tile_cvtrowps2bf16l`]: both the tile register and
/// the row index are compile-time constants.
///
/// `TILE` is checked to fit in 3 bits (`0..=7`) and `ROW` in 6 bits
/// (`0..=63`). Both are passed to `llvm.x86.tcvtrowps2bf16li`, and the
/// returned `u16x32` is converted to `__m512bh` with `as_m512bh()`. Under test
/// the same `tcvtrowps2bf16l` mnemonic is asserted as for the run-time-row
/// form.
///
/// # Safety
///
/// The executing CPU must support the `amx-avx512` and `avx10.2` target
/// features. No run-time checks are performed here.
#[inline]
#[rustc_legacy_const_generics(0, 1)]
#[target_feature(enable = "amx-avx512,avx10.2")]
#[cfg_attr(
    all(test, not(target_vendor = "apple")),
    assert_instr(tcvtrowps2bf16l, TILE = 0, ROW = 0)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_cvtrowps2bf16li<const TILE: i32, const ROW: i32>() -> __m512bh {
    static_assert_uimm_bits!(TILE, 3);
    static_assert_uimm_bits!(ROW, 6);
    tcvtrowps2bf16li(TILE as i8, ROW as u32).as_m512bh()
}
888
/// Applies `tcvtrowps2bf16l` to row `row` of the tile carried in `src`.
///
/// Passes `src.rows`, `src.colsb`, `src.tile` and `row` to
/// `llvm.x86.tcvtrowps2bf16l.internal` and converts the returned `u16x32` to
/// `__m512bh` with `as_m512bh()`.
///
/// # Safety
///
/// The executing CPU must support the `amx-avx512` and `avx10.2` target
/// features. `row` is not range-checked here.
#[inline]
#[target_feature(enable = "amx-avx512,avx10.2")]
#[cfg_attr(all(test, not(target_vendor = "apple")), assert_instr(tcvtrowps2bf16l))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn __tile_cvtrowps2bf16l(src: __tile1024i, row: u32) -> __m512bh {
    tcvtrowps2bf16l_internal(src.rows, src.colsb, src.tile, row).as_m512bh()
}
900
/// Executes `tilemovrow` on row `row` of tile register `TILE` and returns the
/// result as a 512-bit integer vector.
///
/// `TILE` is checked at compile time to fit in 3 bits (`0..=7`); `row` is a
/// run-time value. Both are passed to `llvm.x86.tilemovrow`, and the returned
/// `i32x16` is converted to `__m512i` with `as_m512i()`.
/// NOTE(review): per its name and Intel's documentation this copies one tile
/// row into a vector register without conversion — confirm there.
///
/// # Safety
///
/// The executing CPU must support the `amx-avx512` and `avx10.2` target
/// features. `row` is not range-checked here.
#[inline]
#[rustc_legacy_const_generics(0)]
#[target_feature(enable = "amx-avx512,avx10.2")]
#[cfg_attr(
    all(test, not(target_vendor = "apple")),
    assert_instr(tilemovrow, TILE = 0)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_movrow<const TILE: i32>(row: u32) -> __m512i {
    static_assert_uimm_bits!(TILE, 3);
    tilemovrow(TILE as i8, row).as_m512i()
}
914
/// Immediate-row form of [`_tile_movrow`]: both the tile register and the row
/// index are compile-time constants.
///
/// `TILE` is checked to fit in 3 bits (`0..=7`) and `ROW` in 6 bits
/// (`0..=63`). Both are passed to `llvm.x86.tilemovrowi`, and the returned
/// `i32x16` is converted to `__m512i` with `as_m512i()`. Under test the same
/// `tilemovrow` mnemonic is asserted as for the run-time-row form.
///
/// # Safety
///
/// The executing CPU must support the `amx-avx512` and `avx10.2` target
/// features. No run-time checks are performed here.
#[inline]
#[rustc_legacy_const_generics(0, 1)]
#[target_feature(enable = "amx-avx512,avx10.2")]
#[cfg_attr(
    all(test, not(target_vendor = "apple")),
    assert_instr(tilemovrow, TILE = 0, ROW = 0)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_movrowi<const TILE: i32, const ROW: i32>() -> __m512i {
    static_assert_uimm_bits!(TILE, 3);
    static_assert_uimm_bits!(ROW, 6);
    tilemovrowi(TILE as i8, ROW as u32).as_m512i()
}
929
/// Applies `tilemovrow` to row `row` of the tile carried in `src`.
///
/// Passes `src.rows`, `src.colsb`, `src.tile` and `row` to
/// `llvm.x86.tilemovrow.internal` and converts the returned `i32x16` to
/// `__m512i` with `as_m512i()`.
///
/// # Safety
///
/// The executing CPU must support the `amx-avx512` and `avx10.2` target
/// features. `row` is not range-checked here.
#[inline]
#[target_feature(enable = "amx-avx512,avx10.2")]
#[cfg_attr(all(test, not(target_vendor = "apple")), assert_instr(tilemovrow))]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn __tile_movrow(src: __tile1024i, row: u32) -> __m512i {
    tilemovrow_internal(src.rows, src.colsb, src.tile, row).as_m512i()
}
939
// Raw bindings to the LLVM AMX intrinsics used by the wrappers in this file.
//
// Each operation comes in up to two flavours:
//   * a plain form that names tile registers by an `i8` index, and
//   * an `.internal` form that takes explicit `u16` shape arguments and
//     passes tile contents around as `Tile` values.
// The row-oriented operations additionally have an `…i` form; the wrappers in
// this file call it with a compile-time row index.
//
// NOTE(review): `improper_ctypes` is presumably silenced because vector and
// `Tile` types appear in these signatures — confirm.
#[allow(improper_ctypes)]
unsafe extern "unadjusted" {
    // Tile configuration load / store.
    #[link_name = "llvm.x86.ldtilecfg"]
    fn ldtilecfg(mem_addr: *const u8);
    #[link_name = "llvm.x86.sttilecfg"]
    fn sttilecfg(mem_addr: *mut u8);

    // Tile load (`tileloadd`).
    #[link_name = "llvm.x86.tileloadd64"]
    fn tileloadd64(dst: i8, base: *const u8, stride: u64);
    #[link_name = "llvm.x86.tileloadd64.internal"]
    fn tileloadd64_internal(rows: u16, colsb: u16, base: *const u8, stride: u64) -> Tile;

    // Tile load, `t1` variant (`tileloaddt1`).
    #[link_name = "llvm.x86.tileloaddt164"]
    fn tileloaddt164(dst: i8, base: *const u8, stride: u64);
    #[link_name = "llvm.x86.tileloaddt164.internal"]
    fn tileloaddt164_internal(rows: u16, colsb: u16, base: *const u8, stride: u64) -> Tile;

    // Tile release.
    #[link_name = "llvm.x86.tilerelease"]
    fn tilerelease();

    // Tile store (`tilestored`); in the plain form `dst` is the tile register
    // index handed to the intrinsic.
    #[link_name = "llvm.x86.tilestored64"]
    fn tilestored64(dst: i8, base: *mut u8, stride: u64);
    #[link_name = "llvm.x86.tilestored64.internal"]
    fn tilestored64_internal(rows: u16, colsb: u16, base: *mut u8, stride: u64, src: Tile);

    // Tile zeroing.
    #[link_name = "llvm.x86.tilezero"]
    fn tilezero(dst: i8);
    #[link_name = "llvm.x86.tilezero.internal"]
    fn tilezero_internal(rows: u16, colsb: u16) -> Tile;

    // Three-operand tile operations. The `.internal` forms take the shape as
    // `(m, n, k)`, then the `dst`, `a` and `b` tiles, and return the new
    // destination tile.
    #[link_name = "llvm.x86.tdpbf16ps"]
    fn tdpbf16ps(dst: i8, a: i8, b: i8);
    #[link_name = "llvm.x86.tdpbf16ps.internal"]
    fn tdpbf16ps_internal(m: u16, n: u16, k: u16, dst: Tile, a: Tile, b: Tile) -> Tile;

    #[link_name = "llvm.x86.tdpbuud"]
    fn tdpbuud(dst: i8, a: i8, b: i8);
    #[link_name = "llvm.x86.tdpbuud.internal"]
    fn tdpbuud_internal(m: u16, n: u16, k: u16, dst: Tile, a: Tile, b: Tile) -> Tile;

    #[link_name = "llvm.x86.tdpbusd"]
    fn tdpbusd(dst: i8, a: i8, b: i8);
    #[link_name = "llvm.x86.tdpbusd.internal"]
    fn tdpbusd_internal(m: u16, n: u16, k: u16, dst: Tile, a: Tile, b: Tile) -> Tile;

    #[link_name = "llvm.x86.tdpbsud"]
    fn tdpbsud(dst: i8, a: i8, b: i8);
    #[link_name = "llvm.x86.tdpbsud.internal"]
    fn tdpbsud_internal(m: u16, n: u16, k: u16, dst: Tile, a: Tile, b: Tile) -> Tile;

    #[link_name = "llvm.x86.tdpbssd"]
    fn tdpbssd(dst: i8, a: i8, b: i8);
    #[link_name = "llvm.x86.tdpbssd.internal"]
    fn tdpbssd_internal(m: u16, n: u16, k: u16, dst: Tile, a: Tile, b: Tile) -> Tile;

    #[link_name = "llvm.x86.tdpfp16ps"]
    fn tdpfp16ps(dst: i8, a: i8, b: i8);
    #[link_name = "llvm.x86.tdpfp16ps.internal"]
    fn tdpfp16ps_internal(m: u16, n: u16, k: u16, dst: Tile, a: Tile, b: Tile) -> Tile;

    #[link_name = "llvm.x86.tcmmimfp16ps"]
    fn tcmmimfp16ps(dst: i8, a: i8, b: i8);
    #[link_name = "llvm.x86.tcmmimfp16ps.internal"]
    fn tcmmimfp16ps_internal(m: u16, n: u16, k: u16, dst: Tile, a: Tile, b: Tile) -> Tile;

    #[link_name = "llvm.x86.tcmmrlfp16ps"]
    fn tcmmrlfp16ps(dst: i8, a: i8, b: i8);
    #[link_name = "llvm.x86.tcmmrlfp16ps.internal"]
    fn tcmmrlfp16ps_internal(m: u16, n: u16, k: u16, dst: Tile, a: Tile, b: Tile) -> Tile;

    #[link_name = "llvm.x86.tdpbf8ps"]
    fn tdpbf8ps(dst: i8, a: i8, b: i8);
    #[link_name = "llvm.x86.tdpbf8ps.internal"]
    fn tdpbf8ps_internal(m: u16, n: u16, k: u16, dst: Tile, a: Tile, b: Tile) -> Tile;

    #[link_name = "llvm.x86.tdpbhf8ps"]
    fn tdpbhf8ps(dst: i8, a: i8, b: i8);
    #[link_name = "llvm.x86.tdpbhf8ps.internal"]
    fn tdpbhf8ps_internal(m: u16, n: u16, k: u16, dst: Tile, a: Tile, b: Tile) -> Tile;

    #[link_name = "llvm.x86.tdphbf8ps"]
    fn tdphbf8ps(dst: i8, a: i8, b: i8);
    #[link_name = "llvm.x86.tdphbf8ps.internal"]
    fn tdphbf8ps_internal(m: u16, n: u16, k: u16, dst: Tile, a: Tile, b: Tile) -> Tile;

    #[link_name = "llvm.x86.tdphf8ps"]
    fn tdphf8ps(dst: i8, a: i8, b: i8);
    #[link_name = "llvm.x86.tdphf8ps.internal"]
    fn tdphf8ps_internal(m: u16, n: u16, k: u16, dst: Tile, a: Tile, b: Tile) -> Tile;

    // `amx-movrs` tile loads.
    #[link_name = "llvm.x86.tileloaddrs64"]
    fn tileloaddrs64(dst: i8, base: *const u8, stride: u64);
    #[link_name = "llvm.x86.tileloaddrs64.internal"]
    fn tileloaddrs64_internal(rows: u16, colsb: u16, base: *const u8, stride: u64) -> Tile;

    #[link_name = "llvm.x86.tileloaddrst164"]
    fn tileloaddrst164(dst: i8, base: *const u8, stride: u64);
    #[link_name = "llvm.x86.tileloaddrst164.internal"]
    fn tileloaddrst164_internal(rows: u16, colsb: u16, base: *const u8, stride: u64) -> Tile;

    #[link_name = "llvm.x86.tmmultf32ps"]
    fn tmmultf32ps(dst: i8, a: i8, b: i8);
    #[link_name = "llvm.x86.tmmultf32ps.internal"]
    fn tmmultf32ps_internal(m: u16, n: u16, k: u16, dst: Tile, a: Tile, b: Tile) -> Tile;

    // Row extraction / conversion into 512-bit vectors. Each group has the
    // plain form, the `…i` form and the `.internal` form.
    #[link_name = "llvm.x86.tcvtrowd2ps"]
    fn tcvtrowd2ps(tile: i8, row: u32) -> f32x16;
    #[link_name = "llvm.x86.tcvtrowd2psi"]
    fn tcvtrowd2psi(tile: i8, row: u32) -> f32x16;
    #[link_name = "llvm.x86.tcvtrowd2ps.internal"]
    fn tcvtrowd2ps_internal(rows: u16, colsb: u16, src: Tile, row: u32) -> f32x16;

    #[link_name = "llvm.x86.tcvtrowps2phh"]
    fn tcvtrowps2phh(tile: i8, row: u32) -> f16x32;
    #[link_name = "llvm.x86.tcvtrowps2phhi"]
    fn tcvtrowps2phhi(tile: i8, row: u32) -> f16x32;
    #[link_name = "llvm.x86.tcvtrowps2phh.internal"]
    fn tcvtrowps2phh_internal(rows: u16, colsb: u16, src: Tile, row: u32) -> f16x32;

    #[link_name = "llvm.x86.tcvtrowps2phl"]
    fn tcvtrowps2phl(tile: i8, row: u32) -> f16x32;
    #[link_name = "llvm.x86.tcvtrowps2phli"]
    fn tcvtrowps2phli(tile: i8, row: u32) -> f16x32;
    #[link_name = "llvm.x86.tcvtrowps2phl.internal"]
    fn tcvtrowps2phl_internal(rows: u16, colsb: u16, src: Tile, row: u32) -> f16x32;

    #[link_name = "llvm.x86.tcvtrowps2bf16h"]
    fn tcvtrowps2bf16h(tile: i8, row: u32) -> u16x32;
    #[link_name = "llvm.x86.tcvtrowps2bf16hi"]
    fn tcvtrowps2bf16hi(tile: i8, row: u32) -> u16x32;
    #[link_name = "llvm.x86.tcvtrowps2bf16h.internal"]
    fn tcvtrowps2bf16h_internal(rows: u16, colsb: u16, src: Tile, row: u32) -> u16x32;

    #[link_name = "llvm.x86.tcvtrowps2bf16l"]
    fn tcvtrowps2bf16l(tile: i8, row: u32) -> u16x32;
    #[link_name = "llvm.x86.tcvtrowps2bf16li"]
    fn tcvtrowps2bf16li(tile: i8, row: u32) -> u16x32;
    #[link_name = "llvm.x86.tcvtrowps2bf16l.internal"]
    fn tcvtrowps2bf16l_internal(rows: u16, colsb: u16, src: Tile, row: u32) -> u16x32;

    #[link_name = "llvm.x86.tilemovrow"]
    fn tilemovrow(tile: i8, row: u32) -> i32x16;
    #[link_name = "llvm.x86.tilemovrowi"]
    fn tilemovrowi(tile: i8, row: u32) -> i32x16;
    #[link_name = "llvm.x86.tilemovrow.internal"]
    fn tilemovrow_internal(rows: u16, colsb: u16, src: Tile, row: u32) -> i32x16;
}
1087
1088#[cfg(test)]
1089mod tests {
1090 use crate::core_arch::x86::_mm_cvtness_sbh;
1091 use crate::core_arch::x86_64::*;
1092 use core::array;
1093 use stdarch_test::simd_test;
1094 #[cfg(target_os = "linux")]
1095 use syscalls::{Sysno, syscall};
1096
1097 #[allow(non_camel_case_types)]
1098 #[repr(C, packed)]
1099 #[derive(Copy, Clone, Default, Debug, PartialEq)]
1100 struct __tilecfg {
1101 palette: u8,
1103 start_row: u8,
1104 reserved_a0: [u8; 14],
1106 colsb: [u16; 8],
1108 reserved_b0: [u16; 8],
1110 rows: [u8; 8],
1112 reserved_c0: [u8; 8],
1114 }
1115
1116 impl __tilecfg {
1117 fn new(palette: u8, start_row: u8, colsb: [u16; 8], rows: [u8; 8]) -> Self {
1118 Self {
1119 palette,
1120 start_row,
1121 reserved_a0: [0u8; 14],
1122 colsb,
1123 reserved_b0: [0u16; 8],
1124 rows,
1125 reserved_c0: [0u8; 8],
1126 }
1127 }
1128
1129 const fn as_ptr(&self) -> *const u8 {
1130 self as *const Self as *const u8
1131 }
1132
1133 fn as_mut_ptr(&mut self) -> *mut u8 {
1134 self as *mut Self as *mut u8
1135 }
1136 }
1137
    /// Non-Linux stand-in for `_init_amx`: performs no setup, so the tests can
    /// call `_init_amx()` unconditionally on every target.
    #[cfg(not(target_os = "linux"))]
    #[target_feature(enable = "amx-tile")]
    fn _init_amx() {}
1141
1142 #[cfg(target_os = "linux")]
1143 #[target_feature(enable = "amx-tile")]
1144 #[inline]
1145 fn _init_amx() {
1146 let mut ret: usize;
1147 let mut xfeatures: usize = 0;
1148 ret = unsafe {
1149 syscall!(Sysno::arch_prctl, 0x1022, &raw mut xfeatures)
1150 .expect("arch_prctl ARCH_GET_XCOMP_PERM syscall failed")
1151 };
1152 if ret != 0 {
1153 panic!("Failed to get XFEATURES");
1154 } else {
1155 match 0b11 & (xfeatures >> 17) {
1156 0 => panic!("AMX is not available"),
1157 1 => {
1158 ret = unsafe {
1159 syscall!(Sysno::arch_prctl, 0x1023, 18)
1160 .expect("arch_prctl ARCH_REQ_XCOMP_PERM syscall failed")
1161 };
1162 if ret != 0 {
1163 panic!("Failed to enable AMX");
1164 }
1165 }
1166 3 => {}
1167 _ => unreachable!(),
1168 }
1169 }
1170 }
1171
1172 impl __tile1024i {
1173 #[inline]
1174 #[target_feature(enable = "amx-tile")]
1175 fn zeroed(rows: u16, colsb: u16) -> Self {
1176 Self {
1177 rows,
1178 colsb,
1179 tile: unsafe { super::tilezero_internal(rows, colsb) },
1180 }
1181 }
1182 }
1183
1184 #[simd_test(enable = "amx-tile")]
1185 fn test_tile_loadconfig() {
1186 unsafe {
1187 let config = __tilecfg::default();
1188 _tile_loadconfig(config.as_ptr());
1189 _tile_release();
1190 }
1191 }
1192
1193 #[simd_test(enable = "amx-tile")]
1194 fn test_tile_storeconfig() {
1195 unsafe {
1196 let config = __tilecfg::new(1, 0, [32; 8], [8; 8]);
1197 _tile_loadconfig(config.as_ptr());
1198 let mut _config = __tilecfg::default();
1199 _tile_storeconfig(_config.as_mut_ptr());
1200 _tile_release();
1201 assert_eq!(config, _config);
1202 }
1203 }
1204
1205 #[simd_test(enable = "amx-tile")]
1206 fn test_tile_zero() {
1207 unsafe {
1208 _init_amx();
1209 let mut config = __tilecfg::default();
1210 config.palette = 1;
1211 config.colsb[0] = 64;
1212 config.rows[0] = 16;
1213 _tile_loadconfig(config.as_ptr());
1214 _tile_zero::<0>();
1215 let mut out = [[1_i8; 64]; 16];
1216 _tile_stored::<0>(out.as_mut_ptr().cast(), 64);
1217 _tile_release();
1218 assert_eq!(out, [[0; 64]; 16]);
1219 }
1220 }
1221
1222 #[simd_test(enable = "amx-tile")]
1223 fn test__tile_zero() {
1224 unsafe {
1225 _init_amx();
1226
1227 let tile = __tile1024i::zeroed(16, 64);
1228
1229 let mut out = [[1_i8; 64]; 16];
1230 __tile_stored(out.as_mut_ptr().cast(), 64, tile);
1231
1232 assert_eq!(out, [[0; 64]; 16]);
1233 }
1234 }
1235
1236 #[simd_test(enable = "amx-tile")]
1237 fn test_tile_stored() {
1238 unsafe {
1239 _init_amx();
1240 let mut config = __tilecfg::default();
1241 config.palette = 1;
1242 config.colsb[0] = 64;
1243 config.rows[0] = 16;
1244 _tile_loadconfig(config.as_ptr());
1245 _tile_zero::<0>();
1246 let mut out = [[1_i8; 64]; 16];
1247 _tile_stored::<0>(out.as_mut_ptr().cast(), 64);
1248 _tile_release();
1249 assert_eq!(out, [[0; 64]; 16]);
1250 }
1251 }
1252
1253 #[simd_test(enable = "amx-tile")]
1254 fn test__tile_stored() {
1255 unsafe {
1256 _init_amx();
1257
1258 let tile = __tile1024i::zeroed(16, 64);
1259
1260 let mut out = [[1_i8; 64]; 16];
1261 __tile_stored(out.as_mut_ptr().cast(), 64, tile);
1262
1263 assert_eq!(out, [[0; 64]; 16]);
1264 }
1265 }
1266
1267 #[simd_test(enable = "amx-tile")]
1268 fn test_tile_loadd() {
1269 unsafe {
1270 _init_amx();
1271 let mut config = __tilecfg::default();
1272 config.palette = 1;
1273 config.colsb[0] = 64;
1274 config.rows[0] = 16;
1275 _tile_loadconfig(config.as_ptr());
1276 _tile_zero::<0>();
1277 let mat = [1_i8; 1024];
1278 _tile_loadd::<0>(mat.as_ptr().cast(), 64);
1279 let mut out = [[0_i8; 64]; 16];
1280 _tile_stored::<0>(out.as_mut_ptr().cast(), 64);
1281 _tile_release();
1282 assert_eq!(out, [[1; 64]; 16]);
1283 }
1284 }
1285
1286 #[simd_test(enable = "amx-tile")]
1287 fn test__tile_loadd() {
1288 unsafe {
1289 _init_amx();
1290
1291 let mut tile = __tile1024i::zeroed(16, 64);
1292
1293 let mat = [1_i8; 1024];
1294 __tile_loadd(&mut tile, mat.as_ptr().cast(), 64);
1295 let mut out = [[0_i8; 64]; 16];
1296 __tile_stored(out.as_mut_ptr().cast(), 64, tile);
1297
1298 assert_eq!(out, [[1; 64]; 16]);
1299 }
1300 }
1301
1302 #[simd_test(enable = "amx-tile")]
1303 fn test_tile_stream_loadd() {
1304 unsafe {
1305 _init_amx();
1306 let mut config = __tilecfg::default();
1307 config.palette = 1;
1308 config.colsb[0] = 64;
1309 config.rows[0] = 16;
1310 _tile_loadconfig(config.as_ptr());
1311 _tile_zero::<0>();
1312 let mat = [1_i8; 1024];
1313 _tile_stream_loadd::<0>(mat.as_ptr().cast(), 64);
1314 let mut out = [[0_i8; 64]; 16];
1315 _tile_stored::<0>(out.as_mut_ptr().cast(), 64);
1316 _tile_release();
1317 assert_eq!(out, [[1; 64]; 16]);
1318 }
1319 }
1320
1321 #[simd_test(enable = "amx-tile")]
1322 fn test__tile_stream_loadd() {
1323 unsafe {
1324 _init_amx();
1325
1326 let mut tile = __tile1024i::zeroed(16, 64);
1327
1328 let mat = [1_i8; 1024];
1329 __tile_stream_loadd(&mut tile, mat.as_ptr().cast(), 64);
1330 let mut out = [[0_i8; 64]; 16];
1331 __tile_stored(out.as_mut_ptr().cast(), 64, tile);
1332
1333 assert_eq!(out, [[1; 64]; 16]);
1334 }
1335 }
1336
1337 #[simd_test(enable = "amx-tile")]
1338 fn test_tile_release() {
1339 unsafe {
1340 _tile_release();
1341 }
1342 }
1343
    // Raw bfloat16 bit patterns, i.e. the upper 16 bits of the matching `f32`
    // (1.0f32 is 0x3f80_0000, 2.0f32 is 0x4000_0000).
    const BF16_1: u16 = 0x3f80;
    const BF16_2: u16 = 0x4000;
1346
1347 #[simd_test(enable = "amx-bf16")]
1348 fn test_tile_dpbf16ps() {
1349 unsafe {
1350 _init_amx();
1351 let ones = [BF16_1; 512];
1352 let twos = [BF16_2; 512];
1353 let mut res = [[0f32; 16]; 16];
1354 let mut config = __tilecfg::default();
1355 config.palette = 1;
1356 (0..=2).for_each(|i| {
1357 config.colsb[i] = 64;
1358 config.rows[i] = 16;
1359 });
1360 _tile_loadconfig(config.as_ptr());
1361 _tile_zero::<0>();
1362 _tile_loadd::<1>(ones.as_ptr().cast(), 64);
1363 _tile_loadd::<2>(twos.as_ptr().cast(), 64);
1364 _tile_dpbf16ps::<0, 1, 2>();
1365 _tile_stored::<0>(res.as_mut_ptr().cast(), 64);
1366 _tile_release();
1367 assert_eq!(res, [[64f32; 16]; 16]);
1368 }
1369 }
1370
1371 #[simd_test(enable = "amx-bf16")]
1372 fn test__tile_dpbf16ps() {
1373 unsafe {
1374 _init_amx();
1375 let ones = [BF16_1; 512];
1376 let twos = [BF16_2; 512];
1377 let mut res = [[0f32; 16]; 16];
1378
1379 let mut a = __tile1024i::zeroed(16, 64);
1380 let mut b = __tile1024i::zeroed(16, 64);
1381 let mut c = __tile1024i::zeroed(16, 64);
1382
1383 __tile_loadd(&mut a, ones.as_ptr().cast(), 64);
1384 __tile_loadd(&mut b, twos.as_ptr().cast(), 64);
1385 __tile_dpbf16ps(&mut c, a, b);
1386 __tile_stored(res.as_mut_ptr().cast(), 64, c);
1387
1388 assert_eq!(res, [[64f32; 16]; 16]);
1389 }
1390 }
1391
1392 #[simd_test(enable = "amx-int8")]
1393 fn test_tile_dpbssd() {
1394 unsafe {
1395 _init_amx();
1396 let ones = [-1_i8; 1024];
1397 let twos = [-2_i8; 1024];
1398 let mut res = [[0_i32; 16]; 16];
1399 let mut config = __tilecfg::default();
1400 config.palette = 1;
1401 (0..=2).for_each(|i| {
1402 config.colsb[i] = 64;
1403 config.rows[i] = 16;
1404 });
1405 _tile_loadconfig(config.as_ptr());
1406 _tile_zero::<0>();
1407 _tile_loadd::<1>(ones.as_ptr().cast(), 64);
1408 _tile_loadd::<2>(twos.as_ptr().cast(), 64);
1409 _tile_dpbssd::<0, 1, 2>();
1410 _tile_stored::<0>(res.as_mut_ptr().cast(), 64);
1411 _tile_release();
1412 assert_eq!(res, [[128_i32; 16]; 16]);
1413 }
1414 }
1415
1416 #[simd_test(enable = "amx-int8")]
1417 fn test__tile_dpbssd() {
1418 unsafe {
1419 _init_amx();
1420 let ones = [-1_i8; 1024];
1421 let twos = [-2_i8; 1024];
1422 let mut res = [[0_i32; 16]; 16];
1423
1424 let mut a = __tile1024i::zeroed(16, 64);
1425 let mut b = __tile1024i::zeroed(16, 64);
1426 let mut c = __tile1024i::zeroed(16, 64);
1427
1428 __tile_loadd(&mut a, ones.as_ptr().cast(), 64);
1429 __tile_loadd(&mut b, twos.as_ptr().cast(), 64);
1430 __tile_dpbssd(&mut c, a, b);
1431 __tile_stored(res.as_mut_ptr().cast(), 64, c);
1432
1433 assert_eq!(res, [[128_i32; 16]; 16]);
1434 }
1435 }
1436
1437 #[simd_test(enable = "amx-int8")]
1438 fn test_tile_dpbsud() {
1439 unsafe {
1440 _init_amx();
1441 let ones = [-1_i8; 1024];
1442 let twos = [2_u8; 1024];
1443 let mut res = [[0_i32; 16]; 16];
1444 let mut config = __tilecfg::default();
1445 config.palette = 1;
1446 (0..=2).for_each(|i| {
1447 config.colsb[i] = 64;
1448 config.rows[i] = 16;
1449 });
1450 _tile_loadconfig(config.as_ptr());
1451 _tile_zero::<0>();
1452 _tile_loadd::<1>(ones.as_ptr().cast(), 64);
1453 _tile_loadd::<2>(twos.as_ptr(), 64);
1454 _tile_dpbsud::<0, 1, 2>();
1455 _tile_stored::<0>(res.as_mut_ptr().cast(), 64);
1456 _tile_release();
1457 assert_eq!(res, [[-128_i32; 16]; 16]);
1458 }
1459 }
1460
1461 #[simd_test(enable = "amx-int8")]
1462 fn test__tile_dpbsud() {
1463 unsafe {
1464 _init_amx();
1465 let ones = [-1_i8; 1024];
1466 let twos = [2_u8; 1024];
1467 let mut res = [[0_i32; 16]; 16];
1468
1469 let mut a = __tile1024i::zeroed(16, 64);
1470 let mut b = __tile1024i::zeroed(16, 64);
1471 let mut c = __tile1024i::zeroed(16, 64);
1472
1473 __tile_loadd(&mut a, ones.as_ptr().cast(), 64);
1474 __tile_loadd(&mut b, twos.as_ptr(), 64);
1475 __tile_dpbsud(&mut c, a, b);
1476 __tile_stored(res.as_mut_ptr().cast(), 64, c);
1477
1478 assert_eq!(res, [[-128_i32; 16]; 16]);
1479 }
1480 }
1481
1482 #[simd_test(enable = "amx-int8")]
1483 fn test_tile_dpbusd() {
1484 unsafe {
1485 _init_amx();
1486 let ones = [1_u8; 1024];
1487 let twos = [-2_i8; 1024];
1488 let mut res = [[0_i32; 16]; 16];
1489 let mut config = __tilecfg::default();
1490 config.palette = 1;
1491 (0..=2).for_each(|i| {
1492 config.colsb[i] = 64;
1493 config.rows[i] = 16;
1494 });
1495 _tile_loadconfig(config.as_ptr());
1496 _tile_zero::<0>();
1497 _tile_loadd::<1>(ones.as_ptr(), 64);
1498 _tile_loadd::<2>(twos.as_ptr().cast(), 64);
1499 _tile_dpbusd::<0, 1, 2>();
1500 _tile_stored::<0>(res.as_mut_ptr().cast(), 64);
1501 _tile_release();
1502 assert_eq!(res, [[-128_i32; 16]; 16]);
1503 }
1504 }
1505
1506 #[simd_test(enable = "amx-int8")]
1507 fn test__tile_dpbusd() {
1508 unsafe {
1509 _init_amx();
1510 let ones = [1_u8; 1024];
1511 let twos = [-2_i8; 1024];
1512 let mut res = [[0_i32; 16]; 16];
1513
1514 let mut a = __tile1024i::zeroed(16, 64);
1515 let mut b = __tile1024i::zeroed(16, 64);
1516 let mut c = __tile1024i::zeroed(16, 64);
1517
1518 __tile_loadd(&mut a, ones.as_ptr(), 64);
1519 __tile_loadd(&mut b, twos.as_ptr().cast(), 64);
1520 __tile_dpbusd(&mut c, a, b);
1521 __tile_stored(res.as_mut_ptr().cast(), 64, c);
1522
1523 assert_eq!(res, [[-128_i32; 16]; 16]);
1524 }
1525 }
1526
1527 #[simd_test(enable = "amx-int8")]
1528 fn test_tile_dpbuud() {
1529 unsafe {
1530 _init_amx();
1531 let ones = [1_u8; 1024];
1532 let twos = [2_u8; 1024];
1533 let mut res = [[0_i32; 16]; 16];
1534 let mut config = __tilecfg::default();
1535 config.palette = 1;
1536 (0..=2).for_each(|i| {
1537 config.colsb[i] = 64;
1538 config.rows[i] = 16;
1539 });
1540 _tile_loadconfig(config.as_ptr());
1541 _tile_zero::<0>();
1542 _tile_loadd::<1>(ones.as_ptr(), 64);
1543 _tile_loadd::<2>(twos.as_ptr(), 64);
1544 _tile_dpbuud::<0, 1, 2>();
1545 _tile_stored::<0>(res.as_mut_ptr().cast(), 64);
1546 _tile_release();
1547 assert_eq!(res, [[128_i32; 16]; 16]);
1548 }
1549 }
1550
1551 #[simd_test(enable = "amx-int8")]
1552 fn test__tile_dpbuud() {
1553 unsafe {
1554 _init_amx();
1555 let ones = [1_u8; 1024];
1556 let twos = [2_u8; 1024];
1557 let mut res = [[0_i32; 16]; 16];
1558
1559 let mut a = __tile1024i::zeroed(16, 64);
1560 let mut b = __tile1024i::zeroed(16, 64);
1561 let mut c = __tile1024i::zeroed(16, 64);
1562
1563 __tile_loadd(&mut a, ones.as_ptr(), 64);
1564 __tile_loadd(&mut b, twos.as_ptr(), 64);
1565 __tile_dpbuud(&mut c, a, b);
1566 __tile_stored(res.as_mut_ptr().cast(), 64, c);
1567
1568 assert_eq!(res, [[128_i32; 16]; 16]);
1569 }
1570 }
1571
1572 #[simd_test(enable = "amx-fp16")]
1573 fn test_tile_dpfp16ps() {
1574 unsafe {
1575 _init_amx();
1576 let ones = [1f16; 512];
1577 let twos = [2f16; 512];
1578 let mut res = [[0f32; 16]; 16];
1579 let mut config = __tilecfg::default();
1580 config.palette = 1;
1581 (0..=2).for_each(|i| {
1582 config.colsb[i] = 64;
1583 config.rows[i] = 16;
1584 });
1585 _tile_loadconfig(config.as_ptr());
1586 _tile_zero::<0>();
1587 _tile_loadd::<1>(ones.as_ptr().cast(), 64);
1588 _tile_loadd::<2>(twos.as_ptr().cast(), 64);
1589 _tile_dpfp16ps::<0, 1, 2>();
1590 _tile_stored::<0>(res.as_mut_ptr().cast(), 64);
1591 _tile_release();
1592 assert_eq!(res, [[64f32; 16]; 16]);
1593 }
1594 }
1595
1596 #[simd_test(enable = "amx-fp16")]
1597 fn test__tile_dpfp16ps() {
1598 unsafe {
1599 _init_amx();
1600 let ones = [1f16; 512];
1601 let twos = [2f16; 512];
1602 let mut res = [[0f32; 16]; 16];
1603
1604 let mut a = __tile1024i::zeroed(16, 64);
1605 let mut b = __tile1024i::zeroed(16, 64);
1606 let mut c = __tile1024i::zeroed(16, 64);
1607
1608 __tile_loadd(&mut a, ones.as_ptr().cast(), 64);
1609 __tile_loadd(&mut b, twos.as_ptr().cast(), 64);
1610 __tile_dpfp16ps(&mut c, a, b);
1611 __tile_stored(res.as_mut_ptr().cast(), 64, c);
1612
1613 assert_eq!(res, [[64f32; 16]; 16]);
1614 }
1615 }
1616
1617 #[simd_test(enable = "amx-complex")]
1618 fn test_tile_cmmimfp16ps() {
1619 unsafe {
1620 _init_amx();
1621 let ones = [1f16; 512];
1622 let twos = [2f16; 512];
1623 let mut res = [[0f32; 16]; 16];
1624 let mut config = __tilecfg::default();
1625 config.palette = 1;
1626 (0..=2).for_each(|i| {
1627 config.colsb[i] = 64;
1628 config.rows[i] = 16;
1629 });
1630 _tile_loadconfig(config.as_ptr());
1631 _tile_zero::<0>();
1632 _tile_loadd::<1>(ones.as_ptr().cast(), 64);
1633 _tile_loadd::<2>(twos.as_ptr().cast(), 64);
1634 _tile_cmmimfp16ps::<0, 1, 2>();
1635 _tile_stored::<0>(res.as_mut_ptr().cast(), 64);
1636 _tile_release();
1637 assert_eq!(res, [[64f32; 16]; 16]);
1638 }
1639 }
1640
1641 #[simd_test(enable = "amx-complex")]
1642 fn test__tile_cmmimfp16ps() {
1643 unsafe {
1644 _init_amx();
1645 let ones = [1f16; 512];
1646 let twos = [2f16; 512];
1647 let mut res = [[0f32; 16]; 16];
1648
1649 let mut a = __tile1024i::zeroed(16, 64);
1650 let mut b = __tile1024i::zeroed(16, 64);
1651 let mut c = __tile1024i::zeroed(16, 64);
1652
1653 __tile_loadd(&mut a, ones.as_ptr().cast(), 64);
1654 __tile_loadd(&mut b, twos.as_ptr().cast(), 64);
1655 __tile_cmmimfp16ps(&mut c, a, b);
1656 __tile_stored(res.as_mut_ptr().cast(), 64, c);
1657
1658 assert_eq!(res, [[64f32; 16]; 16]);
1659 }
1660 }
1661
1662 #[simd_test(enable = "amx-complex")]
1663 fn test_tile_cmmrlfp16ps() {
1664 unsafe {
1665 _init_amx();
1666 let ones = [1f16; 512];
1667 let twos = [2f16; 512];
1668 let mut res = [[0f32; 16]; 16];
1669 let mut config = __tilecfg::default();
1670 config.palette = 1;
1671 (0..=2).for_each(|i| {
1672 config.colsb[i] = 64;
1673 config.rows[i] = 16;
1674 });
1675 _tile_loadconfig(config.as_ptr());
1676 _tile_zero::<0>();
1677 _tile_loadd::<1>(ones.as_ptr().cast(), 64);
1678 _tile_loadd::<2>(twos.as_ptr().cast(), 64);
1679 _tile_cmmrlfp16ps::<0, 1, 2>();
1680 _tile_stored::<0>(res.as_mut_ptr().cast(), 64);
1681 _tile_release();
1682 assert_eq!(res, [[0f32; 16]; 16]);
1683 }
1684 }
1685
1686 #[simd_test(enable = "amx-complex")]
1687 fn test__tile_cmmrlfp16ps() {
1688 unsafe {
1689 _init_amx();
1690 let ones = [1f16; 512];
1691 let twos = [2f16; 512];
1692 let mut res = [[0f32; 16]; 16];
1693
1694 let mut a = __tile1024i::zeroed(16, 64);
1695 let mut b = __tile1024i::zeroed(16, 64);
1696 let mut c = __tile1024i::zeroed(16, 64);
1697
1698 __tile_loadd(&mut a, ones.as_ptr().cast(), 64);
1699 __tile_loadd(&mut b, twos.as_ptr().cast(), 64);
1700 __tile_cmmrlfp16ps(&mut c, a, b);
1701 __tile_stored(res.as_mut_ptr().cast(), 64, c);
1702
1703 assert_eq!(res, [[0f32; 16]; 16]);
1704 }
1705 }
1706
    // Raw 8-bit float bit patterns for 1.0 and 2.0, used by the FP8 tests.
    // NOTE(review): presumably BF8 is the E5M2 layout (bias 15, so 0x3c is 1.0)
    // and HF8 is E4M3 (bias 7, so 0x38 is 1.0); confirm against the AMX-FP8
    // reference.
    const BF8_ONE: u8 = 0x3c;
    const BF8_TWO: u8 = 0x40;
    const HF8_ONE: u8 = 0x38;
    const HF8_TWO: u8 = 0x40;
1711
1712 #[simd_test(enable = "amx-fp8")]
1713 fn test_tile_dpbf8ps() {
1714 unsafe {
1715 _init_amx();
1716 let ones = [BF8_ONE; 1024];
1717 let twos = [BF8_TWO; 1024];
1718 let mut res = [[0.0_f32; 16]; 16];
1719 let mut config = __tilecfg::default();
1720 config.palette = 1;
1721 (0..=2).for_each(|i| {
1722 config.colsb[i] = 64;
1723 config.rows[i] = 16;
1724 });
1725 _tile_loadconfig(config.as_ptr());
1726 _tile_zero::<0>();
1727 _tile_loadd::<1>(ones.as_ptr(), 64);
1728 _tile_loadd::<2>(twos.as_ptr(), 64);
1729 _tile_dpbf8ps::<0, 1, 2>();
1730 _tile_stored::<0>(res.as_mut_ptr().cast(), 64);
1731 _tile_release();
1732 assert_eq!(res, [[128.0_f32; 16]; 16]);
1733 }
1734 }
1735
1736 #[simd_test(enable = "amx-fp8")]
1737 fn test__tile_dpbf8ps() {
1738 unsafe {
1739 _init_amx();
1740 let ones = [BF8_ONE; 1024];
1741 let twos = [BF8_TWO; 1024];
1742 let mut res = [[0.0_f32; 16]; 16];
1743
1744 let mut a = __tile1024i::zeroed(16, 64);
1745 let mut b = __tile1024i::zeroed(16, 64);
1746 let mut c = __tile1024i::zeroed(16, 64);
1747
1748 __tile_loadd(&mut a, ones.as_ptr(), 64);
1749 __tile_loadd(&mut b, twos.as_ptr(), 64);
1750 __tile_dpbf8ps(&mut c, a, b);
1751 __tile_stored(res.as_mut_ptr().cast(), 64, c);
1752
1753 assert_eq!(res, [[128.0_f32; 16]; 16]);
1754 }
1755 }
1756
1757 #[simd_test(enable = "amx-fp8")]
1758 fn test_tile_dpbhf8ps() {
1759 unsafe {
1760 _init_amx();
1761 let ones = [BF8_ONE; 1024];
1762 let twos = [HF8_TWO; 1024];
1763 let mut res = [[0.0_f32; 16]; 16];
1764 let mut config = __tilecfg::default();
1765 config.palette = 1;
1766 (0..=2).for_each(|i| {
1767 config.colsb[i] = 64;
1768 config.rows[i] = 16;
1769 });
1770 _tile_loadconfig(config.as_ptr());
1771 _tile_zero::<0>();
1772 _tile_loadd::<1>(ones.as_ptr(), 64);
1773 _tile_loadd::<2>(twos.as_ptr(), 64);
1774 _tile_dpbhf8ps::<0, 1, 2>();
1775 _tile_stored::<0>(res.as_mut_ptr().cast(), 64);
1776 _tile_release();
1777 assert_eq!(res, [[128.0_f32; 16]; 16]);
1778 }
1779 }
1780
1781 #[simd_test(enable = "amx-fp8")]
1782 fn test__tile_dpbhf8ps() {
1783 unsafe {
1784 _init_amx();
1785 let ones = [BF8_ONE; 1024];
1786 let twos = [HF8_TWO; 1024];
1787 let mut res = [[0.0_f32; 16]; 16];
1788
1789 let mut a = __tile1024i::zeroed(16, 64);
1790 let mut b = __tile1024i::zeroed(16, 64);
1791 let mut c = __tile1024i::zeroed(16, 64);
1792
1793 __tile_loadd(&mut a, ones.as_ptr(), 64);
1794 __tile_loadd(&mut b, twos.as_ptr(), 64);
1795 __tile_dpbhf8ps(&mut c, a, b);
1796 __tile_stored(res.as_mut_ptr().cast(), 64, c);
1797
1798 assert_eq!(res, [[128.0_f32; 16]; 16]);
1799 }
1800 }
1801
1802 #[simd_test(enable = "amx-fp8")]
1803 fn test_tile_dphbf8ps() {
1804 unsafe {
1805 _init_amx();
1806 let ones = [HF8_ONE; 1024];
1807 let twos = [BF8_TWO; 1024];
1808 let mut res = [[0.0_f32; 16]; 16];
1809 let mut config = __tilecfg::default();
1810 config.palette = 1;
1811 (0..=2).for_each(|i| {
1812 config.colsb[i] = 64;
1813 config.rows[i] = 16;
1814 });
1815 _tile_loadconfig(config.as_ptr());
1816 _tile_zero::<0>();
1817 _tile_loadd::<1>(ones.as_ptr(), 64);
1818 _tile_loadd::<2>(twos.as_ptr(), 64);
1819 _tile_dphbf8ps::<0, 1, 2>();
1820 _tile_stored::<0>(res.as_mut_ptr().cast(), 64);
1821 _tile_release();
1822 assert_eq!(res, [[128.0_f32; 16]; 16]);
1823 }
1824 }
1825
1826 #[simd_test(enable = "amx-fp8")]
1827 fn test__tile_dphbf8ps() {
1828 unsafe {
1829 _init_amx();
1830 let ones = [HF8_ONE; 1024];
1831 let twos = [BF8_TWO; 1024];
1832 let mut res = [[0.0_f32; 16]; 16];
1833
1834 let mut a = __tile1024i::zeroed(16, 64);
1835 let mut b = __tile1024i::zeroed(16, 64);
1836 let mut c = __tile1024i::zeroed(16, 64);
1837
1838 __tile_loadd(&mut a, ones.as_ptr(), 64);
1839 __tile_loadd(&mut b, twos.as_ptr(), 64);
1840 __tile_dphbf8ps(&mut c, a, b);
1841 __tile_stored(res.as_mut_ptr().cast(), 64, c);
1842
1843 assert_eq!(res, [[128.0_f32; 16]; 16]);
1844 }
1845 }
1846
1847 #[simd_test(enable = "amx-fp8")]
1848 fn test_tile_dphf8ps() {
1849 unsafe {
1850 _init_amx();
1851 let ones = [HF8_ONE; 1024];
1852 let twos = [HF8_TWO; 1024];
1853 let mut res = [[0.0_f32; 16]; 16];
1854 let mut config = __tilecfg::default();
1855 config.palette = 1;
1856 (0..=2).for_each(|i| {
1857 config.colsb[i] = 64;
1858 config.rows[i] = 16;
1859 });
1860 _tile_loadconfig(config.as_ptr());
1861 _tile_zero::<0>();
1862 _tile_loadd::<1>(ones.as_ptr(), 64);
1863 _tile_loadd::<2>(twos.as_ptr(), 64);
1864 _tile_dphf8ps::<0, 1, 2>();
1865 _tile_stored::<0>(res.as_mut_ptr().cast(), 64);
1866 _tile_release();
1867 assert_eq!(res, [[128.0_f32; 16]; 16]);
1868 }
1869 }
1870
1871 #[simd_test(enable = "amx-fp8")]
1872 fn test__tile_dphf8ps() {
1873 unsafe {
1874 _init_amx();
1875 let ones = [HF8_ONE; 1024];
1876 let twos = [HF8_TWO; 1024];
1877 let mut res = [[0.0_f32; 16]; 16];
1878
1879 let mut a = __tile1024i::zeroed(16, 64);
1880 let mut b = __tile1024i::zeroed(16, 64);
1881 let mut c = __tile1024i::zeroed(16, 64);
1882
1883 __tile_loadd(&mut a, ones.as_ptr(), 64);
1884 __tile_loadd(&mut b, twos.as_ptr(), 64);
1885 __tile_dphf8ps(&mut c, a, b);
1886 __tile_stored(res.as_mut_ptr().cast(), 64, c);
1887
1888 assert_eq!(res, [[128.0_f32; 16]; 16]);
1889 }
1890 }
1891
1892 #[simd_test(enable = "amx-movrs")]
1893 fn test_tile_loaddrs() {
1894 unsafe {
1895 _init_amx();
1896 let mut config = __tilecfg::default();
1897 config.palette = 1;
1898 config.colsb[0] = 64;
1899 config.rows[0] = 16;
1900 _tile_loadconfig(config.as_ptr());
1901 _tile_zero::<0>();
1902 let mat = [1_i8; 1024];
1903 _tile_loaddrs::<0>(mat.as_ptr().cast(), 64);
1904 let mut out = [[0_i8; 64]; 16];
1905 _tile_stored::<0>(out.as_mut_ptr().cast(), 64);
1906 _tile_release();
1907 assert_eq!(out, [[1; 64]; 16]);
1908 }
1909 }
1910
1911 #[simd_test(enable = "amx-movrs")]
1912 fn test__tile_loaddrs() {
1913 unsafe {
1914 _init_amx();
1915
1916 let mut tile = __tile1024i::zeroed(16, 64);
1917
1918 let mat = [1_i8; 1024];
1919 __tile_loaddrs(&mut tile, mat.as_ptr().cast(), 64);
1920 let mut out = [[0_i8; 64]; 16];
1921 __tile_stored(out.as_mut_ptr().cast(), 64, tile);
1922
1923 assert_eq!(out, [[1; 64]; 16]);
1924 }
1925 }
1926
1927 #[simd_test(enable = "amx-movrs")]
1928 fn test_tile_stream_loaddrs() {
1929 unsafe {
1930 _init_amx();
1931 let mut config = __tilecfg::default();
1932 config.palette = 1;
1933 config.colsb[0] = 64;
1934 config.rows[0] = 16;
1935 _tile_loadconfig(config.as_ptr());
1936 _tile_zero::<0>();
1937 let mat = [1_i8; 1024];
1938 _tile_stream_loaddrs::<0>(mat.as_ptr().cast(), 64);
1939 let mut out = [[0_i8; 64]; 16];
1940 _tile_stored::<0>(out.as_mut_ptr().cast(), 64);
1941 _tile_release();
1942 assert_eq!(out, [[1; 64]; 16]);
1943 }
1944 }
1945
1946 #[simd_test(enable = "amx-movrs")]
1947 fn test__tile_stream_loaddrs() {
1948 unsafe {
1949 _init_amx();
1950
1951 let mut tile = __tile1024i::zeroed(16, 64);
1952
1953 let mat = [1_i8; 1024];
1954 __tile_stream_loaddrs(&mut tile, mat.as_ptr().cast(), 64);
1955 let mut out = [[0_i8; 64]; 16];
1956 __tile_stored(out.as_mut_ptr().cast(), 64, tile);
1957
1958 assert_eq!(out, [[1; 64]; 16]);
1959 }
1960 }
1961
1962 #[simd_test(enable = "amx-avx512,avx10.2")]
1963 fn test_tile_movrow() {
1964 unsafe {
1965 _init_amx();
1966 let array: [[u8; 64]; 16] = array::from_fn(|i| [i as _; _]);
1967
1968 let mut config = __tilecfg::default();
1969 config.palette = 1;
1970 config.colsb[0] = 64;
1971 config.rows[0] = 16;
1972 _tile_loadconfig(config.as_ptr());
1973 _tile_loadd::<0>(array.as_ptr().cast(), 64);
1974 for i in 0..16 {
1975 let row = _tile_movrow::<0>(i);
1976 assert_eq!(*row.as_u8x64().as_array(), [i as _; _]);
1977 }
1978 }
1979 }
1980
1981 macro_rules! wrap_imm4 {
1982 ($name:ident :: <$TILE:literal>, $row:expr) => {
1983 match $row {
1984 0 => $name::<$TILE, 0>(),
1985 1 => $name::<$TILE, 1>(),
1986 2 => $name::<$TILE, 2>(),
1987 3 => $name::<$TILE, 3>(),
1988 4 => $name::<$TILE, 4>(),
1989 5 => $name::<$TILE, 5>(),
1990 6 => $name::<$TILE, 6>(),
1991 7 => $name::<$TILE, 7>(),
1992 8 => $name::<$TILE, 8>(),
1993 9 => $name::<$TILE, 9>(),
1994 10 => $name::<$TILE, 10>(),
1995 11 => $name::<$TILE, 11>(),
1996 12 => $name::<$TILE, 12>(),
1997 13 => $name::<$TILE, 13>(),
1998 14 => $name::<$TILE, 14>(),
1999 15 => $name::<$TILE, 15>(),
2000 _ => panic!("row index out of range"),
2001 }
2002 };
2003 }
2004
2005 #[simd_test(enable = "amx-avx512,avx10.2")]
2006 fn test_tile_movrowi() {
2007 unsafe {
2008 _init_amx();
2009 let array: [[u8; 64]; 16] = array::from_fn(|i| [i as _; _]);
2010
2011 let mut config = __tilecfg::default();
2012 config.palette = 1;
2013 config.colsb[0] = 64;
2014 config.rows[0] = 16;
2015 _tile_loadconfig(config.as_ptr());
2016 _tile_loadd::<0>(array.as_ptr().cast(), 64);
2017
2018 for i in 0..16 {
2019 let row = wrap_imm4!(_tile_movrowi::<0>, i);
2020 assert_eq!(*row.as_u8x64().as_array(), [i as _; _]);
2021 }
2022 }
2023 }
2024
2025 #[simd_test(enable = "amx-avx512,avx10.2")]
2026 fn test__tile_movrow() {
2027 unsafe {
2028 _init_amx();
2029 let array: [[u8; 64]; 16] = array::from_fn(|i| [i as _; _]);
2030
2031 let mut tile = __tile1024i::zeroed(16, 64);
2032 __tile_loadd(&mut tile, array.as_ptr().cast(), 64);
2033
2034 for i in 0..16 {
2035 let row = __tile_movrow(tile, i);
2036 assert_eq!(*row.as_u8x64().as_array(), [i as _; _]);
2037 }
2038 }
2039 }
2040
2041 #[simd_test(enable = "amx-avx512,avx10.2")]
2042 fn test_tile_cvtrowd2ps() {
2043 unsafe {
2044 _init_amx();
2045 let array: [[u32; 16]; 16] = array::from_fn(|i| [i as _; _]);
2046
2047 let mut config = __tilecfg::default();
2048 config.palette = 1;
2049 config.colsb[0] = 64;
2050 config.rows[0] = 16;
2051 _tile_loadconfig(config.as_ptr());
2052 _tile_loadd::<0>(array.as_ptr().cast(), 64);
2053 for i in 0..16 {
2054 let row = _tile_cvtrowd2ps::<0>(i);
2055 assert_eq!(*row.as_f32x16().as_array(), [i as _; _]);
2056 }
2057 }
2058 }
2059
2060 #[simd_test(enable = "amx-avx512,avx10.2")]
2061 fn test_tile_cvtrowd2psi() {
2062 unsafe {
2063 _init_amx();
2064 let array: [[u32; 16]; 16] = array::from_fn(|i| [i as _; _]);
2065
2066 let mut config = __tilecfg::default();
2067 config.palette = 1;
2068 config.colsb[0] = 64;
2069 config.rows[0] = 16;
2070 _tile_loadconfig(config.as_ptr());
2071 _tile_loadd::<0>(array.as_ptr().cast(), 64);
2072
2073 for i in 0..16 {
2074 let row = wrap_imm4!(_tile_cvtrowd2psi::<0>, i);
2075 assert_eq!(*row.as_f32x16().as_array(), [i as _; _]);
2076 }
2077 }
2078 }
2079
2080 #[simd_test(enable = "amx-avx512,avx10.2")]
2081 fn test__tile_cvtrowd2ps() {
2082 unsafe {
2083 _init_amx();
2084 let array: [[u32; 16]; 16] = array::from_fn(|i| [i as _; _]);
2085
2086 let mut tile = __tile1024i::zeroed(16, 64);
2087 __tile_loadd(&mut tile, array.as_ptr().cast(), 64);
2088
2089 for i in 0..16 {
2090 let row = __tile_cvtrowd2ps(tile, i);
2091 assert_eq!(*row.as_f32x16().as_array(), [i as _; _]);
2092 }
2093 }
2094 }
2095
2096 #[simd_test(enable = "amx-avx512,avx10.2")]
2097 fn test_tile_cvtrowps2phh() {
2098 unsafe {
2099 _init_amx();
2100 let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
2101
2102 let mut config = __tilecfg::default();
2103 config.palette = 1;
2104 config.colsb[0] = 64;
2105 config.rows[0] = 16;
2106 _tile_loadconfig(config.as_ptr());
2107 _tile_loadd::<0>(array.as_ptr().cast(), 64);
2108 for i in 0..16 {
2109 let row = _tile_cvtrowps2phh::<0>(i);
2110 assert_eq!(
2111 *row.as_f16x32().as_array(),
2112 array::from_fn(|j| if j & 1 == 0 { 0.0 } else { i as _ })
2113 );
2114 }
2115 }
2116 }
2117
2118 #[simd_test(enable = "amx-avx512,avx10.2")]
2119 fn test_tile_cvtrowps2phhi() {
2120 unsafe {
2121 _init_amx();
2122 let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
2123
2124 let mut config = __tilecfg::default();
2125 config.palette = 1;
2126 config.colsb[0] = 64;
2127 config.rows[0] = 16;
2128 _tile_loadconfig(config.as_ptr());
2129 _tile_loadd::<0>(array.as_ptr().cast(), 64);
2130 for i in 0..16 {
2131 let row = wrap_imm4!(_tile_cvtrowps2phhi::<0>, i);
2132 assert_eq!(
2133 *row.as_f16x32().as_array(),
2134 array::from_fn(|j| if j & 1 == 0 { 0.0 } else { i as _ })
2135 );
2136 }
2137 }
2138 }
2139
2140 #[simd_test(enable = "amx-avx512,avx10.2")]
2141 fn test__tile_cvtrowps2phh() {
2142 unsafe {
2143 _init_amx();
2144 let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
2145
2146 let mut tile = __tile1024i::zeroed(16, 64);
2147 __tile_loadd(&mut tile, array.as_ptr().cast(), 64);
2148
2149 for i in 0..16 {
2150 let row = __tile_cvtrowps2phh(tile, i);
2151 assert_eq!(
2152 *row.as_f16x32().as_array(),
2153 array::from_fn(|j| if j & 1 == 0 { 0.0 } else { i as _ })
2154 );
2155 }
2156 }
2157 }
2158
2159 #[simd_test(enable = "amx-avx512,avx10.2")]
2160 fn test_tile_cvtrowps2phl() {
2161 unsafe {
2162 _init_amx();
2163 let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
2164
2165 let mut config = __tilecfg::default();
2166 config.palette = 1;
2167 config.colsb[0] = 64;
2168 config.rows[0] = 16;
2169 _tile_loadconfig(config.as_ptr());
2170 _tile_loadd::<0>(array.as_ptr().cast(), 64);
2171 for i in 0..16 {
2172 let row = _tile_cvtrowps2phl::<0>(i);
2173 assert_eq!(
2174 *row.as_f16x32().as_array(),
2175 array::from_fn(|j| if j & 1 == 0 { i as _ } else { 0.0 })
2176 );
2177 }
2178 }
2179 }
2180
2181 #[simd_test(enable = "amx-avx512,avx10.2")]
2182 fn test_tile_cvtrowps2phli() {
2183 unsafe {
2184 _init_amx();
2185 let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
2186
2187 let mut config = __tilecfg::default();
2188 config.palette = 1;
2189 config.colsb[0] = 64;
2190 config.rows[0] = 16;
2191 _tile_loadconfig(config.as_ptr());
2192 _tile_loadd::<0>(array.as_ptr().cast(), 64);
2193 for i in 0..16 {
2194 let row = wrap_imm4!(_tile_cvtrowps2phli::<0>, i);
2195 assert_eq!(
2196 *row.as_f16x32().as_array(),
2197 array::from_fn(|j| if j & 1 == 0 { i as _ } else { 0.0 })
2198 );
2199 }
2200 }
2201 }
2202
2203 #[simd_test(enable = "amx-avx512,avx10.2")]
2204 fn test__tile_cvtrowps2phl() {
2205 unsafe {
2206 _init_amx();
2207 let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
2208
2209 let mut tile = __tile1024i::zeroed(16, 64);
2210 __tile_loadd(&mut tile, array.as_ptr().cast(), 64);
2211
2212 for i in 0..16 {
2213 let row = __tile_cvtrowps2phl(tile, i);
2214 assert_eq!(
2215 *row.as_f16x32().as_array(),
2216 array::from_fn(|j| if j & 1 == 0 { i as _ } else { 0.0 })
2217 );
2218 }
2219 }
2220 }
2221
2222 #[simd_test(enable = "amx-avx512,avx10.2")]
2223 fn test_tile_cvtrowps2bf16h() {
2224 unsafe {
2225 _init_amx();
2226 let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
2227
2228 let mut config = __tilecfg::default();
2229 config.palette = 1;
2230 config.colsb[0] = 64;
2231 config.rows[0] = 16;
2232 _tile_loadconfig(config.as_ptr());
2233 _tile_loadd::<0>(array.as_ptr().cast(), 64);
2234 for i in 0..16 {
2235 let row = _tile_cvtrowps2bf16h::<0>(i);
2236 assert_eq!(
2237 *row.as_u16x32().as_array(),
2238 array::from_fn(|j| if j & 1 == 0 {
2239 0
2240 } else {
2241 _mm_cvtness_sbh(i as _).to_bits()
2242 })
2243 );
2244 }
2245 }
2246 }
2247
2248 #[simd_test(enable = "amx-avx512,avx10.2")]
2249 fn test_tile_cvtrowps2bf16hi() {
2250 unsafe {
2251 _init_amx();
2252 let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
2253
2254 let mut config = __tilecfg::default();
2255 config.palette = 1;
2256 config.colsb[0] = 64;
2257 config.rows[0] = 16;
2258 _tile_loadconfig(config.as_ptr());
2259 _tile_loadd::<0>(array.as_ptr().cast(), 64);
2260 for i in 0..16 {
2261 let row = wrap_imm4!(_tile_cvtrowps2bf16hi::<0>, i);
2262 assert_eq!(
2263 *row.as_u16x32().as_array(),
2264 array::from_fn(|j| if j & 1 == 0 {
2265 0
2266 } else {
2267 _mm_cvtness_sbh(i as _).to_bits()
2268 })
2269 );
2270 }
2271 }
2272 }
2273
2274 #[simd_test(enable = "amx-avx512,avx10.2")]
2275 fn test__tile_cvtrowps2bf16h() {
2276 unsafe {
2277 _init_amx();
2278 let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
2279
2280 let mut tile = __tile1024i::zeroed(16, 64);
2281 __tile_loadd(&mut tile, array.as_ptr().cast(), 64);
2282
2283 for i in 0..16 {
2284 let row = __tile_cvtrowps2bf16h(tile, i);
2285 assert_eq!(
2286 *row.as_u16x32().as_array(),
2287 array::from_fn(|j| if j & 1 == 0 {
2288 0
2289 } else {
2290 _mm_cvtness_sbh(i as _).to_bits()
2291 })
2292 );
2293 }
2294 }
2295 }
2296
2297 #[simd_test(enable = "amx-avx512,avx10.2")]
2298 fn test_tile_cvtrowps2bf16l() {
2299 unsafe {
2300 _init_amx();
2301 let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
2302
2303 let mut config = __tilecfg::default();
2304 config.palette = 1;
2305 config.colsb[0] = 64;
2306 config.rows[0] = 16;
2307 _tile_loadconfig(config.as_ptr());
2308 _tile_loadd::<0>(array.as_ptr().cast(), 64);
2309 for i in 0..16 {
2310 let row = _tile_cvtrowps2bf16l::<0>(i);
2311 assert_eq!(
2312 *row.as_u16x32().as_array(),
2313 array::from_fn(|j| if j & 1 == 0 {
2314 _mm_cvtness_sbh(i as _).to_bits()
2315 } else {
2316 0
2317 })
2318 );
2319 }
2320 }
2321 }
2322
2323 #[simd_test(enable = "amx-avx512,avx10.2")]
2324 fn test_tile_cvtrowps2bf16li() {
2325 unsafe {
2326 _init_amx();
2327 let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
2328
2329 let mut config = __tilecfg::default();
2330 config.palette = 1;
2331 config.colsb[0] = 64;
2332 config.rows[0] = 16;
2333 _tile_loadconfig(config.as_ptr());
2334 _tile_loadd::<0>(array.as_ptr().cast(), 64);
2335 for i in 0..16 {
2336 let row = wrap_imm4!(_tile_cvtrowps2bf16li::<0>, i);
2337 assert_eq!(
2338 *row.as_u16x32().as_array(),
2339 array::from_fn(|j| if j & 1 == 0 {
2340 _mm_cvtness_sbh(i as _).to_bits()
2341 } else {
2342 0
2343 })
2344 );
2345 }
2346 }
2347 }
2348
2349 #[simd_test(enable = "amx-avx512,avx10.2")]
2350 fn test__tile_cvtrowps2bf16l() {
2351 unsafe {
2352 _init_amx();
2353 let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
2354
2355 let mut tile = __tile1024i::zeroed(16, 64);
2356 __tile_loadd(&mut tile, array.as_ptr().cast(), 64);
2357
2358 for i in 0..16 {
2359 let row = __tile_cvtrowps2bf16l(tile, i);
2360 assert_eq!(
2361 *row.as_u16x32().as_array(),
2362 array::from_fn(|j| if j & 1 == 0 {
2363 _mm_cvtness_sbh(i as _).to_bits()
2364 } else {
2365 0
2366 })
2367 );
2368 }
2369 }
2370 }
2371
2372 #[simd_test(enable = "amx-tf32")]
2373 fn test_tile_mmultf32ps() {
2374 unsafe {
2375 _init_amx();
2376 let a: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
2377 let b: [[f32; 16]; 16] = [array::from_fn(|j| j as _); _];
2378 let mut res = [[0.0; 16]; 16];
2379
2380 let mut config = __tilecfg::default();
2381 config.palette = 1;
2382 (0..=2).for_each(|i| {
2383 config.colsb[i] = 64;
2384 config.rows[i] = 16;
2385 });
2386 _tile_loadconfig(config.as_ptr());
2387 _tile_zero::<0>();
2388 _tile_loadd::<1>(a.as_ptr().cast(), 64);
2389 _tile_loadd::<2>(b.as_ptr().cast(), 64);
2390 _tile_mmultf32ps::<0, 1, 2>();
2391 _tile_stored::<0>(res.as_mut_ptr().cast(), 64);
2392 _tile_release();
2393
2394 let expected = array::from_fn(|i| array::from_fn(|j| 16.0 * i as f32 * j as f32));
2395 assert_eq!(res, expected);
2396 }
2397 }
2398
2399 #[simd_test(enable = "amx-tf32")]
2400 fn test__tile_mmultf32ps() {
2401 unsafe {
2402 _init_amx();
2403 let a: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
2404 let b: [[f32; 16]; 16] = [array::from_fn(|j| j as _); _];
2405 let mut res = [[0.0; 16]; 16];
2406
2407 let mut tile_a = __tile1024i::zeroed(16, 64);
2408 let mut tile_b = __tile1024i::zeroed(16, 64);
2409 let mut tile_c = __tile1024i::zeroed(16, 64);
2410
2411 __tile_loadd(&mut tile_a, a.as_ptr().cast(), 64);
2412 __tile_loadd(&mut tile_b, b.as_ptr().cast(), 64);
2413 __tile_mmultf32ps(&mut tile_c, tile_a, tile_b);
2414 __tile_stored(res.as_mut_ptr().cast(), 64, tile_c);
2415
2416 let expected = array::from_fn(|i| array::from_fn(|j| 16.0 * i as f32 * j as f32));
2417 assert_eq!(res, expected);
2418 }
2419 }
2420}