core/stdarch/crates/core_arch/src/
simd.rs

1//! Internal `#[repr(simd)]` types
2
3#![allow(non_camel_case_types)]
4
5#[inline(always)]
6#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
7pub(crate) const unsafe fn simd_imax<T: Copy>(a: T, b: T) -> T {
8    let mask: T = crate::intrinsics::simd::simd_gt(a, b);
9    crate::intrinsics::simd::simd_select(mask, a, b)
10}
11
12#[inline(always)]
13#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
14pub(crate) const unsafe fn simd_imin<T: Copy>(a: T, b: T) -> T {
15    let mask: T = crate::intrinsics::simd::simd_lt(a, b);
16    crate::intrinsics::simd::simd_select(mask, a, b)
17}
18
/// Defines a crate-internal `#[repr(simd)]` vector type together with its
/// helper API.
///
/// Invocation shape: `simd_ty!(Name[Elem; LEN]: p0, p1, ...)`. The
/// identifiers after the colon become the parameter names of `Name::new` and
/// fill the lanes in the order given, so there must be exactly `LEN` of them.
///
/// The expansion contains the `Copy + Clone` struct, the inherent items
/// `ZERO`, `new`, `from_array`, `splat`, `extract` and `as_array`, a const
/// `PartialEq` impl and a `Debug` impl that prints through
/// `debug_simd_finish`.
macro_rules! simd_ty {
    ($id:ident [$elem_type:ty ; $len:literal]: $($param_name:ident),*) => {
        #[repr(simd)]
        #[derive(Copy, Clone)]
        pub(crate) struct $id([$elem_type; $len]);

        #[allow(clippy::use_self)]
        impl $id {
            /// The vector with every lane set to zero.
            // SAFETY: requires the all-zero bit pattern to be a valid value
            // of the element type; that holds for the primitive integer and
            // float element types this macro is invoked with in this file.
            pub(crate) const ZERO: Self = unsafe { crate::mem::zeroed() };

            /// Builds a vector from one argument per lane, in lane order.
            #[inline(always)]
            pub(crate) const fn new($($param_name: $elem_type),*) -> Self {
                Self([$($param_name),*])
            }

            /// Builds a vector whose lanes are the elements of `elements`.
            #[inline(always)]
            pub(crate) const fn from_array(elements: [$elem_type; $len]) -> Self {
                Self(elements)
            }

            /// Builds a vector with every lane set to `value`.
            // FIXME: Workaround rust@60637
            #[inline(always)]
            #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
            pub(crate) const fn splat(value: $elem_type) -> Self {
                // One-lane helper vector; its only lane is broadcast below.
                #[derive(Copy, Clone)]
                #[repr(simd)]
                struct SingleLane([$elem_type; 1]);

                let single = SingleLane([value]);
                // SAFETY: every shuffle index is 0, which is in bounds for
                // the one-lane input vectors.
                unsafe { simd_shuffle!(single, single, [0; $len]) }
            }

            /// Returns the lane at position `index`.
            ///
            /// The index is a runtime value, so this is a plain array lookup
            /// and not efficient; it is meant for tests only. The lookup
            /// panics when `index` is not below the lane count.
            // FIXME: Workaround rust@60637
            #[inline(always)]
            pub(crate) const fn extract(&self, index: usize) -> $elem_type {
                let lanes = self.as_array();
                lanes[index]
            }

            /// Borrows the lanes as a plain array.
            #[inline]
            pub(crate) const fn as_array(&self) -> &[$elem_type; $len] {
                let this: *const Self = self;
                // SAFETY: the leading bytes of a simd type can always be read
                // as an array of its lanes. Some widths may carry additional
                // padding after the lanes, which is harmless because the
                // array view never reaches that far.
                unsafe { &*this.cast::<[$elem_type; $len]>() }
            }
        }

        #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
        const impl core::cmp::PartialEq for $id {
            /// Compares the two vectors through their lane arrays.
            #[inline]
            fn eq(&self, other: &Self) -> bool {
                let lhs = self.as_array();
                let rhs = other.as_array();
                lhs == rhs
            }
        }

        impl core::fmt::Debug for $id {
            /// Prints the type name followed by the lanes in tuple form.
            #[inline]
            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
                let lanes = self.as_array();
                debug_simd_finish(f, stringify!($id), lanes)
            }
        }
    }
}
87
/// Defines a crate-internal `#[repr(simd)]` vector type whose lanes are
/// built from `bool`s.
///
/// Invocation shape: `simd_m_ty!(Name[Elem; LEN]: p0, p1, ...)`. `Elem` is
/// the integer type each lane is stored as, and the identifiers after the
/// colon become the `bool` parameters of `Name::new`, in lane order.
///
/// A `true` lane is stored as the integer with every bit set and a `false`
/// lane as zero. The expansion contains the `Copy + Clone` struct, the
/// inherent functions `new`, `splat` and `as_array`, a const `PartialEq`
/// impl and a `Debug` impl that prints through `debug_simd_finish`.
macro_rules! simd_m_ty {
    ($id:ident [$elem_type:ident ; $len:literal]: $($param_name:ident),*) => {
        #[repr(simd)]
        #[derive(Copy, Clone)]
        pub(crate) struct $id([$elem_type; $len]);

        #[allow(clippy::use_self)]
        impl $id {
            /// Maps a `bool` to its lane encoding: all bits set for `true`,
            /// zero for `false`.
            #[inline(always)]
            const fn bool_to_internal(x: bool) -> $elem_type {
                if x { !(0 as $elem_type) } else { 0 as $elem_type }
            }

            /// Builds a vector from one `bool` per lane, in lane order.
            #[inline(always)]
            pub(crate) const fn new($($param_name: bool),*) -> Self {
                Self([$(Self::bool_to_internal($param_name)),*])
            }

            /// Builds a vector with every lane set to the encoding of `value`.
            // FIXME: Workaround rust@60637
            #[inline(always)]
            #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
            pub(crate) const fn splat(value: bool) -> Self {
                // One-lane helper vector; its only lane is broadcast below.
                #[derive(Copy, Clone)]
                #[repr(simd)]
                struct SingleLane([$elem_type; 1]);

                let single = SingleLane([Self::bool_to_internal(value)]);
                // SAFETY: every shuffle index is 0, which is in bounds for
                // the one-lane input vectors.
                unsafe { simd_shuffle!(single, single, [0; $len]) }
            }

            /// Borrows the encoded lanes as a plain array.
            #[inline]
            pub(crate) const fn as_array(&self) -> &[$elem_type; $len] {
                let this: *const Self = self;
                // SAFETY: the leading bytes of a simd type can always be read
                // as an array of its lanes. Some widths may carry additional
                // padding after the lanes, which is harmless because the
                // array view never reaches that far.
                unsafe { &*this.cast::<[$elem_type; $len]>() }
            }
        }

        #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
        const impl core::cmp::PartialEq for $id {
            /// Compares the two vectors through their lane arrays.
            #[inline]
            fn eq(&self, other: &Self) -> bool {
                let lhs = self.as_array();
                let rhs = other.as_array();
                lhs == rhs
            }
        }

        impl core::fmt::Debug for $id {
            /// Prints the type name followed by the encoded lanes in tuple form.
            #[inline]
            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
                let lanes = self.as_array();
                debug_simd_finish(f, stringify!($id), lanes)
            }
        }
    }
}
146
147// 16-bit wide types:
148
149simd_ty!(u8x2[u8;2]: x0, x1);
150simd_ty!(i8x2[i8;2]: x0, x1);
151
152// 32-bit wide types:
153
154simd_ty!(u8x4[u8;4]: x0, x1, x2, x3);
155simd_ty!(u16x2[u16;2]: x0, x1);
156
157simd_ty!(i8x4[i8;4]: x0, x1, x2, x3);
158simd_ty!(i16x2[i16;2]: x0, x1);
159
160// 64-bit wide types:
161
162simd_ty!(
163    u8x8[u8;8]:
164    x0,
165    x1,
166    x2,
167    x3,
168    x4,
169    x5,
170    x6,
171    x7
172);
173simd_ty!(u16x4[u16;4]: x0, x1, x2, x3);
174simd_ty!(u32x2[u32;2]: x0, x1);
175simd_ty!(u64x1[u64;1]: x1);
176
177simd_ty!(
178    i8x8[i8;8]:
179    x0,
180    x1,
181    x2,
182    x3,
183    x4,
184    x5,
185    x6,
186    x7
187);
188simd_ty!(i16x4[i16;4]: x0, x1, x2, x3);
189simd_ty!(i32x2[i32;2]: x0, x1);
190simd_ty!(i64x1[i64;1]: x1);
191
192simd_ty!(f32x2[f32;2]: x0, x1);
193simd_ty!(f64x1[f64;1]: x1);
194
195// 128-bit wide types:
196
197simd_ty!(
198    u8x16[u8;16]:
199    x0,
200    x1,
201    x2,
202    x3,
203    x4,
204    x5,
205    x6,
206    x7,
207    x8,
208    x9,
209    x10,
210    x11,
211    x12,
212    x13,
213    x14,
214    x15
215);
216simd_ty!(
217    u16x8[u16;8]:
218    x0,
219    x1,
220    x2,
221    x3,
222    x4,
223    x5,
224    x6,
225    x7
226);
227simd_ty!(u32x4[u32;4]: x0, x1, x2, x3);
228simd_ty!(u64x2[u64;2]: x0, x1);
229
230simd_ty!(
231    i8x16[i8;16]:
232    x0,
233    x1,
234    x2,
235    x3,
236    x4,
237    x5,
238    x6,
239    x7,
240    x8,
241    x9,
242    x10,
243    x11,
244    x12,
245    x13,
246    x14,
247    x15
248);
249simd_ty!(
250    i16x8[i16;8]:
251    x0,
252    x1,
253    x2,
254    x3,
255    x4,
256    x5,
257    x6,
258    x7
259);
260simd_ty!(i32x4[i32;4]: x0, x1, x2, x3);
261simd_ty!(i64x2[i64;2]: x0, x1);
262
263simd_ty!(f16x4[f16;4]: x0, x1, x2, x3);
264
265simd_ty!(
266    f16x8[f16;8]:
267    x0,
268    x1,
269    x2,
270    x3,
271    x4,
272    x5,
273    x6,
274    x7
275);
276simd_ty!(f32x4[f32;4]: x0, x1, x2, x3);
277simd_ty!(f64x2[f64;2]: x0, x1);
278
279simd_m_ty!(
280    m8x16[i8;16]:
281    x0,
282    x1,
283    x2,
284    x3,
285    x4,
286    x5,
287    x6,
288    x7,
289    x8,
290    x9,
291    x10,
292    x11,
293    x12,
294    x13,
295    x14,
296    x15
297);
298simd_m_ty!(
299    m16x8[i16;8]:
300    x0,
301    x1,
302    x2,
303    x3,
304    x4,
305    x5,
306    x6,
307    x7
308);
309simd_m_ty!(m32x4[i32;4]: x0, x1, x2, x3);
310simd_m_ty!(m64x2[i64;2]: x0, x1);
311
312// 256-bit wide types:
313
314simd_ty!(
315    u8x32[u8;32]:
316    x0,
317    x1,
318    x2,
319    x3,
320    x4,
321    x5,
322    x6,
323    x7,
324    x8,
325    x9,
326    x10,
327    x11,
328    x12,
329    x13,
330    x14,
331    x15,
332    x16,
333    x17,
334    x18,
335    x19,
336    x20,
337    x21,
338    x22,
339    x23,
340    x24,
341    x25,
342    x26,
343    x27,
344    x28,
345    x29,
346    x30,
347    x31
348);
349simd_ty!(
350    u16x16[u16;16]:
351    x0,
352    x1,
353    x2,
354    x3,
355    x4,
356    x5,
357    x6,
358    x7,
359    x8,
360    x9,
361    x10,
362    x11,
363    x12,
364    x13,
365    x14,
366    x15
367);
368simd_ty!(
369    u32x8[u32;8]:
370    x0,
371    x1,
372    x2,
373    x3,
374    x4,
375    x5,
376    x6,
377    x7
378);
379simd_ty!(u64x4[u64;4]: x0, x1, x2, x3);
380
381simd_ty!(
382    i8x32[i8;32]:
383    x0,
384    x1,
385    x2,
386    x3,
387    x4,
388    x5,
389    x6,
390    x7,
391    x8,
392    x9,
393    x10,
394    x11,
395    x12,
396    x13,
397    x14,
398    x15,
399    x16,
400    x17,
401    x18,
402    x19,
403    x20,
404    x21,
405    x22,
406    x23,
407    x24,
408    x25,
409    x26,
410    x27,
411    x28,
412    x29,
413    x30,
414    x31
415);
416simd_ty!(
417    i16x16[i16;16]:
418    x0,
419    x1,
420    x2,
421    x3,
422    x4,
423    x5,
424    x6,
425    x7,
426    x8,
427    x9,
428    x10,
429    x11,
430    x12,
431    x13,
432    x14,
433    x15
434);
435simd_ty!(
436    i32x8[i32;8]:
437    x0,
438    x1,
439    x2,
440    x3,
441    x4,
442    x5,
443    x6,
444    x7
445);
446simd_ty!(i64x4[i64;4]: x0, x1, x2, x3);
447
448simd_ty!(
449    f16x16[f16;16]:
450    x0,
451    x1,
452    x2,
453    x3,
454    x4,
455    x5,
456    x6,
457    x7,
458    x8,
459    x9,
460    x10,
461    x11,
462    x12,
463    x13,
464    x14,
465    x15
466);
467simd_ty!(
468    f32x8[f32;8]:
469    x0,
470    x1,
471    x2,
472    x3,
473    x4,
474    x5,
475    x6,
476    x7
477);
478simd_ty!(f64x4[f64;4]: x0, x1, x2, x3);
479
480simd_m_ty!(
481    m8x32[i8;32]:
482    x0,
483    x1,
484    x2,
485    x3,
486    x4,
487    x5,
488    x6,
489    x7,
490    x8,
491    x9,
492    x10,
493    x11,
494    x12,
495    x13,
496    x14,
497    x15,
498    x16,
499    x17,
500    x18,
501    x19,
502    x20,
503    x21,
504    x22,
505    x23,
506    x24,
507    x25,
508    x26,
509    x27,
510    x28,
511    x29,
512    x30,
513    x31
514);
515simd_m_ty!(
516    m16x16[i16;16]:
517    x0,
518    x1,
519    x2,
520    x3,
521    x4,
522    x5,
523    x6,
524    x7,
525    x8,
526    x9,
527    x10,
528    x11,
529    x12,
530    x13,
531    x14,
532    x15
533);
534simd_m_ty!(
535    m32x8[i32;8]:
536    x0,
537    x1,
538    x2,
539    x3,
540    x4,
541    x5,
542    x6,
543    x7
544);
545
546// 512-bit wide types:
547
548simd_ty!(
549    i8x64[i8;64]:
550    x0,
551    x1,
552    x2,
553    x3,
554    x4,
555    x5,
556    x6,
557    x7,
558    x8,
559    x9,
560    x10,
561    x11,
562    x12,
563    x13,
564    x14,
565    x15,
566    x16,
567    x17,
568    x18,
569    x19,
570    x20,
571    x21,
572    x22,
573    x23,
574    x24,
575    x25,
576    x26,
577    x27,
578    x28,
579    x29,
580    x30,
581    x31,
582    x32,
583    x33,
584    x34,
585    x35,
586    x36,
587    x37,
588    x38,
589    x39,
590    x40,
591    x41,
592    x42,
593    x43,
594    x44,
595    x45,
596    x46,
597    x47,
598    x48,
599    x49,
600    x50,
601    x51,
602    x52,
603    x53,
604    x54,
605    x55,
606    x56,
607    x57,
608    x58,
609    x59,
610    x60,
611    x61,
612    x62,
613    x63
614);
615
616simd_ty!(
617    u8x64[u8;64]:
618    x0,
619    x1,
620    x2,
621    x3,
622    x4,
623    x5,
624    x6,
625    x7,
626    x8,
627    x9,
628    x10,
629    x11,
630    x12,
631    x13,
632    x14,
633    x15,
634    x16,
635    x17,
636    x18,
637    x19,
638    x20,
639    x21,
640    x22,
641    x23,
642    x24,
643    x25,
644    x26,
645    x27,
646    x28,
647    x29,
648    x30,
649    x31,
650    x32,
651    x33,
652    x34,
653    x35,
654    x36,
655    x37,
656    x38,
657    x39,
658    x40,
659    x41,
660    x42,
661    x43,
662    x44,
663    x45,
664    x46,
665    x47,
666    x48,
667    x49,
668    x50,
669    x51,
670    x52,
671    x53,
672    x54,
673    x55,
674    x56,
675    x57,
676    x58,
677    x59,
678    x60,
679    x61,
680    x62,
681    x63
682);
683
684simd_ty!(
685    i16x32[i16;32]:
686    x0,
687    x1,
688    x2,
689    x3,
690    x4,
691    x5,
692    x6,
693    x7,
694    x8,
695    x9,
696    x10,
697    x11,
698    x12,
699    x13,
700    x14,
701    x15,
702    x16,
703    x17,
704    x18,
705    x19,
706    x20,
707    x21,
708    x22,
709    x23,
710    x24,
711    x25,
712    x26,
713    x27,
714    x28,
715    x29,
716    x30,
717    x31
718);
719
720simd_ty!(
721    u16x32[u16;32]:
722    x0,
723    x1,
724    x2,
725    x3,
726    x4,
727    x5,
728    x6,
729    x7,
730    x8,
731    x9,
732    x10,
733    x11,
734    x12,
735    x13,
736    x14,
737    x15,
738    x16,
739    x17,
740    x18,
741    x19,
742    x20,
743    x21,
744    x22,
745    x23,
746    x24,
747    x25,
748    x26,
749    x27,
750    x28,
751    x29,
752    x30,
753    x31
754);
755
756simd_ty!(
757    i32x16[i32;16]:
758    x0,
759    x1,
760    x2,
761    x3,
762    x4,
763    x5,
764    x6,
765    x7,
766    x8,
767    x9,
768    x10,
769    x11,
770    x12,
771    x13,
772    x14,
773    x15
774);
775
776simd_ty!(
777    u32x16[u32;16]:
778    x0,
779    x1,
780    x2,
781    x3,
782    x4,
783    x5,
784    x6,
785    x7,
786    x8,
787    x9,
788    x10,
789    x11,
790    x12,
791    x13,
792    x14,
793    x15
794);
795
796simd_ty!(
797    f16x32[f16;32]:
798    x0,
799    x1,
800    x2,
801    x3,
802    x4,
803    x5,
804    x6,
805    x7,
806    x8,
807    x9,
808    x10,
809    x11,
810    x12,
811    x13,
812    x14,
813    x15,
814    x16,
815    x17,
816    x18,
817    x19,
818    x20,
819    x21,
820    x22,
821    x23,
822    x24,
823    x25,
824    x26,
825    x27,
826    x28,
827    x29,
828    x30,
829    x31
830);
831simd_ty!(
832    f32x16[f32;16]:
833    x0,
834    x1,
835    x2,
836    x3,
837    x4,
838    x5,
839    x6,
840    x7,
841    x8,
842    x9,
843    x10,
844    x11,
845    x12,
846    x13,
847    x14,
848    x15
849);
850
851simd_ty!(
852    i64x8[i64;8]:
853    x0,
854    x1,
855    x2,
856    x3,
857    x4,
858    x5,
859    x6,
860    x7
861);
862
863simd_ty!(
864    u64x8[u64;8]:
865    x0,
866    x1,
867    x2,
868    x3,
869    x4,
870    x5,
871    x6,
872    x7
873);
874
875simd_ty!(
876    f64x8[f64;8]:
877    x0,
878    x1,
879    x2,
880    x3,
881    x4,
882    x5,
883    x6,
884    x7
885);
886
887// 1024-bit wide types:
888simd_ty!(
889    u16x64[u16;64]:
890    x0,
891    x1,
892    x2,
893    x3,
894    x4,
895    x5,
896    x6,
897    x7,
898    x8,
899    x9,
900    x10,
901    x11,
902    x12,
903    x13,
904    x14,
905    x15,
906    x16,
907    x17,
908    x18,
909    x19,
910    x20,
911    x21,
912    x22,
913    x23,
914    x24,
915    x25,
916    x26,
917    x27,
918    x28,
919    x29,
920    x30,
921    x31,
922    x32,
923    x33,
924    x34,
925    x35,
926    x36,
927    x37,
928    x38,
929    x39,
930    x40,
931    x41,
932    x42,
933    x43,
934    x44,
935    x45,
936    x46,
937    x47,
938    x48,
939    x49,
940    x50,
941    x51,
942    x52,
943    x53,
944    x54,
945    x55,
946    x56,
947    x57,
948    x58,
949    x59,
950    x60,
951    x61,
952    x62,
953    x63
954);
955simd_ty!(
956    i32x32[i32;32]:
957    x0,
958    x1,
959    x2,
960    x3,
961    x4,
962    x5,
963    x6,
964    x7,
965    x8,
966    x9,
967    x10,
968    x11,
969    x12,
970    x13,
971    x14,
972    x15,
973    x16,
974    x17,
975    x18,
976    x19,
977    x20,
978    x21,
979    x22,
980    x23,
981    x24,
982    x25,
983    x26,
984    x27,
985    x28,
986    x29,
987    x30,
988    x31
989);
990simd_ty!(
991    u32x32[u32;32]:
992    x0,
993    x1,
994    x2,
995    x3,
996    x4,
997    x5,
998    x6,
999    x7,
1000    x8,
1001    x9,
1002    x10,
1003    x11,
1004    x12,
1005    x13,
1006    x14,
1007    x15,
1008    x16,
1009    x17,
1010    x18,
1011    x19,
1012    x20,
1013    x21,
1014    x22,
1015    x23,
1016    x24,
1017    x25,
1018    x26,
1019    x27,
1020    x28,
1021    x29,
1022    x30,
1023    x31
1024);
1025
1026/// Used to continue `Debug`ging SIMD types as `MySimd(1, 2, 3, 4)`, as they
1027/// were before moving to array-based simd.
1028#[inline]
1029pub(crate) fn debug_simd_finish<T: crate::fmt::Debug, const N: usize>(
1030    formatter: &mut crate::fmt::Formatter<'_>,
1031    type_name: &str,
1032    array: &[T; N],
1033) -> crate::fmt::Result {
1034    crate::fmt::Formatter::debug_tuple_fields_finish(
1035        formatter,
1036        type_name,
1037        &crate::array::from_fn::<&dyn crate::fmt::Debug, N, _>(|i| &array[i]),
1038    )
1039}