#ifndef __IMMINTRIN_H
#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512FINTRIN_H
#define __AVX512FINTRIN_H
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));

typedef float __m512 __attribute__((__vector_size__(64)));
typedef double __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));

typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
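/* Rounding mode macros. */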
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF 0x01
#define _MM_FROUND_TO_POS_INF 0x02
#define _MM_FROUND_TO_ZERO 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04

#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
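/* Create vectors with repeated elements */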
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_setzero_si512(void)
{
  return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_undefined_pd(void)
{
  return (__m512d)__builtin_ia32_undef512();
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_undefined(void)
{
  return (__m512)__builtin_ia32_undef512();
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_undefined_ps(void)
{
  return (__m512)__builtin_ia32_undef512();
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_undefined_epi32(void)
{
  return (__m512i)__builtin_ia32_undef512();
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
{
  return (__m512i)__builtin_ia32_pbroadcastd512_gpr_mask(__A,
                                                         (__v16si)_mm512_setzero_si512(),
                                                         __M);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
{
#ifdef __x86_64__
  return (__m512i)__builtin_ia32_pbroadcastq512_gpr_mask(__A,
                                                         (__v8di)_mm512_setzero_si512(),
                                                         __M);
#else
  return (__m512i)__builtin_ia32_pbroadcastq512_mem_mask(__A,
                                                         (__v8di)_mm512_setzero_si512(),
                                                         __M);
#endif
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_setzero_ps(void)
{
  return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                   0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_setzero_pd(void)
{
  return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_set1_ps(float __w)
{
  return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
                   __w, __w, __w, __w, __w, __w, __w, __w };
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_set1_pd(double __w)
{
  return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi32(int __s)
{
  return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
                             __s, __s, __s, __s, __s, __s, __s, __s };
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi64(long long __d)
{
  return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_broadcastss_ps(__m128 __X)
{
  float __f = __X[0];
  return (__v16sf){ __f, __f, __f, __f, __f, __f, __f, __f,
                    __f, __f, __f, __f, __f, __f, __f, __f };
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_broadcastsd_pd(__m128d __X)
{
  double __d = __X[0];
  return (__v8df){ __d, __d, __d, __d, __d, __d, __d, __d };
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_castpd256_pd512(__m256d __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_castps256_ps512(__m256 __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
                                           -1, -1, -1, -1, -1, -1, -1, -1);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm512_castpd512_pd128(__m512d __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm512_castps512_ps128(__m512 __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
}
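/* Bitwise operators */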
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_and_epi32(__m512i __a, __m512i __b)
{
  return __a & __b;
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_pandd512_mask((__v16si)__a, (__v16si)__b,
                                               (__v16si)__src, (__mmask16)__k);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_pandd512_mask((__v16si)__a, (__v16si)__b,
                                               (__v16si)_mm512_setzero_si512(),
                                               (__mmask16)__k);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_and_epi64(__m512i __a, __m512i __b)
{
  return __a & __b;
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_pandq512_mask((__v8di)__a, (__v8di)__b,
                                               (__v8di)__src, (__mmask8)__k);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_pandq512_mask((__v8di)__a, (__v8di)__b,
                                               (__v8di)_mm512_setzero_si512(),
                                               (__mmask8)__k);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_andnot_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_pandnd512_mask((__v16si)__A, (__v16si)__B,
                                                (__v16si)_mm512_setzero_si512(),
                                                (__mmask16)-1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_pandnd512_mask((__v16si)__A, (__v16si)__B,
                                                (__v16si)__W, (__mmask16)__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_pandnd512_mask((__v16si)__A, (__v16si)__B,
                                                (__v16si)_mm512_setzero_si512(),
                                                (__mmask16)__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_andnot_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_pandnq512_mask((__v8di)__A, (__v8di)__B,
                                                (__v8di)_mm512_setzero_si512(),
                                                (__mmask8)-1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_pandnq512_mask((__v8di)__A, (__v8di)__B,
                                                (__v8di)__W, (__mmask8)__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_pandnq512_mask((__v8di)__A, (__v8di)__B,
                                                (__v8di)_mm512_setzero_si512(),
                                                (__mmask8)__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_or_epi32(__m512i __a, __m512i __b)
{
  return __a | __b;
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_pord512_mask((__v16si)__a, (__v16si)__b,
                                              (__v16si)__src, (__mmask16)__k);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_pord512_mask((__v16si)__a, (__v16si)__b,
                                              (__v16si)_mm512_setzero_si512(),
                                              (__mmask16)__k);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_or_epi64(__m512i __a, __m512i __b)
{
  return __a | __b;
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_porq512_mask((__v8di)__a, (__v8di)__b,
                                              (__v8di)__src, (__mmask8)__k);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_porq512_mask((__v8di)__a, (__v8di)__b,
                                              (__v8di)_mm512_setzero_si512(),
                                              (__mmask8)__k);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_xor_epi32(__m512i __a, __m512i __b)
{
  return __a ^ __b;
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_pxord512_mask((__v16si)__a, (__v16si)__b,
                                               (__v16si)__src, (__mmask16)__k);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_pxord512_mask((__v16si)__a, (__v16si)__b,
                                               (__v16si)_mm512_setzero_si512(),
                                               (__mmask16)__k);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_xor_epi64(__m512i __a, __m512i __b)
{
  return __a ^ __b;
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_pxorq512_mask((__v8di)__a, (__v8di)__b,
                                               (__v8di)__src, (__mmask8)__k);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_pxorq512_mask((__v8di)__a, (__v8di)__b,
                                               (__v8di)_mm512_setzero_si512(),
                                               (__mmask8)__k);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_and_si512(__m512i __a, __m512i __b)
{
  return __a & __b;
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_or_si512(__m512i __a, __m512i __b)
{
  return __a | __b;
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_xor_si512(__m512i __a, __m512i __b)
{
  return __a ^ __b;
}
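/* Arithmetic */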
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_add_pd(__m512d __a, __m512d __b)
{
  return __a + __b;
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_add_ps(__m512 __a, __m512 __b)
{
  return __a + __b;
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mul_pd(__m512d __a, __m512d __b)
{
  return __a * __b;
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mul_ps(__m512 __a, __m512 __b)
{
  return __a * __b;
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_sub_pd(__m512d __a, __m512d __b)
{
  return __a - __b;
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_sub_ps(__m512 __a, __m512 __b)
{
  return __a - __b;
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_add_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)((__v8di)__A + (__v8di)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_paddq512_mask((__v8di)__A, (__v8di)__B,
                                               (__v8di)__W, (__mmask8)__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_paddq512_mask((__v8di)__A, (__v8di)__B,
                                               (__v8di)_mm512_setzero_si512(),
                                               (__mmask8)__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sub_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)((__v8di)__A - (__v8di)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_psubq512_mask((__v8di)__A, (__v8di)__B,
                                               (__v8di)__W, (__mmask8)__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_psubq512_mask((__v8di)__A, (__v8di)__B,
                                               (__v8di)_mm512_setzero_si512(),
                                               (__mmask8)__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_add_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)((__v16si)__A + (__v16si)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_paddd512_mask((__v16si)__A, (__v16si)__B,
                                               (__v16si)__W, (__mmask16)__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_paddd512_mask((__v16si)__A, (__v16si)__B,
                                               (__v16si)_mm512_setzero_si512(),
                                               (__mmask16)__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sub_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)((__v16si)__A - (__v16si)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_psubd512_mask((__v16si)__A, (__v16si)__B,
                                               (__v16si)__W, (__mmask16)__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_psubd512_mask((__v16si)__A, (__v16si)__B,
                                               (__v16si)_mm512_setzero_si512(),
                                               (__mmask16)__U);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_max_pd(__m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_maxpd512_mask((__v8df)__A, (__v8df)__B,
                                               (__v8df)_mm512_setzero_pd(),
                                               (__mmask8)-1,
                                               _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_max_ps(__m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_maxps512_mask((__v16sf)__A, (__v16sf)__B,
                                              (__v16sf)_mm512_setzero_ps(),
                                              (__mmask16)-1,
                                              _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_maxss_round((__v4sf)__A, (__v4sf)__B,
                                            (__v4sf)__W, (__mmask8)__U,
                                            _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_maxss_round((__v4sf)__A, (__v4sf)__B,
                                            (__v4sf)_mm_setzero_ps(),
                                            (__mmask8)__U,
                                            _MM_FROUND_CUR_DIRECTION);
}

#define _mm_max_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round((__v4sf)__A, (__v4sf)__B, \
                                     (__v4sf)_mm_setzero_ps(), \
                                     (__mmask8)-1, __R); })

#define _mm_mask_max_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round((__v4sf)__A, (__v4sf)__B, \
                                     (__v4sf)__W, (__mmask8)__U, __R); })

#define _mm_maskz_max_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round((__v4sf)__A, (__v4sf)__B, \
                                     (__v4sf)_mm_setzero_ps(), \
                                     (__mmask8)__U, __R); })

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_maxsd_round((__v2df)__A, (__v2df)__B,
                                             (__v2df)__W, (__mmask8)__U,
                                             _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_maxsd_round((__v2df)__A, (__v2df)__B,
                                             (__v2df)_mm_setzero_pd(),
                                             (__mmask8)__U,
                                             _MM_FROUND_CUR_DIRECTION);
}

#define _mm_max_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round((__v2df)__A, (__v2df)__B, \
                                      (__v2df)_mm_setzero_pd(), \
                                      (__mmask8)-1, __R); })

#define _mm_mask_max_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round((__v2df)__A, (__v2df)__B, \
                                      (__v2df)__W, (__mmask8)__U, __R); })

#define _mm_maskz_max_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round((__v2df)__A, (__v2df)__B, \
                                      (__v2df)_mm_setzero_pd(), \
                                      (__mmask8)__U, __R); })

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_max_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_pmaxsd512_mask((__v16si)__A, (__v16si)__B,
                                                (__v16si)_mm512_setzero_si512(),
                                                (__mmask16)-1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_max_epu32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_pmaxud512_mask((__v16si)__A, (__v16si)__B,
                                                (__v16si)_mm512_setzero_si512(),
                                                (__mmask16)-1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_max_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_pmaxsq512_mask((__v8di)__A, (__v8di)__B,
                                                (__v8di)_mm512_setzero_si512(),
                                                (__mmask8)-1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_max_epu64(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_pmaxuq512_mask((__v8di)__A, (__v8di)__B,
                                                (__v8di)_mm512_setzero_si512(),
                                                (__mmask8)-1);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_min_pd(__m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_minpd512_mask((__v8df)__A, (__v8df)__B,
                                               (__v8df)_mm512_setzero_pd(),
                                               (__mmask8)-1,
                                               _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_min_ps(__m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_minps512_mask((__v16sf)__A, (__v16sf)__B,
                                              (__v16sf)_mm512_setzero_ps(),
                                              (__mmask16)-1,
                                              _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_minss_round((__v4sf)__A, (__v4sf)__B,
                                            (__v4sf)__W, (__mmask8)__U,
                                            _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_minss_round((__v4sf)__A, (__v4sf)__B,
                                            (__v4sf)_mm_setzero_ps(),
                                            (__mmask8)__U,
                                            _MM_FROUND_CUR_DIRECTION);
}

#define _mm_min_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round((__v4sf)__A, (__v4sf)__B, \
                                     (__v4sf)_mm_setzero_ps(), \
                                     (__mmask8)-1, __R); })

#define _mm_mask_min_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round((__v4sf)__A, (__v4sf)__B, \
                                     (__v4sf)__W, (__mmask8)__U, __R); })

#define _mm_maskz_min_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round((__v4sf)__A, (__v4sf)__B, \
                                     (__v4sf)_mm_setzero_ps(), \
                                     (__mmask8)__U, __R); })

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_minsd_round((__v2df)__A, (__v2df)__B,
                                             (__v2df)__W, (__mmask8)__U,
                                             _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_minsd_round((__v2df)__A, (__v2df)__B,
                                             (__v2df)_mm_setzero_pd(),
                                             (__mmask8)__U,
                                             _MM_FROUND_CUR_DIRECTION);
}

#define _mm_min_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round((__v2df)__A, (__v2df)__B, \
                                      (__v2df)_mm_setzero_pd(), \
                                      (__mmask8)-1, __R); })

#define _mm_mask_min_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round((__v2df)__A, (__v2df)__B, \
                                      (__v2df)__W, (__mmask8)__U, __R); })

#define _mm_maskz_min_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round((__v2df)__A, (__v2df)__B, \
                                      (__v2df)_mm_setzero_pd(), \
                                      (__mmask8)__U, __R); })

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_min_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_pminsd512_mask((__v16si)__A, (__v16si)__B,
                                                (__v16si)_mm512_setzero_si512(),
                                                (__mmask16)-1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_min_epu32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_pminud512_mask((__v16si)__A, (__v16si)__B,
                                                (__v16si)_mm512_setzero_si512(),
                                                (__mmask16)-1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_min_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_pminsq512_mask((__v8di)__A, (__v8di)__B,
                                                (__v8di)_mm512_setzero_si512(),
                                                (__mmask8)-1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_min_epu64(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_pminuq512_mask((__v8di)__A, (__v8di)__B,
                                                (__v8di)_mm512_setzero_si512(),
                                                (__mmask8)-1);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mul_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_pmuldq512_mask((__v16si)__X, (__v16si)__Y,
                                                (__v8di)_mm512_setzero_si512(),
                                                (__mmask8)-1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_pmuldq512_mask((__v16si)__X, (__v16si)__Y,
                                                (__v8di)__W, __M);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_pmuldq512_mask((__v16si)__X, (__v16si)__Y,
                                                (__v8di)_mm512_setzero_si512(),
                                                __M);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mul_epu32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_pmuludq512_mask((__v16si)__X, (__v16si)__Y,
                                                 (__v8di)_mm512_setzero_si512(),
                                                 (__mmask8)-1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_pmuludq512_mask((__v16si)__X, (__v16si)__Y,
                                                 (__v8di)__W, __M);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_pmuludq512_mask((__v16si)__X, (__v16si)__Y,
                                                 (__v8di)_mm512_setzero_si512(),
                                                 __M);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mullo_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)((__v16si)__A * (__v16si)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_pmulld512_mask((__v16si)__A, (__v16si)__B,
                                                (__v16si)_mm512_setzero_si512(),
                                                __M);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_pmulld512_mask((__v16si)__A, (__v16si)__B,
                                                (__v16si)__W, __M);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_sqrt_pd(__m512d __a)
{
  return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
                                                (__v8df)_mm512_setzero_pd(),
                                                (__mmask8)-1,
                                                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_sqrt_ps(__m512 __a)
{
  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
                                               (__v16sf)_mm512_setzero_ps(),
                                               (__mmask16)-1,
                                               _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_rsqrt14_pd(__m512d __A)
{
  return (__m512d)__builtin_ia32_rsqrt14pd512_mask((__v8df)__A,
                                                   (__v8df)_mm512_setzero_pd(),
                                                   (__mmask8)-1);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_rsqrt14_ps(__m512 __A)
{
  return (__m512)__builtin_ia32_rsqrt14ps512_mask((__v16sf)__A,
                                                  (__v16sf)_mm512_setzero_ps(),
                                                  (__mmask16)-1);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_rsqrt14_ss(__m128 __A, __m128 __B)
{
  return (__m128)__builtin_ia32_rsqrt14ss((__v4sf)__A, (__v4sf)__B,
                                          (__v4sf)_mm_setzero_ps(),
                                          (__mmask8)-1);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_rsqrt14_sd(__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_rsqrt14sd((__v2df)__A, (__v2df)__B,
                                           (__v2df)_mm_setzero_pd(),
                                           (__mmask8)-1);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_rcp14_pd(__m512d __A)
{
  return (__m512d)__builtin_ia32_rcp14pd512_mask((__v8df)__A,
                                                 (__v8df)_mm512_setzero_pd(),
                                                 (__mmask8)-1);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_rcp14_ps(__m512 __A)
{
  return (__m512)__builtin_ia32_rcp14ps512_mask((__v16sf)__A,
                                                (__v16sf)_mm512_setzero_ps(),
                                                (__mmask16)-1);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_rcp14_ss(__m128 __A, __m128 __B)
{
  return (__m128)__builtin_ia32_rcp14ss((__v4sf)__A, (__v4sf)__B,
                                        (__v4sf)_mm_setzero_ps(),
                                        (__mmask8)-1);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_rcp14_sd(__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_rcp14sd((__v2df)__A, (__v2df)__B,
                                         (__v2df)_mm_setzero_pd(),
                                         (__mmask8)-1);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_floor_ps(__m512 __A)
{
  return (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)__A,
                                                _MM_FROUND_FLOOR,
                                                (__v16sf)__A, -1,
                                                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_floor_pd(__m512d __A)
{
  return (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)__A,
                                                 _MM_FROUND_FLOOR,
                                                 (__v8df)__A, -1,
                                                 _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_ceil_ps(__m512 __A)
{
  return (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)__A,
                                                _MM_FROUND_CEIL,
                                                (__v16sf)__A, -1,
                                                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_ceil_pd(__m512d __A)
{
  return (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)__A,
                                                 _MM_FROUND_CEIL,
                                                 (__v8df)__A, -1,
                                                 _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_abs_epi64(__m512i __A)
{
  return (__m512i)__builtin_ia32_pabsq512_mask((__v8di)__A,
                                               (__v8di)_mm512_setzero_si512(),
                                               (__mmask8)-1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_abs_epi32(__m512i __A)
{
  return (__m512i)__builtin_ia32_pabsd512_mask((__v16si)__A,
                                               (__v16si)_mm512_setzero_si512(),
                                               (__mmask16)-1);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_addss_round((__v4sf)__A, (__v4sf)__B,
                                            (__v4sf)__W, (__mmask8)__U,
                                            _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_addss_round((__v4sf)__A, (__v4sf)__B,
                                            (__v4sf)_mm_setzero_ps(),
                                            (__mmask8)__U,
                                            _MM_FROUND_CUR_DIRECTION);
}

#define _mm_add_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round((__v4sf)__A, (__v4sf)__B, \
                                     (__v4sf)_mm_setzero_ps(), \
                                     (__mmask8)-1, __R); })

#define _mm_mask_add_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round((__v4sf)__A, (__v4sf)__B, \
                                     (__v4sf)__W, (__mmask8)__U, __R); })

#define _mm_maskz_add_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round((__v4sf)__A, (__v4sf)__B, \
                                     (__v4sf)_mm_setzero_ps(), \
                                     (__mmask8)__U, __R); })

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_addsd_round((__v2df)__A, (__v2df)__B,
                                             (__v2df)__W, (__mmask8)__U,
                                             _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_addsd_round((__v2df)__A, (__v2df)__B,
                                             (__v2df)_mm_setzero_pd(),
                                             (__mmask8)__U,
                                             _MM_FROUND_CUR_DIRECTION);
}

#define _mm_add_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round((__v2df)__A, (__v2df)__B, \
                                      (__v2df)_mm_setzero_pd(), \
                                      (__mmask8)-1, __R); })

#define _mm_mask_add_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round((__v2df)__A, (__v2df)__B, \
                                      (__v2df)__W, (__mmask8)__U, __R); })

#define _mm_maskz_add_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round((__v2df)__A, (__v2df)__B, \
                                      (__v2df)_mm_setzero_pd(), \
                                      (__mmask8)__U, __R); })

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_addpd512_mask((__v8df)__A, (__v8df)__B,
                                               (__v8df)__W, (__mmask8)__U,
                                               _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_addpd512_mask((__v8df)__A, (__v8df)__B,
                                               (__v8df)_mm512_setzero_pd(),
                                               (__mmask8)__U,
                                               _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_addps512_mask((__v16sf)__A, (__v16sf)__B,
                                              (__v16sf)__W, (__mmask16)__U,
                                              _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_addps512_mask((__v16sf)__A, (__v16sf)__B,
                                              (__v16sf)_mm512_setzero_ps(),
                                              (__mmask16)__U,
                                              _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_add_round_pd(__A, __B, __R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask((__v8df)__A, (__v8df)__B, \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, __R); })

#define _mm512_mask_add_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask((__v8df)__A, (__v8df)__B, \
                                        (__v8df)__W, (__mmask8)__U, __R); })

#define _mm512_maskz_add_round_pd(__U, __A, __B, __R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask((__v8df)__A, (__v8df)__B, \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)__U, __R); })

#define _mm512_add_round_ps(__A, __B, __R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask((__v16sf)__A, (__v16sf)__B, \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, __R); })

#define _mm512_mask_add_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask((__v16sf)__A, (__v16sf)__B, \
                                       (__v16sf)__W, (__mmask16)__U, __R); })

#define _mm512_maskz_add_round_ps(__U, __A, __B, __R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask((__v16sf)__A, (__v16sf)__B, \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)__U, __R); })
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_subss_round((__v4sf)__A, (__v4sf)__B,
                                            (__v4sf)__W, (__mmask8)__U,
                                            _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_subss_round((__v4sf)__A, (__v4sf)__B,
                                            (__v4sf)_mm_setzero_ps(),
                                            (__mmask8)__U,
                                            _MM_FROUND_CUR_DIRECTION);
}

#define _mm_sub_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round((__v4sf)__A, (__v4sf)__B, \
                                     (__v4sf)_mm_setzero_ps(), \
                                     (__mmask8)-1, __R); })

#define _mm_mask_sub_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round((__v4sf)__A, (__v4sf)__B, \
                                     (__v4sf)__W, (__mmask8)__U, __R); })

#define _mm_maskz_sub_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round((__v4sf)__A, (__v4sf)__B, \
                                     (__v4sf)_mm_setzero_ps(), \
                                     (__mmask8)__U, __R); })

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_subsd_round((__v2df)__A, (__v2df)__B,
                                             (__v2df)__W, (__mmask8)__U,
                                             _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_subsd_round((__v2df)__A, (__v2df)__B,
                                             (__v2df)_mm_setzero_pd(),
                                             (__mmask8)__U,
                                             _MM_FROUND_CUR_DIRECTION);
}

#define _mm_sub_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_subsd_round((__v2df)__A, (__v2df)__B, \
                                      (__v2df)_mm_setzero_pd(), \
                                      (__mmask8)-1, __R); })

#define _mm_mask_sub_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_subsd_round((__v2df)__A, (__v2df)__B, \
                                      (__v2df)__W, (__mmask8)__U, __R); })

#define _mm_maskz_sub_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_subsd_round((__v2df)__A, (__v2df)__B, \
                                      (__v2df)_mm_setzero_pd(), \
                                      (__mmask8)__U, __R); })

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_subpd512_mask((__v8df)__A, (__v8df)__B,
                                               (__v8df)__W, (__mmask8)__U,
                                               _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_subpd512_mask((__v8df)__A, (__v8df)__B,
                                               (__v8df)_mm512_setzero_pd(),
                                               (__mmask8)__U,
                                               _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_subps512_mask((__v16sf)__A, (__v16sf)__B,
                                              (__v16sf)__W, (__mmask16)__U,
                                              _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_subps512_mask((__v16sf)__A, (__v16sf)__B,
                                              (__v16sf)_mm512_setzero_ps(),
                                              (__mmask16)__U,
                                              _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_sub_round_pd(__A, __B, __R) __extension__ ({ \
  (__m512d)__builtin_ia32_subpd512_mask((__v8df)__A, (__v8df)__B, \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, __R); })

#define _mm512_mask_sub_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512d)__builtin_ia32_subpd512_mask((__v8df)__A, (__v8df)__B, \
                                        (__v8df)__W, (__mmask8)__U, __R); })

#define _mm512_maskz_sub_round_pd(__U, __A, __B, __R) __extension__ ({ \
  (__m512d)__builtin_ia32_subpd512_mask((__v8df)__A, (__v8df)__B, \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)__U, __R); })

#define _mm512_sub_round_ps(__A, __B, __R) __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask((__v16sf)__A, (__v16sf)__B, \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, __R); })

#define _mm512_mask_sub_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask((__v16sf)__A, (__v16sf)__B, \
                                       (__v16sf)__W, (__mmask16)__U, __R); })

#define _mm512_maskz_sub_round_ps(__U, __A, __B, __R) __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask((__v16sf)__A, (__v16sf)__B, \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)__U, __R); })
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_mulss_round((__v4sf)__A, (__v4sf)__B,
                                            (__v4sf)__W, (__mmask8)__U,
                                            _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_mulss_round((__v4sf)__A, (__v4sf)__B,
                                            (__v4sf)_mm_setzero_ps(),
                                            (__mmask8)__U,
                                            _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mul_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round((__v4sf)__A, (__v4sf)__B, \
                                     (__v4sf)_mm_setzero_ps(), \
                                     (__mmask8)-1, __R); })

#define _mm_mask_mul_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round((__v4sf)__A, (__v4sf)__B, \
                                     (__v4sf)__W, (__mmask8)__U, __R); })

#define _mm_maskz_mul_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round((__v4sf)__A, (__v4sf)__B, \
                                     (__v4sf)_mm_setzero_ps(), \
                                     (__mmask8)__U, __R); })

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_mulsd_round((__v2df)__A, (__v2df)__B,
                                             (__v2df)__W, (__mmask8)__U,
                                             _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_mulsd_round((__v2df)__A, (__v2df)__B,
                                             (__v2df)_mm_setzero_pd(),
                                             (__mmask8)__U,
                                             _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mul_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round((__v2df)__A, (__v2df)__B, \
                                      (__v2df)_mm_setzero_pd(), \
                                      (__mmask8)-1, __R); })

#define _mm_mask_mul_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round((__v2df)__A, (__v2df)__B, \
                                      (__v2df)__W, (__mmask8)__U, __R); })

#define _mm_maskz_mul_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round((__v2df)__A, (__v2df)__B, \
                                      (__v2df)_mm_setzero_pd(), \
                                      (__mmask8)__U, __R); })

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_mulpd512_mask((__v8df)__A, (__v8df)__B,
                                               (__v8df)__W, (__mmask8)__U,
                                               _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_mulpd512_mask((__v8df)__A, (__v8df)__B,
                                               (__v8df)_mm512_setzero_pd(),
                                               (__mmask8)__U,
                                               _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_mulps512_mask((__v16sf)__A, (__v16sf)__B,
                                              (__v16sf)__W, (__mmask16)__U,
                                              _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_mulps512_mask((__v16sf)__A, (__v16sf)__B,
                                              (__v16sf)_mm512_setzero_ps(),
                                              (__mmask16)__U,
                                              _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_mul_round_pd(__A, __B, __R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask((__v8df)__A, (__v8df)__B, \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, __R); })

#define _mm512_mask_mul_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask((__v8df)__A, (__v8df)__B, \
                                        (__v8df)__W, (__mmask8)__U, __R); })

#define _mm512_maskz_mul_round_pd(__U, __A, __B, __R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask((__v8df)__A, (__v8df)__B, \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)__U, __R); })

#define _mm512_mul_round_ps(__A, __B, __R) __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask((__v16sf)__A, (__v16sf)__B, \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, __R); })

#define _mm512_mask_mul_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask((__v16sf)__A, (__v16sf)__B, \
                                       (__v16sf)__W, (__mmask16)__U, __R); })

#define _mm512_maskz_mul_round_ps(__U, __A, __B, __R) __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask((__v16sf)__A, (__v16sf)__B, \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)__U, __R); })
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_divss_round((__v4sf)__A, (__v4sf)__B,
                                            (__v4sf)__W, (__mmask8)__U,
                                            _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_divss_round((__v4sf)__A, (__v4sf)__B,
                                            (__v4sf)_mm_setzero_ps(),
                                            (__mmask8)__U,
                                            _MM_FROUND_CUR_DIRECTION);
}

#define _mm_div_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round((__v4sf)__A, (__v4sf)__B, \
                                     (__v4sf)_mm_setzero_ps(), \
                                     (__mmask8)-1, __R); })

#define _mm_mask_div_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round((__v4sf)__A, (__v4sf)__B, \
                                     (__v4sf)__W, (__mmask8)__U, __R); })

#define _mm_maskz_div_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round((__v4sf)__A, (__v4sf)__B, \
                                     (__v4sf)_mm_setzero_ps(), \
                                     (__mmask8)__U, __R); })

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_divsd_round((__v2df)__A, (__v2df)__B,
                                             (__v2df)__W, (__mmask8)__U,
                                             _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_divsd_round((__v2df)__A, (__v2df)__B,
                                             (__v2df)_mm_setzero_pd(),
                                             (__mmask8)__U,
                                             _MM_FROUND_CUR_DIRECTION);
}

#define _mm_div_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round((__v2df)__A, (__v2df)__B, \
                                      (__v2df)_mm_setzero_pd(), \
                                      (__mmask8)-1, __R); })

#define _mm_mask_div_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round((__v2df)__A, (__v2df)__B, \
                                      (__v2df)__W, (__mmask8)__U, __R); })

#define _mm_maskz_div_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round((__v2df)__A, (__v2df)__B, \
                                      (__v2df)_mm_setzero_pd(), \
                                      (__mmask8)__U, __R); })

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_divpd512_mask((__v8df)__A, (__v8df)__B,
                                               (__v8df)__W, (__mmask8)__U,
                                               _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_divpd512_mask((__v8df)__A, (__v8df)__B,
                                               (__v8df)_mm512_setzero_pd(),
                                               (__mmask8)__U,
                                               _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_divps512_mask((__v16sf)__A, (__v16sf)__B,
                                              (__v16sf)__W, (__mmask16)__U,
                                              _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_divps512_mask((__v16sf)__A, (__v16sf)__B,
                                              (__v16sf)_mm512_setzero_ps(),
                                              (__mmask16)__U,
                                              _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_div_round_pd(__A, __B, __R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask((__v8df)__A, (__v8df)__B, \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, __R); })

#define _mm512_mask_div_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask((__v8df)__A, (__v8df)__B, \
                                        (__v8df)__W, (__mmask8)__U, __R); })

#define _mm512_maskz_div_round_pd(__U, __A, __B, __R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask((__v8df)__A, (__v8df)__B, \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)__U, __R); })

#define _mm512_div_round_ps(__A, __B, __R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)__A, (__v16sf)__B, \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, __R); })

#define _mm512_mask_div_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)__A, (__v16sf)__B, \
                                       (__v16sf)__W, (__mmask16)__U, __R); })

#define _mm512_maskz_div_round_ps(__U, __A, __B, __R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)__A, (__v16sf)__B, \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)__U, __R); })
#define _mm512_roundscale_ps(A, B) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(A), (B), (__v16sf)(A), \
                                         -1, _MM_FROUND_CUR_DIRECTION); })

#define _mm512_roundscale_pd(A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(A), (B), (__v8df)(A), \
                                          -1, _MM_FROUND_CUR_DIRECTION); })

#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(A), (__v8df)(B), \
                                           (__v8df)(C), (__mmask8)-1, (R)); })

#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(A), (__v8df)(B), \
                                           (__v8df)(C), (__mmask8)(U), (R)); })

#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(A), (__v8df)(B), \
                                            (__v8df)(C), (__mmask8)(U), (R)); })

#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(A), (__v8df)(B), \
                                            (__v8df)(C), (__mmask8)(U), (R)); })

#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(A), (__v8df)(B), \
                                           -(__v8df)(C), (__mmask8)-1, (R)); })

#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(A), (__v8df)(B), \
                                           -(__v8df)(C), (__mmask8)(U), (R)); })

#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(A), (__v8df)(B), \
                                            -(__v8df)(C), (__mmask8)(U), (R)); })

#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(A), (__v8df)(B), \
                                           (__v8df)(C), (__mmask8)-1, (R)); })

#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(A), (__v8df)(B), \
                                            (__v8df)(C), (__mmask8)(U), (R)); })

#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(A), (__v8df)(B), \
                                            (__v8df)(C), (__mmask8)(U), (R)); })

#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(A), (__v8df)(B), \
                                           -(__v8df)(C), (__mmask8)-1, (R)); })

#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(A), (__v8df)(B), \
                                            -(__v8df)(C), (__mmask8)(U), (R)); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)__A, (__v8df)__B,
                                                  (__v8df)__C, (__mmask8)-1,
                                                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)__A, (__v8df)__B,
                                                  (__v8df)__C, (__mmask8)__U,
                                                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)__A, (__v8df)__B,
                                                   (__v8df)__C, (__mmask8)__U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)__A, (__v8df)__B,
                                                   (__v8df)__C, (__mmask8)__U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)__A, (__v8df)__B,
                                                  -(__v8df)__C, (__mmask8)-1,
                                                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)__A, (__v8df)__B,
                                                  -(__v8df)__C, (__mmask8)__U,
                                                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)__A, (__v8df)__B,
                                                   -(__v8df)__C, (__mmask8)__U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)__A, (__v8df)__B,
                                                  (__v8df)__C, (__mmask8)-1,
                                                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)__A, (__v8df)__B,
                                                   (__v8df)__C, (__mmask8)__U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)__A, (__v8df)__B,
                                                   (__v8df)__C, (__mmask8)__U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)__A, (__v8df)__B,
                                                  -(__v8df)__C, (__mmask8)-1,
                                                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)__A, (__v8df)__B,
                                                   -(__v8df)__C, (__mmask8)__U,
                                                   _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(A), (__v16sf)(B), \
                                          (__v16sf)(C), (__mmask16)-1, (R)); })

#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(A), (__v16sf)(B), \
                                          (__v16sf)(C), (__mmask16)(U), (R)); })

#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(A), (__v16sf)(B), \
                                           (__v16sf)(C), (__mmask16)(U), (R)); })

#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(A), (__v16sf)(B), \
                                           (__v16sf)(C), (__mmask16)(U), (R)); })

#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(A), (__v16sf)(B), \
                                          -(__v16sf)(C), (__mmask16)-1, (R)); })

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(A), (__v16sf)(B), \
                                          -(__v16sf)(C), (__mmask16)(U), (R)); })

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(A), (__v16sf)(B), \
                                           -(__v16sf)(C), (__mmask16)(U), (R)); })

#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(A), (__v16sf)(B), \
                                          (__v16sf)(C), (__mmask16)-1, (R)); })

#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(A), (__v16sf)(B), \
                                           (__v16sf)(C), (__mmask16)(U), (R)); })

#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(A), (__v16sf)(B), \
                                           (__v16sf)(C), (__mmask16)(U), (R)); })

#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(A), (__v16sf)(B), \
                                          -(__v16sf)(C), (__mmask16)-1, (R)); })

#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(A), (__v16sf)(B), \
                                           -(__v16sf)(C), (__mmask16)(U), (R)); })
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)__A, (__v16sf)__B,
                                                 (__v16sf)__C, (__mmask16)-1,
                                                 _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)__A, (__v16sf)__B,
                                                 (__v16sf)__C, (__mmask16)__U,
                                                 _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)__A, (__v16sf)__B,
                                                  (__v16sf)__C, (__mmask16)__U,
                                                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)__A, (__v16sf)__B,
                                                  (__v16sf)__C, (__mmask16)__U,
                                                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)__A, (__v16sf)__B,
                                                 -(__v16sf)__C, (__mmask16)-1,
                                                 _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)__A, (__v16sf)__B,
                                                 -(__v16sf)__C, (__mmask16)__U,
                                                 _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)__A, (__v16sf)__B,
                                                  -(__v16sf)__C, (__mmask16)__U,
                                                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)__A, (__v16sf)__B,
                                                 (__v16sf)__C, (__mmask16)-1,
                                                 _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)__A, (__v16sf)__B,
                                                  (__v16sf)__C, (__mmask16)__U,
                                                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)__A, (__v16sf)__B,
                                                  (__v16sf)__C, (__mmask16)__U,
                                                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)__A, (__v16sf)__B,
                                                 -(__v16sf)__C, (__mmask16)-1,
                                                 _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)__A, (__v16sf)__B,
                                                  -(__v16sf)__C, (__mmask16)__U,
                                                  _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(A), (__v8df)(B), \
                                              (__v8df)(C), (__mmask8)-1, (R)); })

#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(A), (__v8df)(B), \
                                              (__v8df)(C), (__mmask8)(U), (R)); })

#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(A), (__v8df)(B), \
                                               (__v8df)(C), (__mmask8)(U), (R)); })

#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(A), (__v8df)(B), \
                                               (__v8df)(C), (__mmask8)(U), (R)); })

#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(A), (__v8df)(B), \
                                              -(__v8df)(C), (__mmask8)-1, (R)); })

#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(A), (__v8df)(B), \
                                              -(__v8df)(C), (__mmask8)(U), (R)); })

#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(A), (__v8df)(B), \
                                               -(__v8df)(C), (__mmask8)(U), (R)); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)__A, (__v8df)__B,
                                                     (__v8df)__C, (__mmask8)-1,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)__A, (__v8df)__B,
                                                     (__v8df)__C, (__mmask8)__U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)__A, (__v8df)__B,
                                                      (__v8df)__C, (__mmask8)__U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)__A, (__v8df)__B,
                                                      (__v8df)__C, (__mmask8)__U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)__A, (__v8df)__B,
                                                     -(__v8df)__C, (__mmask8)-1,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)__A, (__v8df)__B,
                                                     -(__v8df)__C, (__mmask8)__U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)__A, (__v8df)__B,
                                                      -(__v8df)__C, (__mmask8)__U,
                                                      _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(A), (__v16sf)(B), \
                                             (__v16sf)(C), (__mmask16)-1, (R)); })

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(A), (__v16sf)(B), \
                                             (__v16sf)(C), (__mmask16)(U), (R)); })

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(A), (__v16sf)(B), \
                                              (__v16sf)(C), (__mmask16)(U), (R)); })

#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(A), (__v16sf)(B), \
                                              (__v16sf)(C), (__mmask16)(U), (R)); })

#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(A), (__v16sf)(B), \
                                             -(__v16sf)(C), (__mmask16)-1, (R)); })

#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(A), (__v16sf)(B), \
                                             -(__v16sf)(C), (__mmask16)(U), (R)); })

#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(A), (__v16sf)(B), \
                                              -(__v16sf)(C), (__mmask16)(U), (R)); })
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)__A, (__v16sf)__B,
                                                    (__v16sf)__C, (__mmask16)-1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)__A, (__v16sf)__B,
                                                    (__v16sf)__C, (__mmask16)__U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)__A, (__v16sf)__B,
                                                     (__v16sf)__C, (__mmask16)__U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)__A, (__v16sf)__B,
                                                     (__v16sf)__C, (__mmask16)__U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)__A, (__v16sf)__B,
                                                    -(__v16sf)__C, (__mmask16)-1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)__A, (__v16sf)__B,
                                                    -(__v16sf)__C, (__mmask16)__U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)__A, (__v16sf)__B,
                                                     -(__v16sf)__C, (__mmask16)__U,
                                                     _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(A), (__v8df)(B), \
                                            (__v8df)(C), (__mmask8)(U), (R)); })

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)__A, (__v8df)__B,
                                                   (__v8df)__C, (__mmask8)__U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(A), (__v16sf)(B), \
                                           (__v16sf)(C), (__mmask16)(U), (R)); })

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)__A, (__v16sf)__B,
                                                  (__v16sf)__C, (__mmask16)__U,
                                                  _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(A), (__v8df)(B), \
                                               (__v8df)(C), (__mmask8)(U), (R)); })

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)__A, (__v8df)__B,
                                                      (__v8df)__C, (__mmask8)__U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(A), (__v16sf)(B), \
                                              (__v16sf)(C), (__mmask16)(U), (R)); })

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)__A, (__v16sf)__B,
                                                     (__v16sf)__C, (__mmask16)__U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmaddpd512_mask((__v8df)(A), (__v8df)(B), \
                                            (__v8df)(C), (__mmask8)(U), (R)); })

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d)__builtin_ia32_vfnmaddpd512_mask((__v8df)__A, (__v8df)__B,
                                                   (__v8df)__C, (__mmask8)__U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmaddps512_mask((__v16sf)(A), (__v16sf)(B), \
                                           (__v16sf)(C), (__mmask16)(U), (R)); })

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512)__builtin_ia32_vfnmaddps512_mask((__v16sf)__A, (__v16sf)__B,
                                                  (__v16sf)__C, (__mmask16)__U,
                                                  _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmsubpd512_mask((__v8df)(A), (__v8df)(B), \
                                            (__v8df)(C), (__mmask8)(U), (R)); })

#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmsubpd512_mask3((__v8df)(A), (__v8df)(B), \
                                             (__v8df)(C), (__mmask8)(U), (R)); })

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d)__builtin_ia32_vfnmsubpd512_mask((__v8df)__A, (__v8df)__B,
                                                   (__v8df)__C, (__mmask8)__U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d)__builtin_ia32_vfnmsubpd512_mask3((__v8df)__A, (__v8df)__B,
                                                    (__v8df)__C, (__mmask8)__U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmsubps512_mask((__v16sf)(A), (__v16sf)(B), \
                                           (__v16sf)(C), (__mmask16)(U), (R)); })

#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmsubps512_mask3((__v16sf)(A), (__v16sf)(B), \
                                            (__v16sf)(C), (__mmask16)(U), (R)); })

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512)__builtin_ia32_vfnmsubps512_mask((__v16sf)__A, (__v16sf)__B,
                                                  (__v16sf)__C, (__mmask16)__U,
                                                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512)__builtin_ia32_vfnmsubps512_mask3((__v16sf)__A, (__v16sf)__B,
                                                   (__v16sf)__C, (__mmask16)__U,
                                                   _MM_FROUND_CUR_DIRECTION);
}
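/* Vector permutations */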
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
{
  return (__m512i)__builtin_ia32_vpermt2vard512_mask((__v16si)__I /* idx */,
                                                     (__v16si)__A, (__v16si)__B,
                                                     (__mmask16)-1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
{
  return (__m512i)__builtin_ia32_vpermt2varq512_mask((__v8di)__I /* idx */,
                                                     (__v8di)__A, (__v8di)__B,
                                                     (__mmask8)-1);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
{
  return (__m512d)__builtin_ia32_vpermt2varpd512_mask((__v8di)__I /* idx */,
                                                      (__v8df)__A, (__v8df)__B,
                                                      (__mmask8)-1);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
{
  return (__m512)__builtin_ia32_vpermt2varps512_mask((__v16si)__I /* idx */,
                                                     (__v16sf)__A, (__v16sf)__B,
                                                     (__mmask16)-1);
}
#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \
  (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__m512i)(A), \
                                         (__v8di)(__m512i)(B), (I), \
                                         (__v8di)_mm512_setzero_si512(), \
                                         (__mmask8)-1); })

#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
  (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \
                                         (__v16si)(__m512i)(B), (I), \
                                         (__v16si)_mm512_setzero_si512(), \
                                         (__mmask16)-1); })

#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \
  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (I), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)-1); })

#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \
  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (I), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
{
  return (__m512d)__builtin_ia32_blendmpd_512_mask((__v8df)__A, (__v8df)__W,
                                                   (__mmask8)__U);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
{
  return (__m512)__builtin_ia32_blendmps_512_mask((__v16sf)__A, (__v16sf)__W,
                                                  (__mmask16)__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
{
  return (__m512i)__builtin_ia32_blendmq_512_mask((__v8di)__A, (__v8di)__W,
                                                  (__mmask8)__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
{
  return (__m512i)__builtin_ia32_blendmd_512_mask((__v16si)__A, (__v16si)__W,
                                                  (__mmask16)__U);
}
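/* Compare */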
#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (P), (__mmask16)-1, (R)); })

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (P), (__mmask16)(U), (R)); })

#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), \
                                         (P), (__mmask8)-1, (R)); })

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), \
                                         (P), (__mmask8)(U), (R)); })

#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
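/* Conversion */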
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvttps_epu32(__m512 __A)
{
  return (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)__A,
                                                    (__v16si)_mm512_setzero_si512(),
                                                    (__mmask16)-1,
                                                    _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, (R)); })

#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (R)); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepi32_pd(__m256i __A)
{
  return (__m512d)__builtin_ia32_cvtdq2pd512_mask((__v8si)__A,
                                                  (__v8df)_mm512_setzero_pd(),
                                                  (__mmask8)-1);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepu32_pd(__m256i __A)
{
  return (__m512d)__builtin_ia32_cvtudq2pd512_mask((__v8si)__A,
                                                   (__v8df)_mm512_setzero_pd(),
                                                   (__mmask8)-1);
}
#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1, (R)); })

#define _mm512_cvtps_ph(A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(A), (I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)-1); })
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_cvtph_ps(__m256i __A)
{
  return (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)__A,
                                                  (__v16sf)_mm512_setzero_ps(),
                                                  (__mmask16)-1,
                                                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvttps_epi32(__m512 __a)
{
  return (__m512i)
    __builtin_ia32_cvttps2dq512_mask((__v16sf)__a,
                                     (__v16si)_mm512_setzero_si512(),
                                     (__mmask16)-1, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvttpd_epi32(__m512d __a)
{
  return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)__a,
                                                   (__v8si)_mm256_setzero_si256(),
                                                   (__mmask8)-1,
                                                   _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (R)); })

#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (R)); })

#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(A), \
                                           (__v16si)_mm512_setzero_si512(), \
                                           (__mmask16)-1, (R)); })

#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(A), \
                                           (__v8si)_mm256_setzero_si256(), \
                                           (__mmask8)-1, (R)); })

#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (R)); })

#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (R)); })
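/* Unpack and interleave */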
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_unpackhi_pd(__m512d __a, __m512d __b)
{
  return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_unpacklo_pd(__m512d __a, __m512d __b)
{
  return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_unpackhi_ps(__m512 __a, __m512 __b)
{
  return __builtin_shufflevector(__a, __b,
                                 2,    18,    3,    19,
                                 2+4,  18+4,  3+4,  19+4,
                                 2+8,  18+8,  3+8,  19+8,
                                 2+12, 18+12, 3+12, 19+12);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_unpacklo_ps(__m512 __a, __m512 __b)
{
  return __builtin_shufflevector(__a, __b,
                                 0,    16,    1,    17,
                                 0+4,  16+4,  1+4,  17+4,
                                 0+8,  16+8,  1+8,  17+8,
                                 0+12, 16+12, 1+12, 17+12);
}
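/* Bit test */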
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_test_epi32_mask(__m512i __A, __m512i __B)
{
  return (__mmask16)__builtin_ia32_ptestmd512((__v16si)__A, (__v16si)__B,
                                              (__mmask16)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_test_epi64_mask(__m512i __A, __m512i __B)
{
  return (__mmask8)__builtin_ia32_ptestmq512((__v8di)__A, (__v8di)__B,
                                             (__mmask8)-1);
}
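/* SIMD load ops */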
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
{
  return (__m512i)__builtin_ia32_loaddqusi512_mask((const __v16si *)__P,
                                                   (__v16si)_mm512_setzero_si512(),
                                                   (__mmask16)__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
{
  return (__m512i)__builtin_ia32_loaddqudi512_mask((const __v8di *)__P,
                                                   (__v8di)_mm512_setzero_si512(),
                                                   (__mmask8)__U);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
{
  return (__m512)__builtin_ia32_loadups512_mask((const __v16sf *)__P,
                                                (__v16sf)_mm512_setzero_ps(),
                                                (__mmask16)__U);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
{
  return (__m512d)__builtin_ia32_loadupd512_mask((const __v8df *)__P,
                                                 (__v8df)_mm512_setzero_pd(),
                                                 (__mmask8)__U);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
{
  return (__m512)__builtin_ia32_loadaps512_mask((const __v16sf *)__P,
                                                (__v16sf)_mm512_setzero_ps(),
                                                (__mmask16)__U);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
{
  return (__m512d)__builtin_ia32_loadapd512_mask((const __v8df *)__P,
                                                 (__v8df)_mm512_setzero_pd(),
                                                 (__mmask8)__U);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_loadu_pd(double const *__p)
{
  struct __loadu_pd {
    __m512d __v;
  } __attribute__((__packed__, __may_alias__));
  return ((struct __loadu_pd*)__p)->__v;
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_loadu_ps(float const *__p)
{
  struct __loadu_ps {
    __m512 __v;
  } __attribute__((__packed__, __may_alias__));
  return ((struct __loadu_ps*)__p)->__v;
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_load_ps(float const *__p)
{
  return (__m512)__builtin_ia32_loadaps512_mask((const __v16sf *)__p,
                                                (__v16sf)_mm512_setzero_ps(),
                                                (__mmask16)-1);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_load_pd(double const *__p)
{
  return (__m512d)__builtin_ia32_loadapd512_mask((const __v8df *)__p,
                                                 (__v8df)_mm512_setzero_pd(),
                                                 (__mmask8)-1);
}
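/* SIMD store ops */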
2671 _mm512_mask_storeu_epi64(
void *__P, __mmask8 __U, __m512i __A)
2673 __builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A,
2678 _mm512_mask_storeu_epi32(
void *__P, __mmask16 __U, __m512i __A)
2680 __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A,
2685 _mm512_mask_storeu_pd(
void *__P, __mmask8 __U, __m512d __A)
2687 __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
2691 _mm512_storeu_pd(
void *__P, __m512d __A)
2693 __builtin_ia32_storeupd512_mask((__v8df *)__P, (__v8df)__A, (__mmask8)-1);
2697 _mm512_mask_storeu_ps(
void *__P, __mmask16 __U, __m512 __A)
2699 __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A,
2704 _mm512_storeu_ps(
void *__P, __m512 __A)
2706 __builtin_ia32_storeups512_mask((__v16sf *)__P, (__v16sf)__A, (__mmask16)-1);
2710 _mm512_mask_store_pd(
void *__P, __mmask8 __U, __m512d __A)
2712 __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
2716 _mm512_store_pd(void *__P, __m512d __A)
2718 *(__m512d*)__P = __A;
2722 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
2724 __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
2725 (__mmask16) __U);
2729 _mm512_store_ps(void *__P, __m512 __A)
2731 *(__m512*)__P = __A;
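// --- Illustrative sketch (not part of the original header). The masked
// stores mirror the masked loads: only elements whose mask bit is set are
// written, so the load sketch above can be paired with this for output.
// example_store_tail is a hypothetical helper name.
static __inline void example_store_tail(float *__p, __m512 __v, int __n)
{
  // Store just the low __n lanes (0 <= __n <= 16); other memory is untouched.
  __mmask16 __m = (__mmask16)((1U << __n) - 1);
  _mm512_mask_storeu_ps(__p, __m, __v);
}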
2737 _mm512_knot(__mmask16 __M)
2739 return __builtin_ia32_knothi(__M);
2745 _mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
2746 return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
2747 (__mmask16)-1); }
2751 _mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2752 return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
2753 __u); }
2757 _mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
2758 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
2759 (__mmask16)-1); }
2763 _mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2764 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
2765 __u); }
2769 _mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2770 return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
2771 __u); }
2775 _mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
2776 return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
2777 (__mmask8)-1); }
2781 _mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
2782 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
2783 (__mmask8)-1); }
2787 _mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2788 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
2789 __u); }
2793 _mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
2794 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
2795 (__mmask16)-1); }
2799 _mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2800 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
2801 __u); }
2805 _mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
2806 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
2807 (__mmask16)-1); }
2811 _mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2812 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
2813 __u); }
2817 _mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
2818 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
2819 (__mmask8)-1); }
2823 _mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2824 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
2825 __u); }
2829 _mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
2830 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
2831 (__mmask8)-1); }
2835 _mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2836 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
2837 __u); }
2841 _mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
2842 return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
2843 (__mmask16)-1); }
2847 _mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2848 return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
2849 __u); }
2853 _mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
2854 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
2855 (__mmask16)-1); }
2859 _mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2860 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
2861 __u); }
2865 _mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2866 return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
2867 __u); }
2871 _mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
2872 return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
2873 (__mmask8)-1); }
2877 _mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
2878 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
2879 (__mmask8)-1); }
2883 _mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2884 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
2885 __u); }
2889 _mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
2890 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
2891 (__mmask16)-1); }
2895 _mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2896 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
2897 __u); }
2901 _mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
2902 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
2903 (__mmask16)-1); }
2907 _mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2908 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
2909 __u); }
2913 _mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
2914 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
2915 (__mmask8)-1); }
2919 _mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2920 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
2921 __u); }
2925 _mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
2926 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
2927 (__mmask8)-1); }
2931 _mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2932 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
2933 __u); }
2937 _mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
2938 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
2939 (__mmask16)-1); }
2943 _mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2944 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
2945 __u); }
2949 _mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
2950 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
2951 (__mmask16)-1); }
2955 _mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2956 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
2957 __u); }
2961 _mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
2962 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
2963 (__mmask8)-1); }
2967 _mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2968 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
2969 __u); }
2973 _mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
2974 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
2975 (__mmask8)-1); }
2979 _mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2980 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
2981 __u); }
2985 _mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
2986 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
2987 (__mmask16)-1); }
2991 _mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2992 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
2993 __u); }
2997 _mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
2998 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
2999 (__mmask16)-1); }
3003 _mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3004 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
3005 __u); }
3009 _mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
3010 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
3011 (__mmask8)-1); }
3015 _mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3016 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
3017 __u); }
3021 _mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
3022 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
3023 (__mmask8)-1); }
3027 _mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3028 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
3029 __u); }
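// --- Illustrative sketch (not part of the original header). The comparison
// intrinsics return a bitmask with one bit per element rather than a vector,
// and the mask_ variants AND the result with the incoming mask __u.
// example_count_equal is a hypothetical helper name.
static __inline int example_count_equal(__m512i __a, __m512i __b)
{
  // Count how many of the 16 32-bit elements compare equal.
  __mmask16 __k = _mm512_cmpeq_epi32_mask(__a, __b);
  return __builtin_popcount((unsigned)__k);
}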
3032 #define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \
3033 (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
3034 (__v16si)(__m512i)(b), (p), \
3035 (__mmask16)-1); })
3037 #define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \
3038 (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
3039 (__v16si)(__m512i)(b), (p), \
3040 (__mmask16)-1); })
3042 #define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \
3043 (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
3044 (__v8di)(__m512i)(b), (p), \
3045 (__mmask8)-1); })
3047 #define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \
3048 (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
3049 (__v8di)(__m512i)(b), (p), \
3050 (__mmask8)-1); })
3052 #define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
3053 (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
3054 (__v16si)(__m512i)(b), (p), \
3055 (__mmask16)(m)); })
3057 #define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
3058 (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
3059 (__v16si)(__m512i)(b), (p), \
3060 (__mmask16)(m)); })
3062 #define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
3063 (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
3064 (__v8di)(__m512i)(b), (p), \
3065 (__mmask8)(m)); })
3067 #define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
3068 (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
3069 (__v8di)(__m512i)(b), (p), \
3070 (__mmask8)(m)); })
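// --- Illustrative sketch (not part of the original header). The predicate
// immediate (p) of the generic _mm512[_mask]_cmp_* macros uses the same
// encoding that the named wrappers above hard-code: 0=EQ, 1=LT, 2=LE, 4=NE,
// 5=GE, 6=GT. So this hypothetical helper behaves like
// _mm512_cmple_epu32_mask:
static __inline __mmask16 example_cmple_epu32(__m512i __a, __m512i __b)
{
  return _mm512_cmp_epu32_mask(__a, __b, 2); // 2 == "less than or equal"
}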
3072 #undef __DEFAULT_FN_ATTRS
3074 #endif // __AVX512FINTRIN_H