summaryrefslogtreecommitdiff
path: root/lib/Headers/avx512fintrin.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Headers/avx512fintrin.h')
-rw-r--r--lib/Headers/avx512fintrin.h382
1 files changed, 178 insertions, 204 deletions
diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h
index 04be924d63..d37414adc2 100644
--- a/lib/Headers/avx512fintrin.h
+++ b/lib/Headers/avx512fintrin.h
@@ -4167,37 +4167,203 @@ _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
(__mmask8) __U,
_MM_FROUND_CUR_DIRECTION);
}
+
/* Unpack and Interleave */
+
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_unpackhi_pd(__m512d __a, __m512d __b)
{
- return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
+ return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
+ 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
+ (__v8df)_mm512_unpackhi_pd(__A, __B),
+ (__v8df)__W);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
+ (__v8df)_mm512_unpackhi_pd(__A, __B),
+ (__v8df)_mm512_setzero_pd());
}
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_unpacklo_pd(__m512d __a, __m512d __b)
{
- return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
+ return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
+ 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
+ (__v8df)_mm512_unpacklo_pd(__A, __B),
+ (__v8df)__W);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
+ (__v8df)_mm512_unpacklo_pd(__A, __B),
+ (__v8df)_mm512_setzero_pd());
}
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_unpackhi_ps(__m512 __a, __m512 __b)
{
- return __builtin_shufflevector(__a, __b,
- 2, 18, 3, 19,
- 2+4, 18+4, 3+4, 19+4,
- 2+8, 18+8, 3+8, 19+8,
- 2+12, 18+12, 3+12, 19+12);
+ return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
+ 2, 18, 3, 19,
+ 2+4, 18+4, 3+4, 19+4,
+ 2+8, 18+8, 3+8, 19+8,
+ 2+12, 18+12, 3+12, 19+12);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
+ (__v16sf)_mm512_unpackhi_ps(__A, __B),
+ (__v16sf)__W);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
+ (__v16sf)_mm512_unpackhi_ps(__A, __B),
+ (__v16sf)_mm512_setzero_ps());
}
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_unpacklo_ps(__m512 __a, __m512 __b)
{
- return __builtin_shufflevector(__a, __b,
- 0, 16, 1, 17,
- 0+4, 16+4, 1+4, 17+4,
- 0+8, 16+8, 1+8, 17+8,
- 0+12, 16+12, 1+12, 17+12);
+ return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
+ 0, 16, 1, 17,
+ 0+4, 16+4, 1+4, 17+4,
+ 0+8, 16+8, 1+8, 17+8,
+ 0+12, 16+12, 1+12, 17+12);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
+ (__v16sf)_mm512_unpacklo_ps(__A, __B),
+ (__v16sf)__W);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
+ (__v16sf)_mm512_unpacklo_ps(__A, __B),
+ (__v16sf)_mm512_setzero_ps());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_unpackhi_epi32(__m512i __A, __m512i __B)
+{
+ return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
+ 2, 18, 3, 19,
+ 2+4, 18+4, 3+4, 19+4,
+ 2+8, 18+8, 3+8, 19+8,
+ 2+12, 18+12, 3+12, 19+12);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
+ (__v16si)_mm512_unpackhi_epi32(__A, __B),
+ (__v16si)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
+ (__v16si)_mm512_unpackhi_epi32(__A, __B),
+ (__v16si)_mm512_setzero_si512());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_unpacklo_epi32(__m512i __A, __m512i __B)
+{
+ return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
+ 0, 16, 1, 17,
+ 0+4, 16+4, 1+4, 17+4,
+ 0+8, 16+8, 1+8, 17+8,
+ 0+12, 16+12, 1+12, 17+12);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
+ (__v16si)_mm512_unpacklo_epi32(__A, __B),
+ (__v16si)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
+ (__v16si)_mm512_unpacklo_epi32(__A, __B),
+ (__v16si)_mm512_setzero_si512());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_unpackhi_epi64(__m512i __A, __m512i __B)
+{
+ return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
+ 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
+ (__v8di)_mm512_unpackhi_epi64(__A, __B),
+ (__v8di)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
+ (__v8di)_mm512_unpackhi_epi64(__A, __B),
+ (__v8di)_mm512_setzero_si512());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
+ 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
+ (__v8di)_mm512_unpacklo_epi64(__A, __B),
+ (__v8di)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
+ (__v8di)_mm512_unpacklo_epi64(__A, __B),
+ (__v8di)_mm512_setzero_si512());
}
/* Bit Test */
@@ -5787,14 +5953,6 @@ _mm512_kmov (__mmask16 __A)
(int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
(int)(P), (int)(R)); })
-static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
-{
- return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __W,
- (__mmask8) __U);
-}
#define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \
(long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -5807,15 +5965,6 @@ _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
(__v16si) __B,
(__mmask16) __U);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
-{
- return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
- (__v16si) __B,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) -1);
-}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sll_epi32 (__m512i __A, __m128i __B)
@@ -6201,73 +6350,6 @@ _mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
(__v8di)(__m512i)(C), (int)(imm), \
(__mmask8)(U)); })
-static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
-{
- return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df)
- _mm512_setzero_pd (),
- (__mmask8) __U);
-}
-
-static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
-{
- return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __W,
- (__mmask16) __U);
-}
-
-static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
-{
- return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf)
- _mm512_setzero_ps (),
- (__mmask16) __U);
-}
-
-static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
-{
- return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __W,
- (__mmask8) __U);
-}
-
-static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
-{
- return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df)
- _mm512_setzero_pd (),
- (__mmask8) __U);
-}
-
-static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
-{
- return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __W,
- (__mmask16) __U);
-}
-
-static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
-{
- return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf)
- _mm512_setzero_ps (),
- (__mmask16) __U);
-}
-
#define _mm_cvt_roundsd_i64(A, R) __extension__ ({ \
(long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
@@ -6686,114 +6768,6 @@ _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
- __m512i __B)
-{
- return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
- (__v16si) __B,
- (__v16si) __W,
- (__mmask16) __U);
-}
-
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
-{
- return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
- (__v16si) __B,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U);
-}
-
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
-{
- return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
- (__v8di) __B,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) -1);
-}
-
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
-{
- return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
- (__v8di) __B,
- (__v8di) __W,
- (__mmask8) __U);
-}
-
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
-{
- return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
- (__v8di) __B,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __U);
-}
-
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
-{
- return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
- (__v16si) __B,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) -1);
-}
-
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
- __m512i __B)
-{
- return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
- (__v16si) __B,
- (__v16si) __W,
- (__mmask16) __U);
-}
-
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
-{
- return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
- (__v16si) __B,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U);
-}
-
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
-{
- return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
- (__v8di) __B,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) -1);
-}
-
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
-{
- return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
- (__v8di) __B,
- (__v8di) __W,
- (__mmask8) __U);
-}
-
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
-{
- return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
- (__v8di) __B,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __U);
-}
-
#define _mm_roundscale_round_sd(A, B, imm, R) __extension__ ({ \
(__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), \