ziglang/zig (https://github.com/ziglang/zig.git)

update clang headers to 8.0.0rc2

commit 48c1e235cb (parent 00902ff9b0)
c_headers/__clang_cuda_runtime_wrapper.h

@@ -62,10 +62,15 @@
 #include "cuda.h"
 #if !defined(CUDA_VERSION)
 #error "cuda.h did not define CUDA_VERSION"
-#elif CUDA_VERSION < 7000 || CUDA_VERSION > 9020
+#elif CUDA_VERSION < 7000 || CUDA_VERSION > 10000
 #error "Unsupported CUDA version!"
 #endif
 
+#pragma push_macro("__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__")
+#if CUDA_VERSION >= 10000
+#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__
+#endif
+
 // Make largest subset of device functions available during host
 // compilation -- SM_35 for the time being.
 #ifndef __CUDA_ARCH__
@@ -419,6 +424,7 @@ __device__ inline __cuda_builtin_gridDim_t::operator dim3() const {
 #pragma pop_macro("dim3")
 #pragma pop_macro("uint3")
 #pragma pop_macro("__USE_FAST_MATH__")
+#pragma pop_macro("__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__")
 
 #endif // __CUDA__
 #endif // __CLANG_CUDA_RUNTIME_WRAPPER_H__
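The hunk above gates the new internal-headers macro on CUDA 10 and relies on #pragma push_macro / #pragma pop_macro (restored in the second hunk) to keep the definition from leaking out of the wrapper. A minimal, self-contained sketch of that save/restore mechanism, using a hypothetical macro name for illustration:

#include <stdio.h>

#define MODE 1
#pragma push_macro("MODE") /* save the current definition */
#undef MODE
#define MODE 2             /* temporary redefinition, as the wrapper does with
                              __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ */
static int mode_inside(void) { return MODE; }  /* sees 2 */
#pragma pop_macro("MODE")  /* restore the saved definition */
static int mode_outside(void) { return MODE; } /* sees 1 again */

int main(void) {
  printf("%d %d\n", mode_inside(), mode_outside()); /* prints: 2 1 */
  return 0;
}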
c_headers/adxintrin.h

@@ -53,7 +53,7 @@ static __inline unsigned char __DEFAULT_FN_ATTRS
 _addcarry_u32(unsigned char __cf, unsigned int __x, unsigned int __y,
               unsigned int *__p)
 {
-  return __builtin_ia32_addcarry_u32(__cf, __x, __y, __p);
+  return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);
 }
 
 #ifdef __x86_64__
@@ -61,7 +61,7 @@ static __inline unsigned char __DEFAULT_FN_ATTRS
 _addcarry_u64(unsigned char __cf, unsigned long long __x,
               unsigned long long __y, unsigned long long *__p)
 {
-  return __builtin_ia32_addcarry_u64(__cf, __x, __y, __p);
+  return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);
 }
 #endif
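Whichever builtin backs it in a given clang release, the user-facing _addcarry_u32 contract is unchanged: add two 32-bit operands plus an incoming carry, store the low 32 bits through the pointer, and return the carry out. A hedged usage sketch (x86 only; everything except the intrinsic itself is illustrative), chaining two limbs:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  /* add two 64-bit values held as 32-bit limbs, propagating the carry */
  unsigned int a_lo = 0xFFFFFFFFu, a_hi = 1;
  unsigned int b_lo = 1, b_hi = 2;
  unsigned int r_lo, r_hi;

  unsigned char c = _addcarry_u32(0, a_lo, b_lo, &r_lo); /* carry out of low limb */
  c = _addcarry_u32(c, a_hi, b_hi, &r_hi);               /* carry into high limb */

  printf("hi=%u lo=%u carry=%u\n", r_hi, r_lo, c);       /* hi=4 lo=0 carry=0 */
  return 0;
}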
c_headers/altivec.h

@@ -9492,49 +9492,51 @@ vec_splat_u32(signed char __a) {
 
 /* vec_sr */
 
-static __inline__ vector signed char __ATTRS_o_ai
-vec_sr(vector signed char __a, vector unsigned char __b) {
-  vector unsigned char __res = (vector unsigned char)__a >> __b;
-  return (vector signed char)__res;
-}
-
+// vec_sr does modulo arithmetic on __b first, so __b is allowed to be more
+// than the length of __a.
 static __inline__ vector unsigned char __ATTRS_o_ai
 vec_sr(vector unsigned char __a, vector unsigned char __b) {
-  return __a >> __b;
+  return __a >>
+         (__b % (vector unsigned char)(sizeof(unsigned char) * __CHAR_BIT__));
 }
 
-static __inline__ vector signed short __ATTRS_o_ai
-vec_sr(vector signed short __a, vector unsigned short __b) {
-  vector unsigned short __res = (vector unsigned short)__a >> __b;
-  return (vector signed short)__res;
+static __inline__ vector signed char __ATTRS_o_ai
+vec_sr(vector signed char __a, vector unsigned char __b) {
+  return (vector signed char)vec_sr((vector unsigned char)__a, __b);
 }
 
 static __inline__ vector unsigned short __ATTRS_o_ai
 vec_sr(vector unsigned short __a, vector unsigned short __b) {
-  return __a >> __b;
+  return __a >>
+         (__b % (vector unsigned short)(sizeof(unsigned short) * __CHAR_BIT__));
 }
 
-static __inline__ vector signed int __ATTRS_o_ai
-vec_sr(vector signed int __a, vector unsigned int __b) {
-  vector unsigned int __res = (vector unsigned int)__a >> __b;
-  return (vector signed int)__res;
+static __inline__ vector short __ATTRS_o_ai vec_sr(vector short __a,
+                                                   vector unsigned short __b) {
+  return (vector short)vec_sr((vector unsigned short)__a, __b);
 }
 
 static __inline__ vector unsigned int __ATTRS_o_ai
 vec_sr(vector unsigned int __a, vector unsigned int __b) {
-  return __a >> __b;
+  return __a >>
+         (__b % (vector unsigned int)(sizeof(unsigned int) * __CHAR_BIT__));
 }
 
+static __inline__ vector int __ATTRS_o_ai vec_sr(vector int __a,
+                                                 vector unsigned int __b) {
+  return (vector int)vec_sr((vector unsigned int)__a, __b);
+}
+
 #ifdef __POWER8_VECTOR__
-static __inline__ vector signed long long __ATTRS_o_ai
-vec_sr(vector signed long long __a, vector unsigned long long __b) {
-  vector unsigned long long __res = (vector unsigned long long)__a >> __b;
-  return (vector signed long long)__res;
-}
-
 static __inline__ vector unsigned long long __ATTRS_o_ai
 vec_sr(vector unsigned long long __a, vector unsigned long long __b) {
-  return __a >> __b;
+  return __a >> (__b % (vector unsigned long long)(sizeof(unsigned long long) *
+                                                   __CHAR_BIT__));
 }
 
+static __inline__ vector long long __ATTRS_o_ai
+vec_sr(vector long long __a, vector unsigned long long __b) {
+  return (vector long long)vec_sr((vector unsigned long long)__a, __b);
+}
 #endif
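The new comment is the heart of this hunk: the shift count is reduced modulo the element width before the C-level shift, matching what the PowerPC vector shift instructions do in hardware, so an oversized __b no longer produces an out-of-range (undefined) shift. A scalar model of the same arithmetic for one 8-bit lane (illustrative code, not from the header):

#include <stdio.h>

static unsigned char lane_sr(unsigned char a, unsigned char shift) {
  /* same per-element reduction the header now performs */
  return (unsigned char)(a >> (shift % (sizeof(unsigned char) * 8)));
}

int main(void) {
  /* a shift count of 9 behaves like 1 on an 8-bit element */
  printf("%u\n", lane_sr(0x80, 9)); /* prints 64 */
  return 0;
}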
@@ -9544,12 +9546,12 @@ vec_sr(vector unsigned long long __a, vector unsigned long long __b) {
 
 static __inline__ vector signed char __ATTRS_o_ai
 vec_vsrb(vector signed char __a, vector unsigned char __b) {
-  return __a >> (vector signed char)__b;
+  return vec_sr(__a, __b);
 }
 
 static __inline__ vector unsigned char __ATTRS_o_ai
 vec_vsrb(vector unsigned char __a, vector unsigned char __b) {
-  return __a >> __b;
+  return vec_sr(__a, __b);
 }
 
 /* vec_vsrh */
@@ -9558,12 +9560,12 @@ vec_vsrb(vector unsigned char __a, vector unsigned char __b) {
 
 static __inline__ vector short __ATTRS_o_ai
 vec_vsrh(vector short __a, vector unsigned short __b) {
-  return __a >> (vector short)__b;
+  return vec_sr(__a, __b);
 }
 
 static __inline__ vector unsigned short __ATTRS_o_ai
 vec_vsrh(vector unsigned short __a, vector unsigned short __b) {
-  return __a >> __b;
+  return vec_sr(__a, __b);
 }
 
 /* vec_vsrw */
@@ -9572,12 +9574,12 @@ vec_vsrh(vector unsigned short __a, vector unsigned short __b) {
 
 static __inline__ vector int __ATTRS_o_ai vec_vsrw(vector int __a,
                                                    vector unsigned int __b) {
-  return __a >> (vector int)__b;
+  return vec_sr(__a, __b);
 }
 
 static __inline__ vector unsigned int __ATTRS_o_ai
 vec_vsrw(vector unsigned int __a, vector unsigned int __b) {
-  return __a >> __b;
+  return vec_sr(__a, __b);
 }
 
 /* vec_sra */
@@ -16353,67 +16355,82 @@ vec_revb(vector unsigned __int128 __a) {
 
 /* vec_xl */
 
+typedef vector signed char unaligned_vec_schar __attribute__((aligned(1)));
+typedef vector unsigned char unaligned_vec_uchar __attribute__((aligned(1)));
+typedef vector signed short unaligned_vec_sshort __attribute__((aligned(1)));
+typedef vector unsigned short unaligned_vec_ushort __attribute__((aligned(1)));
+typedef vector signed int unaligned_vec_sint __attribute__((aligned(1)));
+typedef vector unsigned int unaligned_vec_uint __attribute__((aligned(1)));
+typedef vector float unaligned_vec_float __attribute__((aligned(1)));
+
 static inline __ATTRS_o_ai vector signed char vec_xl(signed long long __offset,
                                                      signed char *__ptr) {
-  return *(vector signed char *)(__ptr + __offset);
+  return *(unaligned_vec_schar *)(__ptr + __offset);
 }
 
 static inline __ATTRS_o_ai vector unsigned char
 vec_xl(signed long long __offset, unsigned char *__ptr) {
-  return *(vector unsigned char *)(__ptr + __offset);
+  return *(unaligned_vec_uchar*)(__ptr + __offset);
 }
 
 static inline __ATTRS_o_ai vector signed short vec_xl(signed long long __offset,
                                                       signed short *__ptr) {
-  return *(vector signed short *)(__ptr + __offset);
+  return *(unaligned_vec_sshort *)(__ptr + __offset);
 }
 
 static inline __ATTRS_o_ai vector unsigned short
 vec_xl(signed long long __offset, unsigned short *__ptr) {
-  return *(vector unsigned short *)(__ptr + __offset);
+  return *(unaligned_vec_ushort *)(__ptr + __offset);
 }
 
 static inline __ATTRS_o_ai vector signed int vec_xl(signed long long __offset,
                                                     signed int *__ptr) {
-  return *(vector signed int *)(__ptr + __offset);
+  return *(unaligned_vec_sint *)(__ptr + __offset);
 }
 
 static inline __ATTRS_o_ai vector unsigned int vec_xl(signed long long __offset,
                                                       unsigned int *__ptr) {
-  return *(vector unsigned int *)(__ptr + __offset);
+  return *(unaligned_vec_uint *)(__ptr + __offset);
 }
 
 static inline __ATTRS_o_ai vector float vec_xl(signed long long __offset,
                                                float *__ptr) {
-  return *(vector float *)(__ptr + __offset);
+  return *(unaligned_vec_float *)(__ptr + __offset);
 }
 
 #ifdef __VSX__
+typedef vector signed long long unaligned_vec_sll __attribute__((aligned(1)));
+typedef vector unsigned long long unaligned_vec_ull __attribute__((aligned(1)));
+typedef vector double unaligned_vec_double __attribute__((aligned(1)));
+
 static inline __ATTRS_o_ai vector signed long long
 vec_xl(signed long long __offset, signed long long *__ptr) {
-  return *(vector signed long long *)(__ptr + __offset);
+  return *(unaligned_vec_sll *)(__ptr + __offset);
 }
 
 static inline __ATTRS_o_ai vector unsigned long long
 vec_xl(signed long long __offset, unsigned long long *__ptr) {
-  return *(vector unsigned long long *)(__ptr + __offset);
+  return *(unaligned_vec_ull *)(__ptr + __offset);
 }
 
 static inline __ATTRS_o_ai vector double vec_xl(signed long long __offset,
                                                 double *__ptr) {
-  return *(vector double *)(__ptr + __offset);
+  return *(unaligned_vec_double *)(__ptr + __offset);
 }
 #endif
 
 #if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
+typedef vector signed __int128 unaligned_vec_si128 __attribute__((aligned(1)));
+typedef vector unsigned __int128 unaligned_vec_ui128
+    __attribute__((aligned(1)));
 static inline __ATTRS_o_ai vector signed __int128
 vec_xl(signed long long __offset, signed __int128 *__ptr) {
-  return *(vector signed __int128 *)(__ptr + __offset);
+  return *(unaligned_vec_si128 *)(__ptr + __offset);
 }
 
 static inline __ATTRS_o_ai vector unsigned __int128
 vec_xl(signed long long __offset, unsigned __int128 *__ptr) {
-  return *(vector unsigned __int128 *)(__ptr + __offset);
+  return *(unaligned_vec_ui128 *)(__ptr + __offset);
 }
 #endif
 
@@ -16498,62 +16515,62 @@ vec_xl_be(signed long long __offset, unsigned __int128 *__ptr) {
 static inline __ATTRS_o_ai void vec_xst(vector signed char __vec,
                                         signed long long __offset,
                                         signed char *__ptr) {
-  *(vector signed char *)(__ptr + __offset) = __vec;
+  *(unaligned_vec_schar *)(__ptr + __offset) = __vec;
 }
 
 static inline __ATTRS_o_ai void vec_xst(vector unsigned char __vec,
                                         signed long long __offset,
                                         unsigned char *__ptr) {
-  *(vector unsigned char *)(__ptr + __offset) = __vec;
+  *(unaligned_vec_uchar *)(__ptr + __offset) = __vec;
 }
 
 static inline __ATTRS_o_ai void vec_xst(vector signed short __vec,
                                         signed long long __offset,
                                         signed short *__ptr) {
-  *(vector signed short *)(__ptr + __offset) = __vec;
+  *(unaligned_vec_sshort *)(__ptr + __offset) = __vec;
 }
 
 static inline __ATTRS_o_ai void vec_xst(vector unsigned short __vec,
                                         signed long long __offset,
                                         unsigned short *__ptr) {
-  *(vector unsigned short *)(__ptr + __offset) = __vec;
+  *(unaligned_vec_ushort *)(__ptr + __offset) = __vec;
 }
 
 static inline __ATTRS_o_ai void vec_xst(vector signed int __vec,
                                         signed long long __offset,
                                         signed int *__ptr) {
-  *(vector signed int *)(__ptr + __offset) = __vec;
+  *(unaligned_vec_sint *)(__ptr + __offset) = __vec;
 }
 
 static inline __ATTRS_o_ai void vec_xst(vector unsigned int __vec,
                                         signed long long __offset,
                                         unsigned int *__ptr) {
-  *(vector unsigned int *)(__ptr + __offset) = __vec;
+  *(unaligned_vec_uint *)(__ptr + __offset) = __vec;
 }
 
 static inline __ATTRS_o_ai void vec_xst(vector float __vec,
                                         signed long long __offset,
                                         float *__ptr) {
-  *(vector float *)(__ptr + __offset) = __vec;
+  *(unaligned_vec_float *)(__ptr + __offset) = __vec;
 }
 
 #ifdef __VSX__
 static inline __ATTRS_o_ai void vec_xst(vector signed long long __vec,
                                         signed long long __offset,
                                         signed long long *__ptr) {
-  *(vector signed long long *)(__ptr + __offset) = __vec;
+  *(unaligned_vec_sll *)(__ptr + __offset) = __vec;
 }
 
 static inline __ATTRS_o_ai void vec_xst(vector unsigned long long __vec,
                                         signed long long __offset,
                                         unsigned long long *__ptr) {
-  *(vector unsigned long long *)(__ptr + __offset) = __vec;
+  *(unaligned_vec_ull *)(__ptr + __offset) = __vec;
 }
 
 static inline __ATTRS_o_ai void vec_xst(vector double __vec,
                                         signed long long __offset,
                                         double *__ptr) {
-  *(vector double *)(__ptr + __offset) = __vec;
+  *(unaligned_vec_double *)(__ptr + __offset) = __vec;
 }
 #endif
 
@@ -16561,13 +16578,13 @@ static inline __ATTRS_o_ai void vec_xst(vector double __vec,
 static inline __ATTRS_o_ai void vec_xst(vector signed __int128 __vec,
                                         signed long long __offset,
                                         signed __int128 *__ptr) {
-  *(vector signed __int128 *)(__ptr + __offset) = __vec;
+  *(unaligned_vec_si128 *)(__ptr + __offset) = __vec;
 }
 
 static inline __ATTRS_o_ai void vec_xst(vector unsigned __int128 __vec,
                                         signed long long __offset,
                                         unsigned __int128 *__ptr) {
-  *(vector unsigned __int128 *)(__ptr + __offset) = __vec;
+  *(unaligned_vec_ui128 *)(__ptr + __offset) = __vec;
 }
 #endif
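The unaligned_vec_* typedefs above are the standard way to express an unaligned vector access: an aligned(1) alias of the vector type tells the compiler the pointee may sit at any address, so it emits an unaligned load/store instead of assuming the type's natural 16-byte alignment. A self-contained sketch of the idiom using GCC/clang vector extensions (type and function names here are illustrative):

#include <stdio.h>

typedef int v4si __attribute__((vector_size(16)));
typedef v4si unaligned_v4si __attribute__((aligned(1)));

static v4si load_unaligned(const int *p) {
  /* legal at any address; *(const v4si *)p would let the compiler
     assume 16-byte alignment */
  return *(const unaligned_v4si *)p;
}

int main(void) {
  int buf[5] = {0, 1, 2, 3, 4};
  v4si v = load_unaligned(buf + 1); /* deliberately misaligned by 4 bytes */
  printf("%d %d %d %d\n", v[0], v[1], v[2], v[3]); /* 1 2 3 4 */
  return 0;
}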
c_headers/arm_fp16.h

@@ -27,7 +27,7 @@
 #include <stdint.h>
 
 typedef __fp16 float16_t;
-#define __ai static inline __attribute__((__always_inline__, __nodebug__))
+#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))
 
 #if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)
 #ifdef __LITTLE_ENDIAN__

c_headers/arm_neon.h — 2958 changes (file diff suppressed because it is too large)
c_headers/avx512dqintrin.h

@@ -29,180 +29,309 @@
 #define __AVX512DQINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline __mmask8 __DEFAULT_FN_ATTRS
+_knot_mask8(__mmask8 __M)
+{
+  return __builtin_ia32_knotqi(__M);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_kand_mask8(__mmask8 __A, __mmask8 __B)
+{
+  return (__mmask8)__builtin_ia32_kandqi((__mmask8)__A, (__mmask8)__B);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_kandn_mask8(__mmask8 __A, __mmask8 __B)
+{
+  return (__mmask8)__builtin_ia32_kandnqi((__mmask8)__A, (__mmask8)__B);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_kor_mask8(__mmask8 __A, __mmask8 __B)
+{
+  return (__mmask8)__builtin_ia32_korqi((__mmask8)__A, (__mmask8)__B);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_kxnor_mask8(__mmask8 __A, __mmask8 __B)
+{
+  return (__mmask8)__builtin_ia32_kxnorqi((__mmask8)__A, (__mmask8)__B);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_kxor_mask8(__mmask8 __A, __mmask8 __B)
+{
+  return (__mmask8)__builtin_ia32_kxorqi((__mmask8)__A, (__mmask8)__B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortestc_mask8_u8(__mmask8 __A, __mmask8 __B)
+{
+  return (unsigned char)__builtin_ia32_kortestcqi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortestz_mask8_u8(__mmask8 __A, __mmask8 __B)
+{
+  return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
+  *__C = (unsigned char)__builtin_ia32_kortestcqi(__A, __B);
+  return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_ktestc_mask8_u8(__mmask8 __A, __mmask8 __B)
+{
+  return (unsigned char)__builtin_ia32_ktestcqi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_ktestz_mask8_u8(__mmask8 __A, __mmask8 __B)
+{
+  return (unsigned char)__builtin_ia32_ktestzqi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_ktest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
+  *__C = (unsigned char)__builtin_ia32_ktestcqi(__A, __B);
+  return (unsigned char)__builtin_ia32_ktestzqi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_ktestc_mask16_u8(__mmask16 __A, __mmask16 __B)
+{
+  return (unsigned char)__builtin_ia32_ktestchi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_ktestz_mask16_u8(__mmask16 __A, __mmask16 __B)
+{
+  return (unsigned char)__builtin_ia32_ktestzhi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_ktest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
+  *__C = (unsigned char)__builtin_ia32_ktestchi(__A, __B);
+  return (unsigned char)__builtin_ia32_ktestzhi(__A, __B);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_kadd_mask8(__mmask8 __A, __mmask8 __B)
+{
+  return (__mmask8)__builtin_ia32_kaddqi((__mmask8)__A, (__mmask8)__B);
+}
+
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_kadd_mask16(__mmask16 __A, __mmask16 __B)
+{
+  return (__mmask16)__builtin_ia32_kaddhi((__mmask16)__A, (__mmask16)__B);
+}
+
+#define _kshiftli_mask8(A, I) \
+  (__mmask8)__builtin_ia32_kshiftliqi((__mmask8)(A), (unsigned int)(I))
+
+#define _kshiftri_mask8(A, I) \
+  (__mmask8)__builtin_ia32_kshiftriqi((__mmask8)(A), (unsigned int)(I))
+
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
+_cvtmask8_u32(__mmask8 __A) {
+  return (unsigned int)__builtin_ia32_kmovb((__mmask8)__A);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_cvtu32_mask8(unsigned int __A) {
+  return (__mmask8)__builtin_ia32_kmovb((__mmask8)__A);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_load_mask8(__mmask8 *__A) {
+  return (__mmask8)__builtin_ia32_kmovb(*(__mmask8 *)__A);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_store_mask8(__mmask8 *__A, __mmask8 __B) {
+  *(__mmask8 *)__A = __builtin_ia32_kmovb((__mmask8)__B);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mullo_epi64 (__m512i __A, __m512i __B) {
   return (__m512i) ((__v8du) __A * (__v8du) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_mullo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_mullo_epi64(__A, __B),
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_mullo_epi64(__A, __B),
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_xor_pd(__m512d __A, __m512d __B) {
   return (__m512d)((__v8du)__A ^ (__v8du)__B);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_xor_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_xor_pd(__A, __B),
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_xor_pd(__A, __B),
                                               (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_xor_ps (__m512 __A, __m512 __B) {
   return (__m512)((__v16su)__A ^ (__v16su)__B);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_xor_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_xor_ps(__A, __B),
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_xor_ps(__A, __B),
                                              (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_or_pd(__m512d __A, __m512d __B) {
   return (__m512d)((__v8du)__A | (__v8du)__B);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_or_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_or_pd(__A, __B),
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_or_pd(__A, __B),
                                               (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_or_ps(__m512 __A, __m512 __B) {
   return (__m512)((__v16su)__A | (__v16su)__B);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_or_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_or_ps(__A, __B),
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_or_ps(__A, __B),
                                              (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_and_pd(__m512d __A, __m512d __B) {
   return (__m512d)((__v8du)__A & (__v8du)__B);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_and_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_and_pd(__A, __B),
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_and_pd(__A, __B),
                                               (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_and_ps(__m512 __A, __m512 __B) {
   return (__m512)((__v16su)__A & (__v16su)__B);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_and_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_and_ps(__A, __B),
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_and_ps(__A, __B),
                                              (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_andnot_pd(__m512d __A, __m512d __B) {
   return (__m512d)(~(__v8du)__A & (__v8du)__B);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_andnot_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_andnot_pd(__A, __B),
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_andnot_pd(__A, __B),
                                               (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_andnot_ps(__m512 __A, __m512 __B) {
   return (__m512)(~(__v16su)__A & (__v16su)__B);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_andnot_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_andnot_ps(__A, __B),
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_andnot_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_andnot_ps(__A, __B),
                                              (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_cvtpd_epi64 (__m512d __A) {
   return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
                                                     (__v8di) _mm512_setzero_si512(),
@@ -210,7 +339,7 @@ _mm512_cvtpd_epi64 (__m512d __A) {
                                                     _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
   return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
                                                     (__v8di) __W,
@@ -218,7 +347,7 @@ _mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
                                                     _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) {
   return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
                                                     (__v8di) _mm512_setzero_si512(),
@@ -241,7 +370,7 @@ _mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) {
                                                  (__v8di)_mm512_setzero_si512(), \
                                                  (__mmask8)(U), (int)(R))
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_cvtpd_epu64 (__m512d __A) {
   return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
                                                      (__v8di) _mm512_setzero_si512(),
@@ -249,7 +378,7 @@ _mm512_cvtpd_epu64 (__m512d __A) {
                                                      _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
   return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
                                                      (__v8di) __W,
@@ -257,7 +386,7 @@ _mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
                                                      _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) {
   return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
                                                      (__v8di) _mm512_setzero_si512(),
@@ -280,7 +409,7 @@ _mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) {
                                                   (__v8di)_mm512_setzero_si512(), \
                                                   (__mmask8)(U), (int)(R))
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_cvtps_epi64 (__m256 __A) {
   return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
                                                     (__v8di) _mm512_setzero_si512(),
@@ -288,7 +417,7 @@ _mm512_cvtps_epi64 (__m256 __A) {
                                                     _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
   return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
                                                     (__v8di) __W,
@@ -296,7 +425,7 @@ _mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
                                                     _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) {
   return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
                                                     (__v8di) _mm512_setzero_si512(),
@@ -319,7 +448,7 @@ _mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) {
                                                   (__v8di)_mm512_setzero_si512(), \
                                                   (__mmask8)(U), (int)(R))
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_cvtps_epu64 (__m256 __A) {
   return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
                                                      (__v8di) _mm512_setzero_si512(),
@@ -327,7 +456,7 @@ _mm512_cvtps_epu64 (__m256 __A) {
                                                      _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
   return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
                                                      (__v8di) __W,
@@ -335,7 +464,7 @@ _mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
                                                      _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) {
   return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
                                                      (__v8di) _mm512_setzero_si512(),
@@ -359,19 +488,19 @@ _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) {
                                                   (__mmask8)(U), (int)(R))
 
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_cvtepi64_pd (__m512i __A) {
   return (__m512d)__builtin_convertvector((__v8di)__A, __v8df);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_cvtepi64_pd(__A),
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_cvtepi64_pd(__A),
@@ -393,7 +522,7 @@ _mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) {
                                              (__v8df)_mm512_setzero_pd(), \
                                              (__mmask8)(U), (int)(R))
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS512
 _mm512_cvtepi64_ps (__m512i __A) {
   return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
                                                    (__v8sf) _mm256_setzero_ps(),
@@ -401,7 +530,7 @@ _mm512_cvtepi64_ps (__m512i __A) {
                                                    _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
   return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
                                                    (__v8sf) __W,
@@ -409,7 +538,7 @@ _mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
                                                    _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) {
   return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
                                                    (__v8sf) _mm256_setzero_ps(),
@@ -433,7 +562,7 @@ _mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) {
                                                   (__mmask8)(U), (int)(R))
 
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_cvttpd_epi64 (__m512d __A) {
   return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
                                                      (__v8di) _mm512_setzero_si512(),
@@ -441,7 +570,7 @@ _mm512_cvttpd_epi64 (__m512d __A) {
                                                      _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
   return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
                                                      (__v8di) __W,
@@ -449,7 +578,7 @@ _mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
                                                      _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) {
   return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
                                                      (__v8di) _mm512_setzero_si512(),
@@ -472,7 +601,7 @@ _mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) {
                                                    (__v8di)_mm512_setzero_si512(), \
                                                    (__mmask8)(U), (int)(R))
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_cvttpd_epu64 (__m512d __A) {
   return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
                                                       (__v8di) _mm512_setzero_si512(),
@@ -480,7 +609,7 @@ _mm512_cvttpd_epu64 (__m512d __A) {
                                                       _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
   return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
                                                       (__v8di) __W,
@@ -488,7 +617,7 @@ _mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
                                                       _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) {
   return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
                                                       (__v8di) _mm512_setzero_si512(),
@@ -511,7 +640,7 @@ _mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) {
                                                    (__v8di)_mm512_setzero_si512(), \
                                                    (__mmask8)(U), (int)(R))
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_cvttps_epi64 (__m256 __A) {
   return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
                                                      (__v8di) _mm512_setzero_si512(),
@@ -519,7 +648,7 @@ _mm512_cvttps_epi64 (__m256 __A) {
                                                      _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
   return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
                                                      (__v8di) __W,
@@ -527,7 +656,7 @@ _mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
                                                      _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) {
   return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
                                                      (__v8di) _mm512_setzero_si512(),
@@ -550,7 +679,7 @@ _mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) {
                                                    (__v8di)_mm512_setzero_si512(), \
                                                    (__mmask8)(U), (int)(R))
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_cvttps_epu64 (__m256 __A) {
   return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
                                                       (__v8di) _mm512_setzero_si512(),
@@ -558,7 +687,7 @@ _mm512_cvttps_epu64 (__m256 __A) {
                                                       _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
   return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
                                                       (__v8di) __W,
@@ -566,7 +695,7 @@ _mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
                                                       _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) {
   return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
                                                       (__v8di) _mm512_setzero_si512(),
@@ -589,19 +718,19 @@ _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) {
                                                    (__v8di)_mm512_setzero_si512(), \
                                                    (__mmask8)(U), (int)(R))
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_cvtepu64_pd (__m512i __A) {
   return (__m512d)__builtin_convertvector((__v8du)__A, __v8df);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_cvtepu64_pd(__A),
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_cvtepu64_pd(__A),
@@ -625,7 +754,7 @@ _mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) {
                                             (__mmask8)(U), (int)(R))
 
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS512
 _mm512_cvtepu64_ps (__m512i __A) {
   return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
                                                     (__v8sf) _mm256_setzero_ps(),
@@ -633,7 +762,7 @@ _mm512_cvtepu64_ps (__m512i __A) {
                                                     _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
   return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
                                                     (__v8sf) __W,
@@ -641,7 +770,7 @@ _mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
                                                     _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) {
   return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
                                                     (__v8sf) _mm256_setzero_ps(),
@@ -935,32 +1064,32 @@ _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) {
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(C), (int)(R))
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
 _mm512_movepi32_mask (__m512i __A)
 {
   return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_movm_epi32 (__mmask16 __A)
 {
   return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_movm_epi64 (__mmask8 __A)
 {
   return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
 _mm512_movepi64_mask (__m512i __A)
 {
   return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
 }
 
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_broadcast_f32x2 (__m128 __A)
 {
   return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
@@ -968,7 +1097,7 @@ _mm512_broadcast_f32x2 (__m128 __A)
                                          0, 1, 0, 1, 0, 1, 0, 1);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
@@ -976,7 +1105,7 @@ _mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
                                              (__v16sf)__O);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
@@ -984,7 +1113,7 @@ _mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
                                              (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_broadcast_f32x8(__m256 __A)
 {
   return (__m512)__builtin_shufflevector((__v8sf)__A, (__v8sf)__A,
@@ -992,7 +1121,7 @@ _mm512_broadcast_f32x8(__m256 __A)
                                          0, 1, 2, 3, 4, 5, 6, 7);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
@@ -1000,7 +1129,7 @@ _mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A)
                                              (__v16sf)__O);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
@@ -1008,14 +1137,14 @@ _mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A)
                                              (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_broadcast_f64x2(__m128d __A)
 {
   return (__m512d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
                                           0, 1, 0, 1, 0, 1, 0, 1);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A)
 {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
@@ -1023,7 +1152,7 @@ _mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A)
                                               (__v8df)__O);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
 {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
@@ -1031,7 +1160,7 @@ _mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
                                               (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_broadcast_i32x2 (__m128i __A)
 {
   return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
@@ -1039,7 +1168,7 @@ _mm512_broadcast_i32x2 (__m128i __A)
                                           0, 1, 0, 1, 0, 1, 0, 1);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@@ -1047,7 +1176,7 @@ _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
                                              (__v16si)__O);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@@ -1055,7 +1184,7 @@ _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
                                              (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_broadcast_i32x8(__m256i __A)
 {
   return (__m512i)__builtin_shufflevector((__v8si)__A, (__v8si)__A,
@@ -1063,7 +1192,7 @@ _mm512_broadcast_i32x8(__m256i __A)
                                           0, 1, 2, 3, 4, 5, 6, 7);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@@ -1071,7 +1200,7 @@ _mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A)
                                              (__v16si)__O);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@@ -1079,14 +1208,14 @@ _mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A)
                                              (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_broadcast_i64x2(__m128i __A)
 {
   return (__m512i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
                                           0, 1, 0, 1, 0, 1, 0, 1);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -1094,7 +1223,7 @@ _mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A)
                                              (__v8di)__O);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -1256,6 +1385,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
   (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
                                           (__mmask8)(U))
 
+#undef __DEFAULT_FN_ATTRS512
 #undef __DEFAULT_FN_ATTRS
 
 #endif
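The first avx512dqintrin.h hunk above introduces the 8-bit mask-register operations (_knot_mask8, _kand_mask8, _cvtmask8_u32, ...). A hedged usage sketch, assuming an AVX-512DQ target (compiled with something like -mavx512f -mavx512dq): combine two compare masks and inspect the result.

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m512d a = _mm512_set_pd(1, -1, 1, -1, 1, -1, 1, -1);
  __m512d b = _mm512_set_pd(1, 1, 1, 1, -1, -1, -1, -1);
  __m512d zero = _mm512_setzero_pd();

  /* lanes where a > 0 and b > 0, combined in the mask registers */
  __mmask8 ma = _mm512_cmp_pd_mask(a, zero, _CMP_GT_OQ);
  __mmask8 mb = _mm512_cmp_pd_mask(b, zero, _CMP_GT_OQ);
  __mmask8 both = _kand_mask8(ma, mb);

  printf("mask=0x%x\n", _cvtmask8_u32(both)); /* prints mask=0xa0 */
  return 0;
}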
c_headers/avx512fintrin.h

@@ -175,6 +175,7 @@ typedef enum
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512)))
 #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
 
 /* Create vectors with repeated elements */
 
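The added width-less __DEFAULT_FN_ATTRS hinges on clang's __min_vector_width__(N) attribute: it records the widest vector width a function truly requires, so the backend can still use 512-bit registers there even under options such as -mprefer-vector-width=256, while functions that only touch mask registers declare no width at all. A hedged sketch of the attribute on a user-defined function (function name is illustrative):

#include <immintrin.h>

/* declares a genuine 512-bit requirement, mirroring __DEFAULT_FN_ATTRS512 */
static inline __m512i
    __attribute__((__always_inline__, __nodebug__, __target__("avx512f"),
                   __min_vector_width__(512)))
add_epi32_512(__m512i a, __m512i b) {
  return _mm512_add_epi32(a, b);
}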
@@ -508,13 +509,13 @@ _mm512_castsi512_si256 (__m512i __A)
   return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_int2mask(int __a)
 {
   return (__mmask16)__a;
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS512
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm512_mask2int(__mmask16 __a)
 {
   return (int)__a;
@@ -4328,6 +4329,15 @@ _mm512_loadu_si512 (void const *__P)
   return ((struct __loadu_si512*)__P)->__v;
 }
 
+static __inline __m512i __DEFAULT_FN_ATTRS512
+_mm512_loadu_epi32 (void const *__P)
+{
+  struct __loadu_epi32 {
+    __m512i __v;
+  } __attribute__((__packed__, __may_alias__));
+  return ((struct __loadu_epi32*)__P)->__v;
+}
+
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
 {
@@ -4346,6 +4356,15 @@ _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
                                                   (__mmask16) __U);
 }
 
+static __inline __m512i __DEFAULT_FN_ATTRS512
+_mm512_loadu_epi64 (void const *__P)
+{
+  struct __loadu_epi64 {
+    __m512i __v;
+  } __attribute__((__packed__, __may_alias__));
+  return ((struct __loadu_epi64*)__P)->__v;
+}
+
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
 {
@@ -4481,6 +4500,15 @@ _mm512_load_epi64 (void const *__P)
 
 /* SIMD store ops */
 
+static __inline void __DEFAULT_FN_ATTRS512
+_mm512_storeu_epi64 (void *__P, __m512i __A)
+{
+  struct __storeu_epi64 {
+    __m512i __v;
+  } __attribute__((__packed__, __may_alias__));
+  ((struct __storeu_epi64*)__P)->__v = __A;
+}
+
 static __inline void __DEFAULT_FN_ATTRS512
 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
 {
@@ -4497,6 +4525,15 @@ _mm512_storeu_si512 (void *__P, __m512i __A)
   ((struct __storeu_si512*)__P)->__v = __A;
 }
 
+static __inline void __DEFAULT_FN_ATTRS512
+_mm512_storeu_epi32 (void *__P, __m512i __A)
+{
+  struct __storeu_epi32 {
+    __m512i __v;
+  } __attribute__((__packed__, __may_alias__));
+  ((struct __storeu_epi32*)__P)->__v = __A;
+}
+
 static __inline void __DEFAULT_FN_ATTRS512
 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
 {
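The new _mm512_loadu_epi32/_mm512_storeu_epi64 helpers reuse the header's unaligned-access idiom: a struct marked __packed__ (its alignment requirement drops to 1) and __may_alias__ (exempt from strict-aliasing rules) makes the dereference valid for an arbitrary pointer. The same idiom as a standalone scalar sketch (names here are illustrative):

#include <stdint.h>
#include <stdio.h>

static uint64_t loadu_u64(const void *p) {
  /* packed: no alignment assumption; may_alias: no strict-aliasing UB */
  struct wrapper {
    uint64_t v;
  } __attribute__((__packed__, __may_alias__));
  return ((const struct wrapper *)p)->v;
}

int main(void) {
  unsigned char buf[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
  printf("0x%016llx\n", (unsigned long long)loadu_u64(buf + 1)); /* odd address */
  return 0;
}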
@@ -4580,7 +4617,7 @@ _mm512_store_epi64 (void *__P, __m512i __A)
 
 /* Mask ops */
 
-static __inline __mmask16 __DEFAULT_FN_ATTRS512
+static __inline __mmask16 __DEFAULT_FN_ATTRS
 _mm512_knot(__mmask16 __M)
 {
   return __builtin_ia32_knothi(__M);
@@ -5622,7 +5659,7 @@ _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
                                                 (__v4sf)_mm_setzero_ps(), \
                                                 (__mmask8)(U), (int)(R))
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_kmov (__mmask16 __A)
 {
   return __A;
@@ -7593,177 +7630,177 @@ _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
 
 #define _mm512_i64gather_ps(index, addr, scale) \
   (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
-                                       (float const *)(addr), \
+                                       (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale))
 
 #define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
   (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
-                                       (float const *)(addr), \
+                                       (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale))
 
 #define _mm512_i64gather_epi32(index, addr, scale) \
   (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
-                                        (int const *)(addr), \
+                                        (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)-1, (int)(scale))
 
 #define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
   (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
-                                        (int const *)(addr), \
+                                        (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)(mask), (int)(scale))
 
 #define _mm512_i64gather_pd(index, addr, scale) \
   (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
-                                       (double const *)(addr), \
+                                       (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale))
 
 #define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
   (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
-                                       (double const *)(addr), \
+                                       (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale))
 
 #define _mm512_i64gather_epi64(index, addr, scale) \
   (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
-                                       (long long const *)(addr), \
+                                       (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale))
 
 #define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
   (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
-                                       (long long const *)(addr), \
+                                       (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale))
 
 #define _mm512_i32gather_ps(index, addr, scale) \
   (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
-                                       (float const *)(addr), \
+                                       (void const *)(addr), \
                                        (__v16sf)(__m512)(index), \
                                        (__mmask16)-1, (int)(scale))
 
 #define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
   (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
-                                       (float const *)(addr), \
+                                       (void const *)(addr), \
                                        (__v16sf)(__m512)(index), \
                                        (__mmask16)(mask), (int)(scale))
 
 #define _mm512_i32gather_epi32(index, addr, scale) \
   (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
-                                        (int const *)(addr), \
+                                        (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)-1, (int)(scale))
 
 #define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
   (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
-                                        (int const *)(addr), \
+                                        (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)(mask), (int)(scale))
 
 #define _mm512_i32gather_pd(index, addr, scale) \
   (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
-                                       (double const *)(addr), \
+                                       (void const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale))
 
 #define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
   (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
-                                       (double const *)(addr), \
+                                       (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale))
 
 #define _mm512_i32gather_epi64(index, addr, scale) \
   (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
-                                       (long long const *)(addr), \
+                                       (void const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale))
 
 #define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
   (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
-                                       (long long const *)(addr), \
+                                       (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale))
 
 #define _mm512_i64scatter_ps(addr, index, v1, scale) \
-  __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)-1, \
+  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \
                                 (__v8di)(__m512i)(index), \
                                 (__v8sf)(__m256)(v1), (int)(scale))
 
 #define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
-  __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)(mask), \
+  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)(mask), \
                                 (__v8di)(__m512i)(index), \
                                 (__v8sf)(__m256)(v1), (int)(scale))
 
 #define _mm512_i64scatter_epi32(addr, index, v1, scale) \
-  __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)-1, \
+  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)-1, \
                                 (__v8di)(__m512i)(index), \
                                 (__v8si)(__m256i)(v1), (int)(scale))
 
 #define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
-  __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)(mask), \
+  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)(mask), \
                                 (__v8di)(__m512i)(index), \
                                 (__v8si)(__m256i)(v1), (int)(scale))
 
 #define _mm512_i64scatter_pd(addr, index, v1, scale) \
-  __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)-1, \
+  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8df)(__m512d)(v1), (int)(scale))
 
 #define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
-  __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)(mask), \
+  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8df)(__m512d)(v1), (int)(scale))
 
 #define _mm512_i64scatter_epi64(addr, index, v1, scale) \
-  __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)-1, \
+  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)-1, \
|
||||
(__v8di)(__m512i)(index), \
|
||||
(__v8di)(__m512i)(v1), (int)(scale))
|
||||
|
||||
#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)(mask), \
|
||||
__builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)(mask), \
|
||||
(__v8di)(__m512i)(index), \
|
||||
(__v8di)(__m512i)(v1), (int)(scale))
|
||||
|
||||
#define _mm512_i32scatter_ps(addr, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)-1, \
|
||||
__builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)-1, \
|
||||
(__v16si)(__m512i)(index), \
|
||||
(__v16sf)(__m512)(v1), (int)(scale))
|
||||
|
||||
#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)(mask), \
|
||||
__builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)(mask), \
|
||||
(__v16si)(__m512i)(index), \
|
||||
(__v16sf)(__m512)(v1), (int)(scale))
|
||||
|
||||
#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)-1, \
|
||||
__builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)-1, \
|
||||
(__v16si)(__m512i)(index), \
|
||||
(__v16si)(__m512i)(v1), (int)(scale))
|
||||
|
||||
#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)(mask), \
|
||||
__builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)(mask), \
|
||||
(__v16si)(__m512i)(index), \
|
||||
(__v16si)(__m512i)(v1), (int)(scale))
|
||||
|
||||
#define _mm512_i32scatter_pd(addr, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)-1, \
|
||||
__builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)-1, \
|
||||
(__v8si)(__m256i)(index), \
|
||||
(__v8df)(__m512d)(v1), (int)(scale))
|
||||
|
||||
#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)(mask), \
|
||||
__builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)(mask), \
|
||||
(__v8si)(__m256i)(index), \
|
||||
(__v8df)(__m512d)(v1), (int)(scale))
|
||||
|
||||
#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)-1, \
|
||||
__builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)-1, \
|
||||
(__v8si)(__m256i)(index), \
|
||||
(__v8di)(__m512i)(v1), (int)(scale))
|
||||
|
||||
#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)(mask), \
|
||||
__builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)(mask), \
|
||||
(__v8si)(__m256i)(index), \
|
||||
(__v8di)(__m512i)(v1), (int)(scale))
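/* Illustrative usage sketch (not part of the header diff): gathering 16
 * floats through 32-bit indices, then scattering a masked subset back.
 * The table pointer and values are hypothetical; requires an AVX512F
 * target. */
#include <immintrin.h>

static void gather_scatter_demo(float *table /* >= 16 readable slots */)
{
    __m512i idx = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7,
                                    8, 9, 10, 11, 12, 13, 14, 15);
    /* scale is in bytes: 4 selects table[idx[i]] for float elements. */
    __m512 v = _mm512_i32gather_ps(idx, table, 4);
    v = _mm512_add_ps(v, _mm512_set1_ps(1.0f));
    /* 0x5555 keeps lanes 0,2,4,...; masked-off stores are suppressed. */
    _mm512_mask_i32scatter_ps(table, (__mmask16)0x5555, idx, v, 4);
}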

@ -8320,54 +8357,105 @@ _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,

#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32

static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kand (__mmask16 __A, __mmask16 __B)
{
return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kandn (__mmask16 __A, __mmask16 __B)
{
return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kor (__mmask16 __A, __mmask16 __B)
{
return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
}

static __inline__ int __DEFAULT_FN_ATTRS512
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_kortestc (__mmask16 __A, __mmask16 __B)
{
return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
}

static __inline__ int __DEFAULT_FN_ATTRS512
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_kortestz (__mmask16 __A, __mmask16 __B)
{
return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
{
return (unsigned char)__builtin_ia32_kortestchi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
{
return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
*__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B);
return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
{
return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kxnor (__mmask16 __A, __mmask16 __B)
{
return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kxor (__mmask16 __A, __mmask16 __B)
{
return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
}

#define _kand_mask16 _mm512_kand
#define _kandn_mask16 _mm512_kandn
#define _knot_mask16 _mm512_knot
#define _kor_mask16 _mm512_kor
#define _kxnor_mask16 _mm512_kxnor
#define _kxor_mask16 _mm512_kxor

#define _kshiftli_mask16(A, I) \
(__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I))

#define _kshiftri_mask16(A, I) \
(__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I))

static __inline__ unsigned int __DEFAULT_FN_ATTRS
_cvtmask16_u32(__mmask16 __A) {
return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_cvtu32_mask16(unsigned int __A) {
return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_load_mask16(__mmask16 *__A) {
return (__mmask16)__builtin_ia32_kmovw(*(__mmask16 *)__A);
}

static __inline__ void __DEFAULT_FN_ATTRS
_store_mask16(__mmask16 *__A, __mmask16 __B) {
*(__mmask16 *)__A = __builtin_ia32_kmovw((__mmask16)__B);
}
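/* Illustrative usage sketch (not part of the header diff): converting plain
 * integers into mask registers and testing their OR with the new _kortest*
 * helpers.  Requires an AVX512F target; the bit values are hypothetical. */
#include <immintrin.h>

static int masks_cover_all_lanes(unsigned int a_bits, unsigned int b_bits)
{
    __mmask16 a = _cvtu32_mask16(a_bits);
    __mmask16 b = _cvtu32_mask16(b_bits);
    unsigned char all_ones;
    /* kortest sets ZF when (a|b) == 0 and CF when (a|b) == 0xFFFF; the _u8
     * wrapper returns ZF and writes CF through the out-parameter. */
    (void)_kortest_mask16_u8(a, b, &all_ones);
    return all_ones;
}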

static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_stream_si512 (__m512i * __P, __m512i __A)
{
@ -9594,5 +9682,6 @@ _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {

#undef __DEFAULT_FN_ATTRS512
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS

#endif /* __AVX512FINTRIN_H */

@ -33,78 +33,78 @@

#define _mm512_mask_prefetch_i32gather_pd(index, mask, addr, scale, hint) \
__builtin_ia32_gatherpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
(long long const *)(addr), (int)(scale), \
(void const *)(addr), (int)(scale), \
(int)(hint))

#define _mm512_prefetch_i32gather_pd(index, addr, scale, hint) \
__builtin_ia32_gatherpfdpd((__mmask8) -1, (__v8si)(__m256i)(index), \
(long long const *)(addr), (int)(scale), \
(void const *)(addr), (int)(scale), \
(int)(hint))

#define _mm512_mask_prefetch_i32gather_ps(index, mask, addr, scale, hint) \
__builtin_ia32_gatherpfdps((__mmask16)(mask), \
(__v16si)(__m512i)(index), (int const *)(addr), \
(__v16si)(__m512i)(index), (void const *)(addr), \
(int)(scale), (int)(hint))

#define _mm512_prefetch_i32gather_ps(index, addr, scale, hint) \
__builtin_ia32_gatherpfdps((__mmask16) -1, \
(__v16si)(__m512i)(index), (int const *)(addr), \
(__v16si)(__m512i)(index), (void const *)(addr), \
(int)(scale), (int)(hint))

#define _mm512_mask_prefetch_i64gather_pd(index, mask, addr, scale, hint) \
__builtin_ia32_gatherpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \
(long long const *)(addr), (int)(scale), \
(void const *)(addr), (int)(scale), \
(int)(hint))

#define _mm512_prefetch_i64gather_pd(index, addr, scale, hint) \
__builtin_ia32_gatherpfqpd((__mmask8) -1, (__v8di)(__m512i)(index), \
(long long const *)(addr), (int)(scale), \
(void const *)(addr), (int)(scale), \
(int)(hint))

#define _mm512_mask_prefetch_i64gather_ps(index, mask, addr, scale, hint) \
__builtin_ia32_gatherpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
(int const *)(addr), (int)(scale), (int)(hint))
(void const *)(addr), (int)(scale), (int)(hint))

#define _mm512_prefetch_i64gather_ps(index, addr, scale, hint) \
__builtin_ia32_gatherpfqps((__mmask8) -1, (__v8di)(__m512i)(index), \
(int const *)(addr), (int)(scale), (int)(hint))
(void const *)(addr), (int)(scale), (int)(hint))

#define _mm512_prefetch_i32scatter_pd(addr, index, scale, hint) \
__builtin_ia32_scatterpfdpd((__mmask8)-1, (__v8si)(__m256i)(index), \
(long long *)(addr), (int)(scale), \
(void *)(addr), (int)(scale), \
(int)(hint))

#define _mm512_mask_prefetch_i32scatter_pd(addr, mask, index, scale, hint) \
__builtin_ia32_scatterpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
(long long *)(addr), (int)(scale), \
(void *)(addr), (int)(scale), \
(int)(hint))

#define _mm512_prefetch_i32scatter_ps(addr, index, scale, hint) \
__builtin_ia32_scatterpfdps((__mmask16)-1, (__v16si)(__m512i)(index), \
(int *)(addr), (int)(scale), (int)(hint))
(void *)(addr), (int)(scale), (int)(hint))

#define _mm512_mask_prefetch_i32scatter_ps(addr, mask, index, scale, hint) \
__builtin_ia32_scatterpfdps((__mmask16)(mask), \
(__v16si)(__m512i)(index), (int *)(addr), \
(__v16si)(__m512i)(index), (void *)(addr), \
(int)(scale), (int)(hint))

#define _mm512_prefetch_i64scatter_pd(addr, index, scale, hint) \
__builtin_ia32_scatterpfqpd((__mmask8)-1, (__v8di)(__m512i)(index), \
(long long *)(addr), (int)(scale), \
(void *)(addr), (int)(scale), \
(int)(hint))

#define _mm512_mask_prefetch_i64scatter_pd(addr, mask, index, scale, hint) \
__builtin_ia32_scatterpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \
(long long *)(addr), (int)(scale), \
(void *)(addr), (int)(scale), \
(int)(hint))

#define _mm512_prefetch_i64scatter_ps(addr, index, scale, hint) \
__builtin_ia32_scatterpfqps((__mmask8)-1, (__v8di)(__m512i)(index), \
(int *)(addr), (int)(scale), (int)(hint))
(void *)(addr), (int)(scale), (int)(hint))

#define _mm512_mask_prefetch_i64scatter_ps(addr, mask, index, scale, hint) \
__builtin_ia32_scatterpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
(int *)(addr), (int)(scale), (int)(hint))
(void *)(addr), (int)(scale), (int)(hint))
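/* Illustrative usage sketch (not part of the header diff): prefetching the
 * cache lines a later gather will touch.  Requires an AVX512PF target
 * (Knights Landing); the table pointer is hypothetical. */
#include <immintrin.h>

static void prefetch_then_gather(const float *table, __m512i idx, __m512 *out)
{
    /* Pull table[idx[i]] toward L1 ahead of the real gather. */
    _mm512_prefetch_i32gather_ps(idx, table, 4, _MM_HINT_T0);
    /* ... other work could overlap here ... */
    *out = _mm512_i32gather_ps(idx, table, 4);
}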

#undef __DEFAULT_FN_ATTRS

@ -227,167 +227,141 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
(__v32hi)_mm512_setzero_si512())

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
_mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S,
(__v8di) __A,
(__v8di) __B,
__U);
return (__m512i)__builtin_ia32_vpshldvq512((__v8di)__A, (__v8di)__B,
(__v8di)__C);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B)
_mm512_mask_shldv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshldvq512_maskz ((__v8di) __S,
(__v8di) __A,
(__v8di) __B,
__U);
return (__m512i)__builtin_ia32_selectq_512(__U,
(__v8di)_mm512_shldv_epi64(__A, __B, __C),
(__v8di)__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shldv_epi64(__m512i __S, __m512i __A, __m512i __B)
_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S,
(__v8di) __A,
(__v8di) __B,
(__mmask8) -1);
return (__m512i)__builtin_ia32_selectq_512(__U,
(__v8di)_mm512_shldv_epi64(__A, __B, __C),
(__v8di)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shldv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
_mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
__U);
return (__m512i)__builtin_ia32_vpshldvd512((__v16si)__A, (__v16si)__B,
(__v16si)__C);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
_mm512_mask_shldv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshldvd512_maskz ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
__U);
return (__m512i)__builtin_ia32_selectd_512(__U,
(__v16si)_mm512_shldv_epi32(__A, __B, __C),
(__v16si)__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shldv_epi32(__m512i __S, __m512i __A, __m512i __B)
_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
(__mmask16) -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shldv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S,
(__v32hi) __A,
(__v32hi) __B,
__U);
return (__m512i)__builtin_ia32_selectd_512(__U,
(__v16si)_mm512_shldv_epi32(__A, __B, __C),
(__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B)
_mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshldvw512_maskz ((__v32hi) __S,
(__v32hi) __A,
(__v32hi) __B,
__U);
return (__m512i)__builtin_ia32_vpshldvw512((__v32hi)__A, (__v32hi)__B,
(__v32hi)__C);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shldv_epi16(__m512i __S, __m512i __A, __m512i __B)
_mm512_mask_shldv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S,
(__v32hi) __A,
(__v32hi) __B,
(__mmask32) -1);
return (__m512i)__builtin_ia32_selectw_512(__U,
(__v32hi)_mm512_shldv_epi16(__A, __B, __C),
(__v32hi)__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shrdv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S,
(__v8di) __A,
(__v8di) __B,
__U);
return (__m512i)__builtin_ia32_selectw_512(__U,
(__v32hi)_mm512_shldv_epi16(__A, __B, __C),
(__v32hi)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B)
_mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshrdvq512_maskz ((__v8di) __S,
(__v8di) __A,
(__v8di) __B,
__U);
return (__m512i)__builtin_ia32_vpshrdvq512((__v8di)__A, (__v8di)__B,
(__v8di)__C);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shrdv_epi64(__m512i __S, __m512i __A, __m512i __B)
_mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S,
(__v8di) __A,
(__v8di) __B,
(__mmask8) -1);
return (__m512i)__builtin_ia32_selectq_512(__U,
(__v8di)_mm512_shrdv_epi64(__A, __B, __C),
(__v8di)__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shrdv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
__U);
return (__m512i)__builtin_ia32_selectq_512(__U,
(__v8di)_mm512_shrdv_epi64(__A, __B, __C),
(__v8di)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
_mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshrdvd512_maskz ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
__U);
return (__m512i)__builtin_ia32_vpshrdvd512((__v16si)__A, (__v16si)__B,
(__v16si)__C);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shrdv_epi32(__m512i __S, __m512i __A, __m512i __B)
_mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
(__mmask16) -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shrdv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S,
(__v32hi) __A,
(__v32hi) __B,
__U);
return (__m512i) __builtin_ia32_selectd_512(__U,
(__v16si)_mm512_shrdv_epi32(__A, __B, __C),
(__v16si)__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B)
_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshrdvw512_maskz ((__v32hi) __S,
(__v32hi) __A,
(__v32hi) __B,
__U);
return (__m512i) __builtin_ia32_selectd_512(__U,
(__v16si)_mm512_shrdv_epi32(__A, __B, __C),
(__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shrdv_epi16(__m512i __S, __m512i __A, __m512i __B)
_mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S,
(__v32hi) __A,
(__v32hi) __B,
(__mmask32) -1);
return (__m512i)__builtin_ia32_vpshrdvw512((__v32hi)__A, (__v32hi)__B,
(__v32hi)__C);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shrdv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C)
{
return (__m512i)__builtin_ia32_selectw_512(__U,
(__v32hi)_mm512_shrdv_epi16(__A, __B, __C),
(__v32hi)__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C)
{
return (__m512i)__builtin_ia32_selectw_512(__U,
(__v32hi)_mm512_shrdv_epi16(__A, __B, __C),
(__v32hi)_mm512_setzero_si512());
}
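/* Illustrative usage sketch (not part of the header diff): vpshldvq
 * concatenates each 64-bit lane of A (high part) with the matching lane of
 * B (low part), shifts left by the per-lane count in C, and keeps the high
 * half.  A scalar model of one lane, for reference:
 *     result = (a << (c % 64)) | (b >> (64 - (c % 64)))   when c % 64 != 0
 * Requires an AVX512VBMI2 target. */
#include <immintrin.h>

static __m512i rotate_left_by_lane(__m512i a, __m512i counts)
{
    /* Funnel-shifting a with itself yields a per-lane rotate. */
    return _mm512_shldv_epi64(a, a, counts);
}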

@ -91,30 +91,26 @@ _mm512_mask_permutexvar_epi8 (__m512i __W, __mmask64 __M, __m512i __A,
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_multishift_epi64_epi8 (__m512i __W, __mmask64 __M, __m512i __X, __m512i __Y)
_mm512_multishift_epi64_epi8(__m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
(__v64qi) __Y,
(__v64qi) __W,
(__mmask64) __M);
return (__m512i)__builtin_ia32_vpmultishiftqb512((__v64qi)__X, (__v64qi) __Y);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_multishift_epi64_epi8 (__mmask64 __M, __m512i __X, __m512i __Y)
_mm512_mask_multishift_epi64_epi8(__m512i __W, __mmask64 __M, __m512i __X,
__m512i __Y)
{
return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
(__v64qi) __Y,
(__v64qi) _mm512_setzero_si512 (),
(__mmask64) __M);
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
(__v64qi)_mm512_multishift_epi64_epi8(__X, __Y),
(__v64qi)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_multishift_epi64_epi8 (__m512i __X, __m512i __Y)
_mm512_maskz_multishift_epi64_epi8(__mmask64 __M, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
(__v64qi) __Y,
(__v64qi) _mm512_undefined_epi32 (),
(__mmask64) -1);
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
(__v64qi)_mm512_multishift_epi64_epi8(__X, __Y),
(__v64qi)_mm512_setzero_si512());
}

@ -150,61 +150,49 @@ _mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y)
_mm_multishift_epi64_epi8(__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
(__v16qi) __Y,
(__v16qi) __W,
(__mmask16) __M);
return (__m128i)__builtin_ia32_vpmultishiftqb128((__v16qi)__X, (__v16qi)__Y);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y)
_mm_mask_multishift_epi64_epi8(__m128i __W, __mmask16 __M, __m128i __X,
__m128i __Y)
{
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
(__v16qi) __Y,
(__v16qi)
_mm_setzero_si128 (),
(__mmask16) __M);
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
(__v16qi)_mm_multishift_epi64_epi8(__X, __Y),
(__v16qi)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y)
_mm_maskz_multishift_epi64_epi8(__mmask16 __M, __m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
(__v16qi) __Y,
(__v16qi)
_mm_undefined_si128 (),
(__mmask16) -1);
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
(__v16qi)_mm_multishift_epi64_epi8(__X, __Y),
(__v16qi)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y)
_mm256_multishift_epi64_epi8(__m256i __X, __m256i __Y)
{
return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
(__v32qi) __Y,
(__v32qi) __W,
(__mmask32) __M);
return (__m256i)__builtin_ia32_vpmultishiftqb256((__v32qi)__X, (__v32qi)__Y);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y)
_mm256_mask_multishift_epi64_epi8(__m256i __W, __mmask32 __M, __m256i __X,
__m256i __Y)
{
return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
(__v32qi) __Y,
(__v32qi)
_mm256_setzero_si256 (),
(__mmask32) __M);
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
(__v32qi)_mm256_multishift_epi64_epi8(__X, __Y),
(__v32qi)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y)
_mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y)
{
return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
(__v32qi) __Y,
(__v32qi)
_mm256_undefined_si256 (),
(__mmask32) -1);
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
(__v32qi)_mm256_multishift_epi64_epi8(__X, __Y),
(__v32qi)_mm256_setzero_si256());
}
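/* Illustrative usage sketch (not part of the header diff): vpmultishiftqb
 * builds each destination byte from an unaligned 8-bit field of the
 * matching 64-bit lane of the data operand; the bit offset is the
 * corresponding control byte mod 64, wrapping around the lane.
 * Requires an AVX512VBMI target.  The control pattern is hypothetical. */
#include <immintrin.h>

static __m512i rotr4_per_qword(__m512i data)
{
    /* Control bytes 0x04,0x0C,...,0x3C pick bits 4..11, 12..19, ... of
     * each lane, which reassembles the lane rotated right by 4 bits. */
    __m512i ctrl = _mm512_set1_epi64(0x3C342C241C140C04LL);
    return _mm512_multishift_epi64_epi8(ctrl, data);
}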

@ -2297,6 +2297,15 @@ _mm256_maskz_set1_epi8 (__mmask32 __M, char __A)
(__v32qi) _mm256_setzero_si256());
}

static __inline __m128i __DEFAULT_FN_ATTRS128
_mm_loadu_epi16 (void const *__P)
{
struct __loadu_epi16 {
__m128i __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_epi16*)__P)->__v;
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P)
{
@ -2314,6 +2323,15 @@ _mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P)
(__mmask8) __U);
}

static __inline __m256i __DEFAULT_FN_ATTRS256
_mm256_loadu_epi16 (void const *__P)
{
struct __loadu_epi16 {
__m256i __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_epi16*)__P)->__v;
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P)
{
@ -2331,6 +2349,15 @@ _mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P)
(__mmask16) __U);
}

static __inline __m128i __DEFAULT_FN_ATTRS128
_mm_loadu_epi8 (void const *__P)
{
struct __loadu_epi8 {
__m128i __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_epi8*)__P)->__v;
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P)
{
@ -2348,6 +2375,15 @@ _mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P)
(__mmask16) __U);
}

static __inline __m256i __DEFAULT_FN_ATTRS256
_mm256_loadu_epi8 (void const *__P)
{
struct __loadu_epi8 {
__m256i __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_epi8*)__P)->__v;
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P)
{
@ -2364,7 +2400,17 @@ _mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P)
_mm256_setzero_si256 (),
(__mmask32) __U);
}
static __inline__ void __DEFAULT_FN_ATTRS256

static __inline void __DEFAULT_FN_ATTRS128
_mm_storeu_epi16 (void *__P, __m128i __A)
{
struct __storeu_epi16 {
__m128i __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_epi16*)__P)->__v = __A;
}

static __inline__ void __DEFAULT_FN_ATTRS128
_mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A)
{
__builtin_ia32_storedquhi128_mask ((__v8hi *) __P,
@ -2372,6 +2418,15 @@ _mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A)
(__mmask8) __U);
}

static __inline void __DEFAULT_FN_ATTRS256
_mm256_storeu_epi16 (void *__P, __m256i __A)
{
struct __storeu_epi16 {
__m256i __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_epi16*)__P)->__v = __A;
}

static __inline__ void __DEFAULT_FN_ATTRS256
_mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A)
{
@ -2380,6 +2435,15 @@ _mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A)
(__mmask16) __U);
}

static __inline void __DEFAULT_FN_ATTRS128
_mm_storeu_epi8 (void *__P, __m128i __A)
{
struct __storeu_epi8 {
__m128i __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_epi8*)__P)->__v = __A;
}

static __inline__ void __DEFAULT_FN_ATTRS128
_mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A)
{
@ -2388,6 +2452,15 @@ _mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A)
(__mmask16) __U);
}

static __inline void __DEFAULT_FN_ATTRS256
_mm256_storeu_epi8 (void *__P, __m256i __A)
{
struct __storeu_epi8 {
__m256i __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_epi8*)__P)->__v = __A;
}
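/* Illustrative usage sketch (not part of the header diff): the new
 * _mm256_loadu_epi8/_mm256_storeu_epi8 wrappers use a packed, may_alias
 * struct so the access has alignment 1 and may alias any object, which is
 * what lets them read and write arbitrary byte buffers safely.  Requires
 * AVX512BW+AVX512VL; the buffer is hypothetical. */
#include <immintrin.h>

static void bump_bytes(unsigned char *buf /* >= 32 bytes, any alignment */)
{
    __m256i v = _mm256_loadu_epi8(buf);          /* unaligned 256-bit load */
    v = _mm256_add_epi8(v, _mm256_set1_epi8(1)); /* byte-wise increment */
    _mm256_storeu_epi8(buf, v);                  /* unaligned store back */
}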

static __inline__ void __DEFAULT_FN_ATTRS256
_mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A)
{

@ -461,11 +461,17 @@ _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
(__v4si)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_and_epi32(__m256i __a, __m256i __b)
{
return (__m256i)((__v8su)__a & (__v8su)__b);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
(__v8si)_mm256_and_si256(__A, __B),
(__v8si)_mm256_and_epi32(__A, __B),
(__v8si)__W);
}

@ -475,11 +481,17 @@ _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_and_epi32(__m128i __a, __m128i __b)
{
return (__m128i)((__v4su)__a & (__v4su)__b);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
(__v4si)_mm_and_si128(__A, __B),
(__v4si)_mm_and_epi32(__A, __B),
(__v4si)__W);
}

@ -489,11 +501,17 @@ _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_andnot_epi32(__m256i __A, __m256i __B)
{
return (__m256i)(~(__v8su)__A & (__v8su)__B);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
(__v8si)_mm256_andnot_si256(__A, __B),
(__v8si)_mm256_andnot_epi32(__A, __B),
(__v8si)__W);
}

@ -504,25 +522,37 @@ _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
__U, __A, __B);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_andnot_epi32(__m128i __A, __m128i __B)
{
return (__m128i)(~(__v4su)__A & (__v4su)__B);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
(__v4si)_mm_andnot_si128(__A, __B),
(__v4si)_mm_andnot_epi32(__A, __B),
(__v4si)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
_mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_or_epi32(__m256i __a, __m256i __b)
{
return (__m256i)((__v8su)__a | (__v8su)__b);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
(__v8si)_mm256_or_si256(__A, __B),
(__v8si)_mm256_or_epi32(__A, __B),
(__v8si)__W);
}

@ -532,11 +562,17 @@ _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_or_epi32(__m128i __a, __m128i __b)
{
return (__m128i)((__v4su)__a | (__v4su)__b);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
(__v4si)_mm_or_si128(__A, __B),
(__v4si)_mm_or_epi32(__A, __B),
(__v4si)__W);
}

@ -546,11 +582,17 @@ _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_xor_epi32(__m256i __a, __m256i __b)
{
return (__m256i)((__v8su)__a ^ (__v8su)__b);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
(__v8si)_mm256_xor_si256(__A, __B),
(__v8si)_mm256_xor_epi32(__A, __B),
(__v8si)__W);
}

@ -561,11 +603,16 @@ _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A,
__m128i __B)
_mm_xor_epi32(__m128i __a, __m128i __b)
{
return (__m128i)((__v4su)__a ^ (__v4su)__b);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
(__v4si)_mm_xor_si128(__A, __B),
(__v4si)_mm_xor_epi32(__A, __B),
(__v4si)__W);
}

@ -575,11 +622,17 @@ _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_and_epi64(__m256i __a, __m256i __b)
{
return (__m256i)((__v4du)__a & (__v4du)__b);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
(__v4di)_mm256_and_si256(__A, __B),
(__v4di)_mm256_and_epi64(__A, __B),
(__v4di)__W);
}

@ -589,11 +642,17 @@ _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_and_epi64(__m128i __a, __m128i __b)
{
return (__m128i)((__v2du)__a & (__v2du)__b);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
(__v2di)_mm_and_si128(__A, __B),
(__v2di)_mm_and_epi64(__A, __B),
(__v2di)__W);
}

@ -603,11 +662,17 @@ _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_andnot_epi64(__m256i __A, __m256i __B)
{
return (__m256i)(~(__v4du)__A & (__v4du)__B);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
(__v4di)_mm256_andnot_si256(__A, __B),
(__v4di)_mm256_andnot_epi64(__A, __B),
(__v4di)__W);
}

@ -618,11 +683,17 @@ _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
__U, __A, __B);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_andnot_epi64(__m128i __A, __m128i __B)
{
return (__m128i)(~(__v2du)__A & (__v2du)__B);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
(__v2di)_mm_andnot_si128(__A, __B),
(__v2di)_mm_andnot_epi64(__A, __B),
(__v2di)__W);
}

@ -632,11 +703,17 @@ _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_or_epi64(__m256i __a, __m256i __b)
{
return (__m256i)((__v4du)__a | (__v4du)__b);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
(__v4di)_mm256_or_si256(__A, __B),
(__v4di)_mm256_or_epi64(__A, __B),
(__v4di)__W);
}

@ -646,11 +723,17 @@ _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_or_epi64(__m128i __a, __m128i __b)
{
return (__m128i)((__v2du)__a | (__v2du)__b);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
(__v2di)_mm_or_si128(__A, __B),
(__v2di)_mm_or_epi64(__A, __B),
(__v2di)__W);
}

@ -660,11 +743,17 @@ _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_xor_epi64(__m256i __a, __m256i __b)
{
return (__m256i)((__v4du)__a ^ (__v4du)__b);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
(__v4di)_mm256_xor_si256(__A, __B),
(__v4di)_mm256_xor_epi64(__A, __B),
(__v4di)__W);
}

@ -674,12 +763,18 @@ _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_xor_epi64(__m128i __a, __m128i __b)
{
return (__m128i)((__v2du)__a ^ (__v2du)__b);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A,
__m128i __B)
{
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
(__v2di)_mm_xor_si128(__A, __B),
(__v2di)_mm_xor_epi64(__A, __B),
(__v2di)__W);
}
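/* Illustrative usage sketch (not part of the header diff): the masked
 * bitwise forms compute the full AND/OR/XOR and then blend with the
 * source operand under the mask; lanes with a 0 mask bit keep __W.
 * Requires AVX512F+AVX512VL.  The values are hypothetical. */
#include <immintrin.h>

static __m256i and_low_half(__m256i w, __m256i a, __m256i b)
{
    /* 0x0F selects lanes 0..3: they get a & b, lanes 4..7 keep w. */
    return _mm256_mask_and_epi32(w, (__mmask8)0x0F, a, b);
}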

@ -3389,162 +3484,162 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
}

#define _mm_i64scatter_pd(addr, index, v1, scale) \
__builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)-1, \
__builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)-1, \
(__v2di)(__m128i)(index), \
(__v2df)(__m128d)(v1), (int)(scale))

#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
__builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)(mask), \
__builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)(mask), \
(__v2di)(__m128i)(index), \
(__v2df)(__m128d)(v1), (int)(scale))

#define _mm_i64scatter_epi64(addr, index, v1, scale) \
__builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)-1, \
__builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)-1, \
(__v2di)(__m128i)(index), \
(__v2di)(__m128i)(v1), (int)(scale))

#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
__builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)(mask), \
__builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)(mask), \
(__v2di)(__m128i)(index), \
(__v2di)(__m128i)(v1), (int)(scale))

#define _mm256_i64scatter_pd(addr, index, v1, scale) \
__builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)-1, \
__builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)-1, \
(__v4di)(__m256i)(index), \
(__v4df)(__m256d)(v1), (int)(scale))

#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
__builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)(mask), \
__builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)(mask), \
(__v4di)(__m256i)(index), \
(__v4df)(__m256d)(v1), (int)(scale))

#define _mm256_i64scatter_epi64(addr, index, v1, scale) \
__builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)-1, \
__builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)-1, \
(__v4di)(__m256i)(index), \
(__v4di)(__m256i)(v1), (int)(scale))

#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
__builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)(mask), \
__builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)(mask), \
(__v4di)(__m256i)(index), \
(__v4di)(__m256i)(v1), (int)(scale))

#define _mm_i64scatter_ps(addr, index, v1, scale) \
__builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)-1, \
__builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)-1, \
(__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
(int)(scale))

#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \
__builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)(mask), \
__builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)(mask), \
(__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
(int)(scale))

#define _mm_i64scatter_epi32(addr, index, v1, scale) \
__builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)-1, \
__builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)-1, \
(__v2di)(__m128i)(index), \
(__v4si)(__m128i)(v1), (int)(scale))

#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
__builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)(mask), \
__builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)(mask), \
(__v2di)(__m128i)(index), \
(__v4si)(__m128i)(v1), (int)(scale))

#define _mm256_i64scatter_ps(addr, index, v1, scale) \
__builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)-1, \
__builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)-1, \
(__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
(int)(scale))

#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \
__builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)(mask), \
__builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)(mask), \
(__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
(int)(scale))

#define _mm256_i64scatter_epi32(addr, index, v1, scale) \
__builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)-1, \
__builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)-1, \
(__v4di)(__m256i)(index), \
(__v4si)(__m128i)(v1), (int)(scale))

#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
__builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)(mask), \
__builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)(mask), \
(__v4di)(__m256i)(index), \
(__v4si)(__m128i)(v1), (int)(scale))

#define _mm_i32scatter_pd(addr, index, v1, scale) \
__builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)-1, \
__builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)-1, \
(__v4si)(__m128i)(index), \
(__v2df)(__m128d)(v1), (int)(scale))

#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \
__builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)(mask), \
__builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)(mask), \
(__v4si)(__m128i)(index), \
(__v2df)(__m128d)(v1), (int)(scale))

#define _mm_i32scatter_epi64(addr, index, v1, scale) \
__builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)-1, \
__builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)-1, \
(__v4si)(__m128i)(index), \
(__v2di)(__m128i)(v1), (int)(scale))

#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
__builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)(mask), \
__builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)(mask), \
(__v4si)(__m128i)(index), \
(__v2di)(__m128i)(v1), (int)(scale))

#define _mm256_i32scatter_pd(addr, index, v1, scale) \
__builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)-1, \
__builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)-1, \
(__v4si)(__m128i)(index), \
(__v4df)(__m256d)(v1), (int)(scale))

#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
__builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)(mask), \
__builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)(mask), \
(__v4si)(__m128i)(index), \
(__v4df)(__m256d)(v1), (int)(scale))

#define _mm256_i32scatter_epi64(addr, index, v1, scale) \
__builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)-1, \
__builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)-1, \
(__v4si)(__m128i)(index), \
(__v4di)(__m256i)(v1), (int)(scale))

#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
__builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)(mask), \
__builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)(mask), \
(__v4si)(__m128i)(index), \
(__v4di)(__m256i)(v1), (int)(scale))

#define _mm_i32scatter_ps(addr, index, v1, scale) \
__builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)-1, \
__builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)-1, \
(__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
(int)(scale))

#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \
__builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)(mask), \
__builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)(mask), \
(__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
(int)(scale))

#define _mm_i32scatter_epi32(addr, index, v1, scale) \
__builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)-1, \
__builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)-1, \
(__v4si)(__m128i)(index), \
(__v4si)(__m128i)(v1), (int)(scale))

#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
__builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)(mask), \
__builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)(mask), \
(__v4si)(__m128i)(index), \
(__v4si)(__m128i)(v1), (int)(scale))

#define _mm256_i32scatter_ps(addr, index, v1, scale) \
__builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)-1, \
__builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)-1, \
(__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
(int)(scale))

#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \
__builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)(mask), \
__builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)(mask), \
(__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
(int)(scale))

#define _mm256_i32scatter_epi32(addr, index, v1, scale) \
__builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)-1, \
__builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)-1, \
(__v8si)(__m256i)(index), \
(__v8si)(__m256i)(v1), (int)(scale))

#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
__builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)(mask), \
__builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)(mask), \
(__v8si)(__m256i)(index), \
(__v8si)(__m256i)(v1), (int)(scale))

@ -4989,6 +5084,12 @@ _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
(__v8si) _mm256_setzero_si256 ());
}

static __inline __m128i __DEFAULT_FN_ATTRS128
_mm_load_epi32 (void const *__P)
{
return *(__m128i *) __P;
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
{
@ -5008,6 +5109,12 @@ _mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
__U);
}

static __inline __m256i __DEFAULT_FN_ATTRS256
_mm256_load_epi32 (void const *__P)
{
return *(__m256i *) __P;
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
{
@ -5027,6 +5134,12 @@ _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
__U);
}

static __inline void __DEFAULT_FN_ATTRS128
_mm_store_epi32 (void *__P, __m128i __A)
{
*(__m128i *) __P = __A;
}

static __inline__ void __DEFAULT_FN_ATTRS128
_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
{
@ -5035,6 +5148,12 @@ _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
(__mmask8) __U);
}

static __inline void __DEFAULT_FN_ATTRS256
_mm256_store_epi32 (void *__P, __m256i __A)
{
*(__m256i *) __P = __A;
}

static __inline__ void __DEFAULT_FN_ATTRS256
_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
{
@ -5075,6 +5194,12 @@ _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
(__v4di) _mm256_setzero_si256 ());
}

static __inline __m128i __DEFAULT_FN_ATTRS128
_mm_load_epi64 (void const *__P)
{
return *(__m128i *) __P;
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
{
@ -5094,6 +5219,12 @@ _mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
__U);
}

static __inline __m256i __DEFAULT_FN_ATTRS256
_mm256_load_epi64 (void const *__P)
{
return *(__m256i *) __P;
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
{
@ -5113,6 +5244,12 @@ _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
__U);
}

static __inline void __DEFAULT_FN_ATTRS128
_mm_store_epi64 (void *__P, __m128i __A)
{
*(__m128i *) __P = __A;
}

static __inline__ void __DEFAULT_FN_ATTRS128
_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
{
@ -5121,6 +5258,12 @@ _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
(__mmask8) __U);
}

static __inline void __DEFAULT_FN_ATTRS256
_mm256_store_epi64 (void *__P, __m256i __A)
{
*(__m256i *) __P = __A;
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
|
||||
{
|
||||
@@ -5366,6 +5509,15 @@ _mm256_maskz_load_ps (__mmask8 __U, void const *__P)
                                    (__mmask8) __U);
}

static __inline __m128i __DEFAULT_FN_ATTRS128
_mm_loadu_epi64 (void const *__P)
{
  struct __loadu_epi64 {
    __m128i __v;
  } __attribute__((__packed__, __may_alias__));
  return ((struct __loadu_epi64*)__P)->__v;
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
{
@@ -5383,6 +5535,15 @@ _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
                                    (__mmask8) __U);
}

static __inline __m256i __DEFAULT_FN_ATTRS256
_mm256_loadu_epi64 (void const *__P)
{
  struct __loadu_epi64 {
    __m256i __v;
  } __attribute__((__packed__, __may_alias__));
  return ((struct __loadu_epi64*)__P)->__v;
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
{
@@ -5400,6 +5561,15 @@ _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
                                    (__mmask8) __U);
}

static __inline __m128i __DEFAULT_FN_ATTRS128
_mm_loadu_epi32 (void const *__P)
{
  struct __loadu_epi32 {
    __m128i __v;
  } __attribute__((__packed__, __may_alias__));
  return ((struct __loadu_epi32*)__P)->__v;
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
{
@@ -5417,6 +5587,15 @@ _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
                                    (__mmask8) __U);
}

static __inline __m256i __DEFAULT_FN_ATTRS256
_mm256_loadu_epi32 (void const *__P)
{
  struct __loadu_epi32 {
    __m256i __v;
  } __attribute__((__packed__, __may_alias__));
  return ((struct __loadu_epi32*)__P)->__v;
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
{
@@ -5534,6 +5713,15 @@ _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
                          (__mmask8) __U);
}

static __inline void __DEFAULT_FN_ATTRS128
_mm_storeu_epi64 (void *__P, __m128i __A)
{
  struct __storeu_epi64 {
    __m128i __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_epi64*)__P)->__v = __A;
}

static __inline__ void __DEFAULT_FN_ATTRS128
_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
{
@@ -5542,6 +5730,15 @@ _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
                          (__mmask8) __U);
}

static __inline void __DEFAULT_FN_ATTRS256
_mm256_storeu_epi64 (void *__P, __m256i __A)
{
  struct __storeu_epi64 {
    __m256i __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_epi64*)__P)->__v = __A;
}

static __inline__ void __DEFAULT_FN_ATTRS256
_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
{
@@ -5550,6 +5747,15 @@ _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
                          (__mmask8) __U);
}

static __inline void __DEFAULT_FN_ATTRS128
_mm_storeu_epi32 (void *__P, __m128i __A)
{
  struct __storeu_epi32 {
    __m128i __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_epi32*)__P)->__v = __A;
}

static __inline__ void __DEFAULT_FN_ATTRS128
_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
{
@@ -5558,6 +5764,15 @@ _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
                          (__mmask8) __U);
}

static __inline void __DEFAULT_FN_ATTRS256
_mm256_storeu_epi32 (void *__P, __m256i __A)
{
  struct __storeu_epi32 {
    __m256i __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_epi32*)__P)->__v = __A;
}

static __inline__ void __DEFAULT_FN_ATTRS256
_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
{
@@ -7769,97 +7984,97 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)

#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  (__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \
                                        (double const *)(addr), \
                                        (void const *)(addr), \
                                        (__v2di)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale))

#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  (__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \
                                        (long long const *)(addr), \
                                        (void const *)(addr), \
                                        (__v2di)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale))

#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  (__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \
                                        (double const *)(addr), \
                                        (void const *)(addr), \
                                        (__v4di)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale))

#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  (__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \
                                        (long long const *)(addr), \
                                        (void const *)(addr), \
                                        (__v4di)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale))

#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  (__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \
                                       (float const *)(addr), \
                                       (void const *)(addr), \
                                       (__v2di)(__m128i)(index), \
                                       (__mmask8)(mask), (int)(scale))

#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  (__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \
                                        (int const *)(addr), \
                                        (void const *)(addr), \
                                        (__v2di)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale))

#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  (__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \
                                       (float const *)(addr), \
                                       (void const *)(addr), \
                                       (__v4di)(__m256i)(index), \
                                       (__mmask8)(mask), (int)(scale))

#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  (__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \
                                        (int const *)(addr), \
                                        (void const *)(addr), \
                                        (__v4di)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale))

#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  (__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \
                                        (double const *)(addr), \
                                        (void const *)(addr), \
                                        (__v4si)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale))

#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  (__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \
                                        (long long const *)(addr), \
                                        (void const *)(addr), \
                                        (__v4si)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale))

#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  (__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \
                                        (double const *)(addr), \
                                        (void const *)(addr), \
                                        (__v4si)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale))

#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  (__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \
                                        (long long const *)(addr), \
                                        (void const *)(addr), \
                                        (__v4si)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale))

#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  (__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \
                                       (float const *)(addr), \
                                       (void const *)(addr), \
                                       (__v4si)(__m128i)(index), \
                                       (__mmask8)(mask), (int)(scale))

#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  (__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \
                                        (int const *)(addr), \
                                        (void const *)(addr), \
                                        (__v4si)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale))

#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  (__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \
                                       (float const *)(addr), \
                                       (void const *)(addr), \
                                       (__v8si)(__m256i)(index), \
                                       (__mmask8)(mask), (int)(scale))

#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  (__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \
                                        (int const *)(addr), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale))
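
A matching sketch for the masked gather macros above, again assuming an AVX512VL target; gather_even is a hypothetical helper for illustration, not part of the upstream header:

#include <immintrin.h>

static __m256d gather_even(const double *base, __m256d fallback, __mmask8 m)
{
    __m128i idx = _mm_setr_epi32(0, 2, 4, 6);
    /* Lanes whose mask bit is 0 keep their value from `fallback`;
       active lanes load base[idx[i]] (scale 8 for doubles). */
    return _mm256_mmask_i32gather_pd(fallback, m, idx, base, 8);
}
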
@@ -421,327 +421,279 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
                                      (__v8hi)_mm_setzero_si128())

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
_mm256_shldv_epi64(__m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S,
                                                    (__v4di) __A,
                                                    (__v4di) __B,
                                                    __U);
  return (__m256i)__builtin_ia32_vpshldvq256((__v4di)__A, (__v4di)__B,
                                             (__v4di)__C);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
_mm256_mask_shldv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshldvq256_maskz ((__v4di) __S,
                                                     (__v4di) __A,
                                                     (__v4di) __B,
                                                     __U);
  return (__m256i)__builtin_ia32_selectq_256(__U,
                      (__v4di)_mm256_shldv_epi64(__A, __B, __C),
                      (__v4di)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shldv_epi64(__m256i __S, __m256i __A, __m256i __B)
_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S,
                                                    (__v4di) __A,
                                                    (__v4di) __B,
                                                    (__mmask8) -1);
  return (__m256i)__builtin_ia32_selectq_256(__U,
                      (__v4di)_mm256_shldv_epi64(__A, __B, __C),
                      (__v4di)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
_mm_shldv_epi64(__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S,
                                                    (__v2di) __A,
                                                    (__v2di) __B,
                                                    __U);
  return (__m128i)__builtin_ia32_vpshldvq128((__v2di)__A, (__v2di)__B,
                                             (__v2di)__C);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
_mm_mask_shldv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshldvq128_maskz ((__v2di) __S,
                                                     (__v2di) __A,
                                                     (__v2di) __B,
                                                     __U);
  return (__m128i)__builtin_ia32_selectq_128(__U,
                      (__v2di)_mm_shldv_epi64(__A, __B, __C),
                      (__v2di)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shldv_epi64(__m128i __S, __m128i __A, __m128i __B)
_mm_maskz_shldv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S,
                                                    (__v2di) __A,
                                                    (__v2di) __B,
                                                    (__mmask8) -1);
  return (__m128i)__builtin_ia32_selectq_128(__U,
                      (__v2di)_mm_shldv_epi64(__A, __B, __C),
                      (__v2di)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_shldv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
_mm256_shldv_epi32(__m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S,
                                                    (__v8si) __A,
                                                    (__v8si) __B,
                                                    __U);
  return (__m256i)__builtin_ia32_vpshldvd256((__v8si)__A, (__v8si)__B,
                                             (__v8si)__C);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
_mm256_mask_shldv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshldvd256_maskz ((__v8si) __S,
                                                     (__v8si) __A,
                                                     (__v8si) __B,
                                                     __U);
  return (__m256i)__builtin_ia32_selectd_256(__U,
                      (__v8si)_mm256_shldv_epi32(__A, __B, __C),
                      (__v8si)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shldv_epi32(__m256i __S, __m256i __A, __m256i __B)
_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S,
                                                    (__v8si) __A,
                                                    (__v8si) __B,
                                                    (__mmask8) -1);
  return (__m256i)__builtin_ia32_selectd_256(__U,
                      (__v8si)_mm256_shldv_epi32(__A, __B, __C),
                      (__v8si)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
_mm_shldv_epi32(__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S,
                                                    (__v4si) __A,
                                                    (__v4si) __B,
                                                    __U);
  return (__m128i)__builtin_ia32_vpshldvd128((__v4si)__A, (__v4si)__B,
                                             (__v4si)__C);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
_mm_mask_shldv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshldvd128_maskz ((__v4si) __S,
                                                     (__v4si) __A,
                                                     (__v4si) __B,
                                                     __U);
  return (__m128i)__builtin_ia32_selectd_128(__U,
                      (__v4si)_mm_shldv_epi32(__A, __B, __C),
                      (__v4si)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shldv_epi32(__m128i __S, __m128i __A, __m128i __B)
_mm_maskz_shldv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S,
                                                    (__v4si) __A,
                                                    (__v4si) __B,
                                                    (__mmask8) -1);
  return (__m128i)__builtin_ia32_selectd_128(__U,
                      (__v4si)_mm_shldv_epi32(__A, __B, __C),
                      (__v4si)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_shldv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B)
_mm256_shldv_epi16(__m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S,
                                                    (__v16hi) __A,
                                                    (__v16hi) __B,
                                                    __U);
  return (__m256i)__builtin_ia32_vpshldvw256((__v16hi)__A, (__v16hi)__B,
                                             (__v16hi)__C);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B)
_mm256_mask_shldv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshldvw256_maskz ((__v16hi) __S,
                                                     (__v16hi) __A,
                                                     (__v16hi) __B,
                                                     __U);
  return (__m256i)__builtin_ia32_selectw_256(__U,
                      (__v16hi)_mm256_shldv_epi16(__A, __B, __C),
                      (__v16hi)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shldv_epi16(__m256i __S, __m256i __A, __m256i __B)
_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S,
                                                    (__v16hi) __A,
                                                    (__v16hi) __B,
                                                    (__mmask16) -1);
  return (__m256i)__builtin_ia32_selectw_256(__U,
                      (__v16hi)_mm256_shldv_epi16(__A, __B, __C),
                      (__v16hi)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
_mm_shldv_epi16(__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S,
                                                    (__v8hi) __A,
                                                    (__v8hi) __B,
                                                    __U);
  return (__m128i)__builtin_ia32_vpshldvw128((__v8hi)__A, (__v8hi)__B,
                                             (__v8hi)__C);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
_mm_mask_shldv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshldvw128_maskz ((__v8hi) __S,
                                                     (__v8hi) __A,
                                                     (__v8hi) __B,
                                                     __U);
  return (__m128i)__builtin_ia32_selectw_128(__U,
                      (__v8hi)_mm_shldv_epi16(__A, __B, __C),
                      (__v8hi)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shldv_epi16(__m128i __S, __m128i __A, __m128i __B)
_mm_maskz_shldv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S,
                                                    (__v8hi) __A,
                                                    (__v8hi) __B,
                                                    (__mmask8) -1);
  return (__m128i)__builtin_ia32_selectw_128(__U,
                      (__v8hi)_mm_shldv_epi16(__A, __B, __C),
                      (__v8hi)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_shrdv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
_mm256_shrdv_epi64(__m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S,
                                                    (__v4di) __A,
                                                    (__v4di) __B,
                                                    __U);
  return (__m256i)__builtin_ia32_vpshrdvq256((__v4di)__A, (__v4di)__B,
                                             (__v4di)__C);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
_mm256_mask_shrdv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshrdvq256_maskz ((__v4di) __S,
                                                     (__v4di) __A,
                                                     (__v4di) __B,
                                                     __U);
  return (__m256i)__builtin_ia32_selectq_256(__U,
                      (__v4di)_mm256_shrdv_epi64(__A, __B, __C),
                      (__v4di)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shrdv_epi64(__m256i __S, __m256i __A, __m256i __B)
_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S,
                                                    (__v4di) __A,
                                                    (__v4di) __B,
                                                    (__mmask8) -1);
  return (__m256i)__builtin_ia32_selectq_256(__U,
                      (__v4di)_mm256_shrdv_epi64(__A, __B, __C),
                      (__v4di)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
_mm_shrdv_epi64(__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S,
                                                    (__v2di) __A,
                                                    (__v2di) __B,
                                                    __U);
  return (__m128i)__builtin_ia32_vpshrdvq128((__v2di)__A, (__v2di)__B,
                                             (__v2di)__C);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
_mm_mask_shrdv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshrdvq128_maskz ((__v2di) __S,
                                                     (__v2di) __A,
                                                     (__v2di) __B,
                                                     __U);
  return (__m128i)__builtin_ia32_selectq_128(__U,
                      (__v2di)_mm_shrdv_epi64(__A, __B, __C),
                      (__v2di)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shrdv_epi64(__m128i __S, __m128i __A, __m128i __B)
_mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S,
                                                    (__v2di) __A,
                                                    (__v2di) __B,
                                                    (__mmask8) -1);
  return (__m128i)__builtin_ia32_selectq_128(__U,
                      (__v2di)_mm_shrdv_epi64(__A, __B, __C),
                      (__v2di)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_shrdv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
_mm256_shrdv_epi32(__m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S,
                                                    (__v8si) __A,
                                                    (__v8si) __B,
                                                    __U);
  return (__m256i)__builtin_ia32_vpshrdvd256((__v8si)__A, (__v8si)__B,
                                             (__v8si)__C);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
_mm256_mask_shrdv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshrdvd256_maskz ((__v8si) __S,
                                                     (__v8si) __A,
                                                     (__v8si) __B,
                                                     __U);
  return (__m256i)__builtin_ia32_selectd_256(__U,
                      (__v8si)_mm256_shrdv_epi32(__A, __B, __C),
                      (__v8si)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shrdv_epi32(__m256i __S, __m256i __A, __m256i __B)
_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S,
                                                    (__v8si) __A,
                                                    (__v8si) __B,
                                                    (__mmask8) -1);
  return (__m256i)__builtin_ia32_selectd_256(__U,
                      (__v8si)_mm256_shrdv_epi32(__A, __B, __C),
                      (__v8si)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
_mm_shrdv_epi32(__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S,
                                                    (__v4si) __A,
                                                    (__v4si) __B,
                                                    __U);
  return (__m128i)__builtin_ia32_vpshrdvd128((__v4si)__A, (__v4si)__B,
                                             (__v4si)__C);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
_mm_mask_shrdv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshrdvd128_maskz ((__v4si) __S,
                                                     (__v4si) __A,
                                                     (__v4si) __B,
                                                     __U);
  return (__m128i)__builtin_ia32_selectd_128(__U,
                      (__v4si)_mm_shrdv_epi32(__A, __B, __C),
                      (__v4si)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B)
_mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S,
                                                    (__v4si) __A,
                                                    (__v4si) __B,
                                                    (__mmask8) -1);
  return (__m128i)__builtin_ia32_selectd_128(__U,
                      (__v4si)_mm_shrdv_epi32(__A, __B, __C),
                      (__v4si)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_shrdv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B)
_mm256_shrdv_epi16(__m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S,
                                                    (__v16hi) __A,
                                                    (__v16hi) __B,
                                                    __U);
  return (__m256i)__builtin_ia32_vpshrdvw256((__v16hi)__A, (__v16hi)__B,
                                             (__v16hi)__C);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B)
_mm256_mask_shrdv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshrdvw256_maskz ((__v16hi) __S,
                                                     (__v16hi) __A,
                                                     (__v16hi) __B,
                                                     __U);
  return (__m256i)__builtin_ia32_selectw_256(__U,
                      (__v16hi)_mm256_shrdv_epi16(__A, __B, __C),
                      (__v16hi)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shrdv_epi16(__m256i __S, __m256i __A, __m256i __B)
_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S,
                                                    (__v16hi) __A,
                                                    (__v16hi) __B,
                                                    (__mmask16) -1);
  return (__m256i)__builtin_ia32_selectw_256(__U,
                      (__v16hi)_mm256_shrdv_epi16(__A, __B, __C),
                      (__v16hi)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
_mm_shrdv_epi16(__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S,
                                                    (__v8hi) __A,
                                                    (__v8hi) __B,
                                                    __U);
  return (__m128i)__builtin_ia32_vpshrdvw128((__v8hi)__A, (__v8hi)__B,
                                             (__v8hi)__C);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
_mm_mask_shrdv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshrdvw128_maskz ((__v8hi) __S,
                                                     (__v8hi) __A,
                                                     (__v8hi) __B,
                                                     __U);
  return (__m128i)__builtin_ia32_selectw_128(__U,
                      (__v8hi)_mm_shrdv_epi16(__A, __B, __C),
                      (__v8hi)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B)
_mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S,
                                                    (__v8hi) __A,
                                                    (__v8hi) __B,
                                                    (__mmask8) -1);
  return (__m128i)__builtin_ia32_selectw_128(__U,
                      (__v8hi)_mm_shrdv_epi16(__A, __B, __C),
                      (__v8hi)_mm_setzero_si128());
}
@@ -62,7 +62,7 @@
static __inline__ unsigned short __RELAXED_FN_ATTRS
__tzcnt_u16(unsigned short __X)
{
  return __X ? __builtin_ctzs(__X) : 16;
  return __builtin_ia32_tzcnt_u16(__X);
}

/// Performs a bitwise AND of the second operand with the one's
@@ -196,7 +196,7 @@ __blsr_u32(unsigned int __X)
static __inline__ unsigned int __RELAXED_FN_ATTRS
__tzcnt_u32(unsigned int __X)
{
  return __X ? __builtin_ctz(__X) : 32;
  return __builtin_ia32_tzcnt_u32(__X);
}

/// Counts the number of trailing zero bits in the operand.
@@ -212,7 +212,7 @@ __tzcnt_u32(unsigned int __X)
static __inline__ int __RELAXED_FN_ATTRS
_mm_tzcnt_32(unsigned int __X)
{
  return __X ? __builtin_ctz(__X) : 32;
  return __builtin_ia32_tzcnt_u32(__X);
}

#ifdef __x86_64__
@@ -359,7 +359,7 @@ __blsr_u64(unsigned long long __X)
static __inline__ unsigned long long __RELAXED_FN_ATTRS
__tzcnt_u64(unsigned long long __X)
{
  return __X ? __builtin_ctzll(__X) : 64;
  return __builtin_ia32_tzcnt_u64(__X);
}

/// Counts the number of trailing zero bits in the operand.
@@ -375,7 +375,7 @@ __tzcnt_u64(unsigned long long __X)
static __inline__ long long __RELAXED_FN_ATTRS
_mm_tzcnt_64(unsigned long long __X)
{
  return __X ? __builtin_ctzll(__X) : 64;
  return __builtin_ia32_tzcnt_u64(__X);
}

#endif /* __x86_64__ */
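
A small usage sketch for the tzcnt helpers above (illustration only, assuming an x86 target); both the old conditional and the new builtin are defined for a zero input and return the operand width:

#include <x86intrin.h>
#include <assert.h>

static void tzcnt_demo(void)
{
    assert(__tzcnt_u32(0x8u) == 3);  /* 0b1000 has three trailing zeros */
    assert(__tzcnt_u32(0u) == 32);   /* zero input yields the bit width */
}
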
@@ -73,10 +73,12 @@ __device__ inline void operator delete[](void *ptr,

// Sized delete, C++14 only.
#if __cplusplus >= 201402L
__device__ void operator delete(void *ptr, __SIZE_TYPE__ size) CUDA_NOEXCEPT {
__device__ inline void operator delete(void *ptr,
                                       __SIZE_TYPE__ size) CUDA_NOEXCEPT {
  ::operator delete(ptr);
}
__device__ void operator delete[](void *ptr, __SIZE_TYPE__ size) CUDA_NOEXCEPT {
__device__ inline void operator delete[](void *ptr,
                                         __SIZE_TYPE__ size) CUDA_NOEXCEPT {
  ::operator delete(ptr);
}
#endif
@@ -1675,7 +1675,49 @@ _mm_loadu_si64(void const *__a)
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  long long __u = ((struct __loadu_si64*)__a)->__v;
  return __extension__ (__m128i)(__v2di){__u, 0L};
  return __extension__ (__m128i)(__v2di){__u, 0LL};
}

/// Loads a 32-bit integer value to the low element of a 128-bit integer
/// vector and clears the upper elements.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
///
/// \param __a
///    A pointer to a 32-bit memory location. The address of the memory
///    location does not have to be aligned.
/// \returns A 128-bit vector of [4 x i32] containing the loaded value.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_loadu_si32(void const *__a)
{
  struct __loadu_si32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  int __u = ((struct __loadu_si32*)__a)->__v;
  return __extension__ (__m128i)(__v4si){__u, 0, 0, 0};
}

/// Loads a 16-bit integer value to the low element of a 128-bit integer
/// vector and clears the upper elements.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic does not correspond to a specific instruction.
///
/// \param __a
///    A pointer to a 16-bit memory location. The address of the memory
///    location does not have to be aligned.
/// \returns A 128-bit vector of [8 x i16] containing the loaded value.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_loadu_si16(void const *__a)
{
  struct __loadu_si16 {
    short __v;
  } __attribute__((__packed__, __may_alias__));
  short __u = ((struct __loadu_si16*)__a)->__v;
  return __extension__ (__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0};
}
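
A minimal sketch using the new scalar loads (illustration only, assuming an SSE2 target); the packed/may_alias struct inside each intrinsic is what allows the arbitrary alignment:

#include <emmintrin.h>

static __m128i sum_scalars(const unsigned char *buf)
{
    /* Four bytes from an odd offset land in lane 0; lanes 1-3 are zero. */
    __m128i lo = _mm_loadu_si32(buf + 1);
    __m128i hi = _mm_loadu_si16(buf + 5);
    return _mm_add_epi32(lo, hi);
}
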
/// Loads a 64-bit double-precision value to the low element of a
@@ -3993,6 +4035,69 @@ _mm_storeu_si128(__m128i *__p, __m128i __b)
  ((struct __storeu_si128*)__p)->__v = __b;
}

/// Stores a 64-bit integer value from the low element of a 128-bit integer
/// vector.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
///
/// \param __p
///    A pointer to a 64-bit memory location. The address of the memory
///    location does not have to be aligned.
/// \param __b
///    A 128-bit integer vector containing the value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_storeu_si64(void *__p, __m128i __b)
{
  struct __storeu_si64 {
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_si64*)__p)->__v = ((__v2di)__b)[0];
}

/// Stores a 32-bit integer value from the low element of a 128-bit integer
/// vector.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
///
/// \param __p
///    A pointer to a 32-bit memory location. The address of the memory
///    location does not have to be aligned.
/// \param __b
///    A 128-bit integer vector containing the value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_storeu_si32(void *__p, __m128i __b)
{
  struct __storeu_si32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_si32*)__p)->__v = ((__v4si)__b)[0];
}

/// Stores a 16-bit integer value from the low element of a 128-bit integer
/// vector.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic does not correspond to a specific instruction.
///
/// \param __p
///    A pointer to a 16-bit memory location. The address of the memory
///    location does not have to be aligned.
/// \param __b
///    A 128-bit integer vector containing the value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_storeu_si16(void *__p, __m128i __b)
{
  struct __storeu_si16 {
    short __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_si16*)__p)->__v = ((__v8hi)__b)[0];
}
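
A sketch pairing the scalar stores above with their load counterparts (illustration only):

#include <emmintrin.h>

static void copy_low_dword(void *dst, const void *src)
{
    /* An unaligned 32-bit copy routed through an XMM register. */
    _mm_storeu_si32(dst, _mm_loadu_si32(src));
}
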
/// Moves bytes selected by the mask from the first operand to the
/// specified unaligned memory location. When a mask bit is 1, the
/// corresponding byte is written, otherwise it is not written.

@@ -21,8 +21,8 @@
 *===-----------------------------------------------------------------------===
 */

#ifndef __FLOAT_H
#define __FLOAT_H
#ifndef __CLANG_FLOAT_H
#define __CLANG_FLOAT_H

/* If we're on MinGW, fall back to the system's float.h, which might have
 * additional definitions provided for Windows.
@@ -85,6 +85,9 @@
#  undef FLT_DECIMAL_DIG
#  undef DBL_DECIMAL_DIG
#  undef LDBL_DECIMAL_DIG
#  undef FLT_HAS_SUBNORM
#  undef DBL_HAS_SUBNORM
#  undef LDBL_HAS_SUBNORM
# endif
#endif

@@ -141,6 +144,9 @@
#  define FLT_DECIMAL_DIG __FLT_DECIMAL_DIG__
#  define DBL_DECIMAL_DIG __DBL_DECIMAL_DIG__
#  define LDBL_DECIMAL_DIG __LDBL_DECIMAL_DIG__
#  define FLT_HAS_SUBNORM __FLT_HAS_DENORM__
#  define DBL_HAS_SUBNORM __DBL_HAS_DENORM__
#  define LDBL_HAS_SUBNORM __LDBL_HAS_DENORM__
#endif

#ifdef __STDC_WANT_IEC_60559_TYPES_EXT__
@@ -157,4 +163,4 @@
#  define FLT16_TRUE_MIN __FLT16_TRUE_MIN__
#endif /* __STDC_WANT_IEC_60559_TYPES_EXT__ */

#endif /* __FLOAT_H */
#endif /* __CLANG_FLOAT_H */
@@ -306,6 +306,65 @@ _writegsbase_u64(unsigned long long __V)
#endif
#endif /* __FSGSBASE__ */

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MOVBE__)

/* The structs used below are to force the load/store to be unaligned. This
 * is accomplished with the __packed__ attribute. The __may_alias__ prevents
 * tbaa metadata from being generated based on the struct and the type of the
 * field inside of it.
 */
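
The comment above describes the packed/may_alias idiom used throughout these headers; a standalone model of it, written for illustration:

#include <stdint.h>

/* Reading through this struct compiles to an unaligned load and carries
   no type-based aliasing assumptions about *p. */
static uint32_t load_u32_unaligned(const void *p)
{
    struct wrap {
        uint32_t v;
    } __attribute__((__packed__, __may_alias__));
    return ((const struct wrap *)p)->v;
}
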
static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const * __P) {
  struct __loadu_i16 {
    short __v;
  } __attribute__((__packed__, __may_alias__));
  return __builtin_bswap16(((struct __loadu_i16*)__P)->__v);
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void * __P, short __D) {
  struct __storeu_i16 {
    short __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i16*)__P)->__v = __builtin_bswap16(__D);
}

static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const * __P) {
  struct __loadu_i32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  return __builtin_bswap32(((struct __loadu_i32*)__P)->__v);
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void * __P, int __D) {
  struct __storeu_i32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i32*)__P)->__v = __builtin_bswap32(__D);
}

#ifdef __x86_64__
static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const * __P) {
  struct __loadu_i64 {
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  return __builtin_bswap64(((struct __loadu_i64*)__P)->__v);
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void * __P, long long __D) {
  struct __storeu_i64 {
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i64*)__P)->__v = __builtin_bswap64(__D);
}
#endif
#endif /* __MOVBE */

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__)
#include <rtmintrin.h>
#include <xtestintrin.h>
@@ -90,8 +90,6 @@ void __inwordstring(unsigned short, unsigned short *, unsigned long);
void __lidt(void *);
unsigned __int64 __ll_lshift(unsigned __int64, int);
__int64 __ll_rshift(__int64, int);
unsigned int __lzcnt(unsigned int);
unsigned short __lzcnt16(unsigned short);
static __inline__
void __movsb(unsigned char *, unsigned char const *, size_t);
static __inline__
@@ -219,7 +217,6 @@ void __incgsbyte(unsigned long);
void __incgsdword(unsigned long);
void __incgsqword(unsigned long);
void __incgsword(unsigned long);
unsigned __int64 __lzcnt64(unsigned __int64);
static __inline__
void __movsq(unsigned long long *, unsigned long long const *, size_t);
static __inline__
@@ -329,189 +326,63 @@ __int64 _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask);
|* Interlocked Exchange Add
\*----------------------------------------------------------------------------*/
#if defined(__arm__) || defined(__aarch64__)
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedExchangeAdd8_acq(char volatile *_Addend, char _Value) {
  return __atomic_fetch_add(_Addend, _Value, __ATOMIC_ACQUIRE);
}
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedExchangeAdd8_nf(char volatile *_Addend, char _Value) {
  return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELAXED);
}
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedExchangeAdd8_rel(char volatile *_Addend, char _Value) {
  return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELEASE);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedExchangeAdd16_acq(short volatile *_Addend, short _Value) {
  return __atomic_fetch_add(_Addend, _Value, __ATOMIC_ACQUIRE);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedExchangeAdd16_nf(short volatile *_Addend, short _Value) {
  return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELAXED);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedExchangeAdd16_rel(short volatile *_Addend, short _Value) {
  return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELEASE);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchangeAdd_acq(long volatile *_Addend, long _Value) {
  return __atomic_fetch_add(_Addend, _Value, __ATOMIC_ACQUIRE);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchangeAdd_nf(long volatile *_Addend, long _Value) {
  return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELAXED);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchangeAdd_rel(long volatile *_Addend, long _Value) {
  return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELEASE);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchangeAdd64_acq(__int64 volatile *_Addend, __int64 _Value) {
  return __atomic_fetch_add(_Addend, _Value, __ATOMIC_ACQUIRE);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchangeAdd64_nf(__int64 volatile *_Addend, __int64 _Value) {
  return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELAXED);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchangeAdd64_rel(__int64 volatile *_Addend, __int64 _Value) {
  return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELEASE);
}
char _InterlockedExchangeAdd8_acq(char volatile *_Addend, char _Value);
char _InterlockedExchangeAdd8_nf(char volatile *_Addend, char _Value);
char _InterlockedExchangeAdd8_rel(char volatile *_Addend, char _Value);
short _InterlockedExchangeAdd16_acq(short volatile *_Addend, short _Value);
short _InterlockedExchangeAdd16_nf(short volatile *_Addend, short _Value);
short _InterlockedExchangeAdd16_rel(short volatile *_Addend, short _Value);
long _InterlockedExchangeAdd_acq(long volatile *_Addend, long _Value);
long _InterlockedExchangeAdd_nf(long volatile *_Addend, long _Value);
long _InterlockedExchangeAdd_rel(long volatile *_Addend, long _Value);
__int64 _InterlockedExchangeAdd64_acq(__int64 volatile *_Addend, __int64 _Value);
__int64 _InterlockedExchangeAdd64_nf(__int64 volatile *_Addend, __int64 _Value);
__int64 _InterlockedExchangeAdd64_rel(__int64 volatile *_Addend, __int64 _Value);
#endif
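
A scalar model of the acq/nf/rel suffix convention used by the interlocked intrinsics in this hunk (illustration only): _acq maps to acquire ordering, _rel to release, and _nf ("no fence") to relaxed:

static long fetch_add_release(long volatile *addend, long value)
{
    /* The shape of _InterlockedExchangeAdd_rel on ARM: returns the old
       value and orders prior writes before the update. */
    return __atomic_fetch_add(addend, value, __ATOMIC_RELEASE);
}
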
/*----------------------------------------------------------------------------*\
|* Interlocked Increment
\*----------------------------------------------------------------------------*/
#if defined(__arm__) || defined(__aarch64__)
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedIncrement16_acq(short volatile *_Value) {
  return __atomic_add_fetch(_Value, 1, __ATOMIC_ACQUIRE);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedIncrement16_nf(short volatile *_Value) {
  return __atomic_add_fetch(_Value, 1, __ATOMIC_RELAXED);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedIncrement16_rel(short volatile *_Value) {
  return __atomic_add_fetch(_Value, 1, __ATOMIC_RELEASE);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedIncrement_acq(long volatile *_Value) {
  return __atomic_add_fetch(_Value, 1, __ATOMIC_ACQUIRE);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedIncrement_nf(long volatile *_Value) {
  return __atomic_add_fetch(_Value, 1, __ATOMIC_RELAXED);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedIncrement_rel(long volatile *_Value) {
  return __atomic_add_fetch(_Value, 1, __ATOMIC_RELEASE);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedIncrement64_acq(__int64 volatile *_Value) {
  return __atomic_add_fetch(_Value, 1, __ATOMIC_ACQUIRE);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedIncrement64_nf(__int64 volatile *_Value) {
  return __atomic_add_fetch(_Value, 1, __ATOMIC_RELAXED);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedIncrement64_rel(__int64 volatile *_Value) {
  return __atomic_add_fetch(_Value, 1, __ATOMIC_RELEASE);
}
short _InterlockedIncrement16_acq(short volatile *_Value);
short _InterlockedIncrement16_nf(short volatile *_Value);
short _InterlockedIncrement16_rel(short volatile *_Value);
long _InterlockedIncrement_acq(long volatile *_Value);
long _InterlockedIncrement_nf(long volatile *_Value);
long _InterlockedIncrement_rel(long volatile *_Value);
__int64 _InterlockedIncrement64_acq(__int64 volatile *_Value);
__int64 _InterlockedIncrement64_nf(__int64 volatile *_Value);
__int64 _InterlockedIncrement64_rel(__int64 volatile *_Value);
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Decrement
\*----------------------------------------------------------------------------*/
#if defined(__arm__) || defined(__aarch64__)
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedDecrement16_acq(short volatile *_Value) {
  return __atomic_sub_fetch(_Value, 1, __ATOMIC_ACQUIRE);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedDecrement16_nf(short volatile *_Value) {
  return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELAXED);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedDecrement16_rel(short volatile *_Value) {
  return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELEASE);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedDecrement_acq(long volatile *_Value) {
  return __atomic_sub_fetch(_Value, 1, __ATOMIC_ACQUIRE);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedDecrement_nf(long volatile *_Value) {
  return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELAXED);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedDecrement_rel(long volatile *_Value) {
  return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELEASE);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedDecrement64_acq(__int64 volatile *_Value) {
  return __atomic_sub_fetch(_Value, 1, __ATOMIC_ACQUIRE);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedDecrement64_nf(__int64 volatile *_Value) {
  return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELAXED);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedDecrement64_rel(__int64 volatile *_Value) {
  return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELEASE);
}
short _InterlockedDecrement16_acq(short volatile *_Value);
short _InterlockedDecrement16_nf(short volatile *_Value);
short _InterlockedDecrement16_rel(short volatile *_Value);
long _InterlockedDecrement_acq(long volatile *_Value);
long _InterlockedDecrement_nf(long volatile *_Value);
long _InterlockedDecrement_rel(long volatile *_Value);
__int64 _InterlockedDecrement64_acq(__int64 volatile *_Value);
__int64 _InterlockedDecrement64_nf(__int64 volatile *_Value);
__int64 _InterlockedDecrement64_rel(__int64 volatile *_Value);
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked And
\*----------------------------------------------------------------------------*/
#if defined(__arm__) || defined(__aarch64__)
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedAnd8_acq(char volatile *_Value, char _Mask) {
  return __atomic_fetch_and(_Value, _Mask, __ATOMIC_ACQUIRE);
}
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedAnd8_nf(char volatile *_Value, char _Mask) {
  return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELAXED);
}
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedAnd8_rel(char volatile *_Value, char _Mask) {
  return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELEASE);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedAnd16_acq(short volatile *_Value, short _Mask) {
  return __atomic_fetch_and(_Value, _Mask, __ATOMIC_ACQUIRE);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedAnd16_nf(short volatile *_Value, short _Mask) {
  return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELAXED);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedAnd16_rel(short volatile *_Value, short _Mask) {
  return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELEASE);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedAnd_acq(long volatile *_Value, long _Mask) {
  return __atomic_fetch_and(_Value, _Mask, __ATOMIC_ACQUIRE);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedAnd_nf(long volatile *_Value, long _Mask) {
  return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELAXED);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedAnd_rel(long volatile *_Value, long _Mask) {
  return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELEASE);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedAnd64_acq(__int64 volatile *_Value, __int64 _Mask) {
  return __atomic_fetch_and(_Value, _Mask, __ATOMIC_ACQUIRE);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedAnd64_nf(__int64 volatile *_Value, __int64 _Mask) {
  return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELAXED);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedAnd64_rel(__int64 volatile *_Value, __int64 _Mask) {
  return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELEASE);
}
char _InterlockedAnd8_acq(char volatile *_Value, char _Mask);
char _InterlockedAnd8_nf(char volatile *_Value, char _Mask);
char _InterlockedAnd8_rel(char volatile *_Value, char _Mask);
short _InterlockedAnd16_acq(short volatile *_Value, short _Mask);
short _InterlockedAnd16_nf(short volatile *_Value, short _Mask);
short _InterlockedAnd16_rel(short volatile *_Value, short _Mask);
long _InterlockedAnd_acq(long volatile *_Value, long _Mask);
long _InterlockedAnd_nf(long volatile *_Value, long _Mask);
long _InterlockedAnd_rel(long volatile *_Value, long _Mask);
__int64 _InterlockedAnd64_acq(__int64 volatile *_Value, __int64 _Mask);
__int64 _InterlockedAnd64_nf(__int64 volatile *_Value, __int64 _Mask);
__int64 _InterlockedAnd64_rel(__int64 volatile *_Value, __int64 _Mask);
#endif
/*----------------------------------------------------------------------------*\
|* Bit Counting and Testing
@@ -534,261 +405,81 @@ unsigned char _interlockedbittestandreset_rel(long volatile *_BitBase,
|* Interlocked Or
\*----------------------------------------------------------------------------*/
#if defined(__arm__) || defined(__aarch64__)
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedOr8_acq(char volatile *_Value, char _Mask) {
  return __atomic_fetch_or(_Value, _Mask, __ATOMIC_ACQUIRE);
}
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedOr8_nf(char volatile *_Value, char _Mask) {
  return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELAXED);
}
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedOr8_rel(char volatile *_Value, char _Mask) {
  return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELEASE);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedOr16_acq(short volatile *_Value, short _Mask) {
  return __atomic_fetch_or(_Value, _Mask, __ATOMIC_ACQUIRE);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedOr16_nf(short volatile *_Value, short _Mask) {
  return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELAXED);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedOr16_rel(short volatile *_Value, short _Mask) {
  return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELEASE);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedOr_acq(long volatile *_Value, long _Mask) {
  return __atomic_fetch_or(_Value, _Mask, __ATOMIC_ACQUIRE);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedOr_nf(long volatile *_Value, long _Mask) {
  return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELAXED);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedOr_rel(long volatile *_Value, long _Mask) {
  return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELEASE);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedOr64_acq(__int64 volatile *_Value, __int64 _Mask) {
  return __atomic_fetch_or(_Value, _Mask, __ATOMIC_ACQUIRE);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedOr64_nf(__int64 volatile *_Value, __int64 _Mask) {
  return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELAXED);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedOr64_rel(__int64 volatile *_Value, __int64 _Mask) {
  return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELEASE);
}
char _InterlockedOr8_acq(char volatile *_Value, char _Mask);
char _InterlockedOr8_nf(char volatile *_Value, char _Mask);
char _InterlockedOr8_rel(char volatile *_Value, char _Mask);
short _InterlockedOr16_acq(short volatile *_Value, short _Mask);
short _InterlockedOr16_nf(short volatile *_Value, short _Mask);
short _InterlockedOr16_rel(short volatile *_Value, short _Mask);
long _InterlockedOr_acq(long volatile *_Value, long _Mask);
long _InterlockedOr_nf(long volatile *_Value, long _Mask);
long _InterlockedOr_rel(long volatile *_Value, long _Mask);
__int64 _InterlockedOr64_acq(__int64 volatile *_Value, __int64 _Mask);
__int64 _InterlockedOr64_nf(__int64 volatile *_Value, __int64 _Mask);
__int64 _InterlockedOr64_rel(__int64 volatile *_Value, __int64 _Mask);
#endif
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Xor
|
||||
\*----------------------------------------------------------------------------*/
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
static __inline__ char __DEFAULT_FN_ATTRS
|
||||
_InterlockedXor8_acq(char volatile *_Value, char _Mask) {
|
||||
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
static __inline__ char __DEFAULT_FN_ATTRS
|
||||
_InterlockedXor8_nf(char volatile *_Value, char _Mask) {
|
||||
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELAXED);
|
||||
}
|
||||
static __inline__ char __DEFAULT_FN_ATTRS
|
||||
_InterlockedXor8_rel(char volatile *_Value, char _Mask) {
|
||||
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELEASE);
|
||||
}
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
_InterlockedXor16_acq(short volatile *_Value, short _Mask) {
|
||||
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
_InterlockedXor16_nf(short volatile *_Value, short _Mask) {
|
||||
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELAXED);
|
||||
}
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
_InterlockedXor16_rel(short volatile *_Value, short _Mask) {
|
||||
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELEASE);
|
||||
}
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedXor_acq(long volatile *_Value, long _Mask) {
|
||||
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedXor_nf(long volatile *_Value, long _Mask) {
|
||||
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELAXED);
|
||||
}
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedXor_rel(long volatile *_Value, long _Mask) {
|
||||
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELEASE);
|
||||
}
|
||||
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
||||
_InterlockedXor64_acq(__int64 volatile *_Value, __int64 _Mask) {
|
||||
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
||||
_InterlockedXor64_nf(__int64 volatile *_Value, __int64 _Mask) {
|
||||
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELAXED);
|
||||
}
|
||||
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
||||
_InterlockedXor64_rel(__int64 volatile *_Value, __int64 _Mask) {
|
||||
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELEASE);
|
||||
}
|
||||
char _InterlockedXor8_acq(char volatile *_Value, char _Mask);
|
||||
char _InterlockedXor8_nf(char volatile *_Value, char _Mask);
|
||||
char _InterlockedXor8_rel(char volatile *_Value, char _Mask);
|
||||
short _InterlockedXor16_acq(short volatile *_Value, short _Mask);
|
||||
short _InterlockedXor16_nf(short volatile *_Value, short _Mask);
|
||||
short _InterlockedXor16_rel(short volatile *_Value, short _Mask);
|
||||
long _InterlockedXor_acq(long volatile *_Value, long _Mask);
|
||||
long _InterlockedXor_nf(long volatile *_Value, long _Mask);
|
||||
long _InterlockedXor_rel(long volatile *_Value, long _Mask);
|
||||
__int64 _InterlockedXor64_acq(__int64 volatile *_Value, __int64 _Mask);
|
||||
__int64 _InterlockedXor64_nf(__int64 volatile *_Value, __int64 _Mask);
|
||||
__int64 _InterlockedXor64_rel(__int64 volatile *_Value, __int64 _Mask);
|
||||
#endif
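A minimal usage sketch (illustrative only, not part of the header; assumes an ARM/AArch64 MSVC-compatible target): each `_acq`/`_rel`/`_nf` fetch-or/xor intrinsic applies the mask atomically and returns the value the destination held before the update.

#include <intrin.h>

static long g_flags;

void flags_demo(void) {
  /* Set bit 2 with acquire ordering; `prev` is the pre-update value. */
  long prev = _InterlockedOr_acq(&g_flags, 0x4);
  (void)prev;
  /* Toggle the same bit back with release ordering. */
  _InterlockedXor_rel(&g_flags, 0x4);
}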
/*----------------------------------------------------------------------------*\
|* Interlocked Exchange
\*----------------------------------------------------------------------------*/
#if defined(__arm__) || defined(__aarch64__)
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedExchange8_acq(char volatile *_Target, char _Value) {
  __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_ACQUIRE);
  return _Value;
}
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedExchange8_nf(char volatile *_Target, char _Value) {
  __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELAXED);
  return _Value;
}
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedExchange8_rel(char volatile *_Target, char _Value) {
  __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELEASE);
  return _Value;
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedExchange16_acq(short volatile *_Target, short _Value) {
  __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_ACQUIRE);
  return _Value;
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedExchange16_nf(short volatile *_Target, short _Value) {
  __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELAXED);
  return _Value;
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedExchange16_rel(short volatile *_Target, short _Value) {
  __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELEASE);
  return _Value;
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_acq(long volatile *_Target, long _Value) {
  __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_ACQUIRE);
  return _Value;
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_nf(long volatile *_Target, long _Value) {
  __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELAXED);
  return _Value;
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_rel(long volatile *_Target, long _Value) {
  __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELEASE);
  return _Value;
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_acq(__int64 volatile *_Target, __int64 _Value) {
  __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_ACQUIRE);
  return _Value;
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_nf(__int64 volatile *_Target, __int64 _Value) {
  __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELAXED);
  return _Value;
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_rel(__int64 volatile *_Target, __int64 _Value) {
  __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELEASE);
  return _Value;
}
char _InterlockedExchange8_acq(char volatile *_Target, char _Value);
char _InterlockedExchange8_nf(char volatile *_Target, char _Value);
char _InterlockedExchange8_rel(char volatile *_Target, char _Value);
short _InterlockedExchange16_acq(short volatile *_Target, short _Value);
short _InterlockedExchange16_nf(short volatile *_Target, short _Value);
short _InterlockedExchange16_rel(short volatile *_Target, short _Value);
long _InterlockedExchange_acq(long volatile *_Target, long _Value);
long _InterlockedExchange_nf(long volatile *_Target, long _Value);
long _InterlockedExchange_rel(long volatile *_Target, long _Value);
__int64 _InterlockedExchange64_acq(__int64 volatile *_Target, __int64 _Value);
__int64 _InterlockedExchange64_nf(__int64 volatile *_Target, __int64 _Value);
__int64 _InterlockedExchange64_rel(__int64 volatile *_Target, __int64 _Value);
#endif
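A hedged sketch of the classic use of atomic exchange (illustrative only, not part of the header): a test-and-set spin flag, taken with acquire ordering and cleared with release ordering.

static long g_lock_word;

void spin_flag_acquire(void) {
  /* An old value of 1 means another thread holds the flag; keep retrying. */
  while (_InterlockedExchange_acq(&g_lock_word, 1) != 0)
    ;
}

void spin_flag_release(void) {
  /* Publish prior writes, then clear the flag. */
  _InterlockedExchange_rel(&g_lock_word, 0);
}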
/*----------------------------------------------------------------------------*\
|* Interlocked Compare Exchange
\*----------------------------------------------------------------------------*/
#if defined(__arm__) || defined(__aarch64__)
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedCompareExchange8_acq(char volatile *_Destination,
                                 char _Exchange, char _Comparand) {
  __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
                            __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
  return _Comparand;
}
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedCompareExchange8_nf(char volatile *_Destination,
                                char _Exchange, char _Comparand) {
  __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
                            __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
  return _Comparand;
}
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedCompareExchange8_rel(char volatile *_Destination,
                                 char _Exchange, char _Comparand) {
  __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
                            __ATOMIC_SEQ_CST, __ATOMIC_RELEASE);
  return _Comparand;
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedCompareExchange16_acq(short volatile *_Destination,
                                  short _Exchange, short _Comparand) {
  __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
                            __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
  return _Comparand;
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedCompareExchange16_nf(short volatile *_Destination,
                                 short _Exchange, short _Comparand) {
  __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
                            __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
  return _Comparand;
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedCompareExchange16_rel(short volatile *_Destination,
                                  short _Exchange, short _Comparand) {
  __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
                            __ATOMIC_SEQ_CST, __ATOMIC_RELEASE);
  return _Comparand;
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_acq(long volatile *_Destination,
                                long _Exchange, long _Comparand) {
  __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
                            __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
  return _Comparand;
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_nf(long volatile *_Destination,
                               long _Exchange, long _Comparand) {
  __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
                            __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
  return _Comparand;
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_rel(long volatile *_Destination,
                                long _Exchange, long _Comparand) {
  __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
                            __ATOMIC_SEQ_CST, __ATOMIC_RELEASE);
  return _Comparand;
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_acq(__int64 volatile *_Destination,
                                  __int64 _Exchange, __int64 _Comparand) {
  __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
                            __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
  return _Comparand;
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_nf(__int64 volatile *_Destination,
                                 __int64 _Exchange, __int64 _Comparand) {
  __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
                            __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
  return _Comparand;
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_rel(__int64 volatile *_Destination,
                                  __int64 _Exchange, __int64 _Comparand) {
  __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
                            __ATOMIC_SEQ_CST, __ATOMIC_RELEASE);
  return _Comparand;
}
char _InterlockedCompareExchange8_acq(char volatile *_Destination,
                                      char _Exchange, char _Comparand);
char _InterlockedCompareExchange8_nf(char volatile *_Destination,
                                     char _Exchange, char _Comparand);
char _InterlockedCompareExchange8_rel(char volatile *_Destination,
                                      char _Exchange, char _Comparand);
short _InterlockedCompareExchange16_acq(short volatile *_Destination,
                                        short _Exchange, short _Comparand);
short _InterlockedCompareExchange16_nf(short volatile *_Destination,
                                       short _Exchange, short _Comparand);
short _InterlockedCompareExchange16_rel(short volatile *_Destination,
                                        short _Exchange, short _Comparand);
long _InterlockedCompareExchange_acq(long volatile *_Destination,
                                     long _Exchange, long _Comparand);
long _InterlockedCompareExchange_nf(long volatile *_Destination,
                                    long _Exchange, long _Comparand);
long _InterlockedCompareExchange_rel(long volatile *_Destination,
                                     long _Exchange, long _Comparand);
__int64 _InterlockedCompareExchange64_acq(__int64 volatile *_Destination,
                                          __int64 _Exchange, __int64 _Comparand);
__int64 _InterlockedCompareExchange64_nf(__int64 volatile *_Destination,
                                         __int64 _Exchange, __int64 _Comparand);
__int64 _InterlockedCompareExchange64_rel(__int64 volatile *_Destination,
                                          __int64 _Exchange, __int64 _Comparand);
#endif
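The compare-exchange intrinsics return the value observed at the destination; the swap succeeded exactly when that equals the comparand passed in. An illustrative retry loop (not part of the header):

static long g_counter;

long bounded_increment(long limit) {
  long old = g_counter;
  for (;;) {
    if (old >= limit)
      return old;                /* already at the limit; leave unchanged */
    long seen = _InterlockedCompareExchange_nf(&g_counter, old + 1, old);
    if (seen == old)
      return old + 1;            /* the CAS installed old + 1 */
    old = seen;                  /* lost a race; retry with the fresh value */
  }
}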

/*----------------------------------------------------------------------------*\
@ -841,7 +532,7 @@ __stosq(unsigned __int64 *__dst, unsigned __int64 __x, size_t __n) {
static __inline__ void __DEFAULT_FN_ATTRS
__cpuid(int __info[4], int __level) {
  __asm__ ("cpuid" : "=a"(__info[0]), "=b" (__info[1]), "=c"(__info[2]), "=d"(__info[3])
                   : "a"(__level));
                   : "a"(__level), "c"(0));
}
static __inline__ void __DEFAULT_FN_ATTRS
__cpuidex(int __info[4], int __level, int __ecx) {
@ -858,12 +549,35 @@ static __inline__ void __DEFAULT_FN_ATTRS
__halt(void) {
  __asm__ volatile ("hlt");
}
#endif
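The added "c"(0) input pins ECX to zero, so leaves whose output depends on the subleaf behave deterministically; __cpuidex exists for explicit subleaves. An illustrative caller (not part of the header) reading the vendor string, which leaf 0 returns in EBX, EDX, ECX order:

#include <string.h>

void cpu_vendor(char out[13]) {
  int info[4];
  __cpuid(info, 0);              /* EAX=0: max leaf in EAX, vendor id in EBX/EDX/ECX */
  memcpy(out + 0, &info[1], 4);  /* EBX */
  memcpy(out + 4, &info[3], 4);  /* EDX */
  memcpy(out + 8, &info[2], 4);  /* ECX */
  out[12] = '\0';
}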

#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
static __inline__ void __DEFAULT_FN_ATTRS
__nop(void) {
  __asm__ volatile ("nop");
}
#endif

/*----------------------------------------------------------------------------*\
|* MS AArch64 specific
\*----------------------------------------------------------------------------*/
#if defined(__aarch64__)
unsigned __int64 __getReg(int);
long _InterlockedAdd(long volatile *Addend, long Value);
int _ReadStatusReg(int);
void _WriteStatusReg(int, int);

static inline unsigned short _byteswap_ushort (unsigned short val) {
  return __builtin_bswap16(val);
}
static inline unsigned long _byteswap_ulong (unsigned long val) {
  return __builtin_bswap32(val);
}
static inline unsigned __int64 _byteswap_uint64 (unsigned __int64 val) {
  return __builtin_bswap64(val);
}
#endif
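An illustrative use (not part of the header): the _byteswap_* helpers reverse byte order, e.g. for converting host little-endian values to big-endian wire format.

unsigned long to_big_endian(unsigned long host_value) {
  return _byteswap_ulong(host_value);  /* 0x11223344 -> 0x44332211 */
}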

/*----------------------------------------------------------------------------*\
|* Privileged intrinsics
\*----------------------------------------------------------------------------*/

@ -31,6 +31,7 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lzcnt")))

#ifndef _MSC_VER
/// Counts the number of leading zero bits in the operand.
///
/// \headerfile <x86intrin.h>
@ -41,11 +42,8 @@
///    An unsigned 16-bit integer whose leading zeros are to be counted.
/// \returns An unsigned 16-bit integer containing the number of leading zero
///    bits in the operand.
static __inline__ unsigned short __DEFAULT_FN_ATTRS
__lzcnt16(unsigned short __X)
{
  return __X ? __builtin_clzs(__X) : 16;
}
#define __lzcnt16(X) __builtin_ia32_lzcnt_u16((unsigned short)(X))
#endif // _MSC_VER

/// Counts the number of leading zero bits in the operand.
///
@ -61,7 +59,7 @@ __lzcnt16(unsigned short __X)
static __inline__ unsigned int __DEFAULT_FN_ATTRS
__lzcnt32(unsigned int __X)
{
  return __X ? __builtin_clz(__X) : 32;
  return __builtin_ia32_lzcnt_u32(__X);
}

/// Counts the number of leading zero bits in the operand.
@ -78,10 +76,11 @@ __lzcnt32(unsigned int __X)
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_lzcnt_u32(unsigned int __X)
{
  return __X ? __builtin_clz(__X) : 32;
  return __builtin_ia32_lzcnt_u32(__X);
}

#ifdef __x86_64__
#ifndef _MSC_VER
/// Counts the number of leading zero bits in the operand.
///
/// \headerfile <x86intrin.h>
@ -93,11 +92,8 @@ _lzcnt_u32(unsigned int __X)
/// \returns An unsigned 64-bit integer containing the number of leading zero
///    bits in the operand.
/// \see _lzcnt_u64
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__lzcnt64(unsigned long long __X)
{
  return __X ? __builtin_clzll(__X) : 64;
}
#define __lzcnt64(X) __builtin_ia32_lzcnt_u64((unsigned long long)(X))
#endif // _MSC_VER

/// Counts the number of leading zero bits in the operand.
///
@ -113,7 +109,7 @@ __lzcnt64(unsigned long long __X)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_lzcnt_u64(unsigned long long __X)
{
  return __X ? __builtin_clzll(__X) : 64;
  return __builtin_ia32_lzcnt_u64(__X);
}
#endif
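An illustrative caller (not part of the header; requires a target with LZCNT): because _lzcnt_u32(0) is defined to return 32, a zero guard gives a well-defined integer log2.

unsigned int ilog2_u32(unsigned int x) {
  /* 31 - (leading zero count) is the index of the highest set bit. */
  return x ? 31 - _lzcnt_u32(x) : 0;
}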
@ -22,6 +22,14 @@
#endif //cl_khr_3d_image_writes
#endif //__OPENCL_C_VERSION__ < CL_VERSION_2_0

#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2
#ifndef cl_intel_planar_yuv
#define cl_intel_planar_yuv
#endif // cl_intel_planar_yuv
#pragma OPENCL EXTENSION cl_intel_planar_yuv : begin
#pragma OPENCL EXTENSION cl_intel_planar_yuv : end
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2

#define __ovld __attribute__((overloadable))
#define __conv __attribute__((convergent))

@ -14602,6 +14610,7 @@ int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, f
uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, int4 coord);
uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord);

#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2
float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, int4 coord);
float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);

@ -14609,6 +14618,7 @@ int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_
int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);
uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, int4 coord);
uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2

float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, int coord);
float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord);
@ -14618,6 +14628,7 @@ int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, f
uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, int coord);
uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord);

#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2
float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, int2 coord);
float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);

@ -14625,6 +14636,7 @@ int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_
int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);
uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, int2 coord);
uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2

#ifdef cl_khr_depth_images
float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord);
@ -14727,6 +14739,8 @@ uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler,
#endif //cl_khr_mipmap_image
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0

#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2

/**
 * Sampler-less Image Access
 */
@ -14760,24 +14774,31 @@ float4 __purefn __ovld read_imagef(read_only image3d_t image, int4 coord);
int4 __purefn __ovld read_imagei(read_only image3d_t image, int4 coord);
uint4 __purefn __ovld read_imageui(read_only image3d_t image, int4 coord);

#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2

// Image read functions returning half4 type
#ifdef cl_khr_fp16
half4 __purefn __ovld read_imageh(read_only image1d_t image, sampler_t sampler, int coord);
half4 __purefn __ovld read_imageh(read_only image1d_t image, sampler_t sampler, float coord);
half4 __purefn __ovld read_imageh(read_only image1d_array_t image, sampler_t sampler, int2 coord);
half4 __purefn __ovld read_imageh(read_only image1d_array_t image, sampler_t sampler, float2 coord);
half4 __purefn __ovld read_imageh(read_only image2d_t image, sampler_t sampler, int2 coord);
half4 __purefn __ovld read_imageh(read_only image2d_t image, sampler_t sampler, float2 coord);
half4 __purefn __ovld read_imageh(read_only image3d_t image, sampler_t sampler, int4 coord);
half4 __purefn __ovld read_imageh(read_only image3d_t image, sampler_t sampler, float4 coord);
#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2
half4 __purefn __ovld read_imageh(read_only image1d_array_t image, sampler_t sampler, int2 coord);
half4 __purefn __ovld read_imageh(read_only image1d_array_t image, sampler_t sampler, float2 coord);
half4 __purefn __ovld read_imageh(read_only image2d_array_t image, sampler_t sampler, int4 coord);
half4 __purefn __ovld read_imageh(read_only image2d_array_t image, sampler_t sampler, float4 coord);
/**
 * Sampler-less Image Access
 */
half4 __purefn __ovld read_imageh(read_only image1d_t image, int coord);
half4 __purefn __ovld read_imageh(read_only image2d_t image, int2 coord);
half4 __purefn __ovld read_imageh(read_only image3d_t image, int4 coord);
half4 __purefn __ovld read_imageh(read_only image1d_array_t image, int2 coord);
half4 __purefn __ovld read_imageh(read_only image2d_array_t image, int4 coord);
half4 __purefn __ovld read_imageh(read_only image1d_buffer_t image, int coord);
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2
#endif //cl_khr_fp16
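An illustrative OpenCL C kernel (not part of the header): the sampler-less forms introduced in OpenCL 1.2 take integer coordinates and no sampler_t argument.

__kernel void copy_row(read_only image2d_t src, __global float4 *dst) {
  int x = get_global_id(0);
  /* Sampler-less read: integer texel coordinate, no sampler. */
  dst[x] = read_imagef(src, (int2)(x, 0));
}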

// Image read functions for read_write images
@ -15707,7 +15728,6 @@ double __ovld __conv work_group_scan_inclusive_max(double x);

// OpenCL v2.0 s6.13.16 - Pipe Functions
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
#define PIPE_RESERVE_ID_VALID_BIT (1U << 30)
#define CLK_NULL_RESERVE_ID (__builtin_astype(((void*)(__SIZE_MAX__)), reserve_id_t))
bool __ovld is_valid_reserve_id(reserve_id_t reserve_id);
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
@ -16193,6 +16213,637 @@ void __ovld __conv intel_sub_group_block_write_us4( __global ushort* p, u
void __ovld __conv intel_sub_group_block_write_us8( __global ushort* p, ushort8 data );
#endif // cl_intel_subgroups_short

#ifdef cl_intel_device_side_avc_motion_estimation
#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : begin

#define CLK_AVC_ME_MAJOR_16x16_INTEL 0x0
#define CLK_AVC_ME_MAJOR_16x8_INTEL 0x1
#define CLK_AVC_ME_MAJOR_8x16_INTEL 0x2
#define CLK_AVC_ME_MAJOR_8x8_INTEL 0x3

#define CLK_AVC_ME_MINOR_8x8_INTEL 0x0
#define CLK_AVC_ME_MINOR_8x4_INTEL 0x1
#define CLK_AVC_ME_MINOR_4x8_INTEL 0x2
#define CLK_AVC_ME_MINOR_4x4_INTEL 0x3

#define CLK_AVC_ME_MAJOR_FORWARD_INTEL 0x0
#define CLK_AVC_ME_MAJOR_BACKWARD_INTEL 0x1
#define CLK_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2

#define CLK_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0
#define CLK_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E
#define CLK_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D
#define CLK_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B
#define CLK_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77
#define CLK_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F
#define CLK_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F
#define CLK_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F

#define CLK_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0
#define CLK_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1
#define CLK_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2

#define CLK_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0
#define CLK_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1
#define CLK_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2
#define CLK_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3
#define CLK_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4
#define CLK_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5
#define CLK_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6
#define CLK_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7
#define CLK_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8

#define CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0
#define CLK_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2

#define CLK_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0
#define CLK_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1
#define CLK_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3

#define CLK_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0
#define CLK_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1
#define CLK_AVC_ME_COST_PRECISION_PEL_INTEL 0x2
#define CLK_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3

#define CLK_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10
#define CLK_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15
#define CLK_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20
#define CLK_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B
#define CLK_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30

#define CLK_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0
#define CLK_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2
#define CLK_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4
#define CLK_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8

#define CLK_AVC_ME_INTRA_16x16_INTEL 0x0
#define CLK_AVC_ME_INTRA_8x8_INTEL 0x1
#define CLK_AVC_ME_INTRA_4x4_INTEL 0x2

#define CLK_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0
#define CLK_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000

#define CLK_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL (0x1 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL (0x2 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL (0x3 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL (0x55 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL (0xAA << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL (0xFF << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL (0x1 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL (0x2 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL (0x1 << 26)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL (0x2 << 26)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL (0x1 << 28)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL (0x2 << 28)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL (0x1 << 30)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL (0x2 << 30)

#define CLK_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00
#define CLK_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80

#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_ALL_INTEL 0x0
#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6
#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5
#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3

#define CLK_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60
#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10
#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8
#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4

#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8
#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0
#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2
#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3

#define CLK_AVC_ME_FRAME_FORWARD_INTEL 0x1
#define CLK_AVC_ME_FRAME_BACKWARD_INTEL 0x2
#define CLK_AVC_ME_FRAME_DUAL_INTEL 0x3

#define CLK_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0
#define CLK_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1

#define CLK_AVC_ME_INITIALIZE_INTEL 0x0

#define CLK_AVC_IME_PAYLOAD_INITIALIZE_INTEL 0x0
#define CLK_AVC_REF_PAYLOAD_INITIALIZE_INTEL 0x0
#define CLK_AVC_SIC_PAYLOAD_INITIALIZE_INTEL 0x0

#define CLK_AVC_IME_RESULT_INITIALIZE_INTEL 0x0
#define CLK_AVC_REF_RESULT_INITIALIZE_INTEL 0x0
#define CLK_AVC_SIC_RESULT_INITIALIZE_INTEL 0x0

#define CLK_AVC_IME_RESULT_SINGLE_REFERENCE_STREAMOUT_INITIALIZE_INTEL 0x0
#define CLK_AVC_IME_RESULT_SINGLE_REFERENCE_STREAMIN_INITIALIZE_INTEL 0x0
#define CLK_AVC_IME_RESULT_DUAL_REFERENCE_STREAMOUT_INITIALIZE_INTEL 0x0
#define CLK_AVC_IME_RESULT_DUAL_REFERENCE_STREAMIN_INITIALIZE_INTEL 0x0

// MCE built-in functions
uchar __ovld
intel_sub_group_avc_mce_get_default_inter_base_multi_reference_penalty(
    uchar slice_type, uchar qp);
ulong __ovld intel_sub_group_avc_mce_get_default_inter_shape_penalty(
    uchar slice_type, uchar qp);
uchar __ovld intel_sub_group_avc_mce_get_default_inter_direction_penalty(
    uchar slice_type, uchar qp);
uint __ovld intel_sub_group_avc_mce_get_default_intra_luma_shape_penalty(
    uchar slice_type, uchar qp);
uint2 __ovld
intel_sub_group_avc_mce_get_default_inter_motion_vector_cost_table(
    uchar slice_type, uchar qp);
uchar __ovld intel_sub_group_avc_mce_get_default_intra_luma_mode_penalty(
    uchar slice_type, uchar qp);

uint2 __ovld intel_sub_group_avc_mce_get_default_high_penalty_cost_table();
uint2 __ovld intel_sub_group_avc_mce_get_default_medium_penalty_cost_table();
uint2 __ovld intel_sub_group_avc_mce_get_default_low_penalty_cost_table();
uint __ovld intel_sub_group_avc_mce_get_default_non_dc_luma_intra_penalty();
uchar __ovld
intel_sub_group_avc_mce_get_default_intra_chroma_mode_base_penalty();

intel_sub_group_avc_mce_payload_t __ovld
intel_sub_group_avc_mce_set_inter_base_multi_reference_penalty(
    uchar reference_base_penalty, intel_sub_group_avc_mce_payload_t payload);
intel_sub_group_avc_mce_payload_t __ovld
intel_sub_group_avc_mce_set_inter_shape_penalty(
    ulong packed_shape_penalty, intel_sub_group_avc_mce_payload_t payload);
intel_sub_group_avc_mce_payload_t __ovld
intel_sub_group_avc_mce_set_inter_direction_penalty(
    uchar direction_cost, intel_sub_group_avc_mce_payload_t payload);
intel_sub_group_avc_mce_payload_t __ovld
intel_sub_group_avc_mce_set_motion_vector_cost_function(
    ulong packed_cost_center_delta, uint2 packed_cost_table,
    uchar cost_precision, intel_sub_group_avc_mce_payload_t payload);
intel_sub_group_avc_mce_payload_t __ovld
intel_sub_group_avc_mce_set_ac_only_haar(
    intel_sub_group_avc_mce_payload_t payload);
intel_sub_group_avc_mce_payload_t __ovld
intel_sub_group_avc_mce_set_source_interlaced_field_polarity(
    uchar src_field_polarity, intel_sub_group_avc_mce_payload_t payload);
intel_sub_group_avc_mce_payload_t __ovld
intel_sub_group_avc_mce_set_single_reference_interlaced_field_polarity(
    uchar ref_field_polarity, intel_sub_group_avc_mce_payload_t payload);
intel_sub_group_avc_mce_payload_t __ovld
intel_sub_group_avc_mce_set_dual_reference_interlaced_field_polarities(
    uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity,
    intel_sub_group_avc_mce_payload_t payload);

ulong __ovld intel_sub_group_avc_mce_get_motion_vectors(
    intel_sub_group_avc_mce_result_t result);
ushort __ovld intel_sub_group_avc_mce_get_inter_distortions(
    intel_sub_group_avc_mce_result_t result);
ushort __ovld intel_sub_group_avc_mce_get_best_inter_distortion(
    intel_sub_group_avc_mce_result_t result);
uchar __ovld intel_sub_group_avc_mce_get_inter_major_shape(
    intel_sub_group_avc_mce_result_t result);
uchar __ovld intel_sub_group_avc_mce_get_inter_minor_shapes(
    intel_sub_group_avc_mce_result_t result);
uchar __ovld intel_sub_group_avc_mce_get_inter_directions(
    intel_sub_group_avc_mce_result_t result);
uchar __ovld intel_sub_group_avc_mce_get_inter_motion_vector_count(
    intel_sub_group_avc_mce_result_t result);
uint __ovld intel_sub_group_avc_mce_get_inter_reference_ids(
    intel_sub_group_avc_mce_result_t result);
uchar __ovld
intel_sub_group_avc_mce_get_inter_reference_interlaced_field_polarities(
    uint packed_reference_ids, uint packed_reference_parameter_field_polarities,
    intel_sub_group_avc_mce_result_t result);

// IME built-in functions
intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_initialize(
    ushort2 src_coord, uchar partition_mask, uchar sad_adjustment);
intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_set_single_reference(
    short2 ref_offset, uchar search_window_config,
    intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_set_dual_reference(
    short2 fwd_ref_offset, short2 bwd_ref_offset, uchar search_window_config,
    intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_set_max_motion_vector_count(
    uchar max_motion_vector_count, intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_set_unidirectional_mix_disable(
    intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_set_early_search_termination_threshold(
    uchar threshold, intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_set_weighted_sad(
    uint packed_sad_weights, intel_sub_group_avc_ime_payload_t payload);

__attribute__((deprecated("If you use the latest Intel driver, please use "
                          "intel_sub_group_avc_ime_ref_window_size instead",
                          "intel_sub_group_avc_ime_ref_window_size")))
ushort2 __ovld
intel_sub_group_ime_ref_window_size(uchar search_window_config, char dual_ref);
ushort2 __ovld intel_sub_group_avc_ime_ref_window_size(
    uchar search_window_config, char dual_ref);
short2 __ovld intel_sub_group_avc_ime_adjust_ref_offset(
    short2 ref_offset, ushort2 src_coord, ushort2 ref_window_size,
    ushort2 image_size);

intel_sub_group_avc_ime_result_t __ovld
intel_sub_group_avc_ime_evaluate_with_single_reference(
    read_only image2d_t src_image, read_only image2d_t ref_image,
    sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ime_result_t __ovld
intel_sub_group_avc_ime_evaluate_with_dual_reference(
    read_only image2d_t src_image, read_only image2d_t fwd_ref_image,
    read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,
    intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ime_result_single_reference_streamout_t __ovld
intel_sub_group_avc_ime_evaluate_with_single_reference_streamout(
    read_only image2d_t src_image, read_only image2d_t ref_image,
    sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ime_result_dual_reference_streamout_t __ovld
intel_sub_group_avc_ime_evaluate_with_dual_reference_streamout(
    read_only image2d_t src_image, read_only image2d_t fwd_ref_image,
    read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,
    intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ime_result_t __ovld
intel_sub_group_avc_ime_evaluate_with_single_reference_streamin(
    read_only image2d_t src_image, read_only image2d_t ref_image,
    sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload,
    intel_sub_group_avc_ime_single_reference_streamin_t streamin_components);
intel_sub_group_avc_ime_result_t __ovld
intel_sub_group_avc_ime_evaluate_with_dual_reference_streamin(
    read_only image2d_t src_image, read_only image2d_t fwd_ref_image,
    read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,
    intel_sub_group_avc_ime_payload_t payload,
    intel_sub_group_avc_ime_dual_reference_streamin_t streamin_components);
intel_sub_group_avc_ime_result_single_reference_streamout_t __ovld
intel_sub_group_avc_ime_evaluate_with_single_reference_streaminout(
    read_only image2d_t src_image, read_only image2d_t ref_image,
    sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload,
    intel_sub_group_avc_ime_single_reference_streamin_t streamin_components);
intel_sub_group_avc_ime_result_dual_reference_streamout_t __ovld
intel_sub_group_avc_ime_evaluate_with_dual_reference_streaminout(
    read_only image2d_t src_image, read_only image2d_t fwd_ref_image,
    read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,
    intel_sub_group_avc_ime_payload_t payload,
    intel_sub_group_avc_ime_dual_reference_streamin_t streamin_components);

intel_sub_group_avc_ime_single_reference_streamin_t __ovld
intel_sub_group_avc_ime_get_single_reference_streamin(
    intel_sub_group_avc_ime_result_single_reference_streamout_t result);
intel_sub_group_avc_ime_dual_reference_streamin_t __ovld
intel_sub_group_avc_ime_get_dual_reference_streamin(
    intel_sub_group_avc_ime_result_dual_reference_streamout_t result);
intel_sub_group_avc_ime_result_t __ovld
intel_sub_group_avc_ime_strip_single_reference_streamout(
    intel_sub_group_avc_ime_result_single_reference_streamout_t result);
intel_sub_group_avc_ime_result_t __ovld
intel_sub_group_avc_ime_strip_dual_reference_streamout(
    intel_sub_group_avc_ime_result_dual_reference_streamout_t result);

uint __ovld intel_sub_group_avc_ime_get_streamout_major_shape_motion_vectors(
    intel_sub_group_avc_ime_result_single_reference_streamout_t result,
    uchar major_shape);
ushort __ovld intel_sub_group_avc_ime_get_streamout_major_shape_distortions(
    intel_sub_group_avc_ime_result_single_reference_streamout_t result,
    uchar major_shape);
uchar __ovld intel_sub_group_avc_ime_get_streamout_major_shape_reference_ids(
    intel_sub_group_avc_ime_result_single_reference_streamout_t result,
    uchar major_shape);
uint __ovld intel_sub_group_avc_ime_get_streamout_major_shape_motion_vectors(
    intel_sub_group_avc_ime_result_dual_reference_streamout_t result,
    uchar major_shape, uchar direction);
ushort __ovld intel_sub_group_avc_ime_get_streamout_major_shape_distortions(
    intel_sub_group_avc_ime_result_dual_reference_streamout_t result,
    uchar major_shape, uchar direction);
uchar __ovld intel_sub_group_avc_ime_get_streamout_major_shape_reference_ids(
    intel_sub_group_avc_ime_result_dual_reference_streamout_t result,
    uchar major_shape, uchar direction);

uchar __ovld intel_sub_group_avc_ime_get_border_reached(
    uchar image_select, intel_sub_group_avc_ime_result_t result);
uchar __ovld intel_sub_group_avc_ime_get_truncated_search_indication(
    intel_sub_group_avc_ime_result_t result);
uchar __ovld
intel_sub_group_avc_ime_get_unidirectional_early_search_termination(
    intel_sub_group_avc_ime_result_t result);
uint __ovld intel_sub_group_avc_ime_get_weighting_pattern_minimum_motion_vector(
    intel_sub_group_avc_ime_result_t result);
ushort __ovld intel_sub_group_avc_ime_get_weighting_pattern_minimum_distortion(
    intel_sub_group_avc_ime_result_t result);

// REF built-in functions
intel_sub_group_avc_ref_payload_t __ovld
intel_sub_group_avc_fme_initialize(
    ushort2 src_coord, ulong motion_vectors, uchar major_shapes,
    uchar minor_shapes, uchar directions, uchar pixel_resolution,
    uchar sad_adjustment);
intel_sub_group_avc_ref_payload_t __ovld
intel_sub_group_avc_bme_initialize(
    ushort2 src_coord, ulong motion_vectors, uchar major_shapes,
    uchar minor_shapes, uchar directions, uchar pixel_resolution,
    uchar bidirectional_weight, uchar sad_adjustment);

intel_sub_group_avc_ref_payload_t __ovld
intel_sub_group_avc_ref_set_bidirectional_mix_disable(
    intel_sub_group_avc_ref_payload_t payload);
intel_sub_group_avc_ref_payload_t __ovld
intel_sub_group_avc_ref_set_bilinear_filter_enable(
    intel_sub_group_avc_ref_payload_t payload);

intel_sub_group_avc_ref_result_t __ovld
intel_sub_group_avc_ref_evaluate_with_single_reference(
    read_only image2d_t src_image, read_only image2d_t ref_image,
    sampler_t vme_media_sampler, intel_sub_group_avc_ref_payload_t payload);
intel_sub_group_avc_ref_result_t __ovld
intel_sub_group_avc_ref_evaluate_with_dual_reference(
    read_only image2d_t src_image, read_only image2d_t fwd_ref_image,
    read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,
    intel_sub_group_avc_ref_payload_t payload);
intel_sub_group_avc_ref_result_t __ovld
intel_sub_group_avc_ref_evaluate_with_multi_reference(
    read_only image2d_t src_image, uint packed_reference_ids,
    sampler_t vme_media_sampler, intel_sub_group_avc_ref_payload_t payload);
intel_sub_group_avc_ref_result_t __ovld
intel_sub_group_avc_ref_evaluate_with_multi_reference(
    read_only image2d_t src_image, uint packed_reference_ids,
    uchar packed_reference_field_polarities, sampler_t vme_media_sampler,
    intel_sub_group_avc_ref_payload_t payload);

// SIC built-in functions
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_initialize(
    ushort2 src_coord);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_configure_skc(
    uint skip_block_partition_type, uint skip_motion_vector_mask,
    ulong motion_vectors, uchar bidirectional_weight, uchar skip_sad_adjustment,
    intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_configure_ipe(
    uchar luma_intra_partition_mask, uchar intra_neighbour_availabilty,
    uchar left_edge_luma_pixels, uchar upper_left_corner_luma_pixel,
    uchar upper_edge_luma_pixels, uchar upper_right_edge_luma_pixels,
    uchar intra_sad_adjustment, intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_configure_ipe(
    uchar luma_intra_partition_mask, uchar intra_neighbour_availabilty,
    uchar left_edge_luma_pixels, uchar upper_left_corner_luma_pixel,
    uchar upper_edge_luma_pixels, uchar upper_right_edge_luma_pixels,
    ushort left_edge_chroma_pixels, ushort upper_left_corner_chroma_pixel,
    ushort upper_edge_chroma_pixels, uchar intra_sad_adjustment,
    intel_sub_group_avc_sic_payload_t payload);
uint __ovld
intel_sub_group_avc_sic_get_motion_vector_mask(
    uint skip_block_partition_type, uchar direction);

intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_set_intra_luma_shape_penalty(
    uint packed_shape_cost, intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_set_intra_luma_mode_cost_function(
    uchar luma_mode_penalty, uint luma_packed_neighbor_modes,
    uint luma_packed_non_dc_penalty, intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_set_intra_chroma_mode_cost_function(
    uchar chroma_mode_penalty, intel_sub_group_avc_sic_payload_t payload);

intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_set_skc_bilinear_filter_enable(
    intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_set_skc_forward_transform_enable(
    ulong packed_sad_coefficients, intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_set_block_based_raw_skip_sad(
    uchar block_based_skip_type,
    intel_sub_group_avc_sic_payload_t payload);

intel_sub_group_avc_sic_result_t __ovld
intel_sub_group_avc_sic_evaluate_ipe(
    read_only image2d_t src_image, sampler_t vme_media_sampler,
    intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_sic_result_t __ovld
intel_sub_group_avc_sic_evaluate_with_single_reference(
    read_only image2d_t src_image, read_only image2d_t ref_image,
    sampler_t vme_media_sampler, intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_sic_result_t __ovld
intel_sub_group_avc_sic_evaluate_with_dual_reference(
    read_only image2d_t src_image, read_only image2d_t fwd_ref_image,
    read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,
    intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_sic_result_t __ovld
intel_sub_group_avc_sic_evaluate_with_multi_reference(
    read_only image2d_t src_image, uint packed_reference_ids,
    sampler_t vme_media_sampler, intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_sic_result_t __ovld
intel_sub_group_avc_sic_evaluate_with_multi_reference(
    read_only image2d_t src_image, uint packed_reference_ids,
    uchar packed_reference_field_polarities, sampler_t vme_media_sampler,
    intel_sub_group_avc_sic_payload_t payload);

uchar __ovld intel_sub_group_avc_sic_get_ipe_luma_shape(
    intel_sub_group_avc_sic_result_t result);
ushort __ovld intel_sub_group_avc_sic_get_best_ipe_luma_distortion(
    intel_sub_group_avc_sic_result_t result);
ushort __ovld intel_sub_group_avc_sic_get_best_ipe_chroma_distortion(
    intel_sub_group_avc_sic_result_t result);
ulong __ovld intel_sub_group_avc_sic_get_packed_ipe_luma_modes(
    intel_sub_group_avc_sic_result_t result);
uchar __ovld intel_sub_group_avc_sic_get_ipe_chroma_mode(
    intel_sub_group_avc_sic_result_t result);
uint __ovld intel_sub_group_avc_sic_get_packed_skc_luma_count_threshold(
    intel_sub_group_avc_sic_result_t result);
ulong __ovld intel_sub_group_avc_sic_get_packed_skc_luma_sum_threshold(
    intel_sub_group_avc_sic_result_t result);
ushort __ovld intel_sub_group_avc_sic_get_inter_raw_sads(
    intel_sub_group_avc_sic_result_t result);

// Wrappers
intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_set_inter_base_multi_reference_penalty(
    uchar reference_base_penalty, intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ref_payload_t __ovld
intel_sub_group_avc_ref_set_inter_base_multi_reference_penalty(
    uchar reference_base_penalty, intel_sub_group_avc_ref_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_set_inter_base_multi_reference_penalty(
    uchar reference_base_penalty, intel_sub_group_avc_sic_payload_t payload);

intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_set_inter_shape_penalty(
    ulong packed_shape_cost, intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ref_payload_t __ovld
intel_sub_group_avc_ref_set_inter_shape_penalty(
    ulong packed_shape_cost, intel_sub_group_avc_ref_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_set_inter_shape_penalty(
    ulong packed_shape_cost, intel_sub_group_avc_sic_payload_t payload);

intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_set_inter_direction_penalty(
    uchar direction_cost, intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ref_payload_t __ovld
intel_sub_group_avc_ref_set_inter_direction_penalty(
    uchar direction_cost, intel_sub_group_avc_ref_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_set_inter_direction_penalty(
    uchar direction_cost, intel_sub_group_avc_sic_payload_t payload);

intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_set_motion_vector_cost_function(
    ulong packed_cost_center_delta, uint2 packed_cost_table,
    uchar cost_precision, intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ref_payload_t __ovld
intel_sub_group_avc_ref_set_motion_vector_cost_function(
    ulong packed_cost_center_delta, uint2 packed_cost_table,
    uchar cost_precision, intel_sub_group_avc_ref_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_set_motion_vector_cost_function(
    ulong packed_cost_center_delta, uint2 packed_cost_table,
    uchar cost_precision, intel_sub_group_avc_sic_payload_t payload);

intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_set_source_interlaced_field_polarity(
    uchar src_field_polarity, intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ref_payload_t __ovld
intel_sub_group_avc_ref_set_source_interlaced_field_polarity(
    uchar src_field_polarity, intel_sub_group_avc_ref_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_set_source_interlaced_field_polarity(
    uchar src_field_polarity, intel_sub_group_avc_sic_payload_t payload);

intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_set_single_reference_interlaced_field_polarity(
    uchar ref_field_polarity, intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ref_payload_t __ovld
intel_sub_group_avc_ref_set_single_reference_interlaced_field_polarity(
    uchar ref_field_polarity, intel_sub_group_avc_ref_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_set_single_reference_interlaced_field_polarity(
    uchar ref_field_polarity, intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_set_dual_reference_interlaced_field_polarities(
    uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity,
    intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ref_payload_t __ovld
intel_sub_group_avc_ref_set_dual_reference_interlaced_field_polarities(
    uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity,
    intel_sub_group_avc_ref_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_set_dual_reference_interlaced_field_polarities(
    uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity,
    intel_sub_group_avc_sic_payload_t payload);

intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_set_ac_only_haar(
    intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ref_payload_t __ovld
intel_sub_group_avc_ref_set_ac_only_haar(
    intel_sub_group_avc_ref_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_set_ac_only_haar(
    intel_sub_group_avc_sic_payload_t payload);

ulong __ovld intel_sub_group_avc_ime_get_motion_vectors(
    intel_sub_group_avc_ime_result_t result);
ulong __ovld intel_sub_group_avc_ref_get_motion_vectors(
    intel_sub_group_avc_ref_result_t result);

ushort __ovld intel_sub_group_avc_ime_get_inter_distortions(
    intel_sub_group_avc_ime_result_t result);
ushort __ovld intel_sub_group_avc_ref_get_inter_distortions(
    intel_sub_group_avc_ref_result_t result);
ushort __ovld intel_sub_group_avc_sic_get_inter_distortions(
    intel_sub_group_avc_sic_result_t result);

ushort __ovld intel_sub_group_avc_ime_get_best_inter_distortion(
    intel_sub_group_avc_ime_result_t result);
ushort __ovld intel_sub_group_avc_ref_get_best_inter_distortion(
    intel_sub_group_avc_ref_result_t result);

uchar __ovld intel_sub_group_avc_ime_get_inter_major_shape(
    intel_sub_group_avc_ime_result_t result);
uchar __ovld intel_sub_group_avc_ref_get_inter_major_shape(
    intel_sub_group_avc_ref_result_t result);
uchar __ovld intel_sub_group_avc_ime_get_inter_minor_shapes(
    intel_sub_group_avc_ime_result_t result);
uchar __ovld intel_sub_group_avc_ref_get_inter_minor_shapes(
    intel_sub_group_avc_ref_result_t result);

uchar __ovld intel_sub_group_avc_ime_get_inter_directions(
    intel_sub_group_avc_ime_result_t result);
uchar __ovld intel_sub_group_avc_ref_get_inter_directions(
    intel_sub_group_avc_ref_result_t result);

uchar __ovld intel_sub_group_avc_ime_get_inter_motion_vector_count(
    intel_sub_group_avc_ime_result_t result);
uchar __ovld intel_sub_group_avc_ref_get_inter_motion_vector_count(
    intel_sub_group_avc_ref_result_t result);

uint __ovld intel_sub_group_avc_ime_get_inter_reference_ids(
    intel_sub_group_avc_ime_result_t result);
uint __ovld intel_sub_group_avc_ref_get_inter_reference_ids(
    intel_sub_group_avc_ref_result_t result);

uchar __ovld
intel_sub_group_avc_ime_get_inter_reference_interlaced_field_polarities(
    uint packed_reference_ids, uint packed_reference_parameter_field_polarities,
    intel_sub_group_avc_ime_result_t result);
uchar __ovld
intel_sub_group_avc_ref_get_inter_reference_interlaced_field_polarities(
    uint packed_reference_ids, uint packed_reference_parameter_field_polarities,
    intel_sub_group_avc_ref_result_t result);

// Type conversion functions
intel_sub_group_avc_mce_payload_t __ovld
intel_sub_group_avc_ime_convert_to_mce_payload(
    intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_mce_convert_to_ime_payload(
    intel_sub_group_avc_mce_payload_t payload);
intel_sub_group_avc_mce_payload_t __ovld
intel_sub_group_avc_ref_convert_to_mce_payload(
    intel_sub_group_avc_ref_payload_t payload);
intel_sub_group_avc_ref_payload_t __ovld
intel_sub_group_avc_mce_convert_to_ref_payload(
    intel_sub_group_avc_mce_payload_t payload);
intel_sub_group_avc_mce_payload_t __ovld
intel_sub_group_avc_sic_convert_to_mce_payload(
    intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_mce_convert_to_sic_payload(
    intel_sub_group_avc_mce_payload_t payload);

intel_sub_group_avc_mce_result_t __ovld
intel_sub_group_avc_ime_convert_to_mce_result(
    intel_sub_group_avc_ime_result_t result);
intel_sub_group_avc_ime_result_t __ovld
intel_sub_group_avc_mce_convert_to_ime_result(
    intel_sub_group_avc_mce_result_t result);
intel_sub_group_avc_mce_result_t __ovld
intel_sub_group_avc_ref_convert_to_mce_result(
    intel_sub_group_avc_ref_result_t result);
intel_sub_group_avc_ref_result_t __ovld
intel_sub_group_avc_mce_convert_to_ref_result(
    intel_sub_group_avc_mce_result_t result);
intel_sub_group_avc_mce_result_t __ovld
intel_sub_group_avc_sic_convert_to_mce_result(
    intel_sub_group_avc_sic_result_t result);
intel_sub_group_avc_sic_result_t __ovld
intel_sub_group_avc_mce_convert_to_sic_result(
    intel_sub_group_avc_mce_result_t result);
#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : end
#endif // cl_intel_device_side_avc_motion_estimation
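An illustrative OpenCL C kernel (a sketch only, assuming a device exposing cl_intel_device_side_avc_motion_estimation): the typical IME flow with the builtins declared above -- initialize a payload, pick a reference and search window, evaluate, then unpack the motion vectors and distortions.

__kernel void ime_16x16(read_only image2d_t src, read_only image2d_t ref,
                        sampler_t vme_sampler, __global ulong *mvs,
                        __global ushort *sads) {
  /* One 16x16 source macroblock per work-group. */
  ushort2 src_coord = (ushort2)(get_group_id(0) * 16, get_group_id(1) * 16);
  intel_sub_group_avc_ime_payload_t p = intel_sub_group_avc_ime_initialize(
      src_coord, CLK_AVC_ME_PARTITION_MASK_16x16_INTEL,
      CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL);
  p = intel_sub_group_avc_ime_set_single_reference(
      (short2)(0, 0), CLK_AVC_ME_SEARCH_WINDOW_SMALL_INTEL, p);
  intel_sub_group_avc_ime_result_t r =
      intel_sub_group_avc_ime_evaluate_with_single_reference(src, ref,
                                                             vme_sampler, p);
  size_t block = get_group_id(1) * get_num_groups(0) + get_group_id(0);
  mvs[block] = intel_sub_group_avc_ime_get_motion_vectors(r);
  sads[block] = intel_sub_group_avc_ime_get_inter_distortions(r);
}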

#ifdef cl_amd_media_ops
uint __ovld amd_bitalign(uint a, uint b, uint c);
uint2 __ovld amd_bitalign(uint2 a, uint2 b, uint2 c);

@ -381,7 +381,7 @@ vec_insert_and_zero(const unsigned long long *__ptr) {
static inline __ATTRS_o_ai vector float
vec_insert_and_zero(const float *__ptr) {
  vector float __vec = (vector float)0;
  __vec[0] = *__ptr;
  __vec[1] = *__ptr;
  return __vec;
}
#endif
@ -5942,13 +5942,13 @@ vec_orc(vector unsigned long long __a, vector unsigned long long __b) {

static inline __ATTRS_o_ai vector float
vec_orc(vector float __a, vector float __b) {
  return (vector float)((vector unsigned int)__a &
  return (vector float)((vector unsigned int)__a |
                        ~(vector unsigned int)__b);
}

static inline __ATTRS_o_ai vector double
vec_orc(vector double __a, vector double __b) {
  return (vector double)((vector unsigned long long)__a &
  return (vector double)((vector unsigned long long)__a |
                         ~(vector unsigned long long)__b);
}
#endif
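vec_orc is the OR-with-complement operation; the hunk above fixes the float and double overloads, which previously AND-ed with the complement (the job of the separate and-with-complement operation). A scalar model of one lane (illustrative only):

static inline unsigned int orc_u32(unsigned int a, unsigned int b) {
  return a | ~b;  /* OR-complement: what the fixed vec_orc computes per lane */
}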