From face8d65a85e9b6184d9eb7c3d5b345090350b27 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Tue, 8 Dec 2015 17:51:59 -0700
Subject: [PATCH] parseh: add c header files

---
 CMakeLists.txt                   |   71 +
 c_headers/Intrin.h               |  958 ++++
 c_headers/__stddef_max_align_t.h |   43 +
 c_headers/__wmmintrin_aes.h      |   72 +
 c_headers/__wmmintrin_pclmul.h   |   34 +
 c_headers/adxintrin.h            |   88 +
 c_headers/ammintrin.h            |  215 +
 c_headers/arm_acle.h             |  304 +
 c_headers/avx2intrin.h           | 1256 +++++
 c_headers/avx512bwintrin.h       | 1250 +++++
 c_headers/avx512cdintrin.h       |  131 +
 c_headers/avx512dqintrin.h       |  242 +
 c_headers/avx512erintrin.h       |  285 +
 c_headers/avx512fintrin.h        | 2457 ++++
 c_headers/avx512vlbwintrin.h     | 1907 +++++++
 c_headers/avx512vldqintrin.h     |  353 ++
 c_headers/avx512vlintrin.h       | 1982 +++++++
 c_headers/avxintrin.h            | 1308 +++++
 c_headers/bmi2intrin.h           |   99 +
 c_headers/bmiintrin.h            |  153 +
 c_headers/cpuid.h                |  209 +
 c_headers/cuda_builtin_vars.h    |  110 +
 c_headers/emmintrin.h            | 1480 +++++
 c_headers/f16cintrin.h           |   63 +
 c_headers/float.h                |  124 +
 c_headers/fma4intrin.h           |  236 +
 c_headers/fmaintrin.h            |  234 +
 c_headers/fxsrintrin.h           |   55 +
 c_headers/htmintrin.h            |  226 +
 c_headers/htmxlintrin.h          |  363 ++
 c_headers/ia32intrin.h           |  101 +
 c_headers/immintrin.h            |  203 +
 c_headers/inttypes.h             |  102 +
 c_headers/iso646.h               |   43 +
 c_headers/limits.h               |  118 +
 c_headers/lzcntintrin.h          |   72 +
 c_headers/mm3dnow.h              |  167 +
 c_headers/mm_malloc.h            |   75 +
 c_headers/mmintrin.h             |  507 ++
 c_headers/nmmintrin.h            |   35 +
 c_headers/pmmintrin.h            |  122 +
 c_headers/popcntintrin.h         |   50 +
 c_headers/prfchwintrin.h         |   39 +
 c_headers/rdseedintrin.h         |   59 +
 c_headers/rtmintrin.h            |   59 +
 c_headers/s390intrin.h           |   39 +
 c_headers/shaintrin.h            |   79 +
 c_headers/smmintrin.h            |  487 ++
 c_headers/stdalign.h             |   35 +
 c_headers/stdarg.h               |   52 +
 c_headers/stdatomic.h            |  190 +
 c_headers/stdbool.h              |   44 +
 c_headers/stddef.h               |  137 +
 c_headers/stdint.h               |  707 +++
 c_headers/stdnoreturn.h          |   30 +
 c_headers/tbmintrin.h            |  154 +
 c_headers/tgmath.h               | 1374 +++++
 c_headers/tmmintrin.h            |  230 +
 c_headers/unwind.h               |  282 +
 c_headers/vadefs.h               |   65 +
 c_headers/varargs.h              |   26 +
 c_headers/vecintrin.h            | 8946 ++++++++++++++++++++++++++++++
 c_headers/wmmintrin.h            |   42 +
 c_headers/x86intrin.h            |   81 +
 c_headers/xmmintrin.h            | 1008 ++++
 c_headers/xopintrin.h            |  809 +++
 c_headers/xtestintrin.h          |   41 +
 src/config.h.in                  |    2 +
 src/parseh.cpp                   |   30 +-
 69 files changed, 32943 insertions(+), 7 deletions(-)
 create mode 100644 c_headers/Intrin.h
 create mode 100644 c_headers/__stddef_max_align_t.h
 create mode 100644 c_headers/__wmmintrin_aes.h
 create mode 100644 c_headers/__wmmintrin_pclmul.h
 create mode 100644 c_headers/adxintrin.h
 create mode 100644 c_headers/ammintrin.h
 create mode 100644 c_headers/arm_acle.h
 create mode 100644 c_headers/avx2intrin.h
 create mode 100644 c_headers/avx512bwintrin.h
 create mode 100644 c_headers/avx512cdintrin.h
 create mode 100644 c_headers/avx512dqintrin.h
 create mode 100644 c_headers/avx512erintrin.h
 create mode 100644 c_headers/avx512fintrin.h
 create mode 100644 c_headers/avx512vlbwintrin.h
 create mode 100644 c_headers/avx512vldqintrin.h
 create mode 100644 c_headers/avx512vlintrin.h
 create mode 100644 c_headers/avxintrin.h
 create mode 100644 c_headers/bmi2intrin.h
 create mode 100644 c_headers/bmiintrin.h
 create mode 100644 c_headers/cpuid.h
 create mode 100644 c_headers/cuda_builtin_vars.h
 create mode 100644 c_headers/emmintrin.h
 create mode 100644 c_headers/f16cintrin.h
 create mode 100644 c_headers/float.h
 create mode 100644 c_headers/fma4intrin.h
 create mode 100644 c_headers/fmaintrin.h
 create mode 100644 c_headers/fxsrintrin.h
 create mode 100644 c_headers/htmintrin.h
 create mode 100644 c_headers/htmxlintrin.h
 create mode 100644 c_headers/ia32intrin.h
 create mode 100644 c_headers/immintrin.h
 create mode 100644 c_headers/inttypes.h
 create mode 100644 c_headers/iso646.h
 create mode 100644 c_headers/limits.h
 create mode 100644 c_headers/lzcntintrin.h
 create mode 100644 c_headers/mm3dnow.h
 create mode 100644 c_headers/mm_malloc.h
 create mode 100644 c_headers/mmintrin.h
 create mode 100644 c_headers/nmmintrin.h
 create mode 100644 c_headers/pmmintrin.h
 create mode 100644 c_headers/popcntintrin.h
 create mode 100644 c_headers/prfchwintrin.h
 create mode 100644 c_headers/rdseedintrin.h
 create mode 100644 c_headers/rtmintrin.h
 create mode 100644 c_headers/s390intrin.h
 create mode 100644 c_headers/shaintrin.h
 create mode 100644 c_headers/smmintrin.h
 create mode 100644 c_headers/stdalign.h
 create mode 100644 c_headers/stdarg.h
 create mode 100644 c_headers/stdatomic.h
 create mode 100644 c_headers/stdbool.h
 create mode 100644 c_headers/stddef.h
 create mode 100644 c_headers/stdint.h
 create mode 100644 c_headers/stdnoreturn.h
 create mode 100644 c_headers/tbmintrin.h
 create mode 100644 c_headers/tgmath.h
 create mode 100644 c_headers/tmmintrin.h
 create mode 100644 c_headers/unwind.h
 create mode 100644 c_headers/vadefs.h
 create mode 100644 c_headers/varargs.h
 create mode 100644 c_headers/vecintrin.h
 create mode 100644 c_headers/wmmintrin.h
 create mode 100644 c_headers/x86intrin.h
 create mode 100644 c_headers/xmmintrin.h
 create mode 100644 c_headers/xopintrin.h
 create mode 100644 c_headers/xtestintrin.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 335ec83af4..3321ea5151 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -46,7 +46,76 @@ set(TEST_SOURCES
     "${CMAKE_SOURCE_DIR}/test/run_tests.cpp"
 )
 
+set(C_HEADERS
+    "${CMAKE_SOURCE_DIR}/c_headers/adxintrin.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/ammintrin.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/arm_acle.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/avx2intrin.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/avx512bwintrin.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/avx512cdintrin.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/avx512dqintrin.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/avx512erintrin.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/avx512fintrin.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/avx512vlbwintrin.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/avx512vldqintrin.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/avx512vlintrin.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/avxintrin.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/bmi2intrin.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/bmiintrin.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/cpuid.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/cuda_builtin_vars.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/emmintrin.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/f16cintrin.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/float.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/fma4intrin.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/fmaintrin.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/fxsrintrin.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/htmintrin.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/htmxlintrin.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/ia32intrin.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/immintrin.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/Intrin.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/inttypes.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/iso646.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/limits.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/lzcntintrin.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/mm3dnow.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/mmintrin.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/mm_malloc.h"
+    "${CMAKE_SOURCE_DIR}/c_headers/nmmintrin.h"
"${CMAKE_SOURCE_DIR}/c_headers/pmmintrin.h" + "${CMAKE_SOURCE_DIR}/c_headers/popcntintrin.h" + "${CMAKE_SOURCE_DIR}/c_headers/prfchwintrin.h" + "${CMAKE_SOURCE_DIR}/c_headers/rdseedintrin.h" + "${CMAKE_SOURCE_DIR}/c_headers/rtmintrin.h" + "${CMAKE_SOURCE_DIR}/c_headers/s390intrin.h" + "${CMAKE_SOURCE_DIR}/c_headers/shaintrin.h" + "${CMAKE_SOURCE_DIR}/c_headers/smmintrin.h" + "${CMAKE_SOURCE_DIR}/c_headers/stdalign.h" + "${CMAKE_SOURCE_DIR}/c_headers/stdarg.h" + "${CMAKE_SOURCE_DIR}/c_headers/stdatomic.h" + "${CMAKE_SOURCE_DIR}/c_headers/stdbool.h" + "${CMAKE_SOURCE_DIR}/c_headers/stddef.h" + "${CMAKE_SOURCE_DIR}/c_headers/__stddef_max_align_t.h" + "${CMAKE_SOURCE_DIR}/c_headers/stdint.h" + "${CMAKE_SOURCE_DIR}/c_headers/stdnoreturn.h" + "${CMAKE_SOURCE_DIR}/c_headers/tbmintrin.h" + "${CMAKE_SOURCE_DIR}/c_headers/tgmath.h" + "${CMAKE_SOURCE_DIR}/c_headers/tmmintrin.h" + "${CMAKE_SOURCE_DIR}/c_headers/unwind.h" + "${CMAKE_SOURCE_DIR}/c_headers/vadefs.h" + "${CMAKE_SOURCE_DIR}/c_headers/varargs.h" + "${CMAKE_SOURCE_DIR}/c_headers/vecintrin.h" + "${CMAKE_SOURCE_DIR}/c_headers/__wmmintrin_aes.h" + "${CMAKE_SOURCE_DIR}/c_headers/wmmintrin.h" + "${CMAKE_SOURCE_DIR}/c_headers/__wmmintrin_pclmul.h" + "${CMAKE_SOURCE_DIR}/c_headers/x86intrin.h" + "${CMAKE_SOURCE_DIR}/c_headers/xmmintrin.h" + "${CMAKE_SOURCE_DIR}/c_headers/xopintrin.h" + "${CMAKE_SOURCE_DIR}/c_headers/xtestintrin.h" +) +set(C_HEADERS_DEST "lib/zig/include") set(CONFIGURE_OUT_FILE "${CMAKE_BINARY_DIR}/config.h") configure_file ( "${CMAKE_SOURCE_DIR}/src/config.h.in" @@ -72,6 +141,8 @@ target_link_libraries(zig LINK_PUBLIC ) install(TARGETS zig DESTINATION bin) +install(FILES ${C_HEADERS} DESTINATION ${C_HEADERS_DEST}) + add_executable(run_tests ${TEST_SOURCES}) target_link_libraries(run_tests) set_target_properties(run_tests PROPERTIES diff --git a/c_headers/Intrin.h b/c_headers/Intrin.h new file mode 100644 index 0000000000..24b3eae8bf --- /dev/null +++ b/c_headers/Intrin.h @@ -0,0 +1,958 @@ +/* ===-------- Intrin.h ---------------------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +/* Only include this if we're compiling for the windows platform. */ +#ifndef _MSC_VER +#include_next +#else + +#ifndef __INTRIN_H +#define __INTRIN_H + +/* First include the standard intrinsics. */ +#if defined(__i386__) || defined(__x86_64__) +#include +#endif + +/* For the definition of jmp_buf. 
*/ +#if __STDC_HOSTED__ +#include +#endif + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(__MMX__) +/* And the random ones that aren't in those files. */ +__m64 _m_from_float(float); +__m64 _m_from_int(int _l); +void _m_prefetch(void *); +float _m_to_float(__m64); +int _m_to_int(__m64 _M); +#endif + +/* Other assorted instruction intrinsics. */ +void __addfsbyte(unsigned long, unsigned char); +void __addfsdword(unsigned long, unsigned long); +void __addfsword(unsigned long, unsigned short); +void __code_seg(const char *); +static __inline__ +void __cpuid(int[4], int); +static __inline__ +void __cpuidex(int[4], int, int); +void __debugbreak(void); +__int64 __emul(int, int); +unsigned __int64 __emulu(unsigned int, unsigned int); +void __cdecl __fastfail(unsigned int); +unsigned int __getcallerseflags(void); +static __inline__ +void __halt(void); +unsigned char __inbyte(unsigned short); +void __inbytestring(unsigned short, unsigned char *, unsigned long); +void __incfsbyte(unsigned long); +void __incfsdword(unsigned long); +void __incfsword(unsigned long); +unsigned long __indword(unsigned short); +void __indwordstring(unsigned short, unsigned long *, unsigned long); +void __int2c(void); +void __invlpg(void *); +unsigned short __inword(unsigned short); +void __inwordstring(unsigned short, unsigned short *, unsigned long); +void __lidt(void *); +unsigned __int64 __ll_lshift(unsigned __int64, int); +__int64 __ll_rshift(__int64, int); +void __llwpcb(void *); +unsigned char __lwpins32(unsigned int, unsigned int, unsigned int); +void __lwpval32(unsigned int, unsigned int, unsigned int); +unsigned int __lzcnt(unsigned int); +unsigned short __lzcnt16(unsigned short); +static __inline__ +void __movsb(unsigned char *, unsigned char const *, size_t); +static __inline__ +void __movsd(unsigned long *, unsigned long const *, size_t); +static __inline__ +void __movsw(unsigned short *, unsigned short const *, size_t); +void __nop(void); +void __nvreg_restore_fence(void); +void __nvreg_save_fence(void); +void __outbyte(unsigned short, unsigned char); +void __outbytestring(unsigned short, unsigned char *, unsigned long); +void __outdword(unsigned short, unsigned long); +void __outdwordstring(unsigned short, unsigned long *, unsigned long); +void __outword(unsigned short, unsigned short); +void __outwordstring(unsigned short, unsigned short *, unsigned long); +static __inline__ +unsigned int __popcnt(unsigned int); +static __inline__ +unsigned short __popcnt16(unsigned short); +unsigned long __readcr0(void); +unsigned long __readcr2(void); +static __inline__ +unsigned long __readcr3(void); +unsigned long __readcr4(void); +unsigned long __readcr8(void); +unsigned int __readdr(unsigned int); +#ifdef __i386__ +static __inline__ +unsigned char __readfsbyte(unsigned long); +static __inline__ +unsigned long __readfsdword(unsigned long); +static __inline__ +unsigned __int64 __readfsqword(unsigned long); +static __inline__ +unsigned short __readfsword(unsigned long); +#endif +static __inline__ +unsigned __int64 __readmsr(unsigned long); +unsigned __int64 __readpmc(unsigned long); +unsigned long __segmentlimit(unsigned long); +void __sidt(void *); +void *__slwpcb(void); +static __inline__ +void __stosb(unsigned char *, unsigned char, size_t); +static __inline__ +void __stosd(unsigned long *, unsigned long, size_t); +static __inline__ +void __stosw(unsigned short 
*, unsigned short, size_t); +void __svm_clgi(void); +void __svm_invlpga(void *, int); +void __svm_skinit(int); +void __svm_stgi(void); +void __svm_vmload(size_t); +void __svm_vmrun(size_t); +void __svm_vmsave(size_t); +void __ud2(void); +unsigned __int64 __ull_rshift(unsigned __int64, int); +void __vmx_off(void); +void __vmx_vmptrst(unsigned __int64 *); +void __wbinvd(void); +void __writecr0(unsigned int); +static __inline__ +void __writecr3(unsigned int); +void __writecr4(unsigned int); +void __writecr8(unsigned int); +void __writedr(unsigned int, unsigned int); +void __writefsbyte(unsigned long, unsigned char); +void __writefsdword(unsigned long, unsigned long); +void __writefsqword(unsigned long, unsigned __int64); +void __writefsword(unsigned long, unsigned short); +void __writemsr(unsigned long, unsigned __int64); +static __inline__ +void *_AddressOfReturnAddress(void); +static __inline__ +unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask); +static __inline__ +unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask); +static __inline__ +unsigned char _bittest(long const *, long); +static __inline__ +unsigned char _bittestandcomplement(long *, long); +static __inline__ +unsigned char _bittestandreset(long *, long); +static __inline__ +unsigned char _bittestandset(long *, long); +unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64); +unsigned long __cdecl _byteswap_ulong(unsigned long); +unsigned short __cdecl _byteswap_ushort(unsigned short); +void __cdecl _disable(void); +void __cdecl _enable(void); +long _InterlockedAddLargeStatistic(__int64 volatile *_Addend, long _Value); +static __inline__ +long _InterlockedAnd(long volatile *_Value, long _Mask); +static __inline__ +short _InterlockedAnd16(short volatile *_Value, short _Mask); +static __inline__ +char _InterlockedAnd8(char volatile *_Value, char _Mask); +unsigned char _interlockedbittestandreset(long volatile *, long); +static __inline__ +unsigned char _interlockedbittestandset(long volatile *, long); +static __inline__ +long __cdecl _InterlockedCompareExchange(long volatile *_Destination, + long _Exchange, long _Comparand); +long _InterlockedCompareExchange_HLEAcquire(long volatile *, long, long); +long _InterlockedCompareExchange_HLERelease(long volatile *, long, long); +static __inline__ +short _InterlockedCompareExchange16(short volatile *_Destination, + short _Exchange, short _Comparand); +static __inline__ +__int64 _InterlockedCompareExchange64(__int64 volatile *_Destination, + __int64 _Exchange, __int64 _Comparand); +__int64 _InterlockedcompareExchange64_HLEAcquire(__int64 volatile *, __int64, + __int64); +__int64 _InterlockedCompareExchange64_HLERelease(__int64 volatile *, __int64, + __int64); +static __inline__ +char _InterlockedCompareExchange8(char volatile *_Destination, char _Exchange, + char _Comparand); +void *_InterlockedCompareExchangePointer_HLEAcquire(void *volatile *, void *, + void *); +void *_InterlockedCompareExchangePointer_HLERelease(void *volatile *, void *, + void *); +static __inline__ +long __cdecl _InterlockedDecrement(long volatile *_Addend); +static __inline__ +short _InterlockedDecrement16(short volatile *_Addend); +long _InterlockedExchange(long volatile *_Target, long _Value); +static __inline__ +short _InterlockedExchange16(short volatile *_Target, short _Value); +static __inline__ +char _InterlockedExchange8(char volatile *_Target, char _Value); +static __inline__ +long __cdecl _InterlockedExchangeAdd(long volatile *_Addend, long _Value); +long 
_InterlockedExchangeAdd_HLEAcquire(long volatile *, long); +long _InterlockedExchangeAdd_HLERelease(long volatile *, long); +static __inline__ +short _InterlockedExchangeAdd16(short volatile *_Addend, short _Value); +__int64 _InterlockedExchangeAdd64_HLEAcquire(__int64 volatile *, __int64); +__int64 _InterlockedExchangeAdd64_HLERelease(__int64 volatile *, __int64); +static __inline__ +char _InterlockedExchangeAdd8(char volatile *_Addend, char _Value); +static __inline__ +long __cdecl _InterlockedIncrement(long volatile *_Addend); +static __inline__ +short _InterlockedIncrement16(short volatile *_Addend); +static __inline__ +long _InterlockedOr(long volatile *_Value, long _Mask); +static __inline__ +short _InterlockedOr16(short volatile *_Value, short _Mask); +static __inline__ +char _InterlockedOr8(char volatile *_Value, char _Mask); +static __inline__ +long _InterlockedXor(long volatile *_Value, long _Mask); +static __inline__ +short _InterlockedXor16(short volatile *_Value, short _Mask); +static __inline__ +char _InterlockedXor8(char volatile *_Value, char _Mask); +void __cdecl _invpcid(unsigned int, void *); +static __inline__ +unsigned long __cdecl _lrotl(unsigned long, int); +static __inline__ +unsigned long __cdecl _lrotr(unsigned long, int); +static __inline__ +static __inline__ +void _ReadBarrier(void); +static __inline__ +void _ReadWriteBarrier(void); +static __inline__ +void *_ReturnAddress(void); +unsigned int _rorx_u32(unsigned int, const unsigned int); +static __inline__ +unsigned int __cdecl _rotl(unsigned int _Value, int _Shift); +static __inline__ +unsigned short _rotl16(unsigned short _Value, unsigned char _Shift); +static __inline__ +unsigned __int64 __cdecl _rotl64(unsigned __int64 _Value, int _Shift); +static __inline__ +unsigned char _rotl8(unsigned char _Value, unsigned char _Shift); +static __inline__ +unsigned int __cdecl _rotr(unsigned int _Value, int _Shift); +static __inline__ +unsigned short _rotr16(unsigned short _Value, unsigned char _Shift); +static __inline__ +unsigned __int64 __cdecl _rotr64(unsigned __int64 _Value, int _Shift); +static __inline__ +unsigned char _rotr8(unsigned char _Value, unsigned char _Shift); +int _sarx_i32(int, unsigned int); +#if __STDC_HOSTED__ +int __cdecl _setjmp(jmp_buf); +#endif +unsigned int _shlx_u32(unsigned int, unsigned int); +unsigned int _shrx_u32(unsigned int, unsigned int); +void _Store_HLERelease(long volatile *, long); +void _Store64_HLERelease(__int64 volatile *, __int64); +void _StorePointer_HLERelease(void *volatile *, void *); +static __inline__ +void _WriteBarrier(void); +unsigned __int32 xbegin(void); +void _xend(void); +static __inline__ +#define _XCR_XFEATURE_ENABLED_MASK 0 +unsigned __int64 __cdecl _xgetbv(unsigned int); +void __cdecl _xrstor(void const *, unsigned __int64); +void __cdecl _xsave(void *, unsigned __int64); +void __cdecl _xsaveopt(void *, unsigned __int64); +void __cdecl _xsetbv(unsigned int, unsigned __int64); + +/* These additional intrinsics are turned on in x64/amd64/x86_64 mode. 
*/ +#ifdef __x86_64__ +void __addgsbyte(unsigned long, unsigned char); +void __addgsdword(unsigned long, unsigned long); +void __addgsqword(unsigned long, unsigned __int64); +void __addgsword(unsigned long, unsigned short); +static __inline__ +void __faststorefence(void); +void __incgsbyte(unsigned long); +void __incgsdword(unsigned long); +void __incgsqword(unsigned long); +void __incgsword(unsigned long); +unsigned char __lwpins64(unsigned __int64, unsigned int, unsigned int); +void __lwpval64(unsigned __int64, unsigned int, unsigned int); +unsigned __int64 __lzcnt64(unsigned __int64); +static __inline__ +void __movsq(unsigned long long *, unsigned long long const *, size_t); +__int64 __mulh(__int64, __int64); +static __inline__ +unsigned __int64 __popcnt64(unsigned __int64); +static __inline__ +unsigned char __readgsbyte(unsigned long); +static __inline__ +unsigned long __readgsdword(unsigned long); +static __inline__ +unsigned __int64 __readgsqword(unsigned long); +unsigned short __readgsword(unsigned long); +unsigned __int64 __shiftleft128(unsigned __int64 _LowPart, + unsigned __int64 _HighPart, + unsigned char _Shift); +unsigned __int64 __shiftright128(unsigned __int64 _LowPart, + unsigned __int64 _HighPart, + unsigned char _Shift); +static __inline__ +void __stosq(unsigned __int64 *, unsigned __int64, size_t); +unsigned char __vmx_on(unsigned __int64 *); +unsigned char __vmx_vmclear(unsigned __int64 *); +unsigned char __vmx_vmlaunch(void); +unsigned char __vmx_vmptrld(unsigned __int64 *); +unsigned char __vmx_vmread(size_t, size_t *); +unsigned char __vmx_vmresume(void); +unsigned char __vmx_vmwrite(size_t, size_t); +void __writegsbyte(unsigned long, unsigned char); +void __writegsdword(unsigned long, unsigned long); +void __writegsqword(unsigned long, unsigned __int64); +void __writegsword(unsigned long, unsigned short); +static __inline__ +unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask); +static __inline__ +unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask); +static __inline__ +unsigned char _bittest64(__int64 const *, __int64); +static __inline__ +unsigned char _bittestandcomplement64(__int64 *, __int64); +static __inline__ +unsigned char _bittestandreset64(__int64 *, __int64); +static __inline__ +unsigned char _bittestandset64(__int64 *, __int64); +unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64); +long _InterlockedAnd_np(long volatile *_Value, long _Mask); +short _InterlockedAnd16_np(short volatile *_Value, short _Mask); +__int64 _InterlockedAnd64_np(__int64 volatile *_Value, __int64 _Mask); +char _InterlockedAnd8_np(char volatile *_Value, char _Mask); +unsigned char _interlockedbittestandreset64(__int64 volatile *, __int64); +static __inline__ +unsigned char _interlockedbittestandset64(__int64 volatile *, __int64); +long _InterlockedCompareExchange_np(long volatile *_Destination, long _Exchange, + long _Comparand); +unsigned char _InterlockedCompareExchange128(__int64 volatile *_Destination, + __int64 _ExchangeHigh, + __int64 _ExchangeLow, + __int64 *_CompareandResult); +unsigned char _InterlockedCompareExchange128_np(__int64 volatile *_Destination, + __int64 _ExchangeHigh, + __int64 _ExchangeLow, + __int64 *_ComparandResult); +short _InterlockedCompareExchange16_np(short volatile *_Destination, + short _Exchange, short _Comparand); +__int64 _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *, __int64, + __int64); +__int64 _InterlockedCompareExchange64_HLERelease(__int64 volatile *, __int64, + 
__int64); +__int64 _InterlockedCompareExchange64_np(__int64 volatile *_Destination, + __int64 _Exchange, __int64 _Comparand); +void *_InterlockedCompareExchangePointer(void *volatile *_Destination, + void *_Exchange, void *_Comparand); +void *_InterlockedCompareExchangePointer_np(void *volatile *_Destination, + void *_Exchange, void *_Comparand); +static __inline__ +__int64 _InterlockedDecrement64(__int64 volatile *_Addend); +static __inline__ +__int64 _InterlockedExchange64(__int64 volatile *_Target, __int64 _Value); +static __inline__ +__int64 _InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value); +void *_InterlockedExchangePointer(void *volatile *_Target, void *_Value); +static __inline__ +__int64 _InterlockedIncrement64(__int64 volatile *_Addend); +long _InterlockedOr_np(long volatile *_Value, long _Mask); +short _InterlockedOr16_np(short volatile *_Value, short _Mask); +static __inline__ +__int64 _InterlockedOr64(__int64 volatile *_Value, __int64 _Mask); +__int64 _InterlockedOr64_np(__int64 volatile *_Value, __int64 _Mask); +char _InterlockedOr8_np(char volatile *_Value, char _Mask); +long _InterlockedXor_np(long volatile *_Value, long _Mask); +short _InterlockedXor16_np(short volatile *_Value, short _Mask); +static __inline__ +__int64 _InterlockedXor64(__int64 volatile *_Value, __int64 _Mask); +__int64 _InterlockedXor64_np(__int64 volatile *_Value, __int64 _Mask); +char _InterlockedXor8_np(char volatile *_Value, char _Mask); +static __inline__ +__int64 _mul128(__int64 _Multiplier, __int64 _Multiplicand, + __int64 *_HighProduct); +unsigned __int64 _rorx_u64(unsigned __int64, const unsigned int); +__int64 _sarx_i64(__int64, unsigned int); +#if __STDC_HOSTED__ +int __cdecl _setjmpex(jmp_buf); +#endif +unsigned __int64 _shlx_u64(unsigned __int64, unsigned int); +unsigned __int64 _shrx_u64(unsigned __int64, unsigned int); +/* + * Multiply two 64-bit integers and obtain a 64-bit result. + * The low-half is returned directly and the high half is in an out parameter. + */ +static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS +_umul128(unsigned __int64 _Multiplier, unsigned __int64 _Multiplicand, + unsigned __int64 *_HighProduct) { + unsigned __int128 _FullProduct = + (unsigned __int128)_Multiplier * (unsigned __int128)_Multiplicand; + *_HighProduct = _FullProduct >> 64; + return _FullProduct; +} +static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS +__umulh(unsigned __int64 _Multiplier, unsigned __int64 _Multiplicand) { + unsigned __int128 _FullProduct = + (unsigned __int128)_Multiplier * (unsigned __int128)_Multiplicand; + return _FullProduct >> 64; +} +void __cdecl _xrstor64(void const *, unsigned __int64); +void __cdecl _xsave64(void *, unsigned __int64); +void __cdecl _xsaveopt64(void *, unsigned __int64); + +#endif /* __x86_64__ */ + +/*----------------------------------------------------------------------------*\ +|* Bit Twiddling +\*----------------------------------------------------------------------------*/ +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_rotl8(unsigned char _Value, unsigned char _Shift) { + _Shift &= 0x7; + return _Shift ? (_Value << _Shift) | (_Value >> (8 - _Shift)) : _Value; +} +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_rotr8(unsigned char _Value, unsigned char _Shift) { + _Shift &= 0x7; + return _Shift ? (_Value >> _Shift) | (_Value << (8 - _Shift)) : _Value; +} +static __inline__ unsigned short __DEFAULT_FN_ATTRS +_rotl16(unsigned short _Value, unsigned char _Shift) { + _Shift &= 0xf; + return _Shift ? 
(_Value << _Shift) | (_Value >> (16 - _Shift)) : _Value; +} +static __inline__ unsigned short __DEFAULT_FN_ATTRS +_rotr16(unsigned short _Value, unsigned char _Shift) { + _Shift &= 0xf; + return _Shift ? (_Value >> _Shift) | (_Value << (16 - _Shift)) : _Value; +} +static __inline__ unsigned int __DEFAULT_FN_ATTRS +_rotl(unsigned int _Value, int _Shift) { + _Shift &= 0x1f; + return _Shift ? (_Value << _Shift) | (_Value >> (32 - _Shift)) : _Value; +} +static __inline__ unsigned int __DEFAULT_FN_ATTRS +_rotr(unsigned int _Value, int _Shift) { + _Shift &= 0x1f; + return _Shift ? (_Value >> _Shift) | (_Value << (32 - _Shift)) : _Value; +} +static __inline__ unsigned long __DEFAULT_FN_ATTRS +_lrotl(unsigned long _Value, int _Shift) { + _Shift &= 0x1f; + return _Shift ? (_Value << _Shift) | (_Value >> (32 - _Shift)) : _Value; +} +static __inline__ unsigned long __DEFAULT_FN_ATTRS +_lrotr(unsigned long _Value, int _Shift) { + _Shift &= 0x1f; + return _Shift ? (_Value >> _Shift) | (_Value << (32 - _Shift)) : _Value; +} +static +__inline__ unsigned __int64 __DEFAULT_FN_ATTRS +_rotl64(unsigned __int64 _Value, int _Shift) { + _Shift &= 0x3f; + return _Shift ? (_Value << _Shift) | (_Value >> (64 - _Shift)) : _Value; +} +static +__inline__ unsigned __int64 __DEFAULT_FN_ATTRS +_rotr64(unsigned __int64 _Value, int _Shift) { + _Shift &= 0x3f; + return _Shift ? (_Value >> _Shift) | (_Value << (64 - _Shift)) : _Value; +} +/*----------------------------------------------------------------------------*\ +|* Bit Counting and Testing +\*----------------------------------------------------------------------------*/ +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_BitScanForward(unsigned long *_Index, unsigned long _Mask) { + if (!_Mask) + return 0; + *_Index = __builtin_ctzl(_Mask); + return 1; +} +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_BitScanReverse(unsigned long *_Index, unsigned long _Mask) { + if (!_Mask) + return 0; + *_Index = 31 - __builtin_clzl(_Mask); + return 1; +} +static __inline__ unsigned short __DEFAULT_FN_ATTRS +__popcnt16(unsigned short _Value) { + return __builtin_popcount((int)_Value); +} +static __inline__ unsigned int __DEFAULT_FN_ATTRS +__popcnt(unsigned int _Value) { + return __builtin_popcount(_Value); +} +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_bittest(long const *_BitBase, long _BitPos) { + return (*_BitBase >> _BitPos) & 1; +} +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_bittestandcomplement(long *_BitBase, long _BitPos) { + unsigned char _Res = (*_BitBase >> _BitPos) & 1; + *_BitBase = *_BitBase ^ (1 << _BitPos); + return _Res; +} +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_bittestandreset(long *_BitBase, long _BitPos) { + unsigned char _Res = (*_BitBase >> _BitPos) & 1; + *_BitBase = *_BitBase & ~(1 << _BitPos); + return _Res; +} +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_bittestandset(long *_BitBase, long _BitPos) { + unsigned char _Res = (*_BitBase >> _BitPos) & 1; + *_BitBase = *_BitBase | (1 << _BitPos); + return _Res; +} +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_interlockedbittestandset(long volatile *_BitBase, long _BitPos) { + long _PrevVal = __atomic_fetch_or(_BitBase, 1l << _BitPos, __ATOMIC_SEQ_CST); + return (_PrevVal >> _BitPos) & 1; +} +#ifdef __x86_64__ +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask) { + if (!_Mask) + return 0; + *_Index = __builtin_ctzll(_Mask); + return 1; +} +static __inline__ unsigned char __DEFAULT_FN_ATTRS 
+_BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask) { + if (!_Mask) + return 0; + *_Index = 63 - __builtin_clzll(_Mask); + return 1; +} +static __inline__ +unsigned __int64 __DEFAULT_FN_ATTRS +__popcnt64(unsigned __int64 _Value) { + return __builtin_popcountll(_Value); +} +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_bittest64(__int64 const *_BitBase, __int64 _BitPos) { + return (*_BitBase >> _BitPos) & 1; +} +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_bittestandcomplement64(__int64 *_BitBase, __int64 _BitPos) { + unsigned char _Res = (*_BitBase >> _BitPos) & 1; + *_BitBase = *_BitBase ^ (1ll << _BitPos); + return _Res; +} +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_bittestandreset64(__int64 *_BitBase, __int64 _BitPos) { + unsigned char _Res = (*_BitBase >> _BitPos) & 1; + *_BitBase = *_BitBase & ~(1ll << _BitPos); + return _Res; +} +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_bittestandset64(__int64 *_BitBase, __int64 _BitPos) { + unsigned char _Res = (*_BitBase >> _BitPos) & 1; + *_BitBase = *_BitBase | (1ll << _BitPos); + return _Res; +} +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_interlockedbittestandset64(__int64 volatile *_BitBase, __int64 _BitPos) { + long long _PrevVal = + __atomic_fetch_or(_BitBase, 1ll << _BitPos, __ATOMIC_SEQ_CST); + return (_PrevVal >> _BitPos) & 1; +} +#endif +/*----------------------------------------------------------------------------*\ +|* Interlocked Exchange Add +\*----------------------------------------------------------------------------*/ +static __inline__ char __DEFAULT_FN_ATTRS +_InterlockedExchangeAdd8(char volatile *_Addend, char _Value) { + return __atomic_fetch_add(_Addend, _Value, __ATOMIC_SEQ_CST); +} +static __inline__ short __DEFAULT_FN_ATTRS +_InterlockedExchangeAdd16(short volatile *_Addend, short _Value) { + return __atomic_fetch_add(_Addend, _Value, __ATOMIC_SEQ_CST); +} +#ifdef __x86_64__ +static __inline__ __int64 __DEFAULT_FN_ATTRS +_InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value) { + return __atomic_fetch_add(_Addend, _Value, __ATOMIC_SEQ_CST); +} +#endif +/*----------------------------------------------------------------------------*\ +|* Interlocked Exchange Sub +\*----------------------------------------------------------------------------*/ +static __inline__ char __DEFAULT_FN_ATTRS +_InterlockedExchangeSub8(char volatile *_Subend, char _Value) { + return __atomic_fetch_sub(_Subend, _Value, __ATOMIC_SEQ_CST); +} +static __inline__ short __DEFAULT_FN_ATTRS +_InterlockedExchangeSub16(short volatile *_Subend, short _Value) { + return __atomic_fetch_sub(_Subend, _Value, __ATOMIC_SEQ_CST); +} +static __inline__ long __DEFAULT_FN_ATTRS +_InterlockedExchangeSub(long volatile *_Subend, long _Value) { + return __atomic_fetch_sub(_Subend, _Value, __ATOMIC_SEQ_CST); +} +#ifdef __x86_64__ +static __inline__ __int64 __DEFAULT_FN_ATTRS +_InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value) { + return __atomic_fetch_sub(_Subend, _Value, __ATOMIC_SEQ_CST); +} +#endif +/*----------------------------------------------------------------------------*\ +|* Interlocked Increment +\*----------------------------------------------------------------------------*/ +static __inline__ short __DEFAULT_FN_ATTRS +_InterlockedIncrement16(short volatile *_Value) { + return __atomic_add_fetch(_Value, 1, __ATOMIC_SEQ_CST); +} +#ifdef __x86_64__ +static __inline__ __int64 __DEFAULT_FN_ATTRS +_InterlockedIncrement64(__int64 volatile *_Value) { + return 
__atomic_add_fetch(_Value, 1, __ATOMIC_SEQ_CST); +} +#endif +/*----------------------------------------------------------------------------*\ +|* Interlocked Decrement +\*----------------------------------------------------------------------------*/ +static __inline__ short __DEFAULT_FN_ATTRS +_InterlockedDecrement16(short volatile *_Value) { + return __atomic_sub_fetch(_Value, 1, __ATOMIC_SEQ_CST); +} +#ifdef __x86_64__ +static __inline__ __int64 __DEFAULT_FN_ATTRS +_InterlockedDecrement64(__int64 volatile *_Value) { + return __atomic_sub_fetch(_Value, 1, __ATOMIC_SEQ_CST); +} +#endif +/*----------------------------------------------------------------------------*\ +|* Interlocked And +\*----------------------------------------------------------------------------*/ +static __inline__ char __DEFAULT_FN_ATTRS +_InterlockedAnd8(char volatile *_Value, char _Mask) { + return __atomic_and_fetch(_Value, _Mask, __ATOMIC_SEQ_CST); +} +static __inline__ short __DEFAULT_FN_ATTRS +_InterlockedAnd16(short volatile *_Value, short _Mask) { + return __atomic_and_fetch(_Value, _Mask, __ATOMIC_SEQ_CST); +} +static __inline__ long __DEFAULT_FN_ATTRS +_InterlockedAnd(long volatile *_Value, long _Mask) { + return __atomic_and_fetch(_Value, _Mask, __ATOMIC_SEQ_CST); +} +#ifdef __x86_64__ +static __inline__ __int64 __DEFAULT_FN_ATTRS +_InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask) { + return __atomic_and_fetch(_Value, _Mask, __ATOMIC_SEQ_CST); +} +#endif +/*----------------------------------------------------------------------------*\ +|* Interlocked Or +\*----------------------------------------------------------------------------*/ +static __inline__ char __DEFAULT_FN_ATTRS +_InterlockedOr8(char volatile *_Value, char _Mask) { + return __atomic_or_fetch(_Value, _Mask, __ATOMIC_SEQ_CST); +} +static __inline__ short __DEFAULT_FN_ATTRS +_InterlockedOr16(short volatile *_Value, short _Mask) { + return __atomic_or_fetch(_Value, _Mask, __ATOMIC_SEQ_CST); +} +static __inline__ long __DEFAULT_FN_ATTRS +_InterlockedOr(long volatile *_Value, long _Mask) { + return __atomic_or_fetch(_Value, _Mask, __ATOMIC_SEQ_CST); +} +#ifdef __x86_64__ +static __inline__ __int64 __DEFAULT_FN_ATTRS +_InterlockedOr64(__int64 volatile *_Value, __int64 _Mask) { + return __atomic_or_fetch(_Value, _Mask, __ATOMIC_SEQ_CST); +} +#endif +/*----------------------------------------------------------------------------*\ +|* Interlocked Xor +\*----------------------------------------------------------------------------*/ +static __inline__ char __DEFAULT_FN_ATTRS +_InterlockedXor8(char volatile *_Value, char _Mask) { + return __atomic_xor_fetch(_Value, _Mask, __ATOMIC_SEQ_CST); +} +static __inline__ short __DEFAULT_FN_ATTRS +_InterlockedXor16(short volatile *_Value, short _Mask) { + return __atomic_xor_fetch(_Value, _Mask, __ATOMIC_SEQ_CST); +} +static __inline__ long __DEFAULT_FN_ATTRS +_InterlockedXor(long volatile *_Value, long _Mask) { + return __atomic_xor_fetch(_Value, _Mask, __ATOMIC_SEQ_CST); +} +#ifdef __x86_64__ +static __inline__ __int64 __DEFAULT_FN_ATTRS +_InterlockedXor64(__int64 volatile *_Value, __int64 _Mask) { + return __atomic_xor_fetch(_Value, _Mask, __ATOMIC_SEQ_CST); +} +#endif +/*----------------------------------------------------------------------------*\ +|* Interlocked Exchange +\*----------------------------------------------------------------------------*/ +static __inline__ char __DEFAULT_FN_ATTRS +_InterlockedExchange8(char volatile *_Target, char _Value) { + __atomic_exchange(_Target, &_Value, &_Value, 
__ATOMIC_SEQ_CST); + return _Value; +} +static __inline__ short __DEFAULT_FN_ATTRS +_InterlockedExchange16(short volatile *_Target, short _Value) { + __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_SEQ_CST); + return _Value; +} +#ifdef __x86_64__ +static __inline__ __int64 __DEFAULT_FN_ATTRS +_InterlockedExchange64(__int64 volatile *_Target, __int64 _Value) { + __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_SEQ_CST); + return _Value; +} +#endif +/*----------------------------------------------------------------------------*\ +|* Interlocked Compare Exchange +\*----------------------------------------------------------------------------*/ +static __inline__ char __DEFAULT_FN_ATTRS +_InterlockedCompareExchange8(char volatile *_Destination, + char _Exchange, char _Comparand) { + __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); + return _Comparand; +} +static __inline__ short __DEFAULT_FN_ATTRS +_InterlockedCompareExchange16(short volatile *_Destination, + short _Exchange, short _Comparand) { + __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); + return _Comparand; +} +static __inline__ __int64 __DEFAULT_FN_ATTRS +_InterlockedCompareExchange64(__int64 volatile *_Destination, + __int64 _Exchange, __int64 _Comparand) { + __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); + return _Comparand; +} +/*----------------------------------------------------------------------------*\ +|* Barriers +\*----------------------------------------------------------------------------*/ +#if defined(__i386__) || defined(__x86_64__) +static __inline__ void __DEFAULT_FN_ATTRS +__attribute__((__deprecated__("use other intrinsics or C++11 atomics instead"))) +_ReadWriteBarrier(void) { + __asm__ volatile ("" : : : "memory"); +} +static __inline__ void __DEFAULT_FN_ATTRS +__attribute__((__deprecated__("use other intrinsics or C++11 atomics instead"))) +_ReadBarrier(void) { + __asm__ volatile ("" : : : "memory"); +} +static __inline__ void __DEFAULT_FN_ATTRS +__attribute__((__deprecated__("use other intrinsics or C++11 atomics instead"))) +_WriteBarrier(void) { + __asm__ volatile ("" : : : "memory"); +} +#endif +#ifdef __x86_64__ +static __inline__ void __DEFAULT_FN_ATTRS +__faststorefence(void) { + __asm__ volatile("lock orq $0, (%%rsp)" : : : "memory"); +} +#endif +/*----------------------------------------------------------------------------*\ +|* readfs, readgs +|* (Pointers in address space #256 and #257 are relative to the GS and FS +|* segment registers, respectively.) 
+\*----------------------------------------------------------------------------*/ +#define __ptr_to_addr_space(__addr_space_nbr, __type, __offset) \ + ((volatile __type __attribute__((__address_space__(__addr_space_nbr)))*) \ + (__offset)) + +#ifdef __i386__ +static __inline__ unsigned char __DEFAULT_FN_ATTRS +__readfsbyte(unsigned long __offset) { + return *__ptr_to_addr_space(257, unsigned char, __offset); +} +static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS +__readfsqword(unsigned long __offset) { + return *__ptr_to_addr_space(257, unsigned __int64, __offset); +} +static __inline__ unsigned short __DEFAULT_FN_ATTRS +__readfsword(unsigned long __offset) { + return *__ptr_to_addr_space(257, unsigned short, __offset); +} +#endif +#ifdef __x86_64__ +static __inline__ unsigned char __DEFAULT_FN_ATTRS +__readgsbyte(unsigned long __offset) { + return *__ptr_to_addr_space(256, unsigned char, __offset); +} +static __inline__ unsigned long __DEFAULT_FN_ATTRS +__readgsdword(unsigned long __offset) { + return *__ptr_to_addr_space(256, unsigned long, __offset); +} +static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS +__readgsqword(unsigned long __offset) { + return *__ptr_to_addr_space(256, unsigned __int64, __offset); +} +static __inline__ unsigned short __DEFAULT_FN_ATTRS +__readgsword(unsigned long __offset) { + return *__ptr_to_addr_space(256, unsigned short, __offset); +} +#endif +#undef __ptr_to_addr_space +/*----------------------------------------------------------------------------*\ +|* movs, stos +\*----------------------------------------------------------------------------*/ +#if defined(__i386__) || defined(__x86_64__) +static __inline__ void __DEFAULT_FN_ATTRS +__movsb(unsigned char *__dst, unsigned char const *__src, size_t __n) { + __asm__("rep movsb" : : "D"(__dst), "S"(__src), "c"(__n) + : "%edi", "%esi", "%ecx"); +} +static __inline__ void __DEFAULT_FN_ATTRS +__movsd(unsigned long *__dst, unsigned long const *__src, size_t __n) { + __asm__("rep movsl" : : "D"(__dst), "S"(__src), "c"(__n) + : "%edi", "%esi", "%ecx"); +} +static __inline__ void __DEFAULT_FN_ATTRS +__movsw(unsigned short *__dst, unsigned short const *__src, size_t __n) { + __asm__("rep movsh" : : "D"(__dst), "S"(__src), "c"(__n) + : "%edi", "%esi", "%ecx"); +} +static __inline__ void __DEFAULT_FN_ATTRS +__stosb(unsigned char *__dst, unsigned char __x, size_t __n) { + __asm__("rep stosb" : : "D"(__dst), "a"(__x), "c"(__n) + : "%edi", "%ecx"); +} +static __inline__ void __DEFAULT_FN_ATTRS +__stosd(unsigned long *__dst, unsigned long __x, size_t __n) { + __asm__("rep stosl" : : "D"(__dst), "a"(__x), "c"(__n) + : "%edi", "%ecx"); +} +static __inline__ void __DEFAULT_FN_ATTRS +__stosw(unsigned short *__dst, unsigned short __x, size_t __n) { + __asm__("rep stosh" : : "D"(__dst), "a"(__x), "c"(__n) + : "%edi", "%ecx"); +} +#endif +#ifdef __x86_64__ +static __inline__ void __DEFAULT_FN_ATTRS +__movsq(unsigned long long *__dst, unsigned long long const *__src, size_t __n) { + __asm__("rep movsq" : : "D"(__dst), "S"(__src), "c"(__n) + : "%edi", "%esi", "%ecx"); +} +static __inline__ void __DEFAULT_FN_ATTRS +__stosq(unsigned __int64 *__dst, unsigned __int64 __x, size_t __n) { + __asm__("rep stosq" : : "D"(__dst), "a"(__x), "c"(__n) + : "%edi", "%ecx"); +} +#endif + +/*----------------------------------------------------------------------------*\ +|* Misc +\*----------------------------------------------------------------------------*/ +static __inline__ void * __DEFAULT_FN_ATTRS +_AddressOfReturnAddress(void) { + return 
(void*)((char*)__builtin_frame_address(0) + sizeof(void*)); +} +static __inline__ void * __DEFAULT_FN_ATTRS +_ReturnAddress(void) { + return __builtin_return_address(0); +} +#if defined(__i386__) || defined(__x86_64__) +static __inline__ void __DEFAULT_FN_ATTRS +__cpuid(int __info[4], int __level) { + __asm__ ("cpuid" : "=a"(__info[0]), "=b" (__info[1]), "=c"(__info[2]), "=d"(__info[3]) + : "a"(__level)); +} +static __inline__ void __DEFAULT_FN_ATTRS +__cpuidex(int __info[4], int __level, int __ecx) { + __asm__ ("cpuid" : "=a"(__info[0]), "=b" (__info[1]), "=c"(__info[2]), "=d"(__info[3]) + : "a"(__level), "c"(__ecx)); +} +static __inline__ unsigned __int64 __cdecl __DEFAULT_FN_ATTRS +_xgetbv(unsigned int __xcr_no) { + unsigned int __eax, __edx; + __asm__ ("xgetbv" : "=a" (__eax), "=d" (__edx) : "c" (__xcr_no)); + return ((unsigned __int64)__edx << 32) | __eax; +} +static __inline__ void __DEFAULT_FN_ATTRS +__halt(void) { + __asm__ volatile ("hlt"); +} +#endif + +/*----------------------------------------------------------------------------*\ +|* Privileged intrinsics +\*----------------------------------------------------------------------------*/ +#if defined(__i386__) || defined(__x86_64__) +static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS +__readmsr(unsigned long __register) { + // Loads the contents of a 64-bit model specific register (MSR) specified in + // the ECX register into registers EDX:EAX. The EDX register is loaded with + // the high-order 32 bits of the MSR and the EAX register is loaded with the + // low-order 32 bits. If less than 64 bits are implemented in the MSR being + // read, the values returned to EDX:EAX in unimplemented bit locations are + // undefined. + unsigned long __edx; + unsigned long __eax; + __asm__ ("rdmsr" : "=d"(__edx), "=a"(__eax) : "c"(__register)); + return (((unsigned __int64)__edx) << 32) | (unsigned __int64)__eax; +} + +static __inline__ unsigned long __DEFAULT_FN_ATTRS +__readcr3(void) { + unsigned long __cr3_val; + __asm__ __volatile__ ("mov %%cr3, %0" : "=q"(__cr3_val) : : "memory"); + return __cr3_val; +} + +static __inline__ void __DEFAULT_FN_ATTRS +__writecr3(unsigned int __cr3_val) { + __asm__ ("mov %0, %%cr3" : : "q"(__cr3_val) : "memory"); +} +#endif + +#ifdef __cplusplus +} +#endif + +#undef __DEFAULT_FN_ATTRS + +#endif /* __INTRIN_H */ +#endif /* _MSC_VER */ diff --git a/c_headers/__stddef_max_align_t.h b/c_headers/__stddef_max_align_t.h new file mode 100644 index 0000000000..1e10ca9865 --- /dev/null +++ b/c_headers/__stddef_max_align_t.h @@ -0,0 +1,43 @@ +/*===---- __stddef_max_align_t.h - Definition of max_align_t for modules ---=== + * + * Copyright (c) 2014 Chandler Carruth + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __CLANG_MAX_ALIGN_T_DEFINED
+#define __CLANG_MAX_ALIGN_T_DEFINED
+
+#if defined(_MSC_VER)
+typedef double max_align_t;
+#elif defined(__APPLE__)
+typedef long double max_align_t;
+#else
+// Define 'max_align_t' to match the GCC definition.
+typedef struct {
+  long long __clang_max_align_nonce1
+      __attribute__((__aligned__(__alignof__(long long))));
+  long double __clang_max_align_nonce2
+      __attribute__((__aligned__(__alignof__(long double))));
+} max_align_t;
+#endif
+
+#endif

diff --git a/c_headers/__wmmintrin_aes.h b/c_headers/__wmmintrin_aes.h
new file mode 100644
index 0000000000..9f594ee560
--- /dev/null
+++ b/c_headers/__wmmintrin_aes.h
@@ -0,0 +1,72 @@
+/*===---- __wmmintrin_aes.h - AES intrinsics -------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef _WMMINTRIN_AES_H
+#define _WMMINTRIN_AES_H
+
+#include <emmintrin.h>
+
+#if !defined (__AES__)
+# error "AES instructions not enabled"
+#else
+
+/* Define the default attributes for the functions in this file.
*/ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_aesenc_si128(__m128i __V, __m128i __R) +{ + return (__m128i)__builtin_ia32_aesenc128(__V, __R); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_aesenclast_si128(__m128i __V, __m128i __R) +{ + return (__m128i)__builtin_ia32_aesenclast128(__V, __R); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_aesdec_si128(__m128i __V, __m128i __R) +{ + return (__m128i)__builtin_ia32_aesdec128(__V, __R); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_aesdeclast_si128(__m128i __V, __m128i __R) +{ + return (__m128i)__builtin_ia32_aesdeclast128(__V, __R); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_aesimc_si128(__m128i __V) +{ + return (__m128i)__builtin_ia32_aesimc128(__V); +} + +#define _mm_aeskeygenassist_si128(C, R) \ + __builtin_ia32_aeskeygenassist128((C), (R)) + +#undef __DEFAULT_FN_ATTRS + +#endif + +#endif /* _WMMINTRIN_AES_H */ diff --git a/c_headers/__wmmintrin_pclmul.h b/c_headers/__wmmintrin_pclmul.h new file mode 100644 index 0000000000..8d1f1b7c08 --- /dev/null +++ b/c_headers/__wmmintrin_pclmul.h @@ -0,0 +1,34 @@ +/*===---- __wmmintrin_pclmul.h - AES intrinsics ----------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef _WMMINTRIN_PCLMUL_H
+#define _WMMINTRIN_PCLMUL_H
+
+#if !defined (__PCLMUL__)
+# error "PCLMUL instruction is not enabled"
+#else
+#define _mm_clmulepi64_si128(__X, __Y, __I) \
+  ((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(__X), \
+                                        (__v2di)(__m128i)(__Y), (char)(__I)))
+#endif
+
+#endif /* _WMMINTRIN_PCLMUL_H */

diff --git a/c_headers/adxintrin.h b/c_headers/adxintrin.h
new file mode 100644
index 0000000000..b8eb9cbf6e
--- /dev/null
+++ b/c_headers/adxintrin.h
@@ -0,0 +1,88 @@
+/*===---- adxintrin.h - ADX intrinsics -------------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error "Never use <adxintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __ADXINTRIN_H
+#define __ADXINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+
+/* Intrinsics that are available only if __ADX__ defined */
+#ifdef __ADX__
+static __inline unsigned char __DEFAULT_FN_ATTRS
+_addcarryx_u32(unsigned char __cf, unsigned int __x, unsigned int __y,
+               unsigned int *__p)
+{
+  return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);
+}
+
+#ifdef __x86_64__
+static __inline unsigned char __DEFAULT_FN_ATTRS
+_addcarryx_u64(unsigned char __cf, unsigned long long __x,
+               unsigned long long __y, unsigned long long *__p)
+{
+  return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);
+}
+#endif
+#endif
+
+/* Intrinsics that are also available if __ADX__ undefined */
+static __inline unsigned char __DEFAULT_FN_ATTRS
+_addcarry_u32(unsigned char __cf, unsigned int __x, unsigned int __y,
+              unsigned int *__p)
+{
+  return __builtin_ia32_addcarry_u32(__cf, __x, __y, __p);
+}
+
+#ifdef __x86_64__
+static __inline unsigned char __DEFAULT_FN_ATTRS
+_addcarry_u64(unsigned char __cf, unsigned long long __x,
+              unsigned long long __y, unsigned long long *__p)
+{
+  return __builtin_ia32_addcarry_u64(__cf, __x, __y, __p);
+}
+#endif
+
+static __inline unsigned char __DEFAULT_FN_ATTRS
+_subborrow_u32(unsigned char __cf, unsigned int __x, unsigned int __y,
+               unsigned int *__p)
+{
+  return __builtin_ia32_subborrow_u32(__cf, __x, __y, __p);
+}
+
+#ifdef __x86_64__
+static __inline unsigned char __DEFAULT_FN_ATTRS
+_subborrow_u64(unsigned char __cf, unsigned long long __x,
+               unsigned long long __y, unsigned long long *__p)
+{
+  return __builtin_ia32_subborrow_u64(__cf, __x, __y, __p);
+}
+#endif
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif /* __ADXINTRIN_H */

diff --git a/c_headers/ammintrin.h b/c_headers/ammintrin.h
new file mode 100644
index 0000000000..4d0e770ff9
--- /dev/null
+++ b/c_headers/ammintrin.h
@@ -0,0 +1,215 @@
+/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __AMMINTRIN_H
+#define __AMMINTRIN_H
+
+#ifndef __SSE4A__
+#error "SSE4A instruction set not enabled"
+#else
+
+#include <pmmintrin.h>
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+
+/// \brief Extracts the specified bits from the lower 64 bits of the 128-bit
+///    integer vector operand at the index idx and of the length len.
diff --git a/c_headers/ammintrin.h b/c_headers/ammintrin.h
new file mode 100644
index 0000000000..4d0e770ff9
--- /dev/null
+++ b/c_headers/ammintrin.h
@@ -0,0 +1,215 @@
+/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __AMMINTRIN_H
+#define __AMMINTRIN_H
+
+#ifndef __SSE4A__
+#error "SSE4A instruction set not enabled"
+#else
+
+#include <pmmintrin.h>
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+
+/// \brief Extracts the specified bits from the lower 64 bits of the 128-bit
+///    integer vector operand at the index idx and of the length len.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m128i _mm_extracti_si64(__m128i x, const int len, const int idx);
+/// \endcode
+///
+/// \code
+/// This intrinsic corresponds to the \c EXTRQ instruction.
+/// \endcode
+///
+/// \param x
+///    The value from which bits are extracted.
+/// \param len
+///    Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]
+///    are zero, the length is interpreted as 64.
+/// \param idx
+///    Bits [5:0] specify the index of the least significant bit; the other
+///    bits are ignored. If the sum of the index and length is greater than
+///    64, the result is undefined. If the length and index are both zero,
+///    bits [63:0] of parameter x are extracted. If the length is zero
+///    but the index is non-zero, the result is undefined.
+/// \returns A 128-bit integer vector whose lower 64 bits contain the bits
+///    extracted from the source operand.
+#define _mm_extracti_si64(x, len, idx) \
+  ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \
+                                  (char)(len), (char)(idx)))
+
+/// \brief Extracts the specified bits from the lower 64 bits of the 128-bit
+///    integer vector operand at the index and of the length specified by __y.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// This intrinsic corresponds to the \c EXTRQ instruction.
+/// \endcode
+///
+/// \param __x
+///    The value from which bits are extracted.
+/// \param __y
+///    Specifies the index of the least significant bit at [13:8]
+///    and the length at [5:0]; all other bits are ignored.
+///    If bits [5:0] are zero, the length is interpreted as 64.
+///    If the sum of the index and length is greater than 64, the result is
+///    undefined. If the length and index are both zero, bits [63:0] of
+///    parameter __x are extracted. If the length is zero but the index is
+///    non-zero, the result is undefined.
+/// \returns A 128-bit vector whose lower 64 bits contain the bits extracted
+///    from the source operand.
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_extract_si64(__m128i __x, __m128i __y)
+{
+  return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);
+}
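/* A sketch of the two EXTRQ forms above: the immediate variant takes len and
 * idx as compile-time constants, here pulling the 8-bit field at bit 16 out
 * of the low qword. Requires -msse4a; extract_byte2 is an illustrative name. */
#include <ammintrin.h>

static inline unsigned int extract_byte2(__m128i v)
{
    /* len = 8, idx = 16: bits [23:16] land in the low bits of the result;
       mask to isolate them. */
    __m128i field = _mm_extracti_si64(v, 8, 16);
    return (unsigned int)_mm_cvtsi128_si32(field) & 0xffu;
}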
+/// \brief Inserts bits of a specified length from the source integer vector
+///    y into the lower 64 bits of the destination integer vector x at the
+///    index idx and of the length len.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m128i _mm_inserti_si64(__m128i x, __m128i y, const int len,
+/// const int idx);
+/// \endcode
+///
+/// \code
+/// This intrinsic corresponds to the \c INSERTQ instruction.
+/// \endcode
+///
+/// \param x
+///    The destination operand where bits will be inserted. The inserted bits
+///    are defined by the length len and by the index idx specifying the least
+///    significant bit.
+/// \param y
+///    The source operand containing the bits to be extracted. The extracted
+///    bits are the least significant bits of operand y of length len.
+/// \param len
+///    Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]
+///    are zero, the length is interpreted as 64.
+/// \param idx
+///    Bits [5:0] specify the index of the least significant bit; the other
+///    bits are ignored. If the sum of the index and length is greater than
+///    64, the result is undefined. If the length and index are both zero,
+///    bits [63:0] of parameter y are inserted into parameter x. If the
+///    length is zero but the index is non-zero, the result is undefined.
+/// \returns A 128-bit integer vector containing the original lower 64-bits
+///    of destination operand x with the specified bitfields replaced by the
+///    lower bits of source operand y. The upper 64 bits of the return value
+///    are undefined.
+
+#define _mm_inserti_si64(x, y, len, idx) \
+  ((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \
+                                    (__v2di)(__m128i)(y), \
+                                    (char)(len), (char)(idx)))
+
+/// \brief Inserts bits of a specified length from the source integer vector
+///    __y into the lower 64 bits of the destination integer vector __x at
+///    the index and of the length specified by __y.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// This intrinsic corresponds to the \c INSERTQ instruction.
+/// \endcode
+///
+/// \param __x
+///    The destination operand where bits will be inserted. The inserted bits
+///    are defined by the length and by the index of the least significant bit
+///    specified by operand __y.
+/// \param __y
+///    The source operand containing the bits to be extracted. The extracted
+///    bits are the least significant bits of operand __y with length specified
+///    by bits [69:64]. These are inserted into the destination at the index
+///    specified by bits [77:72]; all other bits are ignored.
+///    If bits [69:64] are zero, the length is interpreted as 64.
+///    If the sum of the index and length is greater than 64, the result is
+///    undefined. If the length and index are both zero, bits [63:0] of
+///    parameter __y are inserted into parameter __x. If the length
+///    is zero but the index is non-zero, the result is undefined.
+/// \returns A 128-bit integer vector containing the original lower 64-bits
+///    of destination operand __x with the specified bitfields replaced by the
+///    lower bits of source operand __y. The upper 64 bits of the return value
+///    are undefined.
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_insert_si64(__m128i __x, __m128i __y)
+{
+  return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);
+}
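/* The mirror-image sketch for INSERTQ above: overwrite bits [23:16] of the
 * destination's low qword with the low 8 bits of the source, leaving the
 * other destination bits intact. replace_byte2 is an illustrative name. */
#include <ammintrin.h>

static inline __m128i replace_byte2(__m128i dst, __m128i src)
{
    return _mm_inserti_si64(dst, src, 8, 16); /* len = 8 at bit index 16 */
}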
+/// \brief Stores a 64-bit double-precision value in a 64-bit memory location.
+///    To minimize caching, the data is flagged as non-temporal (unlikely to be
+///    used again soon).
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// This intrinsic corresponds to the \c MOVNTSD instruction.
+/// \endcode
+///
+/// \param __p
+///    The 64-bit memory location used to store the register value.
+/// \param __a
+///    The 64-bit double-precision floating-point register value to
+///    be stored.
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_stream_sd(double *__p, __m128d __a)
+{
+  __builtin_ia32_movntsd(__p, (__v2df)__a);
+}
+
+/// \brief Stores a 32-bit single-precision floating-point value in a 32-bit
+///    memory location. To minimize caching, the data is flagged as
+///    non-temporal (unlikely to be used again soon).
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// This intrinsic corresponds to the \c MOVNTSS instruction.
+/// \endcode
+///
+/// \param __p
+///    The 32-bit memory location used to store the register value.
+/// \param __a
+///    The 32-bit single-precision floating-point register value to
+///    be stored.
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_stream_ss(float *__p, __m128 __a)
+{
+  __builtin_ia32_movntss(__p, (__v4sf)__a);
+}
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif /* __SSE4A__ */
+
+#endif /* __AMMINTRIN_H */
diff --git a/c_headers/arm_acle.h b/c_headers/arm_acle.h
new file mode 100644
index 0000000000..73a7e76ce3
--- /dev/null
+++ b/c_headers/arm_acle.h
@@ -0,0 +1,304 @@
+/*===---- arm_acle.h - ARM Non-Neon intrinsics -----------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __ARM_ACLE_H
+#define __ARM_ACLE_H
+
+#ifndef __ARM_ACLE
+#error "ACLE intrinsics support not enabled."
+#endif
+
+#include <stdint.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* 8 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */
+/* 8.3 Memory barriers */
+#if !defined(_MSC_VER)
+#define __dmb(i) __builtin_arm_dmb(i)
+#define __dsb(i) __builtin_arm_dsb(i)
+#define __isb(i) __builtin_arm_isb(i)
+#endif
+
+/* 8.4 Hints */
+
+#if !defined(_MSC_VER)
+static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfi(void) {
+  __builtin_arm_wfi();
+}
+
+static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfe(void) {
+  __builtin_arm_wfe();
+}
+
+static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sev(void) {
+  __builtin_arm_sev();
+}
+
+static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sevl(void) {
+  __builtin_arm_sevl();
+}
+
+static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(void) {
+  __builtin_arm_yield();
+}
+#endif
+
+#if __ARM_32BIT_STATE
+#define __dbg(t) __builtin_arm_dbg(t)
+#endif
+
+/* 8.5 Swap */
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+  __swp(uint32_t x, volatile uint32_t *p) {
+  uint32_t v;
+  do v = __builtin_arm_ldrex(p); while (__builtin_arm_strex(x, p));
+  return v;
+}
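/* A sketch of putting the __swp exchange above to work as a test-and-set
 * spinlock; spin_lock/spin_unlock are illustrative and omit the __dmb
 * barriers and WFE-based backoff a production lock would want. */
#include <arm_acle.h>
#include <stdint.h>

static inline void spin_lock(volatile uint32_t *lock)
{
    while (__swp(1u, lock) != 0u)
        ; /* spin until the old value was 0 (free) */
}

static inline void spin_unlock(volatile uint32_t *lock)
{
    *lock = 0u; /* release; pair with a barrier in real code */
}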
+/* 8.6 Memory prefetch intrinsics */
+/* 8.6.1 Data prefetch */
+#define __pld(addr) __pldx(0, 0, 0, addr)
+
+#if __ARM_32BIT_STATE
+#define __pldx(access_kind, cache_level, retention_policy, addr) \
+  __builtin_arm_prefetch(addr, access_kind, 1)
+#else
+#define __pldx(access_kind, cache_level, retention_policy, addr) \
+  __builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)
+#endif
+
+/* 8.6.2 Instruction prefetch */
+#define __pli(addr) __plix(0, 0, addr)
+
+#if __ARM_32BIT_STATE
+#define __plix(cache_level, retention_policy, addr) \
+  __builtin_arm_prefetch(addr, 0, 0)
+#else
+#define __plix(cache_level, retention_policy, addr) \
+  __builtin_arm_prefetch(addr, 0, cache_level, retention_policy, 0)
+#endif
+
+/* 8.7 NOP */
+static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) {
+  __builtin_arm_nop();
+}
+
+/* 9 DATA-PROCESSING INTRINSICS */
+/* 9.2 Miscellaneous data-processing intrinsics */
+/* ROR */
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+  __ror(uint32_t x, uint32_t y) {
+  y %= 32;
+  if (y == 0) return x;
+  return (x >> y) | (x << (32 - y));
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+  __rorll(uint64_t x, uint32_t y) {
+  y %= 64;
+  if (y == 0) return x;
+  return (x >> y) | (x << (64 - y));
+}
+
+static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
+  __rorl(unsigned long x, uint32_t y) {
+#if __SIZEOF_LONG__ == 4
+  return __ror(x, y);
+#else
+  return __rorll(x, y);
+#endif
+}
+
+
+/* CLZ */
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+  __clz(uint32_t t) {
+  return __builtin_clz(t);
+}
+
+static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
+  __clzl(unsigned long t) {
+  return __builtin_clzl(t);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+  __clzll(uint64_t t) {
+  return __builtin_clzll(t);
+}
+
+/* REV */
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+  __rev(uint32_t t) {
+  return __builtin_bswap32(t);
+}
+
+static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
+  __revl(unsigned long t) {
+#if __SIZEOF_LONG__ == 4
+  return __builtin_bswap32(t);
+#else
+  return __builtin_bswap64(t);
+#endif
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+  __revll(uint64_t t) {
+  return __builtin_bswap64(t);
+}
+
+/* REV16 */
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+  __rev16(uint32_t t) {
+  return __ror(__rev(t), 16);
+}
+
+static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
+  __rev16l(unsigned long t) {
+  return __rorl(__revl(t), sizeof(long) / 2);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+  __rev16ll(uint64_t t) {
+  return __rorll(__revll(t), 32);
+}
+
+/* REVSH */
+static __inline__ int16_t __attribute__((__always_inline__, __nodebug__))
+  __revsh(int16_t t) {
+  return __builtin_bswap16(t);
+}
+
+/* RBIT */
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+  __rbit(uint32_t t) {
+  return __builtin_arm_rbit(t);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+  __rbitll(uint64_t t) {
+#if __ARM_32BIT_STATE
+  return (((uint64_t) __builtin_arm_rbit(t)) << 32) |
+         __builtin_arm_rbit(t >> 32);
+#else
+  return __builtin_arm_rbit64(t);
+#endif
+}
+
+static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
+  __rbitl(unsigned long t) {
+#if __SIZEOF_LONG__ == 4
+  return __rbit(t);
+#else
+  return __rbitll(t);
+#endif
+}
+
+/*
+ * 9.4 Saturating intrinsics
+ *
+ * FIXME: Change guard to their corresponding __ARM_FEATURE flag when Q flag
+ * intrinsics are implemented and the flag is enabled.
+ */ +/* 9.4.1 Width-specified saturation intrinsics */ +#if __ARM_32BIT_STATE +#define __ssat(x, y) __builtin_arm_ssat(x, y) +#define __usat(x, y) __builtin_arm_usat(x, y) +#endif + +/* 9.4.2 Saturating addition and subtraction intrinsics */ +#if __ARM_32BIT_STATE +static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) + __qadd(int32_t t, int32_t v) { + return __builtin_arm_qadd(t, v); +} + +static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) + __qsub(int32_t t, int32_t v) { + return __builtin_arm_qsub(t, v); +} + +static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) +__qdbl(int32_t t) { + return __builtin_arm_qadd(t, t); +} +#endif + +/* 9.7 CRC32 intrinsics */ +#if __ARM_FEATURE_CRC32 +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) + __crc32b(uint32_t a, uint8_t b) { + return __builtin_arm_crc32b(a, b); +} + +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) + __crc32h(uint32_t a, uint16_t b) { + return __builtin_arm_crc32h(a, b); +} + +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) + __crc32w(uint32_t a, uint32_t b) { + return __builtin_arm_crc32w(a, b); +} + +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) + __crc32d(uint32_t a, uint64_t b) { + return __builtin_arm_crc32d(a, b); +} + +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) + __crc32cb(uint32_t a, uint8_t b) { + return __builtin_arm_crc32cb(a, b); +} + +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) + __crc32ch(uint32_t a, uint16_t b) { + return __builtin_arm_crc32ch(a, b); +} + +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) + __crc32cw(uint32_t a, uint32_t b) { + return __builtin_arm_crc32cw(a, b); +} + +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) + __crc32cd(uint32_t a, uint64_t b) { + return __builtin_arm_crc32cd(a, b); +} +#endif + +/* 10.1 Special register intrinsics */ +#define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg) +#define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg) +#define __arm_rsrp(sysreg) __builtin_arm_rsrp(sysreg) +#define __arm_wsr(sysreg, v) __builtin_arm_wsr(sysreg, v) +#define __arm_wsr64(sysreg, v) __builtin_arm_wsr64(sysreg, v) +#define __arm_wsrp(sysreg, v) __builtin_arm_wsrp(sysreg, v) + +#if defined(__cplusplus) +} +#endif + +#endif /* __ARM_ACLE_H */ diff --git a/c_headers/avx2intrin.h b/c_headers/avx2intrin.h new file mode 100644 index 0000000000..d8b6b0aa4d --- /dev/null +++ b/c_headers/avx2intrin.h @@ -0,0 +1,1256 @@ +/*===---- avx2intrin.h - AVX2 intrinsics -----------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error "Never use <avx2intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX2INTRIN_H
+#define __AVX2INTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+
+/* SSE4 Multiple Packed Sums of Absolute Difference. */
+#define _mm256_mpsadbw_epu8(X, Y, M) __builtin_ia32_mpsadbw256((X), (Y), (M))
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_abs_epi8(__m256i __a)
+{
+  return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_abs_epi16(__m256i __a)
+{
+  return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_abs_epi32(__m256i __a)
+{
+  return (__m256i)__builtin_ia32_pabsd256((__v8si)__a);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_packs_epi16(__m256i __a, __m256i __b)
+{
+  return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_packs_epi32(__m256i __a, __m256i __b)
+{
+  return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_packus_epi16(__m256i __a, __m256i __b)
+{
+  return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_packus_epi32(__m256i __V1, __m256i __V2)
+{
+  return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_add_epi8(__m256i __a, __m256i __b)
+{
+  return (__m256i)((__v32qi)__a + (__v32qi)__b);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_add_epi16(__m256i __a, __m256i __b)
+{
+  return (__m256i)((__v16hi)__a + (__v16hi)__b);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_add_epi32(__m256i __a, __m256i __b)
+{
+  return (__m256i)((__v8si)__a + (__v8si)__b);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_add_epi64(__m256i __a, __m256i __b)
+{
+  return __a + __b;
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_adds_epi8(__m256i __a, __m256i __b)
+{
+  return (__m256i)__builtin_ia32_paddsb256((__v32qi)__a, (__v32qi)__b);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_adds_epi16(__m256i __a, __m256i __b)
+{
+  return (__m256i)__builtin_ia32_paddsw256((__v16hi)__a, (__v16hi)__b);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_adds_epu8(__m256i __a, __m256i __b)
+{
+  return (__m256i)__builtin_ia32_paddusb256((__v32qi)__a, (__v32qi)__b);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_adds_epu16(__m256i __a, __m256i __b)
+{
+  return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b);
+}
+
+#define _mm256_alignr_epi8(a, b, n) __extension__ ({ \
+  __m256i __a = (a); \
+  __m256i __b = (b); \
+  (__m256i)__builtin_ia32_palignr256((__v32qi)__a, (__v32qi)__b, (n)); })
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_and_si256(__m256i __a, __m256i __b)
+{
+  return __a & __b;
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_andnot_si256(__m256i __a, __m256i __b)
+{
+  return ~__a &
__b; +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_avg_epu8(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_avg_epu16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M) +{ + return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2, + (__v32qi)__M); +} + +#define _mm256_blend_epi16(V1, V2, M) __extension__ ({ \ + __m256i __V1 = (V1); \ + __m256i __V2 = (V2); \ + (__m256i)__builtin_shufflevector((__v16hi)__V1, (__v16hi)__V2, \ + (((M) & 0x01) ? 16 : 0), \ + (((M) & 0x02) ? 17 : 1), \ + (((M) & 0x04) ? 18 : 2), \ + (((M) & 0x08) ? 19 : 3), \ + (((M) & 0x10) ? 20 : 4), \ + (((M) & 0x20) ? 21 : 5), \ + (((M) & 0x40) ? 22 : 6), \ + (((M) & 0x80) ? 23 : 7), \ + (((M) & 0x01) ? 24 : 8), \ + (((M) & 0x02) ? 25 : 9), \ + (((M) & 0x04) ? 26 : 10), \ + (((M) & 0x08) ? 27 : 11), \ + (((M) & 0x10) ? 28 : 12), \ + (((M) & 0x20) ? 29 : 13), \ + (((M) & 0x40) ? 30 : 14), \ + (((M) & 0x80) ? 31 : 15)); }) + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_cmpeq_epi8(__m256i __a, __m256i __b) +{ + return (__m256i)((__v32qi)__a == (__v32qi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_cmpeq_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)((__v16hi)__a == (__v16hi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_cmpeq_epi32(__m256i __a, __m256i __b) +{ + return (__m256i)((__v8si)__a == (__v8si)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_cmpeq_epi64(__m256i __a, __m256i __b) +{ + return (__m256i)(__a == __b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_cmpgt_epi8(__m256i __a, __m256i __b) +{ + return (__m256i)((__v32qi)__a > (__v32qi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_cmpgt_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)((__v16hi)__a > (__v16hi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_cmpgt_epi32(__m256i __a, __m256i __b) +{ + return (__m256i)((__v8si)__a > (__v8si)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_cmpgt_epi64(__m256i __a, __m256i __b) +{ + return (__m256i)(__a > __b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_hadd_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_hadd_epi32(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_hadds_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_hsub_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_hsub_epi32(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_hsubs_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maddubs_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b); 
+} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_madd_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_max_epi8(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)__a, (__v32qi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_max_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_max_epi32(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pmaxsd256((__v8si)__a, (__v8si)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_max_epu8(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pmaxub256((__v32qi)__a, (__v32qi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_max_epu16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_max_epu32(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pmaxud256((__v8si)__a, (__v8si)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_min_epi8(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pminsb256((__v32qi)__a, (__v32qi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_min_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pminsw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_min_epi32(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pminsd256((__v8si)__a, (__v8si)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_min_epu8(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pminub256((__v32qi)__a, (__v32qi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_min_epu16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_min_epu32(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pminud256((__v8si)__a, (__v8si)__b); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm256_movemask_epi8(__m256i __a) +{ + return __builtin_ia32_pmovmskb256((__v32qi)__a); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_cvtepi8_epi16(__m128i __V) +{ + return (__m256i)__builtin_ia32_pmovsxbw256((__v16qi)__V); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_cvtepi8_epi32(__m128i __V) +{ + return (__m256i)__builtin_ia32_pmovsxbd256((__v16qi)__V); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_cvtepi8_epi64(__m128i __V) +{ + return (__m256i)__builtin_ia32_pmovsxbq256((__v16qi)__V); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_cvtepi16_epi32(__m128i __V) +{ + return (__m256i)__builtin_ia32_pmovsxwd256((__v8hi)__V); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_cvtepi16_epi64(__m128i __V) +{ + return (__m256i)__builtin_ia32_pmovsxwq256((__v8hi)__V); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_cvtepi32_epi64(__m128i __V) +{ + return (__m256i)__builtin_ia32_pmovsxdq256((__v4si)__V); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_cvtepu8_epi16(__m128i __V) +{ + return (__m256i)__builtin_ia32_pmovzxbw256((__v16qi)__V); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_cvtepu8_epi32(__m128i __V) +{ + return (__m256i)__builtin_ia32_pmovzxbd256((__v16qi)__V); +} + +static __inline__ 
__m256i __DEFAULT_FN_ATTRS +_mm256_cvtepu8_epi64(__m128i __V) +{ + return (__m256i)__builtin_ia32_pmovzxbq256((__v16qi)__V); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_cvtepu16_epi32(__m128i __V) +{ + return (__m256i)__builtin_ia32_pmovzxwd256((__v8hi)__V); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_cvtepu16_epi64(__m128i __V) +{ + return (__m256i)__builtin_ia32_pmovzxwq256((__v8hi)__V); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_cvtepu32_epi64(__m128i __V) +{ + return (__m256i)__builtin_ia32_pmovzxdq256((__v4si)__V); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mul_epi32(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mulhrs_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mulhi_epu16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mulhi_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mullo_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)((__v16hi)__a * (__v16hi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mullo_epi32 (__m256i __a, __m256i __b) +{ + return (__m256i)((__v8si)__a * (__v8si)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mul_epu32(__m256i __a, __m256i __b) +{ + return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_or_si256(__m256i __a, __m256i __b) +{ + return __a | __b; +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_sad_epu8(__m256i __a, __m256i __b) +{ + return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_shuffle_epi8(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b); +} + +#define _mm256_shuffle_epi32(a, imm) __extension__ ({ \ + __m256i __a = (a); \ + (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)_mm256_set1_epi32(0), \ + (imm) & 0x3, ((imm) & 0xc) >> 2, \ + ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \ + 4 + (((imm) & 0x03) >> 0), \ + 4 + (((imm) & 0x0c) >> 2), \ + 4 + (((imm) & 0x30) >> 4), \ + 4 + (((imm) & 0xc0) >> 6)); }) + +#define _mm256_shufflehi_epi16(a, imm) __extension__ ({ \ + __m256i __a = (a); \ + (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)_mm256_set1_epi16(0), \ + 0, 1, 2, 3, \ + 4 + (((imm) & 0x03) >> 0), \ + 4 + (((imm) & 0x0c) >> 2), \ + 4 + (((imm) & 0x30) >> 4), \ + 4 + (((imm) & 0xc0) >> 6), \ + 8, 9, 10, 11, \ + 12 + (((imm) & 0x03) >> 0), \ + 12 + (((imm) & 0x0c) >> 2), \ + 12 + (((imm) & 0x30) >> 4), \ + 12 + (((imm) & 0xc0) >> 6)); }) + +#define _mm256_shufflelo_epi16(a, imm) __extension__ ({ \ + __m256i __a = (a); \ + (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)_mm256_set1_epi16(0), \ + (imm) & 0x3,((imm) & 0xc) >> 2, \ + ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \ + 4, 5, 6, 7, \ + 8 + (((imm) & 0x03) >> 0), \ + 8 + (((imm) & 0x0c) >> 2), \ + 8 + (((imm) & 0x30) >> 4), \ + 8 + (((imm) & 0xc0) >> 6), \ + 12, 13, 14, 15); }) + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_sign_epi8(__m256i __a, __m256i __b) +{ + return 
(__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_sign_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_sign_epi32(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b); +} + +#define _mm256_slli_si256(a, count) __extension__ ({ \ + __m256i __a = (a); \ + (__m256i)__builtin_ia32_pslldqi256(__a, (count)*8); }) + +#define _mm256_bslli_epi128(a, count) _mm256_slli_si256((a), (count)) + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_slli_epi16(__m256i __a, int __count) +{ + return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_sll_epi16(__m256i __a, __m128i __count) +{ + return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_slli_epi32(__m256i __a, int __count) +{ + return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_sll_epi32(__m256i __a, __m128i __count) +{ + return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_slli_epi64(__m256i __a, int __count) +{ + return __builtin_ia32_psllqi256(__a, __count); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_sll_epi64(__m256i __a, __m128i __count) +{ + return __builtin_ia32_psllq256(__a, __count); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_srai_epi16(__m256i __a, int __count) +{ + return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_sra_epi16(__m256i __a, __m128i __count) +{ + return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_srai_epi32(__m256i __a, int __count) +{ + return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_sra_epi32(__m256i __a, __m128i __count) +{ + return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count); +} + +#define _mm256_srli_si256(a, count) __extension__ ({ \ + __m256i __a = (a); \ + (__m256i)__builtin_ia32_psrldqi256(__a, (count)*8); }) + +#define _mm256_bsrli_epi128(a, count) _mm256_srli_si256((a), (count)) + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_srli_epi16(__m256i __a, int __count) +{ + return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_srl_epi16(__m256i __a, __m128i __count) +{ + return (__m256i)__builtin_ia32_psrlw256((__v16hi)__a, (__v8hi)__count); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_srli_epi32(__m256i __a, int __count) +{ + return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_srl_epi32(__m256i __a, __m128i __count) +{ + return (__m256i)__builtin_ia32_psrld256((__v8si)__a, (__v4si)__count); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_srli_epi64(__m256i __a, int __count) +{ + return __builtin_ia32_psrlqi256(__a, __count); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_srl_epi64(__m256i __a, __m128i __count) +{ + return __builtin_ia32_psrlq256(__a, __count); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_sub_epi8(__m256i __a, 
__m256i __b) +{ + return (__m256i)((__v32qi)__a - (__v32qi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_sub_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)((__v16hi)__a - (__v16hi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_sub_epi32(__m256i __a, __m256i __b) +{ + return (__m256i)((__v8si)__a - (__v8si)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_sub_epi64(__m256i __a, __m256i __b) +{ + return __a - __b; +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_subs_epi8(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_psubsb256((__v32qi)__a, (__v32qi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_subs_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_psubsw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_subs_epu8(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_psubusb256((__v32qi)__a, (__v32qi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_subs_epu16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_psubusw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_unpackhi_epi8(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_unpackhi_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_unpackhi_epi32(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_unpackhi_epi64(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_shufflevector(__a, __b, 1, 4+1, 3, 4+3); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_unpacklo_epi8(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_unpacklo_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_unpacklo_epi32(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_unpacklo_epi64(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_shufflevector(__a, __b, 0, 4+0, 2, 4+2); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_xor_si256(__m256i __a, __m256i __b) +{ + return __a ^ __b; +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_stream_load_si256(__m256i *__V) +{ + return (__m256i)__builtin_ia32_movntdqa256((__v4di *)__V); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_broadcastss_ps(__m128 __X) +{ + return (__m128)__builtin_ia32_vbroadcastss_ps((__v4sf)__X); +} + +static __inline__ __m128d 
__DEFAULT_FN_ATTRS +_mm_broadcastsd_pd(__m128d __a) +{ + return __builtin_shufflevector(__a, __a, 0, 0); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_broadcastss_ps(__m128 __X) +{ + return (__m256)__builtin_ia32_vbroadcastss_ps256((__v4sf)__X); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_broadcastsd_pd(__m128d __X) +{ + return (__m256d)__builtin_ia32_vbroadcastsd_pd256((__v2df)__X); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_broadcastsi128_si256(__m128i __X) +{ + return (__m256i)__builtin_shufflevector(__X, __X, 0, 1, 0, 1); +} + +#define _mm_blend_epi32(V1, V2, M) __extension__ ({ \ + __m128i __V1 = (V1); \ + __m128i __V2 = (V2); \ + (__m128i)__builtin_shufflevector((__v4si)__V1, (__v4si)__V2, \ + (((M) & 0x01) ? 4 : 0), \ + (((M) & 0x02) ? 5 : 1), \ + (((M) & 0x04) ? 6 : 2), \ + (((M) & 0x08) ? 7 : 3)); }) + +#define _mm256_blend_epi32(V1, V2, M) __extension__ ({ \ + __m256i __V1 = (V1); \ + __m256i __V2 = (V2); \ + (__m256i)__builtin_shufflevector((__v8si)__V1, (__v8si)__V2, \ + (((M) & 0x01) ? 8 : 0), \ + (((M) & 0x02) ? 9 : 1), \ + (((M) & 0x04) ? 10 : 2), \ + (((M) & 0x08) ? 11 : 3), \ + (((M) & 0x10) ? 12 : 4), \ + (((M) & 0x20) ? 13 : 5), \ + (((M) & 0x40) ? 14 : 6), \ + (((M) & 0x80) ? 15 : 7)); }) + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_broadcastb_epi8(__m128i __X) +{ + return (__m256i)__builtin_ia32_pbroadcastb256((__v16qi)__X); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_broadcastw_epi16(__m128i __X) +{ + return (__m256i)__builtin_ia32_pbroadcastw256((__v8hi)__X); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_broadcastd_epi32(__m128i __X) +{ + return (__m256i)__builtin_ia32_pbroadcastd256((__v4si)__X); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_broadcastq_epi64(__m128i __X) +{ + return (__m256i)__builtin_ia32_pbroadcastq256(__X); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_broadcastb_epi8(__m128i __X) +{ + return (__m128i)__builtin_ia32_pbroadcastb128((__v16qi)__X); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_broadcastw_epi16(__m128i __X) +{ + return (__m128i)__builtin_ia32_pbroadcastw128((__v8hi)__X); +} + + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_broadcastd_epi32(__m128i __X) +{ + return (__m128i)__builtin_ia32_pbroadcastd128((__v4si)__X); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_broadcastq_epi64(__m128i __X) +{ + return (__m128i)__builtin_ia32_pbroadcastq128(__X); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b); +} + +#define _mm256_permute4x64_pd(V, M) __extension__ ({ \ + __m256d __V = (V); \ + (__m256d)__builtin_shufflevector((__v4df)__V, (__v4df) _mm256_setzero_pd(), \ + (M) & 0x3, ((M) & 0xc) >> 2, \ + ((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); }) + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_permutevar8x32_ps(__m256 __a, __m256 __b) +{ + return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8sf)__b); +} + +#define _mm256_permute4x64_epi64(V, M) __extension__ ({ \ + __m256i __V = (V); \ + (__m256i)__builtin_shufflevector((__v4di)__V, (__v4di) _mm256_setzero_si256(), \ + (M) & 0x3, ((M) & 0xc) >> 2, \ + ((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); }) + +#define _mm256_permute2x128_si256(V1, V2, M) __extension__ ({ \ + __m256i __V1 = (V1); \ + __m256i __V2 = (V2); \ + (__m256i)__builtin_ia32_permti256(__V1, __V2, (M)); }) + +#define _mm256_extracti128_si256(V, M) __extension__ ({ \ + 
(__m128i)__builtin_shufflevector( \ + (__v4di)(V), \ + (__v4di)(_mm256_setzero_si256()), \ + (((M) & 1) ? 2 : 0), \ + (((M) & 1) ? 3 : 1) );}) + +#define _mm256_inserti128_si256(V1, V2, M) __extension__ ({ \ + (__m256i)__builtin_shufflevector( \ + (__v4di)(V1), \ + (__v4di)_mm256_castsi128_si256((__m128i)(V2)), \ + (((M) & 1) ? 0 : 4), \ + (((M) & 1) ? 1 : 5), \ + (((M) & 1) ? 4 : 2), \ + (((M) & 1) ? 5 : 3) );}) + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskload_epi32(int const *__X, __m256i __M) +{ + return (__m256i)__builtin_ia32_maskloadd256((const __v8si *)__X, (__v8si)__M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskload_epi64(long long const *__X, __m256i __M) +{ + return (__m256i)__builtin_ia32_maskloadq256((const __v4di *)__X, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskload_epi32(int const *__X, __m128i __M) +{ + return (__m128i)__builtin_ia32_maskloadd((const __v4si *)__X, (__v4si)__M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskload_epi64(long long const *__X, __m128i __M) +{ + return (__m128i)__builtin_ia32_maskloadq((const __v2di *)__X, (__v2di)__M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_maskstore_epi32(int *__X, __m256i __M, __m256i __Y) +{ + __builtin_ia32_maskstored256((__v8si *)__X, (__v8si)__M, (__v8si)__Y); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_maskstore_epi64(long long *__X, __m256i __M, __m256i __Y) +{ + __builtin_ia32_maskstoreq256((__v4di *)__X, __M, __Y); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_maskstore_epi32(int *__X, __m128i __M, __m128i __Y) +{ + __builtin_ia32_maskstored((__v4si *)__X, (__v4si)__M, (__v4si)__Y); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y) +{ + __builtin_ia32_maskstoreq(( __v2di *)__X, __M, __Y); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_sllv_epi32(__m256i __X, __m256i __Y) +{ + return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_sllv_epi32(__m128i __X, __m128i __Y) +{ + return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_sllv_epi64(__m256i __X, __m256i __Y) +{ + return (__m256i)__builtin_ia32_psllv4di(__X, __Y); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_sllv_epi64(__m128i __X, __m128i __Y) +{ + return (__m128i)__builtin_ia32_psllv2di(__X, __Y); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_srav_epi32(__m256i __X, __m256i __Y) +{ + return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_srav_epi32(__m128i __X, __m128i __Y) +{ + return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_srlv_epi32(__m256i __X, __m256i __Y) +{ + return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_srlv_epi32(__m128i __X, __m128i __Y) +{ + return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_srlv_epi64(__m256i __X, __m256i __Y) +{ + return (__m256i)__builtin_ia32_psrlv4di(__X, __Y); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_srlv_epi64(__m128i __X, __m128i __Y) +{ + return (__m128i)__builtin_ia32_psrlv2di(__X, __Y); +} + +#define _mm_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \ + __m128d __a = (a); 
\ + double const *__m = (m); \ + __m128i __i = (i); \ + __m128d __mask = (mask); \ + (__m128d)__builtin_ia32_gatherd_pd((__v2df)__a, (const __v2df *)__m, \ + (__v4si)__i, (__v2df)__mask, (s)); }) + +#define _mm256_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \ + __m256d __a = (a); \ + double const *__m = (m); \ + __m128i __i = (i); \ + __m256d __mask = (mask); \ + (__m256d)__builtin_ia32_gatherd_pd256((__v4df)__a, (const __v4df *)__m, \ + (__v4si)__i, (__v4df)__mask, (s)); }) + +#define _mm_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \ + __m128d __a = (a); \ + double const *__m = (m); \ + __m128i __i = (i); \ + __m128d __mask = (mask); \ + (__m128d)__builtin_ia32_gatherq_pd((__v2df)__a, (const __v2df *)__m, \ + (__v2di)__i, (__v2df)__mask, (s)); }) + +#define _mm256_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \ + __m256d __a = (a); \ + double const *__m = (m); \ + __m256i __i = (i); \ + __m256d __mask = (mask); \ + (__m256d)__builtin_ia32_gatherq_pd256((__v4df)__a, (const __v4df *)__m, \ + (__v4di)__i, (__v4df)__mask, (s)); }) + +#define _mm_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \ + __m128 __a = (a); \ + float const *__m = (m); \ + __m128i __i = (i); \ + __m128 __mask = (mask); \ + (__m128)__builtin_ia32_gatherd_ps((__v4sf)__a, (const __v4sf *)__m, \ + (__v4si)__i, (__v4sf)__mask, (s)); }) + +#define _mm256_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \ + __m256 __a = (a); \ + float const *__m = (m); \ + __m256i __i = (i); \ + __m256 __mask = (mask); \ + (__m256)__builtin_ia32_gatherd_ps256((__v8sf)__a, (const __v8sf *)__m, \ + (__v8si)__i, (__v8sf)__mask, (s)); }) + +#define _mm_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \ + __m128 __a = (a); \ + float const *__m = (m); \ + __m128i __i = (i); \ + __m128 __mask = (mask); \ + (__m128)__builtin_ia32_gatherq_ps((__v4sf)__a, (const __v4sf *)__m, \ + (__v2di)__i, (__v4sf)__mask, (s)); }) + +#define _mm256_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \ + __m128 __a = (a); \ + float const *__m = (m); \ + __m256i __i = (i); \ + __m128 __mask = (mask); \ + (__m128)__builtin_ia32_gatherq_ps256((__v4sf)__a, (const __v4sf *)__m, \ + (__v4di)__i, (__v4sf)__mask, (s)); }) + +#define _mm_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \ + __m128i __a = (a); \ + int const *__m = (m); \ + __m128i __i = (i); \ + __m128i __mask = (mask); \ + (__m128i)__builtin_ia32_gatherd_d((__v4si)__a, (const __v4si *)__m, \ + (__v4si)__i, (__v4si)__mask, (s)); }) + +#define _mm256_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \ + __m256i __a = (a); \ + int const *__m = (m); \ + __m256i __i = (i); \ + __m256i __mask = (mask); \ + (__m256i)__builtin_ia32_gatherd_d256((__v8si)__a, (const __v8si *)__m, \ + (__v8si)__i, (__v8si)__mask, (s)); }) + +#define _mm_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ \ + __m128i __a = (a); \ + int const *__m = (m); \ + __m128i __i = (i); \ + __m128i __mask = (mask); \ + (__m128i)__builtin_ia32_gatherq_d((__v4si)__a, (const __v4si *)__m, \ + (__v2di)__i, (__v4si)__mask, (s)); }) + +#define _mm256_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ \ + __m128i __a = (a); \ + int const *__m = (m); \ + __m256i __i = (i); \ + __m128i __mask = (mask); \ + (__m128i)__builtin_ia32_gatherq_d256((__v4si)__a, (const __v4si *)__m, \ + (__v4di)__i, (__v4si)__mask, (s)); }) + +#define _mm_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \ + __m128i __a = (a); \ + long long const *__m = (m); \ + __m128i __i = (i); \ + __m128i __mask = (mask); \ + 
(__m128i)__builtin_ia32_gatherd_q((__v2di)__a, (const __v2di *)__m, \ + (__v4si)__i, (__v2di)__mask, (s)); }) + +#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \ + __m256i __a = (a); \ + long long const *__m = (m); \ + __m128i __i = (i); \ + __m256i __mask = (mask); \ + (__m256i)__builtin_ia32_gatherd_q256((__v4di)__a, (const __v4di *)__m, \ + (__v4si)__i, (__v4di)__mask, (s)); }) + +#define _mm_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \ + __m128i __a = (a); \ + long long const *__m = (m); \ + __m128i __i = (i); \ + __m128i __mask = (mask); \ + (__m128i)__builtin_ia32_gatherq_q((__v2di)__a, (const __v2di *)__m, \ + (__v2di)__i, (__v2di)__mask, (s)); }) + +#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \ + __m256i __a = (a); \ + long long const *__m = (m); \ + __m256i __i = (i); \ + __m256i __mask = (mask); \ + (__m256i)__builtin_ia32_gatherq_q256((__v4di)__a, (const __v4di *)__m, \ + (__v4di)__i, (__v4di)__mask, (s)); }) + +#define _mm_i32gather_pd(m, i, s) __extension__ ({ \ + double const *__m = (m); \ + __m128i __i = (i); \ + (__m128d)__builtin_ia32_gatherd_pd((__v2df)_mm_setzero_pd(), \ + (const __v2df *)__m, (__v4si)__i, \ + (__v2df)_mm_set1_pd((double)(long long int)-1), (s)); }) + +#define _mm256_i32gather_pd(m, i, s) __extension__ ({ \ + double const *__m = (m); \ + __m128i __i = (i); \ + (__m256d)__builtin_ia32_gatherd_pd256((__v4df)_mm256_setzero_pd(), \ + (const __v4df *)__m, (__v4si)__i, \ + (__v4df)_mm256_set1_pd((double)(long long int)-1), (s)); }) + +#define _mm_i64gather_pd(m, i, s) __extension__ ({ \ + double const *__m = (m); \ + __m128i __i = (i); \ + (__m128d)__builtin_ia32_gatherq_pd((__v2df)_mm_setzero_pd(), \ + (const __v2df *)__m, (__v2di)__i, \ + (__v2df)_mm_set1_pd((double)(long long int)-1), (s)); }) + +#define _mm256_i64gather_pd(m, i, s) __extension__ ({ \ + double const *__m = (m); \ + __m256i __i = (i); \ + (__m256d)__builtin_ia32_gatherq_pd256((__v4df)_mm256_setzero_pd(), \ + (const __v4df *)__m, (__v4di)__i, \ + (__v4df)_mm256_set1_pd((double)(long long int)-1), (s)); }) + +#define _mm_i32gather_ps(m, i, s) __extension__ ({ \ + float const *__m = (m); \ + __m128i __i = (i); \ + (__m128)__builtin_ia32_gatherd_ps((__v4sf)_mm_setzero_ps(), \ + (const __v4sf *)__m, (__v4si)__i, \ + (__v4sf)_mm_set1_ps((float)(int)-1), (s)); }) + +#define _mm256_i32gather_ps(m, i, s) __extension__ ({ \ + float const *__m = (m); \ + __m256i __i = (i); \ + (__m256)__builtin_ia32_gatherd_ps256((__v8sf)_mm256_setzero_ps(), \ + (const __v8sf *)__m, (__v8si)__i, \ + (__v8sf)_mm256_set1_ps((float)(int)-1), (s)); }) + +#define _mm_i64gather_ps(m, i, s) __extension__ ({ \ + float const *__m = (m); \ + __m128i __i = (i); \ + (__m128)__builtin_ia32_gatherq_ps((__v4sf)_mm_setzero_ps(), \ + (const __v4sf *)__m, (__v2di)__i, \ + (__v4sf)_mm_set1_ps((float)(int)-1), (s)); }) + +#define _mm256_i64gather_ps(m, i, s) __extension__ ({ \ + float const *__m = (m); \ + __m256i __i = (i); \ + (__m128)__builtin_ia32_gatherq_ps256((__v4sf)_mm_setzero_ps(), \ + (const __v4sf *)__m, (__v4di)__i, \ + (__v4sf)_mm_set1_ps((float)(int)-1), (s)); }) + +#define _mm_i32gather_epi32(m, i, s) __extension__ ({ \ + int const *__m = (m); \ + __m128i __i = (i); \ + (__m128i)__builtin_ia32_gatherd_d((__v4si)_mm_setzero_si128(), \ + (const __v4si *)__m, (__v4si)__i, \ + (__v4si)_mm_set1_epi32(-1), (s)); }) + +#define _mm256_i32gather_epi32(m, i, s) __extension__ ({ \ + int const *__m = (m); \ + __m256i __i = (i); \ + 
(__m256i)__builtin_ia32_gatherd_d256((__v8si)_mm256_setzero_si256(), \
+                                       (const __v8si *)__m, (__v8si)__i, \
+                                       (__v8si)_mm256_set1_epi32(-1), (s)); })
+
+#define _mm_i64gather_epi32(m, i, s) __extension__ ({ \
+  int const *__m = (m); \
+  __m128i __i = (i); \
+  (__m128i)__builtin_ia32_gatherq_d((__v4si)_mm_setzero_si128(), \
+                                    (const __v4si *)__m, (__v2di)__i, \
+                                    (__v4si)_mm_set1_epi32(-1), (s)); })
+
+#define _mm256_i64gather_epi32(m, i, s) __extension__ ({ \
+  int const *__m = (m); \
+  __m256i __i = (i); \
+  (__m128i)__builtin_ia32_gatherq_d256((__v4si)_mm_setzero_si128(), \
+                                       (const __v4si *)__m, (__v4di)__i, \
+                                       (__v4si)_mm_set1_epi32(-1), (s)); })
+
+#define _mm_i32gather_epi64(m, i, s) __extension__ ({ \
+  long long const *__m = (m); \
+  __m128i __i = (i); \
+  (__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_setzero_si128(), \
+                                    (const __v2di *)__m, (__v4si)__i, \
+                                    (__v2di)_mm_set1_epi64x(-1), (s)); })
+
+#define _mm256_i32gather_epi64(m, i, s) __extension__ ({ \
+  long long const *__m = (m); \
+  __m128i __i = (i); \
+  (__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_setzero_si256(), \
+                                       (const __v4di *)__m, (__v4si)__i, \
+                                       (__v4di)_mm256_set1_epi64x(-1), (s)); })
+
+#define _mm_i64gather_epi64(m, i, s) __extension__ ({ \
+  long long const *__m = (m); \
+  __m128i __i = (i); \
+  (__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_setzero_si128(), \
+                                    (const __v2di *)__m, (__v2di)__i, \
+                                    (__v2di)_mm_set1_epi64x(-1), (s)); })
+
+#define _mm256_i64gather_epi64(m, i, s) __extension__ ({ \
+  long long const *__m = (m); \
+  __m256i __i = (i); \
+  (__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_setzero_si256(), \
+                                       (const __v4di *)__m, (__v4di)__i, \
+                                       (__v4di)_mm256_set1_epi64x(-1), (s)); })
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif /* __AVX2INTRIN_H */
diff --git a/c_headers/avx512bwintrin.h b/c_headers/avx512bwintrin.h
new file mode 100644
index 0000000000..9e8297a9c9
--- /dev/null
+++ b/c_headers/avx512bwintrin.h
@@ -0,0 +1,1250 @@
+/*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead."
+#endif + +#ifndef __AVX512BWINTRIN_H +#define __AVX512BWINTRIN_H + +typedef unsigned int __mmask32; +typedef unsigned long long __mmask64; +typedef char __v64qi __attribute__ ((__vector_size__ (64))); +typedef short __v32hi __attribute__ ((__vector_size__ (64))); + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) + +static __inline __v64qi __DEFAULT_FN_ATTRS +_mm512_setzero_qi (void) { + return (__v64qi){ 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; +} + +static __inline __v32hi __DEFAULT_FN_ATTRS +_mm512_setzero_hi (void) { + return (__v32hi){ 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; +} + +/* Integer compare */ + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_cmpeq_epi8_mask(__m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b, + (__mmask64)-1); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_mask_cmpeq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b, + __u); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_cmpeq_epu8_mask(__m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0, + (__mmask64)-1); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_mask_cmpeq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0, + __u); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm512_cmpeq_epi16_mask(__m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b, + (__mmask32)-1); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm512_mask_cmpeq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b, + __u); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm512_cmpeq_epu16_mask(__m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0, + (__mmask32)-1); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm512_mask_cmpeq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0, + __u); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_cmpge_epi8_mask(__m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5, + (__mmask64)-1); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_mask_cmpge_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5, + __u); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_cmpge_epu8_mask(__m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5, + (__mmask64)-1); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_mask_cmpge_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5, + __u); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm512_cmpge_epi16_mask(__m512i __a, __m512i __b) { + return 
(__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5, + (__mmask32)-1); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm512_mask_cmpge_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5, + __u); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm512_cmpge_epu16_mask(__m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5, + (__mmask32)-1); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm512_mask_cmpge_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5, + __u); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_cmpgt_epi8_mask(__m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b, + (__mmask64)-1); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_mask_cmpgt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b, + __u); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_cmpgt_epu8_mask(__m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6, + (__mmask64)-1); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_mask_cmpgt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6, + __u); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm512_cmpgt_epi16_mask(__m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b, + (__mmask32)-1); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm512_mask_cmpgt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b, + __u); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm512_cmpgt_epu16_mask(__m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6, + (__mmask32)-1); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm512_mask_cmpgt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6, + __u); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_cmple_epi8_mask(__m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2, + (__mmask64)-1); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_mask_cmple_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2, + __u); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_cmple_epu8_mask(__m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2, + (__mmask64)-1); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_mask_cmple_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2, + __u); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm512_cmple_epi16_mask(__m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2, + (__mmask32)-1); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm512_mask_cmple_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { + return 
(__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2, + __u); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm512_cmple_epu16_mask(__m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2, + (__mmask32)-1); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm512_mask_cmple_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2, + __u); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_cmplt_epi8_mask(__m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1, + (__mmask64)-1); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_mask_cmplt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1, + __u); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_cmplt_epu8_mask(__m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1, + (__mmask64)-1); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_mask_cmplt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1, + __u); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm512_cmplt_epi16_mask(__m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1, + (__mmask32)-1); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm512_mask_cmplt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1, + __u); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm512_cmplt_epu16_mask(__m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1, + (__mmask32)-1); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm512_mask_cmplt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1, + __u); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_cmpneq_epi8_mask(__m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4, + (__mmask64)-1); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_mask_cmpneq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4, + __u); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_cmpneq_epu8_mask(__m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4, + (__mmask64)-1); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_mask_cmpneq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4, + __u); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm512_cmpneq_epi16_mask(__m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4, + (__mmask32)-1); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm512_mask_cmpneq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4, + __u); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm512_cmpneq_epu16_mask(__m512i __a, __m512i __b) { + return 
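/* Note on the immediate predicates passed to __builtin_ia32_cmp[u]b512_mask
 * and __builtin_ia32_cmp[u]w512_mask in this file: they follow the AVX-512
 * integer compare encoding, 0 = EQ, 1 = LT, 2 = LE, 4 = NE, 5 = NLT (>=),
 * 6 = NLE (>). The named wrappers are therefore shorthand for the generic
 * _mm512_cmp_*_mask macros defined at the end of this file, e.g.:
 *
 *   __mmask64 ge = _mm512_cmp_epi8_mask(a, b, 5);  // same as _mm512_cmpge_epi8_mask(a, b)
 */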
(__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4, + (__mmask32)-1); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm512_mask_cmpneq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4, + __u); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_add_epi8 (__m512i __A, __m512i __B) { + return (__m512i) ((__v64qi) __A + (__v64qi) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_add_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { + return (__m512i) __builtin_ia32_paddb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) __W, + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_add_epi8 (__mmask64 __U, __m512i __A, __m512i __B) { + return (__m512i) __builtin_ia32_paddb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) + _mm512_setzero_qi (), + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_sub_epi8 (__m512i __A, __m512i __B) { + return (__m512i) ((__v64qi) __A - (__v64qi) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_sub_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { + return (__m512i) __builtin_ia32_psubb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) __W, + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_sub_epi8 (__mmask64 __U, __m512i __A, __m512i __B) { + return (__m512i) __builtin_ia32_psubb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) + _mm512_setzero_qi (), + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_add_epi16 (__m512i __A, __m512i __B) { + return (__m512i) ((__v32hi) __A + (__v32hi) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_add_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { + return (__m512i) __builtin_ia32_paddw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) __W, + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_add_epi16 (__mmask32 __U, __m512i __A, __m512i __B) { + return (__m512i) __builtin_ia32_paddw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) + _mm512_setzero_hi (), + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_sub_epi16 (__m512i __A, __m512i __B) { + return (__m512i) ((__v32hi) __A - (__v32hi) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_sub_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { + return (__m512i) __builtin_ia32_psubw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) __W, + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_sub_epi16 (__mmask32 __U, __m512i __A, __m512i __B) { + return (__m512i) __builtin_ia32_psubw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) + _mm512_setzero_hi (), + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mullo_epi16 (__m512i __A, __m512i __B) { + return (__m512i) ((__v32hi) __A * (__v32hi) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_mullo_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { + return (__m512i) __builtin_ia32_pmullw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) __W, + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_mullo_epi16 (__mmask32 __U, __m512i __A, __m512i __B) { + return (__m512i) __builtin_ia32_pmullw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) + 
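/* Sketch of the masking convention used throughout the AVX-512 headers
 * (illustrative): _mm512_mask_* takes an extra source __W and merges it into
 * masked-off lanes, while _mm512_maskz_* zeroes them. With a __mmask32
 * covering the 32 word lanes:
 *
 *   __m512i a = _mm512_set1_epi32(0x00020002);    // every 16-bit lane = 2
 *   __m512i b = _mm512_set1_epi32(0x00030003);    // every 16-bit lane = 3
 *   __m512i w = _mm512_set1_epi32(-1);
 *   __m512i r1 = _mm512_mask_add_epi16(w, 0x0000FFFF, a, b);  // lanes 0-15 = 5, 16-31 = -1
 *   __m512i r2 = _mm512_maskz_add_epi16(0x0000FFFF, a, b);    // lanes 0-15 = 5, 16-31 = 0
 */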
_mm512_setzero_hi (), + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W) +{ + return (__m512i) __builtin_ia32_blendmb_512_mask ((__v64qi) __A, + (__v64qi) __W, + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W) +{ + return (__m512i) __builtin_ia32_blendmw_512_mask ((__v32hi) __A, + (__v32hi) __W, + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_abs_epi8 (__m512i __A) +{ + return (__m512i) __builtin_ia32_pabsb512_mask ((__v64qi) __A, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_pabsb512_mask ((__v64qi) __A, + (__v64qi) __W, + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_pabsb512_mask ((__v64qi) __A, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_abs_epi16 (__m512i __A) +{ + return (__m512i) __builtin_ia32_pabsw512_mask ((__v32hi) __A, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_pabsw512_mask ((__v32hi) __A, + (__v32hi) __W, + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_pabsw512_mask ((__v32hi) __A, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_packs_epi32 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_packssdw512_mask ((__v16si) __A, + (__v16si) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_packs_epi32 (__mmask32 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_packssdw512_mask ((__v16si) __A, + (__v16si) __B, + (__v32hi) _mm512_setzero_hi(), + __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_packs_epi32 (__m512i __W, __mmask32 __M, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_packssdw512_mask ((__v16si) __A, + (__v16si) __B, + (__v32hi) __W, + __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_packs_epi16 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_packsswb512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_packs_epi16 (__m512i __W, __mmask64 __M, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_packsswb512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v64qi) __W, + (__mmask64) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_packs_epi16 (__mmask64 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_packsswb512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v64qi) _mm512_setzero_qi(), + __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_packus_epi32 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_packusdw512_mask ((__v16si) __A, + (__v16si) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS 
+_mm512_maskz_packus_epi32 (__mmask32 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_packusdw512_mask ((__v16si) __A, + (__v16si) __B, + (__v32hi) _mm512_setzero_hi(), + __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_packus_epi32 (__m512i __W, __mmask32 __M, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_packusdw512_mask ((__v16si) __A, + (__v16si) __B, + (__v32hi) __W, + __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_packus_epi16 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_packuswb512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_packus_epi16 (__m512i __W, __mmask64 __M, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_packuswb512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v64qi) __W, + (__mmask64) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_packus_epi16 (__mmask64 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_packuswb512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v64qi) _mm512_setzero_qi(), + (__mmask64) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_adds_epi8 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) __W, + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_adds_epi16 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) __W, + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_adds_epu8 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) __W, + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_adds_epu16 (__m512i __A, __m512i __B) +{ + return (__m512i) 
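/* Usage sketch (illustrative): the adds/subs families saturate instead of
 * wrapping. For unsigned bytes, 200 + 100 clamps to 255 rather than wrapping
 * modulo 256 to 44:
 *
 *   __m512i a = _mm512_set1_epi64((long long)0xC8C8C8C8C8C8C8C8ULL);  // 200 in every byte
 *   __m512i b = _mm512_set1_epi64(0x6464646464646464LL);              // 100 in every byte
 *   __m512i r = _mm512_adds_epu8(a, b);                               // 0xFF in every byte
 */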
__builtin_ia32_paddusw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) __W, + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_avg_epu8 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) __W, + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi(), + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_avg_epu16 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) __W, + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi(), + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_max_epi8 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxsb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxsb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi(), + (__mmask64) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxsb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) __W, + (__mmask64) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_max_epi16 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxsw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxsw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi(), + (__mmask32) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxsw512_mask ((__v32hi) __A, + (__v32hi) __B, + 
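/* Note (illustrative): pavgb/pavgw compute a rounding average in a widened
 * intermediate, (a + b + 1) >> 1, so the result never wraps:
 *
 *   __m512i one = _mm512_set1_epi64(0x0101010101010101LL);
 *   __m512i two = _mm512_set1_epi64(0x0202020202020202LL);
 *   __m512i avg = _mm512_avg_epu8(one, two);   // (1 + 2 + 1) >> 1 = 2 in every byte
 */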
(__v32hi) __W, + (__mmask32) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_max_epu8 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxub512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxub512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi(), + (__mmask64) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxub512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) __W, + (__mmask64) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_max_epu16 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxuw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxuw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi(), + (__mmask32) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxuw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) __W, + (__mmask32) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_min_epi8 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminsb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminsb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi(), + (__mmask64) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_pminsb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) __W, + (__mmask64) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_min_epi16 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminsw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminsw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi(), + (__mmask32) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_pminsw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) __W, + (__mmask32) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_min_epu8 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminub512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminub512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi(), + (__mmask64) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS 
+_mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_pminub512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) __W, + (__mmask64) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_min_epu16 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminuw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminuw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi(), + (__mmask32) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_pminuw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) __W, + (__mmask32) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_shuffle_epi8 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pshufb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_shuffle_epi8 (__m512i __W, __mmask64 __U, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_pshufb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) __W, + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_shuffle_epi8 (__mmask64 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pshufb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_subs_epi8 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) __W, + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_subs_epi16 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) __W, + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_subs_epu8 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A, + __m512i __B) +{ + 
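/* Usage sketch (illustrative; lut and x are hypothetical values): like the
 * 128-bit pshufb, _mm512_shuffle_epi8 performs a byte table lookup within
 * each 128-bit lane, and an index byte with its high bit set yields zero.
 * A nibble-indexed 16-entry table, replicated per 128-bit lane, is used as:
 *
 *   __m512i idx = _mm512_and_epi64(x, _mm512_set1_epi64(0x0F0F0F0F0F0F0F0FLL));
 *   __m512i out = _mm512_shuffle_epi8(lut, idx);
 */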
return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) __W, + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_subs_epu16 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) __W, + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask2_permutex2var_epi16 (__m512i __A, __m512i __I, + __mmask32 __U, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpermi2varhi512_mask ((__v32hi) __A, + (__v32hi) __I /* idx */ , + (__v32hi) __B, + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_permutex2var_epi16 (__m512i __A, __m512i __I, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpermt2varhi512_mask ((__v32hi) __I /* idx */, + (__v32hi) __A, + (__v32hi) __B, + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_permutex2var_epi16 (__m512i __A, __mmask32 __U, + __m512i __I, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpermt2varhi512_mask ((__v32hi) __I /* idx */, + (__v32hi) __A, + (__v32hi) __B, + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_permutex2var_epi16 (__mmask32 __U, __m512i __A, + __m512i __I, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpermt2varhi512_maskz ((__v32hi) __I + /* idx */ , + (__v32hi) __A, + (__v32hi) __B, + (__mmask32) __U); +} + +#define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \ + (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ + (__v64qi)(__m512i)(b), \ + (p), (__mmask64)-1); }) + +#define _mm512_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \ + (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ + (__v64qi)(__m512i)(b), \ + (p), (__mmask64)(m)); }) + +#define _mm512_cmp_epu8_mask(a, b, p) __extension__ ({ \ + (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ + (__v64qi)(__m512i)(b), \ + (p), (__mmask64)-1); }) + +#define _mm512_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \ + (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ + (__v64qi)(__m512i)(b), \ + (p), (__mmask64)(m)); }) + +#define _mm512_cmp_epi16_mask(a, b, p) __extension__ ({ \ + (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ + (__v32hi)(__m512i)(b), \ + (p), (__mmask32)-1); }) + +#define _mm512_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \ + (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ + (__v32hi)(__m512i)(b), \ + (p), (__mmask32)(m)); }) + +#define _mm512_cmp_epu16_mask(a, b, p) __extension__ ({ \ + (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ + (__v32hi)(__m512i)(b), \ + (p), (__mmask32)-1); }) + +#define _mm512_mask_cmp_epu16_mask(m, a,
b, p) __extension__ ({ \ + (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ + (__v32hi)(__m512i)(b), \ + (p), (__mmask32)(m)); }) + + +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/c_headers/avx512cdintrin.h b/c_headers/avx512cdintrin.h new file mode 100644 index 0000000000..0844b23a10 --- /dev/null +++ b/c_headers/avx512cdintrin.h @@ -0,0 +1,131 @@ +/*===------------- avx512cdintrin.h - AVX512CD intrinsics ------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx512cdintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX512CDINTRIN_H +#define __AVX512CDINTRIN_H + +/* Define the default attributes for the functions in this file.
*/ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512cd"))) + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_conflict_epi64 (__m512i __A) +{ + return (__m512i) __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A, + (__v8di) _mm512_setzero_si512 (), + (__mmask8) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_conflict_epi64 (__m512i __W, __mmask8 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A, + (__v8di) __W, + (__mmask8) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_conflict_epi64 (__mmask8 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A, + (__v8di) _mm512_setzero_si512 (), + (__mmask8) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_conflict_epi32 (__m512i __A) +{ + return (__m512i) __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A, + (__v16si) _mm512_setzero_si512 (), + (__mmask16) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_conflict_epi32 (__m512i __W, __mmask16 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A, + (__v16si) __W, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A, + (__v16si) _mm512_setzero_si512 (), + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_lzcnt_epi32 (__m512i __A) +{ + return (__m512i) __builtin_ia32_vplzcntd_512_mask ((__v16si) __A, + (__v16si) _mm512_setzero_si512 (), + (__mmask16) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_lzcnt_epi32 (__m512i __W, __mmask16 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_vplzcntd_512_mask ((__v16si) __A, + (__v16si) __W, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_lzcnt_epi32 (__mmask16 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_vplzcntd_512_mask ((__v16si) __A, + (__v16si) _mm512_setzero_si512 (), + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_lzcnt_epi64 (__m512i __A) +{ + return (__m512i) __builtin_ia32_vplzcntq_512_mask ((__v8di) __A, + (__v8di) _mm512_setzero_si512 (), + (__mmask8) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_lzcnt_epi64 (__m512i __W, __mmask8 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_vplzcntq_512_mask ((__v8di) __A, + (__v8di) __W, + (__mmask8) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_lzcnt_epi64 (__mmask8 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_vplzcntq_512_mask ((__v8di) __A, + (__v8di) _mm512_setzero_si512 (), + (__mmask8) __U); +} +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/c_headers/avx512dqintrin.h b/c_headers/avx512dqintrin.h new file mode 100644 index 0000000000..c946de2867 --- /dev/null +++ b/c_headers/avx512dqintrin.h @@ -0,0 +1,242 @@ +/*===---- avx512dqintrin.h - AVX512DQ intrinsics ---------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the 
following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX512DQINTRIN_H +#define __AVX512DQINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mullo_epi64 (__m512i __A, __m512i __B) { + return (__m512i) ((__v8di) __A * (__v8di) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { + return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) __W, + (__mmask8) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) { + return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_xor_pd (__m512d __A, __m512d __B) { + return (__m512d) ((__v8di) __A ^ (__v8di) __B); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_xor_ps (__m512 __A, __m512 __B) { + return (__m512) ((__v16si) __A ^ (__v16si) __B); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { + return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) { + return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_or_pd (__m512d __A, __m512d __B) { + return (__m512d) ((__v8di) __A | (__v8di) __B); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd
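/* Usage sketch (illustrative): the AVX512DQ bitwise ops on ps/pd vectors make
 * sign-bit manipulation possible without leaving the floating-point domain,
 * e.g. negating all eight doubles by XORing with -0.0:
 *
 *   __m512d v   = _mm512_set1_pd(3.5);
 *   __m512d s   = _mm512_set1_pd(-0.0);   // only the sign bit set in each lane
 *   __m512d neg = _mm512_xor_pd(v, s);    // every lane becomes -3.5
 */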
(), + (__mmask8) __U); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_or_ps (__m512 __A, __m512 __B) { + return (__m512) ((__v16si) __A | (__v16si) __B); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { + return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) { + return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_and_pd (__m512d __A, __m512d __B) { + return (__m512d) ((__v8di) __A & (__v8di) __B); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_and_ps (__m512 __A, __m512 __B) { + return (__m512) ((__v16si) __A & (__v16si) __B); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { + return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) { + return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_andnot_pd (__m512d __A, __m512d __B) { + return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_andnot_ps (__m512 __A, __m512 __B) { + return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { + return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) { + return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/c_headers/avx512erintrin.h b/c_headers/avx512erintrin.h new file mode 100644 index 0000000000..56edffc11c --- /dev/null +++ 
b/c_headers/avx512erintrin.h @@ -0,0 +1,285 @@ +/*===---- avx512erintrin.h - AVX512ER intrinsics ------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX512ERINTRIN_H +#define __AVX512ERINTRIN_H + +// exp2a23 +#define _mm512_exp2a23_round_pd(A, R) __extension__ ({ \ + (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)-1, (R)); }) + +#define _mm512_mask_exp2a23_round_pd(S, M, A, R) __extension__ ({ \ + (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(S), \ + (__mmask8)(M), (R)); }) + +#define _mm512_maskz_exp2a23_round_pd(M, A, R) __extension__ ({ \ + (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)(M), (R)); }) + +#define _mm512_exp2a23_pd(A) \ + _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_mask_exp2a23_pd(S, M, A) \ + _mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_maskz_exp2a23_pd(M, A) \ + _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_exp2a23_round_ps(A, R) __extension__ ({ \ + (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)-1, (R)); }) + +#define _mm512_mask_exp2a23_round_ps(S, M, A, R) __extension__ ({ \ + (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(S), \ + (__mmask16)(M), (R)); }) + +#define _mm512_maskz_exp2a23_round_ps(M, A, R) __extension__ ({ \ + (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)(M), (R)); }) + +#define _mm512_exp2a23_ps(A) \ + _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_mask_exp2a23_ps(S, M, A) \ + _mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_maskz_exp2a23_ps(M, A) \ + _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) + +// rsqrt28 +#define _mm512_rsqrt28_round_pd(A, R) __extension__ ({ \ + (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)-1, (R)); }) + +#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) __extension__ ({ \ + (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ +
(__v8df)(__m512d)(S), \ + (__mmask8)(M), (R)); }) + +#define _mm512_maskz_rsqrt28_round_pd(M, A, R) __extension__ ({ \ + (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)(M), (R)); }) + +#define _mm512_rsqrt28_pd(A) \ + _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_mask_rsqrt28_pd(S, M, A) \ + _mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_maskz_rsqrt28_pd(M, A) \ + _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_rsqrt28_round_ps(A, R) __extension__ ({ \ + (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)-1, (R)); }) + +#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) __extension__ ({ \ + (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(S), \ + (__mmask16)(M), (R)); }) + +#define _mm512_maskz_rsqrt28_round_ps(M, A, R) __extension__ ({ \ + (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)(M), (R)); }) + +#define _mm512_rsqrt28_ps(A) \ + _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_mask_rsqrt28_ps(S, M, A) \ + _mm512_mask_rsqrt28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_maskz_rsqrt28_ps(M, A) \ + _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) + +#define _mm_rsqrt28_round_ss(A, B, R) __extension__ ({ \ + (__m128)__builtin_ia32_rsqrt28ss_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)-1, (R)); }) + +#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) __extension__ ({ \ + (__m128)__builtin_ia32_rsqrt28ss_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(S), \ + (__mmask8)(M), (R)); }) + +#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) __extension__ ({ \ + (__m128)__builtin_ia32_rsqrt28ss_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(M), (R)); }) + +#define _mm_rsqrt28_ss(A, B) \ + _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_mask_rsqrt28_ss(S, M, A, B) \ + _mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_maskz_rsqrt28_ss(M, A, B) \ + _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_rsqrt28_round_sd(A, B, R) __extension__ ({ \ + (__m128d)__builtin_ia32_rsqrt28sd_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)-1, (R)); }) + +#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) __extension__ ({ \ + (__m128d)__builtin_ia32_rsqrt28sd_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)(__m128d)(S), \ + (__mmask8)(M), (R)); }) + +#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) __extension__ ({ \ + (__m128d)__builtin_ia32_rsqrt28sd_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(M), (R)); }) + +#define _mm_rsqrt28_sd(A, B) \ + _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_mask_rsqrt28_sd(S, M, A, B) \ + _mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_maskz_rsqrt28_sd(M, A, B) \ + _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION) + +// rcp28 +#define _mm512_rcp28_round_pd(A, R) __extension__ ({ \ + (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)-1, (R)); }) +
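/* Usage sketch (illustrative): exp2a23, rsqrt28 and rcp28 are hardware
 * approximations (relative error bounded by 2^-23 and 2^-28 respectively);
 * the non-_round wrappers simply pass _MM_FROUND_CUR_DIRECTION:
 *
 *   __m512d x = _mm512_set1_pd(4.0);
 *   __m512d r = _mm512_rsqrt28_pd(x);   // ~0.5 in every lane, within 2^-28 relative error
 */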
+#define _mm512_mask_rcp28_round_pd(S, M, A, R) __extension__ ({ \ + (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(S), \ + (__mmask8)(M), (R)); }) + +#define _mm512_maskz_rcp28_round_pd(M, A, R) __extension__ ({ \ + (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)(M), (R)); }) + +#define _mm512_rcp28_pd(A) \ + _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_mask_rcp28_pd(S, M, A) \ + _mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_maskz_rcp28_pd(M, A) \ + _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_rcp28_round_ps(A, R) __extension__ ({ \ + (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)-1, (R)); }) + +#define _mm512_mask_rcp28_round_ps(S, M, A, R) __extension__ ({ \ + (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(S), \ + (__mmask16)(M), (R)); }) + +#define _mm512_maskz_rcp28_round_ps(M, A, R) __extension__ ({ \ + (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)(M), (R)); }) + +#define _mm512_rcp28_ps(A) \ + _mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_mask_rcp28_ps(S, M, A) \ + _mm512_mask_rcp28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_maskz_rcp28_ps(M, A) \ + _mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) + +#define _mm_rcp28_round_ss(A, B, R) __extension__ ({ \ + (__m128)__builtin_ia32_rcp28ss_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)-1, (R)); }) + +#define _mm_mask_rcp28_round_ss(S, M, A, B, R) __extension__ ({ \ + (__m128)__builtin_ia32_rcp28ss_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(S), \ + (__mmask8)(M), (R)); }) + +#define _mm_maskz_rcp28_round_ss(M, A, B, R) __extension__ ({ \ + (__m128)__builtin_ia32_rcp28ss_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(M), (R)); }) + +#define _mm_rcp28_ss(A, B) \ + _mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_mask_rcp28_ss(S, M, A, B) \ + _mm_mask_rcp28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_maskz_rcp28_ss(M, A, B) \ + _mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_rcp28_round_sd(A, B, R) __extension__ ({ \ + (__m128d)__builtin_ia32_rcp28sd_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)-1, (R)); }) + +#define _mm_mask_rcp28_round_sd(S, M, A, B, R) __extension__ ({ \ + (__m128d)__builtin_ia32_rcp28sd_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)(__m128d)(S), \ + (__mmask8)(M), (R)); }) + +#define _mm_maskz_rcp28_round_sd(M, A, B, R) __extension__ ({ \ + (__m128d)__builtin_ia32_rcp28sd_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(M), (R)); }) + +#define _mm_rcp28_sd(A, B) \ + _mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_mask_rcp28_sd(S, M, A, B) \ + _mm_mask_rcp28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_maskz_rcp28_sd(M, A, B) \ + _mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION) + +#endif // __AVX512ERINTRIN_H diff --git a/c_headers/avx512fintrin.h b/c_headers/avx512fintrin.h new file mode 100644 index 0000000000..4f7cba0b15 
--- /dev/null +++ b/c_headers/avx512fintrin.h @@ -0,0 +1,2457 @@ +/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX512FINTRIN_H +#define __AVX512FINTRIN_H + +typedef double __v8df __attribute__((__vector_size__(64))); +typedef float __v16sf __attribute__((__vector_size__(64))); +typedef long long __v8di __attribute__((__vector_size__(64))); +typedef int __v16si __attribute__((__vector_size__(64))); + +typedef float __m512 __attribute__((__vector_size__(64))); +typedef double __m512d __attribute__((__vector_size__(64))); +typedef long long __m512i __attribute__((__vector_size__(64))); + +typedef unsigned char __mmask8; +typedef unsigned short __mmask16; + +/* Rounding mode macros. */ +#define _MM_FROUND_TO_NEAREST_INT 0x00 +#define _MM_FROUND_TO_NEG_INF 0x01 +#define _MM_FROUND_TO_POS_INF 0x02 +#define _MM_FROUND_TO_ZERO 0x03 +#define _MM_FROUND_CUR_DIRECTION 0x04 + +/* Define the default attributes for the functions in this file.
*/ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) + +/* Create vectors with repeated elements */ + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_setzero_si512(void) +{ + return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_set1_epi32(__mmask16 __M, int __A) +{ + return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, + (__v16si) + _mm512_setzero_si512 (), + __M); +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_set1_epi64(__mmask8 __M, long long __A) +{ +#ifdef __x86_64__ + return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, + (__v8di) + _mm512_setzero_si512 (), + __M); +#else + return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, + (__v8di) + _mm512_setzero_si512 (), + __M); +#endif +} + +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_setzero_ps(void) +{ + return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; +} +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_setzero_pd(void) +{ + return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; +} + +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_set1_ps(float __w) +{ + return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w }; +} + +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_set1_pd(double __w) +{ + return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w }; +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_set1_epi32(int __s) +{ + return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s, + __s, __s, __s, __s, __s, __s, __s, __s }; +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_set1_epi64(long long __d) +{ + return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d }; +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_broadcastss_ps(__m128 __X) +{ + float __f = __X[0]; + return (__v16sf){ __f, __f, __f, __f, + __f, __f, __f, __f, + __f, __f, __f, __f, + __f, __f, __f, __f }; +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_broadcastsd_pd(__m128d __X) +{ + double __d = __X[0]; + return (__v8df){ __d, __d, __d, __d, + __d, __d, __d, __d }; +} + +/* Cast between vector types */ + +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_castpd256_pd512(__m256d __a) +{ + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1); +} + +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_castps256_ps512(__m256 __a) +{ + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, + -1, -1, -1, -1, -1, -1, -1, -1); +} + +static __inline __m128d __DEFAULT_FN_ATTRS +_mm512_castpd512_pd128(__m512d __a) +{ + return __builtin_shufflevector(__a, __a, 0, 1); +} + +static __inline __m128 __DEFAULT_FN_ATTRS +_mm512_castps512_ps128(__m512 __a) +{ + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); +} + +/* Bitwise operators */ +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_and_epi32(__m512i __a, __m512i __b) +{ + return __a & __b; +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) +{ + return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a, + (__v16si) __b, + (__v16si) __src, + (__mmask16) __k); +} +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b) +{ + return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a, + (__v16si) __b, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __k); +} + +static __inline__ 
__m512i __DEFAULT_FN_ATTRS +_mm512_and_epi64(__m512i __a, __m512i __b) +{ + return __a & __b; +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) +{ + return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a, + (__v8di) __b, + (__v8di) __src, + (__mmask8) __k); +} +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b) +{ + return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a, + (__v8di) __b, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __k); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_andnot_epi32 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) __W, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_andnot_epi64 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) __W, __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + __U); +} +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_or_epi32(__m512i __a, __m512i __b) +{ + return __a | __b; +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) +{ + return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a, + (__v16si) __b, + (__v16si) __src, + (__mmask16) __k); +} +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b) +{ + return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a, + (__v16si) __b, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __k); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_or_epi64(__m512i __a, __m512i __b) +{ + return __a | __b; +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) +{ + return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a, + (__v8di) __b, + (__v8di) __src, + (__mmask8) __k); +} +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b) +{ + return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a, + (__v8di) __b, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __k); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_xor_epi32(__m512i __a, __m512i __b) +{ + return __a ^ __b; +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
+{ + return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a, + (__v16si) __b, + (__v16si) __src, + (__mmask16) __k); +} +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b) +{ + return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a, + (__v16si) __b, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __k); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_xor_epi64(__m512i __a, __m512i __b) +{ + return __a ^ __b; +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) +{ + return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a, + (__v8di) __b, + (__v8di) __src, + (__mmask8) __k); +} +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b) +{ + return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a, + (__v8di) __b, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __k); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_and_si512(__m512i __a, __m512i __b) +{ + return __a & __b; +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_or_si512(__m512i __a, __m512i __b) +{ + return __a | __b; +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_xor_si512(__m512i __a, __m512i __b) +{ + return __a ^ __b; +} +/* Arithmetic */ + +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_add_pd(__m512d __a, __m512d __b) +{ + return __a + __b; +} + +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_add_ps(__m512 __a, __m512 __b) +{ + return __a + __b; +} + +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_mul_pd(__m512d __a, __m512d __b) +{ + return __a * __b; +} + +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_mul_ps(__m512 __a, __m512 __b) +{ + return __a * __b; +} + +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_sub_pd(__m512d __a, __m512d __b) +{ + return __a - __b; +} + +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_sub_ps(__m512 __a, __m512 __b) +{ + return __a - __b; +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_add_epi64 (__m512i __A, __m512i __B) +{ + return (__m512i) ((__v8di) __A + (__v8di) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) __W, + (__mmask8) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_sub_epi64 (__m512i __A, __m512i __B) +{ + return (__m512i) ((__v8di) __A - (__v8di) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) __W, + (__mmask8) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_add_epi32 (__m512i __A, __m512i __B) +{ + return (__m512i) ((__v16si) __A + (__v16si) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS 
+_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) __W, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_sub_epi32 (__m512i __A, __m512i __B) +{ + return (__m512i) ((__v16si) __A - (__v16si) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) __W, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_max_pd(__m512d __A, __m512d __B) +{ + return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_max_ps(__m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline __m512i +__DEFAULT_FN_ATTRS +_mm512_max_epi32(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_max_epu32(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_max_epi64(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_max_epu64(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_min_pd(__m512d __A, __m512d __B) +{ + return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_min_ps(__m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline __m512i +__DEFAULT_FN_ATTRS +_mm512_min_epi32(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_min_epu32(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + 
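[Every arithmetic intrinsic above follows the same three-flavor convention: the plain form operates on all lanes, the _mask form merges (lanes whose mask bit is clear keep the value of the pass-through operand __W), and the _maskz form zeroes those lanes instead. Below is a minimal sketch of the difference, using only intrinsics defined in this header; the file name, driver code, and build line are illustrative and assume an AVX-512F target, e.g. clang -mavx512f mask_demo.c.

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m512i a = _mm512_set1_epi32(10);
        __m512i b = _mm512_set1_epi32(1);
        __mmask16 k = 0x00FF; /* select only the low 8 of the 16 lanes */

        /* Merge-masking: lanes 0..7 hold 10+1 = 11; lanes 8..15 keep the
           pass-through value 10 from the first operand. */
        __m512i merged = _mm512_mask_add_epi32(a, k, a, b);

        /* Zero-masking: lanes 0..7 hold 11; lanes 8..15 are zeroed. */
        __m512i zeroed = _mm512_maskz_add_epi32(k, a, b);

        int out[16];
        _mm512_mask_storeu_epi32(out, (__mmask16)-1, merged);
        printf("merged: lane0=%d lane15=%d\n", out[0], out[15]); /* 11 10 */
        _mm512_mask_storeu_epi32(out, (__mmask16)-1, zeroed);
        printf("zeroed: lane0=%d lane15=%d\n", out[0], out[15]); /* 11 0 */
        return 0;
    }

The same merge/zero distinction applies unchanged to the _pd/_ps forms and to the masked min/max, multiply, and FMA intrinsics that follow.]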
+static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_min_epi64(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_min_epu64(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_mul_epi32(__m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, + (__v16si) __Y, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, + (__v16si) __Y, + (__v8di) __W, __M); +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, + (__v16si) __Y, + (__v8di) + _mm512_setzero_si512 (), + __M); +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_mul_epu32(__m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, + (__v16si) __Y, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, + (__v16si) __Y, + (__v8di) __W, __M); +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, + (__v16si) __Y, + (__v8di) + _mm512_setzero_si512 (), + __M); +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_mullo_epi32 (__m512i __A, __m512i __B) +{ + return (__m512i) ((__v16si) __A * (__v16si) __B); +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + __M); +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) __W, __M); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_sqrt_pd(__m512d a) +{ + return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)a, + (__v8df) _mm512_setzero_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_sqrt_ps(__m512 a) +{ + return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)a, + (__v16sf) _mm512_setzero_ps (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_rsqrt14_pd(__m512d __A) +{ + return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1);} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_rsqrt14_ps(__m512 __A) +{ + return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_rsqrt14_ss(__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) 
+ _mm_setzero_ps (), + (__mmask8) -1); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_rsqrt14_sd(__m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) -1); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_rcp14_pd(__m512d __A) +{ + return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_rcp14_ps(__m512 __A) +{ + return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1); +} +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_rcp14_ss(__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) -1); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_rcp14_sd(__m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) -1); +} + +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_floor_ps(__m512 __A) +{ + return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, + _MM_FROUND_FLOOR, + (__v16sf) __A, -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_floor_pd(__m512d __A) +{ + return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, + _MM_FROUND_FLOOR, + (__v8df) __A, -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_ceil_ps(__m512 __A) +{ + return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, + _MM_FROUND_CEIL, + (__v16sf) __A, -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_ceil_pd(__m512d __A) +{ + return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, + _MM_FROUND_CEIL, + (__v8df) __A, -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_abs_epi64(__m512i __A) +{ + return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_abs_epi32(__m512i __A) +{ + return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +#define _mm512_roundscale_ps(A, B) __extension__ ({ \ + (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(A), (B), (__v16sf)(A), \ + -1, _MM_FROUND_CUR_DIRECTION); }) + +#define _mm512_roundscale_pd(A, B) __extension__ ({ \ + (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(A), (B), (__v8df)(A), \ + -1, _MM_FROUND_CUR_DIRECTION); }) + +#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) -1, (R)); }) + + +#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \ + 
(__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \ + (__v8df) (B), -(__v8df) (C), \ + (__mmask8) -1, (R)); }) + + +#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \ + (__v8df) (B), -(__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) (A), \ + (__v8df) (B), -(__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) -1, (R)); }) + + +#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) (A), \ + (__v8df) (B), -(__v8df) (C), \ + (__mmask8) -1, (R)); }) + + +#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) (A), \ + (__v8df) (B), -(__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, + (__v8df) 
__B, + (__v8df) __C, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) -1, (R)); }) + + +#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \ + (__v16sf) (B), -(__v16sf) (C), \ + (__mmask16) -1, (R)); }) + + +#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \ + (__v16sf) (B), -(__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) (A), \ + (__v16sf) (B), -(__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) -1, (R)); }) + + +#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) (A), \ + (__v16sf) (B), -(__v16sf) (C), \ + (__mmask16) -1, (R)); }) + + +#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) (A), \ + (__v16sf) (B), -(__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +static __inline__ __m512 
__DEFAULT_FN_ATTRS +_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) -1, (R)); }) + + +#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \ + (__v8df) (B), 
(__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \ + (__v8df) (B), -(__v8df) (C), \ + (__mmask8) -1, (R)); }) + + +#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \ + (__v8df) (B), -(__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) (A), \ + (__v8df) (B), -(__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) +{ + return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) -1, (R)); }) + + +#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + 
(__mmask16) (U), (R)); }) + + +#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \ + (__v16sf) (B), -(__v16sf) (C), \ + (__mmask16) -1, (R)); }) + + +#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \ + (__v16sf) (B), -(__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) (A), \ + (__v16sf) (B), -(__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) +{ + return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) +{ + return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 
__U) +{ + return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) +{ + return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) +{ + return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) +{ + return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) +{ + 
return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) +{ + return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + + + +/* Vector permutations */ + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I + /* idx */ , + (__v16si) __A, + (__v16si) __B, + (__mmask16) -1); +} +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I + /* idx */ , + (__v8di) __A, + (__v8di) __B, + (__mmask8) -1); +} + +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B) +{ + return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I + /* idx */ , + (__v8df) __A, + (__v8df) __B, + (__mmask8) -1); +} +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) +{ + return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I + /* idx */ , + (__v16sf) __A, + (__v16sf) __B, + (__mmask16) -1); +} + +#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \ + (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__m512i)(A), \ + (__v8di)(__m512i)(B), \ + (I), (__v8di)_mm512_setzero_si512(), \ + (__mmask8)-1); }) + +#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \ + (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \ + (__v16si)(__m512i)(B), \ + (I), (__v16si)_mm512_setzero_si512(), \ + (__mmask16)-1); }) + +/* Vector Extract */ + +#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \ + __m512d __A = (A); \ + (__m256d) \ + __builtin_ia32_extractf64x4_mask((__v8df)__A, \ + (I), \ + (__v4df)_mm256_setzero_si256(), \ + (__mmask8) -1); }) + +#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \ + __m512 __A = (A); \ + (__m128) \ + __builtin_ia32_extractf32x4_mask((__v16sf)__A, \ + (I), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8) -1); }) + +/* Vector Blend */ + +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) +{ + return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A, + (__v8df) __W, + (__mmask8) __U); +} + +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) +{ + return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A, + (__v16sf) __W, + (__mmask16) __U); +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) +{ + return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A, + (__v8di) __W, + (__mmask8) __U); +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) +{ + return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A, + (__v16si) __W, + (__mmask16) __U); +} + +/* Compare */ + +#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \ + (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + (P), (__mmask16)-1, (R)); }) + +#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \ + 
(__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + (P), (__mmask16)(U), (R)); }) + +#define _mm512_cmp_ps_mask(A, B, P) \ + _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_mask_cmp_ps_mask(U, A, B, P) \ + _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + (P), (__mmask8)-1, (R)); }) + +#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + (P), (__mmask8)(U), (R)); }) + +#define _mm512_cmp_pd_mask(A, B, P) \ + _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_mask_cmp_pd_mask(U, A, B, P) \ + _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) + +/* Conversion */ + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_cvttps_epu32(__m512 __A) +{ + return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \ + (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)-1, (R)); }) + +#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \ + (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)-1, (R)); }) + +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_cvtepi32_pd(__m256i __A) +{ + return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); +} + +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_cvtepu32_pd(__m256i __A) +{ + return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); +} + +#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \ + (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(A), \ + (__v8sf)_mm256_setzero_ps(), \ + (__mmask8)-1, (R)); }) + +#define _mm512_cvtps_ph(A, I) __extension__ ({ \ + (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(A), (I), \ + (__v16hi)_mm256_setzero_si256(), \ + -1); }) + +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_cvtph_ps(__m256i __A) +{ + return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_cvttps_epi32(__m512 a) +{ + return (__m512i) + __builtin_ia32_cvttps2dq512_mask((__v16sf) a, + (__v16si) _mm512_setzero_si512 (), + (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm512_cvttpd_epi32(__m512d a) +{ + return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) a, + (__v8si)_mm256_setzero_si256(), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \ + (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(A), \ + (__v8si)_mm256_setzero_si256(), \ + (__mmask8)-1, (R)); }) + +#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \ + (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(A), \ + (__v16si)_mm512_setzero_si512(), \ + (__mmask16)-1, (R)); }) + +#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \ + (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(A), \ + 
(__v16si)_mm512_setzero_si512(), \ + (__mmask16)-1, (R)); }) + +#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \ + (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(A), \ + (__v8si)_mm256_setzero_si256(), \ + (__mmask8)-1, (R)); }) + +#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \ + (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(A), \ + (__v16si)_mm512_setzero_si512(), \ + (__mmask16)-1, (R)); }) + +#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \ + (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(A), \ + (__v8si)_mm256_setzero_si256(), \ + (__mmask8) -1, (R)); }) + +/* Unpack and Interleave */ +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_unpackhi_pd(__m512d __a, __m512d __b) +{ + return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); +} + +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_unpacklo_pd(__m512d __a, __m512d __b) +{ + return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); +} + +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_unpackhi_ps(__m512 __a, __m512 __b) +{ + return __builtin_shufflevector(__a, __b, + 2, 18, 3, 19, + 2+4, 18+4, 3+4, 19+4, + 2+8, 18+8, 3+8, 19+8, + 2+12, 18+12, 3+12, 19+12); +} + +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_unpacklo_ps(__m512 __a, __m512 __b) +{ + return __builtin_shufflevector(__a, __b, + 0, 16, 1, 17, + 0+4, 16+4, 1+4, 17+4, + 0+8, 16+8, 1+8, 17+8, + 0+12, 16+12, 1+12, 17+12); +} + +/* Bit Test */ + +static __inline __mmask16 __DEFAULT_FN_ATTRS +_mm512_test_epi32_mask(__m512i __A, __m512i __B) +{ + return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A, + (__v16si) __B, + (__mmask16) -1); +} + +static __inline __mmask8 __DEFAULT_FN_ATTRS +_mm512_test_epi64_mask(__m512i __A, __m512i __B) +{ + return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, + (__v8di) __B, + (__mmask8) -1); +} + +/* SIMD load ops */ + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P) +{ + return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P) +{ + return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P) +{ + return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P) +{ + return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_maskz_load_ps(__mmask16 __U, void const *__P) +{ + return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_maskz_load_pd(__mmask8 __U, void const *__P) +{ + return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_loadu_pd(double const *__p) +{ + struct __loadu_pd { + __m512d __v; + } __attribute__((__packed__, __may_alias__)); + return ((struct __loadu_pd*)__p)->__v; +} + +static __inline __m512 
__DEFAULT_FN_ATTRS +_mm512_loadu_ps(float const *__p) +{ + struct __loadu_ps { + __m512 __v; + } __attribute__((__packed__, __may_alias__)); + return ((struct __loadu_ps*)__p)->__v; +} + +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_load_ps(float const *__p) +{ + return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1); +} + +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_load_pd(double const *__p) +{ + return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); +} + +/* SIMD store ops */ + +static __inline void __DEFAULT_FN_ATTRS +_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A) +{ + __builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A, + (__mmask8) __U); +} + +static __inline void __DEFAULT_FN_ATTRS +_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A) +{ + __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A, + (__mmask16) __U); +} + +static __inline void __DEFAULT_FN_ATTRS +_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A) +{ + __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U); +} + +static __inline void __DEFAULT_FN_ATTRS +_mm512_storeu_pd(void *__P, __m512d __A) +{ + __builtin_ia32_storeupd512_mask((__v8df *)__P, (__v8df)__A, (__mmask8)-1); +} + +static __inline void __DEFAULT_FN_ATTRS +_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A) +{ + __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A, + (__mmask16) __U); +} + +static __inline void __DEFAULT_FN_ATTRS +_mm512_storeu_ps(void *__P, __m512 __A) +{ + __builtin_ia32_storeups512_mask((__v16sf *)__P, (__v16sf)__A, (__mmask16)-1); +} + +static __inline void __DEFAULT_FN_ATTRS +_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A) +{ + __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U); +} + +static __inline void __DEFAULT_FN_ATTRS +_mm512_store_pd(void *__P, __m512d __A) +{ + *(__m512d*)__P = __A; +} + +static __inline void __DEFAULT_FN_ATTRS +_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A) +{ + __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A, + (__mmask16) __U); +} + +static __inline void __DEFAULT_FN_ATTRS +_mm512_store_ps(void *__P, __m512 __A) +{ + *(__m512*)__P = __A; +} + +/* Mask ops */ + +static __inline __mmask16 __DEFAULT_FN_ATTRS +_mm512_knot(__mmask16 __M) +{ + return __builtin_ia32_knothi(__M); +} + +/* Integer compare */ + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a,
(__v8di)__b, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b, + (__mmask8)-1); +} + +static __inline__ __mmask8 
__DEFAULT_FN_ATTRS +_mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm512_cmple_epi32_mask(__m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm512_cmple_epu32_mask(__m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm512_cmple_epi64_mask(__m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm512_cmple_epu64_mask(__m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm512_mask_cmplt_epu64_mask(__mmask8 
__u, __m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4, + __u); +} + +#define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \ + __m512i __a = (a); \ + __m512i __b = (b); \ + (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, (p), \ + (__mmask16)-1); }) + +#define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \ + __m512i __a = (a); \ + __m512i __b = (b); \ + (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, (p), \ + (__mmask16)-1); }) + +#define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \ + __m512i __a = (a); \ + __m512i __b = (b); \ + (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, (p), \ + (__mmask8)-1); }) + +#define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \ + __m512i __a = (a); \ + __m512i __b = (b); \ + (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, (p), \ + (__mmask8)-1); }) + +#define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \ + __m512i __a = (a); \ + __m512i __b = (b); \ + (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, (p), \ + (__mmask16)(m)); }) + +#define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \ + __m512i __a = (a); \ + __m512i __b = (b); \ + (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, (p), \ + (__mmask16)(m)); }) + +#define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \ + __m512i __a = (a); \ + __m512i __b = (b); \ + (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, (p), \ + (__mmask8)(m)); }) + +#define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \ + __m512i __a = (a); \ + __m512i __b = (b); \ + (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, (p), \ + (__mmask8)(m)); }) + +#undef __DEFAULT_FN_ATTRS + +#endif // __AVX512FINTRIN_H diff --git a/c_headers/avx512vlbwintrin.h 
b/c_headers/avx512vlbwintrin.h new file mode 100644 index 0000000000..74ec175830 --- /dev/null +++ b/c_headers/avx512vlbwintrin.h @@ -0,0 +1,1907 @@ +/*===---- avx512vlbwintrin.h - AVX512VL and AVX512BW intrinsics ----------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use <avx512vlbwintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX512VLBWINTRIN_H +#define __AVX512VLBWINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) + +/* Integer compare */ + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_cmpeq_epi8_mask(__m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_pcmpeqb128_mask((__v16qi)__a, (__v16qi)__b, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_mask_cmpeq_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_pcmpeqb128_mask((__v16qi)__a, (__v16qi)__b, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_cmpeq_epu8_mask(__m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 0, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_mask_cmpeq_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 0, + __u); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_cmpeq_epi8_mask(__m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_pcmpeqb256_mask((__v32qi)__a, (__v32qi)__b, + (__mmask32)-1); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_mask_cmpeq_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_pcmpeqb256_mask((__v32qi)__a, (__v32qi)__b, + __u); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_cmpeq_epu8_mask(__m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 0, + (__mmask32)-1); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_mask_cmpeq_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 0, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmpeq_epi16_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_pcmpeqw128_mask((__v8hi)__a, (__v8hi)__b, +
(__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmpeq_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_pcmpeqw128_mask((__v8hi)__a, (__v8hi)__b, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmpeq_epu16_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 0, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmpeq_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 0, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm256_cmpeq_epi16_mask(__m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_pcmpeqw256_mask((__v16hi)__a, (__v16hi)__b, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm256_mask_cmpeq_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_pcmpeqw256_mask((__v16hi)__a, (__v16hi)__b, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm256_cmpeq_epu16_mask(__m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 0, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm256_mask_cmpeq_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 0, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_cmpge_epi8_mask(__m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 5, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_mask_cmpge_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 5, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_cmpge_epu8_mask(__m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 5, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_mask_cmpge_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 5, + __u); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_cmpge_epi8_mask(__m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 5, + (__mmask32)-1); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_mask_cmpge_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 5, + __u); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_cmpge_epu8_mask(__m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 5, + (__mmask32)-1); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_mask_cmpge_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 5, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmpge_epi16_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 5, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmpge_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 5, + __u); +} + +static __inline__ __mmask8 
__DEFAULT_FN_ATTRS +_mm_cmpge_epu16_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 5, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmpge_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 5, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm256_cmpge_epi16_mask(__m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 5, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm256_mask_cmpge_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 5, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm256_cmpge_epu16_mask(__m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 5, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm256_mask_cmpge_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 5, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_cmpgt_epi8_mask(__m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_pcmpgtb128_mask((__v16qi)__a, (__v16qi)__b, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_mask_cmpgt_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_pcmpgtb128_mask((__v16qi)__a, (__v16qi)__b, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_cmpgt_epu8_mask(__m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 6, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_mask_cmpgt_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 6, + __u); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_cmpgt_epi8_mask(__m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_pcmpgtb256_mask((__v32qi)__a, (__v32qi)__b, + (__mmask32)-1); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_mask_cmpgt_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_pcmpgtb256_mask((__v32qi)__a, (__v32qi)__b, + __u); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_cmpgt_epu8_mask(__m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 6, + (__mmask32)-1); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_mask_cmpgt_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 6, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmpgt_epi16_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_pcmpgtw128_mask((__v8hi)__a, (__v8hi)__b, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmpgt_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_pcmpgtw128_mask((__v8hi)__a, (__v8hi)__b, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmpgt_epu16_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 6, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmpgt_epu16_mask(__mmask8 __u, __m128i __a, 
__m128i __b) { + return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 6, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm256_cmpgt_epi16_mask(__m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_pcmpgtw256_mask((__v16hi)__a, (__v16hi)__b, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm256_mask_cmpgt_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_pcmpgtw256_mask((__v16hi)__a, (__v16hi)__b, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm256_cmpgt_epu16_mask(__m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 6, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm256_mask_cmpgt_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 6, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_cmple_epi8_mask(__m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 2, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_mask_cmple_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 2, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_cmple_epu8_mask(__m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 2, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_mask_cmple_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 2, + __u); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_cmple_epi8_mask(__m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 2, + (__mmask32)-1); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_mask_cmple_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 2, + __u); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_cmple_epu8_mask(__m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 2, + (__mmask32)-1); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_mask_cmple_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 2, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmple_epi16_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 2, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmple_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 2, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmple_epu16_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 2, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmple_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 2, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm256_cmple_epi16_mask(__m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, 
(__v16hi)__b, 2, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm256_mask_cmple_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 2, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm256_cmple_epu16_mask(__m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 2, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm256_mask_cmple_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 2, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_cmplt_epi8_mask(__m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 1, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_mask_cmplt_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 1, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_cmplt_epu8_mask(__m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 1, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_mask_cmplt_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 1, + __u); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_cmplt_epi8_mask(__m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 1, + (__mmask32)-1); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_mask_cmplt_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 1, + __u); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_cmplt_epu8_mask(__m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 1, + (__mmask32)-1); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_mask_cmplt_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 1, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmplt_epi16_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 1, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmplt_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 1, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmplt_epu16_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 1, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmplt_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 1, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm256_cmplt_epi16_mask(__m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 1, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm256_mask_cmplt_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 1, + __u); +} + +static 
__inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm256_cmplt_epu16_mask(__m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 1, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm256_mask_cmplt_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 1, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_cmpneq_epi8_mask(__m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 4, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_mask_cmpneq_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 4, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_cmpneq_epu8_mask(__m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 4, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_mask_cmpneq_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 4, + __u); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_cmpneq_epi8_mask(__m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 4, + (__mmask32)-1); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_mask_cmpneq_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 4, + __u); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_cmpneq_epu8_mask(__m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 4, + (__mmask32)-1); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_mask_cmpneq_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 4, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmpneq_epi16_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 4, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmpneq_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 4, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmpneq_epu16_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 4, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmpneq_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 4, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm256_cmpneq_epi16_mask(__m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 4, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm256_mask_cmpneq_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 4, + __u); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm256_cmpneq_epu16_mask(__m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 4, + (__mmask16)-1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS 
+_mm256_mask_cmpneq_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 4, + __u); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_add_epi8 (__m256i __W, __mmask32 __U, __m256i __A, __m256i __B){ + return (__m256i) __builtin_ia32_paddb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) __W, + (__mmask32) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_add_epi8 (__mmask32 __U, __m256i __A, __m256i __B) { + return (__m256i) __builtin_ia32_paddb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) + _mm256_setzero_si256 (), + (__mmask32) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_add_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { + return (__m256i) __builtin_ia32_paddw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) __W, + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_add_epi16 (__mmask16 __U, __m256i __A, __m256i __B) { + return (__m256i) __builtin_ia32_paddw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_sub_epi8 (__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { + return (__m256i) __builtin_ia32_psubb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) __W, + (__mmask32) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_sub_epi8 (__mmask32 __U, __m256i __A, __m256i __B) { + return (__m256i) __builtin_ia32_psubb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) + _mm256_setzero_si256 (), + (__mmask32) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_sub_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { + return (__m256i) __builtin_ia32_psubw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) __W, + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_sub_epi16 (__mmask16 __U, __m256i __A, __m256i __B) { + return (__m256i) __builtin_ia32_psubw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __U); +} +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_add_epi8 (__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { + return (__m128i) __builtin_ia32_paddb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) __W, + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_add_epi8 (__mmask16 __U, __m128i __A, __m128i __B) { + return (__m128i) __builtin_ia32_paddb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) + _mm_setzero_si128 (), + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_add_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { + return (__m128i) __builtin_ia32_paddw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_add_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { + return (__m128i) __builtin_ia32_paddw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_sub_epi8 (__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { + return (__m128i) __builtin_ia32_psubb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) __W, + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_sub_epi8 (__mmask16 __U, __m128i __A, __m128i __B) { + return (__m128i) 
__builtin_ia32_psubb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) + _mm_setzero_si128 (), + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_sub_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { + return (__m128i) __builtin_ia32_psubw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_sub_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { + return (__m128i) __builtin_ia32_psubw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_mullo_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { + return (__m256i) __builtin_ia32_pmullw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) __W, + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_mullo_epi16 (__mmask16 __U, __m256i __A, __m256i __B) { + return (__m256i) __builtin_ia32_pmullw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_mullo_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { + return (__m128i) __builtin_ia32_pmullw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { + return (__m128i) __builtin_ia32_pmullw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W) +{ + return (__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) __A, + (__v16qi) __W, + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W) +{ + return (__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) __A, + (__v32qi) __W, + (__mmask32) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W) +{ + return (__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) __A, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W) +{ + return (__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) __A, + (__v16hi) __W, + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_abs_epi8 (__m128i __W, __mmask16 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_pabsb128_mask ((__v16qi) __A, + (__v16qi) __W, + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_abs_epi8 (__mmask16 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_pabsb128_mask ((__v16qi) __A, + (__v16qi) _mm_setzero_si128 (), + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_abs_epi8 (__m256i __W, __mmask32 __U, __m256i __A) +{ + return (__m256i) __builtin_ia32_pabsb256_mask ((__v32qi) __A, + (__v32qi) __W, + (__mmask32) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_abs_epi8 (__mmask32 __U, __m256i __A) +{ + return (__m256i) __builtin_ia32_pabsb256_mask ((__v32qi) __A, + (__v32qi) _mm256_setzero_si256 (), + (__mmask32) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_abs_epi16 (__m128i __W, __mmask8 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_pabsw128_mask 
((__v8hi) __A, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_abs_epi16 (__mmask8 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_pabsw128_mask ((__v8hi) __A, + (__v8hi) _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_abs_epi16 (__m256i __W, __mmask16 __U, __m256i __A) +{ + return (__m256i) __builtin_ia32_pabsw256_mask ((__v16hi) __A, + (__v16hi) __W, + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_abs_epi16 (__mmask16 __U, __m256i __A) +{ + return (__m256i) __builtin_ia32_pabsw256_mask ((__v16hi) __A, + (__v16hi) _mm256_setzero_si256 (), + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_packs_epi32 (__mmask8 __M, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_packssdw128_mask ((__v4si) __A, + (__v4si) __B, + (__v8hi) _mm_setzero_si128 (), __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_packs_epi32 (__m128i __W, __mmask16 __M, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_packssdw128_mask ((__v4si) __A, + (__v4si) __B, + (__v8hi) __W, __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_packs_epi32 (__mmask16 __M, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_packssdw256_mask ((__v8si) __A, + (__v8si) __B, + (__v16hi) _mm256_setzero_si256 (), + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_packs_epi32 (__m256i __W, __mmask16 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_packssdw256_mask ((__v8si) __A, + (__v8si) __B, + (__v16hi) __W, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_packs_epi16 (__mmask16 __M, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_packsswb128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v16qi) _mm_setzero_si128 (), + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_packs_epi16 (__m128i __W, __mmask16 __M, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_packsswb128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v16qi) __W, + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_packs_epi16 (__mmask32 __M, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_packsswb256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v32qi) _mm256_setzero_si256 (), + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_packs_epi16 (__m256i __W, __mmask32 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_packsswb256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v32qi) __W, + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_packus_epi32 (__mmask8 __M, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_packusdw128_mask ((__v4si) __A, + (__v4si) __B, + (__v8hi) _mm_setzero_si128 (), + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_packus_epi32 (__m128i __W, __mmask16 __M, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_packusdw128_mask ((__v4si) __A, + (__v4si) __B, + (__v8hi) __W, __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_packus_epi32 (__mmask16 __M, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_packusdw256_mask ((__v8si) __A, + (__v8si) __B, + (__v16hi) _mm256_setzero_si256 (), + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_packus_epi32 (__m256i __W, __mmask16 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) 
__builtin_ia32_packusdw256_mask ((__v8si) __A, + (__v8si) __B, + (__v16hi) __W, + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_packus_epi16 (__mmask16 __M, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_packuswb128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v16qi) _mm_setzero_si128 (), + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_packus_epi16 (__m128i __W, __mmask16 __M, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_packuswb128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v16qi) __W, + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_packus_epi16 (__mmask32 __M, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_packuswb256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v32qi) _mm256_setzero_si256 (), + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_packus_epi16 (__m256i __W, __mmask32 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_packuswb256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v32qi) __W, + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_adds_epi8 (__m128i __W, __mmask16 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_paddsb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) __W, + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_adds_epi8 (__mmask16 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_paddsb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) _mm_setzero_si128 (), + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_adds_epi8 (__m256i __W, __mmask32 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_paddsb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) __W, + (__mmask32) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_adds_epi8 (__mmask32 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_paddsb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) _mm256_setzero_si256 (), + (__mmask32) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_adds_epi16 (__m128i __W, __mmask8 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_paddsw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_adds_epi16 (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_paddsw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_adds_epi16 (__m256i __W, __mmask16 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_paddsw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) __W, + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_adds_epi16 (__mmask16 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_paddsw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) _mm256_setzero_si256 (), + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_adds_epu8 (__m128i __W, __mmask16 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_paddusb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) __W, + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_adds_epu8 (__mmask16 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_paddusb128_mask ((__v16qi) __A, + (__v16qi) __B, + 
(__v16qi) _mm_setzero_si128 (), + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_adds_epu8 (__m256i __W, __mmask32 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_paddusb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) __W, + (__mmask32) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_adds_epu8 (__mmask32 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_paddusb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) _mm256_setzero_si256 (), + (__mmask32) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_adds_epu16 (__m128i __W, __mmask8 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_paddusw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_adds_epu16 (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_paddusw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_adds_epu16 (__m256i __W, __mmask16 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_paddusw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) __W, + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_adds_epu16 (__mmask16 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_paddusw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) _mm256_setzero_si256 (), + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_avg_epu8 (__m128i __W, __mmask16 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_pavgb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) __W, + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_avg_epu8 (__mmask16 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pavgb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) _mm_setzero_si128 (), + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_avg_epu8 (__m256i __W, __mmask32 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pavgb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) __W, + (__mmask32) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_avg_epu8 (__mmask32 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pavgb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) _mm256_setzero_si256 (), + (__mmask32) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_avg_epu16 (__m128i __W, __mmask8 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_pavgw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_avg_epu16 (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pavgw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_avg_epu16 (__m256i __W, __mmask16 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pavgw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) __W, + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_avg_epu16 (__mmask16 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pavgw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) 
_mm256_setzero_si256 (), + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_max_epi8 (__mmask16 __M, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pmaxsb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) _mm_setzero_si128 (), + (__mmask16) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_max_epi8 (__m128i __W, __mmask16 __M, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_pmaxsb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) __W, + (__mmask16) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_max_epi8 (__mmask32 __M, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pmaxsb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) _mm256_setzero_si256 (), + (__mmask32) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_max_epi8 (__m256i __W, __mmask32 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pmaxsb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) __W, + (__mmask32) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_max_epi16 (__mmask8 __M, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pmaxsw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) _mm_setzero_si128 (), + (__mmask8) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_max_epi16 (__m128i __W, __mmask8 __M, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_pmaxsw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) __W, + (__mmask8) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_max_epi16 (__mmask16 __M, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pmaxsw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) _mm256_setzero_si256 (), + (__mmask16) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_max_epi16 (__m256i __W, __mmask16 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pmaxsw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) __W, + (__mmask16) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_max_epu8 (__mmask16 __M, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pmaxub128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) _mm_setzero_si128 (), + (__mmask16) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_max_epu8 (__m128i __W, __mmask16 __M, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_pmaxub128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) __W, + (__mmask16) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_max_epu8 (__mmask32 __M, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pmaxub256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) _mm256_setzero_si256 (), + (__mmask32) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_max_epu8 (__m256i __W, __mmask32 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pmaxub256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) __W, + (__mmask32) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_max_epu16 (__mmask8 __M, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pmaxuw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) _mm_setzero_si128 (), + (__mmask8) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_max_epu16 (__m128i __W, __mmask8 __M, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_pmaxuw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) __W, + 
(__mmask8) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_max_epu16 (__mmask16 __M, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pmaxuw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) _mm256_setzero_si256 (), + (__mmask16) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_max_epu16 (__m256i __W, __mmask16 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pmaxuw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) __W, + (__mmask16) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_min_epi8 (__mmask16 __M, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pminsb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) _mm_setzero_si128 (), + (__mmask16) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_min_epi8 (__m128i __W, __mmask16 __M, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_pminsb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) __W, + (__mmask16) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_min_epi8 (__mmask32 __M, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pminsb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) _mm256_setzero_si256 (), + (__mmask32) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_min_epi8 (__m256i __W, __mmask32 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pminsb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) __W, + (__mmask32) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_min_epi16 (__mmask8 __M, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pminsw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) _mm_setzero_si128 (), + (__mmask8) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_min_epi16 (__m128i __W, __mmask8 __M, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_pminsw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) __W, + (__mmask8) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_min_epi16 (__mmask16 __M, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pminsw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) _mm256_setzero_si256 (), + (__mmask16) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_min_epi16 (__m256i __W, __mmask16 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pminsw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) __W, + (__mmask16) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_min_epu8 (__mmask16 __M, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pminub128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) _mm_setzero_si128 (), + (__mmask16) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_min_epu8 (__m128i __W, __mmask16 __M, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_pminub128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) __W, + (__mmask16) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_min_epu8 (__mmask32 __M, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pminub256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) _mm256_setzero_si256 (), + (__mmask32) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_min_epu8 (__m256i __W, __mmask32 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pminub256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) __W, + (__mmask32) 
__M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_min_epu16 (__mmask8 __M, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pminuw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) _mm_setzero_si128 (), + (__mmask8) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_min_epu16 (__m128i __W, __mmask8 __M, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_pminuw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) __W, + (__mmask8) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_min_epu16 (__mmask16 __M, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pminuw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) _mm256_setzero_si256 (), + (__mmask16) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_min_epu16 (__m256i __W, __mmask16 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pminuw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) __W, + (__mmask16) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_shuffle_epi8 (__m128i __W, __mmask16 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_pshufb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) __W, + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_shuffle_epi8 (__mmask16 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pshufb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) _mm_setzero_si128 (), + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_shuffle_epi8 (__m256i __W, __mmask32 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pshufb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) __W, + (__mmask32) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_shuffle_epi8 (__mmask32 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pshufb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) _mm256_setzero_si256 (), + (__mmask32) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_subs_epi8 (__m128i __W, __mmask16 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_psubsb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) __W, + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_subs_epi8 (__mmask16 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_psubsb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) _mm_setzero_si128 (), + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_subs_epi8 (__m256i __W, __mmask32 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_psubsb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) __W, + (__mmask32) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_subs_epi8 (__mmask32 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_psubsb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) _mm256_setzero_si256 (), + (__mmask32) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_subs_epi16 (__m128i __W, __mmask8 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_psubsw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_subs_epi16 (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_psubsw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) _mm_setzero_si128 (), + (__mmask8) __U); +} + 
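+/* All intrinsics in this file follow the AVX-512 masking convention seen
+ * above: the _mask_ variants take a pass-through vector __W and keep its
+ * lanes wherever the writemask bit is 0, while the _maskz_ variants zero
+ * those lanes, which is why they pass _mm_setzero_si128() or
+ * _mm256_setzero_si256() in place of __W. Likewise, the small integer
+ * immediates handed to the __builtin_ia32_[u]cmp*_mask builtins earlier in
+ * this file encode the comparison predicate: 0 eq, 1 lt, 2 le, 4 ne,
+ * 5 ge (nlt), 6 gt (nle). A minimal usage sketch, with hypothetical values
+ * and assuming a target with AVX512VL and AVX512BW enabled:
+ *
+ *   __m128i __x = _mm_set1_epi16(100);
+ *   __m128i __y = _mm_set1_epi16(1);
+ *   __mmask8 __k = 0x0F;                  // operate on lanes 0-3 only
+ *   __m128i __r = _mm_maskz_subs_epi16(__k, __x, __y);
+ *   // lanes 0-3 of __r hold 99; lanes 4-7 are zeroed
+ */
+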
+static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_subs_epi16 (__m256i __W, __mmask16 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_psubsw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) __W, + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_subs_epi16 (__mmask16 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_psubsw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) _mm256_setzero_si256 (), + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_subs_epu8 (__m128i __W, __mmask16 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_psubusb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) __W, + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_subs_epu8 (__mmask16 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_psubusb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) _mm_setzero_si128 (), + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_subs_epu8 (__m256i __W, __mmask32 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_psubusb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) __W, + (__mmask32) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_subs_epu8 (__mmask32 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_psubusb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) _mm256_setzero_si256 (), + (__mmask32) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_subs_epu16 (__m128i __W, __mmask8 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_psubusw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_subs_epu16 (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_psubusw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_subs_epu16 (__m256i __W, __mmask16 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_psubusw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) __W, + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_subs_epu16 (__mmask16 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_psubusw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) _mm256_setzero_si256 (), + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask2_permutex2var_epi16 (__m128i __A, __m128i __I, __mmask8 __U, + __m128i __B) +{ + return (__m128i) __builtin_ia32_vpermi2varhi128_mask ((__v8hi) __A, + (__v8hi) __I /* idx */ , + (__v8hi) __B, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask2_permutex2var_epi16 (__m256i __A, __m256i __I, + __mmask16 __U, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpermi2varhi256_mask ((__v16hi) __A, + (__v16hi) __I /* idx */ , + (__v16hi) __B, + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_permutex2var_epi16 (__m128i __A, __m128i __I, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpermt2varhi128_mask ((__v8hi) __I/* idx */, + (__v8hi) __A, + (__v8hi) __B, + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_permutex2var_epi16 (__m128i __A, __mmask8 __U, __m128i __I, + __m128i __B) +{ + return (__m128i) __builtin_ia32_vpermt2varhi128_mask ((__v8hi) __I/* idx 
*/, + (__v8hi) __A, + (__v8hi) __B, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_permutex2var_epi16 (__mmask8 __U, __m128i __A, __m128i __I, + __m128i __B) +{ + return (__m128i) __builtin_ia32_vpermt2varhi128_maskz ((__v8hi) __I/* idx */, + (__v8hi) __A, + (__v8hi) __B, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_permutex2var_epi16 (__m256i __A, __m256i __I, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpermt2varhi256_mask ((__v16hi) __I/* idx */, + (__v16hi) __A, + (__v16hi) __B, + (__mmask16) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_permutex2var_epi16 (__m256i __A, __mmask16 __U, + __m256i __I, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpermt2varhi256_mask ((__v16hi) __I/* idx */, + (__v16hi) __A, + (__v16hi) __B, + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A, + __m256i __I, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpermt2varhi256_maskz ((__v16hi) __I/* idx */, + (__v16hi) __A, + (__v16hi) __B, + (__mmask16) __U); +} + +#define _mm_cmp_epi8_mask(a, b, p) __extension__ ({ \ + (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \ + (__v16qi)(__m128i)(b), \ + (p), (__mmask16)-1); }) + +#define _mm_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \ + (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \ + (__v16qi)(__m128i)(b), \ + (p), (__mmask16)(m)); }) + +#define _mm_cmp_epu8_mask(a, b, p) __extension__ ({ \ + (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \ + (__v16qi)(__m128i)(b), \ + (p), (__mmask16)-1); }) + +#define _mm_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \ + (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \ + (__v16qi)(__m128i)(b), \ + (p), (__mmask16)(m)); }) + +#define _mm256_cmp_epi8_mask(a, b, p) __extension__ ({ \ + (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \ + (__v32qi)(__m256i)(b), \ + (p), (__mmask32)-1); }) + +#define _mm256_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \ + (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \ + (__v32qi)(__m256i)(b), \ + (p), (__mmask32)(m)); }) + +#define _mm256_cmp_epu8_mask(a, b, p) __extension__ ({ \ + (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \ + (__v32qi)(__m256i)(b), \ + (p), (__mmask32)-1); }) + +#define _mm256_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \ + (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \ + (__v32qi)(__m256i)(b), \ + (p), (__mmask32)(m)); }) + +#define _mm_cmp_epi16_mask(a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \ + (__v8hi)(__m128i)(b), \ + (p), (__mmask8)-1); }) + +#define _mm_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \ + (__v8hi)(__m128i)(b), \ + (p), (__mmask8)(m)); }) + +#define _mm_cmp_epu16_mask(a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \ + (__v8hi)(__m128i)(b), \ + (p), (__mmask8)-1); }) + +#define _mm_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \ + (__v8hi)(__m128i)(b), \ + (p), (__mmask8)(m)); }) + +#define _mm256_cmp_epi16_mask(a, b, p) __extension__ ({ \ + (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \ + (__v16hi)(__m256i)(b), \ + (p), (__mmask16)-1); }) + +#define _mm256_mask_cmp_epi16_mask(m, a, b, p) __extension__ 
({ \
+  (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \
+                                         (__v16hi)(__m256i)(b), \
+                                         (p), (__mmask16)(m)); })
+
+#define _mm256_cmp_epu16_mask(a, b, p) __extension__ ({ \
+  (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \
+                                          (__v16hi)(__m256i)(b), \
+                                          (p), (__mmask16)-1); })
+
+#define _mm256_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \
+  (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \
+                                          (__v16hi)(__m256i)(b), \
+                                          (p), (__mmask16)(m)); })
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif /* __AVX512VLBWINTRIN_H */
diff --git a/c_headers/avx512vldqintrin.h b/c_headers/avx512vldqintrin.h
new file mode 100644
index 0000000000..1edf29d128
--- /dev/null
+++ b/c_headers/avx512vldqintrin.h
@@ -0,0 +1,353 @@
+/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ---------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512VLDQINTRIN_H
+#define __AVX512VLDQINTRIN_H
+
+/* Define the default attributes for the functions in this file.
*/ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mullo_epi64 (__m256i __A, __m256i __B) { + return (__m256i) ((__v4di) __A * (__v4di) __B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { + return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A, + (__v4di) __B, + (__v4di) __W, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { + return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A, + (__v4di) __B, + (__v4di) + _mm256_setzero_si256 (), + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mullo_epi64 (__m128i __A, __m128i __B) { + return (__m128i) ((__v2di) __A * (__v2di) __B); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { + return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, + (__v2di) __B, + (__v2di) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { + return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, + (__v2di) __B, + (__v2di) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { + return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, + (__v4df) __B, + (__v4df) __W, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) { + return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, + (__v4df) __B, + (__v4df) + _mm256_setzero_pd (), + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { + return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) { + return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { + return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __W, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) { + return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { + return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A, + (__v4df) __B, + (__v4df) __W, + 
(__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) { + return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A, + (__v4df) __B, + (__v4df) + _mm256_setzero_pd (), + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { + return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) { + return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { + return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __W, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) { + return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A, + __m256d __B) { + return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A, + (__v4df) __B, + (__v4df) __W, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) { + return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A, + (__v4df) __B, + (__v4df) + _mm256_setzero_pd (), + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { + return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) { + return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { + return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __W, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) { + return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A, + 
(__v4sf) __B,
+                                               (__v4sf)
+                                               _mm_setzero_ps (),
+                                               (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
+                                                (__v4df) __B,
+                                                (__v4df) __W,
+                                                (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) {
+  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
+                                                (__v4df) __B,
+                                                (__v4df)
+                                                _mm256_setzero_pd (),
+                                                (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
+                                                (__v2df) __B,
+                                                (__v2df) __W,
+                                                (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) {
+  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
+                                                (__v2df) __B,
+                                                (__v2df)
+                                                _mm_setzero_pd (),
+                                                (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
+                                               (__v8sf) __B,
+                                               (__v8sf) __W,
+                                               (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) {
+  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
+                                               (__v8sf) __B,
+                                               (__v8sf)
+                                               _mm256_setzero_ps (),
+                                               (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
+                                               (__v4sf) __B,
+                                               (__v4sf) __W,
+                                               (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
+                                               (__v4sf) __B,
+                                               (__v4sf)
+                                               _mm_setzero_ps (),
+                                               (__mmask8) __U);
+}
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif
diff --git a/c_headers/avx512vlintrin.h b/c_headers/avx512vlintrin.h
new file mode 100644
index 0000000000..fc1b9d6e7a
--- /dev/null
+++ b/c_headers/avx512vlintrin.h
@@ -0,0 +1,1982 @@
+/*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
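+/* (A note on the guard above, shared by all of the AVX-512 headers in this
+ * directory: user code never includes this file directly; it includes the
+ * umbrella header instead, e.g.
+ *
+ *     #include <immintrin.h>
+ *
+ * which pulls avx512vlintrin.h in when AVX512VL support is enabled.) */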
+#endif + +#ifndef __AVX512VLINTRIN_H +#define __AVX512VLINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) + +/* Integer compare */ + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmpeq_epi32_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmpeq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmpeq_epu32_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmpeq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_cmpeq_epi32_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_mask_cmpeq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_cmpeq_epu32_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_mask_cmpeq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmpeq_epi64_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmpeq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmpeq_epu64_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmpeq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_cmpeq_epi64_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_mask_cmpeq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_cmpeq_epu64_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_mask_cmpeq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0, + __u); +} + + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmpge_epi32_mask(__m128i 
__a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmpge_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmpge_epu32_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmpge_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_cmpge_epi32_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_mask_cmpge_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_cmpge_epu32_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_mask_cmpge_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmpge_epi64_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmpge_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmpge_epu64_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmpge_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_cmpge_epi64_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_mask_cmpge_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_cmpge_epu64_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_mask_cmpge_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5, + __u); +} + + + + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmpgt_epi32_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmpgt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b, + 
__u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmpgt_epu32_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmpgt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_cmpgt_epi32_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_mask_cmpgt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_cmpgt_epu32_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_mask_cmpgt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmpgt_epi64_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmpgt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmpgt_epu64_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmpgt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_cmpgt_epi64_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_mask_cmpgt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_cmpgt_epu64_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_mask_cmpgt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmple_epi32_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmple_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmple_epu32_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmple_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 
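+  /* The trailing integer immediate in the __builtin_ia32_[u]cmp*_mask calls
+   * throughout this file selects the comparison predicate: 0 = eq, 1 = lt,
+   * 2 = le, 4 = neq, 5 = nlt (i.e. >=), 6 = nle (i.e. >).  The generic
+   * _mm*_cmp_ep*_mask macros defined later in this file take the same
+   * immediate directly, so the call below computes the same mask as, for
+   * example:
+   *
+   *     __mmask8 __k = _mm_mask_cmp_epu32_mask(__u, __a, __b, 2);
+   */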
+ return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_cmple_epi32_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_mask_cmple_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_cmple_epu32_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_mask_cmple_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmple_epi64_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmple_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmple_epu64_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmple_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_cmple_epi64_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_mask_cmple_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_cmple_epu64_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_mask_cmple_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmplt_epi32_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmplt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmplt_epu32_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmplt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_cmplt_epi32_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1, + (__mmask8)-1); +} + +static __inline__ __mmask8 
__DEFAULT_FN_ATTRS +_mm256_mask_cmplt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_cmplt_epu32_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_mask_cmplt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmplt_epi64_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmplt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmplt_epu64_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmplt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_cmplt_epi64_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_mask_cmplt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_cmplt_epu64_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_mask_cmplt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmpneq_epi32_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmpneq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmpneq_epu32_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmpneq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_cmpneq_epi32_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_mask_cmpneq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_cmpneq_epu32_mask(__m256i __a, __m256i __b) { + return 
(__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_mask_cmpneq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmpneq_epi64_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmpneq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_cmpneq_epu64_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_cmpneq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_cmpneq_epi64_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_mask_cmpneq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4, + __u); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_cmpneq_epu64_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4, + (__mmask8)-1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm256_mask_cmpneq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4, + __u); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A, + (__v8si) __B, + (__v8si) __W, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A, + (__v8si) __B, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A, + (__v4di) __B, + (__v4di) __W, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A, + (__v4di) __B, + (__v4di) + _mm256_setzero_si256 (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A, + (__v8si) __B, + (__v8si) __W, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A, + (__v8si) __B, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B) 
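+/* As with every _mask/_maskz pair in this file: where a mask bit is set, the
+ * result element is the operation's result; where it is clear, the _mask form
+ * keeps the corresponding element of __W and the _maskz form writes zero.
+ * An illustrative sketch, with mask 0x5 (bits 0 and 2 set):
+ *
+ *     __m256i __r0 = _mm256_maskz_sub_epi64(0x5, __a, __b);     // lanes 1,3 are 0
+ *     __m256i __r1 = _mm256_mask_sub_epi64(__w, 0x5, __a, __b); // lanes 1,3 keep __w
+ */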
+{ + return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A, + (__v4di) __B, + (__v4di) __W, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A, + (__v4di) __B, + (__v4di) + _mm256_setzero_si256 (), + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A, + (__v4si) __B, + (__v4si) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A, + (__v4si) __B, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A, + (__v2di) __B, + (__v2di) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A, + (__v2di) __B, + (__v2di) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A, + (__v4si) __B, + (__v4si) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A, + (__v4si) __B, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A, + (__v2di) __B, + (__v2di) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A, + (__v2di) __B, + (__v2di) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X, + __m256i __Y) +{ + return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X, + (__v8si) __Y, + (__v4di) __W, __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) +{ + return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X, + (__v8si) __Y, + (__v4di) + _mm256_setzero_si256 (), + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X, + __m128i __Y) +{ + return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X, + (__v4si) __Y, + (__v2di) __W, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X, + (__v4si) __Y, + (__v2di) + _mm_setzero_si128 (), + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X, + __m256i __Y) +{ + return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X, + (__v8si) __Y, + (__v4di) __W, __M); +} + +static __inline__ 
__m256i __DEFAULT_FN_ATTRS +_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y) +{ + return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X, + (__v8si) __Y, + (__v4di) + _mm256_setzero_si256 (), + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X, + __m128i __Y) +{ + return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X, + (__v4si) __Y, + (__v2di) __W, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X, + (__v4si) __Y, + (__v2di) + _mm_setzero_si128 (), + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A, + (__v8si) __B, + (__v8si) + _mm256_setzero_si256 (), + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A, + (__v8si) __B, + (__v8si) __W, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A, + (__v4si) __B, + (__v4si) + _mm_setzero_si128 (), + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_mullo_epi32 (__m128i __W, __mmask16 __M, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A, + (__v4si) __B, + (__v4si) __W, __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A, + (__v8si) __B, + (__v8si) __W, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A, + (__v8si) __B, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A, + (__v4si) __B, + (__v4si) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A, + (__v4si) __B, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A, + (__v8si) __B, + (__v8si) __W, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A, + (__v8si) __B, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A, + (__v4si) __B, + (__v4si) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) 
__builtin_ia32_pandnd128_mask ((__v4si) __A, + (__v4si) __B, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A, + (__v8si) __B, + (__v8si) __W, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A, + (__v8si) __B, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A, + (__v4si) __B, + (__v4si) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A, + (__v4si) __B, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A, + (__v8si) __B, + (__v8si) __W, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A, + (__v8si) __B, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A, + (__v4si) __B, + (__v4si) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A, + (__v4si) __B, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A, + (__v4di) __B, + (__v4di) __W, __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A, + (__v4di) __B, + (__v4di) + _mm256_setzero_pd (), + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A, + (__v2di) __B, + (__v2di) __W, __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A, + (__v2di) __B, + (__v2di) + _mm_setzero_pd (), + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A, + (__v4di) __B, + (__v4di) __W, __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A, + (__v4di) __B, + (__v4di) + _mm256_setzero_pd (), + __U); +} + +static __inline__ __m128i 
__DEFAULT_FN_ATTRS +_mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A, + (__v2di) __B, + (__v2di) __W, __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A, + (__v2di) __B, + (__v2di) + _mm_setzero_pd (), + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A, + (__v4di) __B, + (__v4di) __W, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A, + (__v4di) __B, + (__v4di) + _mm256_setzero_si256 (), + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A, + (__v2di) __B, + (__v2di) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A, + (__v2di) __B, + (__v2di) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A, + (__v4di) __B, + (__v4di) __W, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A, + (__v4di) __B, + (__v4di) + _mm256_setzero_si256 (), + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A, + (__v2di) __B, + (__v2di) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A, + (__v2di) __B, + (__v2di) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +#define _mm_cmp_epi32_mask(a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ + (__v4si)(__m128i)(b), \ + (p), (__mmask8)-1); }) + +#define _mm_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ + (__v4si)(__m128i)(b), \ + (p), (__mmask8)(m)); }) + +#define _mm_cmp_epu32_mask(a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ + (__v4si)(__m128i)(b), \ + (p), (__mmask8)-1); }) + +#define _mm_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ + (__v4si)(__m128i)(b), \ + (p), (__mmask8)(m)); }) + +#define _mm256_cmp_epi32_mask(a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ + (__v8si)(__m256i)(b), \ + (p), (__mmask8)-1); }) + +#define _mm256_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ + (__v8si)(__m256i)(b), \ + (p), (__mmask8)(m)); }) + +#define _mm256_cmp_epu32_mask(a, b, p) __extension__ 
({ \ + (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ + (__v8si)(__m256i)(b), \ + (p), (__mmask8)-1); }) + +#define _mm256_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ + (__v8si)(__m256i)(b), \ + (p), (__mmask8)(m)); }) + +#define _mm_cmp_epi64_mask(a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ + (__v2di)(__m128i)(b), \ + (p), (__mmask8)-1); }) + +#define _mm_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ + (__v2di)(__m128i)(b), \ + (p), (__mmask8)(m)); }) + +#define _mm_cmp_epu64_mask(a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ + (__v2di)(__m128i)(b), \ + (p), (__mmask8)-1); }) + +#define _mm_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ + (__v2di)(__m128i)(b), \ + (p), (__mmask8)(m)); }) + +#define _mm256_cmp_epi64_mask(a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ + (__v4di)(__m256i)(b), \ + (p), (__mmask8)-1); }) + +#define _mm256_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ + (__v4di)(__m256i)(b), \ + (p), (__mmask8)(m)); }) + +#define _mm256_cmp_epu64_mask(a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ + (__v4di)(__m256i)(b), \ + (p), (__mmask8)-1); }) + +#define _mm256_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ + (__v4di)(__m256i)(b), \ + (p), (__mmask8)(m)); }) + +#define _mm256_cmp_ps_mask(a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ + (__v8sf)(__m256)(b), \ + (p), (__mmask8)-1); }) + +#define _mm256_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ + (__v8sf)(__m256)(b), \ + (p), (__mmask8)(m)); }) + +#define _mm256_cmp_pd_mask(a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256)(a), \ + (__v4df)(__m256)(b), \ + (p), (__mmask8)-1); }) + +#define _mm256_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256)(a), \ + (__v4df)(__m256)(b), \ + (p), (__mmask8)(m)); }) + +#define _mm128_cmp_ps_mask(a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ + (__v4sf)(__m128)(b), \ + (p), (__mmask8)-1); }) + +#define _mm128_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ + (__v4sf)(__m128)(b), \ + (p), (__mmask8)(m)); }) + +#define _mm128_cmp_pd_mask(a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128)(a), \ + (__v2df)(__m128)(b), \ + (p), (__mmask8)-1); }) + +#define _mm128_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128)(a), \ + (__v2df)(__m128)(b), \ + (p), (__mmask8)(m)); }) + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) +{ + return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) +{ + return (__m128d) 
__builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) +{ + return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A, + (__v2df) __B, + -(__v2df) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A, + (__v2df) __B, + -(__v2df) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) +{ + return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A, + (__v2df) __B, + -(__v2df) __C, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) +{ + return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) +{ + return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) +{ + return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A, + (__v4df) __B, + -(__v4df) __C, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A, + (__v4df) __B, + -(__v4df) __C, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) +{ + return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A, + (__v4df) __B, + -(__v4df) __C, + 
(__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) +{ + return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) +{ + return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) +{ + return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) +{ + return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A, + (__v4sf) __B, + -(__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) +{ + return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A, + (__v4sf) __B, + -(__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) +{ + return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) +{ + return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) +{ + return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A, + (__v4sf) __B, + -(__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) +{ + return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) +{ + return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) +{ + return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) +{ + return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A, + (__v8sf) __B, + -(__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) +{ + return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A, + (__v8sf) __B, + -(__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) +{ + return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) +{ + return (__m256) 
__builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) +{ + return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A, + (__v8sf) __B, + -(__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) +{ + return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) +{ + return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) + __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) + __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) +{ + return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A, + (__v2df) __B, + -(__v2df) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A, + (__v2df) __B, + -(__v2df) __C, + (__mmask8) + __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) +{ + return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) +{ + return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) + __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) + __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) +{ + return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A, + (__v4df) __B, + -(__v4df) __C, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A, + (__v4df) __B, + -(__v4df) __C, + (__mmask8) + __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) +{ + return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) +{ + return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) +{ + return (__m128) __builtin_ia32_vfmaddsubps128_maskz 
((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) +{ + return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A, + (__v4sf) __B, + -(__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) +{ + return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A, + (__v4sf) __B, + -(__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, + __m256 __C) +{ + return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) +{ + return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) +{ + return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) +{ + return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A, + (__v8sf) __B, + -(__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) +{ + return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A, + (__v8sf) __B, + -(__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) +{ + return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) +{ + return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) +{ + return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) +{ + return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) +{ + return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) + __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) +{ + return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) + __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) +{ + return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); +} + +static 
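/* A note on the masking suffixes used by the FMA wrappers above and
 * below: _mask keeps masked-off lanes from the first operand, _mask3
 * keeps them from the third operand, and _maskz zeroes them. A small
 * sketch with hypothetical __m128d values a, b, c:
 *
 *   _mm_maskz_fmadd_pd(0x1, a, b, c);  // lane 0 = a[0]*b[0]+c[0],
 *                                      // lane 1 = 0.0
 *   _mm_mask_fmadd_pd(a, 0x1, b, c);   // lane 1 = a[1] instead
 */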
__inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) +{ + return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) +{ + return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) +{ + return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) +{ + return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) +{ + return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) +{ + return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) +{ + return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) +{ + return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) +{ + return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) +{ + return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) +{ + return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) +{ + return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) +{ + return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); +} + +#undef __DEFAULT_FN_ATTRS + +#endif /* __AVX512VLINTRIN_H */ diff --git a/c_headers/avxintrin.h b/c_headers/avxintrin.h new file mode 100644 index 0000000000..c1bc85b39e --- /dev/null +++ b/c_headers/avxintrin.h @@ -0,0 +1,1308 @@ +/*===---- avxintrin.h - AVX intrinsics -------------------------------------=== + * + * Permission is hereby granted, free of charge, to any 
person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use <avxintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVXINTRIN_H +#define __AVXINTRIN_H + +typedef double __v4df __attribute__ ((__vector_size__ (32))); +typedef float __v8sf __attribute__ ((__vector_size__ (32))); +typedef long long __v4di __attribute__ ((__vector_size__ (32))); +typedef int __v8si __attribute__ ((__vector_size__ (32))); +typedef short __v16hi __attribute__ ((__vector_size__ (32))); +typedef char __v32qi __attribute__ ((__vector_size__ (32))); + +typedef float __m256 __attribute__ ((__vector_size__ (32))); +typedef double __m256d __attribute__((__vector_size__(32))); +typedef long long __m256i __attribute__((__vector_size__(32))); + +/* Define the default attributes for the functions in this file.
*/ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) + +/* Arithmetic */ +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_add_pd(__m256d __a, __m256d __b) +{ + return __a+__b; +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_add_ps(__m256 __a, __m256 __b) +{ + return __a+__b; +} + +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_sub_pd(__m256d __a, __m256d __b) +{ + return __a-__b; +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_sub_ps(__m256 __a, __m256 __b) +{ + return __a-__b; +} + +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_addsub_pd(__m256d __a, __m256d __b) +{ + return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b); +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_addsub_ps(__m256 __a, __m256 __b) +{ + return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b); +} + +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_div_pd(__m256d __a, __m256d __b) +{ + return __a / __b; +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_div_ps(__m256 __a, __m256 __b) +{ + return __a / __b; +} + +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_max_pd(__m256d __a, __m256d __b) +{ + return (__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b); +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_max_ps(__m256 __a, __m256 __b) +{ + return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b); +} + +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_min_pd(__m256d __a, __m256d __b) +{ + return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b); +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_min_ps(__m256 __a, __m256 __b) +{ + return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b); +} + +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_mul_pd(__m256d __a, __m256d __b) +{ + return __a * __b; +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_mul_ps(__m256 __a, __m256 __b) +{ + return __a * __b; +} + +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_sqrt_pd(__m256d __a) +{ + return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a); +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_sqrt_ps(__m256 __a) +{ + return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a); +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_rsqrt_ps(__m256 __a) +{ + return (__m256)__builtin_ia32_rsqrtps256((__v8sf)__a); +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_rcp_ps(__m256 __a) +{ + return (__m256)__builtin_ia32_rcpps256((__v8sf)__a); +} + +#define _mm256_round_pd(V, M) __extension__ ({ \ + __m256d __V = (V); \ + (__m256d)__builtin_ia32_roundpd256((__v4df)__V, (M)); }) + +#define _mm256_round_ps(V, M) __extension__ ({ \ + __m256 __V = (V); \ + (__m256)__builtin_ia32_roundps256((__v8sf)__V, (M)); }) + +#define _mm256_ceil_pd(V) _mm256_round_pd((V), _MM_FROUND_CEIL) +#define _mm256_floor_pd(V) _mm256_round_pd((V), _MM_FROUND_FLOOR) +#define _mm256_ceil_ps(V) _mm256_round_ps((V), _MM_FROUND_CEIL) +#define _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR) + +/* Logical */ +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_and_pd(__m256d __a, __m256d __b) +{ + return (__m256d)((__v4di)__a & (__v4di)__b); +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_and_ps(__m256 __a, __m256 __b) +{ + return (__m256)((__v8si)__a & (__v8si)__b); +} + +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_andnot_pd(__m256d __a, __m256d __b) +{ + return (__m256d)(~(__v4di)__a & (__v4di)__b); +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_andnot_ps(__m256 __a, __m256 __b) +{ + 
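/* Note the operand order here and in _mm256_andnot_pd above: andnot
 * computes ~a & b, complementing its FIRST argument. A classic use is
 * clearing sign bits; a sketch with a hypothetical __m256 x:
 *
 *   __m256 sign = _mm256_set1_ps(-0.0f);       // only sign bits set
 *   __m256 absx = _mm256_andnot_ps(sign, x);   // lane-wise |x|
 */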
return (__m256)(~(__v8si)__a & (__v8si)__b); +} + +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_or_pd(__m256d __a, __m256d __b) +{ + return (__m256d)((__v4di)__a | (__v4di)__b); +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_or_ps(__m256 __a, __m256 __b) +{ + return (__m256)((__v8si)__a | (__v8si)__b); +} + +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_xor_pd(__m256d __a, __m256d __b) +{ + return (__m256d)((__v4di)__a ^ (__v4di)__b); +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_xor_ps(__m256 __a, __m256 __b) +{ + return (__m256)((__v8si)__a ^ (__v8si)__b); +} + +/* Horizontal arithmetic */ +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_hadd_pd(__m256d __a, __m256d __b) +{ + return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b); +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_hadd_ps(__m256 __a, __m256 __b) +{ + return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b); +} + +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_hsub_pd(__m256d __a, __m256d __b) +{ + return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b); +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_hsub_ps(__m256 __a, __m256 __b) +{ + return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b); +} + +/* Vector permutations */ +static __inline __m128d __DEFAULT_FN_ATTRS +_mm_permutevar_pd(__m128d __a, __m128i __c) +{ + return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c); +} + +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_permutevar_pd(__m256d __a, __m256i __c) +{ + return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c); +} + +static __inline __m128 __DEFAULT_FN_ATTRS +_mm_permutevar_ps(__m128 __a, __m128i __c) +{ + return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c); +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_permutevar_ps(__m256 __a, __m256i __c) +{ + return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c); +} + +#define _mm_permute_pd(A, C) __extension__ ({ \ + __m128d __A = (A); \ + (__m128d)__builtin_shufflevector((__v2df)__A, (__v2df) _mm_setzero_pd(), \ + (C) & 0x1, ((C) & 0x2) >> 1); }) + +#define _mm256_permute_pd(A, C) __extension__ ({ \ + __m256d __A = (A); \ + (__m256d)__builtin_shufflevector((__v4df)__A, (__v4df) _mm256_setzero_pd(), \ + (C) & 0x1, ((C) & 0x2) >> 1, \ + 2 + (((C) & 0x4) >> 2), \ + 2 + (((C) & 0x8) >> 3)); }) + +#define _mm_permute_ps(A, C) __extension__ ({ \ + __m128 __A = (A); \ + (__m128)__builtin_shufflevector((__v4sf)__A, (__v4sf) _mm_setzero_ps(), \ + (C) & 0x3, ((C) & 0xc) >> 2, \ + ((C) & 0x30) >> 4, ((C) & 0xc0) >> 6); }) + +#define _mm256_permute_ps(A, C) __extension__ ({ \ + __m256 __A = (A); \ + (__m256)__builtin_shufflevector((__v8sf)__A, (__v8sf) _mm256_setzero_ps(), \ + (C) & 0x3, ((C) & 0xc) >> 2, \ + ((C) & 0x30) >> 4, ((C) & 0xc0) >> 6, \ + 4 + (((C) & 0x03) >> 0), \ + 4 + (((C) & 0x0c) >> 2), \ + 4 + (((C) & 0x30) >> 4), \ + 4 + (((C) & 0xc0) >> 6)); }) + +#define _mm256_permute2f128_pd(V1, V2, M) __extension__ ({ \ + __m256d __V1 = (V1); \ + __m256d __V2 = (V2); \ + (__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)__V1, (__v4df)__V2, (M)); }) + +#define _mm256_permute2f128_ps(V1, V2, M) __extension__ ({ \ + __m256 __V1 = (V1); \ + __m256 __V2 = (V2); \ + (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)__V1, (__v8sf)__V2, (M)); }) + +#define _mm256_permute2f128_si256(V1, V2, M) __extension__ ({ \ + __m256i __V1 = (V1); \ + __m256i __V2 = (V2); \ + (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)__V1, 
(__v8si)__V2, (M)); }) + +/* Vector Blend */ +#define _mm256_blend_pd(V1, V2, M) __extension__ ({ \ + __m256d __V1 = (V1); \ + __m256d __V2 = (V2); \ + (__m256d)__builtin_shufflevector((__v4df)__V1, (__v4df)__V2, \ + (((M) & 0x01) ? 4 : 0), \ + (((M) & 0x02) ? 5 : 1), \ + (((M) & 0x04) ? 6 : 2), \ + (((M) & 0x08) ? 7 : 3)); }) + +#define _mm256_blend_ps(V1, V2, M) __extension__ ({ \ + __m256 __V1 = (V1); \ + __m256 __V2 = (V2); \ + (__m256)__builtin_shufflevector((__v8sf)__V1, (__v8sf)__V2, \ + (((M) & 0x01) ? 8 : 0), \ + (((M) & 0x02) ? 9 : 1), \ + (((M) & 0x04) ? 10 : 2), \ + (((M) & 0x08) ? 11 : 3), \ + (((M) & 0x10) ? 12 : 4), \ + (((M) & 0x20) ? 13 : 5), \ + (((M) & 0x40) ? 14 : 6), \ + (((M) & 0x80) ? 15 : 7)); }) + +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c) +{ + return (__m256d)__builtin_ia32_blendvpd256( + (__v4df)__a, (__v4df)__b, (__v4df)__c); +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) +{ + return (__m256)__builtin_ia32_blendvps256( + (__v8sf)__a, (__v8sf)__b, (__v8sf)__c); +} + +/* Vector Dot Product */ +#define _mm256_dp_ps(V1, V2, M) __extension__ ({ \ + __m256 __V1 = (V1); \ + __m256 __V2 = (V2); \ + (__m256)__builtin_ia32_dpps256((__v8sf)__V1, (__v8sf)__V2, (M)); }) + +/* Vector shuffle */ +#define _mm256_shuffle_ps(a, b, mask) __extension__ ({ \ + __m256 __a = (a); \ + __m256 __b = (b); \ + (__m256)__builtin_shufflevector((__v8sf)__a, (__v8sf)__b, \ + (mask) & 0x3, ((mask) & 0xc) >> 2, \ + (((mask) & 0x30) >> 4) + 8, (((mask) & 0xc0) >> 6) + 8, \ + ((mask) & 0x3) + 4, (((mask) & 0xc) >> 2) + 4, \ + (((mask) & 0x30) >> 4) + 12, (((mask) & 0xc0) >> 6) + 12); }) + +#define _mm256_shuffle_pd(a, b, mask) __extension__ ({ \ + __m256d __a = (a); \ + __m256d __b = (b); \ + (__m256d)__builtin_shufflevector((__v4df)__a, (__v4df)__b, \ + (mask) & 0x1, \ + (((mask) & 0x2) >> 1) + 4, \ + (((mask) & 0x4) >> 2) + 2, \ + (((mask) & 0x8) >> 3) + 6); }) + +/* Compare */ +#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */ +#define _CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */ +#define _CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */ +#define _CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */ +#define _CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */ +#define _CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */ +#define _CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */ +#define _CMP_ORD_Q 0x07 /* Ordered (nonsignaling) */ +#define _CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */ +#define _CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unord, signaling) */ +#define _CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */ +#define _CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling) */ +#define _CMP_NEQ_OQ 0x0c /* Not-equal (ordered, non-signaling) */ +#define _CMP_GE_OS 0x0d /* Greater-than-or-equal (ordered, signaling) */ +#define _CMP_GT_OS 0x0e /* Greater-than (ordered, signaling) */ +#define _CMP_TRUE_UQ 0x0f /* True (unordered, non-signaling) */ +#define _CMP_EQ_OS 0x10 /* Equal (ordered, signaling) */ +#define _CMP_LT_OQ 0x11 /* Less-than (ordered, non-signaling) */ +#define _CMP_LE_OQ 0x12 /* Less-than-or-equal (ordered, non-signaling) */ +#define _CMP_UNORD_S 0x13 /* Unordered (signaling) */ +#define _CMP_NEQ_US 0x14 /* Not-equal (unordered, signaling) */ +#define _CMP_NLT_UQ 0x15 /* Not-less-than (unordered, non-signaling) */ +#define _CMP_NLE_UQ 0x16 /* Not-less-than-or-equal (unord, 
non-signaling) */ +#define _CMP_ORD_S 0x17 /* Ordered (signaling) */ +#define _CMP_EQ_US 0x18 /* Equal (unordered, signaling) */ +#define _CMP_NGE_UQ 0x19 /* Not-greater-than-or-equal (unord, non-sign) */ +#define _CMP_NGT_UQ 0x1a /* Not-greater-than (unordered, non-signaling) */ +#define _CMP_FALSE_OS 0x1b /* False (ordered, signaling) */ +#define _CMP_NEQ_OS 0x1c /* Not-equal (ordered, signaling) */ +#define _CMP_GE_OQ 0x1d /* Greater-than-or-equal (ordered, non-signaling) */ +#define _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */ +#define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */ + +#define _mm_cmp_pd(a, b, c) __extension__ ({ \ + __m128d __a = (a); \ + __m128d __b = (b); \ + (__m128d)__builtin_ia32_cmppd((__v2df)__a, (__v2df)__b, (c)); }) + +#define _mm_cmp_ps(a, b, c) __extension__ ({ \ + __m128 __a = (a); \ + __m128 __b = (b); \ + (__m128)__builtin_ia32_cmpps((__v4sf)__a, (__v4sf)__b, (c)); }) + +#define _mm256_cmp_pd(a, b, c) __extension__ ({ \ + __m256d __a = (a); \ + __m256d __b = (b); \ + (__m256d)__builtin_ia32_cmppd256((__v4df)__a, (__v4df)__b, (c)); }) + +#define _mm256_cmp_ps(a, b, c) __extension__ ({ \ + __m256 __a = (a); \ + __m256 __b = (b); \ + (__m256)__builtin_ia32_cmpps256((__v8sf)__a, (__v8sf)__b, (c)); }) + +#define _mm_cmp_sd(a, b, c) __extension__ ({ \ + __m128d __a = (a); \ + __m128d __b = (b); \ + (__m128d)__builtin_ia32_cmpsd((__v2df)__a, (__v2df)__b, (c)); }) + +#define _mm_cmp_ss(a, b, c) __extension__ ({ \ + __m128 __a = (a); \ + __m128 __b = (b); \ + (__m128)__builtin_ia32_cmpss((__v4sf)__a, (__v4sf)__b, (c)); }) + +static __inline int __DEFAULT_FN_ATTRS +_mm256_extract_epi32(__m256i __a, const int __imm) +{ + __v8si __b = (__v8si)__a; + return __b[__imm & 7]; +} + +static __inline int __DEFAULT_FN_ATTRS +_mm256_extract_epi16(__m256i __a, const int __imm) +{ + __v16hi __b = (__v16hi)__a; + return __b[__imm & 15]; +} + +static __inline int __DEFAULT_FN_ATTRS +_mm256_extract_epi8(__m256i __a, const int __imm) +{ + __v32qi __b = (__v32qi)__a; + return __b[__imm & 31]; +} + +#ifdef __x86_64__ +static __inline long long __DEFAULT_FN_ATTRS +_mm256_extract_epi64(__m256i __a, const int __imm) +{ + __v4di __b = (__v4di)__a; + return __b[__imm & 3]; +} +#endif + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_insert_epi32(__m256i __a, int __b, int const __imm) +{ + __v8si __c = (__v8si)__a; + __c[__imm & 7] = __b; + return (__m256i)__c; +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_insert_epi16(__m256i __a, int __b, int const __imm) +{ + __v16hi __c = (__v16hi)__a; + __c[__imm & 15] = __b; + return (__m256i)__c; +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_insert_epi8(__m256i __a, int __b, int const __imm) +{ + __v32qi __c = (__v32qi)__a; + __c[__imm & 31] = __b; + return (__m256i)__c; +} + +#ifdef __x86_64__ +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_insert_epi64(__m256i __a, long long __b, int const __imm) +{ + __v4di __c = (__v4di)__a; + __c[__imm & 3] = __b; + return (__m256i)__c; +} +#endif + +/* Conversion */ +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_cvtepi32_pd(__m128i __a) +{ + return (__m256d)__builtin_ia32_cvtdq2pd256((__v4si) __a); +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_cvtepi32_ps(__m256i __a) +{ + return (__m256)__builtin_ia32_cvtdq2ps256((__v8si) __a); +} + +static __inline __m128 __DEFAULT_FN_ATTRS +_mm256_cvtpd_ps(__m256d __a) +{ + return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a); +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_cvtps_epi32(__m256 __a) 
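/* The cvt conversions (such as _mm256_cvtps_epi32 here) round using the
 * current rounding mode, round-to-nearest-even by default, while the
 * cvtt forms below truncate toward zero. An illustrative sketch:
 *
 *   __m256 v = _mm256_set1_ps(2.5f);
 *   _mm256_cvtps_epi32(v);    // each lane: 2 (nearest even)
 *   _mm256_cvttps_epi32(v);   // each lane: 2
 *   v = _mm256_set1_ps(3.5f);
 *   _mm256_cvtps_epi32(v);    // each lane: 4 (nearest even)
 *   _mm256_cvttps_epi32(v);   // each lane: 3 (truncated)
 */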
+{ + return (__m256i)__builtin_ia32_cvtps2dq256((__v8sf) __a); +} + +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_cvtps_pd(__m128 __a) +{ + return (__m256d)__builtin_ia32_cvtps2pd256((__v4sf) __a); +} + +static __inline __m128i __DEFAULT_FN_ATTRS +_mm256_cvttpd_epi32(__m256d __a) +{ + return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a); +} + +static __inline __m128i __DEFAULT_FN_ATTRS +_mm256_cvtpd_epi32(__m256d __a) +{ + return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a); +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_cvttps_epi32(__m256 __a) +{ + return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a); +} + +/* Vector replicate */ +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_movehdup_ps(__m256 __a) +{ + return __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7); +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_moveldup_ps(__m256 __a) +{ + return __builtin_shufflevector(__a, __a, 0, 0, 2, 2, 4, 4, 6, 6); +} + +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_movedup_pd(__m256d __a) +{ + return __builtin_shufflevector(__a, __a, 0, 0, 2, 2); +} + +/* Unpack and Interleave */ +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_unpackhi_pd(__m256d __a, __m256d __b) +{ + return __builtin_shufflevector(__a, __b, 1, 5, 1+2, 5+2); +} + +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_unpacklo_pd(__m256d __a, __m256d __b) +{ + return __builtin_shufflevector(__a, __b, 0, 4, 0+2, 4+2); +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_unpackhi_ps(__m256 __a, __m256 __b) +{ + return __builtin_shufflevector(__a, __b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1); +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_unpacklo_ps(__m256 __a, __m256 __b) +{ + return __builtin_shufflevector(__a, __b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1); +} + +/* Bit Test */ +static __inline int __DEFAULT_FN_ATTRS +_mm_testz_pd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b); +} + +static __inline int __DEFAULT_FN_ATTRS +_mm_testc_pd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b); +} + +static __inline int __DEFAULT_FN_ATTRS +_mm_testnzc_pd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b); +} + +static __inline int __DEFAULT_FN_ATTRS +_mm_testz_ps(__m128 __a, __m128 __b) +{ + return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b); +} + +static __inline int __DEFAULT_FN_ATTRS +_mm_testc_ps(__m128 __a, __m128 __b) +{ + return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b); +} + +static __inline int __DEFAULT_FN_ATTRS +_mm_testnzc_ps(__m128 __a, __m128 __b) +{ + return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b); +} + +static __inline int __DEFAULT_FN_ATTRS +_mm256_testz_pd(__m256d __a, __m256d __b) +{ + return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b); +} + +static __inline int __DEFAULT_FN_ATTRS +_mm256_testc_pd(__m256d __a, __m256d __b) +{ + return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b); +} + +static __inline int __DEFAULT_FN_ATTRS +_mm256_testnzc_pd(__m256d __a, __m256d __b) +{ + return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b); +} + +static __inline int __DEFAULT_FN_ATTRS +_mm256_testz_ps(__m256 __a, __m256 __b) +{ + return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b); +} + +static __inline int __DEFAULT_FN_ATTRS +_mm256_testc_ps(__m256 __a, __m256 __b) +{ + return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b); +} + +static __inline int __DEFAULT_FN_ATTRS +_mm256_testnzc_ps(__m256 __a, 
__m256 __b) +{ + return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b); +} + +static __inline int __DEFAULT_FN_ATTRS +_mm256_testz_si256(__m256i __a, __m256i __b) +{ + return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b); +} + +static __inline int __DEFAULT_FN_ATTRS +_mm256_testc_si256(__m256i __a, __m256i __b) +{ + return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b); +} + +static __inline int __DEFAULT_FN_ATTRS +_mm256_testnzc_si256(__m256i __a, __m256i __b) +{ + return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b); +} + +/* Vector extract sign mask */ +static __inline int __DEFAULT_FN_ATTRS +_mm256_movemask_pd(__m256d __a) +{ + return __builtin_ia32_movmskpd256((__v4df)__a); +} + +static __inline int __DEFAULT_FN_ATTRS +_mm256_movemask_ps(__m256 __a) +{ + return __builtin_ia32_movmskps256((__v8sf)__a); +} + +/* Vector zero */ +static __inline void __DEFAULT_FN_ATTRS +_mm256_zeroall(void) +{ + __builtin_ia32_vzeroall(); +} + +static __inline void __DEFAULT_FN_ATTRS +_mm256_zeroupper(void) +{ + __builtin_ia32_vzeroupper(); +} + +/* Vector load with broadcast */ +static __inline __m128 __DEFAULT_FN_ATTRS +_mm_broadcast_ss(float const *__a) +{ + float __f = *__a; + return (__m128)(__v4sf){ __f, __f, __f, __f }; +} + +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_broadcast_sd(double const *__a) +{ + double __d = *__a; + return (__m256d)(__v4df){ __d, __d, __d, __d }; +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_broadcast_ss(float const *__a) +{ + float __f = *__a; + return (__m256)(__v8sf){ __f, __f, __f, __f, __f, __f, __f, __f }; +} + +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_broadcast_pd(__m128d const *__a) +{ + return (__m256d)__builtin_ia32_vbroadcastf128_pd256(__a); +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_broadcast_ps(__m128 const *__a) +{ + return (__m256)__builtin_ia32_vbroadcastf128_ps256(__a); +} + +/* SIMD load ops */ +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_load_pd(double const *__p) +{ + return *(__m256d *)__p; +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_load_ps(float const *__p) +{ + return *(__m256 *)__p; +} + +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_loadu_pd(double const *__p) +{ + struct __loadu_pd { + __m256d __v; + } __attribute__((__packed__, __may_alias__)); + return ((struct __loadu_pd*)__p)->__v; +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_loadu_ps(float const *__p) +{ + struct __loadu_ps { + __m256 __v; + } __attribute__((__packed__, __may_alias__)); + return ((struct __loadu_ps*)__p)->__v; +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_load_si256(__m256i const *__p) +{ + return *__p; +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_loadu_si256(__m256i const *__p) +{ + struct __loadu_si256 { + __m256i __v; + } __attribute__((__packed__, __may_alias__)); + return ((struct __loadu_si256*)__p)->__v; +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_lddqu_si256(__m256i const *__p) +{ + return (__m256i)__builtin_ia32_lddqu256((char const *)__p); +} + +/* SIMD store ops */ +static __inline void __DEFAULT_FN_ATTRS +_mm256_store_pd(double *__p, __m256d __a) +{ + *(__m256d *)__p = __a; +} + +static __inline void __DEFAULT_FN_ATTRS +_mm256_store_ps(float *__p, __m256 __a) +{ + *(__m256 *)__p = __a; +} + +static __inline void __DEFAULT_FN_ATTRS +_mm256_storeu_pd(double *__p, __m256d __a) +{ + __builtin_ia32_storeupd256(__p, (__v4df)__a); +} + +static __inline void __DEFAULT_FN_ATTRS +_mm256_storeu_ps(float *__p, __m256 __a) +{ +
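/* The storeu/loadu forms in this section tolerate any address, while
 * the plain load/store forms require 32-byte alignment. Sketch with a
 * hypothetical vector v and buffer:
 *
 *   float buf[16];
 *   _mm256_storeu_ps(buf + 1, v);  // fine at any alignment
 *   _mm256_store_ps(buf + 1, v);   // undefined unless buf+1 happens
 *                                  // to be 32-byte aligned
 */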
__builtin_ia32_storeups256(__p, (__v8sf)__a); +} + +static __inline void __DEFAULT_FN_ATTRS +_mm256_store_si256(__m256i *__p, __m256i __a) +{ + *__p = __a; +} + +static __inline void __DEFAULT_FN_ATTRS +_mm256_storeu_si256(__m256i *__p, __m256i __a) +{ + __builtin_ia32_storedqu256((char *)__p, (__v32qi)__a); +} + +/* Conditional load ops */ +static __inline __m128d __DEFAULT_FN_ATTRS +_mm_maskload_pd(double const *__p, __m128d __m) +{ + return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2df)__m); +} + +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_maskload_pd(double const *__p, __m256d __m) +{ + return (__m256d)__builtin_ia32_maskloadpd256((const __v4df *)__p, + (__v4df)__m); +} + +static __inline __m128 __DEFAULT_FN_ATTRS +_mm_maskload_ps(float const *__p, __m128 __m) +{ + return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4sf)__m); +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_maskload_ps(float const *__p, __m256 __m) +{ + return (__m256)__builtin_ia32_maskloadps256((const __v8sf *)__p, (__v8sf)__m); +} + +/* Conditional store ops */ +static __inline void __DEFAULT_FN_ATTRS +_mm256_maskstore_ps(float *__p, __m256 __m, __m256 __a) +{ + __builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8sf)__m, (__v8sf)__a); +} + +static __inline void __DEFAULT_FN_ATTRS +_mm_maskstore_pd(double *__p, __m128d __m, __m128d __a) +{ + __builtin_ia32_maskstorepd((__v2df *)__p, (__v2df)__m, (__v2df)__a); +} + +static __inline void __DEFAULT_FN_ATTRS +_mm256_maskstore_pd(double *__p, __m256d __m, __m256d __a) +{ + __builtin_ia32_maskstorepd256((__v4df *)__p, (__v4df)__m, (__v4df)__a); +} + +static __inline void __DEFAULT_FN_ATTRS +_mm_maskstore_ps(float *__p, __m128 __m, __m128 __a) +{ + __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4sf)__m, (__v4sf)__a); +} + +/* Cacheability support ops */ +static __inline void __DEFAULT_FN_ATTRS +_mm256_stream_si256(__m256i *__a, __m256i __b) +{ + __builtin_ia32_movntdq256((__v4di *)__a, (__v4di)__b); +} + +static __inline void __DEFAULT_FN_ATTRS +_mm256_stream_pd(double *__a, __m256d __b) +{ + __builtin_ia32_movntpd256(__a, (__v4df)__b); +} + +static __inline void __DEFAULT_FN_ATTRS +_mm256_stream_ps(float *__p, __m256 __a) +{ + __builtin_ia32_movntps256(__p, (__v8sf)__a); +} + +/* Create vectors */ +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_set_pd(double __a, double __b, double __c, double __d) +{ + return (__m256d){ __d, __c, __b, __a }; +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_set_ps(float __a, float __b, float __c, float __d, + float __e, float __f, float __g, float __h) +{ + return (__m256){ __h, __g, __f, __e, __d, __c, __b, __a }; +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_set_epi32(int __i0, int __i1, int __i2, int __i3, + int __i4, int __i5, int __i6, int __i7) +{ + return (__m256i)(__v8si){ __i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0 }; +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_set_epi16(short __w15, short __w14, short __w13, short __w12, + short __w11, short __w10, short __w09, short __w08, + short __w07, short __w06, short __w05, short __w04, + short __w03, short __w02, short __w01, short __w00) +{ + return (__m256i)(__v16hi){ __w00, __w01, __w02, __w03, __w04, __w05, __w06, + __w07, __w08, __w09, __w10, __w11, __w12, __w13, __w14, __w15 }; +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_set_epi8(char __b31, char __b30, char __b29, char __b28, + char __b27, char __b26, char __b25, char __b24, + char __b23, char __b22, char __b21, char __b20, + char __b19, 
char __b18, char __b17, char __b16, + char __b15, char __b14, char __b13, char __b12, + char __b11, char __b10, char __b09, char __b08, + char __b07, char __b06, char __b05, char __b04, + char __b03, char __b02, char __b01, char __b00) +{ + return (__m256i)(__v32qi){ + __b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07, + __b08, __b09, __b10, __b11, __b12, __b13, __b14, __b15, + __b16, __b17, __b18, __b19, __b20, __b21, __b22, __b23, + __b24, __b25, __b26, __b27, __b28, __b29, __b30, __b31 + }; +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d) +{ + return (__m256i)(__v4di){ __d, __c, __b, __a }; +} + +/* Create vectors with elements in reverse order */ +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_setr_pd(double __a, double __b, double __c, double __d) +{ + return (__m256d){ __a, __b, __c, __d }; +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_setr_ps(float __a, float __b, float __c, float __d, + float __e, float __f, float __g, float __h) +{ + return (__m256){ __a, __b, __c, __d, __e, __f, __g, __h }; +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3, + int __i4, int __i5, int __i6, int __i7) +{ + return (__m256i)(__v8si){ __i0, __i1, __i2, __i3, __i4, __i5, __i6, __i7 }; +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12, + short __w11, short __w10, short __w09, short __w08, + short __w07, short __w06, short __w05, short __w04, + short __w03, short __w02, short __w01, short __w00) +{ + return (__m256i)(__v16hi){ __w15, __w14, __w13, __w12, __w11, __w10, __w09, + __w08, __w07, __w06, __w05, __w04, __w03, __w02, __w01, __w00 }; +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28, + char __b27, char __b26, char __b25, char __b24, + char __b23, char __b22, char __b21, char __b20, + char __b19, char __b18, char __b17, char __b16, + char __b15, char __b14, char __b13, char __b12, + char __b11, char __b10, char __b09, char __b08, + char __b07, char __b06, char __b05, char __b04, + char __b03, char __b02, char __b01, char __b00) +{ + return (__m256i)(__v32qi){ + __b31, __b30, __b29, __b28, __b27, __b26, __b25, __b24, + __b23, __b22, __b21, __b20, __b19, __b18, __b17, __b16, + __b15, __b14, __b13, __b12, __b11, __b10, __b09, __b08, + __b07, __b06, __b05, __b04, __b03, __b02, __b01, __b00 }; +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d) +{ + return (__m256i)(__v4di){ __a, __b, __c, __d }; +} + +/* Create vectors with repeated elements */ +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_set1_pd(double __w) +{ + return (__m256d){ __w, __w, __w, __w }; +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_set1_ps(float __w) +{ + return (__m256){ __w, __w, __w, __w, __w, __w, __w, __w }; +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_set1_epi32(int __i) +{ + return (__m256i)(__v8si){ __i, __i, __i, __i, __i, __i, __i, __i }; +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_set1_epi16(short __w) +{ + return (__m256i)(__v16hi){ __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w }; +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_set1_epi8(char __b) +{ + return (__m256i)(__v32qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, 
__b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, __b }; +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_set1_epi64x(long long __q) +{ + return (__m256i)(__v4di){ __q, __q, __q, __q }; +} + +/* Create zeroed vectors */ +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_setzero_pd(void) +{ + return (__m256d){ 0, 0, 0, 0 }; +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_setzero_ps(void) +{ + return (__m256){ 0, 0, 0, 0, 0, 0, 0, 0 }; +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_setzero_si256(void) +{ + return (__m256i){ 0LL, 0LL, 0LL, 0LL }; +} + +/* Cast between vector types */ +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_castpd_ps(__m256d __a) +{ + return (__m256)__a; +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_castpd_si256(__m256d __a) +{ + return (__m256i)__a; +} + +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_castps_pd(__m256 __a) +{ + return (__m256d)__a; +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_castps_si256(__m256 __a) +{ + return (__m256i)__a; +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_castsi256_ps(__m256i __a) +{ + return (__m256)__a; +} + +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_castsi256_pd(__m256i __a) +{ + return (__m256d)__a; +} + +static __inline __m128d __DEFAULT_FN_ATTRS +_mm256_castpd256_pd128(__m256d __a) +{ + return __builtin_shufflevector(__a, __a, 0, 1); +} + +static __inline __m128 __DEFAULT_FN_ATTRS +_mm256_castps256_ps128(__m256 __a) +{ + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); +} + +static __inline __m128i __DEFAULT_FN_ATTRS +_mm256_castsi256_si128(__m256i __a) +{ + return __builtin_shufflevector(__a, __a, 0, 1); +} + +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_castpd128_pd256(__m128d __a) +{ + return __builtin_shufflevector(__a, __a, 0, 1, -1, -1); +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_castps128_ps256(__m128 __a) +{ + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1); +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_castsi128_si256(__m128i __a) +{ + return __builtin_shufflevector(__a, __a, 0, 1, -1, -1); +} + +/* + Vector insert. + We use macros rather than inlines because we only want to accept + invocations where the immediate M is a constant expression. +*/ +#define _mm256_insertf128_ps(V1, V2, M) __extension__ ({ \ + (__m256)__builtin_shufflevector( \ + (__v8sf)(V1), \ + (__v8sf)_mm256_castps128_ps256((__m128)(V2)), \ + (((M) & 1) ? 0 : 8), \ + (((M) & 1) ? 1 : 9), \ + (((M) & 1) ? 2 : 10), \ + (((M) & 1) ? 3 : 11), \ + (((M) & 1) ? 8 : 4), \ + (((M) & 1) ? 9 : 5), \ + (((M) & 1) ? 10 : 6), \ + (((M) & 1) ? 11 : 7) );}) + +#define _mm256_insertf128_pd(V1, V2, M) __extension__ ({ \ + (__m256d)__builtin_shufflevector( \ + (__v4df)(V1), \ + (__v4df)_mm256_castpd128_pd256((__m128d)(V2)), \ + (((M) & 1) ? 0 : 4), \ + (((M) & 1) ? 1 : 5), \ + (((M) & 1) ? 4 : 2), \ + (((M) & 1) ? 5 : 3) );}) + +#define _mm256_insertf128_si256(V1, V2, M) __extension__ ({ \ + (__m256i)__builtin_shufflevector( \ + (__v4di)(V1), \ + (__v4di)_mm256_castsi128_si256((__m128i)(V2)), \ + (((M) & 1) ? 0 : 4), \ + (((M) & 1) ? 1 : 5), \ + (((M) & 1) ? 4 : 2), \ + (((M) & 1) ? 5 : 3) );}) + +/* + Vector extract. + We use macros rather than inlines because we only want to accept + invocations where the immediate M is a constant expression. +*/ +#define _mm256_extractf128_ps(V, M) __extension__ ({ \ + (__m128)__builtin_shufflevector( \ + (__v8sf)(V), \ + (__v8sf)(_mm256_setzero_ps()), \ + (((M) & 1) ? 4 : 0), \ + (((M) & 1) ?
5 : 1), \ + (((M) & 1) ? 6 : 2), \ + (((M) & 1) ? 7 : 3) );}) + +#define _mm256_extractf128_pd(V, M) __extension__ ({ \ + (__m128d)__builtin_shufflevector( \ + (__v4df)(V), \ + (__v4df)(_mm256_setzero_pd()), \ + (((M) & 1) ? 2 : 0), \ + (((M) & 1) ? 3 : 1) );}) + +#define _mm256_extractf128_si256(V, M) __extension__ ({ \ + (__m128i)__builtin_shufflevector( \ + (__v4di)(V), \ + (__v4di)(_mm256_setzero_si256()), \ + (((M) & 1) ? 2 : 0), \ + (((M) & 1) ? 3 : 1) );}) + +/* SIMD load ops (unaligned) */ +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_loadu2_m128(float const *__addr_hi, float const *__addr_lo) +{ + struct __loadu_ps { + __m128 __v; + } __attribute__((__packed__, __may_alias__)); + + __m256 __v256 = _mm256_castps128_ps256(((struct __loadu_ps*)__addr_lo)->__v); + return _mm256_insertf128_ps(__v256, ((struct __loadu_ps*)__addr_hi)->__v, 1); +} + +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo) +{ + struct __loadu_pd { + __m128d __v; + } __attribute__((__packed__, __may_alias__)); + + __m256d __v256 = _mm256_castpd128_pd256(((struct __loadu_pd*)__addr_lo)->__v); + return _mm256_insertf128_pd(__v256, ((struct __loadu_pd*)__addr_hi)->__v, 1); +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_loadu2_m128i(__m128i const *__addr_hi, __m128i const *__addr_lo) +{ + struct __loadu_si128 { + __m128i __v; + } __attribute__((__packed__, __may_alias__)); + __m256i __v256 = _mm256_castsi128_si256( + ((struct __loadu_si128*)__addr_lo)->__v); + return _mm256_insertf128_si256(__v256, + ((struct __loadu_si128*)__addr_hi)->__v, 1); +} + +/* SIMD store ops (unaligned) */ +static __inline void __DEFAULT_FN_ATTRS +_mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a) +{ + __m128 __v128; + + __v128 = _mm256_castps256_ps128(__a); + __builtin_ia32_storeups(__addr_lo, __v128); + __v128 = _mm256_extractf128_ps(__a, 1); + __builtin_ia32_storeups(__addr_hi, __v128); +} + +static __inline void __DEFAULT_FN_ATTRS +_mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a) +{ + __m128d __v128; + + __v128 = _mm256_castpd256_pd128(__a); + __builtin_ia32_storeupd(__addr_lo, __v128); + __v128 = _mm256_extractf128_pd(__a, 1); + __builtin_ia32_storeupd(__addr_hi, __v128); +} + +static __inline void __DEFAULT_FN_ATTRS +_mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a) +{ + __m128i __v128; + + __v128 = _mm256_castsi256_si128(__a); + __builtin_ia32_storedqu((char *)__addr_lo, (__v16qi)__v128); + __v128 = _mm256_extractf128_si256(__a, 1); + __builtin_ia32_storedqu((char *)__addr_hi, (__v16qi)__v128); +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_set_m128 (__m128 __hi, __m128 __lo) { + return (__m256) __builtin_shufflevector(__lo, __hi, 0, 1, 2, 3, 4, 5, 6, 7); +} + +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_set_m128d (__m128d __hi, __m128d __lo) { + return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo); +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_set_m128i (__m128i __hi, __m128i __lo) { + return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo); +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_setr_m128 (__m128 __lo, __m128 __hi) { + return _mm256_set_m128(__hi, __lo); +} + +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_setr_m128d (__m128d __lo, __m128d __hi) { + return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo); +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_setr_m128i (__m128i __lo, __m128i __hi) { + return 
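/* In _mm256_set_m128 and the set_m128d/set_m128i wrappers, the first
 * argument supplies the high 128 bits and the second the low 128 bits;
 * the setr variants take them in the opposite (memory) order. Sketch
 * with hypothetical __m128 values hi and lo:
 *
 *   __m256 v = _mm256_set_m128(hi, lo);
 *   // _mm256_castps256_ps128(v) yields lo
 *   // _mm256_extractf128_ps(v, 1) yields hi
 */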
(__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo); +} + +#undef __DEFAULT_FN_ATTRS + +#endif /* __AVXINTRIN_H */ diff --git a/c_headers/bmi2intrin.h b/c_headers/bmi2intrin.h new file mode 100644 index 0000000000..c63397c96e --- /dev/null +++ b/c_headers/bmi2intrin.h @@ -0,0 +1,99 @@ +/*===---- bmi2intrin.h - BMI2 intrinsics -----------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H +#error "Never use <bmi2intrin.h> directly; include <x86intrin.h> instead." +#endif + +#ifndef __BMI2__ +# error "BMI2 instruction set not enabled" +#endif /* __BMI2__ */ + +#ifndef __BMI2INTRIN_H +#define __BMI2INTRIN_H + +/* Define the default attributes for the functions in this file.
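 * The parallel bit deposit/extract intrinsics below are easiest to see
 * with a worked example (hypothetical values):
 *
 *   _pext_u32(0xB2, 0xF0) == 0x0B   gather the bits of 0xB2 selected
 *                                   by 0xF0 (bits 7..4, i.e. 1011) and
 *                                   pack them at the low end
 *   _pdep_u32(0x0B, 0xF0) == 0xB0   scatter the low bits of 0x0B back
 *                                   into the positions selected by 0xF0
 *
 * _bzhi_u32(x, n) zeroes the bits of x at position n and above.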
*/ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) + +static __inline__ unsigned int __DEFAULT_FN_ATTRS +_bzhi_u32(unsigned int __X, unsigned int __Y) +{ + return __builtin_ia32_bzhi_si(__X, __Y); +} + +static __inline__ unsigned int __DEFAULT_FN_ATTRS +_pdep_u32(unsigned int __X, unsigned int __Y) +{ + return __builtin_ia32_pdep_si(__X, __Y); +} + +static __inline__ unsigned int __DEFAULT_FN_ATTRS +_pext_u32(unsigned int __X, unsigned int __Y) +{ + return __builtin_ia32_pext_si(__X, __Y); +} + +#ifdef __x86_64__ + +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +_bzhi_u64(unsigned long long __X, unsigned long long __Y) +{ + return __builtin_ia32_bzhi_di(__X, __Y); +} + +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +_pdep_u64(unsigned long long __X, unsigned long long __Y) +{ + return __builtin_ia32_pdep_di(__X, __Y); +} + +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +_pext_u64(unsigned long long __X, unsigned long long __Y) +{ + return __builtin_ia32_pext_di(__X, __Y); +} + +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +_mulx_u64 (unsigned long long __X, unsigned long long __Y, + unsigned long long *__P) +{ + unsigned __int128 __res = (unsigned __int128) __X * __Y; + *__P = (unsigned long long) (__res >> 64); + return (unsigned long long) __res; +} + +#else /* !__x86_64__ */ + +static __inline__ unsigned int __DEFAULT_FN_ATTRS +_mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P) +{ + unsigned long long __res = (unsigned long long) __X * __Y; + *__P = (unsigned int) (__res >> 32); + return (unsigned int) __res; +} + +#endif /* !__x86_64__ */ + +#undef __DEFAULT_FN_ATTRS + +#endif /* __BMI2INTRIN_H */ diff --git a/c_headers/bmiintrin.h b/c_headers/bmiintrin.h new file mode 100644 index 0000000000..0e93d575cb --- /dev/null +++ b/c_headers/bmiintrin.h @@ -0,0 +1,153 @@ +/*===---- bmiintrin.h - BMI intrinsics -------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H +#error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead."
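/* The _mulx_u32/_mulx_u64 intrinsics defined in bmi2intrin.h above
 * compute a full widening multiply, returning the low half and storing
 * the high half through the pointer. Sketch with assumed values:
 *
 *   unsigned int hi;
 *   unsigned int lo = _mulx_u32(0x80000000u, 4u, &hi);
 *   // 0x80000000 * 4 == 0x200000000, so lo == 0 and hi == 2
 */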
+#endif + +#ifndef __BMI__ +# error "BMI instruction set not enabled" +#endif /* __BMI__ */ + +#ifndef __BMIINTRIN_H +#define __BMIINTRIN_H + +#define _tzcnt_u16(a) (__tzcnt_u16((a))) +#define _andn_u32(a, b) (__andn_u32((a), (b))) +/* _bextr_u32 != __bextr_u32 */ +#define _blsi_u32(a) (__blsi_u32((a))) +#define _blsmsk_u32(a) (__blsmsk_u32((a))) +#define _blsr_u32(a) (__blsr_u32((a))) +#define _tzcnt_u32(a) (__tzcnt_u32((a))) + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) + +static __inline__ unsigned short __DEFAULT_FN_ATTRS +__tzcnt_u16(unsigned short __X) +{ + return __X ? __builtin_ctzs(__X) : 16; +} + +static __inline__ unsigned int __DEFAULT_FN_ATTRS +__andn_u32(unsigned int __X, unsigned int __Y) +{ + return ~__X & __Y; +} + +/* AMD-specified, double-leading-underscore version of BEXTR */ +static __inline__ unsigned int __DEFAULT_FN_ATTRS +__bextr_u32(unsigned int __X, unsigned int __Y) +{ + return __builtin_ia32_bextr_u32(__X, __Y); +} + +/* Intel-specified, single-leading-underscore version of BEXTR */ +static __inline__ unsigned int __DEFAULT_FN_ATTRS +_bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z) +{ + return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); +} + +static __inline__ unsigned int __DEFAULT_FN_ATTRS +__blsi_u32(unsigned int __X) +{ + return __X & -__X; +} + +static __inline__ unsigned int __DEFAULT_FN_ATTRS +__blsmsk_u32(unsigned int __X) +{ + return __X ^ (__X - 1); +} + +static __inline__ unsigned int __DEFAULT_FN_ATTRS +__blsr_u32(unsigned int __X) +{ + return __X & (__X - 1); +} + +static __inline__ unsigned int __DEFAULT_FN_ATTRS +__tzcnt_u32(unsigned int __X) +{ + return __X ? __builtin_ctz(__X) : 32; +} + +#ifdef __x86_64__ + +#define _andn_u64(a, b) (__andn_u64((a), (b))) +/* _bextr_u64 != __bextr_u64 */ +#define _blsi_u64(a) (__blsi_u64((a))) +#define _blsmsk_u64(a) (__blsmsk_u64((a))) +#define _blsr_u64(a) (__blsr_u64((a))) +#define _tzcnt_u64(a) (__tzcnt_u64((a))) + +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +__andn_u64 (unsigned long long __X, unsigned long long __Y) +{ + return ~__X & __Y; +} + +/* AMD-specified, double-leading-underscore version of BEXTR */ +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +__bextr_u64(unsigned long long __X, unsigned long long __Y) +{ + return __builtin_ia32_bextr_u64(__X, __Y); +} + +/* Intel-specified, single-leading-underscore version of BEXTR */ +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +_bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z) +{ + return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); +} + +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +__blsi_u64(unsigned long long __X) +{ + return __X & -__X; +} + +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +__blsmsk_u64(unsigned long long __X) +{ + return __X ^ (__X - 1); +} + +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +__blsr_u64(unsigned long long __X) +{ + return __X & (__X - 1); +} + +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +__tzcnt_u64(unsigned long long __X) +{ + return __X ? 
__builtin_ctzll(__X) : 64; +} + +#endif /* __x86_64__ */ + +#undef __DEFAULT_FN_ATTRS + +#endif /* __BMIINTRIN_H */ diff --git a/c_headers/cpuid.h b/c_headers/cpuid.h new file mode 100644 index 0000000000..5da02e0e51 --- /dev/null +++ b/c_headers/cpuid.h @@ -0,0 +1,209 @@ +/*===---- cpuid.h - X86 cpu model detection --------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#if !(__x86_64__ || __i386__) +#error this header is for x86 only +#endif + +/* Responses identification request with %eax 0 */ +/* AMD: "AuthenticAMD" */ +#define signature_AMD_ebx 0x68747541 +#define signature_AMD_edx 0x69746e65 +#define signature_AMD_ecx 0x444d4163 +/* CENTAUR: "CentaurHauls" */ +#define signature_CENTAUR_ebx 0x746e6543 +#define signature_CENTAUR_edx 0x48727561 +#define signature_CENTAUR_ecx 0x736c7561 +/* CYRIX: "CyrixInstead" */ +#define signature_CYRIX_ebx 0x69727943 +#define signature_CYRIX_edx 0x736e4978 +#define signature_CYRIX_ecx 0x64616574 +/* INTEL: "GenuineIntel" */ +#define signature_INTEL_ebx 0x756e6547 +#define signature_INTEL_edx 0x49656e69 +#define signature_INTEL_ecx 0x6c65746e +/* TM1: "TransmetaCPU" */ +#define signature_TM1_ebx 0x6e617254 +#define signature_TM1_edx 0x74656d73 +#define signature_TM1_ecx 0x55504361 +/* TM2: "GenuineTMx86" */ +#define signature_TM2_ebx 0x756e6547 +#define signature_TM2_edx 0x54656e69 +#define signature_TM2_ecx 0x3638784d +/* NSC: "Geode by NSC" */ +#define signature_NSC_ebx 0x646f6547 +#define signature_NSC_edx 0x43534e20 +#define signature_NSC_ecx 0x79622065 +/* NEXGEN: "NexGenDriven" */ +#define signature_NEXGEN_ebx 0x4778654e +#define signature_NEXGEN_edx 0x72446e65 +#define signature_NEXGEN_ecx 0x6e657669 +/* RISE: "RiseRiseRise" */ +#define signature_RISE_ebx 0x65736952 +#define signature_RISE_edx 0x65736952 +#define signature_RISE_ecx 0x65736952 +/* SIS: "SiS SiS SiS " */ +#define signature_SIS_ebx 0x20536953 +#define signature_SIS_edx 0x20536953 +#define signature_SIS_ecx 0x20536953 +/* UMC: "UMC UMC UMC " */ +#define signature_UMC_ebx 0x20434d55 +#define signature_UMC_edx 0x20434d55 +#define signature_UMC_ecx 0x20434d55 +/* VIA: "VIA VIA VIA " */ +#define signature_VIA_ebx 0x20414956 +#define signature_VIA_edx 0x20414956 +#define signature_VIA_ecx 0x20414956 +/* VORTEX: "Vortex86 SoC" */ +#define signature_VORTEX_ebx 0x74726f56 +#define signature_VORTEX_edx 0x36387865 +#define signature_VORTEX_ecx 
0x436f5320 + +/* Features in %ecx for level 1 */ +#define bit_SSE3 0x00000001 +#define bit_PCLMULQDQ 0x00000002 +#define bit_DTES64 0x00000004 +#define bit_MONITOR 0x00000008 +#define bit_DSCPL 0x00000010 +#define bit_VMX 0x00000020 +#define bit_SMX 0x00000040 +#define bit_EIST 0x00000080 +#define bit_TM2 0x00000100 +#define bit_SSSE3 0x00000200 +#define bit_CNXTID 0x00000400 +#define bit_FMA 0x00001000 +#define bit_CMPXCHG16B 0x00002000 +#define bit_xTPR 0x00004000 +#define bit_PDCM 0x00008000 +#define bit_PCID 0x00020000 +#define bit_DCA 0x00040000 +#define bit_SSE41 0x00080000 +#define bit_SSE42 0x00100000 +#define bit_x2APIC 0x00200000 +#define bit_MOVBE 0x00400000 +#define bit_POPCNT 0x00800000 +#define bit_TSCDeadline 0x01000000 +#define bit_AESNI 0x02000000 +#define bit_XSAVE 0x04000000 +#define bit_OSXSAVE 0x08000000 +#define bit_AVX 0x10000000 +#define bit_RDRND 0x40000000 + +/* Features in %edx for level 1 */ +#define bit_FPU 0x00000001 +#define bit_VME 0x00000002 +#define bit_DE 0x00000004 +#define bit_PSE 0x00000008 +#define bit_TSC 0x00000010 +#define bit_MSR 0x00000020 +#define bit_PAE 0x00000040 +#define bit_MCE 0x00000080 +#define bit_CX8 0x00000100 +#define bit_APIC 0x00000200 +#define bit_SEP 0x00000800 +#define bit_MTRR 0x00001000 +#define bit_PGE 0x00002000 +#define bit_MCA 0x00004000 +#define bit_CMOV 0x00008000 +#define bit_PAT 0x00010000 +#define bit_PSE36 0x00020000 +#define bit_PSN 0x00040000 +#define bit_CLFSH 0x00080000 +#define bit_DS 0x00200000 +#define bit_ACPI 0x00400000 +#define bit_MMX 0x00800000 +#define bit_FXSR 0x01000000 +#define bit_FXSAVE bit_FXSR /* for gcc compat */ +#define bit_SSE 0x02000000 +#define bit_SSE2 0x04000000 +#define bit_SS 0x08000000 +#define bit_HTT 0x10000000 +#define bit_TM 0x20000000 +#define bit_PBE 0x80000000 + +/* Features in %ebx for level 7 sub-leaf 0 */ +#define bit_FSGSBASE 0x00000001 +#define bit_SMEP 0x00000080 +#define bit_ENH_MOVSB 0x00000200 + +#if __i386__ +#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \ + __asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \ + : "0"(__level)) + +#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \ + __asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \ + : "0"(__level), "2"(__count)) +#else +/* x86-64 uses %rbx as the base register, so preserve it. 
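(Illustrative addition, not in the upstream comment: %rbx is callee-saved in the x86-64 ABI and can serve as the PIC base register, so the asm below saves and restores it with xchgq rather than declaring it clobbered.)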
*/ +#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \ + __asm(" xchgq %%rbx,%q1\n" \ + " cpuid\n" \ + " xchgq %%rbx,%q1" \ + : "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \ + : "0"(__level)) + +#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \ + __asm(" xchgq %%rbx,%q1\n" \ + " cpuid\n" \ + " xchgq %%rbx,%q1" \ + : "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \ + : "0"(__level), "2"(__count)) +#endif + +static __inline int __get_cpuid (unsigned int __level, unsigned int *__eax, + unsigned int *__ebx, unsigned int *__ecx, + unsigned int *__edx) { + __cpuid(__level, *__eax, *__ebx, *__ecx, *__edx); + return 1; +} + +static __inline int __get_cpuid_max (unsigned int __level, unsigned int *__sig) +{ + unsigned int __eax, __ebx, __ecx, __edx; +#if __i386__ + int __cpuid_supported; + + __asm(" pushfl\n" + " popl %%eax\n" + " movl %%eax,%%ecx\n" + " xorl $0x00200000,%%eax\n" + " pushl %%eax\n" + " popfl\n" + " pushfl\n" + " popl %%eax\n" + " movl $0,%0\n" + " cmpl %%eax,%%ecx\n" + " je 1f\n" + " movl $1,%0\n" + "1:" + : "=r" (__cpuid_supported) : : "eax", "ecx"); + if (!__cpuid_supported) + return 0; +#endif + + __cpuid(__level, __eax, __ebx, __ecx, __edx); + if (__sig) + *__sig = __ebx; + return __eax; +} diff --git a/c_headers/cuda_builtin_vars.h b/c_headers/cuda_builtin_vars.h new file mode 100644 index 0000000000..901356b3d5 --- /dev/null +++ b/c_headers/cuda_builtin_vars.h @@ -0,0 +1,110 @@ +/*===---- cuda_builtin_vars.h - CUDA built-in variables ---------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __CUDA_BUILTIN_VARS_H +#define __CUDA_BUILTIN_VARS_H + +// The file implements built-in CUDA variables using __declspec(property). +// https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx +// All read accesses of built-in variable fields get converted into calls to a +// getter function which in turn would call appropriate builtin to fetch the +// value. 
+// +// Example: +// int x = threadIdx.x; +// IR output: +// %0 = call i32 @llvm.ptx.read.tid.x() #3 +// PTX output: +// mov.u32 %r2, %tid.x; + +#define __CUDA_DEVICE_BUILTIN(FIELD, INTRINSIC) \ + __declspec(property(get = __fetch_builtin_##FIELD)) unsigned int FIELD; \ + static inline __attribute__((always_inline)) \ + __attribute__((device)) unsigned int __fetch_builtin_##FIELD(void) { \ + return INTRINSIC; \ + } + +#if __cplusplus >= 201103L +#define __DELETE =delete +#else +#define __DELETE +#endif + +// Make sure nobody can create instances of the special variable types. nvcc +// also disallows taking the address of special variables, so we disable the +// address-of operator as well. +#define __CUDA_DISALLOW_BUILTINVAR_ACCESS(TypeName) \ + __attribute__((device)) TypeName() __DELETE; \ + __attribute__((device)) TypeName(const TypeName &) __DELETE; \ + __attribute__((device)) void operator=(const TypeName &) const __DELETE; \ + __attribute__((device)) TypeName *operator&() const __DELETE + +struct __cuda_builtin_threadIdx_t { + __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_tid_x()); + __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_tid_y()); + __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_tid_z()); +private: + __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t); +}; + +struct __cuda_builtin_blockIdx_t { + __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ctaid_x()); + __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ctaid_y()); + __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ctaid_z()); +private: + __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t); +}; + +struct __cuda_builtin_blockDim_t { + __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ntid_x()); + __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ntid_y()); + __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ntid_z()); +private: + __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t); +}; + +struct __cuda_builtin_gridDim_t { + __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_nctaid_x()); + __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_nctaid_y()); + __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_nctaid_z()); +private: + __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t); +}; + +#define __CUDA_BUILTIN_VAR \ + extern const __attribute__((device)) __attribute__((weak)) +__CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx; +__CUDA_BUILTIN_VAR __cuda_builtin_blockIdx_t blockIdx; +__CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim; +__CUDA_BUILTIN_VAR __cuda_builtin_gridDim_t gridDim; + +// warpSize should translate to read of %WARP_SZ but there's currently no +// builtin to do so. According to PTX v4.2 docs 'to date, all target +// architectures have a WARP_SZ value of 32'.
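+// Illustrative usage (not part of the upstream header): device code can use +// warpSize like the other built-ins, e.g. int lane = threadIdx.x % warpSize; +// gives a thread's lane index within its warp.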
+__attribute__((device)) const int warpSize = 32; + +#undef __CUDA_DEVICE_BUILTIN +#undef __CUDA_BUILTIN_VAR +#undef __CUDA_DISALLOW_BUILTINVAR_ACCESS + +#endif /* __CUDA_BUILTIN_VARS_H */ diff --git a/c_headers/emmintrin.h b/c_headers/emmintrin.h new file mode 100644 index 0000000000..656bc19d3d --- /dev/null +++ b/c_headers/emmintrin.h @@ -0,0 +1,1480 @@ +/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __EMMINTRIN_H +#define __EMMINTRIN_H + +#ifndef __SSE2__ +#error "SSE2 instruction set not enabled" +#else + +#include <xmmintrin.h> + +typedef double __m128d __attribute__((__vector_size__(16))); +typedef long long __m128i __attribute__((__vector_size__(16))); + +/* Type defines. */ +typedef double __v2df __attribute__ ((__vector_size__ (16))); +typedef long long __v2di __attribute__ ((__vector_size__ (16))); +typedef short __v8hi __attribute__((__vector_size__(16))); +typedef char __v16qi __attribute__((__vector_size__(16))); + +/* Define the default attributes for the functions in this file.
*/ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_add_sd(__m128d __a, __m128d __b) +{ + __a[0] += __b[0]; + return __a; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_add_pd(__m128d __a, __m128d __b) +{ + return __a + __b; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_sub_sd(__m128d __a, __m128d __b) +{ + __a[0] -= __b[0]; + return __a; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_sub_pd(__m128d __a, __m128d __b) +{ + return __a - __b; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mul_sd(__m128d __a, __m128d __b) +{ + __a[0] *= __b[0]; + return __a; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mul_pd(__m128d __a, __m128d __b) +{ + return __a * __b; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_div_sd(__m128d __a, __m128d __b) +{ + __a[0] /= __b[0]; + return __a; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_div_pd(__m128d __a, __m128d __b) +{ + return __a / __b; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_sqrt_sd(__m128d __a, __m128d __b) +{ + __m128d __c = __builtin_ia32_sqrtsd(__b); + return (__m128d) { __c[0], __a[1] }; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_sqrt_pd(__m128d __a) +{ + return __builtin_ia32_sqrtpd(__a); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_min_sd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_minsd(__a, __b); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_min_pd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_minpd(__a, __b); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_max_sd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_maxsd(__a, __b); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_max_pd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_maxpd(__a, __b); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_and_pd(__m128d __a, __m128d __b) +{ + return (__m128d)((__v4si)__a & (__v4si)__b); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_andnot_pd(__m128d __a, __m128d __b) +{ + return (__m128d)(~(__v4si)__a & (__v4si)__b); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_or_pd(__m128d __a, __m128d __b) +{ + return (__m128d)((__v4si)__a | (__v4si)__b); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_xor_pd(__m128d __a, __m128d __b) +{ + return (__m128d)((__v4si)__a ^ (__v4si)__b); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cmpeq_pd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmpeqpd(__a, __b); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cmplt_pd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmpltpd(__a, __b); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cmple_pd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmplepd(__a, __b); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cmpgt_pd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmpltpd(__b, __a); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cmpge_pd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmplepd(__b, __a); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cmpord_pd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmpordpd(__a, __b); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cmpunord_pd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmpunordpd(__a, __b); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cmpneq_pd(__m128d __a, __m128d __b) +{ + return 
(__m128d)__builtin_ia32_cmpneqpd(__a, __b); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cmpnlt_pd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmpnltpd(__a, __b); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cmpnle_pd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmpnlepd(__a, __b); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cmpngt_pd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmpnltpd(__b, __a); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cmpnge_pd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmpnlepd(__b, __a); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cmpeq_sd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmpeqsd(__a, __b); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cmplt_sd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmpltsd(__a, __b); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cmple_sd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmplesd(__a, __b); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cmpgt_sd(__m128d __a, __m128d __b) +{ + __m128d __c = __builtin_ia32_cmpltsd(__b, __a); + return (__m128d) { __c[0], __a[1] }; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cmpge_sd(__m128d __a, __m128d __b) +{ + __m128d __c = __builtin_ia32_cmplesd(__b, __a); + return (__m128d) { __c[0], __a[1] }; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cmpord_sd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmpordsd(__a, __b); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cmpunord_sd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmpunordsd(__a, __b); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cmpneq_sd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmpneqsd(__a, __b); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cmpnlt_sd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmpnltsd(__a, __b); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cmpnle_sd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmpnlesd(__a, __b); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cmpngt_sd(__m128d __a, __m128d __b) +{ + __m128d __c = __builtin_ia32_cmpnltsd(__b, __a); + return (__m128d) { __c[0], __a[1] }; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cmpnge_sd(__m128d __a, __m128d __b) +{ + __m128d __c = __builtin_ia32_cmpnlesd(__b, __a); + return (__m128d) { __c[0], __a[1] }; +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_comieq_sd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_comisdeq(__a, __b); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_comilt_sd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_comisdlt(__a, __b); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_comile_sd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_comisdle(__a, __b); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_comigt_sd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_comisdgt(__a, __b); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_comige_sd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_comisdge(__a, __b); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_comineq_sd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_comisdneq(__a, __b); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_ucomieq_sd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_ucomisdeq(__a, __b); +} + +static __inline__ int __DEFAULT_FN_ATTRS 
+_mm_ucomilt_sd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_ucomisdlt(__a, __b); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_ucomile_sd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_ucomisdle(__a, __b); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_ucomigt_sd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_ucomisdgt(__a, __b); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_ucomige_sd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_ucomisdge(__a, __b); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_ucomineq_sd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_ucomisdneq(__a, __b); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cvtpd_ps(__m128d __a) +{ + return __builtin_ia32_cvtpd2ps(__a); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cvtps_pd(__m128 __a) +{ + return __builtin_ia32_cvtps2pd(__a); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cvtepi32_pd(__m128i __a) +{ + return __builtin_ia32_cvtdq2pd((__v4si)__a); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtpd_epi32(__m128d __a) +{ + return __builtin_ia32_cvtpd2dq(__a); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_cvtsd_si32(__m128d __a) +{ + return __builtin_ia32_cvtsd2si(__a); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cvtsd_ss(__m128 __a, __m128d __b) +{ + __a[0] = __b[0]; + return __a; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cvtsi32_sd(__m128d __a, int __b) +{ + __a[0] = __b; + return __a; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cvtss_sd(__m128d __a, __m128 __b) +{ + __a[0] = __b[0]; + return __a; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvttpd_epi32(__m128d __a) +{ + return (__m128i)__builtin_ia32_cvttpd2dq(__a); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_cvttsd_si32(__m128d __a) +{ + return __a[0]; +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_cvtpd_pi32(__m128d __a) +{ + return (__m64)__builtin_ia32_cvtpd2pi(__a); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_cvttpd_pi32(__m128d __a) +{ + return (__m64)__builtin_ia32_cvttpd2pi(__a); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cvtpi32_pd(__m64 __a) +{ + return __builtin_ia32_cvtpi2pd((__v2si)__a); +} + +static __inline__ double __DEFAULT_FN_ATTRS +_mm_cvtsd_f64(__m128d __a) +{ + return __a[0]; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_load_pd(double const *__dp) +{ + return *(__m128d*)__dp; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_load1_pd(double const *__dp) +{ + struct __mm_load1_pd_struct { + double __u; + } __attribute__((__packed__, __may_alias__)); + double __u = ((struct __mm_load1_pd_struct*)__dp)->__u; + return (__m128d){ __u, __u }; +} + +#define _mm_load_pd1(dp) _mm_load1_pd(dp) + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_loadr_pd(double const *__dp) +{ + __m128d __u = *(__m128d*)__dp; + return __builtin_shufflevector(__u, __u, 1, 0); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_loadu_pd(double const *__dp) +{ + struct __loadu_pd { + __m128d __v; + } __attribute__((__packed__, __may_alias__)); + return ((struct __loadu_pd*)__dp)->__v; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_load_sd(double const *__dp) +{ + struct __mm_load_sd_struct { + double __u; + } __attribute__((__packed__, __may_alias__)); + double __u = ((struct __mm_load_sd_struct*)__dp)->__u; + return (__m128d){ __u, 0 }; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_loadh_pd(__m128d __a, double const *__dp) +{ + struct __mm_loadh_pd_struct { + 
double __u; + } __attribute__((__packed__, __may_alias__)); + double __u = ((struct __mm_loadh_pd_struct*)__dp)->__u; + return (__m128d){ __a[0], __u }; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_loadl_pd(__m128d __a, double const *__dp) +{ + struct __mm_loadl_pd_struct { + double __u; + } __attribute__((__packed__, __may_alias__)); + double __u = ((struct __mm_loadl_pd_struct*)__dp)->__u; + return (__m128d){ __u, __a[1] }; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_set_sd(double __w) +{ + return (__m128d){ __w, 0 }; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_set1_pd(double __w) +{ + return (__m128d){ __w, __w }; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_set_pd(double __w, double __x) +{ + return (__m128d){ __x, __w }; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_setr_pd(double __w, double __x) +{ + return (__m128d){ __w, __x }; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_setzero_pd(void) +{ + return (__m128d){ 0, 0 }; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_move_sd(__m128d __a, __m128d __b) +{ + return (__m128d){ __b[0], __a[1] }; +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_store_sd(double *__dp, __m128d __a) +{ + struct __mm_store_sd_struct { + double __u; + } __attribute__((__packed__, __may_alias__)); + ((struct __mm_store_sd_struct*)__dp)->__u = __a[0]; +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_store1_pd(double *__dp, __m128d __a) +{ + struct __mm_store1_pd_struct { + double __u[2]; + } __attribute__((__packed__, __may_alias__)); + ((struct __mm_store1_pd_struct*)__dp)->__u[0] = __a[0]; + ((struct __mm_store1_pd_struct*)__dp)->__u[1] = __a[0]; +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_store_pd(double *__dp, __m128d __a) +{ + *(__m128d *)__dp = __a; +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_storeu_pd(double *__dp, __m128d __a) +{ + __builtin_ia32_storeupd(__dp, __a); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_storer_pd(double *__dp, __m128d __a) +{ + __a = __builtin_shufflevector(__a, __a, 1, 0); + *(__m128d *)__dp = __a; +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_storeh_pd(double *__dp, __m128d __a) +{ + struct __mm_storeh_pd_struct { + double __u; + } __attribute__((__packed__, __may_alias__)); + ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1]; +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_storel_pd(double *__dp, __m128d __a) +{ + struct __mm_storeh_pd_struct { + double __u; + } __attribute__((__packed__, __may_alias__)); + ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[0]; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_add_epi8(__m128i __a, __m128i __b) +{ + return (__m128i)((__v16qi)__a + (__v16qi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_add_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)((__v8hi)__a + (__v8hi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_add_epi32(__m128i __a, __m128i __b) +{ + return (__m128i)((__v4si)__a + (__v4si)__b); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_add_si64(__m64 __a, __m64 __b) +{ + return __a + __b; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_add_epi64(__m128i __a, __m128i __b) +{ + return __a + __b; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_adds_epi8(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_adds_epi16(__m128i __a, __m128i __b) +{ + return 
(__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_adds_epu8(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_adds_epu16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_avg_epu8(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_avg_epu16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_madd_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_max_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_max_epu8(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_min_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_min_epu8(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mulhi_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mulhi_epu16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mullo_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)((__v8hi)__a * (__v8hi)__b); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_mul_su32(__m64 __a, __m64 __b) +{ + return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mul_epu32(__m128i __a, __m128i __b) +{ + return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_sad_epu8(__m128i __a, __m128i __b) +{ + return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_sub_epi8(__m128i __a, __m128i __b) +{ + return (__m128i)((__v16qi)__a - (__v16qi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_sub_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)((__v8hi)__a - (__v8hi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_sub_epi32(__m128i __a, __m128i __b) +{ + return (__m128i)((__v4si)__a - (__v4si)__b); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_sub_si64(__m64 __a, __m64 __b) +{ + return __a - __b; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_sub_epi64(__m128i __a, __m128i __b) +{ + return __a - __b; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_subs_epi8(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_subs_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_subs_epu8(__m128i 
__a, __m128i __b) +{ + return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_subs_epu16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_and_si128(__m128i __a, __m128i __b) +{ + return __a & __b; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_andnot_si128(__m128i __a, __m128i __b) +{ + return ~__a & __b; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_or_si128(__m128i __a, __m128i __b) +{ + return __a | __b; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_xor_si128(__m128i __a, __m128i __b) +{ + return __a ^ __b; +} + +#define _mm_slli_si128(a, imm) __extension__ ({ \ + (__m128i)__builtin_shufflevector((__v16qi)_mm_setzero_si128(), \ + (__v16qi)(__m128i)(a), \ + ((imm)&0xF0) ? 0 : 16 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 17 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 18 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 19 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 20 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 21 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 22 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 23 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 24 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 25 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 26 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 27 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 28 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 29 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 30 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 31 - ((imm)&0xF)); }) + +#define _mm_bslli_si128(a, imm) \ + _mm_slli_si128((a), (imm)) + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_slli_epi16(__m128i __a, int __count) +{ + return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_sll_epi16(__m128i __a, __m128i __count) +{ + return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_slli_epi32(__m128i __a, int __count) +{ + return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_sll_epi32(__m128i __a, __m128i __count) +{ + return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_slli_epi64(__m128i __a, int __count) +{ + return __builtin_ia32_psllqi128(__a, __count); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_sll_epi64(__m128i __a, __m128i __count) +{ + return __builtin_ia32_psllq128(__a, __count); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_srai_epi16(__m128i __a, int __count) +{ + return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_sra_epi16(__m128i __a, __m128i __count) +{ + return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_srai_epi32(__m128i __a, int __count) +{ + return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_sra_epi32(__m128i __a, __m128i __count) +{ + return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count); +} + +#define _mm_srli_si128(a, imm) __extension__ ({ \ + (__m128i)__builtin_shufflevector((__v16qi)(__m128i)(a), \ + (__v16qi)_mm_setzero_si128(), \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 0, \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 1, \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 2, \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 3, \ + ((imm)&0xF0) ? 
16 : ((imm)&0xF) + 4, \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 5, \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 6, \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 7, \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 8, \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 9, \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 10, \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 11, \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 12, \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 13, \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 14, \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 15); }) + +#define _mm_bsrli_si128(a, imm) \ + _mm_srli_si128((a), (imm)) + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_srli_epi16(__m128i __a, int __count) +{ + return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_srl_epi16(__m128i __a, __m128i __count) +{ + return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_srli_epi32(__m128i __a, int __count) +{ + return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_srl_epi32(__m128i __a, __m128i __count) +{ + return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_srli_epi64(__m128i __a, int __count) +{ + return __builtin_ia32_psrlqi128(__a, __count); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_srl_epi64(__m128i __a, __m128i __count) +{ + return __builtin_ia32_psrlq128(__a, __count); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cmpeq_epi8(__m128i __a, __m128i __b) +{ + return (__m128i)((__v16qi)__a == (__v16qi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cmpeq_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)((__v8hi)__a == (__v8hi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cmpeq_epi32(__m128i __a, __m128i __b) +{ + return (__m128i)((__v4si)__a == (__v4si)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cmpgt_epi8(__m128i __a, __m128i __b) +{ + /* This function always performs a signed comparison, but __v16qi is a char + which may be signed or unsigned. 
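(Illustrative addition, not in the upstream comment: plain char's signedness is implementation-defined and differs by target, e.g. it is unsigned by default on ARM, so the cast to an explicitly signed vector type keeps this comparison signed everywhere.)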
*/ + typedef signed char __v16qs __attribute__((__vector_size__(16))); + return (__m128i)((__v16qs)__a > (__v16qs)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cmpgt_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)((__v8hi)__a > (__v8hi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cmpgt_epi32(__m128i __a, __m128i __b) +{ + return (__m128i)((__v4si)__a > (__v4si)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cmplt_epi8(__m128i __a, __m128i __b) +{ + return _mm_cmpgt_epi8(__b, __a); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cmplt_epi16(__m128i __a, __m128i __b) +{ + return _mm_cmpgt_epi16(__b, __a); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cmplt_epi32(__m128i __a, __m128i __b) +{ + return _mm_cmpgt_epi32(__b, __a); +} + +#ifdef __x86_64__ +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_cvtsi64_sd(__m128d __a, long long __b) +{ + __a[0] = __b; + return __a; +} + +static __inline__ long long __DEFAULT_FN_ATTRS +_mm_cvtsd_si64(__m128d __a) +{ + return __builtin_ia32_cvtsd2si64(__a); +} + +static __inline__ long long __DEFAULT_FN_ATTRS +_mm_cvttsd_si64(__m128d __a) +{ + return __a[0]; +} +#endif + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cvtepi32_ps(__m128i __a) +{ + return __builtin_ia32_cvtdq2ps((__v4si)__a); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtps_epi32(__m128 __a) +{ + return (__m128i)__builtin_ia32_cvtps2dq(__a); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvttps_epi32(__m128 __a) +{ + return (__m128i)__builtin_ia32_cvttps2dq(__a); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtsi32_si128(int __a) +{ + return (__m128i)(__v4si){ __a, 0, 0, 0 }; +} + +#ifdef __x86_64__ +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtsi64_si128(long long __a) +{ + return (__m128i){ __a, 0 }; +} +#endif + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_cvtsi128_si32(__m128i __a) +{ + __v4si __b = (__v4si)__a; + return __b[0]; +} + +#ifdef __x86_64__ +static __inline__ long long __DEFAULT_FN_ATTRS +_mm_cvtsi128_si64(__m128i __a) +{ + return __a[0]; +} +#endif + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_load_si128(__m128i const *__p) +{ + return *__p; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_loadu_si128(__m128i const *__p) +{ + struct __loadu_si128 { + __m128i __v; + } __attribute__((__packed__, __may_alias__)); + return ((struct __loadu_si128*)__p)->__v; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_loadl_epi64(__m128i const *__p) +{ + struct __mm_loadl_epi64_struct { + long long __u; + } __attribute__((__packed__, __may_alias__)); + return (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0}; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_set_epi64x(long long q1, long long q0) +{ + return (__m128i){ q0, q1 }; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_set_epi64(__m64 q1, __m64 q0) +{ + return (__m128i){ (long long)q0, (long long)q1 }; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_set_epi32(int i3, int i2, int i1, int i0) +{ + return (__m128i)(__v4si){ i0, i1, i2, i3}; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0) +{ + return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 }; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0) +{ + return 
(__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 }; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_set1_epi64x(long long __q) +{ + return (__m128i){ __q, __q }; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_set1_epi64(__m64 __q) +{ + return (__m128i){ (long long)__q, (long long)__q }; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_set1_epi32(int __i) +{ + return (__m128i)(__v4si){ __i, __i, __i, __i }; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_set1_epi16(short __w) +{ + return (__m128i)(__v8hi){ __w, __w, __w, __w, __w, __w, __w, __w }; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_set1_epi8(char __b) +{ + return (__m128i)(__v16qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b }; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_setr_epi64(__m64 q0, __m64 q1) +{ + return (__m128i){ (long long)q0, (long long)q1 }; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_setr_epi32(int i0, int i1, int i2, int i3) +{ + return (__m128i)(__v4si){ i0, i1, i2, i3}; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7) +{ + return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 }; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_setr_epi8(char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15) +{ + return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 }; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_setzero_si128(void) +{ + return (__m128i){ 0LL, 0LL }; +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_store_si128(__m128i *__p, __m128i __b) +{ + *__p = __b; +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_storeu_si128(__m128i *__p, __m128i __b) +{ + __builtin_ia32_storedqu((char *)__p, (__v16qi)__b); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p) +{ + __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_storel_epi64(__m128i *__p, __m128i __a) +{ + struct __mm_storel_epi64_struct { + long long __u; + } __attribute__((__packed__, __may_alias__)); + ((struct __mm_storel_epi64_struct*)__p)->__u = __a[0]; +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_stream_pd(double *__p, __m128d __a) +{ + __builtin_ia32_movntpd(__p, __a); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_stream_si128(__m128i *__p, __m128i __a) +{ + __builtin_ia32_movntdq(__p, __a); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_stream_si32(int *__p, int __a) +{ + __builtin_ia32_movnti(__p, __a); +} + +#ifdef __x86_64__ +static __inline__ void __DEFAULT_FN_ATTRS +_mm_stream_si64(long long *__p, long long __a) +{ + __builtin_ia32_movnti64(__p, __a); +} +#endif + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_clflush(void const *__p) +{ + __builtin_ia32_clflush(__p); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_lfence(void) +{ + __builtin_ia32_lfence(); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mfence(void) +{ + __builtin_ia32_mfence(); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_packs_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_packs_epi32(__m128i __a, __m128i __b) +{ + return 
(__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_packus_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_extract_epi16(__m128i __a, int __imm) +{ + __v8hi __b = (__v8hi)__a; + return (unsigned short)__b[__imm & 7]; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_insert_epi16(__m128i __a, int __b, int __imm) +{ + __v8hi __c = (__v8hi)__a; + __c[__imm & 7] = __b; + return (__m128i)__c; +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_movemask_epi8(__m128i __a) +{ + return __builtin_ia32_pmovmskb128((__v16qi)__a); +} + +#define _mm_shuffle_epi32(a, imm) __extension__ ({ \ + (__m128i)__builtin_shufflevector((__v4si)(__m128i)(a), \ + (__v4si)_mm_set1_epi32(0), \ + (imm) & 0x3, ((imm) & 0xc) >> 2, \ + ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6); }) + +#define _mm_shufflelo_epi16(a, imm) __extension__ ({ \ + (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \ + (__v8hi)_mm_set1_epi16(0), \ + (imm) & 0x3, ((imm) & 0xc) >> 2, \ + ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \ + 4, 5, 6, 7); }) + +#define _mm_shufflehi_epi16(a, imm) __extension__ ({ \ + (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \ + (__v8hi)_mm_set1_epi16(0), \ + 0, 1, 2, 3, \ + 4 + (((imm) & 0x03) >> 0), \ + 4 + (((imm) & 0x0c) >> 2), \ + 4 + (((imm) & 0x30) >> 4), \ + 4 + (((imm) & 0xc0) >> 6)); }) + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_unpackhi_epi8(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_unpackhi_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_unpackhi_epi32(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_unpackhi_epi64(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_shufflevector(__a, __b, 1, 2+1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_unpacklo_epi8(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_unpacklo_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_unpacklo_epi32(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_unpacklo_epi64(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_shufflevector(__a, __b, 0, 2+0); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_movepi64_pi64(__m128i __a) +{ + return (__m64)__a[0]; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_movpi64_epi64(__m64 __a) +{ + return (__m128i){ (long long)__a, 0 }; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_move_epi64(__m128i __a) +{ + return __builtin_shufflevector(__a, (__m128i){ 0 }, 0, 2); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_unpackhi_pd(__m128d __a, __m128d __b) +{ + return 
__builtin_shufflevector(__a, __b, 1, 2+1); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_unpacklo_pd(__m128d __a, __m128d __b) +{ + return __builtin_shufflevector(__a, __b, 0, 2+0); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_movemask_pd(__m128d __a) +{ + return __builtin_ia32_movmskpd(__a); +} + +#define _mm_shuffle_pd(a, b, i) __extension__ ({ \ + __builtin_shufflevector((__m128d)(a), (__m128d)(b), \ + (i) & 1, (((i) & 2) >> 1) + 2); }) + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_castpd_ps(__m128d __a) +{ + return (__m128)__a; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_castpd_si128(__m128d __a) +{ + return (__m128i)__a; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_castps_pd(__m128 __a) +{ + return (__m128d)__a; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_castps_si128(__m128 __a) +{ + return (__m128i)__a; +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_castsi128_ps(__m128i __a) +{ + return (__m128)__a; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_castsi128_pd(__m128i __a) +{ + return (__m128d)__a; +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_pause(void) +{ + __asm__ volatile ("pause"); +} + +#undef __DEFAULT_FN_ATTRS + +#define _MM_SHUFFLE2(x, y) (((x) << 1) | (y)) + +#endif /* __SSE2__ */ + +#endif /* __EMMINTRIN_H */ diff --git a/c_headers/f16cintrin.h b/c_headers/f16cintrin.h new file mode 100644 index 0000000000..3730ae0d3e --- /dev/null +++ b/c_headers/f16cintrin.h @@ -0,0 +1,63 @@ +/*===---- f16cintrin.h - F16C intrinsics -----------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __F16C__ +# error "F16C instruction is not enabled" +#endif /* __F16C__ */ + +#ifndef __F16CINTRIN_H +#define __F16CINTRIN_H + +typedef float __v8sf __attribute__ ((__vector_size__ (32))); +typedef float __m256 __attribute__ ((__vector_size__ (32))); + +/* Define the default attributes for the functions in this file. 
*/ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) + +#define _mm_cvtps_ph(a, imm) __extension__ ({ \ + __m128 __a = (a); \ + (__m128i)__builtin_ia32_vcvtps2ph((__v4sf)__a, (imm)); }) + +#define _mm256_cvtps_ph(a, imm) __extension__ ({ \ + __m256 __a = (a); \ + (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)__a, (imm)); }) + +static __inline __m128 __DEFAULT_FN_ATTRS +_mm_cvtph_ps(__m128i __a) +{ + return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a); +} + +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_cvtph_ps(__m128i __a) +{ + return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a); +} + +#undef __DEFAULT_FN_ATTRS + +#endif /* __F16CINTRIN_H */ diff --git a/c_headers/float.h b/c_headers/float.h new file mode 100644 index 0000000000..238cf76b05 --- /dev/null +++ b/c_headers/float.h @@ -0,0 +1,124 @@ +/*===---- float.h - Characteristics of floating point types ----------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __FLOAT_H +#define __FLOAT_H + +/* If we're on MinGW, fall back to the system's float.h, which might have + * additional definitions provided for Windows. + * For more details see http://msdn.microsoft.com/en-us/library/y0ybw9fy.aspx + */ +#if (defined(__MINGW32__) || defined(_MSC_VER)) && __STDC_HOSTED__ && \ + __has_include_next(<float.h>) +# include_next <float.h> + +/* Undefine anything that we'll be redefining below.
*/ +# undef FLT_EVAL_METHOD +# undef FLT_ROUNDS +# undef FLT_RADIX +# undef FLT_MANT_DIG +# undef DBL_MANT_DIG +# undef LDBL_MANT_DIG +# undef DECIMAL_DIG +# undef FLT_DIG +# undef DBL_DIG +# undef LDBL_DIG +# undef FLT_MIN_EXP +# undef DBL_MIN_EXP +# undef LDBL_MIN_EXP +# undef FLT_MIN_10_EXP +# undef DBL_MIN_10_EXP +# undef LDBL_MIN_10_EXP +# undef FLT_MAX_EXP +# undef DBL_MAX_EXP +# undef LDBL_MAX_EXP +# undef FLT_MAX_10_EXP +# undef DBL_MAX_10_EXP +# undef LDBL_MAX_10_EXP +# undef FLT_MAX +# undef DBL_MAX +# undef LDBL_MAX +# undef FLT_EPSILON +# undef DBL_EPSILON +# undef LDBL_EPSILON +# undef FLT_MIN +# undef DBL_MIN +# undef LDBL_MIN +# if __STDC_VERSION__ >= 201112L || !defined(__STRICT_ANSI__) +# undef FLT_TRUE_MIN +# undef DBL_TRUE_MIN +# undef LDBL_TRUE_MIN +# endif +#endif + +/* Characteristics of floating point types, C99 5.2.4.2.2 */ + +#define FLT_EVAL_METHOD __FLT_EVAL_METHOD__ +#define FLT_ROUNDS (__builtin_flt_rounds()) +#define FLT_RADIX __FLT_RADIX__ + +#define FLT_MANT_DIG __FLT_MANT_DIG__ +#define DBL_MANT_DIG __DBL_MANT_DIG__ +#define LDBL_MANT_DIG __LDBL_MANT_DIG__ + +#define DECIMAL_DIG __DECIMAL_DIG__ + +#define FLT_DIG __FLT_DIG__ +#define DBL_DIG __DBL_DIG__ +#define LDBL_DIG __LDBL_DIG__ + +#define FLT_MIN_EXP __FLT_MIN_EXP__ +#define DBL_MIN_EXP __DBL_MIN_EXP__ +#define LDBL_MIN_EXP __LDBL_MIN_EXP__ + +#define FLT_MIN_10_EXP __FLT_MIN_10_EXP__ +#define DBL_MIN_10_EXP __DBL_MIN_10_EXP__ +#define LDBL_MIN_10_EXP __LDBL_MIN_10_EXP__ + +#define FLT_MAX_EXP __FLT_MAX_EXP__ +#define DBL_MAX_EXP __DBL_MAX_EXP__ +#define LDBL_MAX_EXP __LDBL_MAX_EXP__ + +#define FLT_MAX_10_EXP __FLT_MAX_10_EXP__ +#define DBL_MAX_10_EXP __DBL_MAX_10_EXP__ +#define LDBL_MAX_10_EXP __LDBL_MAX_10_EXP__ + +#define FLT_MAX __FLT_MAX__ +#define DBL_MAX __DBL_MAX__ +#define LDBL_MAX __LDBL_MAX__ + +#define FLT_EPSILON __FLT_EPSILON__ +#define DBL_EPSILON __DBL_EPSILON__ +#define LDBL_EPSILON __LDBL_EPSILON__ + +#define FLT_MIN __FLT_MIN__ +#define DBL_MIN __DBL_MIN__ +#define LDBL_MIN __LDBL_MIN__ + +#if __STDC_VERSION__ >= 201112L || !defined(__STRICT_ANSI__) +# define FLT_TRUE_MIN __FLT_DENORM_MIN__ +# define DBL_TRUE_MIN __DBL_DENORM_MIN__ +# define LDBL_TRUE_MIN __LDBL_DENORM_MIN__ +#endif + +#endif /* __FLOAT_H */ diff --git a/c_headers/fma4intrin.h b/c_headers/fma4intrin.h new file mode 100644 index 0000000000..d6405cf029 --- /dev/null +++ b/c_headers/fma4intrin.h @@ -0,0 +1,236 @@ +/*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __X86INTRIN_H +#error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead." +#endif + +#ifndef __FMA4INTRIN_H +#define __FMA4INTRIN_H + +#ifndef __FMA4__ +# error "FMA4 instruction set is not enabled" +#else + +#include <pmmintrin.h> + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, __C); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, __C); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, __C); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C); +} + +#undef __DEFAULT_FN_ATTRS + +#endif /* __FMA4__ */ + +#endif /* __FMA4INTRIN_H */ diff --git a/c_headers/fmaintrin.h b/c_headers/fmaintrin.h new file mode 100644 index 0000000000..ad693fed0b --- /dev/null +++ b/c_headers/fmaintrin.h @@ -0,0 +1,234 @@ +/*===---- fmaintrin.h - FMA intrinsics -------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use <fmaintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __FMAINTRIN_H +#define __FMAINTRIN_H + +#ifndef __FMA__ +# error "FMA instruction set is not enabled" +#else + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C); +} +
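+/* A minimal usage sketch for the FMA intrinsics above (hypothetical caller
+ * code, not part of this header): each lane is computed with a fused
+ * multiply-add, i.e. a single rounding instead of separate mul and add.
+ */
+#if 0 /* illustration only; a real caller would be compiled with -mfma */
+#include <immintrin.h>
+static __m128 lerp4(__m128 a, __m128 b, __m128 t)
+{
+    return _mm_fmadd_ps(t, _mm_sub_ps(b, a), a); /* t*(b-a) + a, one rounding */
+}
+#endif
+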
+static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, __C); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, __C); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, __C); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C); +} + +#undef __DEFAULT_FN_ATTRS + +#endif /* __FMA__ */ + +#endif /* __FMAINTRIN_H */ diff --git a/c_headers/fxsrintrin.h b/c_headers/fxsrintrin.h new file mode 100644 index 0000000000..2b3549c057 --- /dev/null +++ b/c_headers/fxsrintrin.h @@ -0,0 +1,55 @@ +/*===---- fxsrintrin.h - FXSR intrinsic ------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright 
notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use <fxsrintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __FXSRINTRIN_H +#define __FXSRINTRIN_H + +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) + +static __inline__ void __DEFAULT_FN_ATTRS +_fxsave(void *__p) { + return __builtin_ia32_fxsave(__p); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_fxsave64(void *__p) { + return __builtin_ia32_fxsave64(__p); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_fxrstor(void *__p) { + return __builtin_ia32_fxrstor(__p); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_fxrstor64(void *__p) { + return __builtin_ia32_fxrstor64(__p); +} + +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/c_headers/htmintrin.h b/c_headers/htmintrin.h new file mode 100644 index 0000000000..0088c7ccab --- /dev/null +++ b/c_headers/htmintrin.h @@ -0,0 +1,226 @@ +/*===---- htmintrin.h - Standard header for PowerPC HTM ---------------===*\ + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE.
+ * +\*===----------------------------------------------------------------------===*/ + +#ifndef __HTMINTRIN_H +#define __HTMINTRIN_H + +#ifndef __HTM__ +#error "HTM instruction set not enabled" +#endif + +#ifdef __powerpc__ + +#include <stdint.h> + +typedef uint64_t texasr_t; +typedef uint32_t texasru_t; +typedef uint32_t texasrl_t; +typedef uintptr_t tfiar_t; +typedef uintptr_t tfhar_t; + +#define _HTM_STATE(CR0) ((CR0 >> 1) & 0x3) +#define _HTM_NONTRANSACTIONAL 0x0 +#define _HTM_SUSPENDED 0x1 +#define _HTM_TRANSACTIONAL 0x2 + +#define _TEXASR_EXTRACT_BITS(TEXASR,BITNUM,SIZE) \ + (((TEXASR) >> (63-(BITNUM))) & ((1<<(SIZE))-1)) +#define _TEXASRU_EXTRACT_BITS(TEXASR,BITNUM,SIZE) \ + (((TEXASR) >> (31-(BITNUM))) & ((1<<(SIZE))-1)) + +#define _TEXASR_FAILURE_CODE(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 7, 8) +#define _TEXASRU_FAILURE_CODE(TEXASRU) \ + _TEXASRU_EXTRACT_BITS(TEXASRU, 7, 8) + +#define _TEXASR_FAILURE_PERSISTENT(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 7, 1) +#define _TEXASRU_FAILURE_PERSISTENT(TEXASRU) \ + _TEXASRU_EXTRACT_BITS(TEXASRU, 7, 1) + +#define _TEXASR_DISALLOWED(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 8, 1) +#define _TEXASRU_DISALLOWED(TEXASRU) \ + _TEXASRU_EXTRACT_BITS(TEXASRU, 8, 1) + +#define _TEXASR_NESTING_OVERFLOW(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 9, 1) +#define _TEXASRU_NESTING_OVERFLOW(TEXASRU) \ + _TEXASRU_EXTRACT_BITS(TEXASRU, 9, 1) + +#define _TEXASR_FOOTPRINT_OVERFLOW(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 10, 1) +#define _TEXASRU_FOOTPRINT_OVERFLOW(TEXASRU) \ + _TEXASRU_EXTRACT_BITS(TEXASRU, 10, 1) + +#define _TEXASR_SELF_INDUCED_CONFLICT(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 11, 1) +#define _TEXASRU_SELF_INDUCED_CONFLICT(TEXASRU) \ + _TEXASRU_EXTRACT_BITS(TEXASRU, 11, 1) + +#define _TEXASR_NON_TRANSACTIONAL_CONFLICT(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 12, 1) +#define _TEXASRU_NON_TRANSACTIONAL_CONFLICT(TEXASRU) \ + _TEXASRU_EXTRACT_BITS(TEXASRU, 12, 1) + +#define _TEXASR_TRANSACTION_CONFLICT(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 13, 1) +#define _TEXASRU_TRANSACTION_CONFLICT(TEXASRU) \ + _TEXASRU_EXTRACT_BITS(TEXASRU, 13, 1) + +#define _TEXASR_TRANSLATION_INVALIDATION_CONFLICT(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 14, 1) +#define _TEXASRU_TRANSLATION_INVALIDATION_CONFLICT(TEXASRU) \ + _TEXASRU_EXTRACT_BITS(TEXASRU, 14, 1) + +#define _TEXASR_IMPLEMENTAION_SPECIFIC(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 15, 1) +#define _TEXASRU_IMPLEMENTAION_SPECIFIC(TEXASRU) \ + _TEXASRU_EXTRACT_BITS(TEXASRU, 15, 1) + +#define _TEXASR_INSTRUCTION_FETCH_CONFLICT(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 16, 1) +#define _TEXASRU_INSTRUCTION_FETCH_CONFLICT(TEXASRU) \ + _TEXASRU_EXTRACT_BITS(TEXASRU, 16, 1) + +#define _TEXASR_ABORT(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 31, 1) +#define _TEXASRU_ABORT(TEXASRU) \ + _TEXASRU_EXTRACT_BITS(TEXASRU, 31, 1) + + +#define _TEXASR_SUSPENDED(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 32, 1) + +#define _TEXASR_PRIVILEGE(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 35, 2) + +#define _TEXASR_FAILURE_SUMMARY(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 36, 1) + +#define _TEXASR_TFIAR_EXACT(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 37, 1) + +#define _TEXASR_ROT(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 38, 1) + +#define _TEXASR_TRANSACTION_LEVEL(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 63, 12) + +#endif /* __powerpc__ */ + +#ifdef __s390__ + +/* Condition codes generated by tbegin */ +#define _HTM_TBEGIN_STARTED 0 +#define _HTM_TBEGIN_INDETERMINATE 1 +#define _HTM_TBEGIN_TRANSIENT 2 +#define _HTM_TBEGIN_PERSISTENT 3 + +/* The abort codes
below this threshold are reserved for machine use. */ +#define _HTM_FIRST_USER_ABORT_CODE 256 + +/* The transaction diagnostic block as it is defined in the Principles + of Operation, chapter 5-91. */ + +struct __htm_tdb { + unsigned char format; /* 0 */ + unsigned char flags; + unsigned char reserved1[4]; + unsigned short nesting_depth; + unsigned long long abort_code; /* 8 */ + unsigned long long conflict_token; /* 16 */ + unsigned long long atia; /* 24 */ + unsigned char eaid; /* 32 */ + unsigned char dxc; + unsigned char reserved2[2]; + unsigned int program_int_id; + unsigned long long exception_id; /* 40 */ + unsigned long long bea; /* 48 */ + unsigned char reserved3[72]; /* 56 */ + unsigned long long gprs[16]; /* 128 */ +} __attribute__((__packed__, __aligned__ (8))); + + +/* Helper intrinsics to retry tbegin in case of transient failure. */ + +static __inline int __attribute__((__always_inline__, __nodebug__)) +__builtin_tbegin_retry_null (int retry) +{ + int cc, i = 0; + + while ((cc = __builtin_tbegin(0)) == _HTM_TBEGIN_TRANSIENT + && i++ < retry) + __builtin_tx_assist(i); + + return cc; +} + +static __inline int __attribute__((__always_inline__, __nodebug__)) +__builtin_tbegin_retry_tdb (void *tdb, int retry) +{ + int cc, i = 0; + + while ((cc = __builtin_tbegin(tdb)) == _HTM_TBEGIN_TRANSIENT + && i++ < retry) + __builtin_tx_assist(i); + + return cc; +} + +#define __builtin_tbegin_retry(tdb, retry) \ + (__builtin_constant_p(tdb == 0) && tdb == 0 ? \ + __builtin_tbegin_retry_null(retry) : \ + __builtin_tbegin_retry_tdb(tdb, retry)) + +static __inline int __attribute__((__always_inline__, __nodebug__)) +__builtin_tbegin_retry_nofloat_null (int retry) +{ + int cc, i = 0; + + while ((cc = __builtin_tbegin_nofloat(0)) == _HTM_TBEGIN_TRANSIENT + && i++ < retry) + __builtin_tx_assist(i); + + return cc; +} + +static __inline int __attribute__((__always_inline__, __nodebug__)) +__builtin_tbegin_retry_nofloat_tdb (void *tdb, int retry) +{ + int cc, i = 0; + + while ((cc = __builtin_tbegin_nofloat(tdb)) == _HTM_TBEGIN_TRANSIENT + && i++ < retry) + __builtin_tx_assist(i); + + return cc; +} + +#define __builtin_tbegin_retry_nofloat(tdb, retry) \ + (__builtin_constant_p(tdb == 0) && tdb == 0 ? \ + __builtin_tbegin_retry_nofloat_null(retry) : \ + __builtin_tbegin_retry_nofloat_tdb(tdb, retry)) + +#endif /* __s390__ */ + +#endif /* __HTMINTRIN_H */ diff --git a/c_headers/htmxlintrin.h b/c_headers/htmxlintrin.h new file mode 100644 index 0000000000..30f524d5df --- /dev/null +++ b/c_headers/htmxlintrin.h @@ -0,0 +1,363 @@ +/*===---- htmxlintrin.h - XL compiler HTM execution intrinsics-------------===*\ + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * +\*===----------------------------------------------------------------------===*/ + +#ifndef __HTMXLINTRIN_H +#define __HTMXLINTRIN_H + +#ifndef __HTM__ +#error "HTM instruction set not enabled" +#endif + +#include <htmintrin.h> + +#ifdef __powerpc__ + +#ifdef __cplusplus +extern "C" { +#endif + +#define _TEXASR_PTR(TM_BUF) \ + ((texasr_t *)((TM_BUF)+0)) +#define _TEXASRU_PTR(TM_BUF) \ + ((texasru_t *)((TM_BUF)+0)) +#define _TEXASRL_PTR(TM_BUF) \ + ((texasrl_t *)((TM_BUF)+4)) +#define _TFIAR_PTR(TM_BUF) \ + ((tfiar_t *)((TM_BUF)+8)) + +typedef char TM_buff_type[16]; + +/* This macro can be used to determine whether a transaction was successfully + started from the __TM_begin() and __TM_simple_begin() intrinsic functions + below. */ +#define _HTM_TBEGIN_STARTED 1 + +extern __inline long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_simple_begin (void) +{ + if (__builtin_expect (__builtin_tbegin (0), 1)) + return _HTM_TBEGIN_STARTED; + return 0; +} + +extern __inline long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_begin (void* const TM_buff) +{ + *_TEXASRL_PTR (TM_buff) = 0; + if (__builtin_expect (__builtin_tbegin (0), 1)) + return _HTM_TBEGIN_STARTED; +#ifdef __powerpc64__ + *_TEXASR_PTR (TM_buff) = __builtin_get_texasr (); +#else + *_TEXASRU_PTR (TM_buff) = __builtin_get_texasru (); + *_TEXASRL_PTR (TM_buff) = __builtin_get_texasr (); +#endif + *_TFIAR_PTR (TM_buff) = __builtin_get_tfiar (); + return 0; +} + +extern __inline long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_end (void) +{ + if (__builtin_expect (__builtin_tend (0), 1)) + return 1; + return 0; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_abort (void) +{ + __builtin_tabort (0); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_named_abort (unsigned char const code) +{ + __builtin_tabort (code); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_resume (void) +{ + __builtin_tresume (); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_suspend (void) +{ + __builtin_tsuspend (); +} + +extern __inline long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_user_abort (void* const TM_buff) +{ + texasru_t texasru = *_TEXASRU_PTR (TM_buff); + return _TEXASRU_ABORT (texasru); +} + +extern __inline long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_named_user_abort (void* const TM_buff, unsigned char *code) +{ + texasru_t texasru = *_TEXASRU_PTR (TM_buff); + + *code = _TEXASRU_FAILURE_CODE (texasru); + return _TEXASRU_ABORT (texasru); +} + +extern __inline long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_illegal (void* const TM_buff) +{ + texasru_t texasru = *_TEXASRU_PTR (TM_buff); + return _TEXASRU_DISALLOWED (texasru); +} + +extern __inline long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_footprint_exceeded (void* const TM_buff) +{ + texasru_t texasru = *_TEXASRU_PTR (TM_buff); + return _TEXASRU_FOOTPRINT_OVERFLOW (texasru); +} + +extern __inline
long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_nesting_depth (void* const TM_buff) +{ + texasrl_t texasrl; + + if (_HTM_STATE (__builtin_ttest ()) == _HTM_NONTRANSACTIONAL) + { + texasrl = *_TEXASRL_PTR (TM_buff); + if (!_TEXASR_FAILURE_SUMMARY (texasrl)) + texasrl = 0; + } + else + texasrl = (texasrl_t) __builtin_get_texasr (); + + return _TEXASR_TRANSACTION_LEVEL (texasrl); +} + +extern __inline long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_nested_too_deep(void* const TM_buff) +{ + texasru_t texasru = *_TEXASRU_PTR (TM_buff); + return _TEXASRU_NESTING_OVERFLOW (texasru); +} + +extern __inline long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_conflict(void* const TM_buff) +{ + texasru_t texasru = *_TEXASRU_PTR (TM_buff); + /* Return TEXASR bits 11 (Self-Induced Conflict) through + 14 (Translation Invalidation Conflict). */ + return (_TEXASRU_EXTRACT_BITS (texasru, 14, 4)) ? 1 : 0; +} + +extern __inline long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_failure_persistent(void* const TM_buff) +{ + texasru_t texasru = *_TEXASRU_PTR (TM_buff); + return _TEXASRU_FAILURE_PERSISTENT (texasru); +} + +extern __inline long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_failure_address(void* const TM_buff) +{ + return *_TFIAR_PTR (TM_buff); +} + +extern __inline long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_failure_code(void* const TM_buff) +{ + return *_TEXASR_PTR (TM_buff); +} + +#ifdef __cplusplus +} +#endif + +#endif /* __powerpc__ */ + +#ifdef __s390__ + +#include <htmintrin.h> + +/* These intrinsics are being made available for compatibility with + the IBM XL compiler. For documentation please see the "z/OS XL + C/C++ Programming Guide" publicly available on the web.
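   A rough usage sketch built on these wrappers (illustrative only, not part
   of the original header; on z the begin functions return tbegin's condition
   code, so _HTM_TBEGIN_STARTED here is 0):

     long rc = __TM_simple_begin ();
     if (rc == _HTM_TBEGIN_STARTED) {
       ... transactional work ...
       __TM_end ();
     } else if (__TM_is_failure_persistent (rc)) {
       ... give up and take a fallback path, e.g. a global lock ...
     }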
*/ + +static __inline long __attribute__((__always_inline__, __nodebug__)) +__TM_simple_begin () +{ + return __builtin_tbegin_nofloat (0); +} + +static __inline long __attribute__((__always_inline__, __nodebug__)) +__TM_begin (void* const tdb) +{ + return __builtin_tbegin_nofloat (tdb); +} + +static __inline long __attribute__((__always_inline__, __nodebug__)) +__TM_end () +{ + return __builtin_tend (); +} + +static __inline void __attribute__((__always_inline__)) +__TM_abort () +{ + return __builtin_tabort (_HTM_FIRST_USER_ABORT_CODE); +} + +static __inline void __attribute__((__always_inline__, __nodebug__)) +__TM_named_abort (unsigned char const code) +{ + return __builtin_tabort ((int)_HTM_FIRST_USER_ABORT_CODE + code); +} + +static __inline void __attribute__((__always_inline__, __nodebug__)) +__TM_non_transactional_store (void* const addr, long long const value) +{ + __builtin_non_tx_store ((uint64_t*)addr, (uint64_t)value); +} + +static __inline long __attribute__((__always_inline__, __nodebug__)) +__TM_nesting_depth (void* const tdb_ptr) +{ + int depth = __builtin_tx_nesting_depth (); + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + if (depth != 0) + return depth; + + if (tdb->format != 1) + return 0; + return tdb->nesting_depth; +} + +/* Transaction failure diagnostics */ + +static __inline long __attribute__((__always_inline__, __nodebug__)) +__TM_is_user_abort (void* const tdb_ptr) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + if (tdb->format != 1) + return 0; + + return !!(tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE); +} + +static __inline long __attribute__((__always_inline__, __nodebug__)) +__TM_is_named_user_abort (void* const tdb_ptr, unsigned char* code) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + if (tdb->format != 1) + return 0; + + if (tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE) + { + *code = tdb->abort_code - _HTM_FIRST_USER_ABORT_CODE; + return 1; + } + return 0; +} + +static __inline long __attribute__((__always_inline__, __nodebug__)) +__TM_is_illegal (void* const tdb_ptr) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + return (tdb->format == 1 + && (tdb->abort_code == 4 /* unfiltered program interruption */ + || tdb->abort_code == 11 /* restricted instruction */)); +} + +static __inline long __attribute__((__always_inline__, __nodebug__)) +__TM_is_footprint_exceeded (void* const tdb_ptr) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + return (tdb->format == 1 + && (tdb->abort_code == 7 /* fetch overflow */ + || tdb->abort_code == 8 /* store overflow */)); +} + +static __inline long __attribute__((__always_inline__, __nodebug__)) +__TM_is_nested_too_deep (void* const tdb_ptr) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + return tdb->format == 1 && tdb->abort_code == 13; /* depth exceeded */ +} + +static __inline long __attribute__((__always_inline__, __nodebug__)) +__TM_is_conflict (void* const tdb_ptr) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + return (tdb->format == 1 + && (tdb->abort_code == 9 /* fetch conflict */ + || tdb->abort_code == 10 /* store conflict */)); +} + +static __inline long __attribute__((__always_inline__, __nodebug__)) +__TM_is_failure_persistent (long const result) +{ + return result == _HTM_TBEGIN_PERSISTENT; +} + +static __inline long __attribute__((__always_inline__, __nodebug__)) +__TM_failure_address (void* const tdb_ptr) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + return tdb->atia; +} + +static __inline long 
__attribute__((__always_inline__, __nodebug__)) +__TM_failure_code (void* const tdb_ptr) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + return tdb->abort_code; +} + +#endif /* __s390__ */ + +#endif /* __HTMXLINTRIN_H */ diff --git a/c_headers/ia32intrin.h b/c_headers/ia32intrin.h new file mode 100644 index 0000000000..5adf3f1f5d --- /dev/null +++ b/c_headers/ia32intrin.h @@ -0,0 +1,101 @@ +/* ===-------- ia32intrin.h ---------------------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __X86INTRIN_H +#error "Never use <ia32intrin.h> directly; include <x86intrin.h> instead." +#endif + +#ifndef __IA32INTRIN_H +#define __IA32INTRIN_H + +#ifdef __x86_64__ +static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) +__readeflags(void) +{ + unsigned long long __res = 0; + __asm__ __volatile__ ("pushf\n\t" + "popq %0\n" + :"=r"(__res) + : + : + ); + return __res; +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +__writeeflags(unsigned long long __f) +{ + __asm__ __volatile__ ("pushq %0\n\t" + "popf\n" + : + :"r"(__f) + :"flags" + ); +} + +#else /* !__x86_64__ */ +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__readeflags(void) +{ + unsigned int __res = 0; + __asm__ __volatile__ ("pushf\n\t" + "popl %0\n" + :"=r"(__res) + : + : + ); + return __res; +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +__writeeflags(unsigned int __f) +{ + __asm__ __volatile__ ("pushl %0\n\t" + "popf\n" + : + :"r"(__f) + :"flags" + ); +} +#endif /* !__x86_64__ */ + +static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) +__rdpmc(int __A) { + return __builtin_ia32_rdpmc(__A); +} + +/* __rdtsc */ +static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) +__rdtsc(void) { + return __builtin_ia32_rdtsc(); +} + +/* __rdtscp */ +static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) +__rdtscp(unsigned int *__A) { + return __builtin_ia32_rdtscp(__A); +} + +#define _rdtsc() __rdtsc() + +#endif /* __IA32INTRIN_H */ diff --git a/c_headers/immintrin.h b/c_headers/immintrin.h new file mode 100644 index 0000000000..21ad3281f8 --- /dev/null +++ b/c_headers/immintrin.h @@ -0,0 +1,203 @@ +/*===---- immintrin.h - Intel intrinsics -----------------------------------=== + * + * Permission is hereby
granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#define __IMMINTRIN_H + +#ifdef __MMX__ +#include <mmintrin.h> +#endif + +#ifdef __SSE__ +#include <xmmintrin.h> +#endif + +#ifdef __SSE2__ +#include <emmintrin.h> +#endif + +#ifdef __SSE3__ +#include <pmmintrin.h> +#endif + +#ifdef __SSSE3__ +#include <tmmintrin.h> +#endif + +#if defined (__SSE4_2__) || defined (__SSE4_1__) +#include <smmintrin.h> +#endif + +#if defined (__AES__) || defined (__PCLMUL__) +#include <wmmintrin.h> +#endif + +#ifdef __AVX__ +#include <avxintrin.h> +#endif + +#ifdef __AVX2__ +#include <avx2intrin.h> +#endif + +#ifdef __BMI__ +#include <bmiintrin.h> +#endif + +#ifdef __BMI2__ +#include <bmi2intrin.h> +#endif + +#ifdef __LZCNT__ +#include <lzcntintrin.h> +#endif + +#ifdef __FMA__ +#include <fmaintrin.h> +#endif + +#ifdef __AVX512F__ +#include <avx512fintrin.h> +#endif + +#ifdef __AVX512VL__ +#include <avx512vlintrin.h> +#endif + +#ifdef __AVX512BW__ +#include <avx512bwintrin.h> +#endif + +#ifdef __AVX512CD__ +#include <avx512cdintrin.h> +#endif + +#ifdef __AVX512DQ__ +#include <avx512dqintrin.h> +#endif + +#if defined (__AVX512VL__) && defined (__AVX512BW__) +#include <avx512vlbwintrin.h> +#endif + +#if defined (__AVX512VL__) && defined (__AVX512DQ__) +#include <avx512vldqintrin.h> +#endif + +#ifdef __AVX512ER__ +#include <avx512erintrin.h> +#endif + +#ifdef __RDRND__ +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_rdrand16_step(unsigned short *__p) +{ + return __builtin_ia32_rdrand16_step(__p); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_rdrand32_step(unsigned int *__p) +{ + return __builtin_ia32_rdrand32_step(__p); +} + +#ifdef __x86_64__ +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_rdrand64_step(unsigned long long *__p) +{ + return __builtin_ia32_rdrand64_step(__p); +} +#endif +#endif /* __RDRND__ */ + +#ifdef __FSGSBASE__ +#ifdef __x86_64__ +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +_readfsbase_u32(void) +{ + return __builtin_ia32_rdfsbase32(); +} + +static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) +_readfsbase_u64(void) +{ + return __builtin_ia32_rdfsbase64(); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +_readgsbase_u32(void) +{ + return __builtin_ia32_rdgsbase32(); +} + +static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) +_readgsbase_u64(void) +{ + return __builtin_ia32_rdgsbase64(); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_writefsbase_u32(unsigned int __V) +{ + return __builtin_ia32_wrfsbase32(__V); +} + +static __inline__ void __attribute__((__always_inline__,
__nodebug__)) +_writefsbase_u64(unsigned long long __V) +{ + return __builtin_ia32_wrfsbase64(__V); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_writegsbase_u32(unsigned int __V) +{ + return __builtin_ia32_wrgsbase32(__V); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_writegsbase_u64(unsigned long long __V) +{ + return __builtin_ia32_wrgsbase64(__V); +} +#endif +#endif /* __FSGSBASE__ */ + +#ifdef __RTM__ +#include <rtmintrin.h> +#endif + +#ifdef __RTM__ +#include <xtestintrin.h> +#endif + +#ifdef __SHA__ +#include <shaintrin.h> +#endif + +#include <fxsrintrin.h> + +/* Some intrinsics inside adxintrin.h are available only on processors with ADX, + * whereas others are also available at all times. */ +#include <adxintrin.h> + +#endif /* __IMMINTRIN_H */ diff --git a/c_headers/inttypes.h b/c_headers/inttypes.h new file mode 100644 index 0000000000..3d59d141de --- /dev/null +++ b/c_headers/inttypes.h @@ -0,0 +1,102 @@ +/*===---- inttypes.h - Standard header for integer printf macros ----------===*\ + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * +\*===----------------------------------------------------------------------===*/ + +#ifndef __CLANG_INTTYPES_H +#define __CLANG_INTTYPES_H + +#include_next <inttypes.h> + +#if defined(_MSC_VER) && _MSC_VER < 1900 +/* MSVC headers define int32_t as int, but PRIx32 as "lx" instead of "x". + * This triggers format warnings, so fix it up here.
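For instance, printf("%" PRIx32 "\n", v) for an int32_t v would pair MSVC's "lx" spelling with an int argument and warn; the plain "x"/"d" spellings below match the int-based typedefs. (Illustrative example, not in the original header.)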
*/ +#undef PRId32 +#undef PRIdLEAST32 +#undef PRIdFAST32 +#undef PRIi32 +#undef PRIiLEAST32 +#undef PRIiFAST32 +#undef PRIo32 +#undef PRIoLEAST32 +#undef PRIoFAST32 +#undef PRIu32 +#undef PRIuLEAST32 +#undef PRIuFAST32 +#undef PRIx32 +#undef PRIxLEAST32 +#undef PRIxFAST32 +#undef PRIX32 +#undef PRIXLEAST32 +#undef PRIXFAST32 + +#undef SCNd32 +#undef SCNdLEAST32 +#undef SCNdFAST32 +#undef SCNi32 +#undef SCNiLEAST32 +#undef SCNiFAST32 +#undef SCNo32 +#undef SCNoLEAST32 +#undef SCNoFAST32 +#undef SCNu32 +#undef SCNuLEAST32 +#undef SCNuFAST32 +#undef SCNx32 +#undef SCNxLEAST32 +#undef SCNxFAST32 + +#define PRId32 "d" +#define PRIdLEAST32 "d" +#define PRIdFAST32 "d" +#define PRIi32 "i" +#define PRIiLEAST32 "i" +#define PRIiFAST32 "i" +#define PRIo32 "o" +#define PRIoLEAST32 "o" +#define PRIoFAST32 "o" +#define PRIu32 "u" +#define PRIuLEAST32 "u" +#define PRIuFAST32 "u" +#define PRIx32 "x" +#define PRIxLEAST32 "x" +#define PRIxFAST32 "x" +#define PRIX32 "X" +#define PRIXLEAST32 "X" +#define PRIXFAST32 "X" + +#define SCNd32 "d" +#define SCNdLEAST32 "d" +#define SCNdFAST32 "d" +#define SCNi32 "i" +#define SCNiLEAST32 "i" +#define SCNiFAST32 "i" +#define SCNo32 "o" +#define SCNoLEAST32 "o" +#define SCNoFAST32 "o" +#define SCNu32 "u" +#define SCNuLEAST32 "u" +#define SCNuFAST32 "u" +#define SCNx32 "x" +#define SCNxLEAST32 "x" +#define SCNxFAST32 "x" +#endif + +#endif /* __CLANG_INTTYPES_H */ diff --git a/c_headers/iso646.h b/c_headers/iso646.h new file mode 100644 index 0000000000..dca13c5bab --- /dev/null +++ b/c_headers/iso646.h @@ -0,0 +1,43 @@ +/*===---- iso646.h - Standard header for alternate spellings of operators---=== + * + * Copyright (c) 2008 Eli Friedman + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __ISO646_H +#define __ISO646_H + +#ifndef __cplusplus +#define and && +#define and_eq &= +#define bitand & +#define bitor | +#define compl ~ +#define not ! 
+#define not_eq != +#define or || +#define or_eq |= +#define xor ^ +#define xor_eq ^= +#endif + +#endif /* __ISO646_H */ diff --git a/c_headers/limits.h b/c_headers/limits.h new file mode 100644 index 0000000000..f04187ced2 --- /dev/null +++ b/c_headers/limits.h @@ -0,0 +1,118 @@ +/*===---- limits.h - Standard header for integer sizes --------------------===*\ + * + * Copyright (c) 2009 Chris Lattner + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * +\*===----------------------------------------------------------------------===*/ + +#ifndef __CLANG_LIMITS_H +#define __CLANG_LIMITS_H + +/* The system's limits.h may, in turn, try to #include_next GCC's limits.h. + Avert this #include_next madness. */ +#if defined __GNUC__ && !defined _GCC_LIMITS_H_ +#define _GCC_LIMITS_H_ +#endif + +/* System headers include a number of constants from POSIX in <limits.h>. + Include it if we're hosted. */ +#if __STDC_HOSTED__ && __has_include_next(<limits.h>) +#include_next <limits.h> +#endif + +/* Many system headers try to "help us out" by defining these. No really, we + know how big each datatype is. */ +#undef SCHAR_MIN +#undef SCHAR_MAX +#undef UCHAR_MAX +#undef SHRT_MIN +#undef SHRT_MAX +#undef USHRT_MAX +#undef INT_MIN +#undef INT_MAX +#undef UINT_MAX +#undef LONG_MIN +#undef LONG_MAX +#undef ULONG_MAX + +#undef CHAR_BIT +#undef CHAR_MIN +#undef CHAR_MAX + +/* C90/99 5.2.4.2.1 */ +#define SCHAR_MAX __SCHAR_MAX__ +#define SHRT_MAX __SHRT_MAX__ +#define INT_MAX __INT_MAX__ +#define LONG_MAX __LONG_MAX__ + +#define SCHAR_MIN (-__SCHAR_MAX__-1) +#define SHRT_MIN (-__SHRT_MAX__ -1) +#define INT_MIN (-__INT_MAX__ -1) +#define LONG_MIN (-__LONG_MAX__ -1L) + +#define UCHAR_MAX (__SCHAR_MAX__*2 +1) +#define USHRT_MAX (__SHRT_MAX__ *2 +1) +#define UINT_MAX (__INT_MAX__ *2U +1U) +#define ULONG_MAX (__LONG_MAX__ *2UL+1UL) + +#ifndef MB_LEN_MAX +#define MB_LEN_MAX 1 +#endif + +#define CHAR_BIT __CHAR_BIT__ + +#ifdef __CHAR_UNSIGNED__ /* -funsigned-char */ +#define CHAR_MIN 0 +#define CHAR_MAX UCHAR_MAX +#else +#define CHAR_MIN SCHAR_MIN +#define CHAR_MAX __SCHAR_MAX__ +#endif + +/* C99 5.2.4.2.1: Added long long. + C++11 18.3.3.2: same contents as the Standard C Library header . + */ +#if __STDC_VERSION__ >= 199901L || __cplusplus >= 201103L + +#undef LLONG_MIN +#undef LLONG_MAX +#undef ULLONG_MAX + +#define LLONG_MAX __LONG_LONG_MAX__ +#define LLONG_MIN (-__LONG_LONG_MAX__-1LL) +#define ULLONG_MAX (__LONG_LONG_MAX__*2ULL+1ULL) +#endif + +/* LONG_LONG_MIN/LONG_LONG_MAX/ULONG_LONG_MAX are a GNU extension. It's too bad
It's too bad + that we don't have something like #pragma poison that could be used to + deprecate a macro - the code should just use LLONG_MAX and friends. + */ +#if defined(__GNU_LIBRARY__) ? defined(__USE_GNU) : !defined(__STRICT_ANSI__) + +#undef LONG_LONG_MIN +#undef LONG_LONG_MAX +#undef ULONG_LONG_MAX + +#define LONG_LONG_MAX __LONG_LONG_MAX__ +#define LONG_LONG_MIN (-__LONG_LONG_MAX__-1LL) +#define ULONG_LONG_MAX (__LONG_LONG_MAX__*2ULL+1ULL) +#endif + +#endif /* __CLANG_LIMITS_H */ diff --git a/c_headers/lzcntintrin.h b/c_headers/lzcntintrin.h new file mode 100644 index 0000000000..8ee29975c2 --- /dev/null +++ b/c_headers/lzcntintrin.h @@ -0,0 +1,72 @@ +/*===---- lzcntintrin.h - LZCNT intrinsics ---------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __LZCNT__ +# error "LZCNT instruction is not enabled" +#endif /* __LZCNT__ */ + +#ifndef __LZCNTINTRIN_H +#define __LZCNTINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) + +static __inline__ unsigned short __DEFAULT_FN_ATTRS +__lzcnt16(unsigned short __X) +{ + return __X ? __builtin_clzs(__X) : 16; +} + +static __inline__ unsigned int __DEFAULT_FN_ATTRS +__lzcnt32(unsigned int __X) +{ + return __X ? __builtin_clz(__X) : 32; +} + +static __inline__ unsigned int __DEFAULT_FN_ATTRS +_lzcnt_u32(unsigned int __X) +{ + return __X ? __builtin_clz(__X) : 32; +} + +#ifdef __x86_64__ +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +__lzcnt64(unsigned long long __X) +{ + return __X ? __builtin_clzll(__X) : 64; +} + +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +_lzcnt_u64(unsigned long long __X) +{ + return __X ? __builtin_clzll(__X) : 64; +} +#endif + +#undef __DEFAULT_FN_ATTRS + +#endif /* __LZCNTINTRIN_H */ diff --git a/c_headers/mm3dnow.h b/c_headers/mm3dnow.h new file mode 100644 index 0000000000..ac8e0f4af1 --- /dev/null +++ b/c_headers/mm3dnow.h @@ -0,0 +1,167 @@ +/*===---- mm3dnow.h - 3DNow! 
intrinsics ------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef _MM3DNOW_H_INCLUDED +#define _MM3DNOW_H_INCLUDED + +#include <mmintrin.h> +#include <prfchwintrin.h> + +typedef float __v2sf __attribute__((__vector_size__(8))); + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) + +static __inline__ void __DEFAULT_FN_ATTRS +_m_femms() { + __builtin_ia32_femms(); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_m_pavgusb(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pavgusb((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_m_pf2id(__m64 __m) { + return (__m64)__builtin_ia32_pf2id((__v2sf)__m); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_m_pfacc(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfacc((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_m_pfadd(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfadd((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_m_pfcmpeq(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfcmpeq((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_m_pfcmpge(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfcmpge((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_m_pfcmpgt(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfcmpgt((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_m_pfmax(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfmax((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_m_pfmin(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfmin((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_m_pfmul(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfmul((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_m_pfrcp(__m64 __m) { + return (__m64)__builtin_ia32_pfrcp((__v2sf)__m); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_m_pfrcpit1(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfrcpit1((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_m_pfrcpit2(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfrcpit2((__v2sf)__m1, (__v2sf)__m2); +} +
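+/* A sketch of how the reciprocal intrinsics above chain together (hypothetical
+ * caller code, not part of this header; the operand order follows the usual
+ * PFRCP -> PFRCPIT1 -> PFRCPIT2 refinement sequence and should be treated as
+ * an assumption, not a definitive reference):
+ */
+#if 0 /* illustration only */
+static __m64 recip2(__m64 a)
+{
+    __m64 est = _m_pfrcp(a);         /* low-precision 1/a estimate */
+    __m64 it1 = _m_pfrcpit1(a, est); /* first refinement step      */
+    return _m_pfrcpit2(it1, est);    /* second step, full result   */
+}
+#endif
+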
+static __inline__ __m64 __DEFAULT_FN_ATTRS +_m_pfrsqrt(__m64 __m) { + return (__m64)__builtin_ia32_pfrsqrt((__v2sf)__m); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_m_pfrsqrtit1(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfrsqit1((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_m_pfsub(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfsub((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_m_pfsubr(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfsubr((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_m_pi2fd(__m64 __m) { + return (__m64)__builtin_ia32_pi2fd((__v2si)__m); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_m_pmulhrw(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pmulhrw((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_m_pf2iw(__m64 __m) { + return (__m64)__builtin_ia32_pf2iw((__v2sf)__m); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_m_pfnacc(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfnacc((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_m_pfpnacc(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfpnacc((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_m_pi2fw(__m64 __m) { + return (__m64)__builtin_ia32_pi2fw((__v2si)__m); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_m_pswapdsf(__m64 __m) { + return (__m64)__builtin_ia32_pswapdsf((__v2sf)__m); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_m_pswapdsi(__m64 __m) { + return (__m64)__builtin_ia32_pswapdsi((__v2si)__m); +} + +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/c_headers/mm_malloc.h b/c_headers/mm_malloc.h new file mode 100644 index 0000000000..305afd31ad --- /dev/null +++ b/c_headers/mm_malloc.h @@ -0,0 +1,75 @@ +/*===---- mm_malloc.h - Allocating and Freeing Aligned Memory Blocks -------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __MM_MALLOC_H +#define __MM_MALLOC_H + +#include <stdlib.h> + +#ifdef _WIN32 +#include <malloc.h> +#else +#ifndef __cplusplus +extern int posix_memalign(void **__memptr, size_t __alignment, size_t __size); +#else +// Some systems (e.g. those with GNU libc) declare posix_memalign with an
Via an "egregious workaround" in +// Sema::CheckEquivalentExceptionSpec, Clang accepts the following as a valid +// redeclaration of glibc's declaration. +extern "C" int posix_memalign(void **__memptr, size_t __alignment, size_t __size); +#endif +#endif + +#if !(defined(_WIN32) && defined(_mm_malloc)) +static __inline__ void *__attribute__((__always_inline__, __nodebug__, + __malloc__)) +_mm_malloc(size_t __size, size_t __align) +{ + if (__align == 1) { + return malloc(__size); + } + + if (!(__align & (__align - 1)) && __align < sizeof(void *)) + __align = sizeof(void *); + + void *__mallocedMemory; +#if defined(__MINGW32__) + __mallocedMemory = __mingw_aligned_malloc(__size, __align); +#elif defined(_WIN32) + __mallocedMemory = _aligned_malloc(__size, __align); +#else + if (posix_memalign(&__mallocedMemory, __align, __size)) + return 0; +#endif + + return __mallocedMemory; +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_free(void *__p) +{ + free(__p); +} +#endif + +#endif /* __MM_MALLOC_H */ diff --git a/c_headers/mmintrin.h b/c_headers/mmintrin.h new file mode 100644 index 0000000000..0be5f32c7d --- /dev/null +++ b/c_headers/mmintrin.h @@ -0,0 +1,507 @@ +/*===---- mmintrin.h - MMX intrinsics --------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __MMINTRIN_H +#define __MMINTRIN_H + +#ifndef __MMX__ +#error "MMX instruction set not enabled" +#else + +typedef long long __m64 __attribute__((__vector_size__(8))); + +typedef int __v2si __attribute__((__vector_size__(8))); +typedef short __v4hi __attribute__((__vector_size__(8))); +typedef char __v8qi __attribute__((__vector_size__(8))); + +/* Define the default attributes for the functions in this file. 
*/ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_empty(void) +{ + __builtin_ia32_emms(); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_cvtsi32_si64(int __i) +{ + return (__m64)__builtin_ia32_vec_init_v2si(__i, 0); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_cvtsi64_si32(__m64 __m) +{ + return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_cvtsi64_m64(long long __i) +{ + return (__m64)__i; +} + +static __inline__ long long __DEFAULT_FN_ATTRS +_mm_cvtm64_si64(__m64 __m) +{ + return (long long)__m; +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_packs_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_packs_pi32(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_packs_pu16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_unpackhi_pi8(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_unpackhi_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_unpackhi_pi32(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_unpacklo_pi8(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_unpacklo_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_unpacklo_pi32(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_add_pi8(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_add_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_add_pi32(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_adds_pi8(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_adds_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_adds_pu8(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_adds_pu16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_sub_pi8(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_sub_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ 
__m64 __DEFAULT_FN_ATTRS +_mm_sub_pi32(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_subs_pi8(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_subs_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_subs_pu8(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_subs_pu16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_madd_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_mulhi_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_mullo_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_sll_pi16(__m64 __m, __m64 __count) +{ + return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_slli_pi16(__m64 __m, int __count) +{ + return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_sll_pi32(__m64 __m, __m64 __count) +{ + return (__m64)__builtin_ia32_pslld((__v2si)__m, __count); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_slli_pi32(__m64 __m, int __count) +{ + return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_sll_si64(__m64 __m, __m64 __count) +{ + return (__m64)__builtin_ia32_psllq(__m, __count); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_slli_si64(__m64 __m, int __count) +{ + return (__m64)__builtin_ia32_psllqi(__m, __count); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_sra_pi16(__m64 __m, __m64 __count) +{ + return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_srai_pi16(__m64 __m, int __count) +{ + return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_sra_pi32(__m64 __m, __m64 __count) +{ + return (__m64)__builtin_ia32_psrad((__v2si)__m, __count); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_srai_pi32(__m64 __m, int __count) +{ + return (__m64)__builtin_ia32_psradi((__v2si)__m, __count); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_srl_pi16(__m64 __m, __m64 __count) +{ + return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_srli_pi16(__m64 __m, int __count) +{ + return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_srl_pi32(__m64 __m, __m64 __count) +{ + return (__m64)__builtin_ia32_psrld((__v2si)__m, __count); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_srli_pi32(__m64 __m, int __count) +{ + return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_srl_si64(__m64 __m, __m64 __count) +{ + return (__m64)__builtin_ia32_psrlq(__m, __count); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS 
+_mm_srli_si64(__m64 __m, int __count) +{ + return (__m64)__builtin_ia32_psrlqi(__m, __count); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_and_si64(__m64 __m1, __m64 __m2) +{ + return __builtin_ia32_pand(__m1, __m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_andnot_si64(__m64 __m1, __m64 __m2) +{ + return __builtin_ia32_pandn(__m1, __m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_or_si64(__m64 __m1, __m64 __m2) +{ + return __builtin_ia32_por(__m1, __m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_xor_si64(__m64 __m1, __m64 __m2) +{ + return __builtin_ia32_pxor(__m1, __m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_cmpeq_pi8(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_cmpeq_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_cmpeq_pi32(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_cmpgt_pi8(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_cmpgt_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_cmpgt_pi32(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_setzero_si64(void) +{ + return (__m64){ 0LL }; +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_set_pi32(int __i1, int __i0) +{ + return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_set_pi16(short __s3, short __s2, short __s1, short __s0) +{ + return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, + char __b1, char __b0) +{ + return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3, + __b4, __b5, __b6, __b7); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_set1_pi32(int __i) +{ + return _mm_set_pi32(__i, __i); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_set1_pi16(short __w) +{ + return _mm_set_pi16(__w, __w, __w, __w); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_set1_pi8(char __b) +{ + return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_setr_pi32(int __i0, int __i1) +{ + return _mm_set_pi32(__i1, __i0); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) +{ + return _mm_set_pi16(__w3, __w2, __w1, __w0); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, + char __b6, char __b7) +{ + return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); +} + +#undef __DEFAULT_FN_ATTRS + +/* Aliases for compatibility. 
*/ +#define _m_empty _mm_empty +#define _m_from_int _mm_cvtsi32_si64 +#define _m_to_int _mm_cvtsi64_si32 +#define _m_packsswb _mm_packs_pi16 +#define _m_packssdw _mm_packs_pi32 +#define _m_packuswb _mm_packs_pu16 +#define _m_punpckhbw _mm_unpackhi_pi8 +#define _m_punpckhwd _mm_unpackhi_pi16 +#define _m_punpckhdq _mm_unpackhi_pi32 +#define _m_punpcklbw _mm_unpacklo_pi8 +#define _m_punpcklwd _mm_unpacklo_pi16 +#define _m_punpckldq _mm_unpacklo_pi32 +#define _m_paddb _mm_add_pi8 +#define _m_paddw _mm_add_pi16 +#define _m_paddd _mm_add_pi32 +#define _m_paddsb _mm_adds_pi8 +#define _m_paddsw _mm_adds_pi16 +#define _m_paddusb _mm_adds_pu8 +#define _m_paddusw _mm_adds_pu16 +#define _m_psubb _mm_sub_pi8 +#define _m_psubw _mm_sub_pi16 +#define _m_psubd _mm_sub_pi32 +#define _m_psubsb _mm_subs_pi8 +#define _m_psubsw _mm_subs_pi16 +#define _m_psubusb _mm_subs_pu8 +#define _m_psubusw _mm_subs_pu16 +#define _m_pmaddwd _mm_madd_pi16 +#define _m_pmulhw _mm_mulhi_pi16 +#define _m_pmullw _mm_mullo_pi16 +#define _m_psllw _mm_sll_pi16 +#define _m_psllwi _mm_slli_pi16 +#define _m_pslld _mm_sll_pi32 +#define _m_pslldi _mm_slli_pi32 +#define _m_psllq _mm_sll_si64 +#define _m_psllqi _mm_slli_si64 +#define _m_psraw _mm_sra_pi16 +#define _m_psrawi _mm_srai_pi16 +#define _m_psrad _mm_sra_pi32 +#define _m_psradi _mm_srai_pi32 +#define _m_psrlw _mm_srl_pi16 +#define _m_psrlwi _mm_srli_pi16 +#define _m_psrld _mm_srl_pi32 +#define _m_psrldi _mm_srli_pi32 +#define _m_psrlq _mm_srl_si64 +#define _m_psrlqi _mm_srli_si64 +#define _m_pand _mm_and_si64 +#define _m_pandn _mm_andnot_si64 +#define _m_por _mm_or_si64 +#define _m_pxor _mm_xor_si64 +#define _m_pcmpeqb _mm_cmpeq_pi8 +#define _m_pcmpeqw _mm_cmpeq_pi16 +#define _m_pcmpeqd _mm_cmpeq_pi32 +#define _m_pcmpgtb _mm_cmpgt_pi8 +#define _m_pcmpgtw _mm_cmpgt_pi16 +#define _m_pcmpgtd _mm_cmpgt_pi32 + +#endif /* __MMX__ */ + +#endif /* __MMINTRIN_H */ + diff --git a/c_headers/nmmintrin.h b/c_headers/nmmintrin.h new file mode 100644 index 0000000000..f12622d7be --- /dev/null +++ b/c_headers/nmmintrin.h @@ -0,0 +1,35 @@ +/*===---- nmmintrin.h - SSE4 intrinsics ------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef _NMMINTRIN_H
+#define _NMMINTRIN_H
+
+#ifndef __SSE4_2__
+#error "SSE4.2 instruction set not enabled"
+#else
+
+/* To match expectations of gcc we put the sse4.2 definitions into smmintrin.h,
+   just include it now then. */
+#include <smmintrin.h>
+#endif /* __SSE4_2__ */
+#endif /* _NMMINTRIN_H */
diff --git a/c_headers/pmmintrin.h b/c_headers/pmmintrin.h
new file mode 100644
index 0000000000..e1b8d9b603
--- /dev/null
+++ b/c_headers/pmmintrin.h
@@ -0,0 +1,122 @@
+/*===---- pmmintrin.h - SSE3 intrinsics ------------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __PMMINTRIN_H
+#define __PMMINTRIN_H
+
+#ifndef __SSE3__
+#error "SSE3 instruction set not enabled"
+#else
+
+#include <emmintrin.h>
+
+/* Define the default attributes for the functions in this file.
*/ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_lddqu_si128(__m128i const *__p) +{ + return (__m128i)__builtin_ia32_lddqu((char const *)__p); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_addsub_ps(__m128 __a, __m128 __b) +{ + return __builtin_ia32_addsubps(__a, __b); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_hadd_ps(__m128 __a, __m128 __b) +{ + return __builtin_ia32_haddps(__a, __b); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_hsub_ps(__m128 __a, __m128 __b) +{ + return __builtin_ia32_hsubps(__a, __b); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_movehdup_ps(__m128 __a) +{ + return __builtin_shufflevector(__a, __a, 1, 1, 3, 3); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_moveldup_ps(__m128 __a) +{ + return __builtin_shufflevector(__a, __a, 0, 0, 2, 2); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_addsub_pd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_addsubpd(__a, __b); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_hadd_pd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_haddpd(__a, __b); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_hsub_pd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_hsubpd(__a, __b); +} + +#define _mm_loaddup_pd(dp) _mm_load1_pd(dp) + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_movedup_pd(__m128d __a) +{ + return __builtin_shufflevector(__a, __a, 0, 0); +} + +#define _MM_DENORMALS_ZERO_ON (0x0040) +#define _MM_DENORMALS_ZERO_OFF (0x0000) + +#define _MM_DENORMALS_ZERO_MASK (0x0040) + +#define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK) +#define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x))) + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_monitor(void const *__p, unsigned __extensions, unsigned __hints) +{ + __builtin_ia32_monitor((void *)__p, __extensions, __hints); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mwait(unsigned __extensions, unsigned __hints) +{ + __builtin_ia32_mwait(__extensions, __hints); +} + +#undef __DEFAULT_FN_ATTRS + +#endif /* __SSE3__ */ + +#endif /* __PMMINTRIN_H */ diff --git a/c_headers/popcntintrin.h b/c_headers/popcntintrin.h new file mode 100644 index 0000000000..1a4e9000ae --- /dev/null +++ b/c_headers/popcntintrin.h @@ -0,0 +1,50 @@ +/*===---- popcntintrin.h - POPCNT intrinsics -------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __POPCNT__
+#error "POPCNT instruction set not enabled"
+#endif
+
+#ifndef _POPCNTINTRIN_H
+#define _POPCNTINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+
+static __inline__ int __DEFAULT_FN_ATTRS
+_mm_popcnt_u32(unsigned int __A)
+{
+  return __builtin_popcount(__A);
+}
+
+#ifdef __x86_64__
+static __inline__ long long __DEFAULT_FN_ATTRS
+_mm_popcnt_u64(unsigned long long __A)
+{
+  return __builtin_popcountll(__A);
+}
+#endif /* __x86_64__ */
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif /* _POPCNTINTRIN_H */
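A minimal sketch of how the popcount wrapper is used (assumes a build with -mpopcnt; the wrapper lowers to the POPCNT instruction via __builtin_popcount):

    #include <popcntintrin.h>

    /* Count the set bits across a buffer of 32-bit words. */
    unsigned count_bits(const unsigned int *words, unsigned n) {
        unsigned total = 0;
        for (unsigned i = 0; i < n; i++)
            total += (unsigned)_mm_popcnt_u32(words[i]);
        return total;
    }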
diff --git a/c_headers/prfchwintrin.h b/c_headers/prfchwintrin.h
new file mode 100644
index 0000000000..9825bd8c97
--- /dev/null
+++ b/c_headers/prfchwintrin.h
@@ -0,0 +1,39 @@
+/*===---- prfchwintrin.h - PREFETCHW intrinsic -----------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#if !defined(__X86INTRIN_H) && !defined(_MM3DNOW_H_INCLUDED)
+#error "Never use <prfchwintrin.h> directly; include <x86intrin.h> or <mm3dnow.h> instead."
+#endif
+
+#ifndef __PRFCHWINTRIN_H
+#define __PRFCHWINTRIN_H
+
+#if defined(__PRFCHW__) || defined(__3dNOW__)
+static __inline__ void __attribute__((__always_inline__, __nodebug__))
+_m_prefetchw(void *__P)
+{
+  __builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */);
+}
+#endif
+
+#endif /* __PRFCHWINTRIN_H */
diff --git a/c_headers/rdseedintrin.h b/c_headers/rdseedintrin.h
new file mode 100644
index 0000000000..fdf7e18afa
--- /dev/null
+++ b/c_headers/rdseedintrin.h
@@ -0,0 +1,59 @@
+/*===---- rdseedintrin.h - RDSEED intrinsics -------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __X86INTRIN_H
+#error "Never use <rdseedintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef __RDSEEDINTRIN_H
+#define __RDSEEDINTRIN_H
+
+#ifdef __RDSEED__
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+
+static __inline__ int __DEFAULT_FN_ATTRS
+_rdseed16_step(unsigned short *__p)
+{
+  return __builtin_ia32_rdseed16_step(__p);
+}
+
+static __inline__ int __DEFAULT_FN_ATTRS
+_rdseed32_step(unsigned int *__p)
+{
+  return __builtin_ia32_rdseed32_step(__p);
+}
+
+#ifdef __x86_64__
+static __inline__ int __DEFAULT_FN_ATTRS
+_rdseed64_step(unsigned long long *__p)
+{
+  return __builtin_ia32_rdseed64_step(__p);
+}
+#endif
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif /* __RDSEED__ */
+#endif /* __RDSEEDINTRIN_H */
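A minimal sketch of the usual RDSEED calling pattern (assumes -mrdseed on supporting hardware): the step functions return 0 when the entropy source is transiently empty, so callers retry.

    #include <x86intrin.h>

    /* Fetch one 32-bit hardware seed, retrying on transient failure. */
    int read_hw_seed(unsigned int *out) {
        for (int tries = 0; tries < 100; ++tries) {
            if (_rdseed32_step(out))
                return 1;  /* *out now holds a hardware-sourced seed */
        }
        return 0;          /* give up; fall back to another entropy source */
    }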
diff --git a/c_headers/rtmintrin.h b/c_headers/rtmintrin.h
new file mode 100644
index 0000000000..17256815fb
--- /dev/null
+++ b/c_headers/rtmintrin.h
@@ -0,0 +1,59 @@
+/*===---- rtmintrin.h - RTM intrinsics -------------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error "Never use <rtmintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __RTMINTRIN_H
+#define __RTMINTRIN_H
+
+#define _XBEGIN_STARTED (~0u)
+#define _XABORT_EXPLICIT (1 << 0)
+#define _XABORT_RETRY (1 << 1)
+#define _XABORT_CONFLICT (1 << 2)
+#define _XABORT_CAPACITY (1 << 3)
+#define _XABORT_DEBUG (1 << 4)
+#define _XABORT_NESTED (1 << 5)
+#define _XABORT_CODE(x) (((x) >> 24) & 0xFF)
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
+_xbegin(void)
+{
+  return __builtin_ia32_xbegin();
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_xend(void)
+{
+  __builtin_ia32_xend();
+}
+
+#define _xabort(imm) __builtin_ia32_xabort((imm))
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif /* __RTMINTRIN_H */
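A minimal sketch of the RTM begin/end pattern (assumes -mrtm and RTM-capable hardware; production code would fall back to a lock rather than a bare atomic add):

    #include <immintrin.h>

    /* Transactional increment with a non-transactional fallback. */
    void rtm_inc(long *counter) {
        unsigned int status = _xbegin();
        if (status == _XBEGIN_STARTED) {
            ++*counter;   /* runs inside the hardware transaction */
            _xend();
        } else {
            /* aborted: status carries _XABORT_* flags explaining why */
            __sync_fetch_and_add(counter, 1);
        }
    }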
diff --git a/c_headers/s390intrin.h b/c_headers/s390intrin.h
new file mode 100644
index 0000000000..d51274c07d
--- /dev/null
+++ b/c_headers/s390intrin.h
@@ -0,0 +1,39 @@
+/*===---- s390intrin.h - SystemZ intrinsics --------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __S390INTRIN_H
+#define __S390INTRIN_H
+
+#ifndef __s390__
+#error "<s390intrin.h> is for s390 only"
+#endif
+
+#ifdef __HTM__
+#include <htmintrin.h>
+#endif
+
+#ifdef __VEC__
+#include <vecintrin.h>
+#endif
+
+#endif /* __S390INTRIN_H*/
diff --git a/c_headers/shaintrin.h b/c_headers/shaintrin.h
new file mode 100644
index 0000000000..960cced7a5
--- /dev/null
+++ b/c_headers/shaintrin.h
@@ -0,0 +1,79 @@
+/*===---- shaintrin.h - SHA intrinsics -------------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error "Never use <shaintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __SHAINTRIN_H
+#define __SHAINTRIN_H
+
+#if !defined (__SHA__)
+#  error "SHA instructions not enabled"
+#endif
+
+/* Define the default attributes for the functions in this file.
*/
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+
+#define _mm_sha1rnds4_epu32(V1, V2, M) __extension__ ({ \
+  __builtin_ia32_sha1rnds4((V1), (V2), (M)); })
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_sha1nexte_epu32(__m128i __X, __m128i __Y)
+{
+  return (__m128i)__builtin_ia32_sha1nexte((__v4si)__X, (__v4si)__Y);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_sha1msg1_epu32(__m128i __X, __m128i __Y)
+{
+  return (__m128i)__builtin_ia32_sha1msg1((__v4si)__X, (__v4si)__Y);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_sha1msg2_epu32(__m128i __X, __m128i __Y)
+{
+  return (__m128i)__builtin_ia32_sha1msg2((__v4si)__X, (__v4si)__Y);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_sha256rnds2_epu32(__m128i __X, __m128i __Y, __m128i __Z)
+{
+  return (__m128i)__builtin_ia32_sha256rnds2((__v4si)__X, (__v4si)__Y, (__v4si)__Z);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_sha256msg1_epu32(__m128i __X, __m128i __Y)
+{
+  return (__m128i)__builtin_ia32_sha256msg1((__v4si)__X, (__v4si)__Y);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_sha256msg2_epu32(__m128i __X, __m128i __Y)
+{
+  return (__m128i)__builtin_ia32_sha256msg2((__v4si)__X, (__v4si)__Y);
+}
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif /* __SHAINTRIN_H */
diff --git a/c_headers/smmintrin.h b/c_headers/smmintrin.h
new file mode 100644
index 0000000000..04bd0722b1
--- /dev/null
+++ b/c_headers/smmintrin.h
@@ -0,0 +1,487 @@
+/*===---- smmintrin.h - SSE4 intrinsics ------------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef _SMMINTRIN_H
+#define _SMMINTRIN_H
+
+#ifndef __SSE4_1__
+#error "SSE4.1 instruction set not enabled"
+#else
+
+#include <tmmintrin.h>
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+
+/* SSE4 Rounding macros.
*/ +#define _MM_FROUND_TO_NEAREST_INT 0x00 +#define _MM_FROUND_TO_NEG_INF 0x01 +#define _MM_FROUND_TO_POS_INF 0x02 +#define _MM_FROUND_TO_ZERO 0x03 +#define _MM_FROUND_CUR_DIRECTION 0x04 + +#define _MM_FROUND_RAISE_EXC 0x00 +#define _MM_FROUND_NO_EXC 0x08 + +#define _MM_FROUND_NINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEAREST_INT) +#define _MM_FROUND_FLOOR (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF) +#define _MM_FROUND_CEIL (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF) +#define _MM_FROUND_TRUNC (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO) +#define _MM_FROUND_RINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION) +#define _MM_FROUND_NEARBYINT (_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION) + +#define _mm_ceil_ps(X) _mm_round_ps((X), _MM_FROUND_CEIL) +#define _mm_ceil_pd(X) _mm_round_pd((X), _MM_FROUND_CEIL) +#define _mm_ceil_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_CEIL) +#define _mm_ceil_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_CEIL) + +#define _mm_floor_ps(X) _mm_round_ps((X), _MM_FROUND_FLOOR) +#define _mm_floor_pd(X) _mm_round_pd((X), _MM_FROUND_FLOOR) +#define _mm_floor_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_FLOOR) +#define _mm_floor_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_FLOOR) + +#define _mm_round_ps(X, M) __extension__ ({ \ + __m128 __X = (X); \ + (__m128) __builtin_ia32_roundps((__v4sf)__X, (M)); }) + +#define _mm_round_ss(X, Y, M) __extension__ ({ \ + __m128 __X = (X); \ + __m128 __Y = (Y); \ + (__m128) __builtin_ia32_roundss((__v4sf)__X, (__v4sf)__Y, (M)); }) + +#define _mm_round_pd(X, M) __extension__ ({ \ + __m128d __X = (X); \ + (__m128d) __builtin_ia32_roundpd((__v2df)__X, (M)); }) + +#define _mm_round_sd(X, Y, M) __extension__ ({ \ + __m128d __X = (X); \ + __m128d __Y = (Y); \ + (__m128d) __builtin_ia32_roundsd((__v2df)__X, (__v2df)__Y, (M)); }) + +/* SSE4 Packed Blending Intrinsics. */ +#define _mm_blend_pd(V1, V2, M) __extension__ ({ \ + __m128d __V1 = (V1); \ + __m128d __V2 = (V2); \ + (__m128d)__builtin_shufflevector((__v2df)__V1, (__v2df)__V2, \ + (((M) & 0x01) ? 2 : 0), \ + (((M) & 0x02) ? 3 : 1)); }) + +#define _mm_blend_ps(V1, V2, M) __extension__ ({ \ + __m128 __V1 = (V1); \ + __m128 __V2 = (V2); \ + (__m128)__builtin_shufflevector((__v4sf)__V1, (__v4sf)__V2, \ + (((M) & 0x01) ? 4 : 0), \ + (((M) & 0x02) ? 5 : 1), \ + (((M) & 0x04) ? 6 : 2), \ + (((M) & 0x08) ? 7 : 3)); }) + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M) +{ + return (__m128d) __builtin_ia32_blendvpd ((__v2df)__V1, (__v2df)__V2, + (__v2df)__M); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M) +{ + return (__m128) __builtin_ia32_blendvps ((__v4sf)__V1, (__v4sf)__V2, + (__v4sf)__M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M) +{ + return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__V1, (__v16qi)__V2, + (__v16qi)__M); +} + +#define _mm_blend_epi16(V1, V2, M) __extension__ ({ \ + __m128i __V1 = (V1); \ + __m128i __V2 = (V2); \ + (__m128i)__builtin_shufflevector((__v8hi)__V1, (__v8hi)__V2, \ + (((M) & 0x01) ? 8 : 0), \ + (((M) & 0x02) ? 9 : 1), \ + (((M) & 0x04) ? 10 : 2), \ + (((M) & 0x08) ? 11 : 3), \ + (((M) & 0x10) ? 12 : 4), \ + (((M) & 0x20) ? 13 : 5), \ + (((M) & 0x40) ? 14 : 6), \ + (((M) & 0x80) ? 15 : 7)); }) + +/* SSE4 Dword Multiply Instructions. 
*/ +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mullo_epi32 (__m128i __V1, __m128i __V2) +{ + return (__m128i) ((__v4si)__V1 * (__v4si)__V2); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mul_epi32 (__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__V1, (__v4si)__V2); +} + +/* SSE4 Floating Point Dot Product Instructions. */ +#define _mm_dp_ps(X, Y, M) __extension__ ({ \ + __m128 __X = (X); \ + __m128 __Y = (Y); \ + (__m128) __builtin_ia32_dpps((__v4sf)__X, (__v4sf)__Y, (M)); }) + +#define _mm_dp_pd(X, Y, M) __extension__ ({\ + __m128d __X = (X); \ + __m128d __Y = (Y); \ + (__m128d) __builtin_ia32_dppd((__v2df)__X, (__v2df)__Y, (M)); }) + +/* SSE4 Streaming Load Hint Instruction. */ +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_stream_load_si128 (__m128i *__V) +{ + return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __V); +} + +/* SSE4 Packed Integer Min/Max Instructions. */ +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_min_epi8 (__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_max_epi8 (__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_min_epu16 (__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_max_epu16 (__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_min_epi32 (__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_max_epi32 (__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_min_epu32 (__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_max_epu32 (__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_pmaxud128((__v4si) __V1, (__v4si) __V2); +} + +/* SSE4 Insertion and Extraction from XMM Register Instructions. */ +#define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N)) +#define _mm_extract_ps(X, N) (__extension__ \ + ({ union { int __i; float __f; } __t; \ + __v4sf __a = (__v4sf)(X); \ + __t.__f = __a[(N) & 3]; \ + __t.__i;})) + +/* Miscellaneous insert and extract macros. */ +/* Extract a single-precision float from X at index N into D. */ +#define _MM_EXTRACT_FLOAT(D, X, N) (__extension__ ({ __v4sf __a = (__v4sf)(X); \ + (D) = __a[N]; })) + +/* Or together 2 sets of indexes (X and Y) with the zeroing bits (Z) to create + an index suitable for _mm_insert_ps. */ +#define _MM_MK_INSERTPS_NDX(X, Y, Z) (((X) << 6) | ((Y) << 4) | (Z)) + +/* Extract a float from X at index N into the first index of the return. */ +#define _MM_PICK_OUT_PS(X, N) _mm_insert_ps (_mm_setzero_ps(), (X), \ + _MM_MK_INSERTPS_NDX((N), 0, 0x0e)) + +/* Insert int into packed integer array at index. 
*/ +#define _mm_insert_epi8(X, I, N) (__extension__ ({ __v16qi __a = (__v16qi)(X); \ + __a[(N) & 15] = (I); \ + __a;})) +#define _mm_insert_epi32(X, I, N) (__extension__ ({ __v4si __a = (__v4si)(X); \ + __a[(N) & 3] = (I); \ + __a;})) +#ifdef __x86_64__ +#define _mm_insert_epi64(X, I, N) (__extension__ ({ __v2di __a = (__v2di)(X); \ + __a[(N) & 1] = (I); \ + __a;})) +#endif /* __x86_64__ */ + +/* Extract int from packed integer array at index. This returns the element + * as a zero extended value, so it is unsigned. + */ +#define _mm_extract_epi8(X, N) (__extension__ ({ __v16qi __a = (__v16qi)(X); \ + (int)(unsigned char) \ + __a[(N) & 15];})) +#define _mm_extract_epi32(X, N) (__extension__ ({ __v4si __a = (__v4si)(X); \ + __a[(N) & 3];})) +#ifdef __x86_64__ +#define _mm_extract_epi64(X, N) (__extension__ ({ __v2di __a = (__v2di)(X); \ + __a[(N) & 1];})) +#endif /* __x86_64 */ + +/* SSE4 128-bit Packed Integer Comparisons. */ +static __inline__ int __DEFAULT_FN_ATTRS +_mm_testz_si128(__m128i __M, __m128i __V) +{ + return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_testc_si128(__m128i __M, __m128i __V) +{ + return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_testnzc_si128(__m128i __M, __m128i __V) +{ + return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V); +} + +#define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V))) +#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V)) +#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V)) + +/* SSE4 64-bit Packed Integer Comparisons. */ +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cmpeq_epi64(__m128i __V1, __m128i __V2) +{ + return (__m128i)((__v2di)__V1 == (__v2di)__V2); +} + +/* SSE4 Packed Integer Sign-Extension. */ +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepi8_epi16(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovsxbw128((__v16qi) __V); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepi8_epi32(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovsxbd128((__v16qi) __V); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepi8_epi64(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovsxbq128((__v16qi) __V); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepi16_epi32(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovsxwd128((__v8hi) __V); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepi16_epi64(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovsxwq128((__v8hi)__V); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepi32_epi64(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovsxdq128((__v4si)__V); +} + +/* SSE4 Packed Integer Zero-Extension. 
*/ +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepu8_epi16(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovzxbw128((__v16qi) __V); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepu8_epi32(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovzxbd128((__v16qi)__V); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepu8_epi64(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovzxbq128((__v16qi)__V); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepu16_epi32(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovzxwd128((__v8hi)__V); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepu16_epi64(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovzxwq128((__v8hi)__V); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepu32_epi64(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovzxdq128((__v4si)__V); +} + +/* SSE4 Pack with Unsigned Saturation. */ +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_packus_epi32(__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_packusdw128((__v4si)__V1, (__v4si)__V2); +} + +/* SSE4 Multiple Packed Sums of Absolute Difference. */ +#define _mm_mpsadbw_epu8(X, Y, M) __extension__ ({ \ + __m128i __X = (X); \ + __m128i __Y = (Y); \ + (__m128i) __builtin_ia32_mpsadbw128((__v16qi)__X, (__v16qi)__Y, (M)); }) + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_minpos_epu16(__m128i __V) +{ + return (__m128i) __builtin_ia32_phminposuw128((__v8hi)__V); +} + +/* These definitions are normally in nmmintrin.h, but gcc puts them in here + so we'll do the same. */ +#ifdef __SSE4_2__ + +/* These specify the type of data that we're comparing. */ +#define _SIDD_UBYTE_OPS 0x00 +#define _SIDD_UWORD_OPS 0x01 +#define _SIDD_SBYTE_OPS 0x02 +#define _SIDD_SWORD_OPS 0x03 + +/* These specify the type of comparison operation. */ +#define _SIDD_CMP_EQUAL_ANY 0x00 +#define _SIDD_CMP_RANGES 0x04 +#define _SIDD_CMP_EQUAL_EACH 0x08 +#define _SIDD_CMP_EQUAL_ORDERED 0x0c + +/* These macros specify the polarity of the operation. */ +#define _SIDD_POSITIVE_POLARITY 0x00 +#define _SIDD_NEGATIVE_POLARITY 0x10 +#define _SIDD_MASKED_POSITIVE_POLARITY 0x20 +#define _SIDD_MASKED_NEGATIVE_POLARITY 0x30 + +/* These macros are used in _mm_cmpXstri() to specify the return. */ +#define _SIDD_LEAST_SIGNIFICANT 0x00 +#define _SIDD_MOST_SIGNIFICANT 0x40 + +/* These macros are used in _mm_cmpXstri() to specify the return. */ +#define _SIDD_BIT_MASK 0x00 +#define _SIDD_UNIT_MASK 0x40 + +/* SSE4.2 Packed Comparison Intrinsics. */ +#define _mm_cmpistrm(A, B, M) __builtin_ia32_pcmpistrm128((A), (B), (M)) +#define _mm_cmpistri(A, B, M) __builtin_ia32_pcmpistri128((A), (B), (M)) + +#define _mm_cmpestrm(A, LA, B, LB, M) \ + __builtin_ia32_pcmpestrm128((A), (LA), (B), (LB), (M)) +#define _mm_cmpestri(A, LA, B, LB, M) \ + __builtin_ia32_pcmpestri128((A), (LA), (B), (LB), (M)) + +/* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. 
*/
+#define _mm_cmpistra(A, B, M) \
+     __builtin_ia32_pcmpistria128((A), (B), (M))
+#define _mm_cmpistrc(A, B, M) \
+     __builtin_ia32_pcmpistric128((A), (B), (M))
+#define _mm_cmpistro(A, B, M) \
+     __builtin_ia32_pcmpistrio128((A), (B), (M))
+#define _mm_cmpistrs(A, B, M) \
+     __builtin_ia32_pcmpistris128((A), (B), (M))
+#define _mm_cmpistrz(A, B, M) \
+     __builtin_ia32_pcmpistriz128((A), (B), (M))
+
+#define _mm_cmpestra(A, LA, B, LB, M) \
+     __builtin_ia32_pcmpestria128((A), (LA), (B), (LB), (M))
+#define _mm_cmpestrc(A, LA, B, LB, M) \
+     __builtin_ia32_pcmpestric128((A), (LA), (B), (LB), (M))
+#define _mm_cmpestro(A, LA, B, LB, M) \
+     __builtin_ia32_pcmpestrio128((A), (LA), (B), (LB), (M))
+#define _mm_cmpestrs(A, LA, B, LB, M) \
+     __builtin_ia32_pcmpestris128((A), (LA), (B), (LB), (M))
+#define _mm_cmpestrz(A, LA, B, LB, M) \
+     __builtin_ia32_pcmpestriz128((A), (LA), (B), (LB), (M))
+
+/* SSE4.2 Compare Packed Data -- Greater Than. */
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cmpgt_epi64(__m128i __V1, __m128i __V2)
+{
+  return (__m128i)((__v2di)__V1 > (__v2di)__V2);
+}
+
+/* SSE4.2 Accumulate CRC32. */
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
+_mm_crc32_u8(unsigned int __C, unsigned char __D)
+{
+  return __builtin_ia32_crc32qi(__C, __D);
+}
+
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
+_mm_crc32_u16(unsigned int __C, unsigned short __D)
+{
+  return __builtin_ia32_crc32hi(__C, __D);
+}
+
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
+_mm_crc32_u32(unsigned int __C, unsigned int __D)
+{
+  return __builtin_ia32_crc32si(__C, __D);
+}
+
+#ifdef __x86_64__
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+_mm_crc32_u64(unsigned long long __C, unsigned long long __D)
+{
+  return __builtin_ia32_crc32di(__C, __D);
+}
+#endif /* __x86_64__ */
+
+#undef __DEFAULT_FN_ATTRS
+
+#ifdef __POPCNT__
+#include <popcntintrin.h>
+#endif
+
+#endif /* __SSE4_2__ */
+#endif /* __SSE4_1__ */
+
+#endif /* _SMMINTRIN_H */
+
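A minimal SSE4.1 usage sketch (assumes a build with -msse4.1): round four packed floats with the rounding macros defined above and pull one lane back out with _MM_EXTRACT_FLOAT.

    #include <smmintrin.h>

    /* Round all four lanes to the nearest integer and return lane 0. */
    float round_lane0(__m128 v) {
        __m128 r = _mm_round_ps(v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        float lane0;
        _MM_EXTRACT_FLOAT(lane0, r, 0);
        return lane0;
    }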
diff --git a/c_headers/stdalign.h b/c_headers/stdalign.h
new file mode 100644
index 0000000000..3738d1284f
--- /dev/null
+++ b/c_headers/stdalign.h
@@ -0,0 +1,35 @@
+/*===---- stdalign.h - Standard header for alignment ------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __STDALIGN_H
+#define __STDALIGN_H
+
+#ifndef __cplusplus
+#define alignas _Alignas
+#define alignof _Alignof
+#endif
+
+#define __alignas_is_defined 1
+#define __alignof_is_defined 1
+
+#endif /* __STDALIGN_H */
diff --git a/c_headers/stdarg.h b/c_headers/stdarg.h
new file mode 100644
index 0000000000..a57e183648
--- /dev/null
+++ b/c_headers/stdarg.h
@@ -0,0 +1,52 @@
+/*===---- stdarg.h - Variable argument handling ----------------------------===
+ *
+ * Copyright (c) 2008 Eli Friedman
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __STDARG_H
+#define __STDARG_H
+
+#ifndef _VA_LIST
+typedef __builtin_va_list va_list;
+#define _VA_LIST
+#endif
+#define va_start(ap, param) __builtin_va_start(ap, param)
+#define va_end(ap) __builtin_va_end(ap)
+#define va_arg(ap, type) __builtin_va_arg(ap, type)
+
+/* GCC always defines __va_copy, but does not define va_copy unless in c99 mode
+ * or -ansi is not specified, since it was not part of C90.
+ */
+#define __va_copy(d,s) __builtin_va_copy(d,s)
+
+#if __STDC_VERSION__ >= 199901L || __cplusplus >= 201103L || !defined(__STRICT_ANSI__)
+#define va_copy(dest, src) __builtin_va_copy(dest, src)
+#endif
+
+/* Hack required to make standard headers work, at least on Ubuntu */
+#ifndef __GNUC_VA_LIST
+#define __GNUC_VA_LIST 1
+#endif
+typedef __builtin_va_list __gnuc_va_list;
+
+#endif /* __STDARG_H */
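A minimal sketch of the va_start/va_arg/va_end pattern these macros support (everything here expands to the corresponding __builtin_va_* form):

    #include <stdarg.h>

    /* Sum `count` int arguments passed variadically. */
    int sum_ints(int count, ...) {
        va_list ap;
        int total = 0;
        va_start(ap, count);        /* expands to __builtin_va_start */
        for (int i = 0; i < count; i++)
            total += va_arg(ap, int);
        va_end(ap);
        return total;
    }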
diff --git a/c_headers/stdatomic.h b/c_headers/stdatomic.h
new file mode 100644
index 0000000000..e037987660
--- /dev/null
+++ b/c_headers/stdatomic.h
@@ -0,0 +1,190 @@
+/*===---- stdatomic.h - Standard header for atomic types and operations -----===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __CLANG_STDATOMIC_H
+#define __CLANG_STDATOMIC_H
+
+/* If we're hosted, fall back to the system's stdatomic.h. FreeBSD, for
+ * example, already has a Clang-compatible stdatomic.h header.
+ */
+#if __STDC_HOSTED__ && __has_include_next(<stdatomic.h>)
+# include_next <stdatomic.h>
+#else
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* 7.17.1 Introduction */
+
+#define ATOMIC_BOOL_LOCK_FREE __GCC_ATOMIC_BOOL_LOCK_FREE
+#define ATOMIC_CHAR_LOCK_FREE __GCC_ATOMIC_CHAR_LOCK_FREE
+#define ATOMIC_CHAR16_T_LOCK_FREE __GCC_ATOMIC_CHAR16_T_LOCK_FREE
+#define ATOMIC_CHAR32_T_LOCK_FREE __GCC_ATOMIC_CHAR32_T_LOCK_FREE
+#define ATOMIC_WCHAR_T_LOCK_FREE __GCC_ATOMIC_WCHAR_T_LOCK_FREE
+#define ATOMIC_SHORT_T_LOCK_FREE __GCC_ATOMIC_SHORT_T_LOCK_FREE
+#define ATOMIC_INT_T_LOCK_FREE __GCC_ATOMIC_INT_T_LOCK_FREE
+#define ATOMIC_LONG_T_LOCK_FREE __GCC_ATOMIC_LONG_T_LOCK_FREE
+#define ATOMIC_LLONG_T_LOCK_FREE __GCC_ATOMIC_LLONG_T_LOCK_FREE
+#define ATOMIC_POINTER_T_LOCK_FREE __GCC_ATOMIC_POINTER_T_LOCK_FREE
+
+/* 7.17.2 Initialization */
+
+#define ATOMIC_VAR_INIT(value) (value)
+#define atomic_init __c11_atomic_init
+
+/* 7.17.3 Order and consistency */
+
+typedef enum memory_order {
+  memory_order_relaxed = __ATOMIC_RELAXED,
+  memory_order_consume = __ATOMIC_CONSUME,
+  memory_order_acquire = __ATOMIC_ACQUIRE,
+  memory_order_release = __ATOMIC_RELEASE,
+  memory_order_acq_rel = __ATOMIC_ACQ_REL,
+  memory_order_seq_cst = __ATOMIC_SEQ_CST
+} memory_order;
+
+#define kill_dependency(y) (y)
+
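A minimal sketch of this header in use (assumes a C11 build, e.g. -std=c11): the _explicit forms take one of the memory_order values defined just above, while the plain macros default to seq_cst.

    #include <stdatomic.h>

    /* A lock-free event counter; relaxed ordering suffices for a statistic. */
    static atomic_ulong hits = ATOMIC_VAR_INIT(0);

    void record_hit(void) {
        atomic_fetch_add_explicit(&hits, 1, memory_order_relaxed);
    }

    unsigned long read_hits(void) {
        return atomic_load(&hits);   /* generic macro, seq_cst ordering */
    }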
*/ +void atomic_thread_fence(memory_order); +void atomic_signal_fence(memory_order); + +#define atomic_thread_fence(order) __c11_atomic_thread_fence(order) +#define atomic_signal_fence(order) __c11_atomic_signal_fence(order) + +/* 7.17.5 Lock-free property */ + +#define atomic_is_lock_free(obj) __c11_atomic_is_lock_free(sizeof(*(obj))) + +/* 7.17.6 Atomic integer types */ + +#ifdef __cplusplus +typedef _Atomic(bool) atomic_bool; +#else +typedef _Atomic(_Bool) atomic_bool; +#endif +typedef _Atomic(char) atomic_char; +typedef _Atomic(signed char) atomic_schar; +typedef _Atomic(unsigned char) atomic_uchar; +typedef _Atomic(short) atomic_short; +typedef _Atomic(unsigned short) atomic_ushort; +typedef _Atomic(int) atomic_int; +typedef _Atomic(unsigned int) atomic_uint; +typedef _Atomic(long) atomic_long; +typedef _Atomic(unsigned long) atomic_ulong; +typedef _Atomic(long long) atomic_llong; +typedef _Atomic(unsigned long long) atomic_ullong; +typedef _Atomic(uint_least16_t) atomic_char16_t; +typedef _Atomic(uint_least32_t) atomic_char32_t; +typedef _Atomic(wchar_t) atomic_wchar_t; +typedef _Atomic(int_least8_t) atomic_int_least8_t; +typedef _Atomic(uint_least8_t) atomic_uint_least8_t; +typedef _Atomic(int_least16_t) atomic_int_least16_t; +typedef _Atomic(uint_least16_t) atomic_uint_least16_t; +typedef _Atomic(int_least32_t) atomic_int_least32_t; +typedef _Atomic(uint_least32_t) atomic_uint_least32_t; +typedef _Atomic(int_least64_t) atomic_int_least64_t; +typedef _Atomic(uint_least64_t) atomic_uint_least64_t; +typedef _Atomic(int_fast8_t) atomic_int_fast8_t; +typedef _Atomic(uint_fast8_t) atomic_uint_fast8_t; +typedef _Atomic(int_fast16_t) atomic_int_fast16_t; +typedef _Atomic(uint_fast16_t) atomic_uint_fast16_t; +typedef _Atomic(int_fast32_t) atomic_int_fast32_t; +typedef _Atomic(uint_fast32_t) atomic_uint_fast32_t; +typedef _Atomic(int_fast64_t) atomic_int_fast64_t; +typedef _Atomic(uint_fast64_t) atomic_uint_fast64_t; +typedef _Atomic(intptr_t) atomic_intptr_t; +typedef _Atomic(uintptr_t) atomic_uintptr_t; +typedef _Atomic(size_t) atomic_size_t; +typedef _Atomic(ptrdiff_t) atomic_ptrdiff_t; +typedef _Atomic(intmax_t) atomic_intmax_t; +typedef _Atomic(uintmax_t) atomic_uintmax_t; + +/* 7.17.7 Operations on atomic types */ + +#define atomic_store(object, desired) __c11_atomic_store(object, desired, __ATOMIC_SEQ_CST) +#define atomic_store_explicit __c11_atomic_store + +#define atomic_load(object) __c11_atomic_load(object, __ATOMIC_SEQ_CST) +#define atomic_load_explicit __c11_atomic_load + +#define atomic_exchange(object, desired) __c11_atomic_exchange(object, desired, __ATOMIC_SEQ_CST) +#define atomic_exchange_explicit __c11_atomic_exchange + +#define atomic_compare_exchange_strong(object, expected, desired) __c11_atomic_compare_exchange_strong(object, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) +#define atomic_compare_exchange_strong_explicit __c11_atomic_compare_exchange_strong + +#define atomic_compare_exchange_weak(object, expected, desired) __c11_atomic_compare_exchange_weak(object, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) +#define atomic_compare_exchange_weak_explicit __c11_atomic_compare_exchange_weak + +#define atomic_fetch_add(object, operand) __c11_atomic_fetch_add(object, operand, __ATOMIC_SEQ_CST) +#define atomic_fetch_add_explicit __c11_atomic_fetch_add + +#define atomic_fetch_sub(object, operand) __c11_atomic_fetch_sub(object, operand, __ATOMIC_SEQ_CST) +#define atomic_fetch_sub_explicit __c11_atomic_fetch_sub + +#define atomic_fetch_or(object, operand) 
__c11_atomic_fetch_or(object, operand, __ATOMIC_SEQ_CST) +#define atomic_fetch_or_explicit __c11_atomic_fetch_or + +#define atomic_fetch_xor(object, operand) __c11_atomic_fetch_xor(object, operand, __ATOMIC_SEQ_CST) +#define atomic_fetch_xor_explicit __c11_atomic_fetch_xor + +#define atomic_fetch_and(object, operand) __c11_atomic_fetch_and(object, operand, __ATOMIC_SEQ_CST) +#define atomic_fetch_and_explicit __c11_atomic_fetch_and + +/* 7.17.8 Atomic flag type and operations */ + +typedef struct atomic_flag { atomic_bool _Value; } atomic_flag; + +#define ATOMIC_FLAG_INIT { 0 } + +/* These should be provided by the libc implementation. */ +#ifdef __cplusplus +bool atomic_flag_test_and_set(volatile atomic_flag *); +bool atomic_flag_test_and_set_explicit(volatile atomic_flag *, memory_order); +#else +_Bool atomic_flag_test_and_set(volatile atomic_flag *); +_Bool atomic_flag_test_and_set_explicit(volatile atomic_flag *, memory_order); +#endif +void atomic_flag_clear(volatile atomic_flag *); +void atomic_flag_clear_explicit(volatile atomic_flag *, memory_order); + +#define atomic_flag_test_and_set(object) __c11_atomic_exchange(&(object)->_Value, 1, __ATOMIC_SEQ_CST) +#define atomic_flag_test_and_set_explicit(object, order) __c11_atomic_exchange(&(object)->_Value, 1, order) + +#define atomic_flag_clear(object) __c11_atomic_store(&(object)->_Value, 0, __ATOMIC_SEQ_CST) +#define atomic_flag_clear_explicit(object, order) __c11_atomic_store(&(object)->_Value, 0, order) + +#ifdef __cplusplus +} +#endif + +#endif /* __STDC_HOSTED__ */ +#endif /* __CLANG_STDATOMIC_H */ + diff --git a/c_headers/stdbool.h b/c_headers/stdbool.h new file mode 100644 index 0000000000..0467893f34 --- /dev/null +++ b/c_headers/stdbool.h @@ -0,0 +1,44 @@ +/*===---- stdbool.h - Standard header for booleans -------------------------=== + * + * Copyright (c) 2008 Eli Friedman + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __STDBOOL_H +#define __STDBOOL_H + +/* Don't define bool, true, and false in C++, except as a GNU extension. */ +#ifndef __cplusplus +#define bool _Bool +#define true 1 +#define false 0 +#elif defined(__GNUC__) && !defined(__STRICT_ANSI__) +/* Define _Bool, bool, false, true as a GNU extension. 
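
A small usage sketch of the stdatomic.h surface defined above (illustrative only, not part of the patch). atomic_fetch_add defaults to sequentially consistent ordering, and atomic_flag_test_and_set returns the flag's previous value, which is enough to build a spinlock:

#include <stdatomic.h>

static atomic_int counter = ATOMIC_VAR_INIT(0);
static atomic_flag lock = ATOMIC_FLAG_INIT;

static void bump(void)
{
    atomic_fetch_add(&counter, 1);      /* seq_cst read-modify-write */

    while (atomic_flag_test_and_set(&lock))
        ;                               /* spin while the previous value was set */
    /* ... critical section ... */
    atomic_flag_clear(&lock);
}
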
*/ +#define _Bool bool +#define bool bool +#define false false +#define true true +#endif + +#define __bool_true_false_are_defined 1 + +#endif /* __STDBOOL_H */ diff --git a/c_headers/stddef.h b/c_headers/stddef.h new file mode 100644 index 0000000000..7354996711 --- /dev/null +++ b/c_headers/stddef.h @@ -0,0 +1,137 @@ +/*===---- stddef.h - Basic type definitions --------------------------------=== + * + * Copyright (c) 2008 Eli Friedman + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#if !defined(__STDDEF_H) || defined(__need_ptrdiff_t) || \ + defined(__need_size_t) || defined(__need_wchar_t) || \ + defined(__need_NULL) || defined(__need_wint_t) + +#if !defined(__need_ptrdiff_t) && !defined(__need_size_t) && \ + !defined(__need_wchar_t) && !defined(__need_NULL) && \ + !defined(__need_wint_t) +/* Always define miscellaneous pieces when modules are available. */ +#if !__has_feature(modules) +#define __STDDEF_H +#endif +#define __need_ptrdiff_t +#define __need_size_t +#define __need_wchar_t +#define __need_NULL +#define __need_STDDEF_H_misc +/* __need_wint_t is intentionally not defined here. */ +#endif + +#if defined(__need_ptrdiff_t) +#if !defined(_PTRDIFF_T) || __has_feature(modules) +/* Always define ptrdiff_t when modules are available. */ +#if !__has_feature(modules) +#define _PTRDIFF_T +#endif +typedef __PTRDIFF_TYPE__ ptrdiff_t; +#endif +#undef __need_ptrdiff_t +#endif /* defined(__need_ptrdiff_t) */ + +#if defined(__need_size_t) +#if !defined(_SIZE_T) || __has_feature(modules) +/* Always define size_t when modules are available. */ +#if !__has_feature(modules) +#define _SIZE_T +#endif +typedef __SIZE_TYPE__ size_t; +#endif +#undef __need_size_t +#endif /*defined(__need_size_t) */ + +#if defined(__need_STDDEF_H_misc) +/* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is + * enabled. */ +#if (defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 && \ + !defined(_RSIZE_T)) || __has_feature(modules) +/* Always define rsize_t when modules are available. */ +#if !__has_feature(modules) +#define _RSIZE_T +#endif +typedef __SIZE_TYPE__ rsize_t; +#endif +#endif /* defined(__need_STDDEF_H_misc) */ + +#if defined(__need_wchar_t) +#ifndef __cplusplus +/* Always define wchar_t when modules are available. 
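
The __need_* machinery in this stddef.h lets a libc pull out a single definition without committing to the whole header; __STDDEF_H is only set on a full include, so a later plain include still provides the rest. A sketch of the protocol (not part of the patch):

#define __need_size_t
#include <stddef.h>   /* defines size_t only; __need_size_t is consumed */

size_t my_strlen(const char *s);

#include <stddef.h>   /* no __need_* macro set: full header this time */
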
*/
+#if !defined(_WCHAR_T) || __has_feature(modules)
+#if !__has_feature(modules)
+#define _WCHAR_T
+#if defined(_MSC_EXTENSIONS)
+#define _WCHAR_T_DEFINED
+#endif
+#endif
+typedef __WCHAR_TYPE__ wchar_t;
+#endif
+#endif
+#undef __need_wchar_t
+#endif /* defined(__need_wchar_t) */
+
+#if defined(__need_NULL)
+#undef NULL
+#ifdef __cplusplus
+#  if !defined(__MINGW32__) && !defined(_MSC_VER)
+#    define NULL __null
+#  else
+#    define NULL 0
+#  endif
+#else
+#  define NULL ((void*)0)
+#endif
+#ifdef __cplusplus
+#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED)
+namespace std { typedef decltype(nullptr) nullptr_t; }
+using ::std::nullptr_t;
+#endif
+#endif
+#undef __need_NULL
+#endif /* defined(__need_NULL) */
+
+#if defined(__need_STDDEF_H_misc)
+#if __STDC_VERSION__ >= 201112L || __cplusplus >= 201103L
+#include "__stddef_max_align_t.h"
+#endif
+#define offsetof(t, d) __builtin_offsetof(t, d)
+#undef __need_STDDEF_H_misc
+#endif /* defined(__need_STDDEF_H_misc) */
+
+/* Some C libraries expect to see a wint_t here. Others (notably MinGW) will use
+__WINT_TYPE__ directly; accommodate both by requiring __need_wint_t */
+#if defined(__need_wint_t)
+/* Always define wint_t when modules are available. */
+#if !defined(_WINT_T) || __has_feature(modules)
+#if !__has_feature(modules)
+#define _WINT_T
+#endif
+typedef __WINT_TYPE__ wint_t;
+#endif
+#undef __need_wint_t
+#endif /* __need_wint_t */
+
+#endif
diff --git a/c_headers/stdint.h b/c_headers/stdint.h
new file mode 100644
index 0000000000..0303db90be
--- /dev/null
+++ b/c_headers/stdint.h
@@ -0,0 +1,707 @@
+/*===---- stdint.h - Standard header for sized integer types --------------===*\
+ *
+ * Copyright (c) 2009 Chris Lattner
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+\*===----------------------------------------------------------------------===*/
+
+#ifndef __CLANG_STDINT_H
+#define __CLANG_STDINT_H
+
+/* If we're hosted, fall back to the system's stdint.h, which might have
+ * additional definitions.
+ */
+#if __STDC_HOSTED__ && __has_include_next(<stdint.h>)
+
+// C99 7.18.3 Limits of other integer types
+//
+// Footnote 219, 220: C++ implementations should define these macros only when
+// __STDC_LIMIT_MACROS is defined before <stdint.h> is included.
+//
+// Footnote 222: C++ implementations should define these macros only when
+// __STDC_CONSTANT_MACROS is defined before <stdint.h> is included.
+//
+// C++11 [cstdint.syn]p2:
+//
+// The macros defined by <cstdint> are provided unconditionally. In particular,
+// the symbols __STDC_LIMIT_MACROS and __STDC_CONSTANT_MACROS (mentioned in
+// footnotes 219, 220, and 222 in the C standard) play no role in C++.
+//
+// C11 removed the problematic footnotes.
+//
+// Work around this inconsistency by always defining those macros in C++ mode,
+// so that a C library implementation which follows the C99 standard can be
+// used in C++.
+# ifdef __cplusplus
+#  if !defined(__STDC_LIMIT_MACROS)
+#   define __STDC_LIMIT_MACROS
+#   define __STDC_LIMIT_MACROS_DEFINED_BY_CLANG
+#  endif
+#  if !defined(__STDC_CONSTANT_MACROS)
+#   define __STDC_CONSTANT_MACROS
+#   define __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG
+#  endif
+# endif
+
+# include_next <stdint.h>
+
+# ifdef __STDC_LIMIT_MACROS_DEFINED_BY_CLANG
+#  undef __STDC_LIMIT_MACROS
+#  undef __STDC_LIMIT_MACROS_DEFINED_BY_CLANG
+# endif
+# ifdef __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG
+#  undef __STDC_CONSTANT_MACROS
+#  undef __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG
+# endif
+
+#else
+
+/* C99 7.18.1.1 Exact-width integer types.
+ * C99 7.18.1.2 Minimum-width integer types.
+ * C99 7.18.1.3 Fastest minimum-width integer types.
+ *
+ * The standard requires that exact-width types be defined for 8-, 16-, 32-, and
+ * 64-bit types if they are implemented. Other exact-width types are optional.
+ * This implementation defines an exact-width type for every integer width
+ * that is represented in the standard integer types.
+ *
+ * The standard also requires minimum-width types be defined for 8-, 16-, 32-,
+ * and 64-bit widths regardless of whether there are corresponding exact-width
+ * types.
+ *
+ * To accommodate targets that are missing types that are exactly 8, 16, 32, or
+ * 64 bits wide, this implementation takes an approach of cascading
+ * redefinitions, redefining __int_leastN_t to successively smaller exact-width
+ * types. It is therefore important that the types are defined in order of
+ * descending widths.
+ *
+ * We currently assume that the minimum-width types and the fastest
+ * minimum-width types are the same. This is allowed by the standard, but is
+ * suboptimal.
+ *
+ * In violation of the standard, some targets do not implement a type that is
+ * wide enough to represent all of the required widths (8-, 16-, 32-, 64-bit).
+ * To accommodate these targets, a required minimum-width type is only
+ * defined if there exists an exact-width type of equal or greater width.
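
Concretely (a sketch, not part of the patch): on a hypothetical target that provides __INT24_TYPE__ but no 16-bit type, the cascade leaves __int_least16_t aliased to int24_t, so int_least16_t and int_fast16_t exist even though int16_t does not:

#include <limits.h>
#include <stdint.h>

int_least16_t a = 1234;  /* int24_t on such a target; int16_t on common ones */
int_fast16_t  b = 1234;  /* fast == least in this implementation */

_Static_assert(sizeof(int_least16_t) * CHAR_BIT >= 16, "holds at least 16 bits");
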
+ */ + +#ifdef __INT64_TYPE__ +# ifndef __int8_t_defined /* glibc sys/types.h also defines int64_t*/ +typedef __INT64_TYPE__ int64_t; +# endif /* __int8_t_defined */ +typedef __UINT64_TYPE__ uint64_t; +# define __int_least64_t int64_t +# define __uint_least64_t uint64_t +# define __int_least32_t int64_t +# define __uint_least32_t uint64_t +# define __int_least16_t int64_t +# define __uint_least16_t uint64_t +# define __int_least8_t int64_t +# define __uint_least8_t uint64_t +#endif /* __INT64_TYPE__ */ + +#ifdef __int_least64_t +typedef __int_least64_t int_least64_t; +typedef __uint_least64_t uint_least64_t; +typedef __int_least64_t int_fast64_t; +typedef __uint_least64_t uint_fast64_t; +#endif /* __int_least64_t */ + +#ifdef __INT56_TYPE__ +typedef __INT56_TYPE__ int56_t; +typedef __UINT56_TYPE__ uint56_t; +typedef int56_t int_least56_t; +typedef uint56_t uint_least56_t; +typedef int56_t int_fast56_t; +typedef uint56_t uint_fast56_t; +# define __int_least32_t int56_t +# define __uint_least32_t uint56_t +# define __int_least16_t int56_t +# define __uint_least16_t uint56_t +# define __int_least8_t int56_t +# define __uint_least8_t uint56_t +#endif /* __INT56_TYPE__ */ + + +#ifdef __INT48_TYPE__ +typedef __INT48_TYPE__ int48_t; +typedef __UINT48_TYPE__ uint48_t; +typedef int48_t int_least48_t; +typedef uint48_t uint_least48_t; +typedef int48_t int_fast48_t; +typedef uint48_t uint_fast48_t; +# define __int_least32_t int48_t +# define __uint_least32_t uint48_t +# define __int_least16_t int48_t +# define __uint_least16_t uint48_t +# define __int_least8_t int48_t +# define __uint_least8_t uint48_t +#endif /* __INT48_TYPE__ */ + + +#ifdef __INT40_TYPE__ +typedef __INT40_TYPE__ int40_t; +typedef __UINT40_TYPE__ uint40_t; +typedef int40_t int_least40_t; +typedef uint40_t uint_least40_t; +typedef int40_t int_fast40_t; +typedef uint40_t uint_fast40_t; +# define __int_least32_t int40_t +# define __uint_least32_t uint40_t +# define __int_least16_t int40_t +# define __uint_least16_t uint40_t +# define __int_least8_t int40_t +# define __uint_least8_t uint40_t +#endif /* __INT40_TYPE__ */ + + +#ifdef __INT32_TYPE__ + +# ifndef __int8_t_defined /* glibc sys/types.h also defines int32_t*/ +typedef __INT32_TYPE__ int32_t; +# endif /* __int8_t_defined */ + +# ifndef __uint32_t_defined /* more glibc compatibility */ +# define __uint32_t_defined +typedef __UINT32_TYPE__ uint32_t; +# endif /* __uint32_t_defined */ + +# define __int_least32_t int32_t +# define __uint_least32_t uint32_t +# define __int_least16_t int32_t +# define __uint_least16_t uint32_t +# define __int_least8_t int32_t +# define __uint_least8_t uint32_t +#endif /* __INT32_TYPE__ */ + +#ifdef __int_least32_t +typedef __int_least32_t int_least32_t; +typedef __uint_least32_t uint_least32_t; +typedef __int_least32_t int_fast32_t; +typedef __uint_least32_t uint_fast32_t; +#endif /* __int_least32_t */ + +#ifdef __INT24_TYPE__ +typedef __INT24_TYPE__ int24_t; +typedef __UINT24_TYPE__ uint24_t; +typedef int24_t int_least24_t; +typedef uint24_t uint_least24_t; +typedef int24_t int_fast24_t; +typedef uint24_t uint_fast24_t; +# define __int_least16_t int24_t +# define __uint_least16_t uint24_t +# define __int_least8_t int24_t +# define __uint_least8_t uint24_t +#endif /* __INT24_TYPE__ */ + +#ifdef __INT16_TYPE__ +#ifndef __int8_t_defined /* glibc sys/types.h also defines int16_t*/ +typedef __INT16_TYPE__ int16_t; +#endif /* __int8_t_defined */ +typedef __UINT16_TYPE__ uint16_t; +# define __int_least16_t int16_t +# define __uint_least16_t uint16_t +# 
define __int_least8_t int16_t
+# define __uint_least8_t uint16_t
+#endif /* __INT16_TYPE__ */
+
+#ifdef __int_least16_t
+typedef __int_least16_t int_least16_t;
+typedef __uint_least16_t uint_least16_t;
+typedef __int_least16_t int_fast16_t;
+typedef __uint_least16_t uint_fast16_t;
+#endif /* __int_least16_t */
+
+
+#ifdef __INT8_TYPE__
+#ifndef __int8_t_defined /* glibc sys/types.h also defines int8_t*/
+typedef __INT8_TYPE__ int8_t;
+#endif /* __int8_t_defined */
+typedef __UINT8_TYPE__ uint8_t;
+# define __int_least8_t int8_t
+# define __uint_least8_t uint8_t
+#endif /* __INT8_TYPE__ */
+
+#ifdef __int_least8_t
+typedef __int_least8_t int_least8_t;
+typedef __uint_least8_t uint_least8_t;
+typedef __int_least8_t int_fast8_t;
+typedef __uint_least8_t uint_fast8_t;
+#endif /* __int_least8_t */
+
+/* prevent glibc sys/types.h from defining conflicting types */
+#ifndef __int8_t_defined
+# define __int8_t_defined
+#endif /* __int8_t_defined */
+
+/* C99 7.18.1.4 Integer types capable of holding object pointers.
+ */
+#define __stdint_join3(a,b,c) a ## b ## c
+
+#define __intn_t(n) __stdint_join3( int, n, _t)
+#define __uintn_t(n) __stdint_join3(uint, n, _t)
+
+#ifndef _INTPTR_T
+#ifndef __intptr_t_defined
+typedef __intn_t(__INTPTR_WIDTH__) intptr_t;
+#define __intptr_t_defined
+#define _INTPTR_T
+#endif
+#endif
+
+#ifndef _UINTPTR_T
+typedef __uintn_t(__INTPTR_WIDTH__) uintptr_t;
+#define _UINTPTR_T
+#endif
+
+/* C99 7.18.1.5 Greatest-width integer types.
+ */
+typedef __INTMAX_TYPE__ intmax_t;
+typedef __UINTMAX_TYPE__ uintmax_t;
+
+/* C99 7.18.4 Macros for minimum-width integer constants.
+ *
+ * The standard requires that integer constant macros be defined for all the
+ * minimum-width types defined above. As 8-, 16-, 32-, and 64-bit minimum-width
+ * types are required, the corresponding integer constant macros are defined
+ * here. This implementation also defines minimum-width types for every other
+ * integer width that the target implements, so corresponding macros are
+ * defined below, too.
+ *
+ * These macros are defined using the same successive-shrinking approach as
+ * the type definitions above. It is likewise important that macros are defined
+ * in order of descending width.
+ *
+ * Note that C++ should not check __STDC_CONSTANT_MACROS here, contrary to the
+ * claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]).
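
What the INTN_C cascade buys (a sketch, not part of the patch): the macro pins a constant to the matching least-width type, so arithmetic that would overflow a plain int constant stays well defined:

#include <stdint.h>

static const uint64_t bit40 = UINT64_C(1) << 40;   /* plain 1 << 40 overflows int */
static const int64_t  big   = -INT64_C(9000000000); /* needs a 64-bit constant */
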
+ */ + +#define __int_c_join(a, b) a ## b +#define __int_c(v, suffix) __int_c_join(v, suffix) +#define __uint_c(v, suffix) __int_c_join(v##U, suffix) + + +#ifdef __INT64_TYPE__ +# ifdef __INT64_C_SUFFIX__ +# define __int64_c_suffix __INT64_C_SUFFIX__ +# define __int32_c_suffix __INT64_C_SUFFIX__ +# define __int16_c_suffix __INT64_C_SUFFIX__ +# define __int8_c_suffix __INT64_C_SUFFIX__ +# else +# undef __int64_c_suffix +# undef __int32_c_suffix +# undef __int16_c_suffix +# undef __int8_c_suffix +# endif /* __INT64_C_SUFFIX__ */ +#endif /* __INT64_TYPE__ */ + +#ifdef __int_least64_t +# ifdef __int64_c_suffix +# define INT64_C(v) __int_c(v, __int64_c_suffix) +# define UINT64_C(v) __uint_c(v, __int64_c_suffix) +# else +# define INT64_C(v) v +# define UINT64_C(v) v ## U +# endif /* __int64_c_suffix */ +#endif /* __int_least64_t */ + + +#ifdef __INT56_TYPE__ +# ifdef __INT56_C_SUFFIX__ +# define INT56_C(v) __int_c(v, __INT56_C_SUFFIX__) +# define UINT56_C(v) __uint_c(v, __INT56_C_SUFFIX__) +# define __int32_c_suffix __INT56_C_SUFFIX__ +# define __int16_c_suffix __INT56_C_SUFFIX__ +# define __int8_c_suffix __INT56_C_SUFFIX__ +# else +# define INT56_C(v) v +# define UINT56_C(v) v ## U +# undef __int32_c_suffix +# undef __int16_c_suffix +# undef __int8_c_suffix +# endif /* __INT56_C_SUFFIX__ */ +#endif /* __INT56_TYPE__ */ + + +#ifdef __INT48_TYPE__ +# ifdef __INT48_C_SUFFIX__ +# define INT48_C(v) __int_c(v, __INT48_C_SUFFIX__) +# define UINT48_C(v) __uint_c(v, __INT48_C_SUFFIX__) +# define __int32_c_suffix __INT48_C_SUFFIX__ +# define __int16_c_suffix __INT48_C_SUFFIX__ +# define __int8_c_suffix __INT48_C_SUFFIX__ +# else +# define INT48_C(v) v +# define UINT48_C(v) v ## U +# undef __int32_c_suffix +# undef __int16_c_suffix +# undef __int8_c_suffix +# endif /* __INT48_C_SUFFIX__ */ +#endif /* __INT48_TYPE__ */ + + +#ifdef __INT40_TYPE__ +# ifdef __INT40_C_SUFFIX__ +# define INT40_C(v) __int_c(v, __INT40_C_SUFFIX__) +# define UINT40_C(v) __uint_c(v, __INT40_C_SUFFIX__) +# define __int32_c_suffix __INT40_C_SUFFIX__ +# define __int16_c_suffix __INT40_C_SUFFIX__ +# define __int8_c_suffix __INT40_C_SUFFIX__ +# else +# define INT40_C(v) v +# define UINT40_C(v) v ## U +# undef __int32_c_suffix +# undef __int16_c_suffix +# undef __int8_c_suffix +# endif /* __INT40_C_SUFFIX__ */ +#endif /* __INT40_TYPE__ */ + + +#ifdef __INT32_TYPE__ +# ifdef __INT32_C_SUFFIX__ +# define __int32_c_suffix __INT32_C_SUFFIX__ +# define __int16_c_suffix __INT32_C_SUFFIX__ +# define __int8_c_suffix __INT32_C_SUFFIX__ +#else +# undef __int32_c_suffix +# undef __int16_c_suffix +# undef __int8_c_suffix +# endif /* __INT32_C_SUFFIX__ */ +#endif /* __INT32_TYPE__ */ + +#ifdef __int_least32_t +# ifdef __int32_c_suffix +# define INT32_C(v) __int_c(v, __int32_c_suffix) +# define UINT32_C(v) __uint_c(v, __int32_c_suffix) +# else +# define INT32_C(v) v +# define UINT32_C(v) v ## U +# endif /* __int32_c_suffix */ +#endif /* __int_least32_t */ + + +#ifdef __INT24_TYPE__ +# ifdef __INT24_C_SUFFIX__ +# define INT24_C(v) __int_c(v, __INT24_C_SUFFIX__) +# define UINT24_C(v) __uint_c(v, __INT24_C_SUFFIX__) +# define __int16_c_suffix __INT24_C_SUFFIX__ +# define __int8_c_suffix __INT24_C_SUFFIX__ +# else +# define INT24_C(v) v +# define UINT24_C(v) v ## U +# undef __int16_c_suffix +# undef __int8_c_suffix +# endif /* __INT24_C_SUFFIX__ */ +#endif /* __INT24_TYPE__ */ + + +#ifdef __INT16_TYPE__ +# ifdef __INT16_C_SUFFIX__ +# define __int16_c_suffix __INT16_C_SUFFIX__ +# define __int8_c_suffix __INT16_C_SUFFIX__ +#else +# undef __int16_c_suffix +# 
undef __int8_c_suffix
+# endif /* __INT16_C_SUFFIX__ */
+#endif /* __INT16_TYPE__ */
+
+#ifdef __int_least16_t
+# ifdef __int16_c_suffix
+# define INT16_C(v) __int_c(v, __int16_c_suffix)
+# define UINT16_C(v) __uint_c(v, __int16_c_suffix)
+# else
+# define INT16_C(v) v
+# define UINT16_C(v) v ## U
+# endif /* __int16_c_suffix */
+#endif /* __int_least16_t */
+
+
+#ifdef __INT8_TYPE__
+# ifdef __INT8_C_SUFFIX__
+# define __int8_c_suffix __INT8_C_SUFFIX__
+#else
+# undef __int8_c_suffix
+# endif /* __INT8_C_SUFFIX__ */
+#endif /* __INT8_TYPE__ */
+
+#ifdef __int_least8_t
+# ifdef __int8_c_suffix
+# define INT8_C(v) __int_c(v, __int8_c_suffix)
+# define UINT8_C(v) __uint_c(v, __int8_c_suffix)
+# else
+# define INT8_C(v) v
+# define UINT8_C(v) v ## U
+# endif /* __int8_c_suffix */
+#endif /* __int_least8_t */
+
+
+/* C99 7.18.2.1 Limits of exact-width integer types.
+ * C99 7.18.2.2 Limits of minimum-width integer types.
+ * C99 7.18.2.3 Limits of fastest minimum-width integer types.
+ *
+ * The presence of limit macros is completely optional in C99. This
+ * implementation defines limits for all of the types (exact- and
+ * minimum-width) that it defines above, using the limits of the minimum-width
+ * type for any types that do not have exact-width representations.
+ *
+ * As in the type definitions, this section takes an approach of
+ * successive-shrinking to determine which limits to use for the standard (8,
+ * 16, 32, 64) bit widths when they don't have exact representations. It is
+ * therefore important that the definitions be kept in order of descending
+ * widths.
+ *
+ * Note that C++ should not check __STDC_LIMIT_MACROS here, contrary to the
+ * claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]).
+ */
+
+#ifdef __INT64_TYPE__
+# define INT64_MAX INT64_C( 9223372036854775807)
+# define INT64_MIN (-INT64_C( 9223372036854775807)-1)
+# define UINT64_MAX UINT64_C(18446744073709551615)
+# define __INT_LEAST64_MIN INT64_MIN
+# define __INT_LEAST64_MAX INT64_MAX
+# define __UINT_LEAST64_MAX UINT64_MAX
+# define __INT_LEAST32_MIN INT64_MIN
+# define __INT_LEAST32_MAX INT64_MAX
+# define __UINT_LEAST32_MAX UINT64_MAX
+# define __INT_LEAST16_MIN INT64_MIN
+# define __INT_LEAST16_MAX INT64_MAX
+# define __UINT_LEAST16_MAX UINT64_MAX
+# define __INT_LEAST8_MIN INT64_MIN
+# define __INT_LEAST8_MAX INT64_MAX
+# define __UINT_LEAST8_MAX UINT64_MAX
+#endif /* __INT64_TYPE__ */
+
+#ifdef __INT_LEAST64_MIN
+# define INT_LEAST64_MIN __INT_LEAST64_MIN
+# define INT_LEAST64_MAX __INT_LEAST64_MAX
+# define UINT_LEAST64_MAX __UINT_LEAST64_MAX
+# define INT_FAST64_MIN __INT_LEAST64_MIN
+# define INT_FAST64_MAX __INT_LEAST64_MAX
+# define UINT_FAST64_MAX __UINT_LEAST64_MAX
+#endif /* __INT_LEAST64_MIN */
+
+
+#ifdef __INT56_TYPE__
+# define INT56_MAX INT56_C(36028797018963967)
+# define INT56_MIN (-INT56_C(36028797018963967)-1)
+# define UINT56_MAX UINT56_C(72057594037927935)
+# define INT_LEAST56_MIN INT56_MIN
+# define INT_LEAST56_MAX INT56_MAX
+# define UINT_LEAST56_MAX UINT56_MAX
+# define INT_FAST56_MIN INT56_MIN
+# define INT_FAST56_MAX INT56_MAX
+# define UINT_FAST56_MAX UINT56_MAX
+# define __INT_LEAST32_MIN INT56_MIN
+# define __INT_LEAST32_MAX INT56_MAX
+# define __UINT_LEAST32_MAX UINT56_MAX
+# define __INT_LEAST16_MIN INT56_MIN
+# define __INT_LEAST16_MAX INT56_MAX
+# define __UINT_LEAST16_MAX UINT56_MAX
+# define __INT_LEAST8_MIN INT56_MIN
+# define __INT_LEAST8_MAX INT56_MAX
+# define __UINT_LEAST8_MAX UINT56_MAX
+#endif /* __INT56_TYPE__ */
+
+
+#ifdef __INT48_TYPE__
+# define
INT48_MAX INT48_C(140737488355327) +# define INT48_MIN (-INT48_C(140737488355327)-1) +# define UINT48_MAX UINT48_C(281474976710655) +# define INT_LEAST48_MIN INT48_MIN +# define INT_LEAST48_MAX INT48_MAX +# define UINT_LEAST48_MAX UINT48_MAX +# define INT_FAST48_MIN INT48_MIN +# define INT_FAST48_MAX INT48_MAX +# define UINT_FAST48_MAX UINT48_MAX +# define __INT_LEAST32_MIN INT48_MIN +# define __INT_LEAST32_MAX INT48_MAX +# define __UINT_LEAST32_MAX UINT48_MAX +# define __INT_LEAST16_MIN INT48_MIN +# define __INT_LEAST16_MAX INT48_MAX +# define __UINT_LEAST16_MAX UINT48_MAX +# define __INT_LEAST8_MIN INT48_MIN +# define __INT_LEAST8_MAX INT48_MAX +# define __UINT_LEAST8_MAX UINT48_MAX +#endif /* __INT48_TYPE__ */ + + +#ifdef __INT40_TYPE__ +# define INT40_MAX INT40_C(549755813887) +# define INT40_MIN (-INT40_C(549755813887)-1) +# define UINT40_MAX UINT40_C(1099511627775) +# define INT_LEAST40_MIN INT40_MIN +# define INT_LEAST40_MAX INT40_MAX +# define UINT_LEAST40_MAX UINT40_MAX +# define INT_FAST40_MIN INT40_MIN +# define INT_FAST40_MAX INT40_MAX +# define UINT_FAST40_MAX UINT40_MAX +# define __INT_LEAST32_MIN INT40_MIN +# define __INT_LEAST32_MAX INT40_MAX +# define __UINT_LEAST32_MAX UINT40_MAX +# define __INT_LEAST16_MIN INT40_MIN +# define __INT_LEAST16_MAX INT40_MAX +# define __UINT_LEAST16_MAX UINT40_MAX +# define __INT_LEAST8_MIN INT40_MIN +# define __INT_LEAST8_MAX INT40_MAX +# define __UINT_LEAST8_MAX UINT40_MAX +#endif /* __INT40_TYPE__ */ + + +#ifdef __INT32_TYPE__ +# define INT32_MAX INT32_C(2147483647) +# define INT32_MIN (-INT32_C(2147483647)-1) +# define UINT32_MAX UINT32_C(4294967295) +# define __INT_LEAST32_MIN INT32_MIN +# define __INT_LEAST32_MAX INT32_MAX +# define __UINT_LEAST32_MAX UINT32_MAX +# define __INT_LEAST16_MIN INT32_MIN +# define __INT_LEAST16_MAX INT32_MAX +# define __UINT_LEAST16_MAX UINT32_MAX +# define __INT_LEAST8_MIN INT32_MIN +# define __INT_LEAST8_MAX INT32_MAX +# define __UINT_LEAST8_MAX UINT32_MAX +#endif /* __INT32_TYPE__ */ + +#ifdef __INT_LEAST32_MIN +# define INT_LEAST32_MIN __INT_LEAST32_MIN +# define INT_LEAST32_MAX __INT_LEAST32_MAX +# define UINT_LEAST32_MAX __UINT_LEAST32_MAX +# define INT_FAST32_MIN __INT_LEAST32_MIN +# define INT_FAST32_MAX __INT_LEAST32_MAX +# define UINT_FAST32_MAX __UINT_LEAST32_MAX +#endif /* __INT_LEAST32_MIN */ + + +#ifdef __INT24_TYPE__ +# define INT24_MAX INT24_C(8388607) +# define INT24_MIN (-INT24_C(8388607)-1) +# define UINT24_MAX UINT24_C(16777215) +# define INT_LEAST24_MIN INT24_MIN +# define INT_LEAST24_MAX INT24_MAX +# define UINT_LEAST24_MAX UINT24_MAX +# define INT_FAST24_MIN INT24_MIN +# define INT_FAST24_MAX INT24_MAX +# define UINT_FAST24_MAX UINT24_MAX +# define __INT_LEAST16_MIN INT24_MIN +# define __INT_LEAST16_MAX INT24_MAX +# define __UINT_LEAST16_MAX UINT24_MAX +# define __INT_LEAST8_MIN INT24_MIN +# define __INT_LEAST8_MAX INT24_MAX +# define __UINT_LEAST8_MAX UINT24_MAX +#endif /* __INT24_TYPE__ */ + + +#ifdef __INT16_TYPE__ +#define INT16_MAX INT16_C(32767) +#define INT16_MIN (-INT16_C(32767)-1) +#define UINT16_MAX UINT16_C(65535) +# define __INT_LEAST16_MIN INT16_MIN +# define __INT_LEAST16_MAX INT16_MAX +# define __UINT_LEAST16_MAX UINT16_MAX +# define __INT_LEAST8_MIN INT16_MIN +# define __INT_LEAST8_MAX INT16_MAX +# define __UINT_LEAST8_MAX UINT16_MAX +#endif /* __INT16_TYPE__ */ + +#ifdef __INT_LEAST16_MIN +# define INT_LEAST16_MIN __INT_LEAST16_MIN +# define INT_LEAST16_MAX __INT_LEAST16_MAX +# define UINT_LEAST16_MAX __UINT_LEAST16_MAX +# define INT_FAST16_MIN __INT_LEAST16_MIN +# 
define INT_FAST16_MAX __INT_LEAST16_MAX +# define UINT_FAST16_MAX __UINT_LEAST16_MAX +#endif /* __INT_LEAST16_MIN */ + + +#ifdef __INT8_TYPE__ +# define INT8_MAX INT8_C(127) +# define INT8_MIN (-INT8_C(127)-1) +# define UINT8_MAX UINT8_C(255) +# define __INT_LEAST8_MIN INT8_MIN +# define __INT_LEAST8_MAX INT8_MAX +# define __UINT_LEAST8_MAX UINT8_MAX +#endif /* __INT8_TYPE__ */ + +#ifdef __INT_LEAST8_MIN +# define INT_LEAST8_MIN __INT_LEAST8_MIN +# define INT_LEAST8_MAX __INT_LEAST8_MAX +# define UINT_LEAST8_MAX __UINT_LEAST8_MAX +# define INT_FAST8_MIN __INT_LEAST8_MIN +# define INT_FAST8_MAX __INT_LEAST8_MAX +# define UINT_FAST8_MAX __UINT_LEAST8_MAX +#endif /* __INT_LEAST8_MIN */ + +/* Some utility macros */ +#define __INTN_MIN(n) __stdint_join3( INT, n, _MIN) +#define __INTN_MAX(n) __stdint_join3( INT, n, _MAX) +#define __UINTN_MAX(n) __stdint_join3(UINT, n, _MAX) +#define __INTN_C(n, v) __stdint_join3( INT, n, _C(v)) +#define __UINTN_C(n, v) __stdint_join3(UINT, n, _C(v)) + +/* C99 7.18.2.4 Limits of integer types capable of holding object pointers. */ +/* C99 7.18.3 Limits of other integer types. */ + +#define INTPTR_MIN __INTN_MIN(__INTPTR_WIDTH__) +#define INTPTR_MAX __INTN_MAX(__INTPTR_WIDTH__) +#define UINTPTR_MAX __UINTN_MAX(__INTPTR_WIDTH__) +#define PTRDIFF_MIN __INTN_MIN(__PTRDIFF_WIDTH__) +#define PTRDIFF_MAX __INTN_MAX(__PTRDIFF_WIDTH__) +#define SIZE_MAX __UINTN_MAX(__SIZE_WIDTH__) + +/* ISO9899:2011 7.20 (C11 Annex K): Define RSIZE_MAX if __STDC_WANT_LIB_EXT1__ + * is enabled. */ +#if defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 +#define RSIZE_MAX (SIZE_MAX >> 1) +#endif + +/* C99 7.18.2.5 Limits of greatest-width integer types. */ +#define INTMAX_MIN __INTN_MIN(__INTMAX_WIDTH__) +#define INTMAX_MAX __INTN_MAX(__INTMAX_WIDTH__) +#define UINTMAX_MAX __UINTN_MAX(__INTMAX_WIDTH__) + +/* C99 7.18.3 Limits of other integer types. */ +#define SIG_ATOMIC_MIN __INTN_MIN(__SIG_ATOMIC_WIDTH__) +#define SIG_ATOMIC_MAX __INTN_MAX(__SIG_ATOMIC_WIDTH__) +#ifdef __WINT_UNSIGNED__ +# define WINT_MIN __UINTN_C(__WINT_WIDTH__, 0) +# define WINT_MAX __UINTN_MAX(__WINT_WIDTH__) +#else +# define WINT_MIN __INTN_MIN(__WINT_WIDTH__) +# define WINT_MAX __INTN_MAX(__WINT_WIDTH__) +#endif + +#ifndef WCHAR_MAX +# define WCHAR_MAX __WCHAR_MAX__ +#endif +#ifndef WCHAR_MIN +# if __WCHAR_MAX__ == __INTN_MAX(__WCHAR_WIDTH__) +# define WCHAR_MIN __INTN_MIN(__WCHAR_WIDTH__) +# else +# define WCHAR_MIN __UINTN_C(__WCHAR_WIDTH__, 0) +# endif +#endif + +/* 7.18.4.2 Macros for greatest-width integer constants. 
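
A few sanity checks one can write against the limit macros above (a sketch, not part of the patch). intN_t is required to be two's complement, which is why INT8_MIN is exactly -128:

#include <stdint.h>

_Static_assert(INT8_MIN == -128 && INT8_MAX == 127, "intN_t is two's complement");
_Static_assert(INTMAX_MAX >= INT64_MAX, "intmax_t covers the widest type");

#if WCHAR_MIN == 0
/* wchar_t is unsigned on this target (see the WCHAR_MIN fallback above). */
#endif
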
*/
+#define INTMAX_C(v) __INTN_C(__INTMAX_WIDTH__, v)
+#define UINTMAX_C(v) __UINTN_C(__INTMAX_WIDTH__, v)
+
+#endif /* __STDC_HOSTED__ */
+#endif /* __CLANG_STDINT_H */
diff --git a/c_headers/stdnoreturn.h b/c_headers/stdnoreturn.h
new file mode 100644
index 0000000000..a7a301d7e0
--- /dev/null
+++ b/c_headers/stdnoreturn.h
@@ -0,0 +1,30 @@
+/*===---- stdnoreturn.h - Standard header for noreturn macro ---------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __STDNORETURN_H
+#define __STDNORETURN_H
+
+#define noreturn _Noreturn
+#define __noreturn_is_defined 1
+
+#endif /* __STDNORETURN_H */
diff --git a/c_headers/tbmintrin.h b/c_headers/tbmintrin.h
new file mode 100644
index 0000000000..48c0b07f42
--- /dev/null
+++ b/c_headers/tbmintrin.h
@@ -0,0 +1,154 @@
+/*===---- tbmintrin.h - TBM intrinsics -------------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __TBM__
+#error "TBM instruction set is not enabled"
+#endif
+
+#ifndef __X86INTRIN_H
+#error "Never use <tbmintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef __TBMINTRIN_H
+#define __TBMINTRIN_H
+
+/* Define the default attributes for the functions in this file.
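
The stdnoreturn.h shim above is purely a spelling: noreturn expands to the C11 keyword _Noreturn. A minimal sketch (not part of the patch):

#include <stdnoreturn.h>
#include <stdlib.h>

noreturn void die(void)   /* same as: _Noreturn void die(void) */
{
    exit(1);              /* must never return, or behavior is undefined */
}
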
*/ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) + +#define __bextri_u32(a, b) (__builtin_ia32_bextri_u32((a), (b))) + +static __inline__ unsigned int __DEFAULT_FN_ATTRS +__blcfill_u32(unsigned int a) +{ + return a & (a + 1); +} + +static __inline__ unsigned int __DEFAULT_FN_ATTRS +__blci_u32(unsigned int a) +{ + return a | ~(a + 1); +} + +static __inline__ unsigned int __DEFAULT_FN_ATTRS +__blcic_u32(unsigned int a) +{ + return ~a & (a + 1); +} + +static __inline__ unsigned int __DEFAULT_FN_ATTRS +__blcmsk_u32(unsigned int a) +{ + return a ^ (a + 1); +} + +static __inline__ unsigned int __DEFAULT_FN_ATTRS +__blcs_u32(unsigned int a) +{ + return a | (a + 1); +} + +static __inline__ unsigned int __DEFAULT_FN_ATTRS +__blsfill_u32(unsigned int a) +{ + return a | (a - 1); +} + +static __inline__ unsigned int __DEFAULT_FN_ATTRS +__blsic_u32(unsigned int a) +{ + return ~a | (a - 1); +} + +static __inline__ unsigned int __DEFAULT_FN_ATTRS +__t1mskc_u32(unsigned int a) +{ + return ~a | (a + 1); +} + +static __inline__ unsigned int __DEFAULT_FN_ATTRS +__tzmsk_u32(unsigned int a) +{ + return ~a & (a - 1); +} + +#ifdef __x86_64__ +#define __bextri_u64(a, b) (__builtin_ia32_bextri_u64((a), (int)(b))) + +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +__blcfill_u64(unsigned long long a) +{ + return a & (a + 1); +} + +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +__blci_u64(unsigned long long a) +{ + return a | ~(a + 1); +} + +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +__blcic_u64(unsigned long long a) +{ + return ~a & (a + 1); +} + +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +__blcmsk_u64(unsigned long long a) +{ + return a ^ (a + 1); +} + +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +__blcs_u64(unsigned long long a) +{ + return a | (a + 1); +} + +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +__blsfill_u64(unsigned long long a) +{ + return a | (a - 1); +} + +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +__blsic_u64(unsigned long long a) +{ + return ~a | (a - 1); +} + +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +__t1mskc_u64(unsigned long long a) +{ + return ~a | (a + 1); +} + +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +__tzmsk_u64(unsigned long long a) +{ + return ~a & (a - 1); +} +#endif + +#undef __DEFAULT_FN_ATTRS + +#endif /* __TBMINTRIN_H */ diff --git a/c_headers/tgmath.h b/c_headers/tgmath.h new file mode 100644 index 0000000000..a48e267e60 --- /dev/null +++ b/c_headers/tgmath.h @@ -0,0 +1,1374 @@ +/*===---- tgmath.h - Standard header for type generic math ----------------===*\ + * + * Copyright (c) 2009 Howard Hinnant + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+\*===----------------------------------------------------------------------===*/
+
+#ifndef __TGMATH_H
+#define __TGMATH_H
+
+/* C99 7.22 Type-generic math <tgmath.h>. */
+#include <math.h>
+
+/* C++ handles type genericity with overloading in math.h. */
+#ifndef __cplusplus
+#include <complex.h>
+
+#define _TG_ATTRSp __attribute__((__overloadable__))
+#define _TG_ATTRS __attribute__((__overloadable__, __always_inline__))
+
+// promotion
+
+typedef void _Argument_type_is_not_arithmetic;
+static _Argument_type_is_not_arithmetic __tg_promote(...)
+  __attribute__((__unavailable__,__overloadable__));
+static double _TG_ATTRSp __tg_promote(int);
+static double _TG_ATTRSp __tg_promote(unsigned int);
+static double _TG_ATTRSp __tg_promote(long);
+static double _TG_ATTRSp __tg_promote(unsigned long);
+static double _TG_ATTRSp __tg_promote(long long);
+static double _TG_ATTRSp __tg_promote(unsigned long long);
+static float _TG_ATTRSp __tg_promote(float);
+static double _TG_ATTRSp __tg_promote(double);
+static long double _TG_ATTRSp __tg_promote(long double);
+static float _Complex _TG_ATTRSp __tg_promote(float _Complex);
+static double _Complex _TG_ATTRSp __tg_promote(double _Complex);
+static long double _Complex _TG_ATTRSp __tg_promote(long double _Complex);
+
+#define __tg_promote1(__x) (__typeof__(__tg_promote(__x)))
+#define __tg_promote2(__x, __y) (__typeof__(__tg_promote(__x) + \
+                                            __tg_promote(__y)))
+#define __tg_promote3(__x, __y, __z) (__typeof__(__tg_promote(__x) + \
+                                                 __tg_promote(__y) + \
+                                                 __tg_promote(__z)))
+
+// acos
+
+static float
+    _TG_ATTRS
+    __tg_acos(float __x) {return acosf(__x);}
+
+static double
+    _TG_ATTRS
+    __tg_acos(double __x) {return acos(__x);}
+
+static long double
+    _TG_ATTRS
+    __tg_acos(long double __x) {return acosl(__x);}
+
+static float _Complex
+    _TG_ATTRS
+    __tg_acos(float _Complex __x) {return cacosf(__x);}
+
+static double _Complex
+    _TG_ATTRS
+    __tg_acos(double _Complex __x) {return cacos(__x);}
+
+static long double _Complex
+    _TG_ATTRS
+    __tg_acos(long double _Complex __x) {return cacosl(__x);}
+
+#undef acos
+#define acos(__x) __tg_acos(__tg_promote1((__x))(__x))
+
+// asin
+
+static float
+    _TG_ATTRS
+    __tg_asin(float __x) {return asinf(__x);}
+
+static double
+    _TG_ATTRS
+    __tg_asin(double __x) {return asin(__x);}
+
+static long double
+    _TG_ATTRS
+    __tg_asin(long double __x) {return asinl(__x);}
+
+static float _Complex
+    _TG_ATTRS
+    __tg_asin(float _Complex __x) {return casinf(__x);}
+
+static double _Complex
+    _TG_ATTRS
+    __tg_asin(double _Complex __x) {return casin(__x);}
+
+static long double _Complex
+    _TG_ATTRS
+    __tg_asin(long double _Complex __x) {return casinl(__x);}
+
+#undef asin
+#define asin(__x) __tg_asin(__tg_promote1((__x))(__x))
+
+// atan
+
+static float
+    _TG_ATTRS
+    __tg_atan(float __x) {return atanf(__x);}
+
+static double
+    _TG_ATTRS
+    __tg_atan(double __x) {return atan(__x);}
+
+static long double
+    _TG_ATTRS
+    __tg_atan(long double __x) {return atanl(__x);}
+
+static float _Complex
+    _TG_ATTRS
+    __tg_atan(float _Complex __x) {return catanf(__x);}
+
+static double _Complex
+    _TG_ATTRS
+    __tg_atan(double _Complex __x) {return catan(__x);}
+
+static long double _Complex
+    _TG_ATTRS
+    __tg_atan(long double _Complex __x) {return catanl(__x);}
+
+#undef atan
+#define atan(__x)
__tg_atan(__tg_promote1((__x))(__x)) + +// acosh + +static float + _TG_ATTRS + __tg_acosh(float __x) {return acoshf(__x);} + +static double + _TG_ATTRS + __tg_acosh(double __x) {return acosh(__x);} + +static long double + _TG_ATTRS + __tg_acosh(long double __x) {return acoshl(__x);} + +static float _Complex + _TG_ATTRS + __tg_acosh(float _Complex __x) {return cacoshf(__x);} + +static double _Complex + _TG_ATTRS + __tg_acosh(double _Complex __x) {return cacosh(__x);} + +static long double _Complex + _TG_ATTRS + __tg_acosh(long double _Complex __x) {return cacoshl(__x);} + +#undef acosh +#define acosh(__x) __tg_acosh(__tg_promote1((__x))(__x)) + +// asinh + +static float + _TG_ATTRS + __tg_asinh(float __x) {return asinhf(__x);} + +static double + _TG_ATTRS + __tg_asinh(double __x) {return asinh(__x);} + +static long double + _TG_ATTRS + __tg_asinh(long double __x) {return asinhl(__x);} + +static float _Complex + _TG_ATTRS + __tg_asinh(float _Complex __x) {return casinhf(__x);} + +static double _Complex + _TG_ATTRS + __tg_asinh(double _Complex __x) {return casinh(__x);} + +static long double _Complex + _TG_ATTRS + __tg_asinh(long double _Complex __x) {return casinhl(__x);} + +#undef asinh +#define asinh(__x) __tg_asinh(__tg_promote1((__x))(__x)) + +// atanh + +static float + _TG_ATTRS + __tg_atanh(float __x) {return atanhf(__x);} + +static double + _TG_ATTRS + __tg_atanh(double __x) {return atanh(__x);} + +static long double + _TG_ATTRS + __tg_atanh(long double __x) {return atanhl(__x);} + +static float _Complex + _TG_ATTRS + __tg_atanh(float _Complex __x) {return catanhf(__x);} + +static double _Complex + _TG_ATTRS + __tg_atanh(double _Complex __x) {return catanh(__x);} + +static long double _Complex + _TG_ATTRS + __tg_atanh(long double _Complex __x) {return catanhl(__x);} + +#undef atanh +#define atanh(__x) __tg_atanh(__tg_promote1((__x))(__x)) + +// cos + +static float + _TG_ATTRS + __tg_cos(float __x) {return cosf(__x);} + +static double + _TG_ATTRS + __tg_cos(double __x) {return cos(__x);} + +static long double + _TG_ATTRS + __tg_cos(long double __x) {return cosl(__x);} + +static float _Complex + _TG_ATTRS + __tg_cos(float _Complex __x) {return ccosf(__x);} + +static double _Complex + _TG_ATTRS + __tg_cos(double _Complex __x) {return ccos(__x);} + +static long double _Complex + _TG_ATTRS + __tg_cos(long double _Complex __x) {return ccosl(__x);} + +#undef cos +#define cos(__x) __tg_cos(__tg_promote1((__x))(__x)) + +// sin + +static float + _TG_ATTRS + __tg_sin(float __x) {return sinf(__x);} + +static double + _TG_ATTRS + __tg_sin(double __x) {return sin(__x);} + +static long double + _TG_ATTRS + __tg_sin(long double __x) {return sinl(__x);} + +static float _Complex + _TG_ATTRS + __tg_sin(float _Complex __x) {return csinf(__x);} + +static double _Complex + _TG_ATTRS + __tg_sin(double _Complex __x) {return csin(__x);} + +static long double _Complex + _TG_ATTRS + __tg_sin(long double _Complex __x) {return csinl(__x);} + +#undef sin +#define sin(__x) __tg_sin(__tg_promote1((__x))(__x)) + +// tan + +static float + _TG_ATTRS + __tg_tan(float __x) {return tanf(__x);} + +static double + _TG_ATTRS + __tg_tan(double __x) {return tan(__x);} + +static long double + _TG_ATTRS + __tg_tan(long double __x) {return tanl(__x);} + +static float _Complex + _TG_ATTRS + __tg_tan(float _Complex __x) {return ctanf(__x);} + +static double _Complex + _TG_ATTRS + __tg_tan(double _Complex __x) {return ctan(__x);} + +static long double _Complex + _TG_ATTRS + __tg_tan(long double _Complex __x) {return ctanl(__x);} + 
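
What the __tg_promote machinery achieves (a sketch, not part of the patch): each macro re-casts its argument to the promoted type, so a single spelling dispatches on argument type, with integer arguments promoting to double:

#include <tgmath.h>   /* pulls in math.h, and complex.h in C mode */

void tg_demo(void)
{
    float f = sin(0.5f);                  /* resolves to sinf() */
    double d = sin(1);                    /* int promotes to double: sin() */
    double complex z = sin(0.5 + 1.0*I);  /* complex argument: csin() */
    (void)f; (void)d; (void)z;
}
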
+#undef tan +#define tan(__x) __tg_tan(__tg_promote1((__x))(__x)) + +// cosh + +static float + _TG_ATTRS + __tg_cosh(float __x) {return coshf(__x);} + +static double + _TG_ATTRS + __tg_cosh(double __x) {return cosh(__x);} + +static long double + _TG_ATTRS + __tg_cosh(long double __x) {return coshl(__x);} + +static float _Complex + _TG_ATTRS + __tg_cosh(float _Complex __x) {return ccoshf(__x);} + +static double _Complex + _TG_ATTRS + __tg_cosh(double _Complex __x) {return ccosh(__x);} + +static long double _Complex + _TG_ATTRS + __tg_cosh(long double _Complex __x) {return ccoshl(__x);} + +#undef cosh +#define cosh(__x) __tg_cosh(__tg_promote1((__x))(__x)) + +// sinh + +static float + _TG_ATTRS + __tg_sinh(float __x) {return sinhf(__x);} + +static double + _TG_ATTRS + __tg_sinh(double __x) {return sinh(__x);} + +static long double + _TG_ATTRS + __tg_sinh(long double __x) {return sinhl(__x);} + +static float _Complex + _TG_ATTRS + __tg_sinh(float _Complex __x) {return csinhf(__x);} + +static double _Complex + _TG_ATTRS + __tg_sinh(double _Complex __x) {return csinh(__x);} + +static long double _Complex + _TG_ATTRS + __tg_sinh(long double _Complex __x) {return csinhl(__x);} + +#undef sinh +#define sinh(__x) __tg_sinh(__tg_promote1((__x))(__x)) + +// tanh + +static float + _TG_ATTRS + __tg_tanh(float __x) {return tanhf(__x);} + +static double + _TG_ATTRS + __tg_tanh(double __x) {return tanh(__x);} + +static long double + _TG_ATTRS + __tg_tanh(long double __x) {return tanhl(__x);} + +static float _Complex + _TG_ATTRS + __tg_tanh(float _Complex __x) {return ctanhf(__x);} + +static double _Complex + _TG_ATTRS + __tg_tanh(double _Complex __x) {return ctanh(__x);} + +static long double _Complex + _TG_ATTRS + __tg_tanh(long double _Complex __x) {return ctanhl(__x);} + +#undef tanh +#define tanh(__x) __tg_tanh(__tg_promote1((__x))(__x)) + +// exp + +static float + _TG_ATTRS + __tg_exp(float __x) {return expf(__x);} + +static double + _TG_ATTRS + __tg_exp(double __x) {return exp(__x);} + +static long double + _TG_ATTRS + __tg_exp(long double __x) {return expl(__x);} + +static float _Complex + _TG_ATTRS + __tg_exp(float _Complex __x) {return cexpf(__x);} + +static double _Complex + _TG_ATTRS + __tg_exp(double _Complex __x) {return cexp(__x);} + +static long double _Complex + _TG_ATTRS + __tg_exp(long double _Complex __x) {return cexpl(__x);} + +#undef exp +#define exp(__x) __tg_exp(__tg_promote1((__x))(__x)) + +// log + +static float + _TG_ATTRS + __tg_log(float __x) {return logf(__x);} + +static double + _TG_ATTRS + __tg_log(double __x) {return log(__x);} + +static long double + _TG_ATTRS + __tg_log(long double __x) {return logl(__x);} + +static float _Complex + _TG_ATTRS + __tg_log(float _Complex __x) {return clogf(__x);} + +static double _Complex + _TG_ATTRS + __tg_log(double _Complex __x) {return clog(__x);} + +static long double _Complex + _TG_ATTRS + __tg_log(long double _Complex __x) {return clogl(__x);} + +#undef log +#define log(__x) __tg_log(__tg_promote1((__x))(__x)) + +// pow + +static float + _TG_ATTRS + __tg_pow(float __x, float __y) {return powf(__x, __y);} + +static double + _TG_ATTRS + __tg_pow(double __x, double __y) {return pow(__x, __y);} + +static long double + _TG_ATTRS + __tg_pow(long double __x, long double __y) {return powl(__x, __y);} + +static float _Complex + _TG_ATTRS + __tg_pow(float _Complex __x, float _Complex __y) {return cpowf(__x, __y);} + +static double _Complex + _TG_ATTRS + __tg_pow(double _Complex __x, double _Complex __y) {return cpow(__x, __y);} + +static long 
double _Complex + _TG_ATTRS + __tg_pow(long double _Complex __x, long double _Complex __y) + {return cpowl(__x, __y);} + +#undef pow +#define pow(__x, __y) __tg_pow(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// sqrt + +static float + _TG_ATTRS + __tg_sqrt(float __x) {return sqrtf(__x);} + +static double + _TG_ATTRS + __tg_sqrt(double __x) {return sqrt(__x);} + +static long double + _TG_ATTRS + __tg_sqrt(long double __x) {return sqrtl(__x);} + +static float _Complex + _TG_ATTRS + __tg_sqrt(float _Complex __x) {return csqrtf(__x);} + +static double _Complex + _TG_ATTRS + __tg_sqrt(double _Complex __x) {return csqrt(__x);} + +static long double _Complex + _TG_ATTRS + __tg_sqrt(long double _Complex __x) {return csqrtl(__x);} + +#undef sqrt +#define sqrt(__x) __tg_sqrt(__tg_promote1((__x))(__x)) + +// fabs + +static float + _TG_ATTRS + __tg_fabs(float __x) {return fabsf(__x);} + +static double + _TG_ATTRS + __tg_fabs(double __x) {return fabs(__x);} + +static long double + _TG_ATTRS + __tg_fabs(long double __x) {return fabsl(__x);} + +static float + _TG_ATTRS + __tg_fabs(float _Complex __x) {return cabsf(__x);} + +static double + _TG_ATTRS + __tg_fabs(double _Complex __x) {return cabs(__x);} + +static long double + _TG_ATTRS + __tg_fabs(long double _Complex __x) {return cabsl(__x);} + +#undef fabs +#define fabs(__x) __tg_fabs(__tg_promote1((__x))(__x)) + +// atan2 + +static float + _TG_ATTRS + __tg_atan2(float __x, float __y) {return atan2f(__x, __y);} + +static double + _TG_ATTRS + __tg_atan2(double __x, double __y) {return atan2(__x, __y);} + +static long double + _TG_ATTRS + __tg_atan2(long double __x, long double __y) {return atan2l(__x, __y);} + +#undef atan2 +#define atan2(__x, __y) __tg_atan2(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// cbrt + +static float + _TG_ATTRS + __tg_cbrt(float __x) {return cbrtf(__x);} + +static double + _TG_ATTRS + __tg_cbrt(double __x) {return cbrt(__x);} + +static long double + _TG_ATTRS + __tg_cbrt(long double __x) {return cbrtl(__x);} + +#undef cbrt +#define cbrt(__x) __tg_cbrt(__tg_promote1((__x))(__x)) + +// ceil + +static float + _TG_ATTRS + __tg_ceil(float __x) {return ceilf(__x);} + +static double + _TG_ATTRS + __tg_ceil(double __x) {return ceil(__x);} + +static long double + _TG_ATTRS + __tg_ceil(long double __x) {return ceill(__x);} + +#undef ceil +#define ceil(__x) __tg_ceil(__tg_promote1((__x))(__x)) + +// copysign + +static float + _TG_ATTRS + __tg_copysign(float __x, float __y) {return copysignf(__x, __y);} + +static double + _TG_ATTRS + __tg_copysign(double __x, double __y) {return copysign(__x, __y);} + +static long double + _TG_ATTRS + __tg_copysign(long double __x, long double __y) {return copysignl(__x, __y);} + +#undef copysign +#define copysign(__x, __y) __tg_copysign(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// erf + +static float + _TG_ATTRS + __tg_erf(float __x) {return erff(__x);} + +static double + _TG_ATTRS + __tg_erf(double __x) {return erf(__x);} + +static long double + _TG_ATTRS + __tg_erf(long double __x) {return erfl(__x);} + +#undef erf +#define erf(__x) __tg_erf(__tg_promote1((__x))(__x)) + +// erfc + +static float + _TG_ATTRS + __tg_erfc(float __x) {return erfcf(__x);} + +static double + _TG_ATTRS + __tg_erfc(double __x) {return erfc(__x);} + +static long double + _TG_ATTRS + __tg_erfc(long double __x) {return erfcl(__x);} + +#undef erfc +#define erfc(__x) __tg_erfc(__tg_promote1((__x))(__x)) + +// exp2 + +static float + 
_TG_ATTRS + __tg_exp2(float __x) {return exp2f(__x);} + +static double + _TG_ATTRS + __tg_exp2(double __x) {return exp2(__x);} + +static long double + _TG_ATTRS + __tg_exp2(long double __x) {return exp2l(__x);} + +#undef exp2 +#define exp2(__x) __tg_exp2(__tg_promote1((__x))(__x)) + +// expm1 + +static float + _TG_ATTRS + __tg_expm1(float __x) {return expm1f(__x);} + +static double + _TG_ATTRS + __tg_expm1(double __x) {return expm1(__x);} + +static long double + _TG_ATTRS + __tg_expm1(long double __x) {return expm1l(__x);} + +#undef expm1 +#define expm1(__x) __tg_expm1(__tg_promote1((__x))(__x)) + +// fdim + +static float + _TG_ATTRS + __tg_fdim(float __x, float __y) {return fdimf(__x, __y);} + +static double + _TG_ATTRS + __tg_fdim(double __x, double __y) {return fdim(__x, __y);} + +static long double + _TG_ATTRS + __tg_fdim(long double __x, long double __y) {return fdiml(__x, __y);} + +#undef fdim +#define fdim(__x, __y) __tg_fdim(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// floor + +static float + _TG_ATTRS + __tg_floor(float __x) {return floorf(__x);} + +static double + _TG_ATTRS + __tg_floor(double __x) {return floor(__x);} + +static long double + _TG_ATTRS + __tg_floor(long double __x) {return floorl(__x);} + +#undef floor +#define floor(__x) __tg_floor(__tg_promote1((__x))(__x)) + +// fma + +static float + _TG_ATTRS + __tg_fma(float __x, float __y, float __z) + {return fmaf(__x, __y, __z);} + +static double + _TG_ATTRS + __tg_fma(double __x, double __y, double __z) + {return fma(__x, __y, __z);} + +static long double + _TG_ATTRS + __tg_fma(long double __x,long double __y, long double __z) + {return fmal(__x, __y, __z);} + +#undef fma +#define fma(__x, __y, __z) \ + __tg_fma(__tg_promote3((__x), (__y), (__z))(__x), \ + __tg_promote3((__x), (__y), (__z))(__y), \ + __tg_promote3((__x), (__y), (__z))(__z)) + +// fmax + +static float + _TG_ATTRS + __tg_fmax(float __x, float __y) {return fmaxf(__x, __y);} + +static double + _TG_ATTRS + __tg_fmax(double __x, double __y) {return fmax(__x, __y);} + +static long double + _TG_ATTRS + __tg_fmax(long double __x, long double __y) {return fmaxl(__x, __y);} + +#undef fmax +#define fmax(__x, __y) __tg_fmax(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// fmin + +static float + _TG_ATTRS + __tg_fmin(float __x, float __y) {return fminf(__x, __y);} + +static double + _TG_ATTRS + __tg_fmin(double __x, double __y) {return fmin(__x, __y);} + +static long double + _TG_ATTRS + __tg_fmin(long double __x, long double __y) {return fminl(__x, __y);} + +#undef fmin +#define fmin(__x, __y) __tg_fmin(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// fmod + +static float + _TG_ATTRS + __tg_fmod(float __x, float __y) {return fmodf(__x, __y);} + +static double + _TG_ATTRS + __tg_fmod(double __x, double __y) {return fmod(__x, __y);} + +static long double + _TG_ATTRS + __tg_fmod(long double __x, long double __y) {return fmodl(__x, __y);} + +#undef fmod +#define fmod(__x, __y) __tg_fmod(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// frexp + +static float + _TG_ATTRS + __tg_frexp(float __x, int* __y) {return frexpf(__x, __y);} + +static double + _TG_ATTRS + __tg_frexp(double __x, int* __y) {return frexp(__x, __y);} + +static long double + _TG_ATTRS + __tg_frexp(long double __x, int* __y) {return frexpl(__x, __y);} + +#undef frexp +#define frexp(__x, __y) __tg_frexp(__tg_promote1((__x))(__x), __y) + +// hypot + +static float + _TG_ATTRS + 
__tg_hypot(float __x, float __y) {return hypotf(__x, __y);} + +static double + _TG_ATTRS + __tg_hypot(double __x, double __y) {return hypot(__x, __y);} + +static long double + _TG_ATTRS + __tg_hypot(long double __x, long double __y) {return hypotl(__x, __y);} + +#undef hypot +#define hypot(__x, __y) __tg_hypot(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// ilogb + +static int + _TG_ATTRS + __tg_ilogb(float __x) {return ilogbf(__x);} + +static int + _TG_ATTRS + __tg_ilogb(double __x) {return ilogb(__x);} + +static int + _TG_ATTRS + __tg_ilogb(long double __x) {return ilogbl(__x);} + +#undef ilogb +#define ilogb(__x) __tg_ilogb(__tg_promote1((__x))(__x)) + +// ldexp + +static float + _TG_ATTRS + __tg_ldexp(float __x, int __y) {return ldexpf(__x, __y);} + +static double + _TG_ATTRS + __tg_ldexp(double __x, int __y) {return ldexp(__x, __y);} + +static long double + _TG_ATTRS + __tg_ldexp(long double __x, int __y) {return ldexpl(__x, __y);} + +#undef ldexp +#define ldexp(__x, __y) __tg_ldexp(__tg_promote1((__x))(__x), __y) + +// lgamma + +static float + _TG_ATTRS + __tg_lgamma(float __x) {return lgammaf(__x);} + +static double + _TG_ATTRS + __tg_lgamma(double __x) {return lgamma(__x);} + +static long double + _TG_ATTRS + __tg_lgamma(long double __x) {return lgammal(__x);} + +#undef lgamma +#define lgamma(__x) __tg_lgamma(__tg_promote1((__x))(__x)) + +// llrint + +static long long + _TG_ATTRS + __tg_llrint(float __x) {return llrintf(__x);} + +static long long + _TG_ATTRS + __tg_llrint(double __x) {return llrint(__x);} + +static long long + _TG_ATTRS + __tg_llrint(long double __x) {return llrintl(__x);} + +#undef llrint +#define llrint(__x) __tg_llrint(__tg_promote1((__x))(__x)) + +// llround + +static long long + _TG_ATTRS + __tg_llround(float __x) {return llroundf(__x);} + +static long long + _TG_ATTRS + __tg_llround(double __x) {return llround(__x);} + +static long long + _TG_ATTRS + __tg_llround(long double __x) {return llroundl(__x);} + +#undef llround +#define llround(__x) __tg_llround(__tg_promote1((__x))(__x)) + +// log10 + +static float + _TG_ATTRS + __tg_log10(float __x) {return log10f(__x);} + +static double + _TG_ATTRS + __tg_log10(double __x) {return log10(__x);} + +static long double + _TG_ATTRS + __tg_log10(long double __x) {return log10l(__x);} + +#undef log10 +#define log10(__x) __tg_log10(__tg_promote1((__x))(__x)) + +// log1p + +static float + _TG_ATTRS + __tg_log1p(float __x) {return log1pf(__x);} + +static double + _TG_ATTRS + __tg_log1p(double __x) {return log1p(__x);} + +static long double + _TG_ATTRS + __tg_log1p(long double __x) {return log1pl(__x);} + +#undef log1p +#define log1p(__x) __tg_log1p(__tg_promote1((__x))(__x)) + +// log2 + +static float + _TG_ATTRS + __tg_log2(float __x) {return log2f(__x);} + +static double + _TG_ATTRS + __tg_log2(double __x) {return log2(__x);} + +static long double + _TG_ATTRS + __tg_log2(long double __x) {return log2l(__x);} + +#undef log2 +#define log2(__x) __tg_log2(__tg_promote1((__x))(__x)) + +// logb + +static float + _TG_ATTRS + __tg_logb(float __x) {return logbf(__x);} + +static double + _TG_ATTRS + __tg_logb(double __x) {return logb(__x);} + +static long double + _TG_ATTRS + __tg_logb(long double __x) {return logbl(__x);} + +#undef logb +#define logb(__x) __tg_logb(__tg_promote1((__x))(__x)) + +// lrint + +static long + _TG_ATTRS + __tg_lrint(float __x) {return lrintf(__x);} + +static long + _TG_ATTRS + __tg_lrint(double __x) {return lrint(__x);} + +static long + _TG_ATTRS + __tg_lrint(long double 
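/*
 * Note (illustrative): the integer-returning macros in this group differ
 * from the rest of <tgmath.h> -- the operand still promotes, but the
 * result type is fixed: int (ilogb), long (lrint/lround), or long long
 * (llrint/llround). For example:
 *
 *   long n = lrint(2.5f);   // operand promotes to float -> lrintf
 */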
__x) {return lrintl(__x);} + +#undef lrint +#define lrint(__x) __tg_lrint(__tg_promote1((__x))(__x)) + +// lround + +static long + _TG_ATTRS + __tg_lround(float __x) {return lroundf(__x);} + +static long + _TG_ATTRS + __tg_lround(double __x) {return lround(__x);} + +static long + _TG_ATTRS + __tg_lround(long double __x) {return lroundl(__x);} + +#undef lround +#define lround(__x) __tg_lround(__tg_promote1((__x))(__x)) + +// nearbyint + +static float + _TG_ATTRS + __tg_nearbyint(float __x) {return nearbyintf(__x);} + +static double + _TG_ATTRS + __tg_nearbyint(double __x) {return nearbyint(__x);} + +static long double + _TG_ATTRS + __tg_nearbyint(long double __x) {return nearbyintl(__x);} + +#undef nearbyint +#define nearbyint(__x) __tg_nearbyint(__tg_promote1((__x))(__x)) + +// nextafter + +static float + _TG_ATTRS + __tg_nextafter(float __x, float __y) {return nextafterf(__x, __y);} + +static double + _TG_ATTRS + __tg_nextafter(double __x, double __y) {return nextafter(__x, __y);} + +static long double + _TG_ATTRS + __tg_nextafter(long double __x, long double __y) {return nextafterl(__x, __y);} + +#undef nextafter +#define nextafter(__x, __y) __tg_nextafter(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// nexttoward + +static float + _TG_ATTRS + __tg_nexttoward(float __x, long double __y) {return nexttowardf(__x, __y);} + +static double + _TG_ATTRS + __tg_nexttoward(double __x, long double __y) {return nexttoward(__x, __y);} + +static long double + _TG_ATTRS + __tg_nexttoward(long double __x, long double __y) {return nexttowardl(__x, __y);} + +#undef nexttoward +#define nexttoward(__x, __y) __tg_nexttoward(__tg_promote1((__x))(__x), (__y)) + +// remainder + +static float + _TG_ATTRS + __tg_remainder(float __x, float __y) {return remainderf(__x, __y);} + +static double + _TG_ATTRS + __tg_remainder(double __x, double __y) {return remainder(__x, __y);} + +static long double + _TG_ATTRS + __tg_remainder(long double __x, long double __y) {return remainderl(__x, __y);} + +#undef remainder +#define remainder(__x, __y) __tg_remainder(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// remquo + +static float + _TG_ATTRS + __tg_remquo(float __x, float __y, int* __z) + {return remquof(__x, __y, __z);} + +static double + _TG_ATTRS + __tg_remquo(double __x, double __y, int* __z) + {return remquo(__x, __y, __z);} + +static long double + _TG_ATTRS + __tg_remquo(long double __x,long double __y, int* __z) + {return remquol(__x, __y, __z);} + +#undef remquo +#define remquo(__x, __y, __z) \ + __tg_remquo(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y), \ + (__z)) + +// rint + +static float + _TG_ATTRS + __tg_rint(float __x) {return rintf(__x);} + +static double + _TG_ATTRS + __tg_rint(double __x) {return rint(__x);} + +static long double + _TG_ATTRS + __tg_rint(long double __x) {return rintl(__x);} + +#undef rint +#define rint(__x) __tg_rint(__tg_promote1((__x))(__x)) + +// round + +static float + _TG_ATTRS + __tg_round(float __x) {return roundf(__x);} + +static double + _TG_ATTRS + __tg_round(double __x) {return round(__x);} + +static long double + _TG_ATTRS + __tg_round(long double __x) {return roundl(__x);} + +#undef round +#define round(__x) __tg_round(__tg_promote1((__x))(__x)) + +// scalbn + +static float + _TG_ATTRS + __tg_scalbn(float __x, int __y) {return scalbnf(__x, __y);} + +static double + _TG_ATTRS + __tg_scalbn(double __x, int __y) {return scalbn(__x, __y);} + +static long double + _TG_ATTRS + __tg_scalbn(long 
double __x, int __y) {return scalbnl(__x, __y);} + +#undef scalbn +#define scalbn(__x, __y) __tg_scalbn(__tg_promote1((__x))(__x), __y) + +// scalbln + +static float + _TG_ATTRS + __tg_scalbln(float __x, long __y) {return scalblnf(__x, __y);} + +static double + _TG_ATTRS + __tg_scalbln(double __x, long __y) {return scalbln(__x, __y);} + +static long double + _TG_ATTRS + __tg_scalbln(long double __x, long __y) {return scalblnl(__x, __y);} + +#undef scalbln +#define scalbln(__x, __y) __tg_scalbln(__tg_promote1((__x))(__x), __y) + +// tgamma + +static float + _TG_ATTRS + __tg_tgamma(float __x) {return tgammaf(__x);} + +static double + _TG_ATTRS + __tg_tgamma(double __x) {return tgamma(__x);} + +static long double + _TG_ATTRS + __tg_tgamma(long double __x) {return tgammal(__x);} + +#undef tgamma +#define tgamma(__x) __tg_tgamma(__tg_promote1((__x))(__x)) + +// trunc + +static float + _TG_ATTRS + __tg_trunc(float __x) {return truncf(__x);} + +static double + _TG_ATTRS + __tg_trunc(double __x) {return trunc(__x);} + +static long double + _TG_ATTRS + __tg_trunc(long double __x) {return truncl(__x);} + +#undef trunc +#define trunc(__x) __tg_trunc(__tg_promote1((__x))(__x)) + +// carg + +static float + _TG_ATTRS + __tg_carg(float __x) {return atan2f(0.F, __x);} + +static double + _TG_ATTRS + __tg_carg(double __x) {return atan2(0., __x);} + +static long double + _TG_ATTRS + __tg_carg(long double __x) {return atan2l(0.L, __x);} + +static float + _TG_ATTRS + __tg_carg(float _Complex __x) {return cargf(__x);} + +static double + _TG_ATTRS + __tg_carg(double _Complex __x) {return carg(__x);} + +static long double + _TG_ATTRS + __tg_carg(long double _Complex __x) {return cargl(__x);} + +#undef carg +#define carg(__x) __tg_carg(__tg_promote1((__x))(__x)) + +// cimag + +static float + _TG_ATTRS + __tg_cimag(float __x) {return 0;} + +static double + _TG_ATTRS + __tg_cimag(double __x) {return 0;} + +static long double + _TG_ATTRS + __tg_cimag(long double __x) {return 0;} + +static float + _TG_ATTRS + __tg_cimag(float _Complex __x) {return cimagf(__x);} + +static double + _TG_ATTRS + __tg_cimag(double _Complex __x) {return cimag(__x);} + +static long double + _TG_ATTRS + __tg_cimag(long double _Complex __x) {return cimagl(__x);} + +#undef cimag +#define cimag(__x) __tg_cimag(__tg_promote1((__x))(__x)) + +// conj + +static float _Complex + _TG_ATTRS + __tg_conj(float __x) {return __x;} + +static double _Complex + _TG_ATTRS + __tg_conj(double __x) {return __x;} + +static long double _Complex + _TG_ATTRS + __tg_conj(long double __x) {return __x;} + +static float _Complex + _TG_ATTRS + __tg_conj(float _Complex __x) {return conjf(__x);} + +static double _Complex + _TG_ATTRS + __tg_conj(double _Complex __x) {return conj(__x);} + +static long double _Complex + _TG_ATTRS + __tg_conj(long double _Complex __x) {return conjl(__x);} + +#undef conj +#define conj(__x) __tg_conj(__tg_promote1((__x))(__x)) + +// cproj + +static float _Complex + _TG_ATTRS + __tg_cproj(float __x) {return cprojf(__x);} + +static double _Complex + _TG_ATTRS + __tg_cproj(double __x) {return cproj(__x);} + +static long double _Complex + _TG_ATTRS + __tg_cproj(long double __x) {return cprojl(__x);} + +static float _Complex + _TG_ATTRS + __tg_cproj(float _Complex __x) {return cprojf(__x);} + +static double _Complex + _TG_ATTRS + __tg_cproj(double _Complex __x) {return cproj(__x);} + +static long double _Complex + _TG_ATTRS + __tg_cproj(long double _Complex __x) {return cprojl(__x);} + +#undef cproj +#define cproj(__x) 
__tg_cproj(__tg_promote1((__x))(__x)) + +// creal + +static float + _TG_ATTRS + __tg_creal(float __x) {return __x;} + +static double + _TG_ATTRS + __tg_creal(double __x) {return __x;} + +static long double + _TG_ATTRS + __tg_creal(long double __x) {return __x;} + +static float + _TG_ATTRS + __tg_creal(float _Complex __x) {return crealf(__x);} + +static double + _TG_ATTRS + __tg_creal(double _Complex __x) {return creal(__x);} + +static long double + _TG_ATTRS + __tg_creal(long double _Complex __x) {return creall(__x);} + +#undef creal +#define creal(__x) __tg_creal(__tg_promote1((__x))(__x)) + +#undef _TG_ATTRSp +#undef _TG_ATTRS + +#endif /* __cplusplus */ +#endif /* __TGMATH_H */ diff --git a/c_headers/tmmintrin.h b/c_headers/tmmintrin.h new file mode 100644 index 0000000000..2ecc730e90 --- /dev/null +++ b/c_headers/tmmintrin.h @@ -0,0 +1,230 @@ +/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __TMMINTRIN_H +#define __TMMINTRIN_H + +#ifndef __SSSE3__ +#error "SSSE3 instruction set not enabled" +#else + +#include <pmmintrin.h> + +/* Define the default attributes for the functions in this file. 
*/ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_abs_pi8(__m64 __a) +{ + return (__m64)__builtin_ia32_pabsb((__v8qi)__a); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_abs_epi8(__m128i __a) +{ + return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_abs_pi16(__m64 __a) +{ + return (__m64)__builtin_ia32_pabsw((__v4hi)__a); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_abs_epi16(__m128i __a) +{ + return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_abs_pi32(__m64 __a) +{ + return (__m64)__builtin_ia32_pabsd((__v2si)__a); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_abs_epi32(__m128i __a) +{ + return (__m128i)__builtin_ia32_pabsd128((__v4si)__a); +} + +#define _mm_alignr_epi8(a, b, n) __extension__ ({ \ + __m128i __a = (a); \ + __m128i __b = (b); \ + (__m128i)__builtin_ia32_palignr128((__v16qi)__a, (__v16qi)__b, (n)); }) + +#define _mm_alignr_pi8(a, b, n) __extension__ ({ \ + __m64 __a = (a); \ + __m64 __b = (b); \ + (__m64)__builtin_ia32_palignr((__v8qi)__a, (__v8qi)__b, (n)); }) + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_hadd_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_hadd_epi32(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_hadd_pi16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_hadd_pi32(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_hadds_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_hadds_pi16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_hsub_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_hsub_epi32(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_hsub_pi16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_hsub_pi32(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_hsubs_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_hsubs_pi16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maddubs_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_maddubs_pi16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mulhrs_epi16(__m128i __a, 
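/*
 * Usage sketch for the SSSE3 helpers in this file (illustrative only;
 * assumes a translation unit compiled with -mssse3; _mm_set1_epi8 and
 * _mm_setzero_si128 come from the SSE2 header pulled in above):
 *
 *   __m128i v = _mm_set1_epi8(-5);
 *   __m128i a = _mm_abs_epi8(v);          // every byte lane becomes 5
 *   __m128i z = _mm_setzero_si128();
 *   __m128i b = _mm_shuffle_epi8(a, z);   // all-zero indices: broadcast byte 0
 */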
__m128i __b) +{ + return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_mulhrs_pi16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_shuffle_epi8(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_shuffle_pi8(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_sign_epi8(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_sign_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_sign_epi32(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_sign_pi8(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_sign_pi16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_sign_pi32(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b); +} + +#undef __DEFAULT_FN_ATTRS + +#endif /* __SSSE3__ */ + +#endif /* __TMMINTRIN_H */ diff --git a/c_headers/unwind.h b/c_headers/unwind.h new file mode 100644 index 0000000000..303d79288a --- /dev/null +++ b/c_headers/unwind.h @@ -0,0 +1,282 @@ +/*===---- unwind.h - Stack unwinding ----------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +/* See "Data Definitions for libgcc_s" in the Linux Standard Base.*/ + +#ifndef __CLANG_UNWIND_H +#define __CLANG_UNWIND_H + +#if defined(__APPLE__) && __has_include_next(<unwind.h>) +/* Darwin (from 11.x on) provides an unwind.h. If that's available, + * use it. libunwind wraps some of its definitions in #ifdef _GNU_SOURCE, + * so define that around the include.*/ +# ifndef _GNU_SOURCE +# define _SHOULD_UNDEFINE_GNU_SOURCE +# define _GNU_SOURCE +# endif +// libunwind's unwind.h reflects the current visibility. 
However, Mozilla +// builds with -fvisibility=hidden and relies on gcc's unwind.h to reset the +// visibility to default and export its contents. gcc also allows users to +// override its override by #defining HIDE_EXPORTS (but note, this only obeys +// the user's -fvisibility setting; it doesn't hide any exports on its own). We +// imitate gcc's header here: +# ifdef HIDE_EXPORTS +# include_next <unwind.h> +# else +# pragma GCC visibility push(default) +# include_next <unwind.h> +# pragma GCC visibility pop +# endif +# ifdef _SHOULD_UNDEFINE_GNU_SOURCE +# undef _GNU_SOURCE +# undef _SHOULD_UNDEFINE_GNU_SOURCE +# endif +#else + +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* It is a bit strange for a header to play with the visibility of the + symbols it declares, but this matches gcc's behavior and some programs + depend on it */ +#ifndef HIDE_EXPORTS +#pragma GCC visibility push(default) +#endif + +typedef uintptr_t _Unwind_Word; +typedef intptr_t _Unwind_Sword; +typedef uintptr_t _Unwind_Ptr; +typedef uintptr_t _Unwind_Internal_Ptr; +typedef uint64_t _Unwind_Exception_Class; + +typedef intptr_t _sleb128_t; +typedef uintptr_t _uleb128_t; + +struct _Unwind_Context; +struct _Unwind_Exception; +typedef enum { + _URC_NO_REASON = 0, + _URC_FOREIGN_EXCEPTION_CAUGHT = 1, + + _URC_FATAL_PHASE2_ERROR = 2, + _URC_FATAL_PHASE1_ERROR = 3, + _URC_NORMAL_STOP = 4, + + _URC_END_OF_STACK = 5, + _URC_HANDLER_FOUND = 6, + _URC_INSTALL_CONTEXT = 7, + _URC_CONTINUE_UNWIND = 8 +} _Unwind_Reason_Code; + +typedef enum { + _UA_SEARCH_PHASE = 1, + _UA_CLEANUP_PHASE = 2, + + _UA_HANDLER_FRAME = 4, + _UA_FORCE_UNWIND = 8, + _UA_END_OF_STACK = 16 /* gcc extension to C++ ABI */ +} _Unwind_Action; + +typedef void (*_Unwind_Exception_Cleanup_Fn)(_Unwind_Reason_Code, + struct _Unwind_Exception *); + +struct _Unwind_Exception { + _Unwind_Exception_Class exception_class; + _Unwind_Exception_Cleanup_Fn exception_cleanup; + _Unwind_Word private_1; + _Unwind_Word private_2; + /* The Itanium ABI requires that _Unwind_Exception objects are "double-word + * aligned". GCC has interpreted this to mean "use the maximum useful + * alignment for the target"; so do we. 
*/ +} __attribute__((__aligned__)); + +typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)(int, _Unwind_Action, + _Unwind_Exception_Class, + struct _Unwind_Exception *, + struct _Unwind_Context *, + void *); + +typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)( + int, _Unwind_Action, _Unwind_Exception_Class, struct _Unwind_Exception *, + struct _Unwind_Context *); +typedef _Unwind_Personality_Fn __personality_routine; + +typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *, + void *); + +#if defined(__arm__) && !defined(__APPLE__) + +typedef enum { + _UVRSC_CORE = 0, /* integer register */ + _UVRSC_VFP = 1, /* vfp */ + _UVRSC_WMMXD = 3, /* Intel WMMX data register */ + _UVRSC_WMMXC = 4 /* Intel WMMX control register */ +} _Unwind_VRS_RegClass; + +typedef enum { + _UVRSD_UINT32 = 0, + _UVRSD_VFPX = 1, + _UVRSD_UINT64 = 3, + _UVRSD_FLOAT = 4, + _UVRSD_DOUBLE = 5 +} _Unwind_VRS_DataRepresentation; + +typedef enum { + _UVRSR_OK = 0, + _UVRSR_NOT_IMPLEMENTED = 1, + _UVRSR_FAILED = 2 +} _Unwind_VRS_Result; + +_Unwind_VRS_Result _Unwind_VRS_Get(struct _Unwind_Context *__context, + _Unwind_VRS_RegClass __regclass, + uint32_t __regno, + _Unwind_VRS_DataRepresentation __representation, + void *__valuep); + +_Unwind_VRS_Result _Unwind_VRS_Set(struct _Unwind_Context *__context, + _Unwind_VRS_RegClass __regclass, + uint32_t __regno, + _Unwind_VRS_DataRepresentation __representation, + void *__valuep); + +static __inline__ +_Unwind_Word _Unwind_GetGR(struct _Unwind_Context *__context, int __index) { + _Unwind_Word __value; + _Unwind_VRS_Get(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value); + return __value; +} + +static __inline__ +void _Unwind_SetGR(struct _Unwind_Context *__context, int __index, + _Unwind_Word __value) { + _Unwind_VRS_Set(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value); +} + +static __inline__ +_Unwind_Word _Unwind_GetIP(struct _Unwind_Context *__context) { + _Unwind_Word __ip = _Unwind_GetGR(__context, 15); + return __ip & ~(_Unwind_Word)(0x1); /* Remove thumb mode bit. 
*/ +} + +static __inline__ +void _Unwind_SetIP(struct _Unwind_Context *__context, _Unwind_Word __value) { + _Unwind_Word __thumb_mode_bit = _Unwind_GetGR(__context, 15) & 0x1; + _Unwind_SetGR(__context, 15, __value | __thumb_mode_bit); +} +#else +_Unwind_Word _Unwind_GetGR(struct _Unwind_Context *, int); +void _Unwind_SetGR(struct _Unwind_Context *, int, _Unwind_Word); + +_Unwind_Word _Unwind_GetIP(struct _Unwind_Context *); +void _Unwind_SetIP(struct _Unwind_Context *, _Unwind_Word); +#endif + + +_Unwind_Word _Unwind_GetIPInfo(struct _Unwind_Context *, int *); + +_Unwind_Word _Unwind_GetCFA(struct _Unwind_Context *); + +_Unwind_Word _Unwind_GetBSP(struct _Unwind_Context *); + +void *_Unwind_GetLanguageSpecificData(struct _Unwind_Context *); + +_Unwind_Ptr _Unwind_GetRegionStart(struct _Unwind_Context *); + +/* DWARF EH functions; currently not available on Darwin/ARM */ +#if !defined(__APPLE__) || !defined(__arm__) + +_Unwind_Reason_Code _Unwind_RaiseException(struct _Unwind_Exception *); +_Unwind_Reason_Code _Unwind_ForcedUnwind(struct _Unwind_Exception *, + _Unwind_Stop_Fn, void *); +void _Unwind_DeleteException(struct _Unwind_Exception *); +void _Unwind_Resume(struct _Unwind_Exception *); +_Unwind_Reason_Code _Unwind_Resume_or_Rethrow(struct _Unwind_Exception *); + +#endif + +_Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void *); + +/* setjmp(3)/longjmp(3) stuff */ +typedef struct SjLj_Function_Context *_Unwind_FunctionContext_t; + +void _Unwind_SjLj_Register(_Unwind_FunctionContext_t); +void _Unwind_SjLj_Unregister(_Unwind_FunctionContext_t); +_Unwind_Reason_Code _Unwind_SjLj_RaiseException(struct _Unwind_Exception *); +_Unwind_Reason_Code _Unwind_SjLj_ForcedUnwind(struct _Unwind_Exception *, + _Unwind_Stop_Fn, void *); +void _Unwind_SjLj_Resume(struct _Unwind_Exception *); +_Unwind_Reason_Code _Unwind_SjLj_Resume_or_Rethrow(struct _Unwind_Exception *); + +void *_Unwind_FindEnclosingFunction(void *); + +#ifdef __APPLE__ + +_Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *) + __attribute__((__unavailable__)); +_Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *) + __attribute__((__unavailable__)); + +/* Darwin-specific functions */ +void __register_frame(const void *); +void __deregister_frame(const void *); + +struct dwarf_eh_bases { + uintptr_t tbase; + uintptr_t dbase; + uintptr_t func; +}; +void *_Unwind_Find_FDE(const void *, struct dwarf_eh_bases *); + +void __register_frame_info_bases(const void *, void *, void *, void *) + __attribute__((__unavailable__)); +void __register_frame_info(const void *, void *) __attribute__((__unavailable__)); +void __register_frame_info_table_bases(const void *, void*, void *, void *) + __attribute__((__unavailable__)); +void __register_frame_info_table(const void *, void *) + __attribute__((__unavailable__)); +void __register_frame_table(const void *) __attribute__((__unavailable__)); +void __deregister_frame_info(const void *) __attribute__((__unavailable__)); +void __deregister_frame_info_bases(const void *)__attribute__((__unavailable__)); + +#else + +_Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *); +_Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *); + +#endif + + +#ifndef HIDE_EXPORTS +#pragma GCC visibility pop +#endif + +#ifdef __cplusplus +} +#endif + +#endif + +#endif /* __CLANG_UNWIND_H */ diff --git a/c_headers/vadefs.h b/c_headers/vadefs.h new file mode 100644 index 0000000000..7fe9a74e3f --- /dev/null +++ b/c_headers/vadefs.h @@ -0,0 +1,65 @@ +/* ===-------- vadefs.h 
---------------------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +/* Only include this if we are aiming for MSVC compatibility. */ +#ifndef _MSC_VER +#include_next <vadefs.h> +#else + +#ifndef __clang_vadefs_h +#define __clang_vadefs_h + +#include_next <vadefs.h> + +/* Override macros from vadefs.h with definitions that work with Clang. */ +#ifdef _crt_va_start +#undef _crt_va_start +#define _crt_va_start(ap, param) __builtin_va_start(ap, param) +#endif +#ifdef _crt_va_end +#undef _crt_va_end +#define _crt_va_end(ap) __builtin_va_end(ap) +#endif +#ifdef _crt_va_arg +#undef _crt_va_arg +#define _crt_va_arg(ap, type) __builtin_va_arg(ap, type) +#endif + +/* VS 2015 switched to double underscore names, which is an improvement, but now + * we have to intercept those names too. + */ +#ifdef __crt_va_start +#undef __crt_va_start +#define __crt_va_start(ap, param) __builtin_va_start(ap, param) +#endif +#ifdef __crt_va_end +#undef __crt_va_end +#define __crt_va_end(ap) __builtin_va_end(ap) +#endif +#ifdef __crt_va_arg +#undef __crt_va_arg +#define __crt_va_arg(ap, type) __builtin_va_arg(ap, type) +#endif + +#endif +#endif diff --git a/c_headers/varargs.h b/c_headers/varargs.h new file mode 100644 index 0000000000..b5477d0a6a --- /dev/null +++ b/c_headers/varargs.h @@ -0,0 +1,26 @@ +/*===---- varargs.h - Variable argument handling -------------------------------------=== +* +* Permission is hereby granted, free of charge, to any person obtaining a copy +* of this software and associated documentation files (the "Software"), to deal +* in the Software without restriction, including without limitation the rights +* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +* THE SOFTWARE. +* +*===-----------------------------------------------------------------------=== +*/ +#ifndef __VARARGS_H +#define __VARARGS_H + #error "Please use <stdarg.h> instead of <varargs.h>" +#endif diff --git a/c_headers/vecintrin.h b/c_headers/vecintrin.h new file mode 100644 index 0000000000..ca7acb4731 --- /dev/null +++ b/c_headers/vecintrin.h @@ -0,0 +1,8946 @@ +/*===---- vecintrin.h - Vector intrinsics ----------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#if defined(__s390x__) && defined(__VEC__) + +#define __ATTRS_ai __attribute__((__always_inline__)) +#define __ATTRS_o __attribute__((__overloadable__)) +#define __ATTRS_o_ai __attribute__((__overloadable__, __always_inline__)) + +#define __constant(PARM) \ + __attribute__((__enable_if__ ((PARM) == (PARM), \ + "argument must be a constant integer"))) +#define __constant_range(PARM, LOW, HIGH) \ + __attribute__((__enable_if__ ((PARM) >= (LOW) && (PARM) <= (HIGH), \ + "argument must be a constant integer from " #LOW " to " #HIGH))) +#define __constant_pow2_range(PARM, LOW, HIGH) \ + __attribute__((__enable_if__ ((PARM) >= (LOW) && (PARM) <= (HIGH) && \ + ((PARM) & ((PARM) - 1)) == 0, \ + "argument must be a constant power of 2 from " #LOW " to " #HIGH))) + +/*-- __lcbb -----------------------------------------------------------------*/ + +extern __ATTRS_o unsigned int +__lcbb(const void *__ptr, unsigned short __len) + __constant_pow2_range(__len, 64, 4096); + +#define __lcbb(X, Y) ((__typeof__((__lcbb)((X), (Y)))) \ + __builtin_s390_lcbb((X), __builtin_constant_p((Y))? \ + ((Y) == 64 ? 0 : \ + (Y) == 128 ? 1 : \ + (Y) == 256 ? 2 : \ + (Y) == 512 ? 3 : \ + (Y) == 1024 ? 4 : \ + (Y) == 2048 ? 5 : \ + (Y) == 4096 ? 
6 : 0) : 0)) + +/*-- vec_extract ------------------------------------------------------------*/ + +static inline __ATTRS_o_ai signed char +vec_extract(vector signed char __vec, int __index) { + return __vec[__index & 15]; +} + +static inline __ATTRS_o_ai unsigned char +vec_extract(vector bool char __vec, int __index) { + return __vec[__index & 15]; +} + +static inline __ATTRS_o_ai unsigned char +vec_extract(vector unsigned char __vec, int __index) { + return __vec[__index & 15]; +} + +static inline __ATTRS_o_ai signed short +vec_extract(vector signed short __vec, int __index) { + return __vec[__index & 7]; +} + +static inline __ATTRS_o_ai unsigned short +vec_extract(vector bool short __vec, int __index) { + return __vec[__index & 7]; +} + +static inline __ATTRS_o_ai unsigned short +vec_extract(vector unsigned short __vec, int __index) { + return __vec[__index & 7]; +} + +static inline __ATTRS_o_ai signed int +vec_extract(vector signed int __vec, int __index) { + return __vec[__index & 3]; +} + +static inline __ATTRS_o_ai unsigned int +vec_extract(vector bool int __vec, int __index) { + return __vec[__index & 3]; +} + +static inline __ATTRS_o_ai unsigned int +vec_extract(vector unsigned int __vec, int __index) { + return __vec[__index & 3]; +} + +static inline __ATTRS_o_ai signed long long +vec_extract(vector signed long long __vec, int __index) { + return __vec[__index & 1]; +} + +static inline __ATTRS_o_ai unsigned long long +vec_extract(vector bool long long __vec, int __index) { + return __vec[__index & 1]; +} + +static inline __ATTRS_o_ai unsigned long long +vec_extract(vector unsigned long long __vec, int __index) { + return __vec[__index & 1]; +} + +static inline __ATTRS_o_ai double +vec_extract(vector double __vec, int __index) { + return __vec[__index & 1]; +} + +/*-- vec_insert -------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_insert(signed char __scalar, vector signed char __vec, int __index) { + __vec[__index & 15] = __scalar; + return __vec; +} + +static inline __ATTRS_o_ai vector unsigned char +vec_insert(unsigned char __scalar, vector bool char __vec, int __index) { + vector unsigned char __newvec = (vector unsigned char)__vec; + __newvec[__index & 15] = (unsigned char)__scalar; + return __newvec; +} + +static inline __ATTRS_o_ai vector unsigned char +vec_insert(unsigned char __scalar, vector unsigned char __vec, int __index) { + __vec[__index & 15] = __scalar; + return __vec; +} + +static inline __ATTRS_o_ai vector signed short +vec_insert(signed short __scalar, vector signed short __vec, int __index) { + __vec[__index & 7] = __scalar; + return __vec; +} + +static inline __ATTRS_o_ai vector unsigned short +vec_insert(unsigned short __scalar, vector bool short __vec, int __index) { + vector unsigned short __newvec = (vector unsigned short)__vec; + __newvec[__index & 7] = (unsigned short)__scalar; + return __newvec; +} + +static inline __ATTRS_o_ai vector unsigned short +vec_insert(unsigned short __scalar, vector unsigned short __vec, int __index) { + __vec[__index & 7] = __scalar; + return __vec; +} + +static inline __ATTRS_o_ai vector signed int +vec_insert(signed int __scalar, vector signed int __vec, int __index) { + __vec[__index & 3] = __scalar; + return __vec; +} + +static inline __ATTRS_o_ai vector unsigned int +vec_insert(unsigned int __scalar, vector bool int __vec, int __index) { + vector unsigned int __newvec = (vector unsigned int)__vec; + __newvec[__index & 3] = __scalar; + return __newvec; +} + 
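/*
 * Usage sketch for vec_extract/vec_insert (illustrative, not part of the
 * header; assumes an s390x target built with vector support, e.g.
 * -march=z13 -mzvector):
 *
 *   vector signed int v = (vector signed int){1, 2, 3, 4};
 *   signed int e = vec_extract(v, 2);   // 3; the index is masked mod 4
 *   v = vec_insert(7, v, 0);            // v is now {7, 2, 3, 4}
 */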
+static inline __ATTRS_o_ai vector unsigned int +vec_insert(unsigned int __scalar, vector unsigned int __vec, int __index) { + __vec[__index & 3] = __scalar; + return __vec; +} + +static inline __ATTRS_o_ai vector signed long long +vec_insert(signed long long __scalar, vector signed long long __vec, + int __index) { + __vec[__index & 1] = __scalar; + return __vec; +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_insert(unsigned long long __scalar, vector bool long long __vec, + int __index) { + vector unsigned long long __newvec = (vector unsigned long long)__vec; + __newvec[__index & 1] = __scalar; + return __newvec; +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_insert(unsigned long long __scalar, vector unsigned long long __vec, + int __index) { + __vec[__index & 1] = __scalar; + return __vec; +} + +static inline __ATTRS_o_ai vector double +vec_insert(double __scalar, vector double __vec, int __index) { + __vec[__index & 1] = __scalar; + return __vec; +} + +/*-- vec_promote ------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_promote(signed char __scalar, int __index) { + const vector signed char __zero = (vector signed char)0; + vector signed char __vec = __builtin_shufflevector(__zero, __zero, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + __vec[__index & 15] = __scalar; + return __vec; +} + +static inline __ATTRS_o_ai vector unsigned char +vec_promote(unsigned char __scalar, int __index) { + const vector unsigned char __zero = (vector unsigned char)0; + vector unsigned char __vec = __builtin_shufflevector(__zero, __zero, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + __vec[__index & 15] = __scalar; + return __vec; +} + +static inline __ATTRS_o_ai vector signed short +vec_promote(signed short __scalar, int __index) { + const vector signed short __zero = (vector signed short)0; + vector signed short __vec = __builtin_shufflevector(__zero, __zero, + -1, -1, -1, -1, -1, -1, -1, -1); + __vec[__index & 7] = __scalar; + return __vec; +} + +static inline __ATTRS_o_ai vector unsigned short +vec_promote(unsigned short __scalar, int __index) { + const vector unsigned short __zero = (vector unsigned short)0; + vector unsigned short __vec = __builtin_shufflevector(__zero, __zero, + -1, -1, -1, -1, -1, -1, -1, -1); + __vec[__index & 7] = __scalar; + return __vec; +} + +static inline __ATTRS_o_ai vector signed int +vec_promote(signed int __scalar, int __index) { + const vector signed int __zero = (vector signed int)0; + vector signed int __vec = __builtin_shufflevector(__zero, __zero, + -1, -1, -1, -1); + __vec[__index & 3] = __scalar; + return __vec; +} + +static inline __ATTRS_o_ai vector unsigned int +vec_promote(unsigned int __scalar, int __index) { + const vector unsigned int __zero = (vector unsigned int)0; + vector unsigned int __vec = __builtin_shufflevector(__zero, __zero, + -1, -1, -1, -1); + __vec[__index & 3] = __scalar; + return __vec; +} + +static inline __ATTRS_o_ai vector signed long long +vec_promote(signed long long __scalar, int __index) { + const vector signed long long __zero = (vector signed long long)0; + vector signed long long __vec = __builtin_shufflevector(__zero, __zero, + -1, -1); + __vec[__index & 1] = __scalar; + return __vec; +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_promote(unsigned long long __scalar, int __index) { + const vector unsigned long long __zero = (vector unsigned long long)0; + vector unsigned 
long long __vec = __builtin_shufflevector(__zero, __zero, + -1, -1); + __vec[__index & 1] = __scalar; + return __vec; +} + +static inline __ATTRS_o_ai vector double +vec_promote(double __scalar, int __index) { + const vector double __zero = (vector double)0; + vector double __vec = __builtin_shufflevector(__zero, __zero, -1, -1); + __vec[__index & 1] = __scalar; + return __vec; +} + +/*-- vec_insert_and_zero ----------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_insert_and_zero(const signed char *__ptr) { + vector signed char __vec = (vector signed char)0; + __vec[7] = *__ptr; + return __vec; +} + +static inline __ATTRS_o_ai vector unsigned char +vec_insert_and_zero(const unsigned char *__ptr) { + vector unsigned char __vec = (vector unsigned char)0; + __vec[7] = *__ptr; + return __vec; +} + +static inline __ATTRS_o_ai vector signed short +vec_insert_and_zero(const signed short *__ptr) { + vector signed short __vec = (vector signed short)0; + __vec[3] = *__ptr; + return __vec; +} + +static inline __ATTRS_o_ai vector unsigned short +vec_insert_and_zero(const unsigned short *__ptr) { + vector unsigned short __vec = (vector unsigned short)0; + __vec[3] = *__ptr; + return __vec; +} + +static inline __ATTRS_o_ai vector signed int +vec_insert_and_zero(const signed int *__ptr) { + vector signed int __vec = (vector signed int)0; + __vec[1] = *__ptr; + return __vec; +} + +static inline __ATTRS_o_ai vector unsigned int +vec_insert_and_zero(const unsigned int *__ptr) { + vector unsigned int __vec = (vector unsigned int)0; + __vec[1] = *__ptr; + return __vec; +} + +static inline __ATTRS_o_ai vector signed long long +vec_insert_and_zero(const signed long long *__ptr) { + vector signed long long __vec = (vector signed long long)0; + __vec[0] = *__ptr; + return __vec; +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_insert_and_zero(const unsigned long long *__ptr) { + vector unsigned long long __vec = (vector unsigned long long)0; + __vec[0] = *__ptr; + return __vec; +} + +static inline __ATTRS_o_ai vector double +vec_insert_and_zero(const double *__ptr) { + vector double __vec = (vector double)0; + __vec[0] = *__ptr; + return __vec; +} + +/*-- vec_perm ---------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_perm(vector signed char __a, vector signed char __b, + vector unsigned char __c) { + return (vector signed char)__builtin_s390_vperm( + (vector unsigned char)__a, (vector unsigned char)__b, __c); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_perm(vector unsigned char __a, vector unsigned char __b, + vector unsigned char __c) { + return (vector unsigned char)__builtin_s390_vperm( + (vector unsigned char)__a, (vector unsigned char)__b, __c); +} + +static inline __ATTRS_o_ai vector bool char +vec_perm(vector bool char __a, vector bool char __b, + vector unsigned char __c) { + return (vector bool char)__builtin_s390_vperm( + (vector unsigned char)__a, (vector unsigned char)__b, __c); +} + +static inline __ATTRS_o_ai vector signed short +vec_perm(vector signed short __a, vector signed short __b, + vector unsigned char __c) { + return (vector signed short)__builtin_s390_vperm( + (vector unsigned char)__a, (vector unsigned char)__b, __c); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_perm(vector unsigned short __a, vector unsigned short __b, + vector unsigned char __c) { + return (vector unsigned short)__builtin_s390_vperm( + (vector unsigned 
char)__a, (vector unsigned char)__b, __c); +} + +static inline __ATTRS_o_ai vector bool short +vec_perm(vector bool short __a, vector bool short __b, + vector unsigned char __c) { + return (vector bool short)__builtin_s390_vperm( + (vector unsigned char)__a, (vector unsigned char)__b, __c); +} + +static inline __ATTRS_o_ai vector signed int +vec_perm(vector signed int __a, vector signed int __b, + vector unsigned char __c) { + return (vector signed int)__builtin_s390_vperm( + (vector unsigned char)__a, (vector unsigned char)__b, __c); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_perm(vector unsigned int __a, vector unsigned int __b, + vector unsigned char __c) { + return (vector unsigned int)__builtin_s390_vperm( + (vector unsigned char)__a, (vector unsigned char)__b, __c); +} + +static inline __ATTRS_o_ai vector bool int +vec_perm(vector bool int __a, vector bool int __b, + vector unsigned char __c) { + return (vector bool int)__builtin_s390_vperm( + (vector unsigned char)__a, (vector unsigned char)__b, __c); +} + +static inline __ATTRS_o_ai vector signed long long +vec_perm(vector signed long long __a, vector signed long long __b, + vector unsigned char __c) { + return (vector signed long long)__builtin_s390_vperm( + (vector unsigned char)__a, (vector unsigned char)__b, __c); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_perm(vector unsigned long long __a, vector unsigned long long __b, + vector unsigned char __c) { + return (vector unsigned long long)__builtin_s390_vperm( + (vector unsigned char)__a, (vector unsigned char)__b, __c); +} + +static inline __ATTRS_o_ai vector bool long long +vec_perm(vector bool long long __a, vector bool long long __b, + vector unsigned char __c) { + return (vector bool long long)__builtin_s390_vperm( + (vector unsigned char)__a, (vector unsigned char)__b, __c); +} + +static inline __ATTRS_o_ai vector double +vec_perm(vector double __a, vector double __b, + vector unsigned char __c) { + return (vector double)__builtin_s390_vperm( + (vector unsigned char)__a, (vector unsigned char)__b, __c); +} + +/*-- vec_permi --------------------------------------------------------------*/ + +extern __ATTRS_o vector signed long long +vec_permi(vector signed long long __a, vector signed long long __b, int __c) + __constant_range(__c, 0, 3); + +extern __ATTRS_o vector unsigned long long +vec_permi(vector unsigned long long __a, vector unsigned long long __b, int __c) + __constant_range(__c, 0, 3); + +extern __ATTRS_o vector bool long long +vec_permi(vector bool long long __a, vector bool long long __b, int __c) + __constant_range(__c, 0, 3); + +extern __ATTRS_o vector double +vec_permi(vector double __a, vector double __b, int __c) + __constant_range(__c, 0, 3); + +#define vec_permi(X, Y, Z) ((__typeof__((vec_permi)((X), (Y), (Z)))) \ + __builtin_s390_vpdi((vector unsigned long long)(X), \ + (vector unsigned long long)(Y), \ + (((Z) & 2) << 1) | ((Z) & 1))) + +/*-- vec_sel ----------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_sel(vector signed char __a, vector signed char __b, + vector unsigned char __c) { + return ((vector signed char)__c & __b) | (~(vector signed char)__c & __a); +} + +static inline __ATTRS_o_ai vector signed char +vec_sel(vector signed char __a, vector signed char __b, vector bool char __c) { + return ((vector signed char)__c & __b) | (~(vector signed char)__c & __a); +} + +static inline __ATTRS_o_ai vector bool char +vec_sel(vector bool char __a, vector 
bool char __b, vector unsigned char __c) { + return ((vector bool char)__c & __b) | (~(vector bool char)__c & __a); +} + +static inline __ATTRS_o_ai vector bool char +vec_sel(vector bool char __a, vector bool char __b, vector bool char __c) { + return (__c & __b) | (~__c & __a); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_sel(vector unsigned char __a, vector unsigned char __b, + vector unsigned char __c) { + return (__c & __b) | (~__c & __a); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_sel(vector unsigned char __a, vector unsigned char __b, + vector bool char __c) { + return ((vector unsigned char)__c & __b) | (~(vector unsigned char)__c & __a); +} + +static inline __ATTRS_o_ai vector signed short +vec_sel(vector signed short __a, vector signed short __b, + vector unsigned short __c) { + return ((vector signed short)__c & __b) | (~(vector signed short)__c & __a); +} + +static inline __ATTRS_o_ai vector signed short +vec_sel(vector signed short __a, vector signed short __b, + vector bool short __c) { + return ((vector signed short)__c & __b) | (~(vector signed short)__c & __a); +} + +static inline __ATTRS_o_ai vector bool short +vec_sel(vector bool short __a, vector bool short __b, + vector unsigned short __c) { + return ((vector bool short)__c & __b) | (~(vector bool short)__c & __a); +} + +static inline __ATTRS_o_ai vector bool short +vec_sel(vector bool short __a, vector bool short __b, vector bool short __c) { + return (__c & __b) | (~__c & __a); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_sel(vector unsigned short __a, vector unsigned short __b, + vector unsigned short __c) { + return (__c & __b) | (~__c & __a); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_sel(vector unsigned short __a, vector unsigned short __b, + vector bool short __c) { + return (((vector unsigned short)__c & __b) | + (~(vector unsigned short)__c & __a)); +} + +static inline __ATTRS_o_ai vector signed int +vec_sel(vector signed int __a, vector signed int __b, + vector unsigned int __c) { + return ((vector signed int)__c & __b) | (~(vector signed int)__c & __a); +} + +static inline __ATTRS_o_ai vector signed int +vec_sel(vector signed int __a, vector signed int __b, vector bool int __c) { + return ((vector signed int)__c & __b) | (~(vector signed int)__c & __a); +} + +static inline __ATTRS_o_ai vector bool int +vec_sel(vector bool int __a, vector bool int __b, vector unsigned int __c) { + return ((vector bool int)__c & __b) | (~(vector bool int)__c & __a); +} + +static inline __ATTRS_o_ai vector bool int +vec_sel(vector bool int __a, vector bool int __b, vector bool int __c) { + return (__c & __b) | (~__c & __a); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_sel(vector unsigned int __a, vector unsigned int __b, + vector unsigned int __c) { + return (__c & __b) | (~__c & __a); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_sel(vector unsigned int __a, vector unsigned int __b, vector bool int __c) { + return ((vector unsigned int)__c & __b) | (~(vector unsigned int)__c & __a); +} + +static inline __ATTRS_o_ai vector signed long long +vec_sel(vector signed long long __a, vector signed long long __b, + vector unsigned long long __c) { + return (((vector signed long long)__c & __b) | + (~(vector signed long long)__c & __a)); +} + +static inline __ATTRS_o_ai vector signed long long +vec_sel(vector signed long long __a, vector signed long long __b, + vector bool long long __c) { + return (((vector signed long long)__c & __b) | + (~(vector 
signed long long)__c & __a)); +} + +static inline __ATTRS_o_ai vector bool long long +vec_sel(vector bool long long __a, vector bool long long __b, + vector unsigned long long __c) { + return (((vector bool long long)__c & __b) | + (~(vector bool long long)__c & __a)); +} + +static inline __ATTRS_o_ai vector bool long long +vec_sel(vector bool long long __a, vector bool long long __b, + vector bool long long __c) { + return (__c & __b) | (~__c & __a); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_sel(vector unsigned long long __a, vector unsigned long long __b, + vector unsigned long long __c) { + return (__c & __b) | (~__c & __a); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_sel(vector unsigned long long __a, vector unsigned long long __b, + vector bool long long __c) { + return (((vector unsigned long long)__c & __b) | + (~(vector unsigned long long)__c & __a)); +} + +static inline __ATTRS_o_ai vector double +vec_sel(vector double __a, vector double __b, vector unsigned long long __c) { + return (vector double)((__c & (vector unsigned long long)__b) | + (~__c & (vector unsigned long long)__a)); +} + +static inline __ATTRS_o_ai vector double +vec_sel(vector double __a, vector double __b, vector bool long long __c) { + vector unsigned long long __ac = (vector unsigned long long)__a; + vector unsigned long long __bc = (vector unsigned long long)__b; + vector unsigned long long __cc = (vector unsigned long long)__c; + return (vector double)((__cc & __bc) | (~__cc & __ac)); +} + +/*-- vec_gather_element -----------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed int +vec_gather_element(vector signed int __vec, vector unsigned int __offset, + const signed int *__ptr, int __index) + __constant_range(__index, 0, 3) { + __vec[__index] = *(const signed int *)( + (__INTPTR_TYPE__)__ptr + (__INTPTR_TYPE__)__offset[__index]); + return __vec; +} + +static inline __ATTRS_o_ai vector bool int +vec_gather_element(vector bool int __vec, vector unsigned int __offset, + const unsigned int *__ptr, int __index) + __constant_range(__index, 0, 3) { + __vec[__index] = *(const unsigned int *)( + (__INTPTR_TYPE__)__ptr + (__INTPTR_TYPE__)__offset[__index]); + return __vec; +} + +static inline __ATTRS_o_ai vector unsigned int +vec_gather_element(vector unsigned int __vec, vector unsigned int __offset, + const unsigned int *__ptr, int __index) + __constant_range(__index, 0, 3) { + __vec[__index] = *(const unsigned int *)( + (__INTPTR_TYPE__)__ptr + (__INTPTR_TYPE__)__offset[__index]); + return __vec; +} + +static inline __ATTRS_o_ai vector signed long long +vec_gather_element(vector signed long long __vec, + vector unsigned long long __offset, + const signed long long *__ptr, int __index) + __constant_range(__index, 0, 1) { + __vec[__index] = *(const signed long long *)( + (__INTPTR_TYPE__)__ptr + (__INTPTR_TYPE__)__offset[__index]); + return __vec; +} + +static inline __ATTRS_o_ai vector bool long long +vec_gather_element(vector bool long long __vec, + vector unsigned long long __offset, + const unsigned long long *__ptr, int __index) + __constant_range(__index, 0, 1) { + __vec[__index] = *(const unsigned long long *)( + (__INTPTR_TYPE__)__ptr + (__INTPTR_TYPE__)__offset[__index]); + return __vec; +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_gather_element(vector unsigned long long __vec, + vector unsigned long long __offset, + const unsigned long long *__ptr, int __index) + __constant_range(__index, 0, 1) { + 
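/*
 * As in the overloads above, the selected element is loaded from __ptr
 * displaced by the byte offset __offset[__index]. A sketch (illustrative
 * only):
 *
 *   unsigned int base[4] = {10, 20, 30, 40};
 *   vector unsigned int off = (vector unsigned int){0, 4, 8, 12};
 *   vector unsigned int v = (vector unsigned int){0, 0, 0, 0};
 *   v = vec_gather_element(v, off, base, 1);  // v[1] = base[1] (byte offset 4)
 */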
__vec[__index] = *(const unsigned long long *)( + (__INTPTR_TYPE__)__ptr + (__INTPTR_TYPE__)__offset[__index]); + return __vec; +} + +static inline __ATTRS_o_ai vector double +vec_gather_element(vector double __vec, vector unsigned long long __offset, + const double *__ptr, int __index) + __constant_range(__index, 0, 1) { + __vec[__index] = *(const double *)( + (__INTPTR_TYPE__)__ptr + (__INTPTR_TYPE__)__offset[__index]); + return __vec; +} + +/*-- vec_scatter_element ----------------------------------------------------*/ + +static inline __ATTRS_o_ai void +vec_scatter_element(vector signed int __vec, vector unsigned int __offset, + signed int *__ptr, int __index) + __constant_range(__index, 0, 3) { + *(signed int *)((__INTPTR_TYPE__)__ptr + __offset[__index]) = + __vec[__index]; +} + +static inline __ATTRS_o_ai void +vec_scatter_element(vector bool int __vec, vector unsigned int __offset, + unsigned int *__ptr, int __index) + __constant_range(__index, 0, 3) { + *(unsigned int *)((__INTPTR_TYPE__)__ptr + __offset[__index]) = + __vec[__index]; +} + +static inline __ATTRS_o_ai void +vec_scatter_element(vector unsigned int __vec, vector unsigned int __offset, + unsigned int *__ptr, int __index) + __constant_range(__index, 0, 3) { + *(unsigned int *)((__INTPTR_TYPE__)__ptr + __offset[__index]) = + __vec[__index]; +} + +static inline __ATTRS_o_ai void +vec_scatter_element(vector signed long long __vec, + vector unsigned long long __offset, + signed long long *__ptr, int __index) + __constant_range(__index, 0, 1) { + *(signed long long *)((__INTPTR_TYPE__)__ptr + __offset[__index]) = + __vec[__index]; +} + +static inline __ATTRS_o_ai void +vec_scatter_element(vector bool long long __vec, + vector unsigned long long __offset, + unsigned long long *__ptr, int __index) + __constant_range(__index, 0, 1) { + *(unsigned long long *)((__INTPTR_TYPE__)__ptr + __offset[__index]) = + __vec[__index]; +} + +static inline __ATTRS_o_ai void +vec_scatter_element(vector unsigned long long __vec, + vector unsigned long long __offset, + unsigned long long *__ptr, int __index) + __constant_range(__index, 0, 1) { + *(unsigned long long *)((__INTPTR_TYPE__)__ptr + __offset[__index]) = + __vec[__index]; +} + +static inline __ATTRS_o_ai void +vec_scatter_element(vector double __vec, vector unsigned long long __offset, + double *__ptr, int __index) + __constant_range(__index, 0, 1) { + *(double *)((__INTPTR_TYPE__)__ptr + __offset[__index]) = + __vec[__index]; +} + +/*-- vec_xld2 ---------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_xld2(long __offset, const signed char *__ptr) { + return *(const vector signed char *)((__INTPTR_TYPE__)__ptr + __offset); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_xld2(long __offset, const unsigned char *__ptr) { + return *(const vector unsigned char *)((__INTPTR_TYPE__)__ptr + __offset); +} + +static inline __ATTRS_o_ai vector signed short +vec_xld2(long __offset, const signed short *__ptr) { + return *(const vector signed short *)((__INTPTR_TYPE__)__ptr + __offset); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_xld2(long __offset, const unsigned short *__ptr) { + return *(const vector unsigned short *)((__INTPTR_TYPE__)__ptr + __offset); +} + +static inline __ATTRS_o_ai vector signed int +vec_xld2(long __offset, const signed int *__ptr) { + return *(const vector signed int *)((__INTPTR_TYPE__)__ptr + __offset); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_xld2(long __offset, 
const unsigned int *__ptr) { + return *(const vector unsigned int *)((__INTPTR_TYPE__)__ptr + __offset); +} + +static inline __ATTRS_o_ai vector signed long long +vec_xld2(long __offset, const signed long long *__ptr) { + return *(const vector signed long long *)((__INTPTR_TYPE__)__ptr + __offset); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_xld2(long __offset, const unsigned long long *__ptr) { + return *(const vector unsigned long long *)((__INTPTR_TYPE__)__ptr + __offset); +} + +static inline __ATTRS_o_ai vector double +vec_xld2(long __offset, const double *__ptr) { + return *(const vector double *)((__INTPTR_TYPE__)__ptr + __offset); +} + +/*-- vec_xlw4 ---------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_xlw4(long __offset, const signed char *__ptr) { + return *(const vector signed char *)((__INTPTR_TYPE__)__ptr + __offset); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_xlw4(long __offset, const unsigned char *__ptr) { + return *(const vector unsigned char *)((__INTPTR_TYPE__)__ptr + __offset); +} + +static inline __ATTRS_o_ai vector signed short +vec_xlw4(long __offset, const signed short *__ptr) { + return *(const vector signed short *)((__INTPTR_TYPE__)__ptr + __offset); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_xlw4(long __offset, const unsigned short *__ptr) { + return *(const vector unsigned short *)((__INTPTR_TYPE__)__ptr + __offset); +} + +static inline __ATTRS_o_ai vector signed int +vec_xlw4(long __offset, const signed int *__ptr) { + return *(const vector signed int *)((__INTPTR_TYPE__)__ptr + __offset); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_xlw4(long __offset, const unsigned int *__ptr) { + return *(const vector unsigned int *)((__INTPTR_TYPE__)__ptr + __offset); +} + +/*-- vec_xstd2 --------------------------------------------------------------*/ + +static inline __ATTRS_o_ai void +vec_xstd2(vector signed char __vec, long __offset, signed char *__ptr) { + *(vector signed char *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; +} + +static inline __ATTRS_o_ai void +vec_xstd2(vector unsigned char __vec, long __offset, unsigned char *__ptr) { + *(vector unsigned char *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; +} + +static inline __ATTRS_o_ai void +vec_xstd2(vector signed short __vec, long __offset, signed short *__ptr) { + *(vector signed short *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; +} + +static inline __ATTRS_o_ai void +vec_xstd2(vector unsigned short __vec, long __offset, unsigned short *__ptr) { + *(vector unsigned short *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; +} + +static inline __ATTRS_o_ai void +vec_xstd2(vector signed int __vec, long __offset, signed int *__ptr) { + *(vector signed int *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; +} + +static inline __ATTRS_o_ai void +vec_xstd2(vector unsigned int __vec, long __offset, unsigned int *__ptr) { + *(vector unsigned int *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; +} + +static inline __ATTRS_o_ai void +vec_xstd2(vector signed long long __vec, long __offset, + signed long long *__ptr) { + *(vector signed long long *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; +} + +static inline __ATTRS_o_ai void +vec_xstd2(vector unsigned long long __vec, long __offset, + unsigned long long *__ptr) { + *(vector unsigned long long *)((__INTPTR_TYPE__)__ptr + __offset) = + __vec; +} + +static inline __ATTRS_o_ai void +vec_xstd2(vector double __vec, long __offset, double *__ptr) { + 
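+  /* Like the other vec_xstd2 overloads: the full 16-byte vector is
+     stored at byte offset __offset from __ptr. */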
*(vector double *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; +} + +/*-- vec_xstw4 --------------------------------------------------------------*/ + +static inline __ATTRS_o_ai void +vec_xstw4(vector signed char __vec, long __offset, signed char *__ptr) { + *(vector signed char *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; +} + +static inline __ATTRS_o_ai void +vec_xstw4(vector unsigned char __vec, long __offset, unsigned char *__ptr) { + *(vector unsigned char *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; +} + +static inline __ATTRS_o_ai void +vec_xstw4(vector signed short __vec, long __offset, signed short *__ptr) { + *(vector signed short *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; +} + +static inline __ATTRS_o_ai void +vec_xstw4(vector unsigned short __vec, long __offset, unsigned short *__ptr) { + *(vector unsigned short *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; +} + +static inline __ATTRS_o_ai void +vec_xstw4(vector signed int __vec, long __offset, signed int *__ptr) { + *(vector signed int *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; +} + +static inline __ATTRS_o_ai void +vec_xstw4(vector unsigned int __vec, long __offset, unsigned int *__ptr) { + *(vector unsigned int *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; +} + +/*-- vec_load_bndry ---------------------------------------------------------*/ + +extern __ATTRS_o vector signed char +vec_load_bndry(const signed char *__ptr, unsigned short __len) + __constant_pow2_range(__len, 64, 4096); + +extern __ATTRS_o vector unsigned char +vec_load_bndry(const unsigned char *__ptr, unsigned short __len) + __constant_pow2_range(__len, 64, 4096); + +extern __ATTRS_o vector signed short +vec_load_bndry(const signed short *__ptr, unsigned short __len) + __constant_pow2_range(__len, 64, 4096); + +extern __ATTRS_o vector unsigned short +vec_load_bndry(const unsigned short *__ptr, unsigned short __len) + __constant_pow2_range(__len, 64, 4096); + +extern __ATTRS_o vector signed int +vec_load_bndry(const signed int *__ptr, unsigned short __len) + __constant_pow2_range(__len, 64, 4096); + +extern __ATTRS_o vector unsigned int +vec_load_bndry(const unsigned int *__ptr, unsigned short __len) + __constant_pow2_range(__len, 64, 4096); + +extern __ATTRS_o vector signed long long +vec_load_bndry(const signed long long *__ptr, unsigned short __len) + __constant_pow2_range(__len, 64, 4096); + +extern __ATTRS_o vector unsigned long long +vec_load_bndry(const unsigned long long *__ptr, unsigned short __len) + __constant_pow2_range(__len, 64, 4096); + +extern __ATTRS_o vector double +vec_load_bndry(const double *__ptr, unsigned short __len) + __constant_pow2_range(__len, 64, 4096); + +#define vec_load_bndry(X, Y) ((__typeof__((vec_load_bndry)((X), (Y)))) \ + __builtin_s390_vlbb((X), ((Y) == 64 ? 0 : \ + (Y) == 128 ? 1 : \ + (Y) == 256 ? 2 : \ + (Y) == 512 ? 3 : \ + (Y) == 1024 ? 4 : \ + (Y) == 2048 ? 5 : \ + (Y) == 4096 ? 
6 : -1))) + +/*-- vec_load_len -----------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_load_len(const signed char *__ptr, unsigned int __len) { + return (vector signed char)__builtin_s390_vll(__len, __ptr); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_load_len(const unsigned char *__ptr, unsigned int __len) { + return (vector unsigned char)__builtin_s390_vll(__len, __ptr); +} + +static inline __ATTRS_o_ai vector signed short +vec_load_len(const signed short *__ptr, unsigned int __len) { + return (vector signed short)__builtin_s390_vll(__len, __ptr); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_load_len(const unsigned short *__ptr, unsigned int __len) { + return (vector unsigned short)__builtin_s390_vll(__len, __ptr); +} + +static inline __ATTRS_o_ai vector signed int +vec_load_len(const signed int *__ptr, unsigned int __len) { + return (vector signed int)__builtin_s390_vll(__len, __ptr); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_load_len(const unsigned int *__ptr, unsigned int __len) { + return (vector unsigned int)__builtin_s390_vll(__len, __ptr); +} + +static inline __ATTRS_o_ai vector signed long long +vec_load_len(const signed long long *__ptr, unsigned int __len) { + return (vector signed long long)__builtin_s390_vll(__len, __ptr); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_load_len(const unsigned long long *__ptr, unsigned int __len) { + return (vector unsigned long long)__builtin_s390_vll(__len, __ptr); +} + +static inline __ATTRS_o_ai vector double +vec_load_len(const double *__ptr, unsigned int __len) { + return (vector double)__builtin_s390_vll(__len, __ptr); +} + +/*-- vec_store_len ----------------------------------------------------------*/ + +static inline __ATTRS_o_ai void +vec_store_len(vector signed char __vec, signed char *__ptr, + unsigned int __len) { + __builtin_s390_vstl((vector signed char)__vec, __len, __ptr); +} + +static inline __ATTRS_o_ai void +vec_store_len(vector unsigned char __vec, unsigned char *__ptr, + unsigned int __len) { + __builtin_s390_vstl((vector signed char)__vec, __len, __ptr); +} + +static inline __ATTRS_o_ai void +vec_store_len(vector signed short __vec, signed short *__ptr, + unsigned int __len) { + __builtin_s390_vstl((vector signed char)__vec, __len, __ptr); +} + +static inline __ATTRS_o_ai void +vec_store_len(vector unsigned short __vec, unsigned short *__ptr, + unsigned int __len) { + __builtin_s390_vstl((vector signed char)__vec, __len, __ptr); +} + +static inline __ATTRS_o_ai void +vec_store_len(vector signed int __vec, signed int *__ptr, + unsigned int __len) { + __builtin_s390_vstl((vector signed char)__vec, __len, __ptr); +} + +static inline __ATTRS_o_ai void +vec_store_len(vector unsigned int __vec, unsigned int *__ptr, + unsigned int __len) { + __builtin_s390_vstl((vector signed char)__vec, __len, __ptr); +} + +static inline __ATTRS_o_ai void +vec_store_len(vector signed long long __vec, signed long long *__ptr, + unsigned int __len) { + __builtin_s390_vstl((vector signed char)__vec, __len, __ptr); +} + +static inline __ATTRS_o_ai void +vec_store_len(vector unsigned long long __vec, unsigned long long *__ptr, + unsigned int __len) { + __builtin_s390_vstl((vector signed char)__vec, __len, __ptr); +} + +static inline __ATTRS_o_ai void +vec_store_len(vector double __vec, double *__ptr, + unsigned int __len) { + __builtin_s390_vstl((vector signed char)__vec, __len, __ptr); +} + +/*-- vec_load_pair 
----------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed long long +vec_load_pair(signed long long __a, signed long long __b) { + return (vector signed long long)(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_load_pair(unsigned long long __a, unsigned long long __b) { + return (vector unsigned long long)(__a, __b); +} + +/*-- vec_genmask ------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector unsigned char +vec_genmask(unsigned short __mask) + __constant(__mask) { + return (vector unsigned char)( + __mask & 0x8000 ? 0xff : 0, + __mask & 0x4000 ? 0xff : 0, + __mask & 0x2000 ? 0xff : 0, + __mask & 0x1000 ? 0xff : 0, + __mask & 0x0800 ? 0xff : 0, + __mask & 0x0400 ? 0xff : 0, + __mask & 0x0200 ? 0xff : 0, + __mask & 0x0100 ? 0xff : 0, + __mask & 0x0080 ? 0xff : 0, + __mask & 0x0040 ? 0xff : 0, + __mask & 0x0020 ? 0xff : 0, + __mask & 0x0010 ? 0xff : 0, + __mask & 0x0008 ? 0xff : 0, + __mask & 0x0004 ? 0xff : 0, + __mask & 0x0002 ? 0xff : 0, + __mask & 0x0001 ? 0xff : 0); +} + +/*-- vec_genmasks_* ---------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector unsigned char +vec_genmasks_8(unsigned char __first, unsigned char __last) + __constant(__first) __constant(__last) { + unsigned char __bit1 = __first & 7; + unsigned char __bit2 = __last & 7; + unsigned char __mask1 = (unsigned char)(1U << (7 - __bit1) << 1) - 1; + unsigned char __mask2 = (unsigned char)(1U << (7 - __bit2)) - 1; + unsigned char __value = (__bit1 <= __bit2 ? + __mask1 & ~__mask2 : + __mask1 | ~__mask2); + return (vector unsigned char)__value; +} + +static inline __ATTRS_o_ai vector unsigned short +vec_genmasks_16(unsigned char __first, unsigned char __last) + __constant(__first) __constant(__last) { + unsigned char __bit1 = __first & 15; + unsigned char __bit2 = __last & 15; + unsigned short __mask1 = (unsigned short)(1U << (15 - __bit1) << 1) - 1; + unsigned short __mask2 = (unsigned short)(1U << (15 - __bit2)) - 1; + unsigned short __value = (__bit1 <= __bit2 ? + __mask1 & ~__mask2 : + __mask1 | ~__mask2); + return (vector unsigned short)__value; +} + +static inline __ATTRS_o_ai vector unsigned int +vec_genmasks_32(unsigned char __first, unsigned char __last) + __constant(__first) __constant(__last) { + unsigned char __bit1 = __first & 31; + unsigned char __bit2 = __last & 31; + unsigned int __mask1 = (1U << (31 - __bit1) << 1) - 1; + unsigned int __mask2 = (1U << (31 - __bit2)) - 1; + unsigned int __value = (__bit1 <= __bit2 ? + __mask1 & ~__mask2 : + __mask1 | ~__mask2); + return (vector unsigned int)__value; +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_genmasks_64(unsigned char __first, unsigned char __last) + __constant(__first) __constant(__last) { + unsigned char __bit1 = __first & 63; + unsigned char __bit2 = __last & 63; + unsigned long long __mask1 = (1ULL << (63 - __bit1) << 1) - 1; + unsigned long long __mask2 = (1ULL << (63 - __bit2)) - 1; + unsigned long long __value = (__bit1 <= __bit2 ? 
+ __mask1 & ~__mask2 : + __mask1 | ~__mask2); + return (vector unsigned long long)__value; +} + +/*-- vec_splat --------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_splat(vector signed char __vec, int __index) + __constant_range(__index, 0, 15) { + return (vector signed char)__vec[__index]; +} + +static inline __ATTRS_o_ai vector bool char +vec_splat(vector bool char __vec, int __index) + __constant_range(__index, 0, 15) { + return (vector bool char)(vector unsigned char)__vec[__index]; +} + +static inline __ATTRS_o_ai vector unsigned char +vec_splat(vector unsigned char __vec, int __index) + __constant_range(__index, 0, 15) { + return (vector unsigned char)__vec[__index]; +} + +static inline __ATTRS_o_ai vector signed short +vec_splat(vector signed short __vec, int __index) + __constant_range(__index, 0, 7) { + return (vector signed short)__vec[__index]; +} + +static inline __ATTRS_o_ai vector bool short +vec_splat(vector bool short __vec, int __index) + __constant_range(__index, 0, 7) { + return (vector bool short)(vector unsigned short)__vec[__index]; +} + +static inline __ATTRS_o_ai vector unsigned short +vec_splat(vector unsigned short __vec, int __index) + __constant_range(__index, 0, 7) { + return (vector unsigned short)__vec[__index]; +} + +static inline __ATTRS_o_ai vector signed int +vec_splat(vector signed int __vec, int __index) + __constant_range(__index, 0, 3) { + return (vector signed int)__vec[__index]; +} + +static inline __ATTRS_o_ai vector bool int +vec_splat(vector bool int __vec, int __index) + __constant_range(__index, 0, 3) { + return (vector bool int)(vector unsigned int)__vec[__index]; +} + +static inline __ATTRS_o_ai vector unsigned int +vec_splat(vector unsigned int __vec, int __index) + __constant_range(__index, 0, 3) { + return (vector unsigned int)__vec[__index]; +} + +static inline __ATTRS_o_ai vector signed long long +vec_splat(vector signed long long __vec, int __index) + __constant_range(__index, 0, 1) { + return (vector signed long long)__vec[__index]; +} + +static inline __ATTRS_o_ai vector bool long long +vec_splat(vector bool long long __vec, int __index) + __constant_range(__index, 0, 1) { + return (vector bool long long)(vector unsigned long long)__vec[__index]; +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_splat(vector unsigned long long __vec, int __index) + __constant_range(__index, 0, 1) { + return (vector unsigned long long)__vec[__index]; +} + +static inline __ATTRS_o_ai vector double +vec_splat(vector double __vec, int __index) + __constant_range(__index, 0, 1) { + return (vector double)__vec[__index]; +} + +/*-- vec_splat_s* -----------------------------------------------------------*/ + +static inline __ATTRS_ai vector signed char +vec_splat_s8(signed char __scalar) + __constant(__scalar) { + return (vector signed char)__scalar; +} + +static inline __ATTRS_ai vector signed short +vec_splat_s16(signed short __scalar) + __constant(__scalar) { + return (vector signed short)__scalar; +} + +static inline __ATTRS_ai vector signed int +vec_splat_s32(signed short __scalar) + __constant(__scalar) { + return (vector signed int)(signed int)__scalar; +} + +static inline __ATTRS_ai vector signed long long +vec_splat_s64(signed short __scalar) + __constant(__scalar) { + return (vector signed long long)(signed long)__scalar; +} + +/*-- vec_splat_u* -----------------------------------------------------------*/ + +static inline __ATTRS_ai vector unsigned char 
+vec_splat_u8(unsigned char __scalar) + __constant(__scalar) { + return (vector unsigned char)__scalar; +} + +static inline __ATTRS_ai vector unsigned short +vec_splat_u16(unsigned short __scalar) + __constant(__scalar) { + return (vector unsigned short)__scalar; +} + +static inline __ATTRS_ai vector unsigned int +vec_splat_u32(signed short __scalar) + __constant(__scalar) { + return (vector unsigned int)(signed int)__scalar; +} + +static inline __ATTRS_ai vector unsigned long long +vec_splat_u64(signed short __scalar) + __constant(__scalar) { + return (vector unsigned long long)(signed long long)__scalar; +} + +/*-- vec_splats -------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_splats(signed char __scalar) { + return (vector signed char)__scalar; +} + +static inline __ATTRS_o_ai vector unsigned char +vec_splats(unsigned char __scalar) { + return (vector unsigned char)__scalar; +} + +static inline __ATTRS_o_ai vector signed short +vec_splats(signed short __scalar) { + return (vector signed short)__scalar; +} + +static inline __ATTRS_o_ai vector unsigned short +vec_splats(unsigned short __scalar) { + return (vector unsigned short)__scalar; +} + +static inline __ATTRS_o_ai vector signed int +vec_splats(signed int __scalar) { + return (vector signed int)__scalar; +} + +static inline __ATTRS_o_ai vector unsigned int +vec_splats(unsigned int __scalar) { + return (vector unsigned int)__scalar; +} + +static inline __ATTRS_o_ai vector signed long long +vec_splats(signed long long __scalar) { + return (vector signed long long)__scalar; +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_splats(unsigned long long __scalar) { + return (vector unsigned long long)__scalar; +} + +static inline __ATTRS_o_ai vector double +vec_splats(double __scalar) { + return (vector double)__scalar; +} + +/*-- vec_extend_s64 ---------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed long long +vec_extend_s64(vector signed char __a) { + return (vector signed long long)(__a[7], __a[15]); +} + +static inline __ATTRS_o_ai vector signed long long +vec_extend_s64(vector signed short __a) { + return (vector signed long long)(__a[3], __a[7]); +} + +static inline __ATTRS_o_ai vector signed long long +vec_extend_s64(vector signed int __a) { + return (vector signed long long)(__a[1], __a[3]); +} + +/*-- vec_mergeh -------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_mergeh(vector signed char __a, vector signed char __b) { + return (vector signed char)( + __a[0], __b[0], __a[1], __b[1], __a[2], __b[2], __a[3], __b[3], + __a[4], __b[4], __a[5], __b[5], __a[6], __b[6], __a[7], __b[7]); +} + +static inline __ATTRS_o_ai vector bool char +vec_mergeh(vector bool char __a, vector bool char __b) { + return (vector bool char)( + __a[0], __b[0], __a[1], __b[1], __a[2], __b[2], __a[3], __b[3], + __a[4], __b[4], __a[5], __b[5], __a[6], __b[6], __a[7], __b[7]); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_mergeh(vector unsigned char __a, vector unsigned char __b) { + return (vector unsigned char)( + __a[0], __b[0], __a[1], __b[1], __a[2], __b[2], __a[3], __b[3], + __a[4], __b[4], __a[5], __b[5], __a[6], __b[6], __a[7], __b[7]); +} + +static inline __ATTRS_o_ai vector signed short +vec_mergeh(vector signed short __a, vector signed short __b) { + return (vector signed short)( + __a[0], __b[0], __a[1], __b[1], __a[2], __b[2], __a[3], __b[3]); +} + 
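+/* A minimal usage sketch of the intrinsics defined above (vec_splats,
+   vec_genmask, vec_sel, vec_mergeh); it assumes a z13 target with the
+   vector language extension enabled (e.g. clang -march=z13 -fzvector),
+   and the values in the comments follow from the element-wise
+   definitions above:
+
+     vector signed char a = vec_splats((signed char)1);  // sixteen 1s
+     vector signed char b = vec_splats((signed char)2);  // sixteen 2s
+
+     // Each of the 16 mask bits selects one byte, most-significant bit
+     // first, so 0xff00 gives 0xff in bytes 0..7 and 0x00 in bytes 8..15.
+     vector unsigned char m = vec_genmask(0xff00);
+
+     // vec_sel takes bits from b where m is 1 and from a where m is 0:
+     // eight 2s followed by eight 1s.
+     vector signed char r = vec_sel(a, b, m);
+
+     // vec_mergeh interleaves the high halves of a and b:
+     // {1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2}.
+     vector signed char h = vec_mergeh(a, b);
+*/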
+static inline __ATTRS_o_ai vector bool short +vec_mergeh(vector bool short __a, vector bool short __b) { + return (vector bool short)( + __a[0], __b[0], __a[1], __b[1], __a[2], __b[2], __a[3], __b[3]); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_mergeh(vector unsigned short __a, vector unsigned short __b) { + return (vector unsigned short)( + __a[0], __b[0], __a[1], __b[1], __a[2], __b[2], __a[3], __b[3]); +} + +static inline __ATTRS_o_ai vector signed int +vec_mergeh(vector signed int __a, vector signed int __b) { + return (vector signed int)(__a[0], __b[0], __a[1], __b[1]); +} + +static inline __ATTRS_o_ai vector bool int +vec_mergeh(vector bool int __a, vector bool int __b) { + return (vector bool int)(__a[0], __b[0], __a[1], __b[1]); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_mergeh(vector unsigned int __a, vector unsigned int __b) { + return (vector unsigned int)(__a[0], __b[0], __a[1], __b[1]); +} + +static inline __ATTRS_o_ai vector signed long long +vec_mergeh(vector signed long long __a, vector signed long long __b) { + return (vector signed long long)(__a[0], __b[0]); +} + +static inline __ATTRS_o_ai vector bool long long +vec_mergeh(vector bool long long __a, vector bool long long __b) { + return (vector bool long long)(__a[0], __b[0]); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_mergeh(vector unsigned long long __a, vector unsigned long long __b) { + return (vector unsigned long long)(__a[0], __b[0]); +} + +static inline __ATTRS_o_ai vector double +vec_mergeh(vector double __a, vector double __b) { + return (vector double)(__a[0], __b[0]); +} + +/*-- vec_mergel -------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_mergel(vector signed char __a, vector signed char __b) { + return (vector signed char)( + __a[8], __b[8], __a[9], __b[9], __a[10], __b[10], __a[11], __b[11], + __a[12], __b[12], __a[13], __b[13], __a[14], __b[14], __a[15], __b[15]); +} + +static inline __ATTRS_o_ai vector bool char +vec_mergel(vector bool char __a, vector bool char __b) { + return (vector bool char)( + __a[8], __b[8], __a[9], __b[9], __a[10], __b[10], __a[11], __b[11], + __a[12], __b[12], __a[13], __b[13], __a[14], __b[14], __a[15], __b[15]); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_mergel(vector unsigned char __a, vector unsigned char __b) { + return (vector unsigned char)( + __a[8], __b[8], __a[9], __b[9], __a[10], __b[10], __a[11], __b[11], + __a[12], __b[12], __a[13], __b[13], __a[14], __b[14], __a[15], __b[15]); +} + +static inline __ATTRS_o_ai vector signed short +vec_mergel(vector signed short __a, vector signed short __b) { + return (vector signed short)( + __a[4], __b[4], __a[5], __b[5], __a[6], __b[6], __a[7], __b[7]); +} + +static inline __ATTRS_o_ai vector bool short +vec_mergel(vector bool short __a, vector bool short __b) { + return (vector bool short)( + __a[4], __b[4], __a[5], __b[5], __a[6], __b[6], __a[7], __b[7]); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_mergel(vector unsigned short __a, vector unsigned short __b) { + return (vector unsigned short)( + __a[4], __b[4], __a[5], __b[5], __a[6], __b[6], __a[7], __b[7]); +} + +static inline __ATTRS_o_ai vector signed int +vec_mergel(vector signed int __a, vector signed int __b) { + return (vector signed int)(__a[2], __b[2], __a[3], __b[3]); +} + +static inline __ATTRS_o_ai vector bool int +vec_mergel(vector bool int __a, vector bool int __b) { + return (vector bool int)(__a[2], __b[2], 
__a[3], __b[3]); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_mergel(vector unsigned int __a, vector unsigned int __b) { + return (vector unsigned int)(__a[2], __b[2], __a[3], __b[3]); +} + +static inline __ATTRS_o_ai vector signed long long +vec_mergel(vector signed long long __a, vector signed long long __b) { + return (vector signed long long)(__a[1], __b[1]); +} + +static inline __ATTRS_o_ai vector bool long long +vec_mergel(vector bool long long __a, vector bool long long __b) { + return (vector bool long long)(__a[1], __b[1]); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_mergel(vector unsigned long long __a, vector unsigned long long __b) { + return (vector unsigned long long)(__a[1], __b[1]); +} + +static inline __ATTRS_o_ai vector double +vec_mergel(vector double __a, vector double __b) { + return (vector double)(__a[1], __b[1]); +} + +/*-- vec_pack ---------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_pack(vector signed short __a, vector signed short __b) { + vector signed char __ac = (vector signed char)__a; + vector signed char __bc = (vector signed char)__b; + return (vector signed char)( + __ac[1], __ac[3], __ac[5], __ac[7], __ac[9], __ac[11], __ac[13], __ac[15], + __bc[1], __bc[3], __bc[5], __bc[7], __bc[9], __bc[11], __bc[13], __bc[15]); +} + +static inline __ATTRS_o_ai vector bool char +vec_pack(vector bool short __a, vector bool short __b) { + vector bool char __ac = (vector bool char)__a; + vector bool char __bc = (vector bool char)__b; + return (vector bool char)( + __ac[1], __ac[3], __ac[5], __ac[7], __ac[9], __ac[11], __ac[13], __ac[15], + __bc[1], __bc[3], __bc[5], __bc[7], __bc[9], __bc[11], __bc[13], __bc[15]); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_pack(vector unsigned short __a, vector unsigned short __b) { + vector unsigned char __ac = (vector unsigned char)__a; + vector unsigned char __bc = (vector unsigned char)__b; + return (vector unsigned char)( + __ac[1], __ac[3], __ac[5], __ac[7], __ac[9], __ac[11], __ac[13], __ac[15], + __bc[1], __bc[3], __bc[5], __bc[7], __bc[9], __bc[11], __bc[13], __bc[15]); +} + +static inline __ATTRS_o_ai vector signed short +vec_pack(vector signed int __a, vector signed int __b) { + vector signed short __ac = (vector signed short)__a; + vector signed short __bc = (vector signed short)__b; + return (vector signed short)( + __ac[1], __ac[3], __ac[5], __ac[7], + __bc[1], __bc[3], __bc[5], __bc[7]); +} + +static inline __ATTRS_o_ai vector bool short +vec_pack(vector bool int __a, vector bool int __b) { + vector bool short __ac = (vector bool short)__a; + vector bool short __bc = (vector bool short)__b; + return (vector bool short)( + __ac[1], __ac[3], __ac[5], __ac[7], + __bc[1], __bc[3], __bc[5], __bc[7]); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_pack(vector unsigned int __a, vector unsigned int __b) { + vector unsigned short __ac = (vector unsigned short)__a; + vector unsigned short __bc = (vector unsigned short)__b; + return (vector unsigned short)( + __ac[1], __ac[3], __ac[5], __ac[7], + __bc[1], __bc[3], __bc[5], __bc[7]); +} + +static inline __ATTRS_o_ai vector signed int +vec_pack(vector signed long long __a, vector signed long long __b) { + vector signed int __ac = (vector signed int)__a; + vector signed int __bc = (vector signed int)__b; + return (vector signed int)(__ac[1], __ac[3], __bc[1], __bc[3]); +} + +static inline __ATTRS_o_ai vector bool int +vec_pack(vector bool long long __a, vector 
bool long long __b) { + vector bool int __ac = (vector bool int)__a; + vector bool int __bc = (vector bool int)__b; + return (vector bool int)(__ac[1], __ac[3], __bc[1], __bc[3]); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_pack(vector unsigned long long __a, vector unsigned long long __b) { + vector unsigned int __ac = (vector unsigned int)__a; + vector unsigned int __bc = (vector unsigned int)__b; + return (vector unsigned int)(__ac[1], __ac[3], __bc[1], __bc[3]); +} + +/*-- vec_packs --------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_packs(vector signed short __a, vector signed short __b) { + return __builtin_s390_vpksh(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_packs(vector unsigned short __a, vector unsigned short __b) { + return __builtin_s390_vpklsh(__a, __b); +} + +static inline __ATTRS_o_ai vector signed short +vec_packs(vector signed int __a, vector signed int __b) { + return __builtin_s390_vpksf(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_packs(vector unsigned int __a, vector unsigned int __b) { + return __builtin_s390_vpklsf(__a, __b); +} + +static inline __ATTRS_o_ai vector signed int +vec_packs(vector signed long long __a, vector signed long long __b) { + return __builtin_s390_vpksg(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_packs(vector unsigned long long __a, vector unsigned long long __b) { + return __builtin_s390_vpklsg(__a, __b); +} + +/*-- vec_packs_cc -----------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_packs_cc(vector signed short __a, vector signed short __b, int *__cc) { + return __builtin_s390_vpkshs(__a, __b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_packs_cc(vector unsigned short __a, vector unsigned short __b, int *__cc) { + return __builtin_s390_vpklshs(__a, __b, __cc); +} + +static inline __ATTRS_o_ai vector signed short +vec_packs_cc(vector signed int __a, vector signed int __b, int *__cc) { + return __builtin_s390_vpksfs(__a, __b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_packs_cc(vector unsigned int __a, vector unsigned int __b, int *__cc) { + return __builtin_s390_vpklsfs(__a, __b, __cc); +} + +static inline __ATTRS_o_ai vector signed int +vec_packs_cc(vector signed long long __a, vector signed long long __b, + int *__cc) { + return __builtin_s390_vpksgs(__a, __b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_packs_cc(vector unsigned long long __a, vector unsigned long long __b, + int *__cc) { + return __builtin_s390_vpklsgs(__a, __b, __cc); +} + +/*-- vec_packsu -------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector unsigned char +vec_packsu(vector signed short __a, vector signed short __b) { + const vector signed short __zero = (vector signed short)0; + return __builtin_s390_vpklsh( + (vector unsigned short)(__a >= __zero) & (vector unsigned short)__a, + (vector unsigned short)(__b >= __zero) & (vector unsigned short)__b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_packsu(vector unsigned short __a, vector unsigned short __b) { + return __builtin_s390_vpklsh(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_packsu(vector signed int __a, vector signed int __b) { + const vector signed int __zero = (vector signed int)0; + return __builtin_s390_vpklsf( + (vector unsigned int)(__a >= __zero) 
& (vector unsigned int)__a, + (vector unsigned int)(__b >= __zero) & (vector unsigned int)__b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_packsu(vector unsigned int __a, vector unsigned int __b) { + return __builtin_s390_vpklsf(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_packsu(vector signed long long __a, vector signed long long __b) { + const vector signed long long __zero = (vector signed long long)0; + return __builtin_s390_vpklsg( + (vector unsigned long long)(__a >= __zero) & + (vector unsigned long long)__a, + (vector unsigned long long)(__b >= __zero) & + (vector unsigned long long)__b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_packsu(vector unsigned long long __a, vector unsigned long long __b) { + return __builtin_s390_vpklsg(__a, __b); +} + +/*-- vec_packsu_cc ----------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector unsigned char +vec_packsu_cc(vector unsigned short __a, vector unsigned short __b, int *__cc) { + return __builtin_s390_vpklshs(__a, __b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_packsu_cc(vector unsigned int __a, vector unsigned int __b, int *__cc) { + return __builtin_s390_vpklsfs(__a, __b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_packsu_cc(vector unsigned long long __a, vector unsigned long long __b, + int *__cc) { + return __builtin_s390_vpklsgs(__a, __b, __cc); +} + +/*-- vec_unpackh ------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed short +vec_unpackh(vector signed char __a) { + return __builtin_s390_vuphb(__a); +} + +static inline __ATTRS_o_ai vector bool short +vec_unpackh(vector bool char __a) { + return (vector bool short)__builtin_s390_vuphb((vector signed char)__a); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_unpackh(vector unsigned char __a) { + return __builtin_s390_vuplhb(__a); +} + +static inline __ATTRS_o_ai vector signed int +vec_unpackh(vector signed short __a) { + return __builtin_s390_vuphh(__a); +} + +static inline __ATTRS_o_ai vector bool int +vec_unpackh(vector bool short __a) { + return (vector bool int)__builtin_s390_vuphh((vector signed short)__a); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_unpackh(vector unsigned short __a) { + return __builtin_s390_vuplhh(__a); +} + +static inline __ATTRS_o_ai vector signed long long +vec_unpackh(vector signed int __a) { + return __builtin_s390_vuphf(__a); +} + +static inline __ATTRS_o_ai vector bool long long +vec_unpackh(vector bool int __a) { + return (vector bool long long)__builtin_s390_vuphf((vector signed int)__a); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_unpackh(vector unsigned int __a) { + return __builtin_s390_vuplhf(__a); +} + +/*-- vec_unpackl ------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed short +vec_unpackl(vector signed char __a) { + return __builtin_s390_vuplb(__a); +} + +static inline __ATTRS_o_ai vector bool short +vec_unpackl(vector bool char __a) { + return (vector bool short)__builtin_s390_vuplb((vector signed char)__a); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_unpackl(vector unsigned char __a) { + return __builtin_s390_vupllb(__a); +} + +static inline __ATTRS_o_ai vector signed int +vec_unpackl(vector signed short __a) { + return __builtin_s390_vuplhw(__a); +} + +static inline __ATTRS_o_ai vector bool int +vec_unpackl(vector bool short __a) { + return 
(vector bool int)__builtin_s390_vuplhw((vector signed short)__a); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_unpackl(vector unsigned short __a) { + return __builtin_s390_vupllh(__a); +} + +static inline __ATTRS_o_ai vector signed long long +vec_unpackl(vector signed int __a) { + return __builtin_s390_vuplf(__a); +} + +static inline __ATTRS_o_ai vector bool long long +vec_unpackl(vector bool int __a) { + return (vector bool long long)__builtin_s390_vuplf((vector signed int)__a); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_unpackl(vector unsigned int __a) { + return __builtin_s390_vupllf(__a); +} + +/*-- vec_cmpeq --------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector bool char +vec_cmpeq(vector bool char __a, vector bool char __b) { + return (vector bool char)(__a == __b); +} + +static inline __ATTRS_o_ai vector bool char +vec_cmpeq(vector signed char __a, vector signed char __b) { + return (vector bool char)(__a == __b); +} + +static inline __ATTRS_o_ai vector bool char +vec_cmpeq(vector unsigned char __a, vector unsigned char __b) { + return (vector bool char)(__a == __b); +} + +static inline __ATTRS_o_ai vector bool short +vec_cmpeq(vector bool short __a, vector bool short __b) { + return (vector bool short)(__a == __b); +} + +static inline __ATTRS_o_ai vector bool short +vec_cmpeq(vector signed short __a, vector signed short __b) { + return (vector bool short)(__a == __b); +} + +static inline __ATTRS_o_ai vector bool short +vec_cmpeq(vector unsigned short __a, vector unsigned short __b) { + return (vector bool short)(__a == __b); +} + +static inline __ATTRS_o_ai vector bool int +vec_cmpeq(vector bool int __a, vector bool int __b) { + return (vector bool int)(__a == __b); +} + +static inline __ATTRS_o_ai vector bool int +vec_cmpeq(vector signed int __a, vector signed int __b) { + return (vector bool int)(__a == __b); +} + +static inline __ATTRS_o_ai vector bool int +vec_cmpeq(vector unsigned int __a, vector unsigned int __b) { + return (vector bool int)(__a == __b); +} + +static inline __ATTRS_o_ai vector bool long long +vec_cmpeq(vector bool long long __a, vector bool long long __b) { + return (vector bool long long)(__a == __b); +} + +static inline __ATTRS_o_ai vector bool long long +vec_cmpeq(vector signed long long __a, vector signed long long __b) { + return (vector bool long long)(__a == __b); +} + +static inline __ATTRS_o_ai vector bool long long +vec_cmpeq(vector unsigned long long __a, vector unsigned long long __b) { + return (vector bool long long)(__a == __b); +} + +static inline __ATTRS_o_ai vector bool long long +vec_cmpeq(vector double __a, vector double __b) { + return (vector bool long long)(__a == __b); +} + +/*-- vec_cmpge --------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector bool char +vec_cmpge(vector signed char __a, vector signed char __b) { + return (vector bool char)(__a >= __b); +} + +static inline __ATTRS_o_ai vector bool char +vec_cmpge(vector unsigned char __a, vector unsigned char __b) { + return (vector bool char)(__a >= __b); +} + +static inline __ATTRS_o_ai vector bool short +vec_cmpge(vector signed short __a, vector signed short __b) { + return (vector bool short)(__a >= __b); +} + +static inline __ATTRS_o_ai vector bool short +vec_cmpge(vector unsigned short __a, vector unsigned short __b) { + return (vector bool short)(__a >= __b); +} + +static inline __ATTRS_o_ai vector bool int +vec_cmpge(vector signed int __a, vector 
signed int __b) { + return (vector bool int)(__a >= __b); +} + +static inline __ATTRS_o_ai vector bool int +vec_cmpge(vector unsigned int __a, vector unsigned int __b) { + return (vector bool int)(__a >= __b); +} + +static inline __ATTRS_o_ai vector bool long long +vec_cmpge(vector signed long long __a, vector signed long long __b) { + return (vector bool long long)(__a >= __b); +} + +static inline __ATTRS_o_ai vector bool long long +vec_cmpge(vector unsigned long long __a, vector unsigned long long __b) { + return (vector bool long long)(__a >= __b); +} + +static inline __ATTRS_o_ai vector bool long long +vec_cmpge(vector double __a, vector double __b) { + return (vector bool long long)(__a >= __b); +} + +/*-- vec_cmpgt --------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector bool char +vec_cmpgt(vector signed char __a, vector signed char __b) { + return (vector bool char)(__a > __b); +} + +static inline __ATTRS_o_ai vector bool char +vec_cmpgt(vector unsigned char __a, vector unsigned char __b) { + return (vector bool char)(__a > __b); +} + +static inline __ATTRS_o_ai vector bool short +vec_cmpgt(vector signed short __a, vector signed short __b) { + return (vector bool short)(__a > __b); +} + +static inline __ATTRS_o_ai vector bool short +vec_cmpgt(vector unsigned short __a, vector unsigned short __b) { + return (vector bool short)(__a > __b); +} + +static inline __ATTRS_o_ai vector bool int +vec_cmpgt(vector signed int __a, vector signed int __b) { + return (vector bool int)(__a > __b); +} + +static inline __ATTRS_o_ai vector bool int +vec_cmpgt(vector unsigned int __a, vector unsigned int __b) { + return (vector bool int)(__a > __b); +} + +static inline __ATTRS_o_ai vector bool long long +vec_cmpgt(vector signed long long __a, vector signed long long __b) { + return (vector bool long long)(__a > __b); +} + +static inline __ATTRS_o_ai vector bool long long +vec_cmpgt(vector unsigned long long __a, vector unsigned long long __b) { + return (vector bool long long)(__a > __b); +} + +static inline __ATTRS_o_ai vector bool long long +vec_cmpgt(vector double __a, vector double __b) { + return (vector bool long long)(__a > __b); +} + +/*-- vec_cmple --------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector bool char +vec_cmple(vector signed char __a, vector signed char __b) { + return (vector bool char)(__a <= __b); +} + +static inline __ATTRS_o_ai vector bool char +vec_cmple(vector unsigned char __a, vector unsigned char __b) { + return (vector bool char)(__a <= __b); +} + +static inline __ATTRS_o_ai vector bool short +vec_cmple(vector signed short __a, vector signed short __b) { + return (vector bool short)(__a <= __b); +} + +static inline __ATTRS_o_ai vector bool short +vec_cmple(vector unsigned short __a, vector unsigned short __b) { + return (vector bool short)(__a <= __b); +} + +static inline __ATTRS_o_ai vector bool int +vec_cmple(vector signed int __a, vector signed int __b) { + return (vector bool int)(__a <= __b); +} + +static inline __ATTRS_o_ai vector bool int +vec_cmple(vector unsigned int __a, vector unsigned int __b) { + return (vector bool int)(__a <= __b); +} + +static inline __ATTRS_o_ai vector bool long long +vec_cmple(vector signed long long __a, vector signed long long __b) { + return (vector bool long long)(__a <= __b); +} + +static inline __ATTRS_o_ai vector bool long long +vec_cmple(vector unsigned long long __a, vector unsigned long long __b) { + return (vector bool long long)(__a 
<= __b); +} + +static inline __ATTRS_o_ai vector bool long long +vec_cmple(vector double __a, vector double __b) { + return (vector bool long long)(__a <= __b); +} + +/*-- vec_cmplt --------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector bool char +vec_cmplt(vector signed char __a, vector signed char __b) { + return (vector bool char)(__a < __b); +} + +static inline __ATTRS_o_ai vector bool char +vec_cmplt(vector unsigned char __a, vector unsigned char __b) { + return (vector bool char)(__a < __b); +} + +static inline __ATTRS_o_ai vector bool short +vec_cmplt(vector signed short __a, vector signed short __b) { + return (vector bool short)(__a < __b); +} + +static inline __ATTRS_o_ai vector bool short +vec_cmplt(vector unsigned short __a, vector unsigned short __b) { + return (vector bool short)(__a < __b); +} + +static inline __ATTRS_o_ai vector bool int +vec_cmplt(vector signed int __a, vector signed int __b) { + return (vector bool int)(__a < __b); +} + +static inline __ATTRS_o_ai vector bool int +vec_cmplt(vector unsigned int __a, vector unsigned int __b) { + return (vector bool int)(__a < __b); +} + +static inline __ATTRS_o_ai vector bool long long +vec_cmplt(vector signed long long __a, vector signed long long __b) { + return (vector bool long long)(__a < __b); +} + +static inline __ATTRS_o_ai vector bool long long +vec_cmplt(vector unsigned long long __a, vector unsigned long long __b) { + return (vector bool long long)(__a < __b); +} + +static inline __ATTRS_o_ai vector bool long long +vec_cmplt(vector double __a, vector double __b) { + return (vector bool long long)(__a < __b); +} + +/*-- vec_all_eq -------------------------------------------------------------*/ + +static inline __ATTRS_o_ai int +vec_all_eq(vector signed char __a, vector signed char __b) { + int __cc; + __builtin_s390_vceqbs(__a, __b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_eq(vector signed char __a, vector bool char __b) { + int __cc; + __builtin_s390_vceqbs(__a, (vector signed char)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_eq(vector bool char __a, vector signed char __b) { + int __cc; + __builtin_s390_vceqbs((vector signed char)__a, __b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_eq(vector unsigned char __a, vector unsigned char __b) { + int __cc; + __builtin_s390_vceqbs((vector signed char)__a, + (vector signed char)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_eq(vector unsigned char __a, vector bool char __b) { + int __cc; + __builtin_s390_vceqbs((vector signed char)__a, + (vector signed char)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_eq(vector bool char __a, vector unsigned char __b) { + int __cc; + __builtin_s390_vceqbs((vector signed char)__a, + (vector signed char)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_eq(vector bool char __a, vector bool char __b) { + int __cc; + __builtin_s390_vceqbs((vector signed char)__a, + (vector signed char)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_eq(vector signed short __a, vector signed short __b) { + int __cc; + __builtin_s390_vceqhs(__a, __b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_eq(vector signed short __a, vector bool short __b) { + int __cc; + __builtin_s390_vceqhs(__a, (vector signed short)__b, &__cc); + return __cc == 0; +} + +static inline 
__ATTRS_o_ai int +vec_all_eq(vector bool short __a, vector signed short __b) { + int __cc; + __builtin_s390_vceqhs((vector signed short)__a, __b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_eq(vector unsigned short __a, vector unsigned short __b) { + int __cc; + __builtin_s390_vceqhs((vector signed short)__a, + (vector signed short)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_eq(vector unsigned short __a, vector bool short __b) { + int __cc; + __builtin_s390_vceqhs((vector signed short)__a, + (vector signed short)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_eq(vector bool short __a, vector unsigned short __b) { + int __cc; + __builtin_s390_vceqhs((vector signed short)__a, + (vector signed short)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_eq(vector bool short __a, vector bool short __b) { + int __cc; + __builtin_s390_vceqhs((vector signed short)__a, + (vector signed short)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_eq(vector signed int __a, vector signed int __b) { + int __cc; + __builtin_s390_vceqfs(__a, __b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_eq(vector signed int __a, vector bool int __b) { + int __cc; + __builtin_s390_vceqfs(__a, (vector signed int)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_eq(vector bool int __a, vector signed int __b) { + int __cc; + __builtin_s390_vceqfs((vector signed int)__a, __b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_eq(vector unsigned int __a, vector unsigned int __b) { + int __cc; + __builtin_s390_vceqfs((vector signed int)__a, + (vector signed int)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_eq(vector unsigned int __a, vector bool int __b) { + int __cc; + __builtin_s390_vceqfs((vector signed int)__a, + (vector signed int)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_eq(vector bool int __a, vector unsigned int __b) { + int __cc; + __builtin_s390_vceqfs((vector signed int)__a, + (vector signed int)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_eq(vector bool int __a, vector bool int __b) { + int __cc; + __builtin_s390_vceqfs((vector signed int)__a, + (vector signed int)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_eq(vector signed long long __a, vector signed long long __b) { + int __cc; + __builtin_s390_vceqgs(__a, __b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_eq(vector signed long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vceqgs(__a, (vector signed long long)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_eq(vector bool long long __a, vector signed long long __b) { + int __cc; + __builtin_s390_vceqgs((vector signed long long)__a, __b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_eq(vector unsigned long long __a, vector unsigned long long __b) { + int __cc; + __builtin_s390_vceqgs((vector signed long long)__a, + (vector signed long long)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_eq(vector unsigned long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vceqgs((vector signed long long)__a, + (vector signed long long)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int 
+vec_all_eq(vector bool long long __a, vector unsigned long long __b) { + int __cc; + __builtin_s390_vceqgs((vector signed long long)__a, + (vector signed long long)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_eq(vector bool long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vceqgs((vector signed long long)__a, + (vector signed long long)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_eq(vector double __a, vector double __b) { + int __cc; + __builtin_s390_vfcedbs(__a, __b, &__cc); + return __cc == 0; +} + +/*-- vec_all_ne -------------------------------------------------------------*/ + +static inline __ATTRS_o_ai int +vec_all_ne(vector signed char __a, vector signed char __b) { + int __cc; + __builtin_s390_vceqbs(__a, __b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(vector signed char __a, vector bool char __b) { + int __cc; + __builtin_s390_vceqbs(__a, (vector signed char)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(vector bool char __a, vector signed char __b) { + int __cc; + __builtin_s390_vceqbs((vector signed char)__a, __b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(vector unsigned char __a, vector unsigned char __b) { + int __cc; + __builtin_s390_vceqbs((vector signed char)__a, + (vector signed char)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(vector unsigned char __a, vector bool char __b) { + int __cc; + __builtin_s390_vceqbs((vector signed char)__a, + (vector signed char)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(vector bool char __a, vector unsigned char __b) { + int __cc; + __builtin_s390_vceqbs((vector signed char)__a, + (vector signed char)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(vector bool char __a, vector bool char __b) { + int __cc; + __builtin_s390_vceqbs((vector signed char)__a, + (vector signed char)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(vector signed short __a, vector signed short __b) { + int __cc; + __builtin_s390_vceqhs(__a, __b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(vector signed short __a, vector bool short __b) { + int __cc; + __builtin_s390_vceqhs(__a, (vector signed short)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(vector bool short __a, vector signed short __b) { + int __cc; + __builtin_s390_vceqhs((vector signed short)__a, __b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(vector unsigned short __a, vector unsigned short __b) { + int __cc; + __builtin_s390_vceqhs((vector signed short)__a, + (vector signed short)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(vector unsigned short __a, vector bool short __b) { + int __cc; + __builtin_s390_vceqhs((vector signed short)__a, + (vector signed short)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(vector bool short __a, vector unsigned short __b) { + int __cc; + __builtin_s390_vceqhs((vector signed short)__a, + (vector signed short)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(vector bool short __a, vector bool short __b) { + int __cc; + __builtin_s390_vceqhs((vector signed short)__a, + (vector signed short)__b, &__cc); + return __cc == 3; +} + +static inline 
__ATTRS_o_ai int +vec_all_ne(vector signed int __a, vector signed int __b) { + int __cc; + __builtin_s390_vceqfs(__a, __b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(vector signed int __a, vector bool int __b) { + int __cc; + __builtin_s390_vceqfs(__a, (vector signed int)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(vector bool int __a, vector signed int __b) { + int __cc; + __builtin_s390_vceqfs((vector signed int)__a, __b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(vector unsigned int __a, vector unsigned int __b) { + int __cc; + __builtin_s390_vceqfs((vector signed int)__a, + (vector signed int)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(vector unsigned int __a, vector bool int __b) { + int __cc; + __builtin_s390_vceqfs((vector signed int)__a, + (vector signed int)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(vector bool int __a, vector unsigned int __b) { + int __cc; + __builtin_s390_vceqfs((vector signed int)__a, + (vector signed int)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(vector bool int __a, vector bool int __b) { + int __cc; + __builtin_s390_vceqfs((vector signed int)__a, + (vector signed int)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(vector signed long long __a, vector signed long long __b) { + int __cc; + __builtin_s390_vceqgs(__a, __b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(vector signed long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vceqgs(__a, (vector signed long long)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(vector bool long long __a, vector signed long long __b) { + int __cc; + __builtin_s390_vceqgs((vector signed long long)__a, __b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(vector unsigned long long __a, vector unsigned long long __b) { + int __cc; + __builtin_s390_vceqgs((vector signed long long)__a, + (vector signed long long)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(vector unsigned long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vceqgs((vector signed long long)__a, + (vector signed long long)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(vector bool long long __a, vector unsigned long long __b) { + int __cc; + __builtin_s390_vceqgs((vector signed long long)__a, + (vector signed long long)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(vector bool long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vceqgs((vector signed long long)__a, + (vector signed long long)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(vector double __a, vector double __b) { + int __cc; + __builtin_s390_vfcedbs(__a, __b, &__cc); + return __cc == 3; +} + +/*-- vec_all_ge -------------------------------------------------------------*/ + +static inline __ATTRS_o_ai int +vec_all_ge(vector signed char __a, vector signed char __b) { + int __cc; + __builtin_s390_vchbs(__b, __a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ge(vector signed char __a, vector bool char __b) { + int __cc; + __builtin_s390_vchbs((vector signed char)__b, __a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int 
+vec_all_ge(vector bool char __a, vector signed char __b) { + int __cc; + __builtin_s390_vchbs(__b, (vector signed char)__a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ge(vector unsigned char __a, vector unsigned char __b) { + int __cc; + __builtin_s390_vchlbs(__b, __a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ge(vector unsigned char __a, vector bool char __b) { + int __cc; + __builtin_s390_vchlbs((vector unsigned char)__b, __a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ge(vector bool char __a, vector unsigned char __b) { + int __cc; + __builtin_s390_vchlbs(__b, (vector unsigned char)__a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ge(vector bool char __a, vector bool char __b) { + int __cc; + __builtin_s390_vchlbs((vector unsigned char)__b, + (vector unsigned char)__a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ge(vector signed short __a, vector signed short __b) { + int __cc; + __builtin_s390_vchhs(__b, __a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ge(vector signed short __a, vector bool short __b) { + int __cc; + __builtin_s390_vchhs((vector signed short)__b, __a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ge(vector bool short __a, vector signed short __b) { + int __cc; + __builtin_s390_vchhs(__b, (vector signed short)__a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ge(vector unsigned short __a, vector unsigned short __b) { + int __cc; + __builtin_s390_vchlhs(__b, __a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ge(vector unsigned short __a, vector bool short __b) { + int __cc; + __builtin_s390_vchlhs((vector unsigned short)__b, __a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ge(vector bool short __a, vector unsigned short __b) { + int __cc; + __builtin_s390_vchlhs(__b, (vector unsigned short)__a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ge(vector bool short __a, vector bool short __b) { + int __cc; + __builtin_s390_vchlhs((vector unsigned short)__b, + (vector unsigned short)__a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ge(vector signed int __a, vector signed int __b) { + int __cc; + __builtin_s390_vchfs(__b, __a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ge(vector signed int __a, vector bool int __b) { + int __cc; + __builtin_s390_vchfs((vector signed int)__b, __a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ge(vector bool int __a, vector signed int __b) { + int __cc; + __builtin_s390_vchfs(__b, (vector signed int)__a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ge(vector unsigned int __a, vector unsigned int __b) { + int __cc; + __builtin_s390_vchlfs(__b, __a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ge(vector unsigned int __a, vector bool int __b) { + int __cc; + __builtin_s390_vchlfs((vector unsigned int)__b, __a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ge(vector bool int __a, vector unsigned int __b) { + int __cc; + __builtin_s390_vchlfs(__b, (vector unsigned int)__a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ge(vector bool int __a, vector bool int __b) { + int __cc; + __builtin_s390_vchlfs((vector unsigned int)__b, + 
(vector unsigned int)__a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ge(vector signed long long __a, vector signed long long __b) { + int __cc; + __builtin_s390_vchgs(__b, __a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ge(vector signed long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vchgs((vector signed long long)__b, __a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ge(vector bool long long __a, vector signed long long __b) { + int __cc; + __builtin_s390_vchgs(__b, (vector signed long long)__a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ge(vector unsigned long long __a, vector unsigned long long __b) { + int __cc; + __builtin_s390_vchlgs(__b, __a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ge(vector unsigned long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vchlgs((vector unsigned long long)__b, __a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ge(vector bool long long __a, vector unsigned long long __b) { + int __cc; + __builtin_s390_vchlgs(__b, (vector unsigned long long)__a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ge(vector bool long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vchlgs((vector unsigned long long)__b, + (vector unsigned long long)__a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ge(vector double __a, vector double __b) { + int __cc; + __builtin_s390_vfchedbs(__a, __b, &__cc); + return __cc == 0; +} + +/*-- vec_all_gt -------------------------------------------------------------*/ + +static inline __ATTRS_o_ai int +vec_all_gt(vector signed char __a, vector signed char __b) { + int __cc; + __builtin_s390_vchbs(__a, __b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(vector signed char __a, vector bool char __b) { + int __cc; + __builtin_s390_vchbs(__a, (vector signed char)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(vector bool char __a, vector signed char __b) { + int __cc; + __builtin_s390_vchbs((vector signed char)__a, __b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(vector unsigned char __a, vector unsigned char __b) { + int __cc; + __builtin_s390_vchlbs(__a, __b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(vector unsigned char __a, vector bool char __b) { + int __cc; + __builtin_s390_vchlbs(__a, (vector unsigned char)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(vector bool char __a, vector unsigned char __b) { + int __cc; + __builtin_s390_vchlbs((vector unsigned char)__a, __b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(vector bool char __a, vector bool char __b) { + int __cc; + __builtin_s390_vchlbs((vector unsigned char)__a, + (vector unsigned char)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(vector signed short __a, vector signed short __b) { + int __cc; + __builtin_s390_vchhs(__a, __b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(vector signed short __a, vector bool short __b) { + int __cc; + __builtin_s390_vchhs(__a, (vector signed short)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(vector bool short __a, vector signed short __b) { + int __cc; + 
__builtin_s390_vchhs((vector signed short)__a, __b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(vector unsigned short __a, vector unsigned short __b) { + int __cc; + __builtin_s390_vchlhs(__a, __b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(vector unsigned short __a, vector bool short __b) { + int __cc; + __builtin_s390_vchlhs(__a, (vector unsigned short)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(vector bool short __a, vector unsigned short __b) { + int __cc; + __builtin_s390_vchlhs((vector unsigned short)__a, __b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(vector bool short __a, vector bool short __b) { + int __cc; + __builtin_s390_vchlhs((vector unsigned short)__a, + (vector unsigned short)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(vector signed int __a, vector signed int __b) { + int __cc; + __builtin_s390_vchfs(__a, __b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(vector signed int __a, vector bool int __b) { + int __cc; + __builtin_s390_vchfs(__a, (vector signed int)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(vector bool int __a, vector signed int __b) { + int __cc; + __builtin_s390_vchfs((vector signed int)__a, __b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(vector unsigned int __a, vector unsigned int __b) { + int __cc; + __builtin_s390_vchlfs(__a, __b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(vector unsigned int __a, vector bool int __b) { + int __cc; + __builtin_s390_vchlfs(__a, (vector unsigned int)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(vector bool int __a, vector unsigned int __b) { + int __cc; + __builtin_s390_vchlfs((vector unsigned int)__a, __b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(vector bool int __a, vector bool int __b) { + int __cc; + __builtin_s390_vchlfs((vector unsigned int)__a, + (vector unsigned int)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(vector signed long long __a, vector signed long long __b) { + int __cc; + __builtin_s390_vchgs(__a, __b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(vector signed long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vchgs(__a, (vector signed long long)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(vector bool long long __a, vector signed long long __b) { + int __cc; + __builtin_s390_vchgs((vector signed long long)__a, __b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(vector unsigned long long __a, vector unsigned long long __b) { + int __cc; + __builtin_s390_vchlgs(__a, __b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(vector unsigned long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vchlgs(__a, (vector unsigned long long)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(vector bool long long __a, vector unsigned long long __b) { + int __cc; + __builtin_s390_vchlgs((vector unsigned long long)__a, __b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(vector bool long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vchlgs((vector unsigned long 
long)__a, + (vector unsigned long long)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(vector double __a, vector double __b) { + int __cc; + __builtin_s390_vfchdbs(__a, __b, &__cc); + return __cc == 0; +} + +/*-- vec_all_le -------------------------------------------------------------*/ + +static inline __ATTRS_o_ai int +vec_all_le(vector signed char __a, vector signed char __b) { + int __cc; + __builtin_s390_vchbs(__a, __b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_le(vector signed char __a, vector bool char __b) { + int __cc; + __builtin_s390_vchbs(__a, (vector signed char)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_le(vector bool char __a, vector signed char __b) { + int __cc; + __builtin_s390_vchbs((vector signed char)__a, __b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_le(vector unsigned char __a, vector unsigned char __b) { + int __cc; + __builtin_s390_vchlbs(__a, __b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_le(vector unsigned char __a, vector bool char __b) { + int __cc; + __builtin_s390_vchlbs(__a, (vector unsigned char)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_le(vector bool char __a, vector unsigned char __b) { + int __cc; + __builtin_s390_vchlbs((vector unsigned char)__a, __b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_le(vector bool char __a, vector bool char __b) { + int __cc; + __builtin_s390_vchlbs((vector unsigned char)__a, + (vector unsigned char)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_le(vector signed short __a, vector signed short __b) { + int __cc; + __builtin_s390_vchhs(__a, __b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_le(vector signed short __a, vector bool short __b) { + int __cc; + __builtin_s390_vchhs(__a, (vector signed short)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_le(vector bool short __a, vector signed short __b) { + int __cc; + __builtin_s390_vchhs((vector signed short)__a, __b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_le(vector unsigned short __a, vector unsigned short __b) { + int __cc; + __builtin_s390_vchlhs(__a, __b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_le(vector unsigned short __a, vector bool short __b) { + int __cc; + __builtin_s390_vchlhs(__a, (vector unsigned short)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_le(vector bool short __a, vector unsigned short __b) { + int __cc; + __builtin_s390_vchlhs((vector unsigned short)__a, __b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_le(vector bool short __a, vector bool short __b) { + int __cc; + __builtin_s390_vchlhs((vector unsigned short)__a, + (vector unsigned short)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_le(vector signed int __a, vector signed int __b) { + int __cc; + __builtin_s390_vchfs(__a, __b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_le(vector signed int __a, vector bool int __b) { + int __cc; + __builtin_s390_vchfs(__a, (vector signed int)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_le(vector bool int __a, vector signed int __b) { + int __cc; + __builtin_s390_vchfs((vector signed int)__a, __b, &__cc); + return __cc == 3; +} 
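+
+/* Note on the vec_all_le lowering above: there is no "compare low" builtin,
+ * so these overloads reuse the "compare high" builtins with the operands in
+ * source order and test for condition code 3. For example, vchfs(__a, __b)
+ * sets __cc to 3 exactly when no element of __a compares greater than __b,
+ * which is the same as "all elements of __a are <= __b". A minimal usage
+ * sketch (the vector values here are illustrative, not part of this header):
+ *
+ *   vector signed int x = {1, 2, 3, 4};
+ *   vector signed int y = {4, 4, 4, 4};
+ *   int all_le = vec_all_le(x, y);   // 1: no lane of x exceeds y, cc == 3
+ */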
+ +static inline __ATTRS_o_ai int +vec_all_le(vector unsigned int __a, vector unsigned int __b) { + int __cc; + __builtin_s390_vchlfs(__a, __b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_le(vector unsigned int __a, vector bool int __b) { + int __cc; + __builtin_s390_vchlfs(__a, (vector unsigned int)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_le(vector bool int __a, vector unsigned int __b) { + int __cc; + __builtin_s390_vchlfs((vector unsigned int)__a, __b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_le(vector bool int __a, vector bool int __b) { + int __cc; + __builtin_s390_vchlfs((vector unsigned int)__a, + (vector unsigned int)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_le(vector signed long long __a, vector signed long long __b) { + int __cc; + __builtin_s390_vchgs(__a, __b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_le(vector signed long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vchgs(__a, (vector signed long long)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_le(vector bool long long __a, vector signed long long __b) { + int __cc; + __builtin_s390_vchgs((vector signed long long)__a, __b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_le(vector unsigned long long __a, vector unsigned long long __b) { + int __cc; + __builtin_s390_vchlgs(__a, __b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_le(vector unsigned long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vchlgs(__a, (vector unsigned long long)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_le(vector bool long long __a, vector unsigned long long __b) { + int __cc; + __builtin_s390_vchlgs((vector unsigned long long)__a, __b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_le(vector bool long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vchlgs((vector unsigned long long)__a, + (vector unsigned long long)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_le(vector double __a, vector double __b) { + int __cc; + __builtin_s390_vfchedbs(__b, __a, &__cc); + return __cc == 0; +} + +/*-- vec_all_lt -------------------------------------------------------------*/ + +static inline __ATTRS_o_ai int +vec_all_lt(vector signed char __a, vector signed char __b) { + int __cc; + __builtin_s390_vchbs(__b, __a, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(vector signed char __a, vector bool char __b) { + int __cc; + __builtin_s390_vchbs((vector signed char)__b, __a, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(vector bool char __a, vector signed char __b) { + int __cc; + __builtin_s390_vchbs(__b, (vector signed char)__a, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(vector unsigned char __a, vector unsigned char __b) { + int __cc; + __builtin_s390_vchlbs(__b, __a, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(vector unsigned char __a, vector bool char __b) { + int __cc; + __builtin_s390_vchlbs((vector unsigned char)__b, __a, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(vector bool char __a, vector unsigned char __b) { + int __cc; + __builtin_s390_vchlbs(__b, (vector unsigned char)__a, &__cc); + 
return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(vector bool char __a, vector bool char __b) { + int __cc; + __builtin_s390_vchlbs((vector unsigned char)__b, + (vector unsigned char)__a, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(vector signed short __a, vector signed short __b) { + int __cc; + __builtin_s390_vchhs(__b, __a, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(vector signed short __a, vector bool short __b) { + int __cc; + __builtin_s390_vchhs((vector signed short)__b, __a, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(vector bool short __a, vector signed short __b) { + int __cc; + __builtin_s390_vchhs(__b, (vector signed short)__a, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(vector unsigned short __a, vector unsigned short __b) { + int __cc; + __builtin_s390_vchlhs(__b, __a, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(vector unsigned short __a, vector bool short __b) { + int __cc; + __builtin_s390_vchlhs((vector unsigned short)__b, __a, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(vector bool short __a, vector unsigned short __b) { + int __cc; + __builtin_s390_vchlhs(__b, (vector unsigned short)__a, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(vector bool short __a, vector bool short __b) { + int __cc; + __builtin_s390_vchlhs((vector unsigned short)__b, + (vector unsigned short)__a, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(vector signed int __a, vector signed int __b) { + int __cc; + __builtin_s390_vchfs(__b, __a, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(vector signed int __a, vector bool int __b) { + int __cc; + __builtin_s390_vchfs((vector signed int)__b, __a, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(vector bool int __a, vector signed int __b) { + int __cc; + __builtin_s390_vchfs(__b, (vector signed int)__a, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(vector unsigned int __a, vector unsigned int __b) { + int __cc; + __builtin_s390_vchlfs(__b, __a, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(vector unsigned int __a, vector bool int __b) { + int __cc; + __builtin_s390_vchlfs((vector unsigned int)__b, __a, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(vector bool int __a, vector unsigned int __b) { + int __cc; + __builtin_s390_vchlfs(__b, (vector unsigned int)__a, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(vector bool int __a, vector bool int __b) { + int __cc; + __builtin_s390_vchlfs((vector unsigned int)__b, + (vector unsigned int)__a, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(vector signed long long __a, vector signed long long __b) { + int __cc; + __builtin_s390_vchgs(__b, __a, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(vector signed long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vchgs((vector signed long long)__b, __a, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(vector bool long long __a, vector signed long long __b) { + int __cc; + __builtin_s390_vchgs(__b, (vector signed long long)__a, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(vector 
unsigned long long __a, vector unsigned long long __b) { + int __cc; + __builtin_s390_vchlgs(__b, __a, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(vector unsigned long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vchlgs((vector unsigned long long)__b, __a, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(vector bool long long __a, vector unsigned long long __b) { + int __cc; + __builtin_s390_vchlgs(__b, (vector unsigned long long)__a, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(vector bool long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vchlgs((vector unsigned long long)__b, + (vector unsigned long long)__a, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(vector double __a, vector double __b) { + int __cc; + __builtin_s390_vfchdbs(__b, __a, &__cc); + return __cc == 0; +} + +/*-- vec_all_nge ------------------------------------------------------------*/ + +static inline __ATTRS_ai int +vec_all_nge(vector double __a, vector double __b) { + int __cc; + __builtin_s390_vfchedbs(__a, __b, &__cc); + return __cc == 3; +} + +/*-- vec_all_ngt ------------------------------------------------------------*/ + +static inline __ATTRS_ai int +vec_all_ngt(vector double __a, vector double __b) { + int __cc; + __builtin_s390_vfchdbs(__a, __b, &__cc); + return __cc == 3; +} + +/*-- vec_all_nle ------------------------------------------------------------*/ + +static inline __ATTRS_ai int +vec_all_nle(vector double __a, vector double __b) { + int __cc; + __builtin_s390_vfchedbs(__b, __a, &__cc); + return __cc == 3; +} + +/*-- vec_all_nlt ------------------------------------------------------------*/ + +static inline __ATTRS_ai int +vec_all_nlt(vector double __a, vector double __b) { + int __cc; + __builtin_s390_vfchdbs(__b, __a, &__cc); + return __cc == 3; +} + +/*-- vec_all_nan ------------------------------------------------------------*/ + +static inline __ATTRS_ai int +vec_all_nan(vector double __a) { + int __cc; + __builtin_s390_vftcidb(__a, 15, &__cc); + return __cc == 0; +} + +/*-- vec_all_numeric --------------------------------------------------------*/ + +static inline __ATTRS_ai int +vec_all_numeric(vector double __a) { + int __cc; + __builtin_s390_vftcidb(__a, 15, &__cc); + return __cc == 3; +} + +/*-- vec_any_eq -------------------------------------------------------------*/ + +static inline __ATTRS_o_ai int +vec_any_eq(vector signed char __a, vector signed char __b) { + int __cc; + __builtin_s390_vceqbs(__a, __b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(vector signed char __a, vector bool char __b) { + int __cc; + __builtin_s390_vceqbs(__a, (vector signed char)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(vector bool char __a, vector signed char __b) { + int __cc; + __builtin_s390_vceqbs((vector signed char)__a, __b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(vector unsigned char __a, vector unsigned char __b) { + int __cc; + __builtin_s390_vceqbs((vector signed char)__a, + (vector signed char)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(vector unsigned char __a, vector bool char __b) { + int __cc; + __builtin_s390_vceqbs((vector signed char)__a, + (vector signed char)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(vector bool char __a, vector unsigned 
char __b) { + int __cc; + __builtin_s390_vceqbs((vector signed char)__a, + (vector signed char)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(vector bool char __a, vector bool char __b) { + int __cc; + __builtin_s390_vceqbs((vector signed char)__a, + (vector signed char)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(vector signed short __a, vector signed short __b) { + int __cc; + __builtin_s390_vceqhs(__a, __b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(vector signed short __a, vector bool short __b) { + int __cc; + __builtin_s390_vceqhs(__a, (vector signed short)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(vector bool short __a, vector signed short __b) { + int __cc; + __builtin_s390_vceqhs((vector signed short)__a, __b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(vector unsigned short __a, vector unsigned short __b) { + int __cc; + __builtin_s390_vceqhs((vector signed short)__a, + (vector signed short)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(vector unsigned short __a, vector bool short __b) { + int __cc; + __builtin_s390_vceqhs((vector signed short)__a, + (vector signed short)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(vector bool short __a, vector unsigned short __b) { + int __cc; + __builtin_s390_vceqhs((vector signed short)__a, + (vector signed short)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(vector bool short __a, vector bool short __b) { + int __cc; + __builtin_s390_vceqhs((vector signed short)__a, + (vector signed short)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(vector signed int __a, vector signed int __b) { + int __cc; + __builtin_s390_vceqfs(__a, __b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(vector signed int __a, vector bool int __b) { + int __cc; + __builtin_s390_vceqfs(__a, (vector signed int)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(vector bool int __a, vector signed int __b) { + int __cc; + __builtin_s390_vceqfs((vector signed int)__a, __b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(vector unsigned int __a, vector unsigned int __b) { + int __cc; + __builtin_s390_vceqfs((vector signed int)__a, + (vector signed int)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(vector unsigned int __a, vector bool int __b) { + int __cc; + __builtin_s390_vceqfs((vector signed int)__a, + (vector signed int)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(vector bool int __a, vector unsigned int __b) { + int __cc; + __builtin_s390_vceqfs((vector signed int)__a, + (vector signed int)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(vector bool int __a, vector bool int __b) { + int __cc; + __builtin_s390_vceqfs((vector signed int)__a, + (vector signed int)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(vector signed long long __a, vector signed long long __b) { + int __cc; + __builtin_s390_vceqgs(__a, __b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(vector signed long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vceqgs(__a, (vector signed long long)__b, &__cc); + return __cc <= 
1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(vector bool long long __a, vector signed long long __b) { + int __cc; + __builtin_s390_vceqgs((vector signed long long)__a, __b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(vector unsigned long long __a, vector unsigned long long __b) { + int __cc; + __builtin_s390_vceqgs((vector signed long long)__a, + (vector signed long long)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(vector unsigned long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vceqgs((vector signed long long)__a, + (vector signed long long)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(vector bool long long __a, vector unsigned long long __b) { + int __cc; + __builtin_s390_vceqgs((vector signed long long)__a, + (vector signed long long)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(vector bool long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vceqgs((vector signed long long)__a, + (vector signed long long)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(vector double __a, vector double __b) { + int __cc; + __builtin_s390_vfcedbs(__a, __b, &__cc); + return __cc <= 1; +} + +/*-- vec_any_ne -------------------------------------------------------------*/ + +static inline __ATTRS_o_ai int +vec_any_ne(vector signed char __a, vector signed char __b) { + int __cc; + __builtin_s390_vceqbs(__a, __b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ne(vector signed char __a, vector bool char __b) { + int __cc; + __builtin_s390_vceqbs(__a, (vector signed char)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ne(vector bool char __a, vector signed char __b) { + int __cc; + __builtin_s390_vceqbs((vector signed char)__a, __b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ne(vector unsigned char __a, vector unsigned char __b) { + int __cc; + __builtin_s390_vceqbs((vector signed char)__a, + (vector signed char)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ne(vector unsigned char __a, vector bool char __b) { + int __cc; + __builtin_s390_vceqbs((vector signed char)__a, + (vector signed char)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ne(vector bool char __a, vector unsigned char __b) { + int __cc; + __builtin_s390_vceqbs((vector signed char)__a, + (vector signed char)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ne(vector bool char __a, vector bool char __b) { + int __cc; + __builtin_s390_vceqbs((vector signed char)__a, + (vector signed char)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ne(vector signed short __a, vector signed short __b) { + int __cc; + __builtin_s390_vceqhs(__a, __b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ne(vector signed short __a, vector bool short __b) { + int __cc; + __builtin_s390_vceqhs(__a, (vector signed short)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ne(vector bool short __a, vector signed short __b) { + int __cc; + __builtin_s390_vceqhs((vector signed short)__a, __b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ne(vector unsigned short __a, vector unsigned short __b) { + int __cc; + __builtin_s390_vceqhs((vector signed short)__a, + (vector 
signed short)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ne(vector unsigned short __a, vector bool short __b) { + int __cc; + __builtin_s390_vceqhs((vector signed short)__a, + (vector signed short)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ne(vector bool short __a, vector unsigned short __b) { + int __cc; + __builtin_s390_vceqhs((vector signed short)__a, + (vector signed short)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ne(vector bool short __a, vector bool short __b) { + int __cc; + __builtin_s390_vceqhs((vector signed short)__a, + (vector signed short)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ne(vector signed int __a, vector signed int __b) { + int __cc; + __builtin_s390_vceqfs(__a, __b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ne(vector signed int __a, vector bool int __b) { + int __cc; + __builtin_s390_vceqfs(__a, (vector signed int)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ne(vector bool int __a, vector signed int __b) { + int __cc; + __builtin_s390_vceqfs((vector signed int)__a, __b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ne(vector unsigned int __a, vector unsigned int __b) { + int __cc; + __builtin_s390_vceqfs((vector signed int)__a, + (vector signed int)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ne(vector unsigned int __a, vector bool int __b) { + int __cc; + __builtin_s390_vceqfs((vector signed int)__a, + (vector signed int)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ne(vector bool int __a, vector unsigned int __b) { + int __cc; + __builtin_s390_vceqfs((vector signed int)__a, + (vector signed int)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ne(vector bool int __a, vector bool int __b) { + int __cc; + __builtin_s390_vceqfs((vector signed int)__a, + (vector signed int)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ne(vector signed long long __a, vector signed long long __b) { + int __cc; + __builtin_s390_vceqgs(__a, __b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ne(vector signed long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vceqgs(__a, (vector signed long long)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ne(vector bool long long __a, vector signed long long __b) { + int __cc; + __builtin_s390_vceqgs((vector signed long long)__a, __b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ne(vector unsigned long long __a, vector unsigned long long __b) { + int __cc; + __builtin_s390_vceqgs((vector signed long long)__a, + (vector signed long long)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ne(vector unsigned long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vceqgs((vector signed long long)__a, + (vector signed long long)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ne(vector bool long long __a, vector unsigned long long __b) { + int __cc; + __builtin_s390_vceqgs((vector signed long long)__a, + (vector signed long long)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ne(vector bool long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vceqgs((vector signed long 
long)__a, + (vector signed long long)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ne(vector double __a, vector double __b) { + int __cc; + __builtin_s390_vfcedbs(__a, __b, &__cc); + return __cc != 0; +} + +/*-- vec_any_ge -------------------------------------------------------------*/ + +static inline __ATTRS_o_ai int +vec_any_ge(vector signed char __a, vector signed char __b) { + int __cc; + __builtin_s390_vchbs(__b, __a, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ge(vector signed char __a, vector bool char __b) { + int __cc; + __builtin_s390_vchbs((vector signed char)__b, __a, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ge(vector bool char __a, vector signed char __b) { + int __cc; + __builtin_s390_vchbs(__b, (vector signed char)__a, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ge(vector unsigned char __a, vector unsigned char __b) { + int __cc; + __builtin_s390_vchlbs(__b, __a, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ge(vector unsigned char __a, vector bool char __b) { + int __cc; + __builtin_s390_vchlbs((vector unsigned char)__b, __a, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ge(vector bool char __a, vector unsigned char __b) { + int __cc; + __builtin_s390_vchlbs(__b, (vector unsigned char)__a, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ge(vector bool char __a, vector bool char __b) { + int __cc; + __builtin_s390_vchlbs((vector unsigned char)__b, + (vector unsigned char)__a, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ge(vector signed short __a, vector signed short __b) { + int __cc; + __builtin_s390_vchhs(__b, __a, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ge(vector signed short __a, vector bool short __b) { + int __cc; + __builtin_s390_vchhs((vector signed short)__b, __a, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ge(vector bool short __a, vector signed short __b) { + int __cc; + __builtin_s390_vchhs(__b, (vector signed short)__a, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ge(vector unsigned short __a, vector unsigned short __b) { + int __cc; + __builtin_s390_vchlhs(__b, __a, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ge(vector unsigned short __a, vector bool short __b) { + int __cc; + __builtin_s390_vchlhs((vector unsigned short)__b, __a, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ge(vector bool short __a, vector unsigned short __b) { + int __cc; + __builtin_s390_vchlhs(__b, (vector unsigned short)__a, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ge(vector bool short __a, vector bool short __b) { + int __cc; + __builtin_s390_vchlhs((vector unsigned short)__b, + (vector unsigned short)__a, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ge(vector signed int __a, vector signed int __b) { + int __cc; + __builtin_s390_vchfs(__b, __a, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ge(vector signed int __a, vector bool int __b) { + int __cc; + __builtin_s390_vchfs((vector signed int)__b, __a, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ge(vector bool int __a, vector signed int __b) { + int __cc; + __builtin_s390_vchfs(__b, (vector signed int)__a, &__cc); + return __cc != 0; +} + 
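+
+/* Note on the vec_any_ge lowering above: "any __a >= __b" is computed as the
+ * negation of "all __b > __a". vchfs(__b, __a) yields condition code 0 only
+ * when every element of __b compares greater than __a, so any other code
+ * (1 = some lanes, 3 = no lanes) means at least one lane satisfies
+ * __a >= __b. A minimal usage sketch (the vector values here are
+ * illustrative, not part of this header):
+ *
+ *   vector signed int x = {0, 9, 0, 0};
+ *   vector signed int y = {5, 5, 5, 5};
+ *   int any_ge = vec_any_ge(x, y);   // 1: lane 1 has 9 >= 5, cc == 1
+ */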
+static inline __ATTRS_o_ai int +vec_any_ge(vector unsigned int __a, vector unsigned int __b) { + int __cc; + __builtin_s390_vchlfs(__b, __a, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ge(vector unsigned int __a, vector bool int __b) { + int __cc; + __builtin_s390_vchlfs((vector unsigned int)__b, __a, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ge(vector bool int __a, vector unsigned int __b) { + int __cc; + __builtin_s390_vchlfs(__b, (vector unsigned int)__a, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ge(vector bool int __a, vector bool int __b) { + int __cc; + __builtin_s390_vchlfs((vector unsigned int)__b, + (vector unsigned int)__a, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ge(vector signed long long __a, vector signed long long __b) { + int __cc; + __builtin_s390_vchgs(__b, __a, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ge(vector signed long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vchgs((vector signed long long)__b, __a, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ge(vector bool long long __a, vector signed long long __b) { + int __cc; + __builtin_s390_vchgs(__b, (vector signed long long)__a, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ge(vector unsigned long long __a, vector unsigned long long __b) { + int __cc; + __builtin_s390_vchlgs(__b, __a, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ge(vector unsigned long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vchlgs((vector unsigned long long)__b, __a, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ge(vector bool long long __a, vector unsigned long long __b) { + int __cc; + __builtin_s390_vchlgs(__b, (vector unsigned long long)__a, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ge(vector bool long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vchlgs((vector unsigned long long)__b, + (vector unsigned long long)__a, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ge(vector double __a, vector double __b) { + int __cc; + __builtin_s390_vfchedbs(__a, __b, &__cc); + return __cc <= 1; +} + +/*-- vec_any_gt -------------------------------------------------------------*/ + +static inline __ATTRS_o_ai int +vec_any_gt(vector signed char __a, vector signed char __b) { + int __cc; + __builtin_s390_vchbs(__a, __b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(vector signed char __a, vector bool char __b) { + int __cc; + __builtin_s390_vchbs(__a, (vector signed char)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(vector bool char __a, vector signed char __b) { + int __cc; + __builtin_s390_vchbs((vector signed char)__a, __b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(vector unsigned char __a, vector unsigned char __b) { + int __cc; + __builtin_s390_vchlbs(__a, __b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(vector unsigned char __a, vector bool char __b) { + int __cc; + __builtin_s390_vchlbs(__a, (vector unsigned char)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(vector bool char __a, vector unsigned char __b) { + int __cc; + __builtin_s390_vchlbs((vector unsigned char)__a, __b, &__cc); + return 
__cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(vector bool char __a, vector bool char __b) { + int __cc; + __builtin_s390_vchlbs((vector unsigned char)__a, + (vector unsigned char)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(vector signed short __a, vector signed short __b) { + int __cc; + __builtin_s390_vchhs(__a, __b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(vector signed short __a, vector bool short __b) { + int __cc; + __builtin_s390_vchhs(__a, (vector signed short)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(vector bool short __a, vector signed short __b) { + int __cc; + __builtin_s390_vchhs((vector signed short)__a, __b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(vector unsigned short __a, vector unsigned short __b) { + int __cc; + __builtin_s390_vchlhs(__a, __b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(vector unsigned short __a, vector bool short __b) { + int __cc; + __builtin_s390_vchlhs(__a, (vector unsigned short)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(vector bool short __a, vector unsigned short __b) { + int __cc; + __builtin_s390_vchlhs((vector unsigned short)__a, __b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(vector bool short __a, vector bool short __b) { + int __cc; + __builtin_s390_vchlhs((vector unsigned short)__a, + (vector unsigned short)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(vector signed int __a, vector signed int __b) { + int __cc; + __builtin_s390_vchfs(__a, __b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(vector signed int __a, vector bool int __b) { + int __cc; + __builtin_s390_vchfs(__a, (vector signed int)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(vector bool int __a, vector signed int __b) { + int __cc; + __builtin_s390_vchfs((vector signed int)__a, __b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(vector unsigned int __a, vector unsigned int __b) { + int __cc; + __builtin_s390_vchlfs(__a, __b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(vector unsigned int __a, vector bool int __b) { + int __cc; + __builtin_s390_vchlfs(__a, (vector unsigned int)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(vector bool int __a, vector unsigned int __b) { + int __cc; + __builtin_s390_vchlfs((vector unsigned int)__a, __b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(vector bool int __a, vector bool int __b) { + int __cc; + __builtin_s390_vchlfs((vector unsigned int)__a, + (vector unsigned int)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(vector signed long long __a, vector signed long long __b) { + int __cc; + __builtin_s390_vchgs(__a, __b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(vector signed long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vchgs(__a, (vector signed long long)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(vector bool long long __a, vector signed long long __b) { + int __cc; + __builtin_s390_vchgs((vector signed long long)__a, __b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(vector unsigned 
long long __a, vector unsigned long long __b) { + int __cc; + __builtin_s390_vchlgs(__a, __b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(vector unsigned long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vchlgs(__a, (vector unsigned long long)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(vector bool long long __a, vector unsigned long long __b) { + int __cc; + __builtin_s390_vchlgs((vector unsigned long long)__a, __b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(vector bool long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vchlgs((vector unsigned long long)__a, + (vector unsigned long long)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(vector double __a, vector double __b) { + int __cc; + __builtin_s390_vfchdbs(__a, __b, &__cc); + return __cc <= 1; +} + +/*-- vec_any_le -------------------------------------------------------------*/ + +static inline __ATTRS_o_ai int +vec_any_le(vector signed char __a, vector signed char __b) { + int __cc; + __builtin_s390_vchbs(__a, __b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(vector signed char __a, vector bool char __b) { + int __cc; + __builtin_s390_vchbs(__a, (vector signed char)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(vector bool char __a, vector signed char __b) { + int __cc; + __builtin_s390_vchbs((vector signed char)__a, __b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(vector unsigned char __a, vector unsigned char __b) { + int __cc; + __builtin_s390_vchlbs(__a, __b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(vector unsigned char __a, vector bool char __b) { + int __cc; + __builtin_s390_vchlbs(__a, (vector unsigned char)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(vector bool char __a, vector unsigned char __b) { + int __cc; + __builtin_s390_vchlbs((vector unsigned char)__a, __b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(vector bool char __a, vector bool char __b) { + int __cc; + __builtin_s390_vchlbs((vector unsigned char)__a, + (vector unsigned char)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(vector signed short __a, vector signed short __b) { + int __cc; + __builtin_s390_vchhs(__a, __b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(vector signed short __a, vector bool short __b) { + int __cc; + __builtin_s390_vchhs(__a, (vector signed short)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(vector bool short __a, vector signed short __b) { + int __cc; + __builtin_s390_vchhs((vector signed short)__a, __b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(vector unsigned short __a, vector unsigned short __b) { + int __cc; + __builtin_s390_vchlhs(__a, __b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(vector unsigned short __a, vector bool short __b) { + int __cc; + __builtin_s390_vchlhs(__a, (vector unsigned short)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(vector bool short __a, vector unsigned short __b) { + int __cc; + __builtin_s390_vchlhs((vector unsigned short)__a, __b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(vector bool 
short __a, vector bool short __b) { + int __cc; + __builtin_s390_vchlhs((vector unsigned short)__a, + (vector unsigned short)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(vector signed int __a, vector signed int __b) { + int __cc; + __builtin_s390_vchfs(__a, __b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(vector signed int __a, vector bool int __b) { + int __cc; + __builtin_s390_vchfs(__a, (vector signed int)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(vector bool int __a, vector signed int __b) { + int __cc; + __builtin_s390_vchfs((vector signed int)__a, __b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(vector unsigned int __a, vector unsigned int __b) { + int __cc; + __builtin_s390_vchlfs(__a, __b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(vector unsigned int __a, vector bool int __b) { + int __cc; + __builtin_s390_vchlfs(__a, (vector unsigned int)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(vector bool int __a, vector unsigned int __b) { + int __cc; + __builtin_s390_vchlfs((vector unsigned int)__a, __b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(vector bool int __a, vector bool int __b) { + int __cc; + __builtin_s390_vchlfs((vector unsigned int)__a, + (vector unsigned int)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(vector signed long long __a, vector signed long long __b) { + int __cc; + __builtin_s390_vchgs(__a, __b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(vector signed long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vchgs(__a, (vector signed long long)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(vector bool long long __a, vector signed long long __b) { + int __cc; + __builtin_s390_vchgs((vector signed long long)__a, __b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(vector unsigned long long __a, vector unsigned long long __b) { + int __cc; + __builtin_s390_vchlgs(__a, __b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(vector unsigned long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vchlgs(__a, (vector unsigned long long)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(vector bool long long __a, vector unsigned long long __b) { + int __cc; + __builtin_s390_vchlgs((vector unsigned long long)__a, __b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(vector bool long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vchlgs((vector unsigned long long)__a, + (vector unsigned long long)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(vector double __a, vector double __b) { + int __cc; + __builtin_s390_vfchedbs(__b, __a, &__cc); + return __cc <= 1; +} + +/*-- vec_any_lt -------------------------------------------------------------*/ + +static inline __ATTRS_o_ai int +vec_any_lt(vector signed char __a, vector signed char __b) { + int __cc; + __builtin_s390_vchbs(__b, __a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_lt(vector signed char __a, vector bool char __b) { + int __cc; + __builtin_s390_vchbs((vector signed char)__b, __a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int 
+vec_any_lt(vector bool char __a, vector signed char __b) { + int __cc; + __builtin_s390_vchbs(__b, (vector signed char)__a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_lt(vector unsigned char __a, vector unsigned char __b) { + int __cc; + __builtin_s390_vchlbs(__b, __a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_lt(vector unsigned char __a, vector bool char __b) { + int __cc; + __builtin_s390_vchlbs((vector unsigned char)__b, __a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_lt(vector bool char __a, vector unsigned char __b) { + int __cc; + __builtin_s390_vchlbs(__b, (vector unsigned char)__a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_lt(vector bool char __a, vector bool char __b) { + int __cc; + __builtin_s390_vchlbs((vector unsigned char)__b, + (vector unsigned char)__a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_lt(vector signed short __a, vector signed short __b) { + int __cc; + __builtin_s390_vchhs(__b, __a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_lt(vector signed short __a, vector bool short __b) { + int __cc; + __builtin_s390_vchhs((vector signed short)__b, __a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_lt(vector bool short __a, vector signed short __b) { + int __cc; + __builtin_s390_vchhs(__b, (vector signed short)__a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_lt(vector unsigned short __a, vector unsigned short __b) { + int __cc; + __builtin_s390_vchlhs(__b, __a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_lt(vector unsigned short __a, vector bool short __b) { + int __cc; + __builtin_s390_vchlhs((vector unsigned short)__b, __a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_lt(vector bool short __a, vector unsigned short __b) { + int __cc; + __builtin_s390_vchlhs(__b, (vector unsigned short)__a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_lt(vector bool short __a, vector bool short __b) { + int __cc; + __builtin_s390_vchlhs((vector unsigned short)__b, + (vector unsigned short)__a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_lt(vector signed int __a, vector signed int __b) { + int __cc; + __builtin_s390_vchfs(__b, __a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_lt(vector signed int __a, vector bool int __b) { + int __cc; + __builtin_s390_vchfs((vector signed int)__b, __a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_lt(vector bool int __a, vector signed int __b) { + int __cc; + __builtin_s390_vchfs(__b, (vector signed int)__a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_lt(vector unsigned int __a, vector unsigned int __b) { + int __cc; + __builtin_s390_vchlfs(__b, __a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_lt(vector unsigned int __a, vector bool int __b) { + int __cc; + __builtin_s390_vchlfs((vector unsigned int)__b, __a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_lt(vector bool int __a, vector unsigned int __b) { + int __cc; + __builtin_s390_vchlfs(__b, (vector unsigned int)__a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_lt(vector bool int __a, vector bool int __b) { + int __cc; + __builtin_s390_vchlfs((vector unsigned int)__b, + 
(vector unsigned int)__a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_lt(vector signed long long __a, vector signed long long __b) { + int __cc; + __builtin_s390_vchgs(__b, __a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_lt(vector signed long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vchgs((vector signed long long)__b, __a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_lt(vector bool long long __a, vector signed long long __b) { + int __cc; + __builtin_s390_vchgs(__b, (vector signed long long)__a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_lt(vector unsigned long long __a, vector unsigned long long __b) { + int __cc; + __builtin_s390_vchlgs(__b, __a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_lt(vector unsigned long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vchlgs((vector unsigned long long)__b, __a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_lt(vector bool long long __a, vector unsigned long long __b) { + int __cc; + __builtin_s390_vchlgs(__b, (vector unsigned long long)__a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_lt(vector bool long long __a, vector bool long long __b) { + int __cc; + __builtin_s390_vchlgs((vector unsigned long long)__b, + (vector unsigned long long)__a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_lt(vector double __a, vector double __b) { + int __cc; + __builtin_s390_vfchdbs(__b, __a, &__cc); + return __cc <= 1; +} + +/*-- vec_any_nge ------------------------------------------------------------*/ + +static inline __ATTRS_ai int +vec_any_nge(vector double __a, vector double __b) { + int __cc; + __builtin_s390_vfchedbs(__a, __b, &__cc); + return __cc != 0; +} + +/*-- vec_any_ngt ------------------------------------------------------------*/ + +static inline __ATTRS_ai int +vec_any_ngt(vector double __a, vector double __b) { + int __cc; + __builtin_s390_vfchdbs(__a, __b, &__cc); + return __cc != 0; +} + +/*-- vec_any_nle ------------------------------------------------------------*/ + +static inline __ATTRS_ai int +vec_any_nle(vector double __a, vector double __b) { + int __cc; + __builtin_s390_vfchedbs(__b, __a, &__cc); + return __cc != 0; +} + +/*-- vec_any_nlt ------------------------------------------------------------*/ + +static inline __ATTRS_ai int +vec_any_nlt(vector double __a, vector double __b) { + int __cc; + __builtin_s390_vfchdbs(__b, __a, &__cc); + return __cc != 0; +} + +/*-- vec_any_nan ------------------------------------------------------------*/ + +static inline __ATTRS_ai int +vec_any_nan(vector double __a) { + int __cc; + __builtin_s390_vftcidb(__a, 15, &__cc); + return __cc != 3; +} + +/*-- vec_any_numeric --------------------------------------------------------*/ + +static inline __ATTRS_ai int +vec_any_numeric(vector double __a) { + int __cc; + __builtin_s390_vftcidb(__a, 15, &__cc); + return __cc != 0; +} + +/*-- vec_andc ---------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector bool char +vec_andc(vector bool char __a, vector bool char __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai vector signed char +vec_andc(vector signed char __a, vector signed char __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai vector signed char +vec_andc(vector bool char __a, vector signed char __b) { + 
return __a & ~__b; +} + +static inline __ATTRS_o_ai vector signed char +vec_andc(vector signed char __a, vector bool char __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai vector unsigned char +vec_andc(vector unsigned char __a, vector unsigned char __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai vector unsigned char +vec_andc(vector bool char __a, vector unsigned char __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai vector unsigned char +vec_andc(vector unsigned char __a, vector bool char __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai vector bool short +vec_andc(vector bool short __a, vector bool short __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai vector signed short +vec_andc(vector signed short __a, vector signed short __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai vector signed short +vec_andc(vector bool short __a, vector signed short __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai vector signed short +vec_andc(vector signed short __a, vector bool short __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai vector unsigned short +vec_andc(vector unsigned short __a, vector unsigned short __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai vector unsigned short +vec_andc(vector bool short __a, vector unsigned short __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai vector unsigned short +vec_andc(vector unsigned short __a, vector bool short __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai vector bool int +vec_andc(vector bool int __a, vector bool int __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai vector signed int +vec_andc(vector signed int __a, vector signed int __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai vector signed int +vec_andc(vector bool int __a, vector signed int __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai vector signed int +vec_andc(vector signed int __a, vector bool int __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai vector unsigned int +vec_andc(vector unsigned int __a, vector unsigned int __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai vector unsigned int +vec_andc(vector bool int __a, vector unsigned int __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai vector unsigned int +vec_andc(vector unsigned int __a, vector bool int __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai vector bool long long +vec_andc(vector bool long long __a, vector bool long long __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai vector signed long long +vec_andc(vector signed long long __a, vector signed long long __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai vector signed long long +vec_andc(vector bool long long __a, vector signed long long __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai vector signed long long +vec_andc(vector signed long long __a, vector bool long long __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_andc(vector unsigned long long __a, vector unsigned long long __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_andc(vector bool long long __a, vector unsigned long long __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_andc(vector unsigned long long __a, vector bool long long __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai vector double +vec_andc(vector double __a, vector double __b) { 
+ return (vector double)((vector unsigned long long)__a & + ~(vector unsigned long long)__b); +} + +static inline __ATTRS_o_ai vector double +vec_andc(vector bool long long __a, vector double __b) { + return (vector double)((vector unsigned long long)__a & + ~(vector unsigned long long)__b); +} + +static inline __ATTRS_o_ai vector double +vec_andc(vector double __a, vector bool long long __b) { + return (vector double)((vector unsigned long long)__a & + ~(vector unsigned long long)__b); +} + +/*-- vec_nor ----------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector bool char +vec_nor(vector bool char __a, vector bool char __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai vector signed char +vec_nor(vector signed char __a, vector signed char __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai vector signed char +vec_nor(vector bool char __a, vector signed char __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai vector signed char +vec_nor(vector signed char __a, vector bool char __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_nor(vector unsigned char __a, vector unsigned char __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_nor(vector bool char __a, vector unsigned char __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_nor(vector unsigned char __a, vector bool char __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai vector bool short +vec_nor(vector bool short __a, vector bool short __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai vector signed short +vec_nor(vector signed short __a, vector signed short __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai vector signed short +vec_nor(vector bool short __a, vector signed short __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai vector signed short +vec_nor(vector signed short __a, vector bool short __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_nor(vector unsigned short __a, vector unsigned short __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_nor(vector bool short __a, vector unsigned short __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_nor(vector unsigned short __a, vector bool short __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai vector bool int +vec_nor(vector bool int __a, vector bool int __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai vector signed int +vec_nor(vector signed int __a, vector signed int __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai vector signed int +vec_nor(vector bool int __a, vector signed int __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai vector signed int +vec_nor(vector signed int __a, vector bool int __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_nor(vector unsigned int __a, vector unsigned int __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_nor(vector bool int __a, vector unsigned int __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_nor(vector unsigned int __a, vector bool int __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai vector bool long long +vec_nor(vector bool long long __a, vector bool long long __b) { + return ~(__a | __b); +} + +static inline 
__ATTRS_o_ai vector signed long long +vec_nor(vector signed long long __a, vector signed long long __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai vector signed long long +vec_nor(vector bool long long __a, vector signed long long __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai vector signed long long +vec_nor(vector signed long long __a, vector bool long long __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_nor(vector unsigned long long __a, vector unsigned long long __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_nor(vector bool long long __a, vector unsigned long long __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_nor(vector unsigned long long __a, vector bool long long __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai vector double +vec_nor(vector double __a, vector double __b) { + return (vector double)~((vector unsigned long long)__a | + (vector unsigned long long)__b); +} + +static inline __ATTRS_o_ai vector double +vec_nor(vector bool long long __a, vector double __b) { + return (vector double)~((vector unsigned long long)__a | + (vector unsigned long long)__b); +} + +static inline __ATTRS_o_ai vector double +vec_nor(vector double __a, vector bool long long __b) { + return (vector double)~((vector unsigned long long)__a | + (vector unsigned long long)__b); +} + +/*-- vec_cntlz --------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector unsigned char +vec_cntlz(vector signed char __a) { + return __builtin_s390_vclzb((vector unsigned char)__a); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_cntlz(vector unsigned char __a) { + return __builtin_s390_vclzb(__a); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cntlz(vector signed short __a) { + return __builtin_s390_vclzh((vector unsigned short)__a); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cntlz(vector unsigned short __a) { + return __builtin_s390_vclzh(__a); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cntlz(vector signed int __a) { + return __builtin_s390_vclzf((vector unsigned int)__a); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cntlz(vector unsigned int __a) { + return __builtin_s390_vclzf(__a); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_cntlz(vector signed long long __a) { + return __builtin_s390_vclzg((vector unsigned long long)__a); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_cntlz(vector unsigned long long __a) { + return __builtin_s390_vclzg(__a); +} + +/*-- vec_cnttz --------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector unsigned char +vec_cnttz(vector signed char __a) { + return __builtin_s390_vctzb((vector unsigned char)__a); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_cnttz(vector unsigned char __a) { + return __builtin_s390_vctzb(__a); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cnttz(vector signed short __a) { + return __builtin_s390_vctzh((vector unsigned short)__a); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cnttz(vector unsigned short __a) { + return __builtin_s390_vctzh(__a); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cnttz(vector signed int __a) { + return __builtin_s390_vctzf((vector unsigned int)__a); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cnttz(vector 
unsigned int __a) { + return __builtin_s390_vctzf(__a); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_cnttz(vector signed long long __a) { + return __builtin_s390_vctzg((vector unsigned long long)__a); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_cnttz(vector unsigned long long __a) { + return __builtin_s390_vctzg(__a); +} + +/*-- vec_popcnt -------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector unsigned char +vec_popcnt(vector signed char __a) { + return __builtin_s390_vpopctb((vector unsigned char)__a); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_popcnt(vector unsigned char __a) { + return __builtin_s390_vpopctb(__a); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_popcnt(vector signed short __a) { + return __builtin_s390_vpopcth((vector unsigned short)__a); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_popcnt(vector unsigned short __a) { + return __builtin_s390_vpopcth(__a); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_popcnt(vector signed int __a) { + return __builtin_s390_vpopctf((vector unsigned int)__a); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_popcnt(vector unsigned int __a) { + return __builtin_s390_vpopctf(__a); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_popcnt(vector signed long long __a) { + return __builtin_s390_vpopctg((vector unsigned long long)__a); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_popcnt(vector unsigned long long __a) { + return __builtin_s390_vpopctg(__a); +} + +/*-- vec_rl -----------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_rl(vector signed char __a, vector unsigned char __b) { + return (vector signed char)__builtin_s390_verllvb( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_rl(vector unsigned char __a, vector unsigned char __b) { + return __builtin_s390_verllvb(__a, __b); +} + +static inline __ATTRS_o_ai vector signed short +vec_rl(vector signed short __a, vector unsigned short __b) { + return (vector signed short)__builtin_s390_verllvh( + (vector unsigned short)__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_rl(vector unsigned short __a, vector unsigned short __b) { + return __builtin_s390_verllvh(__a, __b); +} + +static inline __ATTRS_o_ai vector signed int +vec_rl(vector signed int __a, vector unsigned int __b) { + return (vector signed int)__builtin_s390_verllvf( + (vector unsigned int)__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_rl(vector unsigned int __a, vector unsigned int __b) { + return __builtin_s390_verllvf(__a, __b); +} + +static inline __ATTRS_o_ai vector signed long long +vec_rl(vector signed long long __a, vector unsigned long long __b) { + return (vector signed long long)__builtin_s390_verllvg( + (vector unsigned long long)__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_rl(vector unsigned long long __a, vector unsigned long long __b) { + return __builtin_s390_verllvg(__a, __b); +} + +/*-- vec_rli ----------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_rli(vector signed char __a, unsigned long __b) { + return (vector signed char)__builtin_s390_verllb( + (vector unsigned char)__a, (int)__b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_rli(vector unsigned char __a, unsigned long __b) { + 
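+  /* vec_rl rotates each element left by the per-element count in its
+   * second operand (verllv*); the vec_rli family here rotates every
+   * element by one scalar count (verll*).  Illustrative sketch with
+   * assumed values, not part of the upstream header:
+   *
+   *   vector unsigned int __x = { 0x80000001u, 0, 0, 0 };
+   *   vec_rli(__x, 1);  // lane 0 becomes 0x00000003u: the bit rotated
+   *                     // out on the left wraps around to bit 0
+   */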
return __builtin_s390_verllb(__a, (int)__b); +} + +static inline __ATTRS_o_ai vector signed short +vec_rli(vector signed short __a, unsigned long __b) { + return (vector signed short)__builtin_s390_verllh( + (vector unsigned short)__a, (int)__b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_rli(vector unsigned short __a, unsigned long __b) { + return __builtin_s390_verllh(__a, (int)__b); +} + +static inline __ATTRS_o_ai vector signed int +vec_rli(vector signed int __a, unsigned long __b) { + return (vector signed int)__builtin_s390_verllf( + (vector unsigned int)__a, (int)__b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_rli(vector unsigned int __a, unsigned long __b) { + return __builtin_s390_verllf(__a, (int)__b); +} + +static inline __ATTRS_o_ai vector signed long long +vec_rli(vector signed long long __a, unsigned long __b) { + return (vector signed long long)__builtin_s390_verllg( + (vector unsigned long long)__a, (int)__b); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_rli(vector unsigned long long __a, unsigned long __b) { + return __builtin_s390_verllg(__a, (int)__b); +} + +/*-- vec_rl_mask ------------------------------------------------------------*/ + +extern __ATTRS_o vector signed char +vec_rl_mask(vector signed char __a, vector unsigned char __b, + unsigned char __c) __constant(__c); + +extern __ATTRS_o vector unsigned char +vec_rl_mask(vector unsigned char __a, vector unsigned char __b, + unsigned char __c) __constant(__c); + +extern __ATTRS_o vector signed short +vec_rl_mask(vector signed short __a, vector unsigned short __b, + unsigned char __c) __constant(__c); + +extern __ATTRS_o vector unsigned short +vec_rl_mask(vector unsigned short __a, vector unsigned short __b, + unsigned char __c) __constant(__c); + +extern __ATTRS_o vector signed int +vec_rl_mask(vector signed int __a, vector unsigned int __b, + unsigned char __c) __constant(__c); + +extern __ATTRS_o vector unsigned int +vec_rl_mask(vector unsigned int __a, vector unsigned int __b, + unsigned char __c) __constant(__c); + +extern __ATTRS_o vector signed long long +vec_rl_mask(vector signed long long __a, vector unsigned long long __b, + unsigned char __c) __constant(__c); + +extern __ATTRS_o vector unsigned long long +vec_rl_mask(vector unsigned long long __a, vector unsigned long long __b, + unsigned char __c) __constant(__c); + +#define vec_rl_mask(X, Y, Z) ((__typeof__((vec_rl_mask)((X), (Y), (Z)))) \ + __extension__ ({ \ + vector unsigned char __res; \ + vector unsigned char __x = (vector unsigned char)(X); \ + vector unsigned char __y = (vector unsigned char)(Y); \ + switch (sizeof ((X)[0])) { \ + case 1: __res = (vector unsigned char) __builtin_s390_verimb( \ + (vector unsigned char)__x, (vector unsigned char)__x, \ + (vector unsigned char)__y, (Z)); break; \ + case 2: __res = (vector unsigned char) __builtin_s390_verimh( \ + (vector unsigned short)__x, (vector unsigned short)__x, \ + (vector unsigned short)__y, (Z)); break; \ + case 4: __res = (vector unsigned char) __builtin_s390_verimf( \ + (vector unsigned int)__x, (vector unsigned int)__x, \ + (vector unsigned int)__y, (Z)); break; \ + default: __res = (vector unsigned char) __builtin_s390_verimg( \ + (vector unsigned long long)__x, (vector unsigned long long)__x, \ + (vector unsigned long long)__y, (Z)); break; \ + } __res; })) + +/*-- vec_sll ----------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_sll(vector signed char __a, vector unsigned 
char __b) { + return (vector signed char)__builtin_s390_vsl( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector signed char +vec_sll(vector signed char __a, vector unsigned short __b) { + return (vector signed char)__builtin_s390_vsl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed char +vec_sll(vector signed char __a, vector unsigned int __b) { + return (vector signed char)__builtin_s390_vsl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector bool char +vec_sll(vector bool char __a, vector unsigned char __b) { + return (vector bool char)__builtin_s390_vsl( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector bool char +vec_sll(vector bool char __a, vector unsigned short __b) { + return (vector bool char)__builtin_s390_vsl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector bool char +vec_sll(vector bool char __a, vector unsigned int __b) { + return (vector bool char)__builtin_s390_vsl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_sll(vector unsigned char __a, vector unsigned char __b) { + return __builtin_s390_vsl(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_sll(vector unsigned char __a, vector unsigned short __b) { + return __builtin_s390_vsl(__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_sll(vector unsigned char __a, vector unsigned int __b) { + return __builtin_s390_vsl(__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed short +vec_sll(vector signed short __a, vector unsigned char __b) { + return (vector signed short)__builtin_s390_vsl( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector signed short +vec_sll(vector signed short __a, vector unsigned short __b) { + return (vector signed short)__builtin_s390_vsl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed short +vec_sll(vector signed short __a, vector unsigned int __b) { + return (vector signed short)__builtin_s390_vsl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector bool short +vec_sll(vector bool short __a, vector unsigned char __b) { + return (vector bool short)__builtin_s390_vsl( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector bool short +vec_sll(vector bool short __a, vector unsigned short __b) { + return (vector bool short)__builtin_s390_vsl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector bool short +vec_sll(vector bool short __a, vector unsigned int __b) { + return (vector bool short)__builtin_s390_vsl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_sll(vector unsigned short __a, vector unsigned char __b) { + return (vector unsigned short)__builtin_s390_vsl( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_sll(vector unsigned short __a, vector unsigned short __b) { + return (vector unsigned short)__builtin_s390_vsl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_sll(vector unsigned short __a, vector unsigned int __b) { + return (vector unsigned short)__builtin_s390_vsl( + (vector 
unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed int +vec_sll(vector signed int __a, vector unsigned char __b) { + return (vector signed int)__builtin_s390_vsl( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector signed int +vec_sll(vector signed int __a, vector unsigned short __b) { + return (vector signed int)__builtin_s390_vsl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed int +vec_sll(vector signed int __a, vector unsigned int __b) { + return (vector signed int)__builtin_s390_vsl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector bool int +vec_sll(vector bool int __a, vector unsigned char __b) { + return (vector bool int)__builtin_s390_vsl( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector bool int +vec_sll(vector bool int __a, vector unsigned short __b) { + return (vector bool int)__builtin_s390_vsl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector bool int +vec_sll(vector bool int __a, vector unsigned int __b) { + return (vector bool int)__builtin_s390_vsl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_sll(vector unsigned int __a, vector unsigned char __b) { + return (vector unsigned int)__builtin_s390_vsl( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_sll(vector unsigned int __a, vector unsigned short __b) { + return (vector unsigned int)__builtin_s390_vsl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_sll(vector unsigned int __a, vector unsigned int __b) { + return (vector unsigned int)__builtin_s390_vsl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed long long +vec_sll(vector signed long long __a, vector unsigned char __b) { + return (vector signed long long)__builtin_s390_vsl( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector signed long long +vec_sll(vector signed long long __a, vector unsigned short __b) { + return (vector signed long long)__builtin_s390_vsl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed long long +vec_sll(vector signed long long __a, vector unsigned int __b) { + return (vector signed long long)__builtin_s390_vsl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector bool long long +vec_sll(vector bool long long __a, vector unsigned char __b) { + return (vector bool long long)__builtin_s390_vsl( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector bool long long +vec_sll(vector bool long long __a, vector unsigned short __b) { + return (vector bool long long)__builtin_s390_vsl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector bool long long +vec_sll(vector bool long long __a, vector unsigned int __b) { + return (vector bool long long)__builtin_s390_vsl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_sll(vector unsigned long long __a, vector unsigned char __b) { + return (vector unsigned long long)__builtin_s390_vsl( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned long long 
+vec_sll(vector unsigned long long __a, vector unsigned short __b) { + return (vector unsigned long long)__builtin_s390_vsl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_sll(vector unsigned long long __a, vector unsigned int __b) { + return (vector unsigned long long)__builtin_s390_vsl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +/*-- vec_slb ----------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_slb(vector signed char __a, vector signed char __b) { + return (vector signed char)__builtin_s390_vslb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed char +vec_slb(vector signed char __a, vector unsigned char __b) { + return (vector signed char)__builtin_s390_vslb( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_slb(vector unsigned char __a, vector signed char __b) { + return __builtin_s390_vslb(__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_slb(vector unsigned char __a, vector unsigned char __b) { + return __builtin_s390_vslb(__a, __b); +} + +static inline __ATTRS_o_ai vector signed short +vec_slb(vector signed short __a, vector signed short __b) { + return (vector signed short)__builtin_s390_vslb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed short +vec_slb(vector signed short __a, vector unsigned short __b) { + return (vector signed short)__builtin_s390_vslb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_slb(vector unsigned short __a, vector signed short __b) { + return (vector unsigned short)__builtin_s390_vslb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_slb(vector unsigned short __a, vector unsigned short __b) { + return (vector unsigned short)__builtin_s390_vslb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed int +vec_slb(vector signed int __a, vector signed int __b) { + return (vector signed int)__builtin_s390_vslb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed int +vec_slb(vector signed int __a, vector unsigned int __b) { + return (vector signed int)__builtin_s390_vslb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_slb(vector unsigned int __a, vector signed int __b) { + return (vector unsigned int)__builtin_s390_vslb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_slb(vector unsigned int __a, vector unsigned int __b) { + return (vector unsigned int)__builtin_s390_vslb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed long long +vec_slb(vector signed long long __a, vector signed long long __b) { + return (vector signed long long)__builtin_s390_vslb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed long long +vec_slb(vector signed long long __a, vector unsigned long long __b) { + return (vector signed long long)__builtin_s390_vslb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector 
unsigned long long +vec_slb(vector unsigned long long __a, vector signed long long __b) { + return (vector unsigned long long)__builtin_s390_vslb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_slb(vector unsigned long long __a, vector unsigned long long __b) { + return (vector unsigned long long)__builtin_s390_vslb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector double +vec_slb(vector double __a, vector signed long long __b) { + return (vector double)__builtin_s390_vslb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector double +vec_slb(vector double __a, vector unsigned long long __b) { + return (vector double)__builtin_s390_vslb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +/*-- vec_sld ----------------------------------------------------------------*/ + +extern __ATTRS_o vector signed char +vec_sld(vector signed char __a, vector signed char __b, int __c) + __constant_range(__c, 0, 15); + +extern __ATTRS_o vector unsigned char +vec_sld(vector unsigned char __a, vector unsigned char __b, int __c) + __constant_range(__c, 0, 15); + +extern __ATTRS_o vector signed short +vec_sld(vector signed short __a, vector signed short __b, int __c) + __constant_range(__c, 0, 15); + +extern __ATTRS_o vector unsigned short +vec_sld(vector unsigned short __a, vector unsigned short __b, int __c) + __constant_range(__c, 0, 15); + +extern __ATTRS_o vector signed int +vec_sld(vector signed int __a, vector signed int __b, int __c) + __constant_range(__c, 0, 15); + +extern __ATTRS_o vector unsigned int +vec_sld(vector unsigned int __a, vector unsigned int __b, int __c) + __constant_range(__c, 0, 15); + +extern __ATTRS_o vector signed long long +vec_sld(vector signed long long __a, vector signed long long __b, int __c) + __constant_range(__c, 0, 15); + +extern __ATTRS_o vector unsigned long long +vec_sld(vector unsigned long long __a, vector unsigned long long __b, int __c) + __constant_range(__c, 0, 15); + +extern __ATTRS_o vector double +vec_sld(vector double __a, vector double __b, int __c) + __constant_range(__c, 0, 15); + +#define vec_sld(X, Y, Z) ((__typeof__((vec_sld)((X), (Y), (Z)))) \ + __builtin_s390_vsldb((vector unsigned char)(X), \ + (vector unsigned char)(Y), (Z))) + +/*-- vec_sldw ---------------------------------------------------------------*/ + +extern __ATTRS_o vector signed char +vec_sldw(vector signed char __a, vector signed char __b, int __c) + __constant_range(__c, 0, 3); + +extern __ATTRS_o vector unsigned char +vec_sldw(vector unsigned char __a, vector unsigned char __b, int __c) + __constant_range(__c, 0, 3); + +extern __ATTRS_o vector signed short +vec_sldw(vector signed short __a, vector signed short __b, int __c) + __constant_range(__c, 0, 3); + +extern __ATTRS_o vector unsigned short +vec_sldw(vector unsigned short __a, vector unsigned short __b, int __c) + __constant_range(__c, 0, 3); + +extern __ATTRS_o vector signed int +vec_sldw(vector signed int __a, vector signed int __b, int __c) + __constant_range(__c, 0, 3); + +extern __ATTRS_o vector unsigned int +vec_sldw(vector unsigned int __a, vector unsigned int __b, int __c) + __constant_range(__c, 0, 3); + +extern __ATTRS_o vector signed long long +vec_sldw(vector signed long long __a, vector signed long long __b, int __c) + __constant_range(__c, 0, 3); + +extern __ATTRS_o vector unsigned long long +vec_sldw(vector unsigned long long __a, vector 
unsigned long long __b, int __c) + __constant_range(__c, 0, 3); + +extern __ATTRS_o vector double +vec_sldw(vector double __a, vector double __b, int __c) + __constant_range(__c, 0, 3); + +#define vec_sldw(X, Y, Z) ((__typeof__((vec_sldw)((X), (Y), (Z)))) \ + __builtin_s390_vsldb((vector unsigned char)(X), \ + (vector unsigned char)(Y), (Z) * 4)) + +/*-- vec_sral ---------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_sral(vector signed char __a, vector unsigned char __b) { + return (vector signed char)__builtin_s390_vsra( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector signed char +vec_sral(vector signed char __a, vector unsigned short __b) { + return (vector signed char)__builtin_s390_vsra( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed char +vec_sral(vector signed char __a, vector unsigned int __b) { + return (vector signed char)__builtin_s390_vsra( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector bool char +vec_sral(vector bool char __a, vector unsigned char __b) { + return (vector bool char)__builtin_s390_vsra( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector bool char +vec_sral(vector bool char __a, vector unsigned short __b) { + return (vector bool char)__builtin_s390_vsra( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector bool char +vec_sral(vector bool char __a, vector unsigned int __b) { + return (vector bool char)__builtin_s390_vsra( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_sral(vector unsigned char __a, vector unsigned char __b) { + return __builtin_s390_vsra(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_sral(vector unsigned char __a, vector unsigned short __b) { + return __builtin_s390_vsra(__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_sral(vector unsigned char __a, vector unsigned int __b) { + return __builtin_s390_vsra(__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed short +vec_sral(vector signed short __a, vector unsigned char __b) { + return (vector signed short)__builtin_s390_vsra( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector signed short +vec_sral(vector signed short __a, vector unsigned short __b) { + return (vector signed short)__builtin_s390_vsra( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed short +vec_sral(vector signed short __a, vector unsigned int __b) { + return (vector signed short)__builtin_s390_vsra( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector bool short +vec_sral(vector bool short __a, vector unsigned char __b) { + return (vector bool short)__builtin_s390_vsra( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector bool short +vec_sral(vector bool short __a, vector unsigned short __b) { + return (vector bool short)__builtin_s390_vsra( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector bool short +vec_sral(vector bool short __a, vector unsigned int __b) { + return (vector bool short)__builtin_s390_vsra( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned short 
+vec_sral(vector unsigned short __a, vector unsigned char __b) { + return (vector unsigned short)__builtin_s390_vsra( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_sral(vector unsigned short __a, vector unsigned short __b) { + return (vector unsigned short)__builtin_s390_vsra( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_sral(vector unsigned short __a, vector unsigned int __b) { + return (vector unsigned short)__builtin_s390_vsra( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed int +vec_sral(vector signed int __a, vector unsigned char __b) { + return (vector signed int)__builtin_s390_vsra( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector signed int +vec_sral(vector signed int __a, vector unsigned short __b) { + return (vector signed int)__builtin_s390_vsra( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed int +vec_sral(vector signed int __a, vector unsigned int __b) { + return (vector signed int)__builtin_s390_vsra( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector bool int +vec_sral(vector bool int __a, vector unsigned char __b) { + return (vector bool int)__builtin_s390_vsra( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector bool int +vec_sral(vector bool int __a, vector unsigned short __b) { + return (vector bool int)__builtin_s390_vsra( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector bool int +vec_sral(vector bool int __a, vector unsigned int __b) { + return (vector bool int)__builtin_s390_vsra( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_sral(vector unsigned int __a, vector unsigned char __b) { + return (vector unsigned int)__builtin_s390_vsra( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_sral(vector unsigned int __a, vector unsigned short __b) { + return (vector unsigned int)__builtin_s390_vsra( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_sral(vector unsigned int __a, vector unsigned int __b) { + return (vector unsigned int)__builtin_s390_vsra( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed long long +vec_sral(vector signed long long __a, vector unsigned char __b) { + return (vector signed long long)__builtin_s390_vsra( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector signed long long +vec_sral(vector signed long long __a, vector unsigned short __b) { + return (vector signed long long)__builtin_s390_vsra( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed long long +vec_sral(vector signed long long __a, vector unsigned int __b) { + return (vector signed long long)__builtin_s390_vsra( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector bool long long +vec_sral(vector bool long long __a, vector unsigned char __b) { + return (vector bool long long)__builtin_s390_vsra( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector bool long long +vec_sral(vector bool long long __a, vector unsigned short __b) { + return (vector bool long 
long)__builtin_s390_vsra( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector bool long long +vec_sral(vector bool long long __a, vector unsigned int __b) { + return (vector bool long long)__builtin_s390_vsra( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_sral(vector unsigned long long __a, vector unsigned char __b) { + return (vector unsigned long long)__builtin_s390_vsra( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_sral(vector unsigned long long __a, vector unsigned short __b) { + return (vector unsigned long long)__builtin_s390_vsra( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_sral(vector unsigned long long __a, vector unsigned int __b) { + return (vector unsigned long long)__builtin_s390_vsra( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +/*-- vec_srab ---------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_srab(vector signed char __a, vector signed char __b) { + return (vector signed char)__builtin_s390_vsrab( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed char +vec_srab(vector signed char __a, vector unsigned char __b) { + return (vector signed char)__builtin_s390_vsrab( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_srab(vector unsigned char __a, vector signed char __b) { + return __builtin_s390_vsrab(__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_srab(vector unsigned char __a, vector unsigned char __b) { + return __builtin_s390_vsrab(__a, __b); +} + +static inline __ATTRS_o_ai vector signed short +vec_srab(vector signed short __a, vector signed short __b) { + return (vector signed short)__builtin_s390_vsrab( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed short +vec_srab(vector signed short __a, vector unsigned short __b) { + return (vector signed short)__builtin_s390_vsrab( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_srab(vector unsigned short __a, vector signed short __b) { + return (vector unsigned short)__builtin_s390_vsrab( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_srab(vector unsigned short __a, vector unsigned short __b) { + return (vector unsigned short)__builtin_s390_vsrab( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed int +vec_srab(vector signed int __a, vector signed int __b) { + return (vector signed int)__builtin_s390_vsrab( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed int +vec_srab(vector signed int __a, vector unsigned int __b) { + return (vector signed int)__builtin_s390_vsrab( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_srab(vector unsigned int __a, vector signed int __b) { + return (vector unsigned int)__builtin_s390_vsrab( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_srab(vector unsigned int __a, vector unsigned int __b) { + return 
(vector unsigned int)__builtin_s390_vsrab( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed long long +vec_srab(vector signed long long __a, vector signed long long __b) { + return (vector signed long long)__builtin_s390_vsrab( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed long long +vec_srab(vector signed long long __a, vector unsigned long long __b) { + return (vector signed long long)__builtin_s390_vsrab( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_srab(vector unsigned long long __a, vector signed long long __b) { + return (vector unsigned long long)__builtin_s390_vsrab( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_srab(vector unsigned long long __a, vector unsigned long long __b) { + return (vector unsigned long long)__builtin_s390_vsrab( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector double +vec_srab(vector double __a, vector signed long long __b) { + return (vector double)__builtin_s390_vsrab( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector double +vec_srab(vector double __a, vector unsigned long long __b) { + return (vector double)__builtin_s390_vsrab( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +/*-- vec_srl ----------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_srl(vector signed char __a, vector unsigned char __b) { + return (vector signed char)__builtin_s390_vsrl( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector signed char +vec_srl(vector signed char __a, vector unsigned short __b) { + return (vector signed char)__builtin_s390_vsrl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed char +vec_srl(vector signed char __a, vector unsigned int __b) { + return (vector signed char)__builtin_s390_vsrl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector bool char +vec_srl(vector bool char __a, vector unsigned char __b) { + return (vector bool char)__builtin_s390_vsrl( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector bool char +vec_srl(vector bool char __a, vector unsigned short __b) { + return (vector bool char)__builtin_s390_vsrl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector bool char +vec_srl(vector bool char __a, vector unsigned int __b) { + return (vector bool char)__builtin_s390_vsrl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_srl(vector unsigned char __a, vector unsigned char __b) { + return __builtin_s390_vsrl(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_srl(vector unsigned char __a, vector unsigned short __b) { + return __builtin_s390_vsrl(__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_srl(vector unsigned char __a, vector unsigned int __b) { + return __builtin_s390_vsrl(__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed short +vec_srl(vector signed short __a, vector unsigned char __b) { + return (vector signed short)__builtin_s390_vsrl( + (vector unsigned char)__a, __b); 
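+  /* Note on the shift families: every overload is a thin wrapper that
+   * casts its operands to vector unsigned char, calls the one
+   * underlying byte-vector builtin for its family (vsl, vslb, vsrl,
+   * vsrlb, vsra, vsrab), and casts the result back, which is why the
+   * bodies are all identical up to casts.  vec_sral and vec_srab are
+   * the arithmetic right shifts that propagate the sign bit; vec_srl
+   * and vec_srb are the logical forms that shift in zeros.
+   */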
+} + +static inline __ATTRS_o_ai vector signed short +vec_srl(vector signed short __a, vector unsigned short __b) { + return (vector signed short)__builtin_s390_vsrl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed short +vec_srl(vector signed short __a, vector unsigned int __b) { + return (vector signed short)__builtin_s390_vsrl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector bool short +vec_srl(vector bool short __a, vector unsigned char __b) { + return (vector bool short)__builtin_s390_vsrl( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector bool short +vec_srl(vector bool short __a, vector unsigned short __b) { + return (vector bool short)__builtin_s390_vsrl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector bool short +vec_srl(vector bool short __a, vector unsigned int __b) { + return (vector bool short)__builtin_s390_vsrl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_srl(vector unsigned short __a, vector unsigned char __b) { + return (vector unsigned short)__builtin_s390_vsrl( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_srl(vector unsigned short __a, vector unsigned short __b) { + return (vector unsigned short)__builtin_s390_vsrl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_srl(vector unsigned short __a, vector unsigned int __b) { + return (vector unsigned short)__builtin_s390_vsrl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed int +vec_srl(vector signed int __a, vector unsigned char __b) { + return (vector signed int)__builtin_s390_vsrl( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector signed int +vec_srl(vector signed int __a, vector unsigned short __b) { + return (vector signed int)__builtin_s390_vsrl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed int +vec_srl(vector signed int __a, vector unsigned int __b) { + return (vector signed int)__builtin_s390_vsrl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector bool int +vec_srl(vector bool int __a, vector unsigned char __b) { + return (vector bool int)__builtin_s390_vsrl( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector bool int +vec_srl(vector bool int __a, vector unsigned short __b) { + return (vector bool int)__builtin_s390_vsrl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector bool int +vec_srl(vector bool int __a, vector unsigned int __b) { + return (vector bool int)__builtin_s390_vsrl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_srl(vector unsigned int __a, vector unsigned char __b) { + return (vector unsigned int)__builtin_s390_vsrl( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_srl(vector unsigned int __a, vector unsigned short __b) { + return (vector unsigned int)__builtin_s390_vsrl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_srl(vector unsigned int __a, vector unsigned int __b) { + return (vector unsigned 
int)__builtin_s390_vsrl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed long long +vec_srl(vector signed long long __a, vector unsigned char __b) { + return (vector signed long long)__builtin_s390_vsrl( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector signed long long +vec_srl(vector signed long long __a, vector unsigned short __b) { + return (vector signed long long)__builtin_s390_vsrl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed long long +vec_srl(vector signed long long __a, vector unsigned int __b) { + return (vector signed long long)__builtin_s390_vsrl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector bool long long +vec_srl(vector bool long long __a, vector unsigned char __b) { + return (vector bool long long)__builtin_s390_vsrl( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector bool long long +vec_srl(vector bool long long __a, vector unsigned short __b) { + return (vector bool long long)__builtin_s390_vsrl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector bool long long +vec_srl(vector bool long long __a, vector unsigned int __b) { + return (vector bool long long)__builtin_s390_vsrl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_srl(vector unsigned long long __a, vector unsigned char __b) { + return (vector unsigned long long)__builtin_s390_vsrl( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_srl(vector unsigned long long __a, vector unsigned short __b) { + return (vector unsigned long long)__builtin_s390_vsrl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_srl(vector unsigned long long __a, vector unsigned int __b) { + return (vector unsigned long long)__builtin_s390_vsrl( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +/*-- vec_srb ----------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_srb(vector signed char __a, vector signed char __b) { + return (vector signed char)__builtin_s390_vsrlb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed char +vec_srb(vector signed char __a, vector unsigned char __b) { + return (vector signed char)__builtin_s390_vsrlb( + (vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_srb(vector unsigned char __a, vector signed char __b) { + return __builtin_s390_vsrlb(__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_srb(vector unsigned char __a, vector unsigned char __b) { + return __builtin_s390_vsrlb(__a, __b); +} + +static inline __ATTRS_o_ai vector signed short +vec_srb(vector signed short __a, vector signed short __b) { + return (vector signed short)__builtin_s390_vsrlb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed short +vec_srb(vector signed short __a, vector unsigned short __b) { + return (vector signed short)__builtin_s390_vsrlb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_srb(vector unsigned short __a, vector signed short __b) { + return (vector 
unsigned short)__builtin_s390_vsrlb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_srb(vector unsigned short __a, vector unsigned short __b) { + return (vector unsigned short)__builtin_s390_vsrlb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed int +vec_srb(vector signed int __a, vector signed int __b) { + return (vector signed int)__builtin_s390_vsrlb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed int +vec_srb(vector signed int __a, vector unsigned int __b) { + return (vector signed int)__builtin_s390_vsrlb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_srb(vector unsigned int __a, vector signed int __b) { + return (vector unsigned int)__builtin_s390_vsrlb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_srb(vector unsigned int __a, vector unsigned int __b) { + return (vector unsigned int)__builtin_s390_vsrlb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed long long +vec_srb(vector signed long long __a, vector signed long long __b) { + return (vector signed long long)__builtin_s390_vsrlb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector signed long long +vec_srb(vector signed long long __a, vector unsigned long long __b) { + return (vector signed long long)__builtin_s390_vsrlb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_srb(vector unsigned long long __a, vector signed long long __b) { + return (vector unsigned long long)__builtin_s390_vsrlb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_srb(vector unsigned long long __a, vector unsigned long long __b) { + return (vector unsigned long long)__builtin_s390_vsrlb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector double +vec_srb(vector double __a, vector signed long long __b) { + return (vector double)__builtin_s390_vsrlb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector double +vec_srb(vector double __a, vector unsigned long long __b) { + return (vector double)__builtin_s390_vsrlb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +/*-- vec_abs ----------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_abs(vector signed char __a) { + return vec_sel(__a, -__a, vec_cmplt(__a, (vector signed char)0)); +} + +static inline __ATTRS_o_ai vector signed short +vec_abs(vector signed short __a) { + return vec_sel(__a, -__a, vec_cmplt(__a, (vector signed short)0)); +} + +static inline __ATTRS_o_ai vector signed int +vec_abs(vector signed int __a) { + return vec_sel(__a, -__a, vec_cmplt(__a, (vector signed int)0)); +} + +static inline __ATTRS_o_ai vector signed long long +vec_abs(vector signed long long __a) { + return vec_sel(__a, -__a, vec_cmplt(__a, (vector signed long long)0)); +} + +static inline __ATTRS_o_ai vector double +vec_abs(vector double __a) { + return __builtin_s390_vflpdb(__a); +} + +/*-- vec_nabs ---------------------------------------------------------------*/ + +static inline __ATTRS_ai vector double 
+vec_nabs(vector double __a) { + return __builtin_s390_vflndb(__a); +} + +/*-- vec_max ----------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_max(vector signed char __a, vector signed char __b) { + return vec_sel(__b, __a, vec_cmpgt(__a, __b)); +} + +static inline __ATTRS_o_ai vector signed char +vec_max(vector signed char __a, vector bool char __b) { + vector signed char __bc = (vector signed char)__b; + return vec_sel(__bc, __a, vec_cmpgt(__a, __bc)); +} + +static inline __ATTRS_o_ai vector signed char +vec_max(vector bool char __a, vector signed char __b) { + vector signed char __ac = (vector signed char)__a; + return vec_sel(__b, __ac, vec_cmpgt(__ac, __b)); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_max(vector unsigned char __a, vector unsigned char __b) { + return vec_sel(__b, __a, vec_cmpgt(__a, __b)); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_max(vector unsigned char __a, vector bool char __b) { + vector unsigned char __bc = (vector unsigned char)__b; + return vec_sel(__bc, __a, vec_cmpgt(__a, __bc)); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_max(vector bool char __a, vector unsigned char __b) { + vector unsigned char __ac = (vector unsigned char)__a; + return vec_sel(__b, __ac, vec_cmpgt(__ac, __b)); +} + +static inline __ATTRS_o_ai vector signed short +vec_max(vector signed short __a, vector signed short __b) { + return vec_sel(__b, __a, vec_cmpgt(__a, __b)); +} + +static inline __ATTRS_o_ai vector signed short +vec_max(vector signed short __a, vector bool short __b) { + vector signed short __bc = (vector signed short)__b; + return vec_sel(__bc, __a, vec_cmpgt(__a, __bc)); +} + +static inline __ATTRS_o_ai vector signed short +vec_max(vector bool short __a, vector signed short __b) { + vector signed short __ac = (vector signed short)__a; + return vec_sel(__b, __ac, vec_cmpgt(__ac, __b)); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_max(vector unsigned short __a, vector unsigned short __b) { + return vec_sel(__b, __a, vec_cmpgt(__a, __b)); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_max(vector unsigned short __a, vector bool short __b) { + vector unsigned short __bc = (vector unsigned short)__b; + return vec_sel(__bc, __a, vec_cmpgt(__a, __bc)); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_max(vector bool short __a, vector unsigned short __b) { + vector unsigned short __ac = (vector unsigned short)__a; + return vec_sel(__b, __ac, vec_cmpgt(__ac, __b)); +} + +static inline __ATTRS_o_ai vector signed int +vec_max(vector signed int __a, vector signed int __b) { + return vec_sel(__b, __a, vec_cmpgt(__a, __b)); +} + +static inline __ATTRS_o_ai vector signed int +vec_max(vector signed int __a, vector bool int __b) { + vector signed int __bc = (vector signed int)__b; + return vec_sel(__bc, __a, vec_cmpgt(__a, __bc)); +} + +static inline __ATTRS_o_ai vector signed int +vec_max(vector bool int __a, vector signed int __b) { + vector signed int __ac = (vector signed int)__a; + return vec_sel(__b, __ac, vec_cmpgt(__ac, __b)); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_max(vector unsigned int __a, vector unsigned int __b) { + return vec_sel(__b, __a, vec_cmpgt(__a, __b)); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_max(vector unsigned int __a, vector bool int __b) { + vector unsigned int __bc = (vector unsigned int)__b; + return vec_sel(__bc, __a, vec_cmpgt(__a, __bc)); +} + +static inline __ATTRS_o_ai 
vector unsigned int +vec_max(vector bool int __a, vector unsigned int __b) { + vector unsigned int __ac = (vector unsigned int)__a; + return vec_sel(__b, __ac, vec_cmpgt(__ac, __b)); +} + +static inline __ATTRS_o_ai vector signed long long +vec_max(vector signed long long __a, vector signed long long __b) { + return vec_sel(__b, __a, vec_cmpgt(__a, __b)); +} + +static inline __ATTRS_o_ai vector signed long long +vec_max(vector signed long long __a, vector bool long long __b) { + vector signed long long __bc = (vector signed long long)__b; + return vec_sel(__bc, __a, vec_cmpgt(__a, __bc)); +} + +static inline __ATTRS_o_ai vector signed long long +vec_max(vector bool long long __a, vector signed long long __b) { + vector signed long long __ac = (vector signed long long)__a; + return vec_sel(__b, __ac, vec_cmpgt(__ac, __b)); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_max(vector unsigned long long __a, vector unsigned long long __b) { + return vec_sel(__b, __a, vec_cmpgt(__a, __b)); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_max(vector unsigned long long __a, vector bool long long __b) { + vector unsigned long long __bc = (vector unsigned long long)__b; + return vec_sel(__bc, __a, vec_cmpgt(__a, __bc)); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_max(vector bool long long __a, vector unsigned long long __b) { + vector unsigned long long __ac = (vector unsigned long long)__a; + return vec_sel(__b, __ac, vec_cmpgt(__ac, __b)); +} + +static inline __ATTRS_o_ai vector double +vec_max(vector double __a, vector double __b) { + return vec_sel(__b, __a, vec_cmpgt(__a, __b)); +} + +/*-- vec_min ----------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_min(vector signed char __a, vector signed char __b) { + return vec_sel(__a, __b, vec_cmpgt(__a, __b)); +} + +static inline __ATTRS_o_ai vector signed char +vec_min(vector signed char __a, vector bool char __b) { + vector signed char __bc = (vector signed char)__b; + return vec_sel(__a, __bc, vec_cmpgt(__a, __bc)); +} + +static inline __ATTRS_o_ai vector signed char +vec_min(vector bool char __a, vector signed char __b) { + vector signed char __ac = (vector signed char)__a; + return vec_sel(__ac, __b, vec_cmpgt(__ac, __b)); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_min(vector unsigned char __a, vector unsigned char __b) { + return vec_sel(__a, __b, vec_cmpgt(__a, __b)); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_min(vector unsigned char __a, vector bool char __b) { + vector unsigned char __bc = (vector unsigned char)__b; + return vec_sel(__a, __bc, vec_cmpgt(__a, __bc)); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_min(vector bool char __a, vector unsigned char __b) { + vector unsigned char __ac = (vector unsigned char)__a; + return vec_sel(__ac, __b, vec_cmpgt(__ac, __b)); +} + +static inline __ATTRS_o_ai vector signed short +vec_min(vector signed short __a, vector signed short __b) { + return vec_sel(__a, __b, vec_cmpgt(__a, __b)); +} + +static inline __ATTRS_o_ai vector signed short +vec_min(vector signed short __a, vector bool short __b) { + vector signed short __bc = (vector signed short)__b; + return vec_sel(__a, __bc, vec_cmpgt(__a, __bc)); +} + +static inline __ATTRS_o_ai vector signed short +vec_min(vector bool short __a, vector signed short __b) { + vector signed short __ac = (vector signed short)__a; + return vec_sel(__ac, __b, vec_cmpgt(__ac, __b)); +} + +static inline 
__ATTRS_o_ai vector unsigned short +vec_min(vector unsigned short __a, vector unsigned short __b) { + return vec_sel(__a, __b, vec_cmpgt(__a, __b)); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_min(vector unsigned short __a, vector bool short __b) { + vector unsigned short __bc = (vector unsigned short)__b; + return vec_sel(__a, __bc, vec_cmpgt(__a, __bc)); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_min(vector bool short __a, vector unsigned short __b) { + vector unsigned short __ac = (vector unsigned short)__a; + return vec_sel(__ac, __b, vec_cmpgt(__ac, __b)); +} + +static inline __ATTRS_o_ai vector signed int +vec_min(vector signed int __a, vector signed int __b) { + return vec_sel(__a, __b, vec_cmpgt(__a, __b)); +} + +static inline __ATTRS_o_ai vector signed int +vec_min(vector signed int __a, vector bool int __b) { + vector signed int __bc = (vector signed int)__b; + return vec_sel(__a, __bc, vec_cmpgt(__a, __bc)); +} + +static inline __ATTRS_o_ai vector signed int +vec_min(vector bool int __a, vector signed int __b) { + vector signed int __ac = (vector signed int)__a; + return vec_sel(__ac, __b, vec_cmpgt(__ac, __b)); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_min(vector unsigned int __a, vector unsigned int __b) { + return vec_sel(__a, __b, vec_cmpgt(__a, __b)); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_min(vector unsigned int __a, vector bool int __b) { + vector unsigned int __bc = (vector unsigned int)__b; + return vec_sel(__a, __bc, vec_cmpgt(__a, __bc)); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_min(vector bool int __a, vector unsigned int __b) { + vector unsigned int __ac = (vector unsigned int)__a; + return vec_sel(__ac, __b, vec_cmpgt(__ac, __b)); +} + +static inline __ATTRS_o_ai vector signed long long +vec_min(vector signed long long __a, vector signed long long __b) { + return vec_sel(__a, __b, vec_cmpgt(__a, __b)); +} + +static inline __ATTRS_o_ai vector signed long long +vec_min(vector signed long long __a, vector bool long long __b) { + vector signed long long __bc = (vector signed long long)__b; + return vec_sel(__a, __bc, vec_cmpgt(__a, __bc)); +} + +static inline __ATTRS_o_ai vector signed long long +vec_min(vector bool long long __a, vector signed long long __b) { + vector signed long long __ac = (vector signed long long)__a; + return vec_sel(__ac, __b, vec_cmpgt(__ac, __b)); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_min(vector unsigned long long __a, vector unsigned long long __b) { + return vec_sel(__a, __b, vec_cmpgt(__a, __b)); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_min(vector unsigned long long __a, vector bool long long __b) { + vector unsigned long long __bc = (vector unsigned long long)__b; + return vec_sel(__a, __bc, vec_cmpgt(__a, __bc)); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_min(vector bool long long __a, vector unsigned long long __b) { + vector unsigned long long __ac = (vector unsigned long long)__a; + return vec_sel(__ac, __b, vec_cmpgt(__ac, __b)); +} + +static inline __ATTRS_o_ai vector double +vec_min(vector double __a, vector double __b) { + return vec_sel(__a, __b, vec_cmpgt(__a, __b)); +} + +/*-- vec_add_u128 -----------------------------------------------------------*/ + +static inline __ATTRS_ai vector unsigned char +vec_add_u128(vector unsigned char __a, vector unsigned char __b) { + return __builtin_s390_vaq(__a, __b); +} + +/*-- vec_addc 
---------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector unsigned char +vec_addc(vector unsigned char __a, vector unsigned char __b) { + return __builtin_s390_vaccb(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_addc(vector unsigned short __a, vector unsigned short __b) { + return __builtin_s390_vacch(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_addc(vector unsigned int __a, vector unsigned int __b) { + return __builtin_s390_vaccf(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_addc(vector unsigned long long __a, vector unsigned long long __b) { + return __builtin_s390_vaccg(__a, __b); +} + +/*-- vec_addc_u128 ----------------------------------------------------------*/ + +static inline __ATTRS_ai vector unsigned char +vec_addc_u128(vector unsigned char __a, vector unsigned char __b) { + return __builtin_s390_vaccq(__a, __b); +} + +/*-- vec_adde_u128 ----------------------------------------------------------*/ + +static inline __ATTRS_ai vector unsigned char +vec_adde_u128(vector unsigned char __a, vector unsigned char __b, + vector unsigned char __c) { + return __builtin_s390_vacq(__a, __b, __c); +} + +/*-- vec_addec_u128 ---------------------------------------------------------*/ + +static inline __ATTRS_ai vector unsigned char +vec_addec_u128(vector unsigned char __a, vector unsigned char __b, + vector unsigned char __c) { + return __builtin_s390_vacccq(__a, __b, __c); +} + +/*-- vec_avg ----------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_avg(vector signed char __a, vector signed char __b) { + return __builtin_s390_vavgb(__a, __b); +} + +static inline __ATTRS_o_ai vector signed short +vec_avg(vector signed short __a, vector signed short __b) { + return __builtin_s390_vavgh(__a, __b); +} + +static inline __ATTRS_o_ai vector signed int +vec_avg(vector signed int __a, vector signed int __b) { + return __builtin_s390_vavgf(__a, __b); +} + +static inline __ATTRS_o_ai vector signed long long +vec_avg(vector signed long long __a, vector signed long long __b) { + return __builtin_s390_vavgg(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_avg(vector unsigned char __a, vector unsigned char __b) { + return __builtin_s390_vavglb(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_avg(vector unsigned short __a, vector unsigned short __b) { + return __builtin_s390_vavglh(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_avg(vector unsigned int __a, vector unsigned int __b) { + return __builtin_s390_vavglf(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_avg(vector unsigned long long __a, vector unsigned long long __b) { + return __builtin_s390_vavglg(__a, __b); +} + +/*-- vec_checksum -----------------------------------------------------------*/ + +static inline __ATTRS_ai vector unsigned int +vec_checksum(vector unsigned int __a, vector unsigned int __b) { + return __builtin_s390_vcksm(__a, __b); +} + +/*-- vec_gfmsum -------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector unsigned short +vec_gfmsum(vector unsigned char __a, vector unsigned char __b) { + return __builtin_s390_vgfmb(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_gfmsum(vector unsigned short __a, vector unsigned short __b) { + return __builtin_s390_vgfmh(__a, __b); +} + +static inline __ATTRS_o_ai 
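+
+/* Sketch: the *_u128 operations treat a vector unsigned char as one
+ * 128-bit integer, which makes multi-word arithmetic straightforward.
+ * A 256-bit add built from two 128-bit halves might look like this
+ * (hedged illustration, not taken from this header):
+ *
+ *   vector unsigned char lo = vec_add_u128(a_lo, b_lo);
+ *   vector unsigned char c  = vec_addc_u128(a_lo, b_lo);    // carry out
+ *   vector unsigned char hi = vec_adde_u128(a_hi, b_hi, c); // + carry in
+ */
+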
vector unsigned long long +vec_gfmsum(vector unsigned int __a, vector unsigned int __b) { + return __builtin_s390_vgfmf(__a, __b); +} + +/*-- vec_gfmsum_128 ---------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector unsigned char +vec_gfmsum_128(vector unsigned long long __a, vector unsigned long long __b) { + return __builtin_s390_vgfmg(__a, __b); +} + +/*-- vec_gfmsum_accum -------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector unsigned short +vec_gfmsum_accum(vector unsigned char __a, vector unsigned char __b, + vector unsigned short __c) { + return __builtin_s390_vgfmab(__a, __b, __c); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_gfmsum_accum(vector unsigned short __a, vector unsigned short __b, + vector unsigned int __c) { + return __builtin_s390_vgfmah(__a, __b, __c); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_gfmsum_accum(vector unsigned int __a, vector unsigned int __b, + vector unsigned long long __c) { + return __builtin_s390_vgfmaf(__a, __b, __c); +} + +/*-- vec_gfmsum_accum_128 ---------------------------------------------------*/ + +static inline __ATTRS_o_ai vector unsigned char +vec_gfmsum_accum_128(vector unsigned long long __a, + vector unsigned long long __b, + vector unsigned char __c) { + return __builtin_s390_vgfmag(__a, __b, __c); +} + +/*-- vec_mladd --------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_mladd(vector signed char __a, vector signed char __b, + vector signed char __c) { + return __a * __b + __c; +} + +static inline __ATTRS_o_ai vector signed char +vec_mladd(vector unsigned char __a, vector signed char __b, + vector signed char __c) { + return (vector signed char)__a * __b + __c; +} + +static inline __ATTRS_o_ai vector signed char +vec_mladd(vector signed char __a, vector unsigned char __b, + vector unsigned char __c) { + return __a * (vector signed char)__b + (vector signed char)__c; +} + +static inline __ATTRS_o_ai vector unsigned char +vec_mladd(vector unsigned char __a, vector unsigned char __b, + vector unsigned char __c) { + return __a * __b + __c; +} + +static inline __ATTRS_o_ai vector signed short +vec_mladd(vector signed short __a, vector signed short __b, + vector signed short __c) { + return __a * __b + __c; +} + +static inline __ATTRS_o_ai vector signed short +vec_mladd(vector unsigned short __a, vector signed short __b, + vector signed short __c) { + return (vector signed short)__a * __b + __c; +} + +static inline __ATTRS_o_ai vector signed short +vec_mladd(vector signed short __a, vector unsigned short __b, + vector unsigned short __c) { + return __a * (vector signed short)__b + (vector signed short)__c; +} + +static inline __ATTRS_o_ai vector unsigned short +vec_mladd(vector unsigned short __a, vector unsigned short __b, + vector unsigned short __c) { + return __a * __b + __c; +} + +static inline __ATTRS_o_ai vector signed int +vec_mladd(vector signed int __a, vector signed int __b, + vector signed int __c) { + return __a * __b + __c; +} + +static inline __ATTRS_o_ai vector signed int +vec_mladd(vector unsigned int __a, vector signed int __b, + vector signed int __c) { + return (vector signed int)__a * __b + __c; +} + +static inline __ATTRS_o_ai vector signed int +vec_mladd(vector signed int __a, vector unsigned int __b, + vector unsigned int __c) { + return __a * (vector signed int)__b + (vector signed int)__c; +} + +static inline __ATTRS_o_ai vector unsigned int 
+vec_mladd(vector unsigned int __a, vector unsigned int __b, + vector unsigned int __c) { + return __a * __b + __c; +} + +/*-- vec_mhadd --------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_mhadd(vector signed char __a, vector signed char __b, + vector signed char __c) { + return __builtin_s390_vmahb(__a, __b, __c); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_mhadd(vector unsigned char __a, vector unsigned char __b, + vector unsigned char __c) { + return __builtin_s390_vmalhb(__a, __b, __c); +} + +static inline __ATTRS_o_ai vector signed short +vec_mhadd(vector signed short __a, vector signed short __b, + vector signed short __c) { + return __builtin_s390_vmahh(__a, __b, __c); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_mhadd(vector unsigned short __a, vector unsigned short __b, + vector unsigned short __c) { + return __builtin_s390_vmalhh(__a, __b, __c); +} + +static inline __ATTRS_o_ai vector signed int +vec_mhadd(vector signed int __a, vector signed int __b, + vector signed int __c) { + return __builtin_s390_vmahf(__a, __b, __c); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_mhadd(vector unsigned int __a, vector unsigned int __b, + vector unsigned int __c) { + return __builtin_s390_vmalhf(__a, __b, __c); +} + +/*-- vec_meadd --------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed short +vec_meadd(vector signed char __a, vector signed char __b, + vector signed short __c) { + return __builtin_s390_vmaeb(__a, __b, __c); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_meadd(vector unsigned char __a, vector unsigned char __b, + vector unsigned short __c) { + return __builtin_s390_vmaleb(__a, __b, __c); +} + +static inline __ATTRS_o_ai vector signed int +vec_meadd(vector signed short __a, vector signed short __b, + vector signed int __c) { + return __builtin_s390_vmaeh(__a, __b, __c); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_meadd(vector unsigned short __a, vector unsigned short __b, + vector unsigned int __c) { + return __builtin_s390_vmaleh(__a, __b, __c); +} + +static inline __ATTRS_o_ai vector signed long long +vec_meadd(vector signed int __a, vector signed int __b, + vector signed long long __c) { + return __builtin_s390_vmaef(__a, __b, __c); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_meadd(vector unsigned int __a, vector unsigned int __b, + vector unsigned long long __c) { + return __builtin_s390_vmalef(__a, __b, __c); +} + +/*-- vec_moadd --------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed short +vec_moadd(vector signed char __a, vector signed char __b, + vector signed short __c) { + return __builtin_s390_vmaob(__a, __b, __c); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_moadd(vector unsigned char __a, vector unsigned char __b, + vector unsigned short __c) { + return __builtin_s390_vmalob(__a, __b, __c); +} + +static inline __ATTRS_o_ai vector signed int +vec_moadd(vector signed short __a, vector signed short __b, + vector signed int __c) { + return __builtin_s390_vmaoh(__a, __b, __c); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_moadd(vector unsigned short __a, vector unsigned short __b, + vector unsigned int __c) { + return __builtin_s390_vmaloh(__a, __b, __c); +} + +static inline __ATTRS_o_ai vector signed long long +vec_moadd(vector signed int __a, vector signed int __b, + vector signed 
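+
+/* Reading aid for the multiply-add family defined here: vec_mladd returns
+ * the low half of the product plus __c, vec_mhadd the high half plus __c,
+ * while vec_meadd and vec_moadd multiply the even- and odd-indexed
+ * elements to double width before adding a double-width __c; e.g. for
+ * vector signed short inputs, vec_meadd yields vector signed int.
+ */
+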
long long __c) { + return __builtin_s390_vmaof(__a, __b, __c); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_moadd(vector unsigned int __a, vector unsigned int __b, + vector unsigned long long __c) { + return __builtin_s390_vmalof(__a, __b, __c); +} + +/*-- vec_mulh ---------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_mulh(vector signed char __a, vector signed char __b) { + return __builtin_s390_vmhb(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_mulh(vector unsigned char __a, vector unsigned char __b) { + return __builtin_s390_vmlhb(__a, __b); +} + +static inline __ATTRS_o_ai vector signed short +vec_mulh(vector signed short __a, vector signed short __b) { + return __builtin_s390_vmhh(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_mulh(vector unsigned short __a, vector unsigned short __b) { + return __builtin_s390_vmlhh(__a, __b); +} + +static inline __ATTRS_o_ai vector signed int +vec_mulh(vector signed int __a, vector signed int __b) { + return __builtin_s390_vmhf(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_mulh(vector unsigned int __a, vector unsigned int __b) { + return __builtin_s390_vmlhf(__a, __b); +} + +/*-- vec_mule ---------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed short +vec_mule(vector signed char __a, vector signed char __b) { + return __builtin_s390_vmeb(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_mule(vector unsigned char __a, vector unsigned char __b) { + return __builtin_s390_vmleb(__a, __b); +} + +static inline __ATTRS_o_ai vector signed int +vec_mule(vector signed short __a, vector signed short __b) { + return __builtin_s390_vmeh(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_mule(vector unsigned short __a, vector unsigned short __b) { + return __builtin_s390_vmleh(__a, __b); +} + +static inline __ATTRS_o_ai vector signed long long +vec_mule(vector signed int __a, vector signed int __b) { + return __builtin_s390_vmef(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_mule(vector unsigned int __a, vector unsigned int __b) { + return __builtin_s390_vmlef(__a, __b); +} + +/*-- vec_mulo ---------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed short +vec_mulo(vector signed char __a, vector signed char __b) { + return __builtin_s390_vmob(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_mulo(vector unsigned char __a, vector unsigned char __b) { + return __builtin_s390_vmlob(__a, __b); +} + +static inline __ATTRS_o_ai vector signed int +vec_mulo(vector signed short __a, vector signed short __b) { + return __builtin_s390_vmoh(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_mulo(vector unsigned short __a, vector unsigned short __b) { + return __builtin_s390_vmloh(__a, __b); +} + +static inline __ATTRS_o_ai vector signed long long +vec_mulo(vector signed int __a, vector signed int __b) { + return __builtin_s390_vmof(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_mulo(vector unsigned int __a, vector unsigned int __b) { + return __builtin_s390_vmlof(__a, __b); +} + +/*-- vec_sub_u128 -----------------------------------------------------------*/ + +static inline __ATTRS_ai vector unsigned char +vec_sub_u128(vector unsigned char __a, vector unsigned char __b) { + return 
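+
+/* Usage sketch for the widening multiplies (illustrative only): a full
+ * 16-bit x 16-bit product is split across even/odd lanes,
+ *
+ *   vector signed short a = ..., b = ...;
+ *   vector signed int even = vec_mule(a, b);  // products of lanes 0,2,4,6
+ *   vector signed int odd  = vec_mulo(a, b);  // products of lanes 1,3,5,7
+ *
+ * while vec_mulh keeps only the high half of a same-width product.
+ */
+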
__builtin_s390_vsq(__a, __b); +} + +/*-- vec_subc ---------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector unsigned char +vec_subc(vector unsigned char __a, vector unsigned char __b) { + return __builtin_s390_vscbib(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_subc(vector unsigned short __a, vector unsigned short __b) { + return __builtin_s390_vscbih(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_subc(vector unsigned int __a, vector unsigned int __b) { + return __builtin_s390_vscbif(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_subc(vector unsigned long long __a, vector unsigned long long __b) { + return __builtin_s390_vscbig(__a, __b); +} + +/*-- vec_subc_u128 ----------------------------------------------------------*/ + +static inline __ATTRS_ai vector unsigned char +vec_subc_u128(vector unsigned char __a, vector unsigned char __b) { + return __builtin_s390_vscbiq(__a, __b); +} + +/*-- vec_sube_u128 ----------------------------------------------------------*/ + +static inline __ATTRS_ai vector unsigned char +vec_sube_u128(vector unsigned char __a, vector unsigned char __b, + vector unsigned char __c) { + return __builtin_s390_vsbiq(__a, __b, __c); +} + +/*-- vec_subec_u128 ---------------------------------------------------------*/ + +static inline __ATTRS_ai vector unsigned char +vec_subec_u128(vector unsigned char __a, vector unsigned char __b, + vector unsigned char __c) { + return __builtin_s390_vsbcbiq(__a, __b, __c); +} + +/*-- vec_sum2 ---------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector unsigned long long +vec_sum2(vector unsigned short __a, vector unsigned short __b) { + return __builtin_s390_vsumgh(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_sum2(vector unsigned int __a, vector unsigned int __b) { + return __builtin_s390_vsumgf(__a, __b); +} + +/*-- vec_sum_u128 -----------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector unsigned char +vec_sum_u128(vector unsigned int __a, vector unsigned int __b) { + return __builtin_s390_vsumqf(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_sum_u128(vector unsigned long long __a, vector unsigned long long __b) { + return __builtin_s390_vsumqg(__a, __b); +} + +/*-- vec_sum4 ---------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector unsigned int +vec_sum4(vector unsigned char __a, vector unsigned char __b) { + return __builtin_s390_vsumb(__a, __b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_sum4(vector unsigned short __a, vector unsigned short __b) { + return __builtin_s390_vsumh(__a, __b); +} + +/*-- vec_test_mask ----------------------------------------------------------*/ + +static inline __ATTRS_o_ai int +vec_test_mask(vector signed char __a, vector unsigned char __b) { + return __builtin_s390_vtm((vector unsigned char)__a, + (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai int +vec_test_mask(vector unsigned char __a, vector unsigned char __b) { + return __builtin_s390_vtm(__a, __b); +} + +static inline __ATTRS_o_ai int +vec_test_mask(vector signed short __a, vector unsigned short __b) { + return __builtin_s390_vtm((vector unsigned char)__a, + (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai int +vec_test_mask(vector unsigned short __a, vector unsigned short __b) { + return 
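+
+/* All vec_test_mask overloads defer to the byte-wise vtm builtin, so the
+ * element type only matters for overload resolution.  The int returned is
+ * the condition code; per the z/Architecture test-under-mask convention
+ * this is 0 when the selected bits are all zero, 3 when they are all one,
+ * and 1 for a mix (a hedged reading, not stated in this header).
+ */
+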
__builtin_s390_vtm((vector unsigned char)__a, + (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai int +vec_test_mask(vector signed int __a, vector unsigned int __b) { + return __builtin_s390_vtm((vector unsigned char)__a, + (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai int +vec_test_mask(vector unsigned int __a, vector unsigned int __b) { + return __builtin_s390_vtm((vector unsigned char)__a, + (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai int +vec_test_mask(vector signed long long __a, vector unsigned long long __b) { + return __builtin_s390_vtm((vector unsigned char)__a, + (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai int +vec_test_mask(vector unsigned long long __a, vector unsigned long long __b) { + return __builtin_s390_vtm((vector unsigned char)__a, + (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai int +vec_test_mask(vector double __a, vector unsigned long long __b) { + return __builtin_s390_vtm((vector unsigned char)__a, + (vector unsigned char)__b); +} + +/*-- vec_madd ---------------------------------------------------------------*/ + +static inline __ATTRS_ai vector double +vec_madd(vector double __a, vector double __b, vector double __c) { + return __builtin_s390_vfmadb(__a, __b, __c); +} + +/*-- vec_msub ---------------------------------------------------------------*/ + +static inline __ATTRS_ai vector double +vec_msub(vector double __a, vector double __b, vector double __c) { + return __builtin_s390_vfmsdb(__a, __b, __c); +} + +/*-- vec_sqrt ---------------------------------------------------------------*/ + +static inline __ATTRS_ai vector double +vec_sqrt(vector double __a) { + return __builtin_s390_vfsqdb(__a); +} + +/*-- vec_ld2f ---------------------------------------------------------------*/ + +static inline __ATTRS_ai vector double +vec_ld2f(const float *__ptr) { + typedef float __v2f32 __attribute__((__vector_size__(8))); + return __builtin_convertvector(*(const __v2f32 *)__ptr, vector double); +} + +/*-- vec_st2f ---------------------------------------------------------------*/ + +static inline __ATTRS_ai void +vec_st2f(vector double __a, float *__ptr) { + typedef float __v2f32 __attribute__((__vector_size__(8))); + *(__v2f32 *)__ptr = __builtin_convertvector(__a, __v2f32); +} + +/*-- vec_ctd ----------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector double +vec_ctd(vector signed long long __a, int __b) + __constant_range(__b, 0, 31) { + vector double __conv = __builtin_convertvector(__a, vector double); + __conv *= (vector double)(vector unsigned long long)((0x3ffULL - __b) << 52); + return __conv; +} + +static inline __ATTRS_o_ai vector double +vec_ctd(vector unsigned long long __a, int __b) + __constant_range(__b, 0, 31) { + vector double __conv = __builtin_convertvector(__a, vector double); + __conv *= (vector double)(vector unsigned long long)((0x3ffULL - __b) << 52); + return __conv; +} + +/*-- vec_ctsl ---------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed long long +vec_ctsl(vector double __a, int __b) + __constant_range(__b, 0, 31) { + __a *= (vector double)(vector unsigned long long)((0x3ffULL + __b) << 52); + return __builtin_convertvector(__a, vector signed long long); +} + +/*-- vec_ctul ---------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector unsigned long long +vec_ctul(vector double __a, int __b) + __constant_range(__b, 0, 31) { + __a *= 
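+
+/* The scaling in vec_ctd/vec_ctsl/vec_ctul is worth spelling out: shifting
+ * (0x3ffULL +/- __b) left by 52 places that value in the IEEE-754 exponent
+ * field of a double with a zero mantissa, i.e. it materializes the
+ * constant 2^(+__b) or 2^(-__b) without a memory load.  Multiplying by it
+ * converts to or from a fixed-point representation with __b fraction
+ * bits, which is why __b is constrained to 0..31 by __constant_range.
+ */
+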
(vector double)(vector unsigned long long)((0x3ffULL + __b) << 52); + return __builtin_convertvector(__a, vector unsigned long long); +} + +/*-- vec_roundp -------------------------------------------------------------*/ + +static inline __ATTRS_ai vector double +vec_roundp(vector double __a) { + return __builtin_s390_vfidb(__a, 4, 6); +} + +/*-- vec_ceil ---------------------------------------------------------------*/ + +static inline __ATTRS_ai vector double +vec_ceil(vector double __a) { + // On this platform, vec_ceil never triggers the IEEE-inexact exception. + return __builtin_s390_vfidb(__a, 4, 6); +} + +/*-- vec_roundm -------------------------------------------------------------*/ + +static inline __ATTRS_ai vector double +vec_roundm(vector double __a) { + return __builtin_s390_vfidb(__a, 4, 7); +} + +/*-- vec_floor --------------------------------------------------------------*/ + +static inline __ATTRS_ai vector double +vec_floor(vector double __a) { + // On this platform, vec_floor never triggers the IEEE-inexact exception. + return __builtin_s390_vfidb(__a, 4, 7); +} + +/*-- vec_roundz -------------------------------------------------------------*/ + +static inline __ATTRS_ai vector double +vec_roundz(vector double __a) { + return __builtin_s390_vfidb(__a, 4, 5); +} + +/*-- vec_trunc --------------------------------------------------------------*/ + +static inline __ATTRS_ai vector double +vec_trunc(vector double __a) { + // On this platform, vec_trunc never triggers the IEEE-inexact exception. + return __builtin_s390_vfidb(__a, 4, 5); +} + +/*-- vec_roundc -------------------------------------------------------------*/ + +static inline __ATTRS_ai vector double +vec_roundc(vector double __a) { + return __builtin_s390_vfidb(__a, 4, 0); +} + +/*-- vec_round --------------------------------------------------------------*/ + +static inline __ATTRS_ai vector double +vec_round(vector double __a) { + return __builtin_s390_vfidb(__a, 4, 4); +} + +/*-- vec_fp_test_data_class -------------------------------------------------*/ + +#define vec_fp_test_data_class(X, Y, Z) \ + ((vector bool long long)__builtin_s390_vftcidb((X), (Y), (Z))) + +/*-- vec_cp_until_zero ------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_cp_until_zero(vector signed char __a) { + return (vector signed char)__builtin_s390_vistrb((vector unsigned char)__a); +} + +static inline __ATTRS_o_ai vector bool char +vec_cp_until_zero(vector bool char __a) { + return (vector bool char)__builtin_s390_vistrb((vector unsigned char)__a); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_cp_until_zero(vector unsigned char __a) { + return __builtin_s390_vistrb(__a); +} + +static inline __ATTRS_o_ai vector signed short +vec_cp_until_zero(vector signed short __a) { + return (vector signed short)__builtin_s390_vistrh((vector unsigned short)__a); +} + +static inline __ATTRS_o_ai vector bool short +vec_cp_until_zero(vector bool short __a) { + return (vector bool short)__builtin_s390_vistrh((vector unsigned short)__a); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cp_until_zero(vector unsigned short __a) { + return __builtin_s390_vistrh(__a); +} + +static inline __ATTRS_o_ai vector signed int +vec_cp_until_zero(vector signed int __a) { + return (vector signed int)__builtin_s390_vistrf((vector unsigned int)__a); +} + +static inline __ATTRS_o_ai vector bool int +vec_cp_until_zero(vector bool int __a) { + return (vector bool int)__builtin_s390_vistrf((vector 
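+
+/* Mapping used by the vec_round* family above: the first literal (4) asks
+ * __builtin_s390_vfidb to suppress the IEEE-inexact exception, and the
+ * second selects the rounding mode, as the paired entry points show:
+ * 6 rounds toward +infinity (vec_roundp/vec_ceil), 7 toward -infinity
+ * (vec_roundm/vec_floor), 5 toward zero (vec_roundz/vec_trunc), 4 to
+ * nearest, and 0 follows the current FPC rounding mode (vec_roundc).
+ */
+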
unsigned int)__a); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cp_until_zero(vector unsigned int __a) { + return __builtin_s390_vistrf(__a); +} + +/*-- vec_cp_until_zero_cc ---------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_cp_until_zero_cc(vector signed char __a, int *__cc) { + return (vector signed char) + __builtin_s390_vistrbs((vector unsigned char)__a, __cc); +} + +static inline __ATTRS_o_ai vector bool char +vec_cp_until_zero_cc(vector bool char __a, int *__cc) { + return (vector bool char) + __builtin_s390_vistrbs((vector unsigned char)__a, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_cp_until_zero_cc(vector unsigned char __a, int *__cc) { + return __builtin_s390_vistrbs(__a, __cc); +} + +static inline __ATTRS_o_ai vector signed short +vec_cp_until_zero_cc(vector signed short __a, int *__cc) { + return (vector signed short) + __builtin_s390_vistrhs((vector unsigned short)__a, __cc); +} + +static inline __ATTRS_o_ai vector bool short +vec_cp_until_zero_cc(vector bool short __a, int *__cc) { + return (vector bool short) + __builtin_s390_vistrhs((vector unsigned short)__a, __cc); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cp_until_zero_cc(vector unsigned short __a, int *__cc) { + return __builtin_s390_vistrhs(__a, __cc); +} + +static inline __ATTRS_o_ai vector signed int +vec_cp_until_zero_cc(vector signed int __a, int *__cc) { + return (vector signed int) + __builtin_s390_vistrfs((vector unsigned int)__a, __cc); +} + +static inline __ATTRS_o_ai vector bool int +vec_cp_until_zero_cc(vector bool int __a, int *__cc) { + return (vector bool int)__builtin_s390_vistrfs((vector unsigned int)__a, + __cc); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cp_until_zero_cc(vector unsigned int __a, int *__cc) { + return __builtin_s390_vistrfs(__a, __cc); +} + +/*-- vec_cmpeq_idx ----------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_cmpeq_idx(vector signed char __a, vector signed char __b) { + return (vector signed char) + __builtin_s390_vfeeb((vector unsigned char)__a, + (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_cmpeq_idx(vector bool char __a, vector bool char __b) { + return __builtin_s390_vfeeb((vector unsigned char)__a, + (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_cmpeq_idx(vector unsigned char __a, vector unsigned char __b) { + return __builtin_s390_vfeeb(__a, __b); +} + +static inline __ATTRS_o_ai vector signed short +vec_cmpeq_idx(vector signed short __a, vector signed short __b) { + return (vector signed short) + __builtin_s390_vfeeh((vector unsigned short)__a, + (vector unsigned short)__b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cmpeq_idx(vector bool short __a, vector bool short __b) { + return __builtin_s390_vfeeh((vector unsigned short)__a, + (vector unsigned short)__b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cmpeq_idx(vector unsigned short __a, vector unsigned short __b) { + return __builtin_s390_vfeeh(__a, __b); +} + +static inline __ATTRS_o_ai vector signed int +vec_cmpeq_idx(vector signed int __a, vector signed int __b) { + return (vector signed int) + __builtin_s390_vfeef((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cmpeq_idx(vector bool int __a, vector bool int __b) { + return __builtin_s390_vfeef((vector 
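+
+/* vec_cp_until_zero wraps the "isolate string" builtins (vistrb/vistrh/
+ * vistrf): the result copies __a up to and including its first zero
+ * element and forces every element after that to zero -- handy for
+ * clamping a vector register holding a NUL-terminated string.  The _cc
+ * variants additionally report through *__cc whether a zero element was
+ * found (a hedged description based on the z/Architecture VISTR
+ * definition, not on this header).
+ */
+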
unsigned int)__a, + (vector unsigned int)__b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cmpeq_idx(vector unsigned int __a, vector unsigned int __b) { + return __builtin_s390_vfeef(__a, __b); +} + +/*-- vec_cmpeq_idx_cc -------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_cmpeq_idx_cc(vector signed char __a, vector signed char __b, int *__cc) { + return (vector signed char) + __builtin_s390_vfeebs((vector unsigned char)__a, + (vector unsigned char)__b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_cmpeq_idx_cc(vector bool char __a, vector bool char __b, int *__cc) { + return __builtin_s390_vfeebs((vector unsigned char)__a, + (vector unsigned char)__b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_cmpeq_idx_cc(vector unsigned char __a, vector unsigned char __b, + int *__cc) { + return __builtin_s390_vfeebs(__a, __b, __cc); +} + +static inline __ATTRS_o_ai vector signed short +vec_cmpeq_idx_cc(vector signed short __a, vector signed short __b, int *__cc) { + return (vector signed short) + __builtin_s390_vfeehs((vector unsigned short)__a, + (vector unsigned short)__b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cmpeq_idx_cc(vector bool short __a, vector bool short __b, int *__cc) { + return __builtin_s390_vfeehs((vector unsigned short)__a, + (vector unsigned short)__b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cmpeq_idx_cc(vector unsigned short __a, vector unsigned short __b, + int *__cc) { + return __builtin_s390_vfeehs(__a, __b, __cc); +} + +static inline __ATTRS_o_ai vector signed int +vec_cmpeq_idx_cc(vector signed int __a, vector signed int __b, int *__cc) { + return (vector signed int) + __builtin_s390_vfeefs((vector unsigned int)__a, + (vector unsigned int)__b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cmpeq_idx_cc(vector bool int __a, vector bool int __b, int *__cc) { + return __builtin_s390_vfeefs((vector unsigned int)__a, + (vector unsigned int)__b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cmpeq_idx_cc(vector unsigned int __a, vector unsigned int __b, int *__cc) { + return __builtin_s390_vfeefs(__a, __b, __cc); +} + +/*-- vec_cmpeq_or_0_idx -----------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_cmpeq_or_0_idx(vector signed char __a, vector signed char __b) { + return (vector signed char) + __builtin_s390_vfeezb((vector unsigned char)__a, + (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_cmpeq_or_0_idx(vector bool char __a, vector bool char __b) { + return __builtin_s390_vfeezb((vector unsigned char)__a, + (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_cmpeq_or_0_idx(vector unsigned char __a, vector unsigned char __b) { + return __builtin_s390_vfeezb(__a, __b); +} + +static inline __ATTRS_o_ai vector signed short +vec_cmpeq_or_0_idx(vector signed short __a, vector signed short __b) { + return (vector signed short) + __builtin_s390_vfeezh((vector unsigned short)__a, + (vector unsigned short)__b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cmpeq_or_0_idx(vector bool short __a, vector bool short __b) { + return __builtin_s390_vfeezh((vector unsigned short)__a, + (vector unsigned short)__b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cmpeq_or_0_idx(vector unsigned short __a, vector unsigned short __b) { + return 
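+
+/* The *_idx entry points return a vector carrying a position rather than
+ * a mask: vfeeb, vfeeh and vfeef report the byte offset of the first
+ * equal element (16 when there is none), and the *_or_0_idx forms
+ * (vfeezb and friends) stop early at a zero element in either operand,
+ * which suits NUL-terminated data.  The *_cc variants also store a
+ * condition code through *__cc.  (The byte-offset encoding follows the
+ * z/Architecture VFEE definition; treat the details as an assumption.)
+ */
+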
__builtin_s390_vfeezh(__a, __b); +} + +static inline __ATTRS_o_ai vector signed int +vec_cmpeq_or_0_idx(vector signed int __a, vector signed int __b) { + return (vector signed int) + __builtin_s390_vfeezf((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cmpeq_or_0_idx(vector bool int __a, vector bool int __b) { + return __builtin_s390_vfeezf((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cmpeq_or_0_idx(vector unsigned int __a, vector unsigned int __b) { + return __builtin_s390_vfeezf(__a, __b); +} + +/*-- vec_cmpeq_or_0_idx_cc --------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_cmpeq_or_0_idx_cc(vector signed char __a, vector signed char __b, + int *__cc) { + return (vector signed char) + __builtin_s390_vfeezbs((vector unsigned char)__a, + (vector unsigned char)__b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_cmpeq_or_0_idx_cc(vector bool char __a, vector bool char __b, int *__cc) { + return __builtin_s390_vfeezbs((vector unsigned char)__a, + (vector unsigned char)__b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_cmpeq_or_0_idx_cc(vector unsigned char __a, vector unsigned char __b, + int *__cc) { + return __builtin_s390_vfeezbs(__a, __b, __cc); +} + +static inline __ATTRS_o_ai vector signed short +vec_cmpeq_or_0_idx_cc(vector signed short __a, vector signed short __b, + int *__cc) { + return (vector signed short) + __builtin_s390_vfeezhs((vector unsigned short)__a, + (vector unsigned short)__b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cmpeq_or_0_idx_cc(vector bool short __a, vector bool short __b, int *__cc) { + return __builtin_s390_vfeezhs((vector unsigned short)__a, + (vector unsigned short)__b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cmpeq_or_0_idx_cc(vector unsigned short __a, vector unsigned short __b, + int *__cc) { + return __builtin_s390_vfeezhs(__a, __b, __cc); +} + +static inline __ATTRS_o_ai vector signed int +vec_cmpeq_or_0_idx_cc(vector signed int __a, vector signed int __b, int *__cc) { + return (vector signed int) + __builtin_s390_vfeezfs((vector unsigned int)__a, + (vector unsigned int)__b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cmpeq_or_0_idx_cc(vector bool int __a, vector bool int __b, int *__cc) { + return __builtin_s390_vfeezfs((vector unsigned int)__a, + (vector unsigned int)__b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cmpeq_or_0_idx_cc(vector unsigned int __a, vector unsigned int __b, + int *__cc) { + return __builtin_s390_vfeezfs(__a, __b, __cc); +} + +/*-- vec_cmpne_idx ----------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_cmpne_idx(vector signed char __a, vector signed char __b) { + return (vector signed char) + __builtin_s390_vfeneb((vector unsigned char)__a, + (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_cmpne_idx(vector bool char __a, vector bool char __b) { + return __builtin_s390_vfeneb((vector unsigned char)__a, + (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_cmpne_idx(vector unsigned char __a, vector unsigned char __b) { + return __builtin_s390_vfeneb(__a, __b); +} + +static inline __ATTRS_o_ai vector signed short +vec_cmpne_idx(vector signed short __a, vector signed short __b) { + return 
(vector signed short) + __builtin_s390_vfeneh((vector unsigned short)__a, + (vector unsigned short)__b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cmpne_idx(vector bool short __a, vector bool short __b) { + return __builtin_s390_vfeneh((vector unsigned short)__a, + (vector unsigned short)__b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cmpne_idx(vector unsigned short __a, vector unsigned short __b) { + return __builtin_s390_vfeneh(__a, __b); +} + +static inline __ATTRS_o_ai vector signed int +vec_cmpne_idx(vector signed int __a, vector signed int __b) { + return (vector signed int) + __builtin_s390_vfenef((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cmpne_idx(vector bool int __a, vector bool int __b) { + return __builtin_s390_vfenef((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cmpne_idx(vector unsigned int __a, vector unsigned int __b) { + return __builtin_s390_vfenef(__a, __b); +} + +/*-- vec_cmpne_idx_cc -------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_cmpne_idx_cc(vector signed char __a, vector signed char __b, int *__cc) { + return (vector signed char) + __builtin_s390_vfenebs((vector unsigned char)__a, + (vector unsigned char)__b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_cmpne_idx_cc(vector bool char __a, vector bool char __b, int *__cc) { + return __builtin_s390_vfenebs((vector unsigned char)__a, + (vector unsigned char)__b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_cmpne_idx_cc(vector unsigned char __a, vector unsigned char __b, + int *__cc) { + return __builtin_s390_vfenebs(__a, __b, __cc); +} + +static inline __ATTRS_o_ai vector signed short +vec_cmpne_idx_cc(vector signed short __a, vector signed short __b, int *__cc) { + return (vector signed short) + __builtin_s390_vfenehs((vector unsigned short)__a, + (vector unsigned short)__b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cmpne_idx_cc(vector bool short __a, vector bool short __b, int *__cc) { + return __builtin_s390_vfenehs((vector unsigned short)__a, + (vector unsigned short)__b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cmpne_idx_cc(vector unsigned short __a, vector unsigned short __b, + int *__cc) { + return __builtin_s390_vfenehs(__a, __b, __cc); +} + +static inline __ATTRS_o_ai vector signed int +vec_cmpne_idx_cc(vector signed int __a, vector signed int __b, int *__cc) { + return (vector signed int) + __builtin_s390_vfenefs((vector unsigned int)__a, + (vector unsigned int)__b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cmpne_idx_cc(vector bool int __a, vector bool int __b, int *__cc) { + return __builtin_s390_vfenefs((vector unsigned int)__a, + (vector unsigned int)__b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cmpne_idx_cc(vector unsigned int __a, vector unsigned int __b, int *__cc) { + return __builtin_s390_vfenefs(__a, __b, __cc); +} + +/*-- vec_cmpne_or_0_idx -----------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_cmpne_or_0_idx(vector signed char __a, vector signed char __b) { + return (vector signed char) + __builtin_s390_vfenezb((vector unsigned char)__a, + (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_cmpne_or_0_idx(vector bool char __a, vector bool 
char __b) { + return __builtin_s390_vfenezb((vector unsigned char)__a, + (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_cmpne_or_0_idx(vector unsigned char __a, vector unsigned char __b) { + return __builtin_s390_vfenezb(__a, __b); +} + +static inline __ATTRS_o_ai vector signed short +vec_cmpne_or_0_idx(vector signed short __a, vector signed short __b) { + return (vector signed short) + __builtin_s390_vfenezh((vector unsigned short)__a, + (vector unsigned short)__b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cmpne_or_0_idx(vector bool short __a, vector bool short __b) { + return __builtin_s390_vfenezh((vector unsigned short)__a, + (vector unsigned short)__b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cmpne_or_0_idx(vector unsigned short __a, vector unsigned short __b) { + return __builtin_s390_vfenezh(__a, __b); +} + +static inline __ATTRS_o_ai vector signed int +vec_cmpne_or_0_idx(vector signed int __a, vector signed int __b) { + return (vector signed int) + __builtin_s390_vfenezf((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cmpne_or_0_idx(vector bool int __a, vector bool int __b) { + return __builtin_s390_vfenezf((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cmpne_or_0_idx(vector unsigned int __a, vector unsigned int __b) { + return __builtin_s390_vfenezf(__a, __b); +} + +/*-- vec_cmpne_or_0_idx_cc --------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_cmpne_or_0_idx_cc(vector signed char __a, vector signed char __b, + int *__cc) { + return (vector signed char) + __builtin_s390_vfenezbs((vector unsigned char)__a, + (vector unsigned char)__b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_cmpne_or_0_idx_cc(vector bool char __a, vector bool char __b, int *__cc) { + return __builtin_s390_vfenezbs((vector unsigned char)__a, + (vector unsigned char)__b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_cmpne_or_0_idx_cc(vector unsigned char __a, vector unsigned char __b, + int *__cc) { + return __builtin_s390_vfenezbs(__a, __b, __cc); +} + +static inline __ATTRS_o_ai vector signed short +vec_cmpne_or_0_idx_cc(vector signed short __a, vector signed short __b, + int *__cc) { + return (vector signed short) + __builtin_s390_vfenezhs((vector unsigned short)__a, + (vector unsigned short)__b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cmpne_or_0_idx_cc(vector bool short __a, vector bool short __b, int *__cc) { + return __builtin_s390_vfenezhs((vector unsigned short)__a, + (vector unsigned short)__b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cmpne_or_0_idx_cc(vector unsigned short __a, vector unsigned short __b, + int *__cc) { + return __builtin_s390_vfenezhs(__a, __b, __cc); +} + +static inline __ATTRS_o_ai vector signed int +vec_cmpne_or_0_idx_cc(vector signed int __a, vector signed int __b, int *__cc) { + return (vector signed int) + __builtin_s390_vfenezfs((vector unsigned int)__a, + (vector unsigned int)__b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cmpne_or_0_idx_cc(vector bool int __a, vector bool int __b, int *__cc) { + return __builtin_s390_vfenezfs((vector unsigned int)__a, + (vector unsigned int)__b, __cc); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cmpne_or_0_idx_cc(vector unsigned int __a, vector unsigned int 
__b, + int *__cc) { + return __builtin_s390_vfenezfs(__a, __b, __cc); +} + +/*-- vec_cmprg --------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector bool char +vec_cmprg(vector unsigned char __a, vector unsigned char __b, + vector unsigned char __c) { + return (vector bool char)__builtin_s390_vstrcb(__a, __b, __c, 4); +} + +static inline __ATTRS_o_ai vector bool short +vec_cmprg(vector unsigned short __a, vector unsigned short __b, + vector unsigned short __c) { + return (vector bool short)__builtin_s390_vstrch(__a, __b, __c, 4); +} + +static inline __ATTRS_o_ai vector bool int +vec_cmprg(vector unsigned int __a, vector unsigned int __b, + vector unsigned int __c) { + return (vector bool int)__builtin_s390_vstrcf(__a, __b, __c, 4); +} + +/*-- vec_cmprg_cc -----------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector bool char +vec_cmprg_cc(vector unsigned char __a, vector unsigned char __b, + vector unsigned char __c, int *__cc) { + return (vector bool char)__builtin_s390_vstrcbs(__a, __b, __c, 4, __cc); +} + +static inline __ATTRS_o_ai vector bool short +vec_cmprg_cc(vector unsigned short __a, vector unsigned short __b, + vector unsigned short __c, int *__cc) { + return (vector bool short)__builtin_s390_vstrchs(__a, __b, __c, 4, __cc); +} + +static inline __ATTRS_o_ai vector bool int +vec_cmprg_cc(vector unsigned int __a, vector unsigned int __b, + vector unsigned int __c, int *__cc) { + return (vector bool int)__builtin_s390_vstrcfs(__a, __b, __c, 4, __cc); +} + +/*-- vec_cmprg_idx ----------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector unsigned char +vec_cmprg_idx(vector unsigned char __a, vector unsigned char __b, + vector unsigned char __c) { + return __builtin_s390_vstrcb(__a, __b, __c, 0); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cmprg_idx(vector unsigned short __a, vector unsigned short __b, + vector unsigned short __c) { + return __builtin_s390_vstrch(__a, __b, __c, 0); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cmprg_idx(vector unsigned int __a, vector unsigned int __b, + vector unsigned int __c) { + return __builtin_s390_vstrcf(__a, __b, __c, 0); +} + +/*-- vec_cmprg_idx_cc -------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector unsigned char +vec_cmprg_idx_cc(vector unsigned char __a, vector unsigned char __b, + vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrcbs(__a, __b, __c, 0, __cc); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cmprg_idx_cc(vector unsigned short __a, vector unsigned short __b, + vector unsigned short __c, int *__cc) { + return __builtin_s390_vstrchs(__a, __b, __c, 0, __cc); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cmprg_idx_cc(vector unsigned int __a, vector unsigned int __b, + vector unsigned int __c, int *__cc) { + return __builtin_s390_vstrcfs(__a, __b, __c, 0, __cc); +} + +/*-- vec_cmprg_or_0_idx -----------------------------------------------------*/ + +static inline __ATTRS_o_ai vector unsigned char +vec_cmprg_or_0_idx(vector unsigned char __a, vector unsigned char __b, + vector unsigned char __c) { + return __builtin_s390_vstrczb(__a, __b, __c, 0); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cmprg_or_0_idx(vector unsigned short __a, vector unsigned short __b, + vector unsigned short __c) { + return __builtin_s390_vstrczh(__a, __b, __c, 0); +} + +static inline __ATTRS_o_ai vector unsigned int 
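+
+/* All of the range-compare entry points funnel into vstrcb/vstrch/vstrcf
+ * (or the vstrcz forms, which also stop at a zero element); only the
+ * final immediate differs.  Reading the definitions here, bit 4 selects a
+ * boolean-mask result (vec_cmprg) instead of an index (vec_cmprg_idx),
+ * and bit 8 inverts the range test, which is how the vec_cmpnrg family
+ * arrives at the 8 and 12 variants of the same flag.
+ */
+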
+vec_cmprg_or_0_idx(vector unsigned int __a, vector unsigned int __b, + vector unsigned int __c) { + return __builtin_s390_vstrczf(__a, __b, __c, 0); +} + +/*-- vec_cmprg_or_0_idx_cc --------------------------------------------------*/ + +static inline __ATTRS_o_ai vector unsigned char +vec_cmprg_or_0_idx_cc(vector unsigned char __a, vector unsigned char __b, + vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrczbs(__a, __b, __c, 0, __cc); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cmprg_or_0_idx_cc(vector unsigned short __a, vector unsigned short __b, + vector unsigned short __c, int *__cc) { + return __builtin_s390_vstrczhs(__a, __b, __c, 0, __cc); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cmprg_or_0_idx_cc(vector unsigned int __a, vector unsigned int __b, + vector unsigned int __c, int *__cc) { + return __builtin_s390_vstrczfs(__a, __b, __c, 0, __cc); +} + +/*-- vec_cmpnrg -------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector bool char +vec_cmpnrg(vector unsigned char __a, vector unsigned char __b, + vector unsigned char __c) { + return (vector bool char)__builtin_s390_vstrcb(__a, __b, __c, 12); +} + +static inline __ATTRS_o_ai vector bool short +vec_cmpnrg(vector unsigned short __a, vector unsigned short __b, + vector unsigned short __c) { + return (vector bool short)__builtin_s390_vstrch(__a, __b, __c, 12); +} + +static inline __ATTRS_o_ai vector bool int +vec_cmpnrg(vector unsigned int __a, vector unsigned int __b, + vector unsigned int __c) { + return (vector bool int)__builtin_s390_vstrcf(__a, __b, __c, 12); +} + +/*-- vec_cmpnrg_cc ----------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector bool char +vec_cmpnrg_cc(vector unsigned char __a, vector unsigned char __b, + vector unsigned char __c, int *__cc) { + return (vector bool char)__builtin_s390_vstrcbs(__a, __b, __c, 12, __cc); +} + +static inline __ATTRS_o_ai vector bool short +vec_cmpnrg_cc(vector unsigned short __a, vector unsigned short __b, + vector unsigned short __c, int *__cc) { + return (vector bool short)__builtin_s390_vstrchs(__a, __b, __c, 12, __cc); +} + +static inline __ATTRS_o_ai vector bool int +vec_cmpnrg_cc(vector unsigned int __a, vector unsigned int __b, + vector unsigned int __c, int *__cc) { + return (vector bool int)__builtin_s390_vstrcfs(__a, __b, __c, 12, __cc); +} + +/*-- vec_cmpnrg_idx ---------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector unsigned char +vec_cmpnrg_idx(vector unsigned char __a, vector unsigned char __b, + vector unsigned char __c) { + return __builtin_s390_vstrcb(__a, __b, __c, 8); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cmpnrg_idx(vector unsigned short __a, vector unsigned short __b, + vector unsigned short __c) { + return __builtin_s390_vstrch(__a, __b, __c, 8); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cmpnrg_idx(vector unsigned int __a, vector unsigned int __b, + vector unsigned int __c) { + return __builtin_s390_vstrcf(__a, __b, __c, 8); +} + +/*-- vec_cmpnrg_idx_cc ------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector unsigned char +vec_cmpnrg_idx_cc(vector unsigned char __a, vector unsigned char __b, + vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrcbs(__a, __b, __c, 8, __cc); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cmpnrg_idx_cc(vector unsigned short __a, vector unsigned short __b, + 
vector unsigned short __c, int *__cc) { + return __builtin_s390_vstrchs(__a, __b, __c, 8, __cc); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cmpnrg_idx_cc(vector unsigned int __a, vector unsigned int __b, + vector unsigned int __c, int *__cc) { + return __builtin_s390_vstrcfs(__a, __b, __c, 8, __cc); +} + +/*-- vec_cmpnrg_or_0_idx ----------------------------------------------------*/ + +static inline __ATTRS_o_ai vector unsigned char +vec_cmpnrg_or_0_idx(vector unsigned char __a, vector unsigned char __b, + vector unsigned char __c) { + return __builtin_s390_vstrczb(__a, __b, __c, 8); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cmpnrg_or_0_idx(vector unsigned short __a, vector unsigned short __b, + vector unsigned short __c) { + return __builtin_s390_vstrczh(__a, __b, __c, 8); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cmpnrg_or_0_idx(vector unsigned int __a, vector unsigned int __b, + vector unsigned int __c) { + return __builtin_s390_vstrczf(__a, __b, __c, 8); +} + +/*-- vec_cmpnrg_or_0_idx_cc -------------------------------------------------*/ + +static inline __ATTRS_o_ai vector unsigned char +vec_cmpnrg_or_0_idx_cc(vector unsigned char __a, vector unsigned char __b, + vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrczbs(__a, __b, __c, 8, __cc); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_cmpnrg_or_0_idx_cc(vector unsigned short __a, vector unsigned short __b, + vector unsigned short __c, int *__cc) { + return __builtin_s390_vstrczhs(__a, __b, __c, 8, __cc); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_cmpnrg_or_0_idx_cc(vector unsigned int __a, vector unsigned int __b, + vector unsigned int __c, int *__cc) { + return __builtin_s390_vstrczfs(__a, __b, __c, 8, __cc); +} + +/*-- vec_find_any_eq --------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector bool char +vec_find_any_eq(vector signed char __a, vector signed char __b) { + return (vector bool char) + __builtin_s390_vfaeb((vector unsigned char)__a, + (vector unsigned char)__b, 4); +} + +static inline __ATTRS_o_ai vector bool char +vec_find_any_eq(vector bool char __a, vector bool char __b) { + return (vector bool char) + __builtin_s390_vfaeb((vector unsigned char)__a, + (vector unsigned char)__b, 4); +} + +static inline __ATTRS_o_ai vector bool char +vec_find_any_eq(vector unsigned char __a, vector unsigned char __b) { + return (vector bool char)__builtin_s390_vfaeb(__a, __b, 4); +} + +static inline __ATTRS_o_ai vector bool short +vec_find_any_eq(vector signed short __a, vector signed short __b) { + return (vector bool short) + __builtin_s390_vfaeh((vector unsigned short)__a, + (vector unsigned short)__b, 4); +} + +static inline __ATTRS_o_ai vector bool short +vec_find_any_eq(vector bool short __a, vector bool short __b) { + return (vector bool short) + __builtin_s390_vfaeh((vector unsigned short)__a, + (vector unsigned short)__b, 4); +} + +static inline __ATTRS_o_ai vector bool short +vec_find_any_eq(vector unsigned short __a, vector unsigned short __b) { + return (vector bool short)__builtin_s390_vfaeh(__a, __b, 4); +} + +static inline __ATTRS_o_ai vector bool int +vec_find_any_eq(vector signed int __a, vector signed int __b) { + return (vector bool int) + __builtin_s390_vfaef((vector unsigned int)__a, + (vector unsigned int)__b, 4); +} + +static inline __ATTRS_o_ai vector bool int +vec_find_any_eq(vector bool int __a, vector bool int __b) { + return (vector bool int) + __builtin_s390_vfaef((vector 
unsigned int)__a, + (vector unsigned int)__b, 4); +} + +static inline __ATTRS_o_ai vector bool int +vec_find_any_eq(vector unsigned int __a, vector unsigned int __b) { + return (vector bool int)__builtin_s390_vfaef(__a, __b, 4); +} + +/*-- vec_find_any_eq_cc -----------------------------------------------------*/ + +static inline __ATTRS_o_ai vector bool char +vec_find_any_eq_cc(vector signed char __a, vector signed char __b, int *__cc) { + return (vector bool char) + __builtin_s390_vfaebs((vector unsigned char)__a, + (vector unsigned char)__b, 4, __cc); +} + +static inline __ATTRS_o_ai vector bool char +vec_find_any_eq_cc(vector bool char __a, vector bool char __b, int *__cc) { + return (vector bool char) + __builtin_s390_vfaebs((vector unsigned char)__a, + (vector unsigned char)__b, 4, __cc); +} + +static inline __ATTRS_o_ai vector bool char +vec_find_any_eq_cc(vector unsigned char __a, vector unsigned char __b, + int *__cc) { + return (vector bool char)__builtin_s390_vfaebs(__a, __b, 4, __cc); +} + +static inline __ATTRS_o_ai vector bool short +vec_find_any_eq_cc(vector signed short __a, vector signed short __b, + int *__cc) { + return (vector bool short) + __builtin_s390_vfaehs((vector unsigned short)__a, + (vector unsigned short)__b, 4, __cc); +} + +static inline __ATTRS_o_ai vector bool short +vec_find_any_eq_cc(vector bool short __a, vector bool short __b, int *__cc) { + return (vector bool short) + __builtin_s390_vfaehs((vector unsigned short)__a, + (vector unsigned short)__b, 4, __cc); +} + +static inline __ATTRS_o_ai vector bool short +vec_find_any_eq_cc(vector unsigned short __a, vector unsigned short __b, + int *__cc) { + return (vector bool short)__builtin_s390_vfaehs(__a, __b, 4, __cc); +} + +static inline __ATTRS_o_ai vector bool int +vec_find_any_eq_cc(vector signed int __a, vector signed int __b, int *__cc) { + return (vector bool int) + __builtin_s390_vfaefs((vector unsigned int)__a, + (vector unsigned int)__b, 4, __cc); +} + +static inline __ATTRS_o_ai vector bool int +vec_find_any_eq_cc(vector bool int __a, vector bool int __b, int *__cc) { + return (vector bool int) + __builtin_s390_vfaefs((vector unsigned int)__a, + (vector unsigned int)__b, 4, __cc); +} + +static inline __ATTRS_o_ai vector bool int +vec_find_any_eq_cc(vector unsigned int __a, vector unsigned int __b, + int *__cc) { + return (vector bool int)__builtin_s390_vfaefs(__a, __b, 4, __cc); +} + +/*-- vec_find_any_eq_idx ----------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_find_any_eq_idx(vector signed char __a, vector signed char __b) { + return (vector signed char) + __builtin_s390_vfaeb((vector unsigned char)__a, + (vector unsigned char)__b, 0); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_find_any_eq_idx(vector bool char __a, vector bool char __b) { + return __builtin_s390_vfaeb((vector unsigned char)__a, + (vector unsigned char)__b, 0); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_find_any_eq_idx(vector unsigned char __a, vector unsigned char __b) { + return __builtin_s390_vfaeb(__a, __b, 0); +} + +static inline __ATTRS_o_ai vector signed short +vec_find_any_eq_idx(vector signed short __a, vector signed short __b) { + return (vector signed short) + __builtin_s390_vfaeh((vector unsigned short)__a, + (vector unsigned short)__b, 0); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_find_any_eq_idx(vector bool short __a, vector bool short __b) { + return __builtin_s390_vfaeh((vector unsigned short)__a, + (vector 
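+
+/* The vec_find_any_eq entry points follow the same flag scheme through
+ * vfaeb, vfaeh and vfaef: an immediate of 4 requests the per-element
+ * boolean result seen above, 0 the index form, the vfaez builtins add the
+ * stop-at-zero behaviour, and the "s" builtins return the condition code
+ * through the extra int pointer.
+ */
+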
unsigned short)__b, 0); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_find_any_eq_idx(vector unsigned short __a, vector unsigned short __b) { + return __builtin_s390_vfaeh(__a, __b, 0); +} + +static inline __ATTRS_o_ai vector signed int +vec_find_any_eq_idx(vector signed int __a, vector signed int __b) { + return (vector signed int) + __builtin_s390_vfaef((vector unsigned int)__a, + (vector unsigned int)__b, 0); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_find_any_eq_idx(vector bool int __a, vector bool int __b) { + return __builtin_s390_vfaef((vector unsigned int)__a, + (vector unsigned int)__b, 0); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_find_any_eq_idx(vector unsigned int __a, vector unsigned int __b) { + return __builtin_s390_vfaef(__a, __b, 0); +} + +/*-- vec_find_any_eq_idx_cc -------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_find_any_eq_idx_cc(vector signed char __a, vector signed char __b, + int *__cc) { + return (vector signed char) + __builtin_s390_vfaebs((vector unsigned char)__a, + (vector unsigned char)__b, 0, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_find_any_eq_idx_cc(vector bool char __a, vector bool char __b, int *__cc) { + return __builtin_s390_vfaebs((vector unsigned char)__a, + (vector unsigned char)__b, 0, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_find_any_eq_idx_cc(vector unsigned char __a, vector unsigned char __b, + int *__cc) { + return __builtin_s390_vfaebs(__a, __b, 0, __cc); +} + +static inline __ATTRS_o_ai vector signed short +vec_find_any_eq_idx_cc(vector signed short __a, vector signed short __b, + int *__cc) { + return (vector signed short) + __builtin_s390_vfaehs((vector unsigned short)__a, + (vector unsigned short)__b, 0, __cc); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_find_any_eq_idx_cc(vector bool short __a, vector bool short __b, + int *__cc) { + return __builtin_s390_vfaehs((vector unsigned short)__a, + (vector unsigned short)__b, 0, __cc); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_find_any_eq_idx_cc(vector unsigned short __a, vector unsigned short __b, + int *__cc) { + return __builtin_s390_vfaehs(__a, __b, 0, __cc); +} + +static inline __ATTRS_o_ai vector signed int +vec_find_any_eq_idx_cc(vector signed int __a, vector signed int __b, + int *__cc) { + return (vector signed int) + __builtin_s390_vfaefs((vector unsigned int)__a, + (vector unsigned int)__b, 0, __cc); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_find_any_eq_idx_cc(vector bool int __a, vector bool int __b, int *__cc) { + return __builtin_s390_vfaefs((vector unsigned int)__a, + (vector unsigned int)__b, 0, __cc); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_find_any_eq_idx_cc(vector unsigned int __a, vector unsigned int __b, + int *__cc) { + return __builtin_s390_vfaefs(__a, __b, 0, __cc); +} + +/*-- vec_find_any_eq_or_0_idx -----------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_find_any_eq_or_0_idx(vector signed char __a, vector signed char __b) { + return (vector signed char) + __builtin_s390_vfaezb((vector unsigned char)__a, + (vector unsigned char)__b, 0); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_find_any_eq_or_0_idx(vector bool char __a, vector bool char __b) { + return __builtin_s390_vfaezb((vector unsigned char)__a, + (vector unsigned char)__b, 0); +} + +static inline __ATTRS_o_ai vector unsigned char 
+vec_find_any_eq_or_0_idx(vector unsigned char __a, vector unsigned char __b) { + return __builtin_s390_vfaezb(__a, __b, 0); +} + +static inline __ATTRS_o_ai vector signed short +vec_find_any_eq_or_0_idx(vector signed short __a, vector signed short __b) { + return (vector signed short) + __builtin_s390_vfaezh((vector unsigned short)__a, + (vector unsigned short)__b, 0); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_find_any_eq_or_0_idx(vector bool short __a, vector bool short __b) { + return __builtin_s390_vfaezh((vector unsigned short)__a, + (vector unsigned short)__b, 0); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_find_any_eq_or_0_idx(vector unsigned short __a, vector unsigned short __b) { + return __builtin_s390_vfaezh(__a, __b, 0); +} + +static inline __ATTRS_o_ai vector signed int +vec_find_any_eq_or_0_idx(vector signed int __a, vector signed int __b) { + return (vector signed int) + __builtin_s390_vfaezf((vector unsigned int)__a, + (vector unsigned int)__b, 0); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_find_any_eq_or_0_idx(vector bool int __a, vector bool int __b) { + return __builtin_s390_vfaezf((vector unsigned int)__a, + (vector unsigned int)__b, 0); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_find_any_eq_or_0_idx(vector unsigned int __a, vector unsigned int __b) { + return __builtin_s390_vfaezf(__a, __b, 0); +} + +/*-- vec_find_any_eq_or_0_idx_cc --------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_find_any_eq_or_0_idx_cc(vector signed char __a, vector signed char __b, + int *__cc) { + return (vector signed char) + __builtin_s390_vfaezbs((vector unsigned char)__a, + (vector unsigned char)__b, 0, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_find_any_eq_or_0_idx_cc(vector bool char __a, vector bool char __b, + int *__cc) { + return __builtin_s390_vfaezbs((vector unsigned char)__a, + (vector unsigned char)__b, 0, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_find_any_eq_or_0_idx_cc(vector unsigned char __a, vector unsigned char __b, + int *__cc) { + return __builtin_s390_vfaezbs(__a, __b, 0, __cc); +} + +static inline __ATTRS_o_ai vector signed short +vec_find_any_eq_or_0_idx_cc(vector signed short __a, vector signed short __b, + int *__cc) { + return (vector signed short) + __builtin_s390_vfaezhs((vector unsigned short)__a, + (vector unsigned short)__b, 0, __cc); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_find_any_eq_or_0_idx_cc(vector bool short __a, vector bool short __b, + int *__cc) { + return __builtin_s390_vfaezhs((vector unsigned short)__a, + (vector unsigned short)__b, 0, __cc); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_find_any_eq_or_0_idx_cc(vector unsigned short __a, + vector unsigned short __b, int *__cc) { + return __builtin_s390_vfaezhs(__a, __b, 0, __cc); +} + +static inline __ATTRS_o_ai vector signed int +vec_find_any_eq_or_0_idx_cc(vector signed int __a, vector signed int __b, + int *__cc) { + return (vector signed int) + __builtin_s390_vfaezfs((vector unsigned int)__a, + (vector unsigned int)__b, 0, __cc); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_find_any_eq_or_0_idx_cc(vector bool int __a, vector bool int __b, + int *__cc) { + return __builtin_s390_vfaezfs((vector unsigned int)__a, + (vector unsigned int)__b, 0, __cc); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_find_any_eq_or_0_idx_cc(vector unsigned int __a, vector unsigned int __b, + int *__cc) { + 
return __builtin_s390_vfaezfs(__a, __b, 0, __cc); +} + +/*-- vec_find_any_ne --------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector bool char +vec_find_any_ne(vector signed char __a, vector signed char __b) { + return (vector bool char) + __builtin_s390_vfaeb((vector unsigned char)__a, + (vector unsigned char)__b, 12); +} + +static inline __ATTRS_o_ai vector bool char +vec_find_any_ne(vector bool char __a, vector bool char __b) { + return (vector bool char) + __builtin_s390_vfaeb((vector unsigned char)__a, + (vector unsigned char)__b, 12); +} + +static inline __ATTRS_o_ai vector bool char +vec_find_any_ne(vector unsigned char __a, vector unsigned char __b) { + return (vector bool char)__builtin_s390_vfaeb(__a, __b, 12); +} + +static inline __ATTRS_o_ai vector bool short +vec_find_any_ne(vector signed short __a, vector signed short __b) { + return (vector bool short) + __builtin_s390_vfaeh((vector unsigned short)__a, + (vector unsigned short)__b, 12); +} + +static inline __ATTRS_o_ai vector bool short +vec_find_any_ne(vector bool short __a, vector bool short __b) { + return (vector bool short) + __builtin_s390_vfaeh((vector unsigned short)__a, + (vector unsigned short)__b, 12); +} + +static inline __ATTRS_o_ai vector bool short +vec_find_any_ne(vector unsigned short __a, vector unsigned short __b) { + return (vector bool short)__builtin_s390_vfaeh(__a, __b, 12); +} + +static inline __ATTRS_o_ai vector bool int +vec_find_any_ne(vector signed int __a, vector signed int __b) { + return (vector bool int) + __builtin_s390_vfaef((vector unsigned int)__a, + (vector unsigned int)__b, 12); +} + +static inline __ATTRS_o_ai vector bool int +vec_find_any_ne(vector bool int __a, vector bool int __b) { + return (vector bool int) + __builtin_s390_vfaef((vector unsigned int)__a, + (vector unsigned int)__b, 12); +} + +static inline __ATTRS_o_ai vector bool int +vec_find_any_ne(vector unsigned int __a, vector unsigned int __b) { + return (vector bool int)__builtin_s390_vfaef(__a, __b, 12); +} + +/*-- vec_find_any_ne_cc -----------------------------------------------------*/ + +static inline __ATTRS_o_ai vector bool char +vec_find_any_ne_cc(vector signed char __a, vector signed char __b, int *__cc) { + return (vector bool char) + __builtin_s390_vfaebs((vector unsigned char)__a, + (vector unsigned char)__b, 12, __cc); +} + +static inline __ATTRS_o_ai vector bool char +vec_find_any_ne_cc(vector bool char __a, vector bool char __b, int *__cc) { + return (vector bool char) + __builtin_s390_vfaebs((vector unsigned char)__a, + (vector unsigned char)__b, 12, __cc); +} + +static inline __ATTRS_o_ai vector bool char +vec_find_any_ne_cc(vector unsigned char __a, vector unsigned char __b, + int *__cc) { + return (vector bool char)__builtin_s390_vfaebs(__a, __b, 12, __cc); +} + +static inline __ATTRS_o_ai vector bool short +vec_find_any_ne_cc(vector signed short __a, vector signed short __b, + int *__cc) { + return (vector bool short) + __builtin_s390_vfaehs((vector unsigned short)__a, + (vector unsigned short)__b, 12, __cc); +} + +static inline __ATTRS_o_ai vector bool short +vec_find_any_ne_cc(vector bool short __a, vector bool short __b, int *__cc) { + return (vector bool short) + __builtin_s390_vfaehs((vector unsigned short)__a, + (vector unsigned short)__b, 12, __cc); +} + +static inline __ATTRS_o_ai vector bool short +vec_find_any_ne_cc(vector unsigned short __a, vector unsigned short __b, + int *__cc) { + return (vector bool short)__builtin_s390_vfaehs(__a, __b, 12, __cc); 
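+/* Summary of the magic constants in the vfae/vfaez builtin calls in this
+ * group (annotation, not part of the upstream header): the final immediate
+ * selects the variant. 4 yields a per-element boolean mask of "some element
+ * of __b equals this element of __a", 12 is the inverted (not-equal) mask,
+ * 0 returns the index of the first match, and 8 the index of the first
+ * non-match; the _or_0_ forms use the vfaez builtins, which additionally
+ * stop at a zero element, and the _cc forms also write the hardware
+ * condition code through *__cc. An illustrative sketch, assuming -fzvector:
+ *
+ *   vector unsigned char hay = ..., set = ...;
+ *   vector bool char mask = vec_find_any_eq(hay, set);
+ *   vector unsigned char idx = vec_find_any_eq_idx(hay, set);
+ */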
+} + +static inline __ATTRS_o_ai vector bool int +vec_find_any_ne_cc(vector signed int __a, vector signed int __b, int *__cc) { + return (vector bool int) + __builtin_s390_vfaefs((vector unsigned int)__a, + (vector unsigned int)__b, 12, __cc); +} + +static inline __ATTRS_o_ai vector bool int +vec_find_any_ne_cc(vector bool int __a, vector bool int __b, int *__cc) { + return (vector bool int) + __builtin_s390_vfaefs((vector unsigned int)__a, + (vector unsigned int)__b, 12, __cc); +} + +static inline __ATTRS_o_ai vector bool int +vec_find_any_ne_cc(vector unsigned int __a, vector unsigned int __b, + int *__cc) { + return (vector bool int)__builtin_s390_vfaefs(__a, __b, 12, __cc); +} + +/*-- vec_find_any_ne_idx ----------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_find_any_ne_idx(vector signed char __a, vector signed char __b) { + return (vector signed char) + __builtin_s390_vfaeb((vector unsigned char)__a, + (vector unsigned char)__b, 8); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_find_any_ne_idx(vector bool char __a, vector bool char __b) { + return __builtin_s390_vfaeb((vector unsigned char)__a, + (vector unsigned char)__b, 8); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_find_any_ne_idx(vector unsigned char __a, vector unsigned char __b) { + return __builtin_s390_vfaeb(__a, __b, 8); +} + +static inline __ATTRS_o_ai vector signed short +vec_find_any_ne_idx(vector signed short __a, vector signed short __b) { + return (vector signed short) + __builtin_s390_vfaeh((vector unsigned short)__a, + (vector unsigned short)__b, 8); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_find_any_ne_idx(vector bool short __a, vector bool short __b) { + return __builtin_s390_vfaeh((vector unsigned short)__a, + (vector unsigned short)__b, 8); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_find_any_ne_idx(vector unsigned short __a, vector unsigned short __b) { + return __builtin_s390_vfaeh(__a, __b, 8); +} + +static inline __ATTRS_o_ai vector signed int +vec_find_any_ne_idx(vector signed int __a, vector signed int __b) { + return (vector signed int) + __builtin_s390_vfaef((vector unsigned int)__a, + (vector unsigned int)__b, 8); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_find_any_ne_idx(vector bool int __a, vector bool int __b) { + return __builtin_s390_vfaef((vector unsigned int)__a, + (vector unsigned int)__b, 8); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_find_any_ne_idx(vector unsigned int __a, vector unsigned int __b) { + return __builtin_s390_vfaef(__a, __b, 8); +} + +/*-- vec_find_any_ne_idx_cc -------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_find_any_ne_idx_cc(vector signed char __a, vector signed char __b, + int *__cc) { + return (vector signed char) + __builtin_s390_vfaebs((vector unsigned char)__a, + (vector unsigned char)__b, 8, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_find_any_ne_idx_cc(vector bool char __a, vector bool char __b, int *__cc) { + return __builtin_s390_vfaebs((vector unsigned char)__a, + (vector unsigned char)__b, 8, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_find_any_ne_idx_cc(vector unsigned char __a, vector unsigned char __b, + int *__cc) { + return __builtin_s390_vfaebs(__a, __b, 8, __cc); +} + +static inline __ATTRS_o_ai vector signed short +vec_find_any_ne_idx_cc(vector signed short __a, vector signed short __b, + int *__cc) { + return 
(vector signed short) + __builtin_s390_vfaehs((vector unsigned short)__a, + (vector unsigned short)__b, 8, __cc); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_find_any_ne_idx_cc(vector bool short __a, vector bool short __b, + int *__cc) { + return __builtin_s390_vfaehs((vector unsigned short)__a, + (vector unsigned short)__b, 8, __cc); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_find_any_ne_idx_cc(vector unsigned short __a, vector unsigned short __b, + int *__cc) { + return __builtin_s390_vfaehs(__a, __b, 8, __cc); +} + +static inline __ATTRS_o_ai vector signed int +vec_find_any_ne_idx_cc(vector signed int __a, vector signed int __b, + int *__cc) { + return (vector signed int) + __builtin_s390_vfaefs((vector unsigned int)__a, + (vector unsigned int)__b, 8, __cc); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_find_any_ne_idx_cc(vector bool int __a, vector bool int __b, int *__cc) { + return __builtin_s390_vfaefs((vector unsigned int)__a, + (vector unsigned int)__b, 8, __cc); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_find_any_ne_idx_cc(vector unsigned int __a, vector unsigned int __b, + int *__cc) { + return __builtin_s390_vfaefs(__a, __b, 8, __cc); +} + +/*-- vec_find_any_ne_or_0_idx -----------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_find_any_ne_or_0_idx(vector signed char __a, vector signed char __b) { + return (vector signed char) + __builtin_s390_vfaezb((vector unsigned char)__a, + (vector unsigned char)__b, 8); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_find_any_ne_or_0_idx(vector bool char __a, vector bool char __b) { + return __builtin_s390_vfaezb((vector unsigned char)__a, + (vector unsigned char)__b, 8); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_find_any_ne_or_0_idx(vector unsigned char __a, vector unsigned char __b) { + return __builtin_s390_vfaezb(__a, __b, 8); +} + +static inline __ATTRS_o_ai vector signed short +vec_find_any_ne_or_0_idx(vector signed short __a, vector signed short __b) { + return (vector signed short) + __builtin_s390_vfaezh((vector unsigned short)__a, + (vector unsigned short)__b, 8); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_find_any_ne_or_0_idx(vector bool short __a, vector bool short __b) { + return __builtin_s390_vfaezh((vector unsigned short)__a, + (vector unsigned short)__b, 8); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_find_any_ne_or_0_idx(vector unsigned short __a, vector unsigned short __b) { + return __builtin_s390_vfaezh(__a, __b, 8); +} + +static inline __ATTRS_o_ai vector signed int +vec_find_any_ne_or_0_idx(vector signed int __a, vector signed int __b) { + return (vector signed int) + __builtin_s390_vfaezf((vector unsigned int)__a, + (vector unsigned int)__b, 8); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_find_any_ne_or_0_idx(vector bool int __a, vector bool int __b) { + return __builtin_s390_vfaezf((vector unsigned int)__a, + (vector unsigned int)__b, 8); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_find_any_ne_or_0_idx(vector unsigned int __a, vector unsigned int __b) { + return __builtin_s390_vfaezf(__a, __b, 8); +} + +/*-- vec_find_any_ne_or_0_idx_cc --------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_find_any_ne_or_0_idx_cc(vector signed char __a, vector signed char __b, + int *__cc) { + return (vector signed char) + __builtin_s390_vfaezbs((vector unsigned char)__a, + (vector unsigned 
char)__b, 8, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_find_any_ne_or_0_idx_cc(vector bool char __a, vector bool char __b, + int *__cc) { + return __builtin_s390_vfaezbs((vector unsigned char)__a, + (vector unsigned char)__b, 8, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_find_any_ne_or_0_idx_cc(vector unsigned char __a, vector unsigned char __b, + int *__cc) { + return __builtin_s390_vfaezbs(__a, __b, 8, __cc); +} + +static inline __ATTRS_o_ai vector signed short +vec_find_any_ne_or_0_idx_cc(vector signed short __a, vector signed short __b, + int *__cc) { + return (vector signed short) + __builtin_s390_vfaezhs((vector unsigned short)__a, + (vector unsigned short)__b, 8, __cc); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_find_any_ne_or_0_idx_cc(vector bool short __a, vector bool short __b, + int *__cc) { + return __builtin_s390_vfaezhs((vector unsigned short)__a, + (vector unsigned short)__b, 8, __cc); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_find_any_ne_or_0_idx_cc(vector unsigned short __a, + vector unsigned short __b, int *__cc) { + return __builtin_s390_vfaezhs(__a, __b, 8, __cc); +} + +static inline __ATTRS_o_ai vector signed int +vec_find_any_ne_or_0_idx_cc(vector signed int __a, vector signed int __b, + int *__cc) { + return (vector signed int) + __builtin_s390_vfaezfs((vector unsigned int)__a, + (vector unsigned int)__b, 8, __cc); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_find_any_ne_or_0_idx_cc(vector bool int __a, vector bool int __b, + int *__cc) { + return __builtin_s390_vfaezfs((vector unsigned int)__a, + (vector unsigned int)__b, 8, __cc); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_find_any_ne_or_0_idx_cc(vector unsigned int __a, vector unsigned int __b, + int *__cc) { + return __builtin_s390_vfaezfs(__a, __b, 8, __cc); +} + +#undef __constant_pow2_range +#undef __constant_range +#undef __constant +#undef __ATTRS_o +#undef __ATTRS_o_ai +#undef __ATTRS_ai + +#else + +#error "Use -fzvector to enable vector extensions" + +#endif diff --git a/c_headers/wmmintrin.h b/c_headers/wmmintrin.h new file mode 100644 index 0000000000..369e3c208e --- /dev/null +++ b/c_headers/wmmintrin.h @@ -0,0 +1,42 @@ +/*===---- wmmintrin.h - AES intrinsics ------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef _WMMINTRIN_H
+#define _WMMINTRIN_H
+
+#include <emmintrin.h>
+
+#if !defined (__AES__) && !defined (__PCLMUL__)
+# error "AES/PCLMUL instructions not enabled"
+#else
+
+#ifdef __AES__
+#include <__wmmintrin_aes.h>
+#endif /* __AES__ */
+
+#ifdef __PCLMUL__
+#include <__wmmintrin_pclmul.h>
+#endif /* __PCLMUL__ */
+
+#endif /* __AES__ || __PCLMUL__ */
+#endif /* _WMMINTRIN_H */
diff --git a/c_headers/x86intrin.h b/c_headers/x86intrin.h
new file mode 100644
index 0000000000..21a43daf3c
--- /dev/null
+++ b/c_headers/x86intrin.h
@@ -0,0 +1,81 @@
+/*===---- x86intrin.h - X86 intrinsics -------------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __X86INTRIN_H
+#define __X86INTRIN_H
+
+#include <ia32intrin.h>
+
+#include <immintrin.h>
+
+#ifdef __3dNOW__
+#include <mm3dnow.h>
+#endif
+
+#ifdef __BMI__
+#include <bmiintrin.h>
+#endif
+
+#ifdef __BMI2__
+#include <bmi2intrin.h>
+#endif
+
+#ifdef __LZCNT__
+#include <lzcntintrin.h>
+#endif
+
+#ifdef __POPCNT__
+#include <popcntintrin.h>
+#endif
+
+#ifdef __RDSEED__
+#include <rdseedintrin.h>
+#endif
+
+#ifdef __PRFCHW__
+#include <prfchwintrin.h>
+#endif
+
+#ifdef __SSE4A__
+#include <ammintrin.h>
+#endif
+
+#ifdef __FMA4__
+#include <fma4intrin.h>
+#endif
+
+#ifdef __XOP__
+#include <xopintrin.h>
+#endif
+
+#ifdef __TBM__
+#include <tbmintrin.h>
+#endif
+
+#ifdef __F16C__
+#include <f16cintrin.h>
+#endif
+
+/* FIXME: LWP */
+
+#endif /* __X86INTRIN_H */
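A note on the umbrella header just added (annotation, not part of the patch): x86intrin.h unconditionally includes ia32intrin.h and immintrin.h, then pulls in each extension header only when the compiler predefines the matching feature macro, so a consumer opts in through target flags rather than by including sub-headers directly. A minimal sketch, with the file name, function choice, and build flag given for illustration only:

    /* demo.c; build with: cc -mbmi -c demo.c */
    #include <x86intrin.h>

    unsigned trailing_zeros(unsigned x)
    {
      return __tzcnt_u32(x);  /* from bmiintrin.h, visible because -mbmi defines __BMI__ */
    }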
diff --git a/c_headers/xmmintrin.h b/c_headers/xmmintrin.h
new file mode 100644
index 0000000000..0d58c75302
--- /dev/null
+++ b/c_headers/xmmintrin.h
@@ -0,0 +1,1008 @@
+/*===---- xmmintrin.h - SSE intrinsics -------------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __XMMINTRIN_H
+#define __XMMINTRIN_H
+
+#ifndef __SSE__
+#error "SSE instruction set not enabled"
+#else
+
+#include <mmintrin.h>
+
+typedef int __v4si __attribute__((__vector_size__(16)));
+typedef float __v4sf __attribute__((__vector_size__(16)));
+typedef float __m128 __attribute__((__vector_size__(16)));
+
+/* This header should only be included in a hosted environment as it depends on
+ * a standard library to provide allocation routines. */
+#if __STDC_HOSTED__
+#include <mm_malloc.h>
+#endif
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_add_ss(__m128 __a, __m128 __b)
+{
+  __a[0] += __b[0];
+  return __a;
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_add_ps(__m128 __a, __m128 __b)
+{
+  return __a + __b;
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_sub_ss(__m128 __a, __m128 __b)
+{
+  __a[0] -= __b[0];
+  return __a;
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_sub_ps(__m128 __a, __m128 __b)
+{
+  return __a - __b;
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mul_ss(__m128 __a, __m128 __b)
+{
+  __a[0] *= __b[0];
+  return __a;
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mul_ps(__m128 __a, __m128 __b)
+{
+  return __a * __b;
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_div_ss(__m128 __a, __m128 __b)
+{
+  __a[0] /= __b[0];
+  return __a;
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_div_ps(__m128 __a, __m128 __b)
+{
+  return __a / __b;
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_sqrt_ss(__m128 __a)
+{
+  __m128 __c = __builtin_ia32_sqrtss(__a);
+  return (__m128) { __c[0], __a[1], __a[2], __a[3] };
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_sqrt_ps(__m128 __a)
+{
+  return __builtin_ia32_sqrtps(__a);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_rcp_ss(__m128 __a)
+{
+  __m128 __c = __builtin_ia32_rcpss(__a);
+  return (__m128) { __c[0], __a[1], __a[2], __a[3] };
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_rcp_ps(__m128 __a)
+{
+  return __builtin_ia32_rcpps(__a);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_rsqrt_ss(__m128 __a)
+{
+  __m128 __c = __builtin_ia32_rsqrtss(__a);
+  return (__m128) { __c[0], __a[1], __a[2], __a[3] };
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_rsqrt_ps(__m128 __a)
+{
+  return __builtin_ia32_rsqrtps(__a);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_min_ss(__m128 __a, __m128 __b)
+{
+  return __builtin_ia32_minss(__a, __b);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_min_ps(__m128 __a, __m128 __b)
+{
+  return __builtin_ia32_minps(__a, __b);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_max_ss(__m128 __a, __m128 __b)
+{
+  return __builtin_ia32_maxss(__a, __b);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_max_ps(__m128 __a, __m128 __b)
+{
+  return __builtin_ia32_maxps(__a, __b);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_and_ps(__m128 __a, __m128 __b)
+{
+  return (__m128)((__v4si)__a & (__v4si)__b);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_andnot_ps(__m128 __a, __m128 __b)
+{
+  return
(__m128)(~(__v4si)__a & (__v4si)__b); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_or_ps(__m128 __a, __m128 __b) +{ + return (__m128)((__v4si)__a | (__v4si)__b); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_xor_ps(__m128 __a, __m128 __b) +{ + return (__m128)((__v4si)__a ^ (__v4si)__b); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cmpeq_ss(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpeqss(__a, __b); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cmpeq_ps(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpeqps(__a, __b); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cmplt_ss(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpltss(__a, __b); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cmplt_ps(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpltps(__a, __b); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cmple_ss(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpless(__a, __b); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cmple_ps(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpleps(__a, __b); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cmpgt_ss(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_shufflevector(__a, + __builtin_ia32_cmpltss(__b, __a), + 4, 1, 2, 3); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cmpgt_ps(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpltps(__b, __a); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cmpge_ss(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_shufflevector(__a, + __builtin_ia32_cmpless(__b, __a), + 4, 1, 2, 3); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cmpge_ps(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpleps(__b, __a); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cmpneq_ss(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpneqss(__a, __b); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cmpneq_ps(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpneqps(__a, __b); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cmpnlt_ss(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpnltss(__a, __b); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cmpnlt_ps(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpnltps(__a, __b); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cmpnle_ss(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpnless(__a, __b); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cmpnle_ps(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpnleps(__a, __b); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cmpngt_ss(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_shufflevector(__a, + __builtin_ia32_cmpnltss(__b, __a), + 4, 1, 2, 3); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cmpngt_ps(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpnltps(__b, __a); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cmpnge_ss(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_shufflevector(__a, + __builtin_ia32_cmpnless(__b, __a), + 4, 1, 2, 3); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cmpnge_ps(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpnleps(__b, __a); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cmpord_ss(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpordss(__a, __b); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS 
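+/* In the _ss compare forms above, __builtin_shufflevector(__a, cmp, 4, 1, 2, 3)
+ * indexes the concatenation of both operands, so index 4 is lane 0 of the
+ * comparison result while 1..3 keep the upper lanes of __a: only the low
+ * float receives the compare mask, as the scalar semantics require. For
+ * example (illustrative, not part of the upstream header), _mm_cmpgt_ss(a, b)
+ * produces { a0 > b0 ? all-ones : 0, a1, a2, a3 } reinterpreted as floats. */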
+_mm_cmpord_ps(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpordps(__a, __b); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cmpunord_ss(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpunordss(__a, __b); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cmpunord_ps(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpunordps(__a, __b); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_comieq_ss(__m128 __a, __m128 __b) +{ + return __builtin_ia32_comieq(__a, __b); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_comilt_ss(__m128 __a, __m128 __b) +{ + return __builtin_ia32_comilt(__a, __b); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_comile_ss(__m128 __a, __m128 __b) +{ + return __builtin_ia32_comile(__a, __b); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_comigt_ss(__m128 __a, __m128 __b) +{ + return __builtin_ia32_comigt(__a, __b); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_comige_ss(__m128 __a, __m128 __b) +{ + return __builtin_ia32_comige(__a, __b); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_comineq_ss(__m128 __a, __m128 __b) +{ + return __builtin_ia32_comineq(__a, __b); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_ucomieq_ss(__m128 __a, __m128 __b) +{ + return __builtin_ia32_ucomieq(__a, __b); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_ucomilt_ss(__m128 __a, __m128 __b) +{ + return __builtin_ia32_ucomilt(__a, __b); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_ucomile_ss(__m128 __a, __m128 __b) +{ + return __builtin_ia32_ucomile(__a, __b); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_ucomigt_ss(__m128 __a, __m128 __b) +{ + return __builtin_ia32_ucomigt(__a, __b); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_ucomige_ss(__m128 __a, __m128 __b) +{ + return __builtin_ia32_ucomige(__a, __b); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_ucomineq_ss(__m128 __a, __m128 __b) +{ + return __builtin_ia32_ucomineq(__a, __b); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_cvtss_si32(__m128 __a) +{ + return __builtin_ia32_cvtss2si(__a); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_cvt_ss2si(__m128 __a) +{ + return _mm_cvtss_si32(__a); +} + +#ifdef __x86_64__ + +static __inline__ long long __DEFAULT_FN_ATTRS +_mm_cvtss_si64(__m128 __a) +{ + return __builtin_ia32_cvtss2si64(__a); +} + +#endif + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_cvtps_pi32(__m128 __a) +{ + return (__m64)__builtin_ia32_cvtps2pi(__a); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_cvt_ps2pi(__m128 __a) +{ + return _mm_cvtps_pi32(__a); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_cvttss_si32(__m128 __a) +{ + return __a[0]; +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_cvtt_ss2si(__m128 __a) +{ + return _mm_cvttss_si32(__a); +} + +static __inline__ long long __DEFAULT_FN_ATTRS +_mm_cvttss_si64(__m128 __a) +{ + return __a[0]; +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_cvttps_pi32(__m128 __a) +{ + return (__m64)__builtin_ia32_cvttps2pi(__a); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_cvtt_ps2pi(__m128 __a) +{ + return _mm_cvttps_pi32(__a); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cvtsi32_ss(__m128 __a, int __b) +{ + __a[0] = __b; + return __a; +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cvt_si2ss(__m128 __a, int __b) +{ + return _mm_cvtsi32_ss(__a, __b); +} + +#ifdef __x86_64__ + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cvtsi64_ss(__m128 __a, long long __b) +{ + __a[0] = __b; + return __a; +} + +#endif + +static 
__inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cvtpi32_ps(__m128 __a, __m64 __b) +{ + return __builtin_ia32_cvtpi2ps(__a, (__v2si)__b); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cvt_pi2ps(__m128 __a, __m64 __b) +{ + return _mm_cvtpi32_ps(__a, __b); +} + +static __inline__ float __DEFAULT_FN_ATTRS +_mm_cvtss_f32(__m128 __a) +{ + return __a[0]; +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_loadh_pi(__m128 __a, const __m64 *__p) +{ + typedef float __mm_loadh_pi_v2f32 __attribute__((__vector_size__(8))); + struct __mm_loadh_pi_struct { + __mm_loadh_pi_v2f32 __u; + } __attribute__((__packed__, __may_alias__)); + __mm_loadh_pi_v2f32 __b = ((struct __mm_loadh_pi_struct*)__p)->__u; + __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1); + return __builtin_shufflevector(__a, __bb, 0, 1, 4, 5); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_loadl_pi(__m128 __a, const __m64 *__p) +{ + typedef float __mm_loadl_pi_v2f32 __attribute__((__vector_size__(8))); + struct __mm_loadl_pi_struct { + __mm_loadl_pi_v2f32 __u; + } __attribute__((__packed__, __may_alias__)); + __mm_loadl_pi_v2f32 __b = ((struct __mm_loadl_pi_struct*)__p)->__u; + __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1); + return __builtin_shufflevector(__a, __bb, 4, 5, 2, 3); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_load_ss(const float *__p) +{ + struct __mm_load_ss_struct { + float __u; + } __attribute__((__packed__, __may_alias__)); + float __u = ((struct __mm_load_ss_struct*)__p)->__u; + return (__m128){ __u, 0, 0, 0 }; +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_load1_ps(const float *__p) +{ + struct __mm_load1_ps_struct { + float __u; + } __attribute__((__packed__, __may_alias__)); + float __u = ((struct __mm_load1_ps_struct*)__p)->__u; + return (__m128){ __u, __u, __u, __u }; +} + +#define _mm_load_ps1(p) _mm_load1_ps(p) + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_load_ps(const float *__p) +{ + return *(__m128*)__p; +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_loadu_ps(const float *__p) +{ + struct __loadu_ps { + __m128 __v; + } __attribute__((__packed__, __may_alias__)); + return ((struct __loadu_ps*)__p)->__v; +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_loadr_ps(const float *__p) +{ + __m128 __a = _mm_load_ps(__p); + return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_set_ss(float __w) +{ + return (__m128){ __w, 0, 0, 0 }; +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_set1_ps(float __w) +{ + return (__m128){ __w, __w, __w, __w }; +} + +/* Microsoft specific. 
*/ +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_set_ps1(float __w) +{ + return _mm_set1_ps(__w); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_set_ps(float __z, float __y, float __x, float __w) +{ + return (__m128){ __w, __x, __y, __z }; +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_setr_ps(float __z, float __y, float __x, float __w) +{ + return (__m128){ __z, __y, __x, __w }; +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_setzero_ps(void) +{ + return (__m128){ 0, 0, 0, 0 }; +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_storeh_pi(__m64 *__p, __m128 __a) +{ + __builtin_ia32_storehps((__v2si *)__p, __a); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_storel_pi(__m64 *__p, __m128 __a) +{ + __builtin_ia32_storelps((__v2si *)__p, __a); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_store_ss(float *__p, __m128 __a) +{ + struct __mm_store_ss_struct { + float __u; + } __attribute__((__packed__, __may_alias__)); + ((struct __mm_store_ss_struct*)__p)->__u = __a[0]; +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_storeu_ps(float *__p, __m128 __a) +{ + __builtin_ia32_storeups(__p, __a); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_store1_ps(float *__p, __m128 __a) +{ + __a = __builtin_shufflevector(__a, __a, 0, 0, 0, 0); + _mm_storeu_ps(__p, __a); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_store_ps1(float *__p, __m128 __a) +{ + return _mm_store1_ps(__p, __a); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_store_ps(float *__p, __m128 __a) +{ + *(__m128 *)__p = __a; +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_storer_ps(float *__p, __m128 __a) +{ + __a = __builtin_shufflevector(__a, __a, 3, 2, 1, 0); + _mm_store_ps(__p, __a); +} + +#define _MM_HINT_T0 3 +#define _MM_HINT_T1 2 +#define _MM_HINT_T2 1 +#define _MM_HINT_NTA 0 + +#ifndef _MSC_VER +/* FIXME: We have to #define this because "sel" must be a constant integer, and + Sema doesn't do any form of constant propagation yet. 
*/ + +#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), 0, (sel))) +#endif + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_stream_pi(__m64 *__p, __m64 __a) +{ + __builtin_ia32_movntq(__p, __a); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_stream_ps(float *__p, __m128 __a) +{ + __builtin_ia32_movntps(__p, __a); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_sfence(void) +{ + __builtin_ia32_sfence(); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_extract_pi16(__m64 __a, int __n) +{ + __v4hi __b = (__v4hi)__a; + return (unsigned short)__b[__n & 3]; +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_insert_pi16(__m64 __a, int __d, int __n) +{ + __v4hi __b = (__v4hi)__a; + __b[__n & 3] = __d; + return (__m64)__b; +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_max_pi16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_pmaxsw((__v4hi)__a, (__v4hi)__b); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_max_pu8(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_pmaxub((__v8qi)__a, (__v8qi)__b); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_min_pi16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_pminsw((__v4hi)__a, (__v4hi)__b); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_min_pu8(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_pminub((__v8qi)__a, (__v8qi)__b); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_movemask_pi8(__m64 __a) +{ + return __builtin_ia32_pmovmskb((__v8qi)__a); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_mulhi_pu16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_pmulhuw((__v4hi)__a, (__v4hi)__b); +} + +#define _mm_shuffle_pi16(a, n) __extension__ ({ \ + __m64 __a = (a); \ + (__m64)__builtin_ia32_pshufw((__v4hi)__a, (n)); }) + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_maskmove_si64(__m64 __d, __m64 __n, char *__p) +{ + __builtin_ia32_maskmovq((__v8qi)__d, (__v8qi)__n, __p); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_avg_pu8(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_pavgb((__v8qi)__a, (__v8qi)__b); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_avg_pu16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_pavgw((__v4hi)__a, (__v4hi)__b); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_sad_pu8(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_psadbw((__v8qi)__a, (__v8qi)__b); +} + +static __inline__ unsigned int __DEFAULT_FN_ATTRS +_mm_getcsr(void) +{ + return __builtin_ia32_stmxcsr(); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_setcsr(unsigned int __i) +{ + __builtin_ia32_ldmxcsr(__i); +} + +#define _mm_shuffle_ps(a, b, mask) __extension__ ({ \ + __m128 __a = (a); \ + __m128 __b = (b); \ + (__m128)__builtin_shufflevector((__v4sf)__a, (__v4sf)__b, \ + (mask) & 0x3, ((mask) & 0xc) >> 2, \ + (((mask) & 0x30) >> 4) + 4, \ + (((mask) & 0xc0) >> 6) + 4); }) + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_unpackhi_ps(__m128 __a, __m128 __b) +{ + return __builtin_shufflevector(__a, __b, 2, 6, 3, 7); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_unpacklo_ps(__m128 __a, __m128 __b) +{ + return __builtin_shufflevector(__a, __b, 0, 4, 1, 5); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_move_ss(__m128 __a, __m128 __b) +{ + return __builtin_shufflevector(__a, __b, 4, 1, 2, 3); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_movehl_ps(__m128 __a, __m128 __b) +{ + return __builtin_shufflevector(__a, __b, 6, 7, 2, 3); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_movelh_ps(__m128 __a, 
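+/* Note on _mm_shuffle_ps above (annotation, not part of the upstream
+ * header): the 8-bit mask packs one source lane number per two bits; bits
+ * 1:0 and 3:2 select the low two output lanes from __a, and bits 5:4 and
+ * 7:6 select the high two from __b, hence the "+ 4" into the combined
+ * shufflevector index space. _MM_SHUFFLE, defined later in this file,
+ * builds such masks: _MM_SHUFFLE(0, 1, 2, 3) == 0x1B, and
+ * _mm_shuffle_ps(v, v, 0x1B) reverses the four floats of v. */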
__m128 __b) +{ + return __builtin_shufflevector(__a, __b, 0, 1, 4, 5); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cvtpi16_ps(__m64 __a) +{ + __m64 __b, __c; + __m128 __r; + + __b = _mm_setzero_si64(); + __b = _mm_cmpgt_pi16(__b, __a); + __c = _mm_unpackhi_pi16(__a, __b); + __r = _mm_setzero_ps(); + __r = _mm_cvtpi32_ps(__r, __c); + __r = _mm_movelh_ps(__r, __r); + __c = _mm_unpacklo_pi16(__a, __b); + __r = _mm_cvtpi32_ps(__r, __c); + + return __r; +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cvtpu16_ps(__m64 __a) +{ + __m64 __b, __c; + __m128 __r; + + __b = _mm_setzero_si64(); + __c = _mm_unpackhi_pi16(__a, __b); + __r = _mm_setzero_ps(); + __r = _mm_cvtpi32_ps(__r, __c); + __r = _mm_movelh_ps(__r, __r); + __c = _mm_unpacklo_pi16(__a, __b); + __r = _mm_cvtpi32_ps(__r, __c); + + return __r; +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cvtpi8_ps(__m64 __a) +{ + __m64 __b; + + __b = _mm_setzero_si64(); + __b = _mm_cmpgt_pi8(__b, __a); + __b = _mm_unpacklo_pi8(__a, __b); + + return _mm_cvtpi16_ps(__b); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cvtpu8_ps(__m64 __a) +{ + __m64 __b; + + __b = _mm_setzero_si64(); + __b = _mm_unpacklo_pi8(__a, __b); + + return _mm_cvtpi16_ps(__b); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_cvtpi32x2_ps(__m64 __a, __m64 __b) +{ + __m128 __c; + + __c = _mm_setzero_ps(); + __c = _mm_cvtpi32_ps(__c, __b); + __c = _mm_movelh_ps(__c, __c); + + return _mm_cvtpi32_ps(__c, __a); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_cvtps_pi16(__m128 __a) +{ + __m64 __b, __c; + + __b = _mm_cvtps_pi32(__a); + __a = _mm_movehl_ps(__a, __a); + __c = _mm_cvtps_pi32(__a); + + return _mm_packs_pi32(__b, __c); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_cvtps_pi8(__m128 __a) +{ + __m64 __b, __c; + + __b = _mm_cvtps_pi16(__a); + __c = _mm_setzero_si64(); + + return _mm_packs_pi16(__b, __c); +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_movemask_ps(__m128 __a) +{ + return __builtin_ia32_movmskps(__a); +} + +#define _MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) + +#define _MM_EXCEPT_INVALID (0x0001) +#define _MM_EXCEPT_DENORM (0x0002) +#define _MM_EXCEPT_DIV_ZERO (0x0004) +#define _MM_EXCEPT_OVERFLOW (0x0008) +#define _MM_EXCEPT_UNDERFLOW (0x0010) +#define _MM_EXCEPT_INEXACT (0x0020) +#define _MM_EXCEPT_MASK (0x003f) + +#define _MM_MASK_INVALID (0x0080) +#define _MM_MASK_DENORM (0x0100) +#define _MM_MASK_DIV_ZERO (0x0200) +#define _MM_MASK_OVERFLOW (0x0400) +#define _MM_MASK_UNDERFLOW (0x0800) +#define _MM_MASK_INEXACT (0x1000) +#define _MM_MASK_MASK (0x1f80) + +#define _MM_ROUND_NEAREST (0x0000) +#define _MM_ROUND_DOWN (0x2000) +#define _MM_ROUND_UP (0x4000) +#define _MM_ROUND_TOWARD_ZERO (0x6000) +#define _MM_ROUND_MASK (0x6000) + +#define _MM_FLUSH_ZERO_MASK (0x8000) +#define _MM_FLUSH_ZERO_ON (0x8000) +#define _MM_FLUSH_ZERO_OFF (0x0000) + +#define _MM_GET_EXCEPTION_MASK() (_mm_getcsr() & _MM_MASK_MASK) +#define _MM_GET_EXCEPTION_STATE() (_mm_getcsr() & _MM_EXCEPT_MASK) +#define _MM_GET_FLUSH_ZERO_MODE() (_mm_getcsr() & _MM_FLUSH_ZERO_MASK) +#define _MM_GET_ROUNDING_MODE() (_mm_getcsr() & _MM_ROUND_MASK) + +#define _MM_SET_EXCEPTION_MASK(x) (_mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | (x))) +#define _MM_SET_EXCEPTION_STATE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | (x))) +#define _MM_SET_FLUSH_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | (x))) +#define _MM_SET_ROUNDING_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (x))) + +#define 
_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
+do { \
+  __m128 tmp3, tmp2, tmp1, tmp0; \
+  tmp0 = _mm_unpacklo_ps((row0), (row1)); \
+  tmp2 = _mm_unpacklo_ps((row2), (row3)); \
+  tmp1 = _mm_unpackhi_ps((row0), (row1)); \
+  tmp3 = _mm_unpackhi_ps((row2), (row3)); \
+  (row0) = _mm_movelh_ps(tmp0, tmp2); \
+  (row1) = _mm_movehl_ps(tmp2, tmp0); \
+  (row2) = _mm_movelh_ps(tmp1, tmp3); \
+  (row3) = _mm_movehl_ps(tmp3, tmp1); \
+} while (0)
+
+/* Aliases for compatibility. */
+#define _m_pextrw _mm_extract_pi16
+#define _m_pinsrw _mm_insert_pi16
+#define _m_pmaxsw _mm_max_pi16
+#define _m_pmaxub _mm_max_pu8
+#define _m_pminsw _mm_min_pi16
+#define _m_pminub _mm_min_pu8
+#define _m_pmovmskb _mm_movemask_pi8
+#define _m_pmulhuw _mm_mulhi_pu16
+#define _m_pshufw _mm_shuffle_pi16
+#define _m_maskmovq _mm_maskmove_si64
+#define _m_pavgb _mm_avg_pu8
+#define _m_pavgw _mm_avg_pu16
+#define _m_psadbw _mm_sad_pu8
+#define _m_ _mm_
+#define _m_ _mm_
+
+#undef __DEFAULT_FN_ATTRS
+
+/* Ugly hack for backwards-compatibility (compatible with gcc) */
+#if defined(__SSE2__) && !__has_feature(modules)
+#include <emmintrin.h>
+#endif
+
+#endif /* __SSE__ */
+
+#endif /* __XMMINTRIN_H */
diff --git a/c_headers/xopintrin.h b/c_headers/xopintrin.h
new file mode 100644
index 0000000000..2eb35c4be8
--- /dev/null
+++ b/c_headers/xopintrin.h
@@ -0,0 +1,809 @@
+/*===---- xopintrin.h - XOP intrinsics -------------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __X86INTRIN_H
+#error "Never use <xopintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef __XOPINTRIN_H
+#define __XOPINTRIN_H
+
+#ifndef __XOP__
+# error "XOP instruction set is not enabled"
+#else
+
+#include <smmintrin.h>
+
+/* Define the default attributes for the functions in this file.
*/ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_haddw_epi8(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_haddd_epi8(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_haddq_epi8(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_haddd_epi16(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_haddq_epi16(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_haddq_epi32(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphadddq((__v4si)__A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_haddw_epu8(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_haddd_epu8(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A); +} + +static __inline__ __m128i 
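+/* The _mm_hadd*_ep[iu]8/16/32 functions in this group are XOP's widening
+ * horizontal adds: each vphadd builtin sums adjacent groups of narrow
+ * elements into the next wider type (bw: byte pairs to words, bd: byte
+ * quads to doublewords, bq: byte octets to quadwords), and the epu variants
+ * zero-extend rather than sign-extend. Illustrative sketch, assuming -mxop
+ * (not part of the upstream header): if x holds the bytes 1, 2, 3, 4, ...,
+ * then _mm_haddw_epi8(x) holds the words 3, 7, 11, ... */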
__DEFAULT_FN_ATTRS +_mm_haddq_epu8(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_haddd_epu16(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_haddq_epu16(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_haddq_epu32(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_hsubw_epi8(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_hsubd_epi16(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_hsubq_epi32(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpcmov(__A, __B, __C); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C) +{ + return (__m256i)__builtin_ia32_vpcmov_256(__A, __B, __C); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_rot_epi8(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_rot_epi16(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_rot_epi32(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_rot_epi64(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B); +} + +#define _mm_roti_epi8(A, N) __extension__ ({ \ + __m128i __A = (A); \ + (__m128i)__builtin_ia32_vprotbi((__v16qi)__A, (N)); }) + +#define _mm_roti_epi16(A, N) __extension__ ({ \ + __m128i __A = (A); \ + (__m128i)__builtin_ia32_vprotwi((__v8hi)__A, (N)); }) + +#define _mm_roti_epi32(A, N) __extension__ ({ \ + __m128i __A = (A); \ + (__m128i)__builtin_ia32_vprotdi((__v4si)__A, (N)); }) + +#define _mm_roti_epi64(A, N) __extension__ ({ \ + __m128i __A = (A); \ + (__m128i)__builtin_ia32_vprotqi((__v2di)__A, (N)); }) + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_shl_epi8(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_shl_epi16(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_shl_epi32(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_shl_epi64(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_sha_epi8(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_sha_epi16(__m128i __A, __m128i __B) 
+{ + return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_sha_epi32(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_sha_epi64(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B); +} + +#define _mm_com_epu8(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomub((__v16qi)__A, (__v16qi)__B, (N)); }) + +#define _mm_com_epu16(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomuw((__v8hi)__A, (__v8hi)__B, (N)); }) + +#define _mm_com_epu32(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomud((__v4si)__A, (__v4si)__B, (N)); }) + +#define _mm_com_epu64(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomuq((__v2di)__A, (__v2di)__B, (N)); }) + +#define _mm_com_epi8(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomb((__v16qi)__A, (__v16qi)__B, (N)); }) + +#define _mm_com_epi16(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomw((__v8hi)__A, (__v8hi)__B, (N)); }) + +#define _mm_com_epi32(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomd((__v4si)__A, (__v4si)__B, (N)); }) + +#define _mm_com_epi64(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomq((__v2di)__A, (__v2di)__B, (N)); }) + +#define _MM_PCOMCTRL_LT 0 +#define _MM_PCOMCTRL_LE 1 +#define _MM_PCOMCTRL_GT 2 +#define _MM_PCOMCTRL_GE 3 +#define _MM_PCOMCTRL_EQ 4 +#define _MM_PCOMCTRL_NEQ 5 +#define _MM_PCOMCTRL_FALSE 6 +#define _MM_PCOMCTRL_TRUE 7 + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comlt_epu8(__m128i __A, __m128i __B) +{ + return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comle_epu8(__m128i __A, __m128i __B) +{ + return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comgt_epu8(__m128i __A, __m128i __B) +{ + return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comge_epu8(__m128i __A, __m128i __B) +{ + return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comeq_epu8(__m128i __A, __m128i __B) +{ + return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comneq_epu8(__m128i __A, __m128i __B) +{ + return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comfalse_epu8(__m128i __A, __m128i __B) +{ + return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comtrue_epu8(__m128i __A, __m128i __B) +{ + return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comlt_epu16(__m128i __A, __m128i __B) +{ + return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comle_epu16(__m128i __A, __m128i __B) +{ + return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comgt_epu16(__m128i __A, __m128i __B) +{ + return 
_mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comge_epu16(__m128i __A, __m128i __B) +{ + return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comeq_epu16(__m128i __A, __m128i __B) +{ + return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comneq_epu16(__m128i __A, __m128i __B) +{ + return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comfalse_epu16(__m128i __A, __m128i __B) +{ + return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comtrue_epu16(__m128i __A, __m128i __B) +{ + return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comlt_epu32(__m128i __A, __m128i __B) +{ + return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comle_epu32(__m128i __A, __m128i __B) +{ + return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comgt_epu32(__m128i __A, __m128i __B) +{ + return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comge_epu32(__m128i __A, __m128i __B) +{ + return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comeq_epu32(__m128i __A, __m128i __B) +{ + return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comneq_epu32(__m128i __A, __m128i __B) +{ + return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comfalse_epu32(__m128i __A, __m128i __B) +{ + return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comtrue_epu32(__m128i __A, __m128i __B) +{ + return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comlt_epu64(__m128i __A, __m128i __B) +{ + return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comle_epu64(__m128i __A, __m128i __B) +{ + return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comgt_epu64(__m128i __A, __m128i __B) +{ + return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comge_epu64(__m128i __A, __m128i __B) +{ + return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comeq_epu64(__m128i __A, __m128i __B) +{ + return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comneq_epu64(__m128i __A, __m128i __B) +{ + return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comfalse_epu64(__m128i __A, __m128i __B) +{ + return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comtrue_epu64(__m128i __A, __m128i __B) +{ + return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comlt_epi8(__m128i __A, __m128i __B) +{ + return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comle_epi8(__m128i __A, __m128i __B) +{ + return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comgt_epi8(__m128i 
__A, __m128i __B) +{ + return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comge_epi8(__m128i __A, __m128i __B) +{ + return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comeq_epi8(__m128i __A, __m128i __B) +{ + return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comneq_epi8(__m128i __A, __m128i __B) +{ + return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comfalse_epi8(__m128i __A, __m128i __B) +{ + return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comtrue_epi8(__m128i __A, __m128i __B) +{ + return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comlt_epi16(__m128i __A, __m128i __B) +{ + return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comle_epi16(__m128i __A, __m128i __B) +{ + return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comgt_epi16(__m128i __A, __m128i __B) +{ + return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comge_epi16(__m128i __A, __m128i __B) +{ + return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comeq_epi16(__m128i __A, __m128i __B) +{ + return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comneq_epi16(__m128i __A, __m128i __B) +{ + return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comfalse_epi16(__m128i __A, __m128i __B) +{ + return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comtrue_epi16(__m128i __A, __m128i __B) +{ + return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comlt_epi32(__m128i __A, __m128i __B) +{ + return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comle_epi32(__m128i __A, __m128i __B) +{ + return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comgt_epi32(__m128i __A, __m128i __B) +{ + return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comge_epi32(__m128i __A, __m128i __B) +{ + return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comeq_epi32(__m128i __A, __m128i __B) +{ + return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comneq_epi32(__m128i __A, __m128i __B) +{ + return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comfalse_epi32(__m128i __A, __m128i __B) +{ + return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comtrue_epi32(__m128i __A, __m128i __B) +{ + return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comlt_epi64(__m128i __A, __m128i __B) +{ + return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comle_epi64(__m128i __A, __m128i __B) +{ + return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS 
+_mm_comgt_epi64(__m128i __A, __m128i __B) +{ + return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comge_epi64(__m128i __A, __m128i __B) +{ + return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comeq_epi64(__m128i __A, __m128i __B) +{ + return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comneq_epi64(__m128i __A, __m128i __B) +{ + return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comfalse_epi64(__m128i __A, __m128i __B) +{ + return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_comtrue_epi64(__m128i __A, __m128i __B) +{ + return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE); +} + +#define _mm_permute2_pd(X, Y, C, I) __extension__ ({ \ + __m128d __X = (X); \ + __m128d __Y = (Y); \ + __m128i __C = (C); \ + (__m128d)__builtin_ia32_vpermil2pd((__v2df)__X, (__v2df)__Y, \ + (__v2di)__C, (I)); }) + +#define _mm256_permute2_pd(X, Y, C, I) __extension__ ({ \ + __m256d __X = (X); \ + __m256d __Y = (Y); \ + __m256i __C = (C); \ + (__m256d)__builtin_ia32_vpermil2pd256((__v4df)__X, (__v4df)__Y, \ + (__v4di)__C, (I)); }) + +#define _mm_permute2_ps(X, Y, C, I) __extension__ ({ \ + __m128 __X = (X); \ + __m128 __Y = (Y); \ + __m128i __C = (C); \ + (__m128)__builtin_ia32_vpermil2ps((__v4sf)__X, (__v4sf)__Y, \ + (__v4si)__C, (I)); }) + +#define _mm256_permute2_ps(X, Y, C, I) __extension__ ({ \ + __m256 __X = (X); \ + __m256 __Y = (Y); \ + __m256i __C = (C); \ + (__m256)__builtin_ia32_vpermil2ps256((__v8sf)__X, (__v8sf)__Y, \ + (__v8si)__C, (I)); }) + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_frcz_ss(__m128 __A) +{ + return (__m128)__builtin_ia32_vfrczss((__v4sf)__A); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_frcz_sd(__m128d __A) +{ + return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_frcz_ps(__m128 __A) +{ + return (__m128)__builtin_ia32_vfrczps((__v4sf)__A); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_frcz_pd(__m128d __A) +{ + return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_frcz_ps(__m256 __A) +{ + return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_frcz_pd(__m256d __A) +{ + return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A); +} + +#undef __DEFAULT_FN_ATTRS + +#endif /* __XOP__ */ + +#endif /* __XOPINTRIN_H */ diff --git a/c_headers/xtestintrin.h b/c_headers/xtestintrin.h new file mode 100644 index 0000000000..9d3378fd1e --- /dev/null +++ b/c_headers/xtestintrin.h @@ -0,0 +1,41 @@ +/*===---- xtestintrin.h - XTEST intrinsic ---------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
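The comparison wrappers above simply pair each _MM_PCOMCTRL_* predicate with the generic _mm_com_* macro, one specialization per element type. A minimal usage sketch, illustrative only and not part of the patch (the helper names are hypothetical; an XOP-capable CPU and compilation with -mxop are assumed):

#include <x86intrin.h>

/* All-ones in each byte lane where the unsigned byte of a is below that of b;
 * _mm_comlt_epu8 expands to _mm_com_epu8(a, b, _MM_PCOMCTRL_LT). */
static __m128i bytes_below(__m128i a, __m128i b)
{
    return _mm_comlt_epu8(a, b);
}

/* Rotate each 32-bit lane left by 3 bits via the immediate form of vprot. */
static __m128i rotl3_epi32(__m128i v)
{
    return _mm_roti_epi32(v, 3);
}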
diff --git a/c_headers/xtestintrin.h b/c_headers/xtestintrin.h
new file mode 100644
index 0000000000..9d3378fd1e
--- /dev/null
+++ b/c_headers/xtestintrin.h
@@ -0,0 +1,41 @@
+/*===---- xtestintrin.h - XTEST intrinsic ---------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error "Never use <xtestintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __XTESTINTRIN_H
+#define __XTESTINTRIN_H
+
+/* xtest returns non-zero if the instruction is executed within an RTM or active
+ * HLE region. */
+/* FIXME: This can be an either or for RTM/HLE. Deal with this when HLE is
+ * supported. */
+static __inline__ int
+    __attribute__((__always_inline__, __nodebug__, __target__("rtm")))
+    _xtest(void) {
+  return __builtin_ia32_xtest();
+}
+
+#endif
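_xtest is normally paired with the RTM intrinsics from rtmintrin.h, which this patch also vendors. A sketch of typical use, illustrative only and not part of the patch (assumes RTM-capable hardware and -mrtm; in_transaction is a hypothetical helper):

#include <immintrin.h>

/* Non-zero only while executing inside a transaction begun with _xbegin(). */
static int in_transaction(void)
{
    return _xtest();
}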
diff --git a/src/config.h.in b/src/config.h.in
index 75757bdc43..d4ae8a74e6 100644
--- a/src/config.h.in
+++ b/src/config.h.in
@@ -6,4 +6,6 @@
 #define ZIG_VERSION_PATCH @ZIG_VERSION_PATCH@
 #define ZIG_VERSION_STRING "@ZIG_VERSION@"
 
+#define ZIG_HEADERS_DIR "@CMAKE_INSTALL_PREFIX@/@C_HEADERS_DEST@"
+
 #endif
diff --git a/src/parseh.cpp b/src/parseh.cpp
index 8d972fc778..411819b35b 100644
--- a/src/parseh.cpp
+++ b/src/parseh.cpp
@@ -1,4 +1,12 @@
+/*
+ * Copyright (c) 2015 Andrew Kelley
+ *
+ * This file is part of zig, which is MIT licensed.
+ * See http://opensource.org/licenses/MIT
+ */
+
 #include "parseh.hpp"
+#include "config.h"
 
 #include <clang-c/Index.h>
 
@@ -14,6 +22,7 @@ struct Fn {
     Buf *return_type;
     Arg *args;
     int arg_count;
+    bool is_variadic;
 };
 
 struct ParseH {
@@ -254,7 +263,12 @@ static enum CXChildVisitResult visit_fn_children(CXCursor cursor, CXCursor paren
     assert(p->cur_fn);
     assert(p->arg_index < p->cur_fn->arg_count);
     CXString name = clang_getCursorSpelling(cursor);
-    buf_init_from_str(&p->cur_fn->args[p->arg_index].name, clang_getCString(name));
+    Buf *arg_name = &p->cur_fn->args[p->arg_index].name;
+    buf_init_from_str(arg_name, clang_getCString(name));
+    if (buf_len(arg_name) == 0) {
+        buf_appendf(arg_name, "arg%d", p->arg_index);
+    }
+
     p->arg_index += 1;
     return CXChildVisit_Continue;
 }
@@ -280,11 +294,6 @@ static enum CXChildVisitResult fn_visitor(CXCursor cursor, CXCursor parent, CXCl
         return CXChildVisit_Continue;
 
     CXType fn_type = clang_getCursorType(cursor);
-    if (clang_isFunctionTypeVariadic(fn_type)) {
-        print_location(p);
-        fprintf(stderr, "warning: skipping variadic function, not yet supported\n");
-        return CXChildVisit_Continue;
-    }
     if (clang_getFunctionTypeCallingConv(fn_type) != CXCallingConv_C) {
         print_location(p);
         fprintf(stderr, "warning: skipping non c calling convention function, not yet supported\n");
@@ -294,6 +303,8 @@ static enum CXChildVisitResult fn_visitor(CXCursor cursor, CXCursor parent, CXCl
     assert(!p->cur_fn);
     p->cur_fn = allocate<Fn>(1);
 
+    p->cur_fn->is_variadic = clang_isFunctionTypeVariadic(fn_type);
+
    CXType return_type = clang_getResultType(fn_type);
     p->cur_fn->return_type = to_zig_type(p, return_type);
 
@@ -353,6 +364,8 @@ void parse_h_file(const char *target_path, ZigList<char *> *clang_argv, FI
         buf_init_from_str(&tmp_buf, start);
         clang_argv->append(buf_ptr(buf_create_from_buf(&tmp_buf)));
     }
+    clang_argv->append("-isystem");
+    clang_argv->append(ZIG_HEADERS_DIR);
 
     clang_argv->append(nullptr);
 
@@ -398,10 +411,13 @@ void parse_h_file(const char *target_path, ZigList<char *> *clang_argv, FI
     for (int arg_i = 0; arg_i < fn->arg_count; arg_i += 1) {
         Arg *arg = &fn->args[arg_i];
         fprintf(p->f, "%s: %s", buf_ptr(&arg->name), buf_ptr(arg->type));
-        if (arg_i + 1 < fn->arg_count) {
+        if (arg_i + 1 < fn->arg_count || fn->is_variadic) {
             fprintf(p->f, ", ");
         }
     }
+    if (fn->is_variadic) {
+        fprintf(p->f, "...");
+    }
     fprintf(p->f, ")");
     if (!buf_eql_str(fn->return_type, "void")) {
         fprintf(p->f, " -> %s", buf_ptr(fn->return_type));
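Net effect of the parseh.cpp changes: a variadic prototype such as int printf(const char *fmt, ...) is no longer skipped; its arguments are printed as name: type pairs, unnamed parameters receive synthesized names (arg0, arg1, ...), and a trailing ... is emitted before the closing parenthesis. A standalone rendition of the naming fallback, using a hypothetical helper that is not part of the patch:

#include <stdio.h>

/* Mirrors the fallback added to visit_fn_children: clang reports an empty
 * spelling for an unnamed parameter, so synthesize "arg%d" exactly as the
 * patched code does with buf_appendf(arg_name, "arg%d", p->arg_index). */
static void arg_display_name(const char *spelling, int arg_index,
                             char *out, size_t out_size)
{
    if (spelling[0] == '\0')
        snprintf(out, out_size, "arg%d", arg_index);
    else
        snprintf(out, out_size, "%s", spelling);
}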