Merge branch 'llvm19'

Upgrades the LLVM, Clang, and LLD dependencies to LLVM 19.x

Related to #16270

Big thanks to Alex Rønne Petersen for doing the bulk of the upgrade work
in this branch.
This commit is contained in:
Andrew Kelley 2024-09-19 22:47:00 -07:00
commit c6ad4521c7
796 changed files with 62849 additions and 55015 deletions

View File

@ -4,7 +4,7 @@ on:
push:
branches:
- master
- llvm18
- llvm19
concurrency:
# Cancels pending runs when a PR gets updated.
group: ${{ github.head_ref || github.run_id }}-${{ github.actor }}

View File

@ -137,9 +137,9 @@ else()
set(ZIG_SYSTEM_LIBCXX "stdc++" CACHE STRING "system libcxx name for build.zig")
endif()
find_package(llvm 18)
find_package(clang 18)
find_package(lld 18)
find_package(llvm 19)
find_package(clang 19)
find_package(lld 19)
if(ZIG_STATIC_ZLIB)
if (MSVC)
@ -834,6 +834,11 @@ else()
endif()
endif()
option(ZIG2_NO_RTLIB "Build zig2 without linking to a compiler runtime library (for `zig cc` only)" OFF)
if(ZIG2_NO_RTLIB)
set(ZIG2_LINK_FLAGS "${ZIG2_LINK_FLAGS} -rtlib=none")
endif()
set(ZIG1_WASM_MODULE "${PROJECT_SOURCE_DIR}/stage1/zig1.wasm")
set(ZIG1_C_SOURCE "${PROJECT_BINARY_DIR}/zig1.c")
set(ZIG2_C_SOURCE "${PROJECT_BINARY_DIR}/zig2.c")
@ -889,9 +894,7 @@ set(BUILD_COMPILER_RT_ARGS
--name compiler_rt
-femit-bin="${ZIG_COMPILER_RT_C_SOURCE}"
-target "${ZIG_HOST_TARGET_TRIPLE}"
--dep "build_options"
"-Mroot=lib/compiler_rt.zig"
"-Mbuild_options=${ZIG_CONFIG_ZIG_OUT}"
)
add_custom_command(

View File

@ -170,9 +170,7 @@ int main(int argc, char **argv) {
"-ofmt=c", "-OReleaseSmall",
"--name", "compiler_rt", "-femit-bin=compiler_rt.c",
"-target", host_triple,
"--dep", "build_options",
"-Mroot=lib/compiler_rt.zig",
"-Mbuild_options=config.zig",
NULL,
};
print_and_run(child_argv);

View File

@ -1099,6 +1099,8 @@ const clang_libs = [_][]const u8{
"clangToolingCore",
"clangExtractAPI",
"clangSupport",
"clangInstallAPI",
"clangAST",
};
const lld_libs = [_][]const u8{
"lldMinGW",
@ -1120,6 +1122,7 @@ const llvm_libs = [_][]const u8{
"LLVMTextAPIBinaryReader",
"LLVMCoverage",
"LLVMLineEditor",
"LLVMSandboxIR",
"LLVMXCoreDisassembler",
"LLVMXCoreCodeGen",
"LLVMXCoreDesc",
@ -1255,6 +1258,7 @@ const llvm_libs = [_][]const u8{
"LLVMDWARFLinkerParallel",
"LLVMDWARFLinkerClassic",
"LLVMDWARFLinker",
"LLVMCodeGenData",
"LLVMGlobalISel",
"LLVMMIRParser",
"LLVMAsmPrinter",

View File

@ -8,7 +8,7 @@ set -e
ARCH="$(uname -m)"
TARGET="$ARCH-linux-musl"
MCPU="baseline"
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.418+ebd9efa85"
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388"
PREFIX="$HOME/deps/$CACHE_BASENAME"
ZIG="$PREFIX/bin/zig"

View File

@ -8,7 +8,7 @@ set -e
ARCH="$(uname -m)"
TARGET="$ARCH-linux-musl"
MCPU="baseline"
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.418+ebd9efa85"
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388"
PREFIX="$HOME/deps/$CACHE_BASENAME"
ZIG="$PREFIX/bin/zig"

View File

@ -9,7 +9,7 @@ set -e
ZIGDIR="$PWD"
TARGET="$ARCH-macos-none"
MCPU="baseline"
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388"
PREFIX="$HOME/$CACHE_BASENAME"
ZIG="$PREFIX/bin/zig"

View File

@ -9,7 +9,7 @@ set -e
ZIGDIR="$PWD"
TARGET="$ARCH-macos-none"
MCPU="baseline"
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388"
PREFIX="$HOME/$CACHE_BASENAME"
ZIG="$PREFIX/bin/zig"

View File

@ -1,5 +1,5 @@
$TARGET = "$($Env:ARCH)-windows-gnu"
$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388"
$MCPU = "baseline"
$ZIG_LLVM_CLANG_LLD_URL = "https://ziglang.org/deps/$ZIG_LLVM_CLANG_LLD_NAME.zip"
$PREFIX_PATH = "$(Get-Location)\..\$ZIG_LLVM_CLANG_LLD_NAME"

View File

@ -8,7 +8,7 @@ set -e
ARCH="$(uname -m)"
TARGET="$ARCH-linux-musl"
MCPU="baseline"
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.418+ebd9efa85"
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388"
PREFIX="$HOME/deps/$CACHE_BASENAME"
ZIG="$PREFIX/bin/zig"

View File

@ -8,7 +8,7 @@ set -e
ARCH="$(uname -m)"
TARGET="$ARCH-linux-musl"
MCPU="baseline"
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.418+ebd9efa85"
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388"
PREFIX="$HOME/deps/$CACHE_BASENAME"
ZIG="$PREFIX/bin/zig"

View File

@ -6,7 +6,7 @@ set -e
ZIGDIR="$PWD"
TARGET="$ARCH-macos-none"
MCPU="baseline"
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388"
PREFIX="$HOME/$CACHE_BASENAME"
JOBS="-j3"
ZIG="$PREFIX/bin/zig"

View File

@ -1,5 +1,5 @@
$TARGET = "$($Env:ARCH)-windows-gnu"
$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388"
$MCPU = "baseline"
$ZIG_LLVM_CLANG_LLD_URL = "https://ziglang.org/deps/$ZIG_LLVM_CLANG_LLD_NAME.zip"
$PREFIX_PATH = "$($Env:USERPROFILE)\$ZIG_LLVM_CLANG_LLD_NAME"

View File

@ -1,5 +1,5 @@
$TARGET = "$($Env:ARCH)-windows-gnu"
$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388"
$MCPU = "baseline"
$ZIG_LLVM_CLANG_LLD_URL = "https://ziglang.org/deps/$ZIG_LLVM_CLANG_LLD_NAME.zip"
$PREFIX_PATH = "$($Env:USERPROFILE)\$ZIG_LLVM_CLANG_LLD_NAME"

View File

@ -17,10 +17,10 @@ find_path(CLANG_INCLUDE_DIRS NAMES clang/Frontend/ASTUnit.h
if(${LLVM_LINK_MODE} STREQUAL "shared")
find_library(CLANG_LIBRARIES
NAMES
libclang-cpp.so.18
libclang-cpp.so.18.1
clang-cpp-18.0
clang-cpp180
libclang-cpp.so.19
libclang-cpp.so.19.1
clang-cpp-19.0
clang-cpp190
clang-cpp
NAMES_PER_DIR
HINTS "${LLVM_LIBDIRS}"
@ -68,6 +68,8 @@ else()
FIND_AND_ADD_CLANG_LIB(clangToolingCore)
FIND_AND_ADD_CLANG_LIB(clangExtractAPI)
FIND_AND_ADD_CLANG_LIB(clangSupport)
FIND_AND_ADD_CLANG_LIB(clangInstallAPI)
FIND_AND_ADD_CLANG_LIB(clangAST)
endif()
if (MSVC)

View File

@ -9,21 +9,21 @@
find_path(LLD_INCLUDE_DIRS NAMES lld/Common/Driver.h
HINTS ${LLVM_INCLUDE_DIRS}
PATHS
/usr/lib/llvm-18/include
/usr/local/llvm180/include
/usr/local/llvm18/include
/usr/local/opt/llvm@18/include
/opt/homebrew/opt/llvm@18/include
/usr/lib/llvm-19/include
/usr/local/llvm190/include
/usr/local/llvm19/include
/usr/local/opt/llvm@19/include
/opt/homebrew/opt/llvm@19/include
/mingw64/include)
find_library(LLD_LIBRARY NAMES lld-18.0 lld180 lld NAMES_PER_DIR
find_library(LLD_LIBRARY NAMES lld-19.0 lld190 lld NAMES_PER_DIR
HINTS ${LLVM_LIBDIRS}
PATHS
/usr/lib/llvm-18/lib
/usr/local/llvm180/lib
/usr/local/llvm18/lib
/usr/local/opt/llvm@18/lib
/opt/homebrew/opt/llvm@18/lib
/usr/lib/llvm-19/lib
/usr/local/llvm190/lib
/usr/local/llvm19/lib
/usr/local/opt/llvm@19/lib
/opt/homebrew/opt/llvm@19/lib
)
if(EXISTS ${LLD_LIBRARY})
set(LLD_LIBRARIES ${LLD_LIBRARY})
@ -34,11 +34,11 @@ else()
HINTS ${LLVM_LIBDIRS}
PATHS
${LLD_LIBDIRS}
/usr/lib/llvm-18/lib
/usr/local/llvm180/lib
/usr/local/llvm18/lib
/usr/local/opt/llvm@18/lib
/opt/homebrew/opt/llvm@18/lib
/usr/lib/llvm-19/lib
/usr/local/llvm190/lib
/usr/local/llvm19/lib
/usr/local/opt/llvm@19/lib
/opt/homebrew/opt/llvm@19/lib
/mingw64/lib
/c/msys64/mingw64/lib
c:/msys64/mingw64/lib)

View File

@ -17,12 +17,12 @@ if(ZIG_USE_LLVM_CONFIG)
# terminate when the right LLVM version is not found.
unset(LLVM_CONFIG_EXE CACHE)
find_program(LLVM_CONFIG_EXE
NAMES llvm-config-18 llvm-config-18.0 llvm-config180 llvm-config18 llvm-config NAMES_PER_DIR
NAMES llvm-config-19 llvm-config-19.0 llvm-config190 llvm-config19 llvm-config NAMES_PER_DIR
PATHS
"/mingw64/bin"
"/c/msys64/mingw64/bin"
"c:/msys64/mingw64/bin"
"C:/Libraries/llvm-18.0.0/bin")
"C:/Libraries/llvm-19.0.0/bin")
if ("${LLVM_CONFIG_EXE}" STREQUAL "LLVM_CONFIG_EXE-NOTFOUND")
if (NOT LLVM_CONFIG_ERROR_MESSAGES STREQUAL "")
@ -40,9 +40,9 @@ if(ZIG_USE_LLVM_CONFIG)
OUTPUT_STRIP_TRAILING_WHITESPACE)
get_filename_component(LLVM_CONFIG_DIR "${LLVM_CONFIG_EXE}" DIRECTORY)
if("${LLVM_CONFIG_VERSION}" VERSION_LESS 18 OR "${LLVM_CONFIG_VERSION}" VERSION_EQUAL 19 OR "${LLVM_CONFIG_VERSION}" VERSION_GREATER 19)
if("${LLVM_CONFIG_VERSION}" VERSION_LESS 19 OR "${LLVM_CONFIG_VERSION}" VERSION_EQUAL 20 OR "${LLVM_CONFIG_VERSION}" VERSION_GREATER 20)
# Save the error message, in case this is the last llvm-config we find
list(APPEND LLVM_CONFIG_ERROR_MESSAGES "expected LLVM 18.x but found ${LLVM_CONFIG_VERSION} using ${LLVM_CONFIG_EXE}")
list(APPEND LLVM_CONFIG_ERROR_MESSAGES "expected LLVM 19.x but found ${LLVM_CONFIG_VERSION} using ${LLVM_CONFIG_EXE}")
# Ignore this directory and try the search again
list(APPEND CMAKE_IGNORE_PATH "${LLVM_CONFIG_DIR}")
@ -63,12 +63,12 @@ if(ZIG_USE_LLVM_CONFIG)
ERROR_VARIABLE LLVM_CONFIG_ERROR
ERROR_STRIP_TRAILING_WHITESPACE)
if (LLVM_CONFIG_ERROR)
if (LLVM_CONFIG_ERROR)
# Save the error message, in case this is the last llvm-config we find
if (ZIG_SHARED_LLVM)
list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 18.x found at ${LLVM_CONFIG_EXE} does not support linking as a shared library")
list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 19.x found at ${LLVM_CONFIG_EXE} does not support linking as a shared library")
else()
list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 18.x found at ${LLVM_CONFIG_EXE} does not support linking as a static library")
list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 19.x found at ${LLVM_CONFIG_EXE} does not support linking as a static library")
endif()
# Ignore this directory and try the search again
@ -200,6 +200,7 @@ else()
FIND_AND_ADD_LLVM_LIB(LLVMTextAPIBinaryReader)
FIND_AND_ADD_LLVM_LIB(LLVMCoverage)
FIND_AND_ADD_LLVM_LIB(LLVMLineEditor)
FIND_AND_ADD_LLVM_LIB(LLVMSandboxIR)
FIND_AND_ADD_LLVM_LIB(LLVMXCoreDisassembler)
FIND_AND_ADD_LLVM_LIB(LLVMXCoreCodeGen)
FIND_AND_ADD_LLVM_LIB(LLVMXCoreDesc)
@ -335,6 +336,7 @@ else()
FIND_AND_ADD_LLVM_LIB(LLVMDWARFLinkerParallel)
FIND_AND_ADD_LLVM_LIB(LLVMDWARFLinkerClassic)
FIND_AND_ADD_LLVM_LIB(LLVMDWARFLinker)
FIND_AND_ADD_LLVM_LIB(LLVMCodeGenData)
FIND_AND_ADD_LLVM_LIB(LLVMGlobalISel)
FIND_AND_ADD_LLVM_LIB(LLVMMIRParser)
FIND_AND_ADD_LLVM_LIB(LLVMAsmPrinter)

View File

@ -658,6 +658,7 @@ pub fn toLLVMTriple(target: std.Target, buf: []u8) []const u8 {
.shadermodel => "shadermodel",
.visionos => "xros",
.serenity => "serenity",
.bridgeos => "bridgeos",
.opencl,
.opengl,
.vulkan,

View File

@ -1,8 +1,14 @@
const std = @import("std");
const builtin = @import("builtin");
const native_endian = builtin.cpu.arch.endian();
const ofmt_c = builtin.object_format == .c;
pub const linkage: std.builtin.GlobalLinkage = if (builtin.is_test) .internal else .weak;
pub const linkage: std.builtin.GlobalLinkage = if (builtin.is_test)
.internal
else if (ofmt_c)
.strong
else
.weak;
/// Determines the symbol's visibility to other objects.
/// For WebAssembly this allows the symbol to be resolved to other modules, but will not
/// export it to the host runtime.
@ -28,7 +34,7 @@ pub const want_float_exceptions = !builtin.cpu.arch.isWasm();
// Libcalls that involve u128 on Windows x86-64 are expected by LLVM to use the
// calling convention of @Vector(2, u64), rather than what's standard.
pub const want_windows_v2u64_abi = builtin.os.tag == .windows and builtin.cpu.arch == .x86_64 and @import("builtin").object_format != .c;
pub const want_windows_v2u64_abi = builtin.os.tag == .windows and builtin.cpu.arch == .x86_64 and !ofmt_c;
/// This governs whether to use these symbol names for f16/f32 conversions
/// rather than the standard names:

View File

@ -215,9 +215,7 @@ inline __device__ unsigned int __activemask() {
#if CUDA_VERSION < 9020
return __nvvm_vote_ballot(1);
#else
unsigned int mask;
asm volatile("activemask.b32 %0;" : "=r"(mask));
return mask;
return __nvvm_activemask();
#endif
}

12
lib/include/__stdarg_header_macro.h vendored Normal file
View File

@ -0,0 +1,12 @@
/*===---- __stdarg_header_macro.h ------------------------------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __STDARG_H
#define __STDARG_H
#endif

12
lib/include/__stddef_header_macro.h vendored Normal file
View File

@ -0,0 +1,12 @@
/*===---- __stddef_header_macro.h ------------------------------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __STDDEF_H
#define __STDDEF_H
#endif

View File

@ -75,6 +75,14 @@ static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(v
#define __dbg(t) __builtin_arm_dbg(t)
#endif
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
#define _CHKFEAT_GCS 1
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__chkfeat(uint64_t __features) {
return __builtin_arm_chkfeat(__features) ^ __features;
}
#endif
/* 7.5 Swap */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__swp(uint32_t __x, volatile uint32_t *__p) {
@ -109,7 +117,7 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
#endif
/* 7.7 NOP */
#if !defined(_MSC_VER) || !defined(__aarch64__)
#if !defined(_MSC_VER) || (!defined(__aarch64__) && !defined(__arm64ec__))
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) {
__builtin_arm_nop();
}
@ -313,7 +321,7 @@ __qdbl(int32_t __t) {
}
#endif
/* 8.4.3 Accumultating multiplications */
/* 8.4.3 Accumulating multiplications */
#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlabb(int32_t __a, int32_t __b, int32_t __c) {
@ -545,7 +553,7 @@ __usub16(uint16x2_t __a, uint16x2_t __b) {
}
#endif
/* 8.5.10 Parallel 16-bit multiplications */
/* 8.5.10 Parallel 16-bit multiplication */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlad(int16x2_t __a, int16x2_t __b, int32_t __c) {
@ -748,7 +756,7 @@ __arm_st64bv0(void *__addr, data512_t __value) {
#define __arm_wsrf(sysreg, v) __arm_wsr(sysreg, __builtin_bit_cast(uint32_t, v))
#define __arm_wsrf64(sysreg, v) __arm_wsr64(sysreg, __builtin_bit_cast(uint64_t, v))
/* 10.3 Memory Tagging Extensions (MTE) Intrinsics */
/* 10.3 MTE intrinsics */
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
#define __arm_mte_create_random_tag(__ptr, __mask) __builtin_arm_irg(__ptr, __mask)
#define __arm_mte_increment_tag(__ptr, __tag_offset) __builtin_arm_addg(__ptr, __tag_offset)
@ -757,7 +765,7 @@ __arm_st64bv0(void *__addr, data512_t __value) {
#define __arm_mte_set_tag(__ptr) __builtin_arm_stg(__ptr)
#define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb)
/* 18 Memory Operations Intrinsics */
/* 18 memcpy family of operations intrinsics - MOPS */
#define __arm_mops_memset_tag(__tagged_address, __value, __size) \
__builtin_arm_mops_memset_tag(__tagged_address, __value, __size)
#endif
@ -855,6 +863,24 @@ __rndrrs(uint64_t *__p) {
}
#endif
/* 11.2 Guarded Control Stack intrinsics */
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
static __inline__ void * __attribute__((__always_inline__, __nodebug__))
__gcspr() {
return (void *)__builtin_arm_rsr64("gcspr_el0");
}
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__, target("gcs")))
__gcspopm() {
return __builtin_arm_gcspopm(0);
}
static __inline__ const void * __attribute__((__always_inline__, __nodebug__, target("gcs")))
__gcsss(const void *__stack) {
return __builtin_arm_gcsss(__stack);
}
#endif
#if defined(__cplusplus)
}
#endif

View File

@ -29,7 +29,7 @@
typedef __fp16 float16_t;
#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))
#if defined(__aarch64__)
#if defined(__aarch64__) || defined(__arm64ec__)
#define vabdh_f16(__p0, __p1) __extension__ ({ \
float16_t __ret; \
float16_t __s0 = __p0; \

39667
lib/include/arm_neon.h vendored

File diff suppressed because it is too large Load Diff

409
lib/include/arm_sme.h vendored
View File

@ -16,6 +16,8 @@
#endif
#include <arm_sve.h>
#include <stddef.h>
/* Function attributes */
#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))
@ -39,6 +41,11 @@ __ai bool __arm_in_streaming_mode(void) __arm_streaming_compatible {
return x0 & 1;
}
void *__arm_sc_memcpy(void *dest, const void *src, size_t n) __arm_streaming_compatible;
void *__arm_sc_memmove(void *dest, const void *src, size_t n) __arm_streaming_compatible;
void *__arm_sc_memset(void *s, int c, size_t n) __arm_streaming_compatible;
void *__arm_sc_memchr(void *s, int c, size_t n) __arm_streaming_compatible;
__ai __attribute__((target("sme"))) void svundef_za(void) __arm_streaming_compatible __arm_out("za") { }
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_u32_m)))
@ -368,7 +375,7 @@ void svwrite_ver_za8_s8_m(uint64_t, uint32_t, svbool_t, svint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_mask_za)))
void svzero_mask_za(uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za)))
void svzero_za();
void svzero_za(void);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_u32_m)))
void svaddha_za32_m(uint64_t, svbool_t, svbool_t, svuint32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_s32_m)))
@ -597,6 +604,78 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_
void svwrite_ver_za8_m(uint64_t, uint32_t, svbool_t, svuint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_m)))
void svwrite_ver_za8_m(uint64_t, uint32_t, svbool_t, svint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_f16_vg1x2)))
void svmla_single_za16_f16_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_f16_vg1x4)))
void svmla_single_za16_f16_vg1x4(uint32_t, svfloat16x4_t, svfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_f16_vg1x2)))
void svmla_lane_za16_f16_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_f16_vg1x4)))
void svmla_lane_za16_f16_vg1x4(uint32_t, svfloat16x4_t, svfloat16_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_f16_vg1x2)))
void svmla_za16_f16_vg1x2(uint32_t, svfloat16x2_t, svfloat16x2_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_f16_vg1x4)))
void svmla_za16_f16_vg1x4(uint32_t, svfloat16x4_t, svfloat16x4_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za16_f16_vg1x2)))
void svmls_single_za16_f16_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za16_f16_vg1x4)))
void svmls_single_za16_f16_vg1x4(uint32_t, svfloat16x4_t, svfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za16_f16_vg1x2)))
void svmls_lane_za16_f16_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za16_f16_vg1x4)))
void svmls_lane_za16_f16_vg1x4(uint32_t, svfloat16x4_t, svfloat16_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za16_f16_vg1x2)))
void svmls_za16_f16_vg1x2(uint32_t, svfloat16x2_t, svfloat16x2_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za16_f16_vg1x4)))
void svmls_za16_f16_vg1x4(uint32_t, svfloat16x4_t, svfloat16x4_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za16_f16_m)))
void svmopa_za16_f16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za16_f16_m)))
void svmops_za16_f16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_f16_vg1x2)))
void svmla_za16_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_f16_vg1x4)))
void svmla_za16_vg1x4(uint32_t, svfloat16x4_t, svfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_f16_vg1x2)))
void svmla_lane_za16_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_f16_vg1x4)))
void svmla_lane_za16_vg1x4(uint32_t, svfloat16x4_t, svfloat16_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_f16_vg1x2)))
void svmla_za16_vg1x2(uint32_t, svfloat16x2_t, svfloat16x2_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_f16_vg1x4)))
void svmla_za16_vg1x4(uint32_t, svfloat16x4_t, svfloat16x4_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za16_f16_vg1x2)))
void svmls_za16_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za16_f16_vg1x4)))
void svmls_za16_vg1x4(uint32_t, svfloat16x4_t, svfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za16_f16_vg1x2)))
void svmls_lane_za16_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za16_f16_vg1x4)))
void svmls_lane_za16_vg1x4(uint32_t, svfloat16x4_t, svfloat16_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za16_f16_vg1x2)))
void svmls_za16_vg1x2(uint32_t, svfloat16x2_t, svfloat16x2_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za16_f16_vg1x4)))
void svmls_za16_vg1x4(uint32_t, svfloat16x4_t, svfloat16x4_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za16_f16_m)))
void svmopa_za16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za16_f16_m)))
void svmops_za16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_f16_vg1x2)))
void svadd_za16_f16_vg1x2(uint32_t, svfloat16x2_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_f16_vg1x4)))
void svadd_za16_f16_vg1x4(uint32_t, svfloat16x4_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_f16_vg1x2)))
void svsub_za16_f16_vg1x2(uint32_t, svfloat16x2_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_f16_vg1x4)))
void svsub_za16_f16_vg1x4(uint32_t, svfloat16x4_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_f16_vg1x2)))
void svadd_za16_vg1x2(uint32_t, svfloat16x2_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_f16_vg1x4)))
void svadd_za16_vg1x4(uint32_t, svfloat16x4_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_f16_vg1x2)))
void svsub_za16_vg1x2(uint32_t, svfloat16x2_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_f16_vg1x4)))
void svsub_za16_vg1x4(uint32_t, svfloat16x4_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_f64_m)))
void svmopa_za64_f64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_f64_m)))
@ -2059,6 +2138,78 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_u8_vg1x
void svwrite_za8_vg1x4(uint32_t, svuint8x4_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_s8_vg1x4)))
void svwrite_za8_vg1x4(uint32_t, svint8x4_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_bf16_vg1x2)))
void svadd_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_bf16_vg1x4)))
void svadd_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_bf16_vg1x2)))
void svmla_single_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_bf16_vg1x4)))
void svmla_single_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_bf16_vg1x2)))
void svmla_lane_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_bf16_vg1x4)))
void svmla_lane_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_bf16_vg1x2)))
void svmla_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_bf16_vg1x4)))
void svmla_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za16_bf16_vg1x2)))
void svmls_single_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za16_bf16_vg1x4)))
void svmls_single_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za16_bf16_vg1x2)))
void svmls_lane_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za16_bf16_vg1x4)))
void svmls_lane_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za16_bf16_vg1x2)))
void svmls_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za16_bf16_vg1x4)))
void svmls_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za16_bf16_m)))
void svmopa_za16_bf16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za16_bf16_m)))
void svmops_za16_bf16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_bf16_vg1x2)))
void svsub_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_bf16_vg1x4)))
void svsub_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_bf16_vg1x2)))
void svadd_za16_vg1x2(uint32_t, svbfloat16x2_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_bf16_vg1x4)))
void svadd_za16_vg1x4(uint32_t, svbfloat16x4_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_bf16_vg1x2)))
void svmla_za16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_bf16_vg1x4)))
void svmla_za16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_bf16_vg1x2)))
void svmla_lane_za16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_bf16_vg1x4)))
void svmla_lane_za16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_bf16_vg1x2)))
void svmla_za16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_bf16_vg1x4)))
void svmla_za16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za16_bf16_vg1x2)))
void svmls_za16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za16_bf16_vg1x4)))
void svmls_za16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za16_bf16_vg1x2)))
void svmls_lane_za16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za16_bf16_vg1x4)))
void svmls_lane_za16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za16_bf16_vg1x2)))
void svmls_za16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za16_bf16_vg1x4)))
void svmls_za16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za16_bf16_m)))
void svmopa_za16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za16_bf16_m)))
void svmops_za16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_bf16_vg1x2)))
void svsub_za16_vg1x2(uint32_t, svbfloat16x2_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_bf16_vg1x4)))
void svsub_za16_vg1x4(uint32_t, svbfloat16x4_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_f64_vg1x2)))
void svadd_za64_f64_vg1x2(uint32_t, svfloat64x2_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_f64_vg1x4)))
@ -2403,6 +2554,262 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za64_s1
void svvdot_lane_za64_vg1x4(uint32_t, svint16x4_t, svint16_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za64_u16_vg1x4)))
void svvdot_lane_za64_vg1x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_u8)))
svuint8_t svreadz_hor_za128_u8(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_u32)))
svuint32_t svreadz_hor_za128_u32(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_u64)))
svuint64_t svreadz_hor_za128_u64(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_u16)))
svuint16_t svreadz_hor_za128_u16(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_bf16)))
svbfloat16_t svreadz_hor_za128_bf16(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_s8)))
svint8_t svreadz_hor_za128_s8(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_f64)))
svfloat64_t svreadz_hor_za128_f64(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_f32)))
svfloat32_t svreadz_hor_za128_f32(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_f16)))
svfloat16_t svreadz_hor_za128_f16(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_s32)))
svint32_t svreadz_hor_za128_s32(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_s64)))
svint64_t svreadz_hor_za128_s64(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_s16)))
svint16_t svreadz_hor_za128_s16(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za16_u16)))
svuint16_t svreadz_hor_za16_u16(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za16_bf16)))
svbfloat16_t svreadz_hor_za16_bf16(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za16_f16)))
svfloat16_t svreadz_hor_za16_f16(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za16_s16)))
svint16_t svreadz_hor_za16_s16(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za16_u16_vg2)))
svuint16x2_t svreadz_hor_za16_u16_vg2(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za16_bf16_vg2)))
svbfloat16x2_t svreadz_hor_za16_bf16_vg2(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za16_f16_vg2)))
svfloat16x2_t svreadz_hor_za16_f16_vg2(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za16_s16_vg2)))
svint16x2_t svreadz_hor_za16_s16_vg2(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za16_u16_vg4)))
svuint16x4_t svreadz_hor_za16_u16_vg4(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za16_bf16_vg4)))
svbfloat16x4_t svreadz_hor_za16_bf16_vg4(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za16_f16_vg4)))
svfloat16x4_t svreadz_hor_za16_f16_vg4(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za16_s16_vg4)))
svint16x4_t svreadz_hor_za16_s16_vg4(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za32_u32)))
svuint32_t svreadz_hor_za32_u32(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za32_f32)))
svfloat32_t svreadz_hor_za32_f32(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za32_s32)))
svint32_t svreadz_hor_za32_s32(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za32_u32_vg2)))
svuint32x2_t svreadz_hor_za32_u32_vg2(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za32_f32_vg2)))
svfloat32x2_t svreadz_hor_za32_f32_vg2(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za32_s32_vg2)))
svint32x2_t svreadz_hor_za32_s32_vg2(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za32_u32_vg4)))
svuint32x4_t svreadz_hor_za32_u32_vg4(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za32_f32_vg4)))
svfloat32x4_t svreadz_hor_za32_f32_vg4(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za32_s32_vg4)))
svint32x4_t svreadz_hor_za32_s32_vg4(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za64_u64)))
svuint64_t svreadz_hor_za64_u64(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za64_f64)))
svfloat64_t svreadz_hor_za64_f64(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za64_s64)))
svint64_t svreadz_hor_za64_s64(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za64_u64_vg2)))
svuint64x2_t svreadz_hor_za64_u64_vg2(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za64_f64_vg2)))
svfloat64x2_t svreadz_hor_za64_f64_vg2(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za64_s64_vg2)))
svint64x2_t svreadz_hor_za64_s64_vg2(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za64_u64_vg4)))
svuint64x4_t svreadz_hor_za64_u64_vg4(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za64_f64_vg4)))
svfloat64x4_t svreadz_hor_za64_f64_vg4(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za64_s64_vg4)))
svint64x4_t svreadz_hor_za64_s64_vg4(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za8_u8)))
svuint8_t svreadz_hor_za8_u8(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za8_s8)))
svint8_t svreadz_hor_za8_s8(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za8_u8_vg2)))
svuint8x2_t svreadz_hor_za8_u8_vg2(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za8_s8_vg2)))
svint8x2_t svreadz_hor_za8_s8_vg2(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za8_u8_vg4)))
svuint8x4_t svreadz_hor_za8_u8_vg4(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za8_s8_vg4)))
svint8x4_t svreadz_hor_za8_s8_vg4(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_u8)))
svuint8_t svreadz_ver_za128_u8(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_u32)))
svuint32_t svreadz_ver_za128_u32(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_u64)))
svuint64_t svreadz_ver_za128_u64(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_u16)))
svuint16_t svreadz_ver_za128_u16(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_bf16)))
svbfloat16_t svreadz_ver_za128_bf16(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_s8)))
svint8_t svreadz_ver_za128_s8(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_f64)))
svfloat64_t svreadz_ver_za128_f64(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_f32)))
svfloat32_t svreadz_ver_za128_f32(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_f16)))
svfloat16_t svreadz_ver_za128_f16(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_s32)))
svint32_t svreadz_ver_za128_s32(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_s64)))
svint64_t svreadz_ver_za128_s64(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_s16)))
svint16_t svreadz_ver_za128_s16(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za16_u16)))
svuint16_t svreadz_ver_za16_u16(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za16_bf16)))
svbfloat16_t svreadz_ver_za16_bf16(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za16_f16)))
svfloat16_t svreadz_ver_za16_f16(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za16_s16)))
svint16_t svreadz_ver_za16_s16(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za16_u16_vg2)))
svuint16x2_t svreadz_ver_za16_u16_vg2(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za16_bf16_vg2)))
svbfloat16x2_t svreadz_ver_za16_bf16_vg2(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za16_f16_vg2)))
svfloat16x2_t svreadz_ver_za16_f16_vg2(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za16_s16_vg2)))
svint16x2_t svreadz_ver_za16_s16_vg2(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za16_u16_vg4)))
svuint16x4_t svreadz_ver_za16_u16_vg4(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za16_bf16_vg4)))
svbfloat16x4_t svreadz_ver_za16_bf16_vg4(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za16_f16_vg4)))
svfloat16x4_t svreadz_ver_za16_f16_vg4(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za16_s16_vg4)))
svint16x4_t svreadz_ver_za16_s16_vg4(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za32_u32)))
svuint32_t svreadz_ver_za32_u32(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za32_f32)))
svfloat32_t svreadz_ver_za32_f32(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za32_s32)))
svint32_t svreadz_ver_za32_s32(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za32_u32_vg2)))
svuint32x2_t svreadz_ver_za32_u32_vg2(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za32_f32_vg2)))
svfloat32x2_t svreadz_ver_za32_f32_vg2(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za32_s32_vg2)))
svint32x2_t svreadz_ver_za32_s32_vg2(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za32_u32_vg4)))
svuint32x4_t svreadz_ver_za32_u32_vg4(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za32_f32_vg4)))
svfloat32x4_t svreadz_ver_za32_f32_vg4(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za32_s32_vg4)))
svint32x4_t svreadz_ver_za32_s32_vg4(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za64_u64)))
svuint64_t svreadz_ver_za64_u64(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za64_f64)))
svfloat64_t svreadz_ver_za64_f64(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za64_s64)))
svint64_t svreadz_ver_za64_s64(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za64_u64_vg2)))
svuint64x2_t svreadz_ver_za64_u64_vg2(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za64_f64_vg2)))
svfloat64x2_t svreadz_ver_za64_f64_vg2(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za64_s64_vg2)))
svint64x2_t svreadz_ver_za64_s64_vg2(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za64_u64_vg4)))
svuint64x4_t svreadz_ver_za64_u64_vg4(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za64_f64_vg4)))
svfloat64x4_t svreadz_ver_za64_f64_vg4(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za64_s64_vg4)))
svint64x4_t svreadz_ver_za64_s64_vg4(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za8_u8)))
svuint8_t svreadz_ver_za8_u8(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za8_s8)))
svint8_t svreadz_ver_za8_s8(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za8_u8_vg2)))
svuint8x2_t svreadz_ver_za8_u8_vg2(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za8_s8_vg2)))
svint8x2_t svreadz_ver_za8_s8_vg2(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za8_u8_vg4)))
svuint8x4_t svreadz_ver_za8_u8_vg4(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za8_s8_vg4)))
svint8x4_t svreadz_ver_za8_s8_vg4(uint64_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za16_u16_vg1x2)))
svuint16x2_t svreadz_za16_u16_vg1x2(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za16_bf16_vg1x2)))
svbfloat16x2_t svreadz_za16_bf16_vg1x2(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za16_f16_vg1x2)))
svfloat16x2_t svreadz_za16_f16_vg1x2(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za16_s16_vg1x2)))
svint16x2_t svreadz_za16_s16_vg1x2(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za16_u16_vg1x4)))
svuint16x4_t svreadz_za16_u16_vg1x4(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za16_bf16_vg1x4)))
svbfloat16x4_t svreadz_za16_bf16_vg1x4(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za16_f16_vg1x4)))
svfloat16x4_t svreadz_za16_f16_vg1x4(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za16_s16_vg1x4)))
svint16x4_t svreadz_za16_s16_vg1x4(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za32_u32_vg1x2)))
svuint32x2_t svreadz_za32_u32_vg1x2(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za32_f32_vg1x2)))
svfloat32x2_t svreadz_za32_f32_vg1x2(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za32_s32_vg1x2)))
svint32x2_t svreadz_za32_s32_vg1x2(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za32_u32_vg1x4)))
svuint32x4_t svreadz_za32_u32_vg1x4(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za32_f32_vg1x4)))
svfloat32x4_t svreadz_za32_f32_vg1x4(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za32_s32_vg1x4)))
svint32x4_t svreadz_za32_s32_vg1x4(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za64_u64_vg1x2)))
svuint64x2_t svreadz_za64_u64_vg1x2(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za64_f64_vg1x2)))
svfloat64x2_t svreadz_za64_f64_vg1x2(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za64_s64_vg1x2)))
svint64x2_t svreadz_za64_s64_vg1x2(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za64_u64_vg1x4)))
svuint64x4_t svreadz_za64_u64_vg1x4(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za64_f64_vg1x4)))
svfloat64x4_t svreadz_za64_f64_vg1x4(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za64_s64_vg1x4)))
svint64x4_t svreadz_za64_s64_vg1x4(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za8_u8_vg1x2)))
svuint8x2_t svreadz_za8_u8_vg1x2(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za8_s8_vg1x2)))
svint8x2_t svreadz_za8_s8_vg1x2(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za8_u8_vg1x4)))
svuint8x4_t svreadz_za8_u8_vg1x4(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za8_s8_vg1x4)))
svint8x4_t svreadz_za8_s8_vg1x4(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za64_vg1x2)))
void svzero_za64_vg1x2(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za64_vg1x4)))
void svzero_za64_vg1x4(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za64_vg2x1)))
void svzero_za64_vg2x1(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za64_vg2x2)))
void svzero_za64_vg2x2(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za64_vg2x4)))
void svzero_za64_vg2x4(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za64_vg4x1)))
void svzero_za64_vg4x1(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za64_vg4x2)))
void svzero_za64_vg4x2(uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za64_vg4x4)))
void svzero_za64_vg4x4(uint32_t);
#ifdef __cplusplus
} // extern "C"
#endif

33276
lib/include/arm_sve.h vendored

File diff suppressed because it is too large Load Diff

View File

@ -16,7 +16,7 @@
#define __ARM_NEON_TYPES_H
typedef float float32_t;
typedef __fp16 float16_t;
#ifdef __aarch64__
#if defined(__aarch64__) || defined(__arm64ec__)
typedef double float64_t;
#endif
@ -40,7 +40,7 @@ typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t;
typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t;
typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t;
typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t;
#ifdef __aarch64__
#if defined(__aarch64__) || defined(__arm64ec__)
typedef __attribute__((neon_vector_type(1))) float64_t float64x1_t;
typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t;
#endif
@ -125,7 +125,7 @@ typedef struct float32x4x2_t {
float32x4_t val[2];
} float32x4x2_t;
#ifdef __aarch64__
#if defined(__aarch64__) || defined(__arm64ec__)
typedef struct float64x1x2_t {
float64x1_t val[2];
} float64x1x2_t;
@ -215,7 +215,7 @@ typedef struct float32x4x3_t {
float32x4_t val[3];
} float32x4x3_t;
#ifdef __aarch64__
#if defined(__aarch64__) || defined(__arm64ec__)
typedef struct float64x1x3_t {
float64x1_t val[3];
} float64x1x3_t;
@ -305,7 +305,7 @@ typedef struct float32x4x4_t {
float32x4_t val[4];
} float32x4x4_t;
#ifdef __aarch64__
#if defined(__aarch64__) || defined(__arm64ec__)
typedef struct float64x1x4_t {
float64x1_t val[4];
} float64x1x4_t;

View File

@ -1,271 +0,0 @@
/*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __AVX512ERINTRIN_H
#define __AVX512ERINTRIN_H
/* exp2a23 */
#define _mm512_exp2a23_round_pd(A, R) \
((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
(__v8df)_mm512_setzero_pd(), \
(__mmask8)-1, (int)(R)))
#define _mm512_mask_exp2a23_round_pd(S, M, A, R) \
((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
(__v8df)(__m512d)(S), (__mmask8)(M), \
(int)(R)))
#define _mm512_maskz_exp2a23_round_pd(M, A, R) \
((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
(__v8df)_mm512_setzero_pd(), \
(__mmask8)(M), (int)(R)))
#define _mm512_exp2a23_pd(A) \
_mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_exp2a23_pd(S, M, A) \
_mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_exp2a23_pd(M, A) \
_mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
#define _mm512_exp2a23_round_ps(A, R) \
((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
(__v16sf)_mm512_setzero_ps(), \
(__mmask16)-1, (int)(R)))
#define _mm512_mask_exp2a23_round_ps(S, M, A, R) \
((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
(__v16sf)(__m512)(S), (__mmask16)(M), \
(int)(R)))
#define _mm512_maskz_exp2a23_round_ps(M, A, R) \
((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
(__v16sf)_mm512_setzero_ps(), \
(__mmask16)(M), (int)(R)))
#define _mm512_exp2a23_ps(A) \
_mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_exp2a23_ps(S, M, A) \
_mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_exp2a23_ps(M, A) \
_mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
/* rsqrt28 */
#define _mm512_rsqrt28_round_pd(A, R) \
((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
(__v8df)_mm512_setzero_pd(), \
(__mmask8)-1, (int)(R)))
#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \
((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
(__v8df)(__m512d)(S), (__mmask8)(M), \
(int)(R)))
#define _mm512_maskz_rsqrt28_round_pd(M, A, R) \
((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
(__v8df)_mm512_setzero_pd(), \
(__mmask8)(M), (int)(R)))
#define _mm512_rsqrt28_pd(A) \
_mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_rsqrt28_pd(S, M, A) \
_mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_rsqrt28_pd(M, A) \
_mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
#define _mm512_rsqrt28_round_ps(A, R) \
((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
(__v16sf)_mm512_setzero_ps(), \
(__mmask16)-1, (int)(R)))
#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \
((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
(__v16sf)(__m512)(S), (__mmask16)(M), \
(int)(R)))
#define _mm512_maskz_rsqrt28_round_ps(M, A, R) \
((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
(__v16sf)_mm512_setzero_ps(), \
(__mmask16)(M), (int)(R)))
#define _mm512_rsqrt28_ps(A) \
_mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_rsqrt28_ps(S, M, A) \
_mm512_mask_rsqrt28_round_ps((S), (M), A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_rsqrt28_ps(M, A) \
_mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
#define _mm_rsqrt28_round_ss(A, B, R) \
((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), \
(__v4sf)_mm_setzero_ps(), \
(__mmask8)-1, (int)(R)))
#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \
((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), \
(__v4sf)(__m128)(S), \
(__mmask8)(M), (int)(R)))
#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \
((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), \
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(M), (int)(R)))
#define _mm_rsqrt28_ss(A, B) \
_mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_mask_rsqrt28_ss(S, M, A, B) \
_mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_maskz_rsqrt28_ss(M, A, B) \
_mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_rsqrt28_round_sd(A, B, R) \
((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), \
(__v2df)_mm_setzero_pd(), \
(__mmask8)-1, (int)(R)))
#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \
((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), \
(__v2df)(__m128d)(S), \
(__mmask8)(M), (int)(R)))
#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \
((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), \
(__v2df)_mm_setzero_pd(), \
(__mmask8)(M), (int)(R)))
#define _mm_rsqrt28_sd(A, B) \
_mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_mask_rsqrt28_sd(S, M, A, B) \
_mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_maskz_rsqrt28_sd(M, A, B) \
_mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
/* rcp28 */
#define _mm512_rcp28_round_pd(A, R) \
((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
(__v8df)_mm512_setzero_pd(), \
(__mmask8)-1, (int)(R)))
#define _mm512_mask_rcp28_round_pd(S, M, A, R) \
((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
(__v8df)(__m512d)(S), (__mmask8)(M), \
(int)(R)))
#define _mm512_maskz_rcp28_round_pd(M, A, R) \
((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
(__v8df)_mm512_setzero_pd(), \
(__mmask8)(M), (int)(R)))
#define _mm512_rcp28_pd(A) \
_mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_rcp28_pd(S, M, A) \
_mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_rcp28_pd(M, A) \
_mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
#define _mm512_rcp28_round_ps(A, R) \
((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
(__v16sf)_mm512_setzero_ps(), \
(__mmask16)-1, (int)(R)))
#define _mm512_mask_rcp28_round_ps(S, M, A, R) \
((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
(__v16sf)(__m512)(S), (__mmask16)(M), \
(int)(R)))
#define _mm512_maskz_rcp28_round_ps(M, A, R) \
((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
(__v16sf)_mm512_setzero_ps(), \
(__mmask16)(M), (int)(R)))
#define _mm512_rcp28_ps(A) \
_mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_rcp28_ps(S, M, A) \
_mm512_mask_rcp28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_rcp28_ps(M, A) \
_mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
#define _mm_rcp28_round_ss(A, B, R) \
((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), \
(__v4sf)_mm_setzero_ps(), \
(__mmask8)-1, (int)(R)))
#define _mm_mask_rcp28_round_ss(S, M, A, B, R) \
((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), \
(__v4sf)(__m128)(S), \
(__mmask8)(M), (int)(R)))
#define _mm_maskz_rcp28_round_ss(M, A, B, R) \
((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), \
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(M), (int)(R)))
#define _mm_rcp28_ss(A, B) \
_mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_mask_rcp28_ss(S, M, A, B) \
_mm_mask_rcp28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_maskz_rcp28_ss(M, A, B) \
_mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_rcp28_round_sd(A, B, R) \
((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), \
(__v2df)_mm_setzero_pd(), \
(__mmask8)-1, (int)(R)))
#define _mm_mask_rcp28_round_sd(S, M, A, B, R) \
((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), \
(__v2df)(__m128d)(S), \
(__mmask8)(M), (int)(R)))
#define _mm_maskz_rcp28_round_sd(M, A, B, R) \
((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), \
(__v2df)_mm_setzero_pd(), \
(__mmask8)(M), (int)(R)))
#define _mm_rcp28_sd(A, B) \
_mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_mask_rcp28_sd(S, M, A, B) \
_mm_mask_rcp28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_maskz_rcp28_sd(M, A, B) \
_mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
#endif /* __AVX512ERINTRIN_H */

View File

@ -96,8 +96,8 @@ _mm512_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
(h5), (h4), (h3), (h2), (h1))
static __inline __m512h __DEFAULT_FN_ATTRS512
_mm512_set1_pch(_Float16 _Complex h) {
return (__m512h)_mm512_set1_ps(__builtin_bit_cast(float, h));
_mm512_set1_pch(_Float16 _Complex __h) {
return (__m512h)_mm512_set1_ps(__builtin_bit_cast(float, __h));
}
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_castph_ps(__m128h __a) {
@ -282,75 +282,75 @@ _mm512_zextph256_ph512(__m256h __a) {
#define _mm_comi_sh(A, B, pred) \
_mm_comi_round_sh((A), (B), (pred), _MM_FROUND_CUR_DIRECTION)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comieq_sh(__m128h A,
__m128h B) {
return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_EQ_OS,
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comieq_sh(__m128h __A,
__m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_EQ_OS,
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comilt_sh(__m128h A,
__m128h B) {
return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LT_OS,
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comilt_sh(__m128h __A,
__m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_LT_OS,
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comile_sh(__m128h A,
__m128h B) {
return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LE_OS,
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comile_sh(__m128h __A,
__m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_LE_OS,
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comigt_sh(__m128h A,
__m128h B) {
return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GT_OS,
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comigt_sh(__m128h __A,
__m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_GT_OS,
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comige_sh(__m128h A,
__m128h B) {
return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GE_OS,
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comige_sh(__m128h __A,
__m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_GE_OS,
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comineq_sh(__m128h A,
__m128h B) {
return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_NEQ_US,
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comineq_sh(__m128h __A,
__m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_NEQ_US,
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomieq_sh(__m128h A,
__m128h B) {
return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_EQ_OQ,
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomieq_sh(__m128h __A,
__m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_EQ_OQ,
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomilt_sh(__m128h A,
__m128h B) {
return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LT_OQ,
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomilt_sh(__m128h __A,
__m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_LT_OQ,
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomile_sh(__m128h A,
__m128h B) {
return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LE_OQ,
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomile_sh(__m128h __A,
__m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_LE_OQ,
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomigt_sh(__m128h A,
__m128h B) {
return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GT_OQ,
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomigt_sh(__m128h __A,
__m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_GT_OQ,
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomige_sh(__m128h A,
__m128h B) {
return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GE_OQ,
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomige_sh(__m128h __A,
__m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_GE_OQ,
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomineq_sh(__m128h A,
__m128h B) {
return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_NEQ_UQ,
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomineq_sh(__m128h __A,
__m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_NEQ_UQ,
_MM_FROUND_CUR_DIRECTION);
}

View File

@ -1,92 +0,0 @@
/*===------------- avx512pfintrin.h - PF intrinsics ------------------------===
*
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <avx512pfintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __AVX512PFINTRIN_H
#define __AVX512PFINTRIN_H
#define _mm512_mask_prefetch_i32gather_pd(index, mask, addr, scale, hint) \
__builtin_ia32_gatherpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
(void const *)(addr), (int)(scale), \
(int)(hint))
#define _mm512_prefetch_i32gather_pd(index, addr, scale, hint) \
__builtin_ia32_gatherpfdpd((__mmask8) -1, (__v8si)(__m256i)(index), \
(void const *)(addr), (int)(scale), \
(int)(hint))
#define _mm512_mask_prefetch_i32gather_ps(index, mask, addr, scale, hint) \
__builtin_ia32_gatherpfdps((__mmask16)(mask), \
(__v16si)(__m512i)(index), (void const *)(addr), \
(int)(scale), (int)(hint))
#define _mm512_prefetch_i32gather_ps(index, addr, scale, hint) \
__builtin_ia32_gatherpfdps((__mmask16) -1, \
(__v16si)(__m512i)(index), (void const *)(addr), \
(int)(scale), (int)(hint))
#define _mm512_mask_prefetch_i64gather_pd(index, mask, addr, scale, hint) \
__builtin_ia32_gatherpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \
(void const *)(addr), (int)(scale), \
(int)(hint))
#define _mm512_prefetch_i64gather_pd(index, addr, scale, hint) \
__builtin_ia32_gatherpfqpd((__mmask8) -1, (__v8di)(__m512i)(index), \
(void const *)(addr), (int)(scale), \
(int)(hint))
#define _mm512_mask_prefetch_i64gather_ps(index, mask, addr, scale, hint) \
__builtin_ia32_gatherpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
(void const *)(addr), (int)(scale), (int)(hint))
#define _mm512_prefetch_i64gather_ps(index, addr, scale, hint) \
__builtin_ia32_gatherpfqps((__mmask8) -1, (__v8di)(__m512i)(index), \
(void const *)(addr), (int)(scale), (int)(hint))
#define _mm512_prefetch_i32scatter_pd(addr, index, scale, hint) \
__builtin_ia32_scatterpfdpd((__mmask8)-1, (__v8si)(__m256i)(index), \
(void *)(addr), (int)(scale), \
(int)(hint))
#define _mm512_mask_prefetch_i32scatter_pd(addr, mask, index, scale, hint) \
__builtin_ia32_scatterpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
(void *)(addr), (int)(scale), \
(int)(hint))
#define _mm512_prefetch_i32scatter_ps(addr, index, scale, hint) \
__builtin_ia32_scatterpfdps((__mmask16)-1, (__v16si)(__m512i)(index), \
(void *)(addr), (int)(scale), (int)(hint))
#define _mm512_mask_prefetch_i32scatter_ps(addr, mask, index, scale, hint) \
__builtin_ia32_scatterpfdps((__mmask16)(mask), \
(__v16si)(__m512i)(index), (void *)(addr), \
(int)(scale), (int)(hint))
#define _mm512_prefetch_i64scatter_pd(addr, index, scale, hint) \
__builtin_ia32_scatterpfqpd((__mmask8)-1, (__v8di)(__m512i)(index), \
(void *)(addr), (int)(scale), \
(int)(hint))
#define _mm512_mask_prefetch_i64scatter_pd(addr, mask, index, scale, hint) \
__builtin_ia32_scatterpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \
(void *)(addr), (int)(scale), \
(int)(hint))
#define _mm512_prefetch_i64scatter_ps(addr, index, scale, hint) \
__builtin_ia32_scatterpfqps((__mmask8)-1, (__v8di)(__m512i)(index), \
(void *)(addr), (int)(scale), (int)(hint))
#define _mm512_mask_prefetch_i64scatter_ps(addr, mask, index, scale, hint) \
__builtin_ia32_scatterpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
(void *)(addr), (int)(scale), (int)(hint))
#endif

View File

@ -207,6 +207,8 @@ _mm256_div_ps(__m256 __a, __m256 __b)
/// Compares two 256-bit vectors of [4 x double] and returns the greater
/// of each pair of values.
///
/// If either value in a comparison is NaN, returns the value from \a __b.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMAXPD </c> instruction.
@ -226,6 +228,8 @@ _mm256_max_pd(__m256d __a, __m256d __b)
/// Compares two 256-bit vectors of [8 x float] and returns the greater
/// of each pair of values.
///
/// If either value in a comparison is NaN, returns the value from \a __b.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMAXPS </c> instruction.
@ -245,6 +249,8 @@ _mm256_max_ps(__m256 __a, __m256 __b)
/// Compares two 256-bit vectors of [4 x double] and returns the lesser
/// of each pair of values.
///
/// If either value in a comparison is NaN, returns the value from \a __b.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMINPD </c> instruction.
@ -264,6 +270,8 @@ _mm256_min_pd(__m256d __a, __m256d __b)
/// Compares two 256-bit vectors of [8 x float] and returns the lesser
/// of each pair of values.
///
/// If either value in a comparison is NaN, returns the value from \a __b.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMINPS </c> instruction.
@ -832,6 +840,7 @@ _mm256_permutevar_pd(__m256d __a, __m256i __c)
/// Copies the values stored in a 128-bit vector of [4 x float] as
/// specified by the 128-bit integer vector operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPERMILPS </c> instruction.
@ -1574,14 +1583,6 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
(__v4df)(__m256d)(b), (int)(mask)))
/* Compare */
#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
#define _CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */
#define _CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */
#define _CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */
#define _CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */
#define _CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */
#define _CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */
#define _CMP_ORD_Q 0x07 /* Ordered (non-signaling) */
#define _CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */
#define _CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unordered, signaling) */
#define _CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */
@ -1607,13 +1608,14 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
#define _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */
#define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */
/* Below intrinsic defined in emmintrin.h can be used for AVX */
/// Compares each of the corresponding double-precision values of two
/// 128-bit vectors of [2 x double], using the operation specified by the
/// immediate integer operand.
///
/// Returns a [2 x double] vector consisting of two doubles corresponding to
/// the two comparison results: zero if the comparison is false, and all 1's
/// if the comparison is true.
/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
/// If either value in a comparison is NaN, comparisons that are ordered
/// return false, and comparisons that are unordered return true.
///
/// \headerfile <x86intrin.h>
///
@ -1663,17 +1665,16 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// 0x1E: Greater-than (ordered, non-signaling) \n
/// 0x1F: True (unordered, signaling)
/// \returns A 128-bit vector of [2 x double] containing the comparison results.
#define _mm_cmp_pd(a, b, c) \
((__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \
(__v2df)(__m128d)(b), (c)))
/// \fn __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c)
/* Below intrinsic defined in xmmintrin.h can be used for AVX */
/// Compares each of the corresponding values of two 128-bit vectors of
/// [4 x float], using the operation specified by the immediate integer
/// operand.
///
/// Returns a [4 x float] vector consisting of four floats corresponding to
/// the four comparison results: zero if the comparison is false, and all 1's
/// if the comparison is true.
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
/// If either value in a comparison is NaN, comparisons that are ordered
/// return false, and comparisons that are unordered return true.
///
/// \headerfile <x86intrin.h>
///
@ -1723,17 +1724,15 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// 0x1E: Greater-than (ordered, non-signaling) \n
/// 0x1F: True (unordered, signaling)
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
#define _mm_cmp_ps(a, b, c) \
((__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \
(__v4sf)(__m128)(b), (c)))
/// \fn __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c)
/// Compares each of the corresponding double-precision values of two
/// 256-bit vectors of [4 x double], using the operation specified by the
/// immediate integer operand.
///
/// Returns a [4 x double] vector consisting of four doubles corresponding to
/// the four comparison results: zero if the comparison is false, and all 1's
/// if the comparison is true.
/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
/// If either value in a comparison is NaN, comparisons that are ordered
/// return false, and comparisons that are unordered return true.
///
/// \headerfile <x86intrin.h>
///
@ -1791,9 +1790,9 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// [8 x float], using the operation specified by the immediate integer
/// operand.
///
/// Returns a [8 x float] vector consisting of eight floats corresponding to
/// the eight comparison results: zero if the comparison is false, and all
/// 1's if the comparison is true.
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
/// If either value in a comparison is NaN, comparisons that are ordered
/// return false, and comparisons that are unordered return true.
///
/// \headerfile <x86intrin.h>
///
@ -1847,12 +1846,14 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \
(__v8sf)(__m256)(b), (c)))
/* Below intrinsic defined in emmintrin.h can be used for AVX */
/// Compares each of the corresponding scalar double-precision values of
/// two 128-bit vectors of [2 x double], using the operation specified by the
/// immediate integer operand.
///
/// If the result is true, all 64 bits of the destination vector are set;
/// otherwise they are cleared.
/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
/// If either value in a comparison is NaN, comparisons that are ordered
/// return false, and comparisons that are unordered return true.
///
/// \headerfile <x86intrin.h>
///
@ -1902,16 +1903,16 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// 0x1E: Greater-than (ordered, non-signaling) \n
/// 0x1F: True (unordered, signaling)
/// \returns A 128-bit vector of [2 x double] containing the comparison results.
#define _mm_cmp_sd(a, b, c) \
((__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \
(__v2df)(__m128d)(b), (c)))
/// \fn __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c)
/* Below intrinsic defined in xmmintrin.h can be used for AVX */
/// Compares each of the corresponding scalar values of two 128-bit
/// vectors of [4 x float], using the operation specified by the immediate
/// integer operand.
///
/// If the result is true, all 32 bits of the destination vector are set;
/// otherwise they are cleared.
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
/// If either value in a comparison is NaN, comparisons that are ordered
/// return false, and comparisons that are unordered return true.
///
/// \headerfile <x86intrin.h>
///
@ -1961,9 +1962,7 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// 0x1E: Greater-than (ordered, non-signaling) \n
/// 0x1F: True (unordered, signaling)
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
#define _mm_cmp_ss(a, b, c) \
((__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \
(__v4sf)(__m128)(b), (c)))
/// \fn __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c)
/// Takes a [8 x i32] vector and returns the vector element value
/// indexed by the immediate constant operand.
@ -2213,6 +2212,10 @@ _mm256_cvtpd_ps(__m256d __a)
/// Converts a vector of [8 x float] into a vector of [8 x i32].
///
/// If a converted value does not fit in a 32-bit integer, raises a
/// floating-point invalid exception. If the exception is masked, returns
/// the most negative integer.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTPS2DQ </c> instruction.
@ -2242,9 +2245,13 @@ _mm256_cvtps_pd(__m128 __a)
return (__m256d)__builtin_convertvector((__v4sf)__a, __v4df);
}
/// Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4
/// x i32], truncating the result by rounding towards zero when it is
/// inexact.
/// Converts a 256-bit vector of [4 x double] into four signed truncated
/// (rounded toward zero) 32-bit integers returned in a 128-bit vector of
/// [4 x i32].
///
/// If a converted value does not fit in a 32-bit integer, raises a
/// floating-point invalid exception. If the exception is masked, returns
/// the most negative integer.
///
/// \headerfile <x86intrin.h>
///
@ -2259,9 +2266,12 @@ _mm256_cvttpd_epi32(__m256d __a)
return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a);
}
/// Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4
/// x i32]. When a conversion is inexact, the value returned is rounded
/// according to the rounding control bits in the MXCSR register.
/// Converts a 256-bit vector of [4 x double] into a 128-bit vector of
/// [4 x i32].
///
/// If a converted value does not fit in a 32-bit integer, raises a
/// floating-point invalid exception. If the exception is masked, returns
/// the most negative integer.
///
/// \headerfile <x86intrin.h>
///
@ -2276,8 +2286,12 @@ _mm256_cvtpd_epi32(__m256d __a)
return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a);
}
/// Converts a vector of [8 x float] into a vector of [8 x i32],
/// truncating the result by rounding towards zero when it is inexact.
/// Converts a vector of [8 x float] into eight signed truncated (rounded
/// toward zero) 32-bit integers returned in a vector of [8 x i32].
///
/// If a converted value does not fit in a 32-bit integer, raises a
/// floating-point invalid exception. If the exception is masked, returns
/// the most negative integer.
///
/// \headerfile <x86intrin.h>
///

View File

@ -161,8 +161,7 @@ _mm_tzcnt_64(unsigned long long __X)
#undef __RELAXED_FN_ATTRS
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__BMI__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__BMI__)
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi")))
@ -610,7 +609,6 @@ __blsr_u64(unsigned long long __X)
#undef __DEFAULT_FN_ATTRS
#endif /* !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) \
|| defined(__BMI__) */
#endif /* !defined(__SCE__) || __has_feature(modules) || defined(__BMI__) */
#endif /* __BMIINTRIN_H */

View File

@ -13,4 +13,7 @@
#ifndef __BUILTINS_H
#define __BUILTINS_H
#if defined(__MVS__) && __has_include_next(<builtins.h>)
#include_next <builtins.h>
#endif /* __MVS__ */
#endif /* __BUILTINS_H */

26
lib/include/cpuid.h vendored
View File

@ -10,7 +10,7 @@
#ifndef __CPUID_H
#define __CPUID_H
#if !(__x86_64__ || __i386__)
#if !defined(__x86_64__) && !defined(__i386__)
#error this header is for x86 only
#endif
@ -200,6 +200,9 @@
#define bit_AMXINT8 0x02000000
/* Features in %eax for leaf 7 sub-leaf 1 */
#define bit_SHA512 0x00000001
#define bit_SM3 0x00000002
#define bit_SM4 0x00000004
#define bit_RAOINT 0x00000008
#define bit_AVXVNNI 0x00000010
#define bit_AVX512BF16 0x00000020
@ -211,7 +214,12 @@
/* Features in %edx for leaf 7 sub-leaf 1 */
#define bit_AVXVNNIINT8 0x00000010
#define bit_AVXNECONVERT 0x00000020
#define bit_AMXCOMPLEX 0x00000100
#define bit_AVXVNNIINT16 0x00000400
#define bit_PREFETCHI 0x00004000
#define bit_USERMSR 0x00008000
#define bit_AVX10 0x00080000
#define bit_APXF 0x00200000
/* Features in %eax for leaf 13 sub-leaf 1 */
#define bit_XSAVEOPT 0x00000001
@ -244,8 +252,11 @@
#define bit_RDPRU 0x00000010
#define bit_WBNOINVD 0x00000200
/* Features in %ebx for leaf 0x24 */
#define bit_AVX10_256 0x00020000
#define bit_AVX10_512 0x00040000
#if __i386__
#ifdef __i386__
#define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \
__asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \
: "0"(__leaf))
@ -274,7 +285,7 @@ static __inline unsigned int __get_cpuid_max (unsigned int __leaf,
unsigned int *__sig)
{
unsigned int __eax, __ebx, __ecx, __edx;
#if __i386__
#ifdef __i386__
int __cpuid_supported;
__asm(" pushfl\n"
@ -328,4 +339,13 @@ static __inline int __get_cpuid_count (unsigned int __leaf,
return 1;
}
// In some configurations, __cpuidex is defined as a builtin (primarily
// -fms-extensions) which will conflict with the __cpuidex definition below.
#if !(__has_builtin(__cpuidex))
static __inline void __cpuidex(int __cpu_info[4], int __leaf, int __subleaf) {
__cpuid_count(__leaf, __subleaf, __cpu_info[0], __cpu_info[1], __cpu_info[2],
__cpu_info[3]);
}
#endif
#endif /* __CPUID_H */

View File

@ -99,7 +99,7 @@ template <class __T>
__attribute__((enable_if(true, "")))
inline _CPP14_CONSTEXPR __host__ __device__ const __T &
min(const __T &__a, const __T &__b) {
return __a < __b ? __a : __b;
return __b < __a ? __b : __a;
}
#pragma pop_macro("_CPP14_CONSTEXPR")

File diff suppressed because it is too large Load Diff

28
lib/include/float.h vendored
View File

@ -10,6 +10,10 @@
#ifndef __CLANG_FLOAT_H
#define __CLANG_FLOAT_H
#if defined(__MVS__) && __has_include_next(<float.h>)
#include_next <float.h>
#else
/* If we're on MinGW, fall back to the system's float.h, which might have
* additional definitions provided for Windows.
* For more details see http://msdn.microsoft.com/en-us/library/y0ybw9fy.aspx
@ -82,6 +86,18 @@
# undef DBL_HAS_SUBNORM
# undef LDBL_HAS_SUBNORM
# endif
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) || \
!defined(__STRICT_ANSI__)
# undef FLT_NORM_MAX
# undef DBL_NORM_MAX
# undef LDBL_NORM_MAX
#endif
#endif
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) || \
!defined(__STRICT_ANSI__)
# undef INFINITY
# undef NAN
#endif
/* Characteristics of floating point types, C99 5.2.4.2.2 */
@ -151,6 +167,17 @@
# define LDBL_HAS_SUBNORM __LDBL_HAS_DENORM__
#endif
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) || \
!defined(__STRICT_ANSI__)
/* C23 5.2.5.3.3p29-30 */
# define INFINITY (__builtin_inff())
# define NAN (__builtin_nanf(""))
/* C23 5.2.5.3.3p32 */
# define FLT_NORM_MAX __FLT_NORM_MAX__
# define DBL_NORM_MAX __DBL_NORM_MAX__
# define LDBL_NORM_MAX __LDBL_NORM_MAX__
#endif
#ifdef __STDC_WANT_IEC_60559_TYPES_EXT__
# define FLT16_MANT_DIG __FLT16_MANT_DIG__
# define FLT16_DECIMAL_DIG __FLT16_DECIMAL_DIG__
@ -165,4 +192,5 @@
# define FLT16_TRUE_MIN __FLT16_TRUE_MIN__
#endif /* __STDC_WANT_IEC_60559_TYPES_EXT__ */
#endif /* __MVS__ */
#endif /* __CLANG_FLOAT_H */

View File

@ -60,7 +60,8 @@ _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
/// Computes a scalar multiply-add of the single-precision values in the
/// low 32 bits of 128-bit vectors of [4 x float].
/// \code
///
/// \code{.operation}
/// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0]
/// result[127:32] = __A[127:32]
/// \endcode
@ -88,7 +89,8 @@ _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
/// Computes a scalar multiply-add of the double-precision values in the
/// low 64 bits of 128-bit vectors of [2 x double].
/// \code
///
/// \code{.operation}
/// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0]
/// result[127:64] = __A[127:64]
/// \endcode
@ -156,7 +158,8 @@ _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
/// Computes a scalar multiply-subtract of the single-precision values in
/// the low 32 bits of 128-bit vectors of [4 x float].
/// \code
///
/// \code{.operation}
/// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0]
/// result[127:32] = __A[127:32]
/// \endcode
@ -184,7 +187,8 @@ _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
/// Computes a scalar multiply-subtract of the double-precision values in
/// the low 64 bits of 128-bit vectors of [2 x double].
/// \code
///
/// \code{.operation}
/// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0]
/// result[127:64] = __A[127:64]
/// \endcode
@ -252,7 +256,8 @@ _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
/// Computes a scalar negated multiply-add of the single-precision values in
/// the low 32 bits of 128-bit vectors of [4 x float].
/// \code
///
/// \code{.operation}
/// result[31:0] = -(__A[31:0] * __B[31:0]) + __C[31:0]
/// result[127:32] = __A[127:32]
/// \endcode
@ -280,7 +285,8 @@ _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
/// Computes a scalar negated multiply-add of the double-precision values
/// in the low 64 bits of 128-bit vectors of [2 x double].
/// \code
///
/// \code{.operation}
/// result[63:0] = -(__A[63:0] * __B[63:0]) + __C[63:0]
/// result[127:64] = __A[127:64]
/// \endcode
@ -348,7 +354,8 @@ _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
/// Computes a scalar negated multiply-subtract of the single-precision
/// values in the low 32 bits of 128-bit vectors of [4 x float].
/// \code
///
/// \code{.operation}
/// result[31:0] = -(__A[31:0] * __B[31:0]) - __C[31:0]
/// result[127:32] = __A[127:32]
/// \endcode
@ -376,7 +383,8 @@ _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
/// Computes a scalar negated multiply-subtract of the double-precision
/// values in the low 64 bits of 128-bit vectors of [2 x double].
/// \code
///
/// \code{.operation}
/// result[63:0] = -(__A[63:0] * __B[63:0]) - __C[63:0]
/// result[127:64] = __A[127:64]
/// \endcode
@ -404,7 +412,8 @@ _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
/// Computes a multiply with alternating add/subtract of 128-bit vectors of
/// [4 x float].
/// \code
///
/// \code{.operation}
/// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0]
/// result[63:32] = (__A[63:32] * __B[63:32]) + __C[63:32]
/// result[95:64] = (__A[95:64] * __B[95:64]) - __C[95:64]
@ -430,7 +439,8 @@ _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)
/// Computes a multiply with alternating add/subtract of 128-bit vectors of
/// [2 x double].
/// \code
///
/// \code{.operation}
/// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0]
/// result[127:64] = (__A[127:64] * __B[127:64]) + __C[127:64]
/// \endcode
@ -454,7 +464,8 @@ _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)
/// Computes a multiply with alternating add/subtract of 128-bit vectors of
/// [4 x float].
/// \code
///
/// \code{.operation}
/// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0]
/// result[63:32] = (__A[63:32] * __B[63:32]) - __C[63:32]
/// result[95:64] = (__A[95:64] * __B[95:64]) + __C[95:64]
@ -480,7 +491,8 @@ _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)
/// Computes a multiply with alternating add/subtract of 128-bit vectors of
/// [2 x double].
/// \code
///
/// \code{.operation}
/// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0]
/// result[127:64] = (__A[127:64] * __B[127:64]) - __C[127:64]
/// \endcode
@ -664,7 +676,8 @@ _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
/// Computes a multiply with alternating add/subtract of 256-bit vectors of
/// [8 x float].
/// \code
///
/// \code{.operation}
/// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0]
/// result[63:32] = (__A[63:32] * __B[63:32]) + __C[63:32]
/// result[95:64] = (__A[95:64] * __B[95:64]) - __C[95:64]
@ -694,7 +707,8 @@ _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)
/// Computes a multiply with alternating add/subtract of 256-bit vectors of
/// [4 x double].
/// \code
///
/// \code{.operation}
/// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0]
/// result[127:64] = (__A[127:64] * __B[127:64]) + __C[127:64]
/// result[191:128] = (__A[191:128] * __B[191:128]) - __C[191:128]
@ -720,7 +734,8 @@ _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)
/// Computes a vector multiply with alternating add/subtract of 256-bit
/// vectors of [8 x float].
/// \code
///
/// \code{.operation}
/// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0]
/// result[63:32] = (__A[63:32] * __B[63:32]) - __C[63:32]
/// result[95:64] = (__A[95:64] * __B[95:64]) + __C[95:64]
@ -750,7 +765,8 @@ _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)
/// Computes a vector multiply with alternating add/subtract of 256-bit
/// vectors of [4 x double].
/// \code
///
/// \code{.operation}
/// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0]
/// result[127:64] = (__A[127:64] * __B[127:64]) - __C[127:64]
/// result[191:128] = (__A[191:128] * __B[191:128]) + __C[191:128]

View File

@ -26,8 +26,8 @@
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
#endif
/// Find the first set bit starting from the lsb. Result is undefined if
/// input is 0.
/// Finds the first set bit starting from the least significant bit. The result
/// is undefined if the input is 0.
///
/// \headerfile <x86intrin.h>
///
@ -43,8 +43,8 @@ __bsfd(int __A) {
return __builtin_ctz((unsigned int)__A);
}
/// Find the first set bit starting from the msb. Result is undefined if
/// input is 0.
/// Finds the first set bit starting from the most significant bit. The result
/// is undefined if the input is 0.
///
/// \headerfile <x86intrin.h>
///
@ -90,8 +90,8 @@ _bswap(int __A) {
return (int)__builtin_bswap32((unsigned int)__A);
}
/// Find the first set bit starting from the lsb. Result is undefined if
/// input is 0.
/// Finds the first set bit starting from the least significant bit. The result
/// is undefined if the input is 0.
///
/// \headerfile <x86intrin.h>
///
@ -108,8 +108,8 @@ _bswap(int __A) {
/// \see __bsfd
#define _bit_scan_forward(A) __bsfd((A))
/// Find the first set bit starting from the msb. Result is undefined if
/// input is 0.
/// Finds the first set bit starting from the most significant bit. The result
/// is undefined if the input is 0.
///
/// \headerfile <x86intrin.h>
///
@ -127,8 +127,8 @@ _bswap(int __A) {
#define _bit_scan_reverse(A) __bsrd((A))
#ifdef __x86_64__
/// Find the first set bit starting from the lsb. Result is undefined if
/// input is 0.
/// Finds the first set bit starting from the least significant bit. The result
/// is undefined if the input is 0.
///
/// \headerfile <x86intrin.h>
///
@ -143,8 +143,8 @@ __bsfq(long long __A) {
return (long long)__builtin_ctzll((unsigned long long)__A);
}
/// Find the first set bit starting from the msb. Result is undefined if
/// input is 0.
/// Finds the first set bit starting from the most significant bit. The result
/// is undefined if input is 0.
///
/// \headerfile <x86intrin.h>
///
@ -159,7 +159,7 @@ __bsrq(long long __A) {
return 63 - __builtin_clzll((unsigned long long)__A);
}
/// Swaps the bytes in the input. Converting little endian to big endian or
/// Swaps the bytes in the input, converting little endian to big endian or
/// vice versa.
///
/// \headerfile <x86intrin.h>
@ -175,7 +175,7 @@ __bswapq(long long __A) {
return (long long)__builtin_bswap64((unsigned long long)__A);
}
/// Swaps the bytes in the input. Converting little endian to big endian or
/// Swaps the bytes in the input, converting little endian to big endian or
/// vice versa.
///
/// \headerfile <x86intrin.h>
@ -198,7 +198,7 @@ __bswapq(long long __A) {
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c POPCNT instruction or a
/// a sequence of arithmetic and logic ops to calculate it.
/// sequence of arithmetic and logic operations to calculate it.
///
/// \param __A
/// An unsigned 32-bit integer operand.
@ -220,7 +220,7 @@ __popcntd(unsigned int __A)
/// \endcode
///
/// This intrinsic corresponds to the \c POPCNT instruction or a
/// a sequence of arithmetic and logic ops to calculate it.
/// sequence of arithmetic and logic operations to calculate it.
///
/// \param A
/// An unsigned 32-bit integer operand.
@ -235,7 +235,7 @@ __popcntd(unsigned int __A)
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c POPCNT instruction or a
/// a sequence of arithmetic and logic ops to calculate it.
/// sequence of arithmetic and logic operations to calculate it.
///
/// \param __A
/// An unsigned 64-bit integer operand.
@ -257,7 +257,7 @@ __popcntq(unsigned long long __A)
/// \endcode
///
/// This intrinsic corresponds to the \c POPCNT instruction or a
/// a sequence of arithmetic and logic ops to calculate it.
/// sequence of arithmetic and logic operations to calculate it.
///
/// \param A
/// An unsigned 64-bit integer operand.
@ -268,7 +268,7 @@ __popcntq(unsigned long long __A)
#endif /* __x86_64__ */
#ifdef __x86_64__
/// Returns the program status and control \c RFLAGS register with the \c VM
/// Returns the program status-and-control \c RFLAGS register with the \c VM
/// and \c RF flags cleared.
///
/// \headerfile <x86intrin.h>
@ -282,7 +282,7 @@ __readeflags(void)
return __builtin_ia32_readeflags_u64();
}
/// Writes the specified value to the program status and control \c RFLAGS
/// Writes the specified value to the program status-and-control \c RFLAGS
/// register. Reserved bits are not affected.
///
/// \headerfile <x86intrin.h>
@ -298,7 +298,7 @@ __writeeflags(unsigned long long __f)
}
#else /* !__x86_64__ */
/// Returns the program status and control \c EFLAGS register with the \c VM
/// Returns the program status-and-control \c EFLAGS register with the \c VM
/// and \c RF flags cleared.
///
/// \headerfile <x86intrin.h>
@ -312,7 +312,7 @@ __readeflags(void)
return __builtin_ia32_readeflags_u32();
}
/// Writes the specified value to the program status and control \c EFLAGS
/// Writes the specified value to the program status-and-control \c EFLAGS
/// register. Reserved bits are not affected.
///
/// \headerfile <x86intrin.h>
@ -328,7 +328,7 @@ __writeeflags(unsigned int __f)
}
#endif /* !__x86_64__ */
/// Cast a 32-bit float value to a 32-bit unsigned integer value.
/// Casts a 32-bit float value to a 32-bit unsigned integer value.
///
/// \headerfile <x86intrin.h>
///
@ -337,13 +337,13 @@ __writeeflags(unsigned int __f)
///
/// \param __A
/// A 32-bit float value.
/// \returns a 32-bit unsigned integer containing the converted value.
/// \returns A 32-bit unsigned integer containing the converted value.
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CAST
_castf32_u32(float __A) {
return __builtin_bit_cast(unsigned int, __A);
}
/// Cast a 64-bit float value to a 64-bit unsigned integer value.
/// Casts a 64-bit float value to a 64-bit unsigned integer value.
///
/// \headerfile <x86intrin.h>
///
@ -352,13 +352,13 @@ _castf32_u32(float __A) {
///
/// \param __A
/// A 64-bit float value.
/// \returns a 64-bit unsigned integer containing the converted value.
/// \returns A 64-bit unsigned integer containing the converted value.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CAST
_castf64_u64(double __A) {
return __builtin_bit_cast(unsigned long long, __A);
}
/// Cast a 32-bit unsigned integer value to a 32-bit float value.
/// Casts a 32-bit unsigned integer value to a 32-bit float value.
///
/// \headerfile <x86intrin.h>
///
@ -367,13 +367,13 @@ _castf64_u64(double __A) {
///
/// \param __A
/// A 32-bit unsigned integer value.
/// \returns a 32-bit float value containing the converted value.
/// \returns A 32-bit float value containing the converted value.
static __inline__ float __DEFAULT_FN_ATTRS_CAST
_castu32_f32(unsigned int __A) {
return __builtin_bit_cast(float, __A);
}
/// Cast a 64-bit unsigned integer value to a 64-bit float value.
/// Casts a 64-bit unsigned integer value to a 64-bit float value.
///
/// \headerfile <x86intrin.h>
///
@ -382,7 +382,7 @@ _castu32_f32(unsigned int __A) {
///
/// \param __A
/// A 64-bit unsigned integer value.
/// \returns a 64-bit float value containing the converted value.
/// \returns A 64-bit float value containing the converted value.
static __inline__ double __DEFAULT_FN_ATTRS_CAST
_castu64_f64(unsigned long long __A) {
return __builtin_bit_cast(double, __A);
@ -470,7 +470,7 @@ __crc32q(unsigned long long __C, unsigned long long __D)
}
#endif /* __x86_64__ */
/// Reads the specified performance monitoring counter. Refer to your
/// Reads the specified performance-monitoring counter. Refer to your
/// processor's documentation to determine which performance counters are
/// supported.
///
@ -487,7 +487,7 @@ __rdpmc(int __A) {
return __builtin_ia32_rdpmc(__A);
}
/// Reads the processor's time stamp counter and the \c IA32_TSC_AUX MSR
/// Reads the processor's time-stamp counter and the \c IA32_TSC_AUX MSR
/// \c (0xc0000103).
///
/// \headerfile <x86intrin.h>
@ -495,14 +495,14 @@ __rdpmc(int __A) {
/// This intrinsic corresponds to the \c RDTSCP instruction.
///
/// \param __A
/// Address of where to store the 32-bit \c IA32_TSC_AUX value.
/// \returns The 64-bit value of the time stamp counter.
/// The address of where to store the 32-bit \c IA32_TSC_AUX value.
/// \returns The 64-bit value of the time-stamp counter.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__rdtscp(unsigned int *__A) {
return __builtin_ia32_rdtscp(__A);
}
/// Reads the processor's time stamp counter.
/// Reads the processor's time-stamp counter.
///
/// \headerfile <x86intrin.h>
///
@ -512,7 +512,7 @@ __rdtscp(unsigned int *__A) {
///
/// This intrinsic corresponds to the \c RDTSC instruction.
///
/// \returns The 64-bit value of the time stamp counter.
/// \returns The 64-bit value of the time-stamp counter.
#define _rdtsc() __rdtsc()
/// Reads the specified performance monitoring counter. Refer to your

View File

@ -16,281 +16,231 @@
#include <x86gprintrin.h>
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__MMX__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__MMX__)
#include <mmintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__SSE__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE__)
#include <xmmintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__SSE2__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE2__)
#include <emmintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__SSE3__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE3__)
#include <pmmintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__SSSE3__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__SSSE3__)
#include <tmmintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
#if !defined(__SCE__) || __has_feature(modules) || \
(defined(__SSE4_2__) || defined(__SSE4_1__))
#include <smmintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
#if !defined(__SCE__) || __has_feature(modules) || \
(defined(__AES__) || defined(__PCLMUL__))
#include <wmmintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__CLFLUSHOPT__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__CLFLUSHOPT__)
#include <clflushoptintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__CLWB__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__CLWB__)
#include <clwbintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVX__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX__)
#include <avxintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVX2__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX2__)
#include <avx2intrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__F16C__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__F16C__)
#include <f16cintrin.h>
#endif
/* No feature check desired due to internal checks */
#include <bmiintrin.h>
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__BMI2__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__BMI2__)
#include <bmi2intrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__LZCNT__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__LZCNT__)
#include <lzcntintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__POPCNT__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__POPCNT__)
#include <popcntintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__FMA__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__FMA__)
#include <fmaintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVX512F__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512F__)
#include <avx512fintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVX512VL__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VL__)
#include <avx512vlintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVX512BW__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BW__)
#include <avx512bwintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVX512BITALG__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BITALG__)
#include <avx512bitalgintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVX512CD__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512CD__)
#include <avx512cdintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVX512VPOPCNTDQ__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
#include <avx512vpopcntdqintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
#if !defined(__SCE__) || __has_feature(modules) || \
(defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
#include <avx512vpopcntdqvlintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVX512VNNI__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VNNI__)
#include <avx512vnniintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
#if !defined(__SCE__) || __has_feature(modules) || \
(defined(__AVX512VL__) && defined(__AVX512VNNI__))
#include <avx512vlvnniintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVXVNNI__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNI__)
#include <avxvnniintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVX512DQ__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512DQ__)
#include <avx512dqintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
#if !defined(__SCE__) || __has_feature(modules) || \
(defined(__AVX512VL__) && defined(__AVX512BITALG__))
#include <avx512vlbitalgintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
#if !defined(__SCE__) || __has_feature(modules) || \
(defined(__AVX512VL__) && defined(__AVX512BW__))
#include <avx512vlbwintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
#if !defined(__SCE__) || __has_feature(modules) || \
(defined(__AVX512VL__) && defined(__AVX512CD__))
#include <avx512vlcdintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
#if !defined(__SCE__) || __has_feature(modules) || \
(defined(__AVX512VL__) && defined(__AVX512DQ__))
#include <avx512vldqintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVX512ER__)
#include <avx512erintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVX512IFMA__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512IFMA__)
#include <avx512ifmaintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
#if !defined(__SCE__) || __has_feature(modules) || \
(defined(__AVX512IFMA__) && defined(__AVX512VL__))
#include <avx512ifmavlintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVXIFMA__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXIFMA__)
#include <avxifmaintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVX512VBMI__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VBMI__)
#include <avx512vbmiintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
#if !defined(__SCE__) || __has_feature(modules) || \
(defined(__AVX512VBMI__) && defined(__AVX512VL__))
#include <avx512vbmivlintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVX512VBMI2__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VBMI2__)
#include <avx512vbmi2intrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
#if !defined(__SCE__) || __has_feature(modules) || \
(defined(__AVX512VBMI2__) && defined(__AVX512VL__))
#include <avx512vlvbmi2intrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVX512PF__)
#include <avx512pfintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVX512FP16__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512FP16__)
#include <avx512fp16intrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
#if !defined(__SCE__) || __has_feature(modules) || \
(defined(__AVX512VL__) && defined(__AVX512FP16__))
#include <avx512vlfp16intrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVX512BF16__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BF16__)
#include <avx512bf16intrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
#if !defined(__SCE__) || __has_feature(modules) || \
(defined(__AVX512VL__) && defined(__AVX512BF16__))
#include <avx512vlbf16intrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__PKU__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__PKU__)
#include <pkuintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__VPCLMULQDQ__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__VPCLMULQDQ__)
#include <vpclmulqdqintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__VAES__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__VAES__)
#include <vaesintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__GFNI__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__GFNI__)
#include <gfniintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVXVNNIINT8__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNIINT8__)
#include <avxvnniint8intrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVXNECONVERT__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXNECONVERT__)
#include <avxneconvertintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__SHA512__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__SHA512__)
#include <sha512intrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__SM3__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__SM3__)
#include <sm3intrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__SM4__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__SM4__)
#include <sm4intrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVXVNNIINT16__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNIINT16__)
#include <avxvnniint16intrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__RDPID__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__RDPID__)
/// Reads the value of the IA32_TSC_AUX MSR (0xc0000103).
///
/// \headerfile <immintrin.h>
@ -304,8 +254,7 @@ _rdpid_u32(void) {
}
#endif // __RDPID__
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__RDRND__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__RDRND__)
/// Returns a 16-bit hardware-generated random value.
///
/// \headerfile <immintrin.h>
@ -367,8 +316,7 @@ _rdrand64_step(unsigned long long *__p)
}
#endif /* __RDRND__ */
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__FSGSBASE__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__FSGSBASE__)
#ifdef __x86_64__
/// Reads the FS base register.
///
@ -481,8 +429,7 @@ _writegsbase_u64(unsigned long long __V)
#endif
#endif /* __FSGSBASE__ */
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__MOVBE__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__MOVBE__)
/* The structs used below are to force the load/store to be unaligned. This
* is accomplished with the __packed__ attribute. The __may_alias__ prevents
@ -598,139 +545,118 @@ _storebe_i64(void * __P, long long __D) {
#endif
#endif /* __MOVBE */
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__RTM__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__RTM__)
#include <rtmintrin.h>
#include <xtestintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__SHA__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__SHA__)
#include <shaintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__FXSR__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__FXSR__)
#include <fxsrintrin.h>
#endif
/* No feature check desired due to internal MSC_VER checks */
#include <xsaveintrin.h>
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__XSAVEOPT__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVEOPT__)
#include <xsaveoptintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__XSAVEC__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVEC__)
#include <xsavecintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__XSAVES__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVES__)
#include <xsavesintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__SHSTK__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__SHSTK__)
#include <cetintrin.h>
#endif
/* Intrinsics inside adcintrin.h are available at all times. */
#include <adcintrin.h>
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__ADX__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__ADX__)
#include <adxintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__RDSEED__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__RDSEED__)
#include <rdseedintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__WBNOINVD__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__WBNOINVD__)
#include <wbnoinvdintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__CLDEMOTE__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__CLDEMOTE__)
#include <cldemoteintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__WAITPKG__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__WAITPKG__)
#include <waitpkgintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__MOVDIRI__) || defined(__MOVDIR64B__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__MOVDIRI__) || \
defined(__MOVDIR64B__)
#include <movdirintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__PCONFIG__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__PCONFIG__)
#include <pconfigintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__SGX__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__SGX__)
#include <sgxintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__PTWRITE__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__PTWRITE__)
#include <ptwriteintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__INVPCID__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__INVPCID__)
#include <invpcidintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AMX_FP16__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_FP16__)
#include <amxfp16intrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__KL__) || defined(__WIDEKL__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__KL__) || \
defined(__WIDEKL__)
#include <keylockerintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AMX_TILE__) || defined(__AMX_INT8__) || defined(__AMX_BF16__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_TILE__) || \
defined(__AMX_INT8__) || defined(__AMX_BF16__)
#include <amxintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AMX_COMPLEX__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_COMPLEX__)
#include <amxcomplexintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
#if !defined(__SCE__) || __has_feature(modules) || \
defined(__AVX512VP2INTERSECT__)
#include <avx512vp2intersectintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
#if !defined(__SCE__) || __has_feature(modules) || \
(defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__))
#include <avx512vlvp2intersectintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__ENQCMD__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__)
#include <enqcmdintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__SERIALIZE__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__SERIALIZE__)
#include <serializeintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__TSXLDTRK__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__TSXLDTRK__)
#include <tsxldtrkintrin.h>
#endif

272
lib/include/intrin.h vendored
View File

@ -15,8 +15,10 @@
#ifndef __INTRIN_H
#define __INTRIN_H
#include <intrin0.h>
/* First include the standard intrinsics. */
#if defined(__i386__) || defined(__x86_64__)
#if defined(__i386__) || (defined(__x86_64__) && !defined(__arm64ec__))
#include <x86intrin.h>
#endif
@ -24,7 +26,7 @@
#include <armintr.h>
#endif
#if defined(__aarch64__)
#if defined(__aarch64__) || defined(__arm64ec__)
#include <arm64intr.h>
#endif
@ -131,8 +133,6 @@ void __writefsqword(unsigned long, unsigned __int64);
void __writefsword(unsigned long, unsigned short);
void __writemsr(unsigned long, unsigned __int64);
void *_AddressOfReturnAddress(void);
unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
unsigned char _bittest(long const *, long);
unsigned char _bittestandcomplement(long *, long);
unsigned char _bittestandreset(long *, long);
@ -151,7 +151,6 @@ long _InterlockedExchangeAdd_HLERelease(long volatile *, long);
__int64 _InterlockedExchangeAdd64_HLEAcquire(__int64 volatile *, __int64);
__int64 _InterlockedExchangeAdd64_HLERelease(__int64 volatile *, __int64);
void _ReadBarrier(void);
void _ReadWriteBarrier(void);
unsigned int _rorx_u32(unsigned int, const unsigned int);
int _sarx_i32(int, unsigned int);
#if __STDC_HOSTED__
@ -167,7 +166,7 @@ unsigned __int32 xbegin(void);
void _xend(void);
/* These additional intrinsics are turned on in x64/amd64/x86_64 mode. */
#ifdef __x86_64__
#if defined(__x86_64__) && !defined(__arm64ec__)
void __addgsbyte(unsigned long, unsigned char);
void __addgsdword(unsigned long, unsigned long);
void __addgsqword(unsigned long, unsigned __int64);
@ -182,12 +181,6 @@ unsigned char __readgsbyte(unsigned long);
unsigned long __readgsdword(unsigned long);
unsigned __int64 __readgsqword(unsigned long);
unsigned short __readgsword(unsigned long);
unsigned __int64 __shiftleft128(unsigned __int64 _LowPart,
unsigned __int64 _HighPart,
unsigned char _Shift);
unsigned __int64 __shiftright128(unsigned __int64 _LowPart,
unsigned __int64 _HighPart,
unsigned char _Shift);
void __stosq(unsigned __int64 *, unsigned __int64, size_t);
unsigned char __vmx_on(unsigned __int64 *);
unsigned char __vmx_vmclear(unsigned __int64 *);
@ -236,216 +229,15 @@ unsigned __int64 _shlx_u64(unsigned __int64, unsigned int);
unsigned __int64 _shrx_u64(unsigned __int64, unsigned int);
__int64 __mulh(__int64, __int64);
unsigned __int64 __umulh(unsigned __int64, unsigned __int64);
__int64 _mul128(__int64, __int64, __int64*);
unsigned __int64 _umul128(unsigned __int64,
unsigned __int64,
unsigned __int64*);
__int64 _mul128(__int64, __int64, __int64 *);
#endif /* __x86_64__ */
#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
#endif
#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
__int64 _InterlockedDecrement64(__int64 volatile *_Addend);
__int64 _InterlockedExchange64(__int64 volatile *_Target, __int64 _Value);
__int64 _InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value);
__int64 _InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value);
__int64 _InterlockedIncrement64(__int64 volatile *_Addend);
__int64 _InterlockedOr64(__int64 volatile *_Value, __int64 _Mask);
__int64 _InterlockedXor64(__int64 volatile *_Value, __int64 _Mask);
__int64 _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask);
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Exchange Add
\*----------------------------------------------------------------------------*/
#if defined(__arm__) || defined(__aarch64__)
char _InterlockedExchangeAdd8_acq(char volatile *_Addend, char _Value);
char _InterlockedExchangeAdd8_nf(char volatile *_Addend, char _Value);
char _InterlockedExchangeAdd8_rel(char volatile *_Addend, char _Value);
short _InterlockedExchangeAdd16_acq(short volatile *_Addend, short _Value);
short _InterlockedExchangeAdd16_nf(short volatile *_Addend, short _Value);
short _InterlockedExchangeAdd16_rel(short volatile *_Addend, short _Value);
long _InterlockedExchangeAdd_acq(long volatile *_Addend, long _Value);
long _InterlockedExchangeAdd_nf(long volatile *_Addend, long _Value);
long _InterlockedExchangeAdd_rel(long volatile *_Addend, long _Value);
__int64 _InterlockedExchangeAdd64_acq(__int64 volatile *_Addend, __int64 _Value);
__int64 _InterlockedExchangeAdd64_nf(__int64 volatile *_Addend, __int64 _Value);
__int64 _InterlockedExchangeAdd64_rel(__int64 volatile *_Addend, __int64 _Value);
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Increment
\*----------------------------------------------------------------------------*/
#if defined(__arm__) || defined(__aarch64__)
short _InterlockedIncrement16_acq(short volatile *_Value);
short _InterlockedIncrement16_nf(short volatile *_Value);
short _InterlockedIncrement16_rel(short volatile *_Value);
long _InterlockedIncrement_acq(long volatile *_Value);
long _InterlockedIncrement_nf(long volatile *_Value);
long _InterlockedIncrement_rel(long volatile *_Value);
__int64 _InterlockedIncrement64_acq(__int64 volatile *_Value);
__int64 _InterlockedIncrement64_nf(__int64 volatile *_Value);
__int64 _InterlockedIncrement64_rel(__int64 volatile *_Value);
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Decrement
\*----------------------------------------------------------------------------*/
#if defined(__arm__) || defined(__aarch64__)
short _InterlockedDecrement16_acq(short volatile *_Value);
short _InterlockedDecrement16_nf(short volatile *_Value);
short _InterlockedDecrement16_rel(short volatile *_Value);
long _InterlockedDecrement_acq(long volatile *_Value);
long _InterlockedDecrement_nf(long volatile *_Value);
long _InterlockedDecrement_rel(long volatile *_Value);
__int64 _InterlockedDecrement64_acq(__int64 volatile *_Value);
__int64 _InterlockedDecrement64_nf(__int64 volatile *_Value);
__int64 _InterlockedDecrement64_rel(__int64 volatile *_Value);
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked And
\*----------------------------------------------------------------------------*/
#if defined(__arm__) || defined(__aarch64__)
char _InterlockedAnd8_acq(char volatile *_Value, char _Mask);
char _InterlockedAnd8_nf(char volatile *_Value, char _Mask);
char _InterlockedAnd8_rel(char volatile *_Value, char _Mask);
short _InterlockedAnd16_acq(short volatile *_Value, short _Mask);
short _InterlockedAnd16_nf(short volatile *_Value, short _Mask);
short _InterlockedAnd16_rel(short volatile *_Value, short _Mask);
long _InterlockedAnd_acq(long volatile *_Value, long _Mask);
long _InterlockedAnd_nf(long volatile *_Value, long _Mask);
long _InterlockedAnd_rel(long volatile *_Value, long _Mask);
__int64 _InterlockedAnd64_acq(__int64 volatile *_Value, __int64 _Mask);
__int64 _InterlockedAnd64_nf(__int64 volatile *_Value, __int64 _Mask);
__int64 _InterlockedAnd64_rel(__int64 volatile *_Value, __int64 _Mask);
#endif
/*----------------------------------------------------------------------------*\
|* Bit Counting and Testing
\*----------------------------------------------------------------------------*/
#if defined(__arm__) || defined(__aarch64__)
unsigned char _interlockedbittestandset_acq(long volatile *_BitBase,
long _BitPos);
unsigned char _interlockedbittestandset_nf(long volatile *_BitBase,
long _BitPos);
unsigned char _interlockedbittestandset_rel(long volatile *_BitBase,
long _BitPos);
unsigned char _interlockedbittestandreset_acq(long volatile *_BitBase,
long _BitPos);
unsigned char _interlockedbittestandreset_nf(long volatile *_BitBase,
long _BitPos);
unsigned char _interlockedbittestandreset_rel(long volatile *_BitBase,
long _BitPos);
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Or
\*----------------------------------------------------------------------------*/
#if defined(__arm__) || defined(__aarch64__)
char _InterlockedOr8_acq(char volatile *_Value, char _Mask);
char _InterlockedOr8_nf(char volatile *_Value, char _Mask);
char _InterlockedOr8_rel(char volatile *_Value, char _Mask);
short _InterlockedOr16_acq(short volatile *_Value, short _Mask);
short _InterlockedOr16_nf(short volatile *_Value, short _Mask);
short _InterlockedOr16_rel(short volatile *_Value, short _Mask);
long _InterlockedOr_acq(long volatile *_Value, long _Mask);
long _InterlockedOr_nf(long volatile *_Value, long _Mask);
long _InterlockedOr_rel(long volatile *_Value, long _Mask);
__int64 _InterlockedOr64_acq(__int64 volatile *_Value, __int64 _Mask);
__int64 _InterlockedOr64_nf(__int64 volatile *_Value, __int64 _Mask);
__int64 _InterlockedOr64_rel(__int64 volatile *_Value, __int64 _Mask);
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Xor
\*----------------------------------------------------------------------------*/
#if defined(__arm__) || defined(__aarch64__)
char _InterlockedXor8_acq(char volatile *_Value, char _Mask);
char _InterlockedXor8_nf(char volatile *_Value, char _Mask);
char _InterlockedXor8_rel(char volatile *_Value, char _Mask);
short _InterlockedXor16_acq(short volatile *_Value, short _Mask);
short _InterlockedXor16_nf(short volatile *_Value, short _Mask);
short _InterlockedXor16_rel(short volatile *_Value, short _Mask);
long _InterlockedXor_acq(long volatile *_Value, long _Mask);
long _InterlockedXor_nf(long volatile *_Value, long _Mask);
long _InterlockedXor_rel(long volatile *_Value, long _Mask);
__int64 _InterlockedXor64_acq(__int64 volatile *_Value, __int64 _Mask);
__int64 _InterlockedXor64_nf(__int64 volatile *_Value, __int64 _Mask);
__int64 _InterlockedXor64_rel(__int64 volatile *_Value, __int64 _Mask);
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Exchange
\*----------------------------------------------------------------------------*/
#if defined(__arm__) || defined(__aarch64__)
char _InterlockedExchange8_acq(char volatile *_Target, char _Value);
char _InterlockedExchange8_nf(char volatile *_Target, char _Value);
char _InterlockedExchange8_rel(char volatile *_Target, char _Value);
short _InterlockedExchange16_acq(short volatile *_Target, short _Value);
short _InterlockedExchange16_nf(short volatile *_Target, short _Value);
short _InterlockedExchange16_rel(short volatile *_Target, short _Value);
long _InterlockedExchange_acq(long volatile *_Target, long _Value);
long _InterlockedExchange_nf(long volatile *_Target, long _Value);
long _InterlockedExchange_rel(long volatile *_Target, long _Value);
__int64 _InterlockedExchange64_acq(__int64 volatile *_Target, __int64 _Value);
__int64 _InterlockedExchange64_nf(__int64 volatile *_Target, __int64 _Value);
__int64 _InterlockedExchange64_rel(__int64 volatile *_Target, __int64 _Value);
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Compare Exchange
\*----------------------------------------------------------------------------*/
#if defined(__arm__) || defined(__aarch64__)
char _InterlockedCompareExchange8_acq(char volatile *_Destination,
char _Exchange, char _Comparand);
char _InterlockedCompareExchange8_nf(char volatile *_Destination,
char _Exchange, char _Comparand);
char _InterlockedCompareExchange8_rel(char volatile *_Destination,
char _Exchange, char _Comparand);
short _InterlockedCompareExchange16_acq(short volatile *_Destination,
short _Exchange, short _Comparand);
short _InterlockedCompareExchange16_nf(short volatile *_Destination,
short _Exchange, short _Comparand);
short _InterlockedCompareExchange16_rel(short volatile *_Destination,
short _Exchange, short _Comparand);
long _InterlockedCompareExchange_acq(long volatile *_Destination,
long _Exchange, long _Comparand);
long _InterlockedCompareExchange_nf(long volatile *_Destination,
long _Exchange, long _Comparand);
long _InterlockedCompareExchange_rel(long volatile *_Destination,
long _Exchange, long _Comparand);
__int64 _InterlockedCompareExchange64_acq(__int64 volatile *_Destination,
__int64 _Exchange, __int64 _Comparand);
__int64 _InterlockedCompareExchange64_nf(__int64 volatile *_Destination,
__int64 _Exchange, __int64 _Comparand);
__int64 _InterlockedCompareExchange64_rel(__int64 volatile *_Destination,
__int64 _Exchange, __int64 _Comparand);
#endif
#if defined(__x86_64__) || defined(__aarch64__)
unsigned char _InterlockedCompareExchange128(__int64 volatile *_Destination,
__int64 _ExchangeHigh,
__int64 _ExchangeLow,
__int64 *_ComparandResult);
#endif
#if defined(__aarch64__)
unsigned char _InterlockedCompareExchange128_acq(__int64 volatile *_Destination,
__int64 _ExchangeHigh,
__int64 _ExchangeLow,
__int64 *_ComparandResult);
unsigned char _InterlockedCompareExchange128_nf(__int64 volatile *_Destination,
__int64 _ExchangeHigh,
__int64 _ExchangeLow,
__int64 *_ComparandResult);
unsigned char _InterlockedCompareExchange128_rel(__int64 volatile *_Destination,
__int64 _ExchangeHigh,
__int64 _ExchangeLow,
__int64 *_ComparandResult);
#endif
/*----------------------------------------------------------------------------*\
|* movs, stos
\*----------------------------------------------------------------------------*/
#if defined(__i386__) || defined(__x86_64__)
#if defined(__i386__) || (defined(__x86_64__) && !defined(__arm64ec__))
static __inline__ void __DEFAULT_FN_ATTRS __movsb(unsigned char *__dst,
unsigned char const *__src,
size_t __n) {
@ -514,7 +306,7 @@ static __inline__ void __DEFAULT_FN_ATTRS __stosw(unsigned short *__dst,
: "memory");
}
#endif
#ifdef __x86_64__
#if defined(__x86_64__) && !defined(__arm64ec__)
static __inline__ void __DEFAULT_FN_ATTRS __movsq(
unsigned long long *__dst, unsigned long long const *__src, size_t __n) {
__asm__ __volatile__("rep movsq"
@ -533,10 +325,40 @@ static __inline__ void __DEFAULT_FN_ATTRS __stosq(unsigned __int64 *__dst,
/*----------------------------------------------------------------------------*\
|* Misc
\*----------------------------------------------------------------------------*/
#if defined(__i386__) || defined(__x86_64__)
#if defined(__i386__) || (defined(__x86_64__) && !defined(__arm64ec__))
static __inline__ void __DEFAULT_FN_ATTRS __halt(void) {
__asm__ volatile("hlt");
}
static inline unsigned char __inbyte(unsigned short port) {
unsigned char ret;
__asm__ __volatile__("inb %w1, %b0" : "=a"(ret) : "Nd"(port));
return ret;
}
static inline unsigned short __inword(unsigned short port) {
unsigned short ret;
__asm__ __volatile__("inw %w1, %w0" : "=a"(ret) : "Nd"(port));
return ret;
}
static inline unsigned long __indword(unsigned short port) {
unsigned long ret;
__asm__ __volatile__("inl %w1, %k0" : "=a"(ret) : "Nd"(port));
return ret;
}
static inline void __outbyte(unsigned short port, unsigned char data) {
__asm__ __volatile__("outb %b0, %w1" : : "a"(data), "Nd"(port));
}
static inline void __outword(unsigned short port, unsigned short data) {
__asm__ __volatile__("outw %w0, %w1" : : "a"(data), "Nd"(port));
}
static inline void __outdword(unsigned short port, unsigned long data) {
__asm__ __volatile__("outl %k0, %w1" : : "a"(data), "Nd"(port));
}
#endif
#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
@ -548,9 +370,10 @@ static __inline__ void __DEFAULT_FN_ATTRS __nop(void) {
/*----------------------------------------------------------------------------*\
|* MS AArch64 specific
\*----------------------------------------------------------------------------*/
#if defined(__aarch64__)
#if defined(__aarch64__) || defined(__arm64ec__)
unsigned __int64 __getReg(int);
long _InterlockedAdd(long volatile *Addend, long Value);
__int64 _InterlockedAdd64(__int64 volatile *Addend, __int64 Value);
__int64 _ReadStatusReg(int);
void _WriteStatusReg(int, __int64);
@ -582,18 +405,19 @@ unsigned int _CountLeadingOnes(unsigned long);
unsigned int _CountLeadingOnes64(unsigned __int64);
unsigned int _CountLeadingSigns(long);
unsigned int _CountLeadingSigns64(__int64);
unsigned int _CountLeadingZeros(unsigned long);
unsigned int _CountLeadingZeros64(unsigned _int64);
unsigned int _CountOneBits(unsigned long);
unsigned int _CountOneBits64(unsigned __int64);
void __cdecl __prefetch(void *);
unsigned int __hlt(unsigned int, ...);
void __cdecl __prefetch(const void *);
#endif
/*----------------------------------------------------------------------------*\
|* Privileged intrinsics
\*----------------------------------------------------------------------------*/
#if defined(__i386__) || defined(__x86_64__)
#if defined(__i386__) || (defined(__x86_64__) && !defined(__arm64ec__))
static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS
__readmsr(unsigned long __register) {
// Loads the contents of a 64-bit model specific register (MSR) specified in
@ -607,7 +431,6 @@ __readmsr(unsigned long __register) {
__asm__ ("rdmsr" : "=d"(__edx), "=a"(__eax) : "c"(__register));
return (((unsigned __int64)__edx) << 32) | (unsigned __int64)__eax;
}
#endif
static __inline__ unsigned __LPTRINT_TYPE__ __DEFAULT_FN_ATTRS __readcr3(void) {
unsigned __LPTRINT_TYPE__ __cr3_val;
@ -623,6 +446,7 @@ static __inline__ void __DEFAULT_FN_ATTRS
__writecr3(unsigned __INTPTR_TYPE__ __cr3_val) {
__asm__ ("mov {%0, %%cr3|cr3, %0}" : : "r"(__cr3_val) : "memory");
}
#endif
#ifdef __cplusplus
}

247
lib/include/intrin0.h vendored Normal file
View File

@ -0,0 +1,247 @@
/* ===-------- intrin.h ---------------------------------------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
/* Only include this if we're compiling for the windows platform. */
#ifndef _MSC_VER
#include_next <intrin0.h>
#else
#ifndef __INTRIN0_H
#define __INTRIN0_H
#if defined(__x86_64__) && !defined(__arm64ec__)
#include <adcintrin.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
void _ReadWriteBarrier(void);
#if defined(__aarch64__) || defined(__arm64ec__)
unsigned int _CountLeadingZeros(unsigned long);
unsigned int _CountLeadingZeros64(unsigned _int64);
unsigned char _InterlockedCompareExchange128_acq(__int64 volatile *_Destination,
__int64 _ExchangeHigh,
__int64 _ExchangeLow,
__int64 *_ComparandResult);
unsigned char _InterlockedCompareExchange128_nf(__int64 volatile *_Destination,
__int64 _ExchangeHigh,
__int64 _ExchangeLow,
__int64 *_ComparandResult);
unsigned char _InterlockedCompareExchange128_rel(__int64 volatile *_Destination,
__int64 _ExchangeHigh,
__int64 _ExchangeLow,
__int64 *_ComparandResult);
#endif
#ifdef __x86_64__ && !defined(__arm64ec__)
unsigned __int64 _umul128(unsigned __int64, unsigned __int64,
unsigned __int64 *);
unsigned __int64 __shiftleft128(unsigned __int64 _LowPart,
unsigned __int64 _HighPart,
unsigned char _Shift);
unsigned __int64 __shiftright128(unsigned __int64 _LowPart,
unsigned __int64 _HighPart,
unsigned char _Shift);
#endif
#if defined(__i386__) || (defined(__x86_64__) && !defined(__arm64ec__))
void _mm_pause(void);
#endif
#if defined(__x86_64__) || defined(__aarch64__)
unsigned char _InterlockedCompareExchange128(__int64 volatile *_Destination,
__int64 _ExchangeHigh,
__int64 _ExchangeLow,
__int64 *_ComparandResult);
#endif
#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
#endif
#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \
defined(__aarch64__)
__int64 _InterlockedDecrement64(__int64 volatile *_Addend);
__int64 _InterlockedExchange64(__int64 volatile *_Target, __int64 _Value);
__int64 _InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value);
__int64 _InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value);
__int64 _InterlockedIncrement64(__int64 volatile *_Addend);
__int64 _InterlockedOr64(__int64 volatile *_Value, __int64 _Mask);
__int64 _InterlockedXor64(__int64 volatile *_Value, __int64 _Mask);
__int64 _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask);
#endif
#if defined(__arm__) || defined(__aarch64__) || defined(__arm64ec__)
/*----------------------------------------------------------------------------*\
|* Interlocked Exchange Add
\*----------------------------------------------------------------------------*/
char _InterlockedExchangeAdd8_acq(char volatile *_Addend, char _Value);
char _InterlockedExchangeAdd8_nf(char volatile *_Addend, char _Value);
char _InterlockedExchangeAdd8_rel(char volatile *_Addend, char _Value);
short _InterlockedExchangeAdd16_acq(short volatile *_Addend, short _Value);
short _InterlockedExchangeAdd16_nf(short volatile *_Addend, short _Value);
short _InterlockedExchangeAdd16_rel(short volatile *_Addend, short _Value);
long _InterlockedExchangeAdd_acq(long volatile *_Addend, long _Value);
long _InterlockedExchangeAdd_nf(long volatile *_Addend, long _Value);
long _InterlockedExchangeAdd_rel(long volatile *_Addend, long _Value);
__int64 _InterlockedExchangeAdd64_acq(__int64 volatile *_Addend,
__int64 _Value);
__int64 _InterlockedExchangeAdd64_nf(__int64 volatile *_Addend, __int64 _Value);
__int64 _InterlockedExchangeAdd64_rel(__int64 volatile *_Addend,
__int64 _Value);
/*----------------------------------------------------------------------------*\
|* Interlocked Increment
\*----------------------------------------------------------------------------*/
short _InterlockedIncrement16_acq(short volatile *_Value);
short _InterlockedIncrement16_nf(short volatile *_Value);
short _InterlockedIncrement16_rel(short volatile *_Value);
long _InterlockedIncrement_acq(long volatile *_Value);
long _InterlockedIncrement_nf(long volatile *_Value);
long _InterlockedIncrement_rel(long volatile *_Value);
__int64 _InterlockedIncrement64_acq(__int64 volatile *_Value);
__int64 _InterlockedIncrement64_nf(__int64 volatile *_Value);
__int64 _InterlockedIncrement64_rel(__int64 volatile *_Value);
/*----------------------------------------------------------------------------*\
|* Interlocked Decrement
\*----------------------------------------------------------------------------*/
short _InterlockedDecrement16_acq(short volatile *_Value);
short _InterlockedDecrement16_nf(short volatile *_Value);
short _InterlockedDecrement16_rel(short volatile *_Value);
long _InterlockedDecrement_acq(long volatile *_Value);
long _InterlockedDecrement_nf(long volatile *_Value);
long _InterlockedDecrement_rel(long volatile *_Value);
__int64 _InterlockedDecrement64_acq(__int64 volatile *_Value);
__int64 _InterlockedDecrement64_nf(__int64 volatile *_Value);
__int64 _InterlockedDecrement64_rel(__int64 volatile *_Value);
/*----------------------------------------------------------------------------*\
|* Interlocked And
\*----------------------------------------------------------------------------*/
char _InterlockedAnd8_acq(char volatile *_Value, char _Mask);
char _InterlockedAnd8_nf(char volatile *_Value, char _Mask);
char _InterlockedAnd8_rel(char volatile *_Value, char _Mask);
short _InterlockedAnd16_acq(short volatile *_Value, short _Mask);
short _InterlockedAnd16_nf(short volatile *_Value, short _Mask);
short _InterlockedAnd16_rel(short volatile *_Value, short _Mask);
long _InterlockedAnd_acq(long volatile *_Value, long _Mask);
long _InterlockedAnd_nf(long volatile *_Value, long _Mask);
long _InterlockedAnd_rel(long volatile *_Value, long _Mask);
__int64 _InterlockedAnd64_acq(__int64 volatile *_Value, __int64 _Mask);
__int64 _InterlockedAnd64_nf(__int64 volatile *_Value, __int64 _Mask);
__int64 _InterlockedAnd64_rel(__int64 volatile *_Value, __int64 _Mask);
/*----------------------------------------------------------------------------*\
|* Bit Counting and Testing
\*----------------------------------------------------------------------------*/
unsigned char _interlockedbittestandset_acq(long volatile *_BitBase,
long _BitPos);
unsigned char _interlockedbittestandset_nf(long volatile *_BitBase,
long _BitPos);
unsigned char _interlockedbittestandset_rel(long volatile *_BitBase,
long _BitPos);
unsigned char _interlockedbittestandreset_acq(long volatile *_BitBase,
long _BitPos);
unsigned char _interlockedbittestandreset_nf(long volatile *_BitBase,
long _BitPos);
unsigned char _interlockedbittestandreset_rel(long volatile *_BitBase,
long _BitPos);
/*----------------------------------------------------------------------------*\
|* Interlocked Or
\*----------------------------------------------------------------------------*/
char _InterlockedOr8_acq(char volatile *_Value, char _Mask);
char _InterlockedOr8_nf(char volatile *_Value, char _Mask);
char _InterlockedOr8_rel(char volatile *_Value, char _Mask);
short _InterlockedOr16_acq(short volatile *_Value, short _Mask);
short _InterlockedOr16_nf(short volatile *_Value, short _Mask);
short _InterlockedOr16_rel(short volatile *_Value, short _Mask);
long _InterlockedOr_acq(long volatile *_Value, long _Mask);
long _InterlockedOr_nf(long volatile *_Value, long _Mask);
long _InterlockedOr_rel(long volatile *_Value, long _Mask);
__int64 _InterlockedOr64_acq(__int64 volatile *_Value, __int64 _Mask);
__int64 _InterlockedOr64_nf(__int64 volatile *_Value, __int64 _Mask);
__int64 _InterlockedOr64_rel(__int64 volatile *_Value, __int64 _Mask);
/*----------------------------------------------------------------------------*\
|* Interlocked Xor
\*----------------------------------------------------------------------------*/
char _InterlockedXor8_acq(char volatile *_Value, char _Mask);
char _InterlockedXor8_nf(char volatile *_Value, char _Mask);
char _InterlockedXor8_rel(char volatile *_Value, char _Mask);
short _InterlockedXor16_acq(short volatile *_Value, short _Mask);
short _InterlockedXor16_nf(short volatile *_Value, short _Mask);
short _InterlockedXor16_rel(short volatile *_Value, short _Mask);
long _InterlockedXor_acq(long volatile *_Value, long _Mask);
long _InterlockedXor_nf(long volatile *_Value, long _Mask);
long _InterlockedXor_rel(long volatile *_Value, long _Mask);
__int64 _InterlockedXor64_acq(__int64 volatile *_Value, __int64 _Mask);
__int64 _InterlockedXor64_nf(__int64 volatile *_Value, __int64 _Mask);
__int64 _InterlockedXor64_rel(__int64 volatile *_Value, __int64 _Mask);
/*----------------------------------------------------------------------------*\
|* Interlocked Exchange
\*----------------------------------------------------------------------------*/
char _InterlockedExchange8_acq(char volatile *_Target, char _Value);
char _InterlockedExchange8_nf(char volatile *_Target, char _Value);
char _InterlockedExchange8_rel(char volatile *_Target, char _Value);
short _InterlockedExchange16_acq(short volatile *_Target, short _Value);
short _InterlockedExchange16_nf(short volatile *_Target, short _Value);
short _InterlockedExchange16_rel(short volatile *_Target, short _Value);
long _InterlockedExchange_acq(long volatile *_Target, long _Value);
long _InterlockedExchange_nf(long volatile *_Target, long _Value);
long _InterlockedExchange_rel(long volatile *_Target, long _Value);
__int64 _InterlockedExchange64_acq(__int64 volatile *_Target, __int64 _Value);
__int64 _InterlockedExchange64_nf(__int64 volatile *_Target, __int64 _Value);
__int64 _InterlockedExchange64_rel(__int64 volatile *_Target, __int64 _Value);
/*----------------------------------------------------------------------------*\
|* Interlocked Compare Exchange
\*----------------------------------------------------------------------------*/
char _InterlockedCompareExchange8_acq(char volatile *_Destination,
char _Exchange, char _Comparand);
char _InterlockedCompareExchange8_nf(char volatile *_Destination,
char _Exchange, char _Comparand);
char _InterlockedCompareExchange8_rel(char volatile *_Destination,
char _Exchange, char _Comparand);
short _InterlockedCompareExchange16_acq(short volatile *_Destination,
short _Exchange, short _Comparand);
short _InterlockedCompareExchange16_nf(short volatile *_Destination,
short _Exchange, short _Comparand);
short _InterlockedCompareExchange16_rel(short volatile *_Destination,
short _Exchange, short _Comparand);
long _InterlockedCompareExchange_acq(long volatile *_Destination,
long _Exchange, long _Comparand);
long _InterlockedCompareExchange_nf(long volatile *_Destination, long _Exchange,
long _Comparand);
long _InterlockedCompareExchange_rel(long volatile *_Destination,
long _Exchange, long _Comparand);
__int64 _InterlockedCompareExchange64_acq(__int64 volatile *_Destination,
__int64 _Exchange,
__int64 _Comparand);
__int64 _InterlockedCompareExchange64_nf(__int64 volatile *_Destination,
__int64 _Exchange, __int64 _Comparand);
__int64 _InterlockedCompareExchange64_rel(__int64 volatile *_Destination,
__int64 _Exchange,
__int64 _Comparand);
#endif
#ifdef __cplusplus
}
#endif
#endif /* __INTRIN0_H */
#endif /* _MSC_VER */

View File

@ -13,6 +13,9 @@
#if !defined(_AIX) || !defined(_STD_TYPES_T)
#define __CLANG_INTTYPES_H
#endif
#if defined(__MVS__) && __has_include_next(<inttypes.h>)
#include_next <inttypes.h>
#else
#if defined(_MSC_VER) && _MSC_VER < 1800
#error MSVC does not have inttypes.h prior to Visual Studio 2013
@ -94,4 +97,5 @@
#define SCNxFAST32 "x"
#endif
#endif /* __MVS__ */
#endif /* __CLANG_INTTYPES_H */

View File

@ -9,6 +9,9 @@
#ifndef __ISO646_H
#define __ISO646_H
#if defined(__MVS__) && __has_include_next(<iso646.h>)
#include_next <iso646.h>
#else
#ifndef __cplusplus
#define and &&
@ -24,4 +27,5 @@
#define xor_eq ^=
#endif
#endif /* __MVS__ */
#endif /* __ISO646_H */

View File

@ -28,8 +28,7 @@
#ifndef _KEYLOCKERINTRIN_H
#define _KEYLOCKERINTRIN_H
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__KL__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__KL__)
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
@ -327,11 +326,9 @@ _mm_aesdec256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) {
#undef __DEFAULT_FN_ATTRS
#endif /* !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) \
|| defined(__KL__) */
#endif /* !defined(__SCE__ || __has_feature(modules) || defined(__KL__) */
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__WIDEKL__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__WIDEKL__)
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
@ -524,7 +521,7 @@ _mm_aesdecwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void*
#undef __DEFAULT_FN_ATTRS
#endif /* !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) \
|| defined(__WIDEKL__) */
#endif /* !defined(__SCE__) || __has_feature(modules) || defined(__WIDEKL__) \
*/
#endif /* _KEYLOCKERINTRIN_H */

View File

@ -9,6 +9,10 @@
#ifndef __CLANG_LIMITS_H
#define __CLANG_LIMITS_H
#if defined(__MVS__) && __has_include_next(<limits.h>)
#include_next <limits.h>
#else
/* The system's limits.h may, in turn, try to #include_next GCC's limits.h.
Avert this #include_next madness. */
#if defined __GNUC__ && !defined _GCC_LIMITS_H_
@ -122,4 +126,5 @@
#define ULONG_LONG_MAX (__LONG_LONG_MAX__*2ULL+1ULL)
#endif
#endif /* __MVS__ */
#endif /* __CLANG_LIMITS_H */

View File

@ -1,4 +1,4 @@
//===-- Wrapper for C standard assert.h declarations on the GPU ------------===//
//===-- Wrapper for C standard assert.h declarations on the GPU -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.

147
lib/include/mm3dnow.h vendored
View File

@ -7,151 +7,16 @@
*===-----------------------------------------------------------------------===
*/
// 3dNow intrinsics are no longer supported.
#ifndef _MM3DNOW_H_INCLUDED
#define _MM3DNOW_H_INCLUDED
#ifndef _CLANG_DISABLE_CRT_DEPRECATION_WARNINGS
#warning "The <mm3dnow.h> header is deprecated, and 3dNow! intrinsics are unsupported. For other intrinsics, include <x86intrin.h>, instead."
#endif
#include <mmintrin.h>
#include <prfchwintrin.h>
typedef float __v2sf __attribute__((__vector_size__(8)));
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnow"), __min_vector_width__(64)))
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("3dnow")))
_m_femms(void) {
__builtin_ia32_femms();
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pavgusb(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pavgusb((__v8qi)__m1, (__v8qi)__m2);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pf2id(__m64 __m) {
return (__m64)__builtin_ia32_pf2id((__v2sf)__m);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfacc(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfacc((__v2sf)__m1, (__v2sf)__m2);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfadd(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfadd((__v2sf)__m1, (__v2sf)__m2);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfcmpeq(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfcmpeq((__v2sf)__m1, (__v2sf)__m2);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfcmpge(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfcmpge((__v2sf)__m1, (__v2sf)__m2);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfcmpgt(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfcmpgt((__v2sf)__m1, (__v2sf)__m2);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfmax(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfmax((__v2sf)__m1, (__v2sf)__m2);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfmin(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfmin((__v2sf)__m1, (__v2sf)__m2);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfmul(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfmul((__v2sf)__m1, (__v2sf)__m2);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfrcp(__m64 __m) {
return (__m64)__builtin_ia32_pfrcp((__v2sf)__m);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfrcpit1(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfrcpit1((__v2sf)__m1, (__v2sf)__m2);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfrcpit2(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfrcpit2((__v2sf)__m1, (__v2sf)__m2);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfrsqrt(__m64 __m) {
return (__m64)__builtin_ia32_pfrsqrt((__v2sf)__m);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfrsqrtit1(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfrsqit1((__v2sf)__m1, (__v2sf)__m2);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfsub(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfsub((__v2sf)__m1, (__v2sf)__m2);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfsubr(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfsubr((__v2sf)__m1, (__v2sf)__m2);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pi2fd(__m64 __m) {
return (__m64)__builtin_ia32_pi2fd((__v2si)__m);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pmulhrw(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pmulhrw((__v4hi)__m1, (__v4hi)__m2);
}
/* Handle the 3dnowa instructions here. */
#undef __DEFAULT_FN_ATTRS
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnowa"), __min_vector_width__(64)))
static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pf2iw(__m64 __m) {
return (__m64)__builtin_ia32_pf2iw((__v2sf)__m);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfnacc(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfnacc((__v2sf)__m1, (__v2sf)__m2);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfpnacc(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfpnacc((__v2sf)__m1, (__v2sf)__m2);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pi2fw(__m64 __m) {
return (__m64)__builtin_ia32_pi2fw((__v2si)__m);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pswapdsf(__m64 __m) {
return (__m64)__builtin_ia32_pswapdsf((__v2sf)__m);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pswapdsi(__m64 __m) {
return (__m64)__builtin_ia32_pswapdsi((__v2si)__m);
}
#undef __DEFAULT_FN_ATTRS
#endif

160
lib/include/mmintrin.h vendored
View File

@ -105,28 +105,23 @@ _mm_cvtm64_si64(__m64 __m)
return (long long)__m;
}
/// Converts 16-bit signed integers from both 64-bit integer vector
/// parameters of [4 x i16] into 8-bit signed integer values, and constructs
/// a 64-bit integer vector of [8 x i8] as the result. Positive values
/// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80
/// are saturated to 0x80.
/// Converts, with saturation, 16-bit signed integers from both 64-bit integer
/// vector parameters of [4 x i16] into 8-bit signed integer values, and
/// constructs a 64-bit integer vector of [8 x i8] as the result.
///
/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
/// less than 0x80 are saturated to 0x80.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> PACKSSWB </c> instruction.
///
/// \param __m1
/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
/// 16-bit signed integer and is converted to an 8-bit signed integer with
/// saturation. Positive values greater than 0x7F are saturated to 0x7F.
/// Negative values less than 0x80 are saturated to 0x80. The converted
/// [4 x i8] values are written to the lower 32 bits of the result.
/// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
/// written to the lower 32 bits of the result.
/// \param __m2
/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
/// 16-bit signed integer and is converted to an 8-bit signed integer with
/// saturation. Positive values greater than 0x7F are saturated to 0x7F.
/// Negative values less than 0x80 are saturated to 0x80. The converted
/// [4 x i8] values are written to the upper 32 bits of the result.
/// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
/// written to the upper 32 bits of the result.
/// \returns A 64-bit integer vector of [8 x i8] containing the converted
/// values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
@ -135,28 +130,23 @@ _mm_packs_pi16(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
}
/// Converts 32-bit signed integers from both 64-bit integer vector
/// parameters of [2 x i32] into 16-bit signed integer values, and constructs
/// a 64-bit integer vector of [4 x i16] as the result. Positive values
/// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than
/// 0x8000 are saturated to 0x8000.
/// Converts, with saturation, 32-bit signed integers from both 64-bit integer
/// vector parameters of [2 x i32] into 16-bit signed integer values, and
/// constructs a 64-bit integer vector of [4 x i16] as the result.
///
/// Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative
/// values less than 0x8000 are saturated to 0x8000.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> PACKSSDW </c> instruction.
///
/// \param __m1
/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
/// 32-bit signed integer and is converted to a 16-bit signed integer with
/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
/// Negative values less than 0x8000 are saturated to 0x8000. The converted
/// [2 x i16] values are written to the lower 32 bits of the result.
/// A 64-bit integer vector of [2 x i32]. The converted [2 x i16] values are
/// written to the lower 32 bits of the result.
/// \param __m2
/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
/// 32-bit signed integer and is converted to a 16-bit signed integer with
/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
/// Negative values less than 0x8000 are saturated to 0x8000. The converted
/// [2 x i16] values are written to the upper 32 bits of the result.
/// A 64-bit integer vector of [2 x i32]. The converted [2 x i16] values are
/// written to the upper 32 bits of the result.
/// \returns A 64-bit integer vector of [4 x i16] containing the converted
/// values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
@ -165,28 +155,23 @@ _mm_packs_pi32(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
}
/// Converts 16-bit signed integers from both 64-bit integer vector
/// parameters of [4 x i16] into 8-bit unsigned integer values, and
/// constructs a 64-bit integer vector of [8 x i8] as the result. Values
/// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated
/// to 0.
/// Converts, with saturation, 16-bit signed integers from both 64-bit integer
/// vector parameters of [4 x i16] into 8-bit unsigned integer values, and
/// constructs a 64-bit integer vector of [8 x i8] as the result.
///
/// Values greater than 0xFF are saturated to 0xFF. Values less than 0 are
/// saturated to 0.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> PACKUSWB </c> instruction.
///
/// \param __m1
/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
/// 16-bit signed integer and is converted to an 8-bit unsigned integer with
/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
/// than 0 are saturated to 0. The converted [4 x i8] values are written to
/// the lower 32 bits of the result.
/// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
/// written to the lower 32 bits of the result.
/// \param __m2
/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
/// 16-bit signed integer and is converted to an 8-bit unsigned integer with
/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
/// than 0 are saturated to 0. The converted [4 x i8] values are written to
/// the upper 32 bits of the result.
/// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
/// written to the upper 32 bits of the result.
/// \returns A 64-bit integer vector of [8 x i8] containing the converted
/// values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
@ -400,11 +385,13 @@ _mm_add_pi32(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
}
/// Adds each 8-bit signed integer element of the first 64-bit integer
/// vector of [8 x i8] to the corresponding 8-bit signed integer element of
/// the second 64-bit integer vector of [8 x i8]. Positive sums greater than
/// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to
/// 0x80. The results are packed into a 64-bit integer vector of [8 x i8].
/// Adds, with saturation, each 8-bit signed integer element of the first
/// 64-bit integer vector of [8 x i8] to the corresponding 8-bit signed
/// integer element of the second 64-bit integer vector of [8 x i8].
///
/// Positive sums greater than 0x7F are saturated to 0x7F. Negative sums
/// less than 0x80 are saturated to 0x80. The results are packed into a
/// 64-bit integer vector of [8 x i8].
///
/// \headerfile <x86intrin.h>
///
@ -422,12 +409,13 @@ _mm_adds_pi8(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
}
/// Adds each 16-bit signed integer element of the first 64-bit integer
/// vector of [4 x i16] to the corresponding 16-bit signed integer element of
/// the second 64-bit integer vector of [4 x i16]. Positive sums greater than
/// 0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are
/// saturated to 0x8000. The results are packed into a 64-bit integer vector
/// of [4 x i16].
/// Adds, with saturation, each 16-bit signed integer element of the first
/// 64-bit integer vector of [4 x i16] to the corresponding 16-bit signed
/// integer element of the second 64-bit integer vector of [4 x i16].
///
/// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
/// less than 0x8000 are saturated to 0x8000. The results are packed into a
/// 64-bit integer vector of [4 x i16].
///
/// \headerfile <x86intrin.h>
///
@ -445,11 +433,12 @@ _mm_adds_pi16(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
}
/// Adds each 8-bit unsigned integer element of the first 64-bit integer
/// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of
/// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are
/// saturated to 0xFF. The results are packed into a 64-bit integer vector of
/// [8 x i8].
/// Adds, with saturation, each 8-bit unsigned integer element of the first
/// 64-bit integer vector of [8 x i8] to the corresponding 8-bit unsigned
/// integer element of the second 64-bit integer vector of [8 x i8].
///
/// Sums greater than 0xFF are saturated to 0xFF. The results are packed
/// into a 64-bit integer vector of [8 x i8].
///
/// \headerfile <x86intrin.h>
///
@ -467,11 +456,12 @@ _mm_adds_pu8(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
}
/// Adds each 16-bit unsigned integer element of the first 64-bit integer
/// vector of [4 x i16] to the corresponding 16-bit unsigned integer element
/// of the second 64-bit integer vector of [4 x i16]. Sums greater than
/// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit
/// integer vector of [4 x i16].
/// Adds, with saturation, each 16-bit unsigned integer element of the first
/// 64-bit integer vector of [4 x i16] to the corresponding 16-bit unsigned
/// integer element of the second 64-bit integer vector of [4 x i16].
///
/// Sums greater than 0xFFFF are saturated to 0xFFFF. The results are packed
/// into a 64-bit integer vector of [4 x i16].
///
/// \headerfile <x86intrin.h>
///
@ -552,12 +542,13 @@ _mm_sub_pi32(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
}
/// Subtracts each 8-bit signed integer element of the second 64-bit
/// integer vector of [8 x i8] from the corresponding 8-bit signed integer
/// element of the first 64-bit integer vector of [8 x i8]. Positive results
/// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80
/// are saturated to 0x80. The results are packed into a 64-bit integer
/// vector of [8 x i8].
/// Subtracts, with saturation, each 8-bit signed integer element of the second
/// 64-bit integer vector of [8 x i8] from the corresponding 8-bit signed
/// integer element of the first 64-bit integer vector of [8 x i8].
///
/// Positive results greater than 0x7F are saturated to 0x7F. Negative
/// results less than 0x80 are saturated to 0x80. The results are packed
/// into a 64-bit integer vector of [8 x i8].
///
/// \headerfile <x86intrin.h>
///
@ -575,12 +566,13 @@ _mm_subs_pi8(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
}
/// Subtracts each 16-bit signed integer element of the second 64-bit
/// integer vector of [4 x i16] from the corresponding 16-bit signed integer
/// element of the first 64-bit integer vector of [4 x i16]. Positive results
/// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than
/// 0x8000 are saturated to 0x8000. The results are packed into a 64-bit
/// integer vector of [4 x i16].
/// Subtracts, with saturation, each 16-bit signed integer element of the
/// second 64-bit integer vector of [4 x i16] from the corresponding 16-bit
/// signed integer element of the first 64-bit integer vector of [4 x i16].
///
/// Positive results greater than 0x7FFF are saturated to 0x7FFF. Negative
/// results less than 0x8000 are saturated to 0x8000. The results are packed
/// into a 64-bit integer vector of [4 x i16].
///
/// \headerfile <x86intrin.h>
///
@ -1149,7 +1141,7 @@ _mm_xor_si64(__m64 __m1, __m64 __m2)
/// [8 x i8] to determine if the element of the first vector is equal to the
/// corresponding element of the second vector.
///
/// The comparison yields 0 for false, 0xFF for true.
/// Each comparison returns 0 for false, 0xFF for true.
///
/// \headerfile <x86intrin.h>
///
@ -1171,7 +1163,7 @@ _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
/// [4 x i16] to determine if the element of the first vector is equal to the
/// corresponding element of the second vector.
///
/// The comparison yields 0 for false, 0xFFFF for true.
/// Each comparison returns 0 for false, 0xFFFF for true.
///
/// \headerfile <x86intrin.h>
///
@ -1193,7 +1185,7 @@ _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
/// [2 x i32] to determine if the element of the first vector is equal to the
/// corresponding element of the second vector.
///
/// The comparison yields 0 for false, 0xFFFFFFFF for true.
/// Each comparison returns 0 for false, 0xFFFFFFFF for true.
///
/// \headerfile <x86intrin.h>
///
@ -1215,7 +1207,7 @@ _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
/// [8 x i8] to determine if the element of the first vector is greater than
/// the corresponding element of the second vector.
///
/// The comparison yields 0 for false, 0xFF for true.
/// Each comparison returns 0 for false, 0xFF for true.
///
/// \headerfile <x86intrin.h>
///
@ -1237,7 +1229,7 @@ _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
/// [4 x i16] to determine if the element of the first vector is greater than
/// the corresponding element of the second vector.
///
/// The comparison yields 0 for false, 0xFFFF for true.
/// Each comparison returns 0 for false, 0xFFFF for true.
///
/// \headerfile <x86intrin.h>
///
@ -1259,7 +1251,7 @@ _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
/// [2 x i32] to determine if the element of the first vector is greater than
/// the corresponding element of the second vector.
///
/// The comparison yields 0 for false, 0xFFFFFFFF for true.
/// Each comparison returns 0 for false, 0xFFFFFFFF for true.
///
/// \headerfile <x86intrin.h>
///

View File

@ -44,7 +44,6 @@ module _Builtin_intrinsics [system] [extern_c] {
textual header "avxintrin.h"
textual header "avx2intrin.h"
textual header "avx512fintrin.h"
textual header "avx512erintrin.h"
textual header "fmaintrin.h"
header "x86intrin.h"
@ -203,6 +202,11 @@ module _Builtin_stdarg [system] {
export *
}
explicit module header_macro {
header "__stdarg_header_macro.h"
export *
}
explicit module va_arg {
header "__stdarg_va_arg.h"
export *
@ -232,6 +236,10 @@ module _Builtin_stdbool [system] {
module _Builtin_stddef [system] {
textual header "stddef.h"
explicit module header_macro {
header "__stddef_header_macro.h"
export *
}
// __stddef_max_align_t.h is always in this module, even if
// -fbuiltin-headers-in-system-modules is passed.
explicit module max_align_t {
@ -315,3 +323,8 @@ module opencl_c {
header "opencl-c.h"
header "opencl-c-base.h"
}
module ptrauth {
header "ptrauth.h"
export *
}

View File

@ -46,6 +46,10 @@
#define __opencl_c_ext_fp32_global_atomic_min_max 1
#define __opencl_c_ext_fp32_local_atomic_min_max 1
#define __opencl_c_ext_image_raw10_raw12 1
#define cl_khr_kernel_clock 1
#define __opencl_c_kernel_clock_scope_device 1
#define __opencl_c_kernel_clock_scope_work_group 1
#define __opencl_c_kernel_clock_scope_sub_group 1
#endif // defined(__SPIR__) || defined(__SPIRV__)
#endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)

View File

@ -17314,6 +17314,21 @@ half __ovld __conv sub_group_clustered_rotate(half, int, uint);
#endif // cl_khr_fp16
#endif // cl_khr_subgroup_rotate
#if defined(cl_khr_kernel_clock)
#if defined(__opencl_c_kernel_clock_scope_device)
ulong __ovld clock_read_device();
uint2 __ovld clock_read_hilo_device();
#endif // __opencl_c_kernel_clock_scope_device
#if defined(__opencl_c_kernel_clock_scope_work_group)
ulong __ovld clock_read_work_group();
uint2 __ovld clock_read_hilo_work_group();
#endif // __opencl_c_kernel_clock_scope_work_group
#if defined(__opencl_c_kernel_clock_scope_sub_group)
ulong __ovld clock_read_sub_group();
uint2 __ovld clock_read_hilo_sub_group();
#endif // __opencl_c_kernel_clock_scope_sub_group
#endif // cl_khr_kernel_clock
#if defined(cl_intel_subgroups)
// Intel-Specific Sub Group Functions
float __ovld __conv intel_sub_group_shuffle( float , uint );

View File

@ -8,16 +8,17 @@
*/
#if !defined(__X86INTRIN_H) && !defined(_MM3DNOW_H_INCLUDED)
#error "Never use <prfchwintrin.h> directly; include <x86intrin.h> or <mm3dnow.h> instead."
#error "Never use <prfchwintrin.h> directly; include <x86intrin.h> instead."
#endif
#ifndef __PRFCHWINTRIN_H
#define __PRFCHWINTRIN_H
/// Loads a memory sequence containing the specified memory address into
/// all data cache levels. The cache-coherency state is set to exclusive.
/// Data can be read from and written to the cache line without additional
/// delay.
/// all data cache levels.
///
/// The cache-coherency state is set to exclusive. Data can be read from
/// and written to the cache line without additional delay.
///
/// \headerfile <x86intrin.h>
///
@ -32,10 +33,11 @@ _m_prefetch(void *__P)
}
/// Loads a memory sequence containing the specified memory address into
/// the L1 data cache and sets the cache-coherency to modified. This
/// provides a hint to the processor that the cache line will be modified.
/// It is intended for use when the cache line will be written to shortly
/// after the prefetch is performed.
/// the L1 data cache and sets the cache-coherency state to modified.
///
/// This provides a hint to the processor that the cache line will be
/// modified. It is intended for use when the cache line will be written to
/// shortly after the prefetch is performed.
///
/// Note that the effect of this intrinsic is dependent on the processor
/// implementation.

330
lib/include/ptrauth.h vendored Normal file
View File

@ -0,0 +1,330 @@
/*===---- ptrauth.h - Pointer authentication -------------------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __PTRAUTH_H
#define __PTRAUTH_H
typedef enum {
ptrauth_key_asia = 0,
ptrauth_key_asib = 1,
ptrauth_key_asda = 2,
ptrauth_key_asdb = 3,
/* A process-independent key which can be used to sign code pointers. */
ptrauth_key_process_independent_code = ptrauth_key_asia,
/* A process-specific key which can be used to sign code pointers. */
ptrauth_key_process_dependent_code = ptrauth_key_asib,
/* A process-independent key which can be used to sign data pointers. */
ptrauth_key_process_independent_data = ptrauth_key_asda,
/* A process-specific key which can be used to sign data pointers. */
ptrauth_key_process_dependent_data = ptrauth_key_asdb,
/* The key used to sign return addresses on the stack.
The extra data is based on the storage address of the return address.
On AArch64, that is always the storage address of the return address + 8
(or, in other words, the value of the stack pointer on function entry) */
ptrauth_key_return_address = ptrauth_key_process_dependent_code,
/* The key used to sign C function pointers.
The extra data is always 0. */
ptrauth_key_function_pointer = ptrauth_key_process_independent_code,
/* The key used to sign C++ v-table pointers.
The extra data is always 0. */
ptrauth_key_cxx_vtable_pointer = ptrauth_key_process_independent_data,
/* Other pointers signed under the ABI use private ABI rules. */
} ptrauth_key;
/* An integer type of the appropriate size for a discriminator argument. */
typedef __UINTPTR_TYPE__ ptrauth_extra_data_t;
/* An integer type of the appropriate size for a generic signature. */
typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t;
/* A signed pointer value embeds the original pointer together with
a signature that attests to the validity of that pointer. Because
this signature must use only "spare" bits of the pointer, a
signature's validity is probabilistic in practice: it is unlikely
but still plausible that an invalidly-derived signature will
somehow equal the correct signature and therefore successfully
authenticate. Nonetheless, this scheme provides a strong degree
of protection against certain kinds of attacks. */
/* Authenticating a pointer that was not signed with the given key
and extra-data value will (likely) fail by trapping. */
/* The null function pointer is always the all-zero bit pattern.
Signing an all-zero bit pattern will embed a (likely) non-zero
signature in the result, and so the result will not seem to be
a null function pointer. Authenticating this value will yield
a null function pointer back. However, authenticating an
all-zero bit pattern will probably fail, because the
authentication will expect a (likely) non-zero signature to
embedded in the value.
Because of this, if a pointer may validly be null, you should
check for null before attempting to authenticate it with one
of these intrinsics. This is not necessary when using the
__ptrauth qualifier; the compiler will perform this check
automatically. */
#if __has_feature(ptrauth_intrinsics)
/* Strip the signature from a value without authenticating it.
If the value is a function pointer, the result will not be a
legal function pointer because of the missing signature, and
attempting to call it will result in an authentication failure.
The value must be an expression of pointer type.
The key must be a constant expression of type ptrauth_key.
The result will have the same type as the original value. */
#define ptrauth_strip(__value, __key) __builtin_ptrauth_strip(__value, __key)
/* Blend a constant discriminator into the given pointer-like value
to form a new discriminator. Not all bits of the inputs are
guaranteed to contribute to the result.
On arm64e, the integer must fall within the range of a uint16_t;
other bits may be ignored.
For the purposes of ptrauth_sign_constant, the result of calling
this function is considered a constant expression if the arguments
are constant. Some restrictions may be imposed on the pointer.
The first argument must be an expression of pointer type.
The second argument must be an expression of integer type.
The result will have type uintptr_t. */
#define ptrauth_blend_discriminator(__pointer, __integer) \
__builtin_ptrauth_blend_discriminator(__pointer, __integer)
/* Return a signed pointer for a constant address in a manner which guarantees
a non-attackable sequence.
The value must be a constant expression of pointer type which evaluates to
a non-null pointer.
The key must be a constant expression of type ptrauth_key.
The extra data must be a constant expression of pointer or integer type;
if an integer, it will be coerced to ptrauth_extra_data_t.
The result will have the same type as the original value.
This can be used in constant expressions. */
#define ptrauth_sign_constant(__value, __key, __data) \
__builtin_ptrauth_sign_constant(__value, __key, __data)
/* Add a signature to the given pointer value using a specific key,
using the given extra data as a salt to the signing process.
This operation does not authenticate the original value and is
therefore potentially insecure if an attacker could possibly
control that value.
The value must be an expression of pointer type.
The key must be a constant expression of type ptrauth_key.
The extra data must be an expression of pointer or integer type;
if an integer, it will be coerced to ptrauth_extra_data_t.
The result will have the same type as the original value. */
#define ptrauth_sign_unauthenticated(__value, __key, __data) \
__builtin_ptrauth_sign_unauthenticated(__value, __key, __data)
/* Authenticate a pointer using one scheme and resign it using another.
If the result is subsequently authenticated using the new scheme, that
authentication is guaranteed to fail if and only if the initial
authentication failed.
The value must be an expression of pointer type.
The key must be a constant expression of type ptrauth_key.
The extra data must be an expression of pointer or integer type;
if an integer, it will be coerced to ptrauth_extra_data_t.
The result will have the same type as the original value.
This operation is guaranteed to not leave the intermediate value
available for attack before it is re-signed.
Do not pass a null pointer to this function. A null pointer
will not successfully authenticate.
This operation traps if the authentication fails. */
#define ptrauth_auth_and_resign(__value, __old_key, __old_data, __new_key, \
__new_data) \
__builtin_ptrauth_auth_and_resign(__value, __old_key, __old_data, __new_key, \
__new_data)
/* Authenticate a pointer using one scheme and resign it as a C
function pointer.
If the result is subsequently authenticated using the new scheme, that
authentication is guaranteed to fail if and only if the initial
authentication failed.
The value must be an expression of function pointer type.
The key must be a constant expression of type ptrauth_key.
The extra data must be an expression of pointer or integer type;
if an integer, it will be coerced to ptrauth_extra_data_t.
The result will have the same type as the original value.
This operation is guaranteed to not leave the intermediate value
available for attack before it is re-signed. Additionally, if this
expression is used syntactically as the function expression in a
call, only a single authentication will be performed. */
#define ptrauth_auth_function(__value, __old_key, __old_data) \
ptrauth_auth_and_resign(__value, __old_key, __old_data, \
ptrauth_key_function_pointer, 0)
/* Authenticate a data pointer.
The value must be an expression of non-function pointer type.
The key must be a constant expression of type ptrauth_key.
The extra data must be an expression of pointer or integer type;
if an integer, it will be coerced to ptrauth_extra_data_t.
The result will have the same type as the original value.
This operation traps if the authentication fails. */
#define ptrauth_auth_data(__value, __old_key, __old_data) \
__builtin_ptrauth_auth(__value, __old_key, __old_data)
/* Compute a constant discriminator from the given string.
The argument must be a string literal of char character type. The result
has type ptrauth_extra_data_t.
The result value is never zero and always within range for both the
__ptrauth qualifier and ptrauth_blend_discriminator.
This can be used in constant expressions.
*/
#define ptrauth_string_discriminator(__string) \
__builtin_ptrauth_string_discriminator(__string)
/* Compute a constant discriminator from the given type.
The result can be used as the second argument to
ptrauth_blend_discriminator or the third argument to the
__ptrauth qualifier. It has type size_t.
If the type is a C++ member function pointer type, the result is
the discriminator used to signed member function pointers of that
type. If the type is a function, function pointer, or function
reference type, the result is the discriminator used to sign
functions of that type. It is ill-formed to use this macro with any
other type.
A call to this function is an integer constant expression. */
#define ptrauth_type_discriminator(__type) \
__builtin_ptrauth_type_discriminator(__type)
/* Compute a signature for the given pair of pointer-sized values.
The order of the arguments is significant.
Like a pointer signature, the resulting signature depends on
private key data and therefore should not be reliably reproducible
by attackers. That means that this can be used to validate the
integrity of arbitrary data by storing a signature for that data
alongside it, then checking that the signature is still valid later.
Data which exceeds two pointers in size can be signed by either
computing a tree of generic signatures or just signing an ordinary
cryptographic hash of the data.
The result has type ptrauth_generic_signature_t. However, it may
not have as many bits of entropy as that type's width would suggest;
some implementations are known to compute a compressed signature as
if the arguments were a pointer and a discriminator.
The arguments must be either pointers or integers; if integers, they
will be coerce to uintptr_t. */
#define ptrauth_sign_generic_data(__value, __data) \
__builtin_ptrauth_sign_generic_data(__value, __data)
/* C++ vtable pointer signing class attribute */
#define ptrauth_cxx_vtable_pointer(key, address_discrimination, \
extra_discrimination...) \
[[clang::ptrauth_vtable_pointer(key, address_discrimination, \
extra_discrimination)]]
#else
#define ptrauth_strip(__value, __key) \
({ \
(void)__key; \
__value; \
})
#define ptrauth_blend_discriminator(__pointer, __integer) \
({ \
(void)__pointer; \
(void)__integer; \
((ptrauth_extra_data_t)0); \
})
#define ptrauth_sign_constant(__value, __key, __data) \
({ \
(void)__key; \
(void)__data; \
__value; \
})
#define ptrauth_sign_unauthenticated(__value, __key, __data) \
({ \
(void)__key; \
(void)__data; \
__value; \
})
#define ptrauth_auth_and_resign(__value, __old_key, __old_data, __new_key, \
__new_data) \
({ \
(void)__old_key; \
(void)__old_data; \
(void)__new_key; \
(void)__new_data; \
__value; \
})
#define ptrauth_auth_function(__value, __old_key, __old_data) \
({ \
(void)__old_key; \
(void)__old_data; \
__value; \
})
#define ptrauth_auth_data(__value, __old_key, __old_data) \
({ \
(void)__old_key; \
(void)__old_data; \
__value; \
})
#define ptrauth_string_discriminator(__string) \
({ \
(void)__string; \
((ptrauth_extra_data_t)0); \
})
#define ptrauth_type_discriminator(__type) ((ptrauth_extra_data_t)0)
#define ptrauth_sign_generic_data(__value, __data) \
({ \
(void)__value; \
(void)__data; \
((ptrauth_generic_signature_t)0); \
})
#define ptrauth_cxx_vtable_pointer(key, address_discrimination, \
extra_discrimination...)
#endif /* __has_feature(ptrauth_intrinsics) */
#endif /* __PTRAUTH_H */

View File

@ -14,10 +14,6 @@
#include <stdint.h>
#include <stddef.h>
#ifndef __riscv_vector
#error "Vector intrinsics require the vector extension."
#endif
#ifdef __cplusplus
extern "C" {
#endif

View File

@ -13,4 +13,106 @@
#pragma clang riscv intrinsic sifive_vector
#define __riscv_sf_vc_x_se_u8mf4(p27_26, p24_20, p11_7, rs1, vl) \
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint8_t)rs1, 8, 6, vl)
#define __riscv_sf_vc_x_se_u8mf2(p27_26, p24_20, p11_7, rs1, vl) \
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint8_t)rs1, 8, 7, vl)
#define __riscv_sf_vc_x_se_u8m1(p27_26, p24_20, p11_7, rs1, vl) \
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint8_t)rs1, 8, 0, vl)
#define __riscv_sf_vc_x_se_u8m2(p27_26, p24_20, p11_7, rs1, vl) \
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint8_t)rs1, 8, 1, vl)
#define __riscv_sf_vc_x_se_u8m4(p27_26, p24_20, p11_7, rs1, vl) \
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint8_t)rs1, 8, 2, vl)
#define __riscv_sf_vc_x_se_u8m8(p27_26, p24_20, p11_7, rs1, vl) \
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint8_t)rs1, 8, 3, vl)
#define __riscv_sf_vc_x_se_u16mf2(p27_26, p24_20, p11_7, rs1, vl) \
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint16_t)rs1, 16, 7, vl)
#define __riscv_sf_vc_x_se_u16m1(p27_26, p24_20, p11_7, rs1, vl) \
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint16_t)rs1, 16, 0, vl)
#define __riscv_sf_vc_x_se_u16m2(p27_26, p24_20, p11_7, rs1, vl) \
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint16_t)rs1, 16, 1, vl)
#define __riscv_sf_vc_x_se_u16m4(p27_26, p24_20, p11_7, rs1, vl) \
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint16_t)rs1, 16, 2, vl)
#define __riscv_sf_vc_x_se_u16m8(p27_26, p24_20, p11_7, rs1, vl) \
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint16_t)rs1, 16, 3, vl)
#define __riscv_sf_vc_x_se_u32m1(p27_26, p24_20, p11_7, rs1, vl) \
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint32_t)rs1, 32, 0, vl)
#define __riscv_sf_vc_x_se_u32m2(p27_26, p24_20, p11_7, rs1, vl) \
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint32_t)rs1, 32, 1, vl)
#define __riscv_sf_vc_x_se_u32m4(p27_26, p24_20, p11_7, rs1, vl) \
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint32_t)rs1, 32, 2, vl)
#define __riscv_sf_vc_x_se_u32m8(p27_26, p24_20, p11_7, rs1, vl) \
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint32_t)rs1, 32, 3, vl)
#define __riscv_sf_vc_i_se_u8mf4(p27_26, p24_20, p11_7, simm5, vl) \
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 7, vl)
#define __riscv_sf_vc_i_se_u8mf2(p27_26, p24_20, p11_7, simm5, vl) \
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 6, vl)
#define __riscv_sf_vc_i_se_u8m1(p27_26, p24_20, p11_7, simm5, vl) \
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 0, vl)
#define __riscv_sf_vc_i_se_u8m2(p27_26, p24_20, p11_7, simm5, vl) \
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 1, vl)
#define __riscv_sf_vc_i_se_u8m4(p27_26, p24_20, p11_7, simm5, vl) \
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 2, vl)
#define __riscv_sf_vc_i_se_u8m8(p27_26, p24_20, p11_7, simm5, vl) \
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 3, vl)
#define __riscv_sf_vc_i_se_u16mf2(p27_26, p24_20, p11_7, simm5, vl) \
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 16, 7, vl)
#define __riscv_sf_vc_i_se_u16m1(p27_26, p24_20, p11_7, simm5, vl) \
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 16, 0, vl)
#define __riscv_sf_vc_i_se_u16m2(p27_26, p24_20, p11_7, simm5, vl) \
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 16, 1, vl)
#define __riscv_sf_vc_i_se_u16m4(p27_26, p24_20, p11_7, simm5, vl) \
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 16, 2, vl)
#define __riscv_sf_vc_i_se_u16m8(p27_26, p24_20, p11_7, simm5, vl) \
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 16, 3, vl)
#define __riscv_sf_vc_i_se_u32m1(p27_26, p24_20, p11_7, simm5, vl) \
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 32, 0, vl)
#define __riscv_sf_vc_i_se_u32m2(p27_26, p24_20, p11_7, simm5, vl) \
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 32, 1, vl)
#define __riscv_sf_vc_i_se_u32m4(p27_26, p24_20, p11_7, simm5, vl) \
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 32, 2, vl)
#define __riscv_sf_vc_i_se_u32m8(p27_26, p24_20, p11_7, simm5, vl) \
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 32, 3, vl)
#if __riscv_v_elen >= 64
#define __riscv_sf_vc_x_se_u8mf8(p27_26, p24_20, p11_7, rs1, vl) \
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint8_t)rs1, 8, 5, vl)
#define __riscv_sf_vc_x_se_u16mf4(p27_26, p24_20, p11_7, rs1, vl) \
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint16_t)rs1, 16, 6, vl)
#define __riscv_sf_vc_x_se_u32mf2(p27_26, p24_20, p11_7, rs1, vl) \
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint32_t)rs1, 32, 7, vl)
#define __riscv_sf_vc_i_se_u8mf8(p27_26, p24_20, p11_7, simm5, vl) \
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 5, vl)
#define __riscv_sf_vc_i_se_u16mf4(p27_26, p24_20, p11_7, simm5, vl) \
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 16, 6, vl)
#define __riscv_sf_vc_i_se_u32mf2(p27_26, p24_20, p11_7, simm5, vl) \
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 32, 7, vl)
#define __riscv_sf_vc_i_se_u64m1(p27_26, p24_20, p11_7, simm5, vl) \
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 64, 0, vl)
#define __riscv_sf_vc_i_se_u64m2(p27_26, p24_20, p11_7, simm5, vl) \
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 64, 1, vl)
#define __riscv_sf_vc_i_se_u64m4(p27_26, p24_20, p11_7, simm5, vl) \
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 64, 2, vl)
#define __riscv_sf_vc_i_se_u64m8(p27_26, p24_20, p11_7, simm5, vl) \
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 64, 3, vl)
#if __riscv_xlen >= 64
#define __riscv_sf_vc_x_se_u64m1(p27_26, p24_20, p11_7, rs1, vl) \
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint64_t)rs1, 64, 0, vl)
#define __riscv_sf_vc_x_se_u64m2(p27_26, p24_20, p11_7, rs1, vl) \
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint64_t)rs1, 64, 1, vl)
#define __riscv_sf_vc_x_se_u64m4(p27_26, p24_20, p11_7, rs1, vl) \
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint64_t)rs1, 64, 2, vl)
#define __riscv_sf_vc_x_se_u64m8(p27_26, p24_20, p11_7, rs1, vl) \
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint64_t)rs1, 64, 3, vl)
#endif
#endif
#endif //_SIFIVE_VECTOR_H_

View File

@ -1188,6 +1188,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_testnzc_si128(__m128i __M,
/// Compares each of the corresponding 64-bit values of the 128-bit
/// integer vectors for equality.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPCMPEQQ / PCMPEQQ </c> instruction.
@ -1431,8 +1433,10 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu32_epi64(__m128i __V) {
}
/* SSE4 Pack with Unsigned Saturation. */
/// Converts 32-bit signed integers from both 128-bit integer vector
/// operands into 16-bit unsigned integers, and returns the packed result.
/// Converts, with saturation, 32-bit signed integers from both 128-bit integer
/// vector operands into 16-bit unsigned integers, and returns the packed
/// result.
///
/// Values greater than 0xFFFF are saturated to 0xFFFF. Values less than
/// 0x0000 are saturated to 0x0000.
///
@ -1441,17 +1445,11 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu32_epi64(__m128i __V) {
/// This intrinsic corresponds to the <c> VPACKUSDW / PACKUSDW </c> instruction.
///
/// \param __V1
/// A 128-bit vector of [4 x i32]. Each 32-bit element is treated as a
/// signed integer and is converted to a 16-bit unsigned integer with
/// saturation. Values greater than 0xFFFF are saturated to 0xFFFF. Values
/// less than 0x0000 are saturated to 0x0000. The converted [4 x i16] values
/// are written to the lower 64 bits of the result.
/// A 128-bit vector of [4 x i32]. The converted [4 x i16] values are
/// written to the lower 64 bits of the result.
/// \param __V2
/// A 128-bit vector of [4 x i32]. Each 32-bit element is treated as a
/// signed integer and is converted to a 16-bit unsigned integer with
/// saturation. Values greater than 0xFFFF are saturated to 0xFFFF. Values
/// less than 0x0000 are saturated to 0x0000. The converted [4 x i16] values
/// are written to the higher 64 bits of the result.
/// A 128-bit vector of [4 x i32]. The converted [4 x i16] values are
/// written to the higher 64 bits of the result.
/// \returns A 128-bit vector of [8 x i16] containing the converted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi32(__m128i __V1,
__m128i __V2) {
@ -2305,6 +2303,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_minpos_epu16(__m128i __V) {
/// integer vectors to determine if the values in the first operand are
/// greater than those in the second operand.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPCMPGTQ / PCMPGTQ </c> instruction.

View File

@ -10,6 +10,10 @@
#ifndef __STDALIGN_H
#define __STDALIGN_H
#if defined(__MVS__) && __has_include_next(<stdalign.h>)
#include_next <stdalign.h>
#else
#if defined(__cplusplus) || \
(defined(__STDC_VERSION__) && __STDC_VERSION__ < 202311L)
#ifndef __cplusplus
@ -21,4 +25,5 @@
#define __alignof_is_defined 1
#endif /* __STDC_VERSION__ */
#endif /* __MVS__ */
#endif /* __STDALIGN_H */

34
lib/include/stdarg.h vendored
View File

@ -14,29 +14,24 @@
* need to use some of its interfaces. Otherwise this header provides all of
* the expected interfaces.
*
* When clang modules are enabled, this header is a textual header. It ignores
* its header guard so that multiple submodules can export its interfaces.
* Take module SM with submodules A and B, whose headers both include stdarg.h
* When SM.A builds, __STDARG_H will be defined. When SM.B builds, the
* definition from SM.A will leak when building without local submodule
* visibility. stdarg.h wouldn't include any of its implementation headers, and
* SM.B wouldn't import any of the stdarg modules, and SM.B's `export *`
* wouldn't export any stdarg interfaces as expected. However, since stdarg.h
* ignores its header guard when building with modules, it all works as
* expected.
*
* When clang modules are not enabled, the header guards can function in the
* normal simple fashion.
* When clang modules are enabled, this header is a textual header to support
* the multiple include behavior. As such, it doesn't directly declare anything
* so that it doesn't add duplicate declarations to all of its includers'
* modules.
*/
#if !defined(__STDARG_H) || __has_feature(modules) || \
defined(__need___va_list) || defined(__need_va_list) || \
defined(__need_va_arg) || defined(__need___va_copy) || \
defined(__need_va_copy)
#if defined(__MVS__) && __has_include_next(<stdarg.h>)
#undef __need___va_list
#undef __need_va_list
#undef __need_va_arg
#undef __need___va_copy
#undef __need_va_copy
#include <__stdarg_header_macro.h>
#include_next <stdarg.h>
#else
#if !defined(__need___va_list) && !defined(__need_va_list) && \
!defined(__need_va_arg) && !defined(__need___va_copy) && \
!defined(__need_va_copy)
#define __STDARG_H
#define __need___va_list
#define __need_va_list
#define __need_va_arg
@ -49,6 +44,7 @@
!defined(__STRICT_ANSI__)
#define __need_va_copy
#endif
#include <__stdarg_header_macro.h>
#endif
#ifdef __need___va_list
@ -76,4 +72,4 @@
#undef __need_va_copy
#endif /* defined(__need_va_copy) */
#endif
#endif /* __MVS__ */

View File

@ -16,7 +16,7 @@
* Exclude the MSVC path as well as the MSVC header as of the 14.31.30818
* explicitly disallows `stdatomic.h` in the C mode via an `#error`. Fallback
* to the clang resource header until that is fully supported. The
* `stdatomic.h` header requires C++ 23 or newer.
* `stdatomic.h` header requires C++23 or newer.
*/
#if __STDC_HOSTED__ && \
__has_include_next(<stdatomic.h>) && \
@ -35,6 +35,9 @@ extern "C" {
#define ATOMIC_BOOL_LOCK_FREE __CLANG_ATOMIC_BOOL_LOCK_FREE
#define ATOMIC_CHAR_LOCK_FREE __CLANG_ATOMIC_CHAR_LOCK_FREE
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
#define ATOMIC_CHAR8_T_LOCK_FREE __CLANG_ATOMIC_CHAR8_T_LOCK_FREE
#endif
#define ATOMIC_CHAR16_T_LOCK_FREE __CLANG_ATOMIC_CHAR16_T_LOCK_FREE
#define ATOMIC_CHAR32_T_LOCK_FREE __CLANG_ATOMIC_CHAR32_T_LOCK_FREE
#define ATOMIC_WCHAR_T_LOCK_FREE __CLANG_ATOMIC_WCHAR_T_LOCK_FREE
@ -104,6 +107,9 @@ typedef _Atomic(long) atomic_long;
typedef _Atomic(unsigned long) atomic_ulong;
typedef _Atomic(long long) atomic_llong;
typedef _Atomic(unsigned long long) atomic_ullong;
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
typedef _Atomic(unsigned char) atomic_char8_t;
#endif
typedef _Atomic(uint_least16_t) atomic_char16_t;
typedef _Atomic(uint_least32_t) atomic_char32_t;
typedef _Atomic(wchar_t) atomic_wchar_t;
@ -166,7 +172,11 @@ typedef _Atomic(uintmax_t) atomic_uintmax_t;
typedef struct atomic_flag { atomic_bool _Value; } atomic_flag;
#ifdef __cplusplus
#define ATOMIC_FLAG_INIT {false}
#else
#define ATOMIC_FLAG_INIT { 0 }
#endif
/* These should be provided by the libc implementation. */
#ifdef __cplusplus

View File

@ -12,6 +12,10 @@
#define __bool_true_false_are_defined 1
#if defined(__MVS__) && __has_include_next(<stdbool.h>)
#include_next <stdbool.h>
#else
#if defined(__STDC_VERSION__) && __STDC_VERSION__ > 201710L
/* FIXME: We should be issuing a deprecation warning here, but cannot yet due
* to system headers which include this header file unconditionally.
@ -31,4 +35,5 @@
#endif
#endif
#endif /* __MVS__ */
#endif /* __STDBOOL_H */

60
lib/include/stddef.h vendored
View File

@ -14,34 +14,32 @@
* need to use some of its interfaces. Otherwise this header provides all of
* the expected interfaces.
*
* When clang modules are enabled, this header is a textual header. It ignores
* its header guard so that multiple submodules can export its interfaces.
* Take module SM with submodules A and B, whose headers both include stddef.h
* When SM.A builds, __STDDEF_H will be defined. When SM.B builds, the
* definition from SM.A will leak when building without local submodule
* visibility. stddef.h wouldn't include any of its implementation headers, and
* SM.B wouldn't import any of the stddef modules, and SM.B's `export *`
* wouldn't export any stddef interfaces as expected. However, since stddef.h
* ignores its header guard when building with modules, it all works as
* expected.
*
* When clang modules are not enabled, the header guards can function in the
* normal simple fashion.
* When clang modules are enabled, this header is a textual header to support
* the multiple include behavior. As such, it doesn't directly declare anything
* so that it doesn't add duplicate declarations to all of its includers'
* modules.
*/
#if !defined(__STDDEF_H) || __has_feature(modules) || \
(defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1) || \
defined(__need_ptrdiff_t) || defined(__need_size_t) || \
defined(__need_rsize_t) || defined(__need_wchar_t) || \
defined(__need_NULL) || defined(__need_nullptr_t) || \
defined(__need_unreachable) || defined(__need_max_align_t) || \
defined(__need_offsetof) || defined(__need_wint_t)
#if defined(__MVS__) && __has_include_next(<stddef.h>)
#undef __need_ptrdiff_t
#undef __need_size_t
#undef __need_rsize_t
#undef __need_wchar_t
#undef __need_NULL
#undef __need_nullptr_t
#undef __need_unreachable
#undef __need_max_align_t
#undef __need_offsetof
#undef __need_wint_t
#include <__stddef_header_macro.h>
#include_next <stddef.h>
#else
#if !defined(__need_ptrdiff_t) && !defined(__need_size_t) && \
!defined(__need_rsize_t) && !defined(__need_wchar_t) && \
!defined(__need_NULL) && !defined(__need_nullptr_t) && \
!defined(__need_unreachable) && !defined(__need_max_align_t) && \
!defined(__need_offsetof) && !defined(__need_wint_t)
#define __STDDEF_H
#define __need_ptrdiff_t
#define __need_size_t
/* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is
@ -50,7 +48,24 @@
#define __need_rsize_t
#endif
#define __need_wchar_t
#if !defined(__STDDEF_H) || __has_feature(modules)
/*
* __stddef_null.h is special when building without modules: if __need_NULL is
* set, then it will unconditionally redefine NULL. To avoid stepping on client
* definitions of NULL, __need_NULL should only be set the first time this
* header is included, that is when __STDDEF_H is not defined. However, when
* building with modules, this header is a textual header and needs to
* unconditionally include __stdef_null.h to support multiple submodules
* exporting _Builtin_stddef.null. Take module SM with submodules A and B, whose
* headers both include stddef.h When SM.A builds, __STDDEF_H will be defined.
* When SM.B builds, the definition from SM.A will leak when building without
* local submodule visibility. stddef.h wouldn't include __stddef_null.h, and
* SM.B wouldn't import _Builtin_stddef.null, and SM.B's `export *` wouldn't
* export NULL as expected. When building with modules, always include
* __stddef_null.h so that everything works as expected.
*/
#define __need_NULL
#endif
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) || \
defined(__cplusplus)
#define __need_nullptr_t
@ -66,6 +81,7 @@
/* wint_t is provided by <wchar.h> and not <stddef.h>. It's here
* for compatibility, but must be explicitly requested. Therefore
* __need_wint_t is intentionally not defined here. */
#include <__stddef_header_macro.h>
#endif
#if defined(__need_ptrdiff_t)
@ -120,4 +136,4 @@ __WINT_TYPE__ directly; accommodate both by requiring __need_wint_t */
#undef __need_wint_t
#endif /* __need_wint_t */
#endif
#endif /* __MVS__ */

View File

@ -14,6 +14,10 @@
#define __CLANG_STDINT_H
#endif
#if defined(__MVS__) && __has_include_next(<stdint.h>)
#include_next <stdint.h>
#else
/* If we're hosted, fall back to the system's stdint.h, which might have
* additional definitions.
*/
@ -947,4 +951,5 @@ typedef __UINTMAX_TYPE__ uintmax_t;
#endif
#endif /* __STDC_HOSTED__ */
#endif /* __MVS__ */
#endif /* __CLANG_STDINT_H */

View File

@ -10,9 +10,15 @@
#ifndef __STDNORETURN_H
#define __STDNORETURN_H
#if defined(__MVS__) && __has_include_next(<stdnoreturn.h>)
#include_next <stdnoreturn.h>
#else
#define noreturn _Noreturn
#define __noreturn_is_defined 1
#endif /* __MVS__ */
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ > 201710L) && \
!defined(_CLANG_DISABLE_CRT_DEPRECATION_WARNINGS)
/* The noreturn macro is deprecated in C23. We do not mark it as such because

View File

@ -271,10 +271,11 @@ _mm_hadd_pi32(__m64 __a, __m64 __b)
return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
}
/// Horizontally adds the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are
/// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
/// 0x8000.
/// Horizontally adds, with saturation, the adjacent pairs of values contained
/// in two packed 128-bit vectors of [8 x i16].
///
/// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
/// less than 0x8000 are saturated to 0x8000.
///
/// \headerfile <x86intrin.h>
///
@ -296,10 +297,11 @@ _mm_hadds_epi16(__m128i __a, __m128i __b)
return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
}
/// Horizontally adds the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are
/// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
/// 0x8000.
/// Horizontally adds, with saturation, the adjacent pairs of values contained
/// in two packed 64-bit vectors of [4 x i16].
///
/// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
/// less than 0x8000 are saturated to 0x8000.
///
/// \headerfile <x86intrin.h>
///
@ -413,10 +415,11 @@ _mm_hsub_pi32(__m64 __a, __m64 __b)
return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
}
/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 128-bit vectors of [8 x i16]. Positive differences greater than
/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
/// saturated to 0x8000.
/// Horizontally subtracts, with saturation, the adjacent pairs of values
/// contained in two packed 128-bit vectors of [8 x i16].
///
/// Positive differences greater than 0x7FFF are saturated to 0x7FFF.
/// Negative differences less than 0x8000 are saturated to 0x8000.
///
/// \headerfile <x86intrin.h>
///
@ -438,10 +441,11 @@ _mm_hsubs_epi16(__m128i __a, __m128i __b)
return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
}
/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 64-bit vectors of [4 x i16]. Positive differences greater than
/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
/// saturated to 0x8000.
/// Horizontally subtracts, with saturation, the adjacent pairs of values
/// contained in two packed 64-bit vectors of [4 x i16].
///
/// Positive differences greater than 0x7FFF are saturated to 0x7FFF.
/// Negative differences less than 0x8000 are saturated to 0x8000.
///
/// \headerfile <x86intrin.h>
///

View File

@ -8,5 +8,9 @@
*/
#ifndef __VARARGS_H
#define __VARARGS_H
#error "Please use <stdarg.h> instead of <varargs.h>"
#if defined(__MVS__) && __has_include_next(<varargs.h>)
#include_next <varargs.h>
#else
#error "Please use <stdarg.h> instead of <varargs.h>"
#endif /* __MVS__ */
#endif

View File

@ -10,38 +10,31 @@
#ifndef __X86GPRINTRIN_H
#define __X86GPRINTRIN_H
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__HRESET__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__HRESET__)
#include <hresetintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__UINTR__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__UINTR__)
#include <uintrintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__USERMSR__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__USERMSR__)
#include <usermsrintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__CRC32__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__CRC32__)
#include <crc32intrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__PRFCHI__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__PRFCHI__)
#include <prfchiintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__RAOINT__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__RAOINT__)
#include <raointintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__CMPCCXADD__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__CMPCCXADD__)
#include <cmpccxaddintrin.h>
#endif

View File

@ -14,53 +14,39 @@
#include <immintrin.h>
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__3dNOW__)
#include <mm3dnow.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__PRFCHW__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__PRFCHW__)
#include <prfchwintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__SSE4A__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE4A__)
#include <ammintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__FMA4__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__FMA4__)
#include <fma4intrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__XOP__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__XOP__)
#include <xopintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__TBM__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__TBM__)
#include <tbmintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__LWP__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__LWP__)
#include <lwpintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__MWAITX__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__MWAITX__)
#include <mwaitxintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__CLZERO__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__CLZERO__)
#include <clzerointrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__RDPRU__)
#if !defined(__SCE__) || __has_feature(modules) || defined(__RDPRU__)
#include <rdpruintrin.h>
#endif

View File

@ -316,6 +316,8 @@ _mm_rsqrt_ps(__m128 __a)
/// operands and returns the lesser value in the low-order bits of the
/// vector of [4 x float].
///
/// If either value in a comparison is NaN, returns the value from \a __b.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMINSS / MINSS </c> instructions.
@ -338,6 +340,8 @@ _mm_min_ss(__m128 __a, __m128 __b)
/// Compares two 128-bit vectors of [4 x float] and returns the lesser
/// of each pair of values.
///
/// If either value in a comparison is NaN, returns the value from \a __b.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMINPS / MINPS </c> instructions.
@ -358,6 +362,8 @@ _mm_min_ps(__m128 __a, __m128 __b)
/// operands and returns the greater value in the low-order bits of a 128-bit
/// vector of [4 x float].
///
/// If either value in a comparison is NaN, returns the value from \a __b.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMAXSS / MAXSS </c> instructions.
@ -380,6 +386,8 @@ _mm_max_ss(__m128 __a, __m128 __b)
/// Compares two 128-bit vectors of [4 x float] and returns the greater
/// of each pair of values.
///
/// If either value in a comparison is NaN, returns the value from \a __b.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMAXPS / MAXPS </c> instructions.
@ -474,8 +482,11 @@ _mm_xor_ps(__m128 __a, __m128 __b)
}
/// Compares two 32-bit float values in the low-order bits of both
/// operands for equality and returns the result of the comparison in the
/// operands for equality.
///
/// The comparison returns 0x0 for false, 0xFFFFFFFF for true, in the
/// low-order bits of a vector [4 x float].
/// If either value in a comparison is NaN, returns false.
///
/// \headerfile <x86intrin.h>
///
@ -498,6 +509,9 @@ _mm_cmpeq_ss(__m128 __a, __m128 __b)
/// Compares each of the corresponding 32-bit float values of the
/// 128-bit vectors of [4 x float] for equality.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
/// If either value in a comparison is NaN, returns false.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPEQPS / CMPEQPS </c> instructions.
@ -515,8 +529,11 @@ _mm_cmpeq_ps(__m128 __a, __m128 __b)
/// Compares two 32-bit float values in the low-order bits of both
/// operands to determine if the value in the first operand is less than the
/// corresponding value in the second operand and returns the result of the
/// comparison in the low-order bits of a vector of [4 x float].
/// corresponding value in the second operand.
///
/// The comparison returns 0x0 for false, 0xFFFFFFFF for true, in the
/// low-order bits of a vector of [4 x float].
/// If either value in a comparison is NaN, returns false.
///
/// \headerfile <x86intrin.h>
///
@ -540,6 +557,9 @@ _mm_cmplt_ss(__m128 __a, __m128 __b)
/// 128-bit vectors of [4 x float] to determine if the values in the first
/// operand are less than those in the second operand.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
/// If either value in a comparison is NaN, returns false.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPLTPS / CMPLTPS </c> instructions.
@ -557,9 +577,11 @@ _mm_cmplt_ps(__m128 __a, __m128 __b)
/// Compares two 32-bit float values in the low-order bits of both
/// operands to determine if the value in the first operand is less than or
/// equal to the corresponding value in the second operand and returns the
/// result of the comparison in the low-order bits of a vector of
/// [4 x float].
/// equal to the corresponding value in the second operand.
///
/// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true, in
/// the low-order bits of a vector of [4 x float].
/// If either value in a comparison is NaN, returns false.
///
/// \headerfile <x86intrin.h>
///
@ -583,6 +605,9 @@ _mm_cmple_ss(__m128 __a, __m128 __b)
/// 128-bit vectors of [4 x float] to determine if the values in the first
/// operand are less than or equal to those in the second operand.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
/// If either value in a comparison is NaN, returns false.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPLEPS / CMPLEPS </c> instructions.
@ -600,8 +625,11 @@ _mm_cmple_ps(__m128 __a, __m128 __b)
/// Compares two 32-bit float values in the low-order bits of both
/// operands to determine if the value in the first operand is greater than
/// the corresponding value in the second operand and returns the result of
/// the comparison in the low-order bits of a vector of [4 x float].
/// the corresponding value in the second operand.
///
/// The comparison returns 0x0 for false, 0xFFFFFFFF for true, in the
/// low-order bits of a vector of [4 x float].
/// If either value in a comparison is NaN, returns false.
///
/// \headerfile <x86intrin.h>
///
@ -627,6 +655,9 @@ _mm_cmpgt_ss(__m128 __a, __m128 __b)
/// 128-bit vectors of [4 x float] to determine if the values in the first
/// operand are greater than those in the second operand.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
/// If either value in a comparison is NaN, returns false.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPLTPS / CMPLTPS </c> instructions.
@ -644,9 +675,11 @@ _mm_cmpgt_ps(__m128 __a, __m128 __b)
/// Compares two 32-bit float values in the low-order bits of both
/// operands to determine if the value in the first operand is greater than
/// or equal to the corresponding value in the second operand and returns
/// the result of the comparison in the low-order bits of a vector of
/// [4 x float].
/// or equal to the corresponding value in the second operand.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true, in the
/// low-order bits of a vector of [4 x float].
/// If either value in a comparison is NaN, returns false.
///
/// \headerfile <x86intrin.h>
///
@ -672,6 +705,9 @@ _mm_cmpge_ss(__m128 __a, __m128 __b)
/// 128-bit vectors of [4 x float] to determine if the values in the first
/// operand are greater than or equal to those in the second operand.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
/// If either value in a comparison is NaN, returns false.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPLEPS / CMPLEPS </c> instructions.
@ -687,9 +723,12 @@ _mm_cmpge_ps(__m128 __a, __m128 __b)
return (__m128)__builtin_ia32_cmpleps((__v4sf)__b, (__v4sf)__a);
}
/// Compares two 32-bit float values in the low-order bits of both
/// operands for inequality and returns the result of the comparison in the
/// Compares two 32-bit float values in the low-order bits of both operands
/// for inequality.
///
/// The comparison returns 0x0 for false, 0xFFFFFFFF for true, in the
/// low-order bits of a vector of [4 x float].
/// If either value in a comparison is NaN, returns true.
///
/// \headerfile <x86intrin.h>
///
@ -713,6 +752,9 @@ _mm_cmpneq_ss(__m128 __a, __m128 __b)
/// Compares each of the corresponding 32-bit float values of the
/// 128-bit vectors of [4 x float] for inequality.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
/// If either value in a comparison is NaN, returns true.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPNEQPS / CMPNEQPS </c>
@ -731,8 +773,11 @@ _mm_cmpneq_ps(__m128 __a, __m128 __b)
/// Compares two 32-bit float values in the low-order bits of both
/// operands to determine if the value in the first operand is not less than
/// the corresponding value in the second operand and returns the result of
/// the comparison in the low-order bits of a vector of [4 x float].
/// the corresponding value in the second operand.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true, in the
/// low-order bits of a vector of [4 x float].
/// If either value in a comparison is NaN, returns true.
///
/// \headerfile <x86intrin.h>
///
@ -757,6 +802,9 @@ _mm_cmpnlt_ss(__m128 __a, __m128 __b)
/// 128-bit vectors of [4 x float] to determine if the values in the first
/// operand are not less than those in the second operand.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
/// If either value in a comparison is NaN, returns true.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPNLTPS / CMPNLTPS </c>
@ -775,9 +823,11 @@ _mm_cmpnlt_ps(__m128 __a, __m128 __b)
/// Compares two 32-bit float values in the low-order bits of both
/// operands to determine if the value in the first operand is not less than
/// or equal to the corresponding value in the second operand and returns
/// the result of the comparison in the low-order bits of a vector of
/// [4 x float].
/// or equal to the corresponding value in the second operand.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true, in the
/// low-order bits of a vector of [4 x float].
/// If either value in a comparison is NaN, returns true.
///
/// \headerfile <x86intrin.h>
///
@ -802,6 +852,9 @@ _mm_cmpnle_ss(__m128 __a, __m128 __b)
/// 128-bit vectors of [4 x float] to determine if the values in the first
/// operand are not less than or equal to those in the second operand.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
/// If either value in a comparison is NaN, returns true.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPNLEPS / CMPNLEPS </c>
@ -820,9 +873,11 @@ _mm_cmpnle_ps(__m128 __a, __m128 __b)
/// Compares two 32-bit float values in the low-order bits of both
/// operands to determine if the value in the first operand is not greater
/// than the corresponding value in the second operand and returns the
/// result of the comparison in the low-order bits of a vector of
/// [4 x float].
/// than the corresponding value in the second operand.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true, in the
/// low-order bits of a vector of [4 x float].
/// If either value in a comparison is NaN, returns true.
///
/// \headerfile <x86intrin.h>
///
@ -849,6 +904,9 @@ _mm_cmpngt_ss(__m128 __a, __m128 __b)
/// 128-bit vectors of [4 x float] to determine if the values in the first
/// operand are not greater than those in the second operand.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
/// If either value in a comparison is NaN, returns true.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPNLTPS / CMPNLTPS </c>
@ -867,9 +925,11 @@ _mm_cmpngt_ps(__m128 __a, __m128 __b)
/// Compares two 32-bit float values in the low-order bits of both
/// operands to determine if the value in the first operand is not greater
/// than or equal to the corresponding value in the second operand and
/// returns the result of the comparison in the low-order bits of a vector
/// of [4 x float].
/// than or equal to the corresponding value in the second operand.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true, in the
/// low-order bits of a vector of [4 x float].
/// If either value in a comparison is NaN, returns true.
///
/// \headerfile <x86intrin.h>
///
@ -896,6 +956,9 @@ _mm_cmpnge_ss(__m128 __a, __m128 __b)
/// 128-bit vectors of [4 x float] to determine if the values in the first
/// operand are not greater than or equal to those in the second operand.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
/// If either value in a comparison is NaN, returns true.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPNLEPS / CMPNLEPS </c>
@ -914,9 +977,11 @@ _mm_cmpnge_ps(__m128 __a, __m128 __b)
/// Compares two 32-bit float values in the low-order bits of both
/// operands to determine if the value in the first operand is ordered with
/// respect to the corresponding value in the second operand and returns the
/// result of the comparison in the low-order bits of a vector of
/// [4 x float].
/// respect to the corresponding value in the second operand.
///
/// A pair of floating-point values are ordered with respect to each
/// other if neither value is a NaN. Each comparison returns 0x0 for false,
/// 0xFFFFFFFF for true.
///
/// \headerfile <x86intrin.h>
///
@ -941,6 +1006,10 @@ _mm_cmpord_ss(__m128 __a, __m128 __b)
/// 128-bit vectors of [4 x float] to determine if the values in the first
/// operand are ordered with respect to those in the second operand.
///
/// A pair of floating-point values are ordered with respect to each
/// other if neither value is a NaN. Each comparison returns 0x0 for false,
/// 0xFFFFFFFF for true.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPORDPS / CMPORDPS </c>
@ -959,9 +1028,11 @@ _mm_cmpord_ps(__m128 __a, __m128 __b)
/// Compares two 32-bit float values in the low-order bits of both
/// operands to determine if the value in the first operand is unordered
/// with respect to the corresponding value in the second operand and
/// returns the result of the comparison in the low-order bits of a vector
/// of [4 x float].
/// with respect to the corresponding value in the second operand.
///
/// A pair of double-precision values are unordered with respect to each
/// other if one or both values are NaN. Each comparison returns 0x0 for
/// false, 0xFFFFFFFF for true.
///
/// \headerfile <x86intrin.h>
///
@ -986,6 +1057,10 @@ _mm_cmpunord_ss(__m128 __a, __m128 __b)
/// 128-bit vectors of [4 x float] to determine if the values in the first
/// operand are unordered with respect to those in the second operand.
///
/// A pair of double-precision values are unordered with respect to each
/// other if one or both values are NaN. Each comparison returns 0x0 for
/// false, 0xFFFFFFFFFFFFFFFF for true.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPUNORDPS / CMPUNORDPS </c>
@ -1003,9 +1078,10 @@ _mm_cmpunord_ps(__m128 __a, __m128 __b)
}
/// Compares two 32-bit float values in the low-order bits of both
/// operands for equality and returns the result of the comparison.
/// operands for equality.
///
/// If either of the two lower 32-bit values is NaN, 0 is returned.
/// The comparison returns 0 for false, 1 for true. If either value in a
/// comparison is NaN, returns 0.
///
/// \headerfile <x86intrin.h>
///
@ -1018,8 +1094,7 @@ _mm_cmpunord_ps(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results. If either of the
/// two lower 32-bit values is NaN, 0 is returned.
/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comieq_ss(__m128 __a, __m128 __b)
{
@ -1028,9 +1103,10 @@ _mm_comieq_ss(__m128 __a, __m128 __b)
/// Compares two 32-bit float values in the low-order bits of both
/// operands to determine if the first operand is less than the second
/// operand and returns the result of the comparison.
/// operand.
///
/// If either of the two lower 32-bit values is NaN, 0 is returned.
/// The comparison returns 0 for false, 1 for true. If either value in a
/// comparison is NaN, returns 0.
///
/// \headerfile <x86intrin.h>
///
@ -1043,8 +1119,7 @@ _mm_comieq_ss(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results. If either of the two
/// lower 32-bit values is NaN, 0 is returned.
/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comilt_ss(__m128 __a, __m128 __b)
{
@ -1053,9 +1128,10 @@ _mm_comilt_ss(__m128 __a, __m128 __b)
/// Compares two 32-bit float values in the low-order bits of both
/// operands to determine if the first operand is less than or equal to the
/// second operand and returns the result of the comparison.
/// second operand.
///
/// If either of the two lower 32-bit values is NaN, 0 is returned.
/// The comparison returns 0 for false, 1 for true. If either value in a
/// comparison is NaN, returns 0.
///
/// \headerfile <x86intrin.h>
///
@ -1067,8 +1143,7 @@ _mm_comilt_ss(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results. If either of the two
/// lower 32-bit values is NaN, 0 is returned.
/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comile_ss(__m128 __a, __m128 __b)
{
@ -1077,9 +1152,10 @@ _mm_comile_ss(__m128 __a, __m128 __b)
/// Compares two 32-bit float values in the low-order bits of both
/// operands to determine if the first operand is greater than the second
/// operand and returns the result of the comparison.
/// operand.
///
/// If either of the two lower 32-bit values is NaN, 0 is returned.
/// The comparison returns 0 for false, 1 for true. If either value in a
/// comparison is NaN, returns 0.
///
/// \headerfile <x86intrin.h>
///
@ -1091,8 +1167,7 @@ _mm_comile_ss(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results. If either of the
/// two lower 32-bit values is NaN, 0 is returned.
/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comigt_ss(__m128 __a, __m128 __b)
{
@ -1101,9 +1176,10 @@ _mm_comigt_ss(__m128 __a, __m128 __b)
/// Compares two 32-bit float values in the low-order bits of both
/// operands to determine if the first operand is greater than or equal to
/// the second operand and returns the result of the comparison.
/// the second operand.
///
/// If either of the two lower 32-bit values is NaN, 0 is returned.
/// The comparison returns 0 for false, 1 for true. If either value in a
/// comparison is NaN, returns 0.
///
/// \headerfile <x86intrin.h>
///
@ -1115,8 +1191,7 @@ _mm_comigt_ss(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results. If either of the two
/// lower 32-bit values is NaN, 0 is returned.
/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comige_ss(__m128 __a, __m128 __b)
{
@ -1125,9 +1200,10 @@ _mm_comige_ss(__m128 __a, __m128 __b)
/// Compares two 32-bit float values in the low-order bits of both
/// operands to determine if the first operand is not equal to the second
/// operand and returns the result of the comparison.
/// operand.
///
/// If either of the two lower 32-bit values is NaN, 1 is returned.
/// The comparison returns 0 for false, 1 for true. If either value in a
/// comparison is NaN, returns 1.
///
/// \headerfile <x86intrin.h>
///
@ -1139,8 +1215,7 @@ _mm_comige_ss(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results. If either of the
/// two lower 32-bit values is NaN, 1 is returned.
/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comineq_ss(__m128 __a, __m128 __b)
{
@ -1148,10 +1223,10 @@ _mm_comineq_ss(__m128 __a, __m128 __b)
}
/// Performs an unordered comparison of two 32-bit float values using
/// the low-order bits of both operands to determine equality and returns
/// the result of the comparison.
/// the low-order bits of both operands to determine equality.
///
/// If either of the two lower 32-bit values is NaN, 0 is returned.
/// The comparison returns 0 for false, 1 for true. If either value in a
/// comparison is NaN, returns 0.
///
/// \headerfile <x86intrin.h>
///
@ -1163,8 +1238,7 @@ _mm_comineq_ss(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results. If either of the two
/// lower 32-bit values is NaN, 0 is returned.
/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomieq_ss(__m128 __a, __m128 __b)
{
@ -1173,9 +1247,10 @@ _mm_ucomieq_ss(__m128 __a, __m128 __b)
/// Performs an unordered comparison of two 32-bit float values using
/// the low-order bits of both operands to determine if the first operand is
/// less than the second operand and returns the result of the comparison.
/// less than the second operand.
///
/// If either of the two lower 32-bit values is NaN, 0 is returned.
/// The comparison returns 0 for false, 1 for true. If either value in a
/// comparison is NaN, returns 0.
///
/// \headerfile <x86intrin.h>
///
@ -1187,8 +1262,7 @@ _mm_ucomieq_ss(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results. If either of the two
/// lower 32-bit values is NaN, 0 is returned.
/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomilt_ss(__m128 __a, __m128 __b)
{
@ -1197,10 +1271,10 @@ _mm_ucomilt_ss(__m128 __a, __m128 __b)
/// Performs an unordered comparison of two 32-bit float values using
/// the low-order bits of both operands to determine if the first operand is
/// less than or equal to the second operand and returns the result of the
/// comparison.
/// less than or equal to the second operand.
///
/// If either of the two lower 32-bit values is NaN, 0 is returned.
/// The comparison returns 0 for false, 1 for true. If either value in a
/// comparison is NaN, returns 0.
///
/// \headerfile <x86intrin.h>
///
@ -1212,8 +1286,7 @@ _mm_ucomilt_ss(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results. If either of the two
/// lower 32-bit values is NaN, 0 is returned.
/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomile_ss(__m128 __a, __m128 __b)
{
@ -1222,10 +1295,10 @@ _mm_ucomile_ss(__m128 __a, __m128 __b)
/// Performs an unordered comparison of two 32-bit float values using
/// the low-order bits of both operands to determine if the first operand is
/// greater than the second operand and returns the result of the
/// comparison.
/// greater than the second operand.
///
/// If either of the two lower 32-bit values is NaN, 0 is returned.
/// The comparison returns 0 for false, 1 for true. If either value in a
/// comparison is NaN, returns 0.
///
/// \headerfile <x86intrin.h>
///
@ -1237,8 +1310,7 @@ _mm_ucomile_ss(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results. If either of the two
/// lower 32-bit values is NaN, 0 is returned.
/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomigt_ss(__m128 __a, __m128 __b)
{
@ -1247,10 +1319,10 @@ _mm_ucomigt_ss(__m128 __a, __m128 __b)
/// Performs an unordered comparison of two 32-bit float values using
/// the low-order bits of both operands to determine if the first operand is
/// greater than or equal to the second operand and returns the result of
/// the comparison.
/// greater than or equal to the second operand.
///
/// If either of the two lower 32-bit values is NaN, 0 is returned.
/// The comparison returns 0 for false, 1 for true. If either value in a
/// comparison is NaN, returns 0.
///
/// \headerfile <x86intrin.h>
///
@ -1262,8 +1334,7 @@ _mm_ucomigt_ss(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results. If either of the two
/// lower 32-bit values is NaN, 0 is returned.
/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomige_ss(__m128 __a, __m128 __b)
{
@ -1271,10 +1342,10 @@ _mm_ucomige_ss(__m128 __a, __m128 __b)
}
/// Performs an unordered comparison of two 32-bit float values using
/// the low-order bits of both operands to determine inequality and returns
/// the result of the comparison.
/// the low-order bits of both operands to determine inequality.
///
/// If either of the two lower 32-bit values is NaN, 1 is returned.
/// The comparison returns 0 for false, 1 for true. If either value in a
/// comparison is NaN, returns 0.
///
/// \headerfile <x86intrin.h>
///
@ -1286,8 +1357,7 @@ _mm_ucomige_ss(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results. If either of the two
/// lower 32-bit values is NaN, 1 is returned.
/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomineq_ss(__m128 __a, __m128 __b)
{
@ -1297,6 +1367,10 @@ _mm_ucomineq_ss(__m128 __a, __m128 __b)
/// Converts a float value contained in the lower 32 bits of a vector of
/// [4 x float] into a 32-bit integer.
///
/// If the converted value does not fit in a 32-bit integer, raises a
/// floating-point invalid exception. If the exception is masked, returns
/// the most negative integer.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTSS2SI / CVTSS2SI </c>
@ -1315,6 +1389,10 @@ _mm_cvtss_si32(__m128 __a)
/// Converts a float value contained in the lower 32 bits of a vector of
/// [4 x float] into a 32-bit integer.
///
/// If the converted value does not fit in a 32-bit integer, raises a
/// floating-point invalid exception. If the exception is masked, returns
/// the most negative integer.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTSS2SI / CVTSS2SI </c>
@ -1335,6 +1413,10 @@ _mm_cvt_ss2si(__m128 __a)
/// Converts a float value contained in the lower 32 bits of a vector of
/// [4 x float] into a 64-bit integer.
///
/// If the converted value does not fit in a 32-bit integer, raises a
/// floating-point invalid exception. If the exception is masked, returns
/// the most negative integer.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTSS2SI / CVTSS2SI </c>
@ -1355,6 +1437,10 @@ _mm_cvtss_si64(__m128 __a)
/// Converts two low-order float values in a 128-bit vector of
/// [4 x float] into a 64-bit vector of [2 x i32].
///
/// If a converted value does not fit in a 32-bit integer, raises a
/// floating-point invalid exception. If the exception is masked, returns
/// the most negative integer.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> CVTPS2PI </c> instruction.
@ -1371,6 +1457,10 @@ _mm_cvtps_pi32(__m128 __a)
/// Converts two low-order float values in a 128-bit vector of
/// [4 x float] into a 64-bit vector of [2 x i32].
///
/// If a converted value does not fit in a 32-bit integer, raises a
/// floating-point invalid exception. If the exception is masked, returns
/// the most negative integer.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> CVTPS2PI </c> instruction.
@ -1384,9 +1474,12 @@ _mm_cvt_ps2pi(__m128 __a)
return _mm_cvtps_pi32(__a);
}
/// Converts a float value contained in the lower 32 bits of a vector of
/// [4 x float] into a 32-bit integer, truncating the result when it is
/// inexact.
/// Converts the lower (first) element of a vector of [4 x float] into a signed
/// truncated (rounded toward zero) 32-bit integer.
///
/// If the converted value does not fit in a 32-bit integer, raises a
/// floating-point invalid exception. If the exception is masked, returns
/// the most negative integer.
///
/// \headerfile <x86intrin.h>
///
@ -1403,9 +1496,12 @@ _mm_cvttss_si32(__m128 __a)
return __builtin_ia32_cvttss2si((__v4sf)__a);
}
/// Converts a float value contained in the lower 32 bits of a vector of
/// [4 x float] into a 32-bit integer, truncating the result when it is
/// inexact.
/// Converts the lower (first) element of a vector of [4 x float] into a signed
/// truncated (rounded toward zero) 32-bit integer.
///
/// If the converted value does not fit in a 32-bit integer, raises a
/// floating-point invalid exception. If the exception is masked, returns
/// the most negative integer.
///
/// \headerfile <x86intrin.h>
///
@ -1423,9 +1519,12 @@ _mm_cvtt_ss2si(__m128 __a)
}
#ifdef __x86_64__
/// Converts a float value contained in the lower 32 bits of a vector of
/// [4 x float] into a 64-bit integer, truncating the result when it is
/// inexact.
/// Converts the lower (first) element of a vector of [4 x float] into a signed
/// truncated (rounded toward zero) 64-bit integer.
///
/// If the converted value does not fit in a 64-bit integer, raises a
/// floating-point invalid exception. If the exception is masked, returns
/// the most negative integer.
///
/// \headerfile <x86intrin.h>
///
@ -1443,9 +1542,13 @@ _mm_cvttss_si64(__m128 __a)
}
#endif
/// Converts two low-order float values in a 128-bit vector of
/// [4 x float] into a 64-bit vector of [2 x i32], truncating the result
/// when it is inexact.
/// Converts the lower (first) two elements of a 128-bit vector of [4 x float]
/// into two signed truncated (rounded toward zero) 32-bit integers,
/// returned in a 64-bit vector of [2 x i32].
///
/// If a converted value does not fit in a 32-bit integer, raises a
/// floating-point invalid exception. If the exception is masked, returns
/// the most negative integer.
///
/// \headerfile <x86intrin.h>
///
@ -1461,9 +1564,13 @@ _mm_cvttps_pi32(__m128 __a)
return (__m64)__builtin_ia32_cvttps2pi((__v4sf)__a);
}
/// Converts two low-order float values in a 128-bit vector of [4 x
/// float] into a 64-bit vector of [2 x i32], truncating the result when it
/// is inexact.
/// Converts the lower (first) two elements of a 128-bit vector of [4 x float]
/// into two signed truncated (rounded toward zero) 64-bit integers,
/// returned in a 64-bit vector of [2 x i32].
///
/// If a converted value does not fit in a 32-bit integer, raises a
/// floating-point invalid exception. If the exception is masked, returns
/// the most negative integer.
///
/// \headerfile <x86intrin.h>
///
@ -1803,7 +1910,7 @@ _mm_undefined_ps(void)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_set_ss(float __w)
{
return __extension__ (__m128){ __w, 0, 0, 0 };
return __extension__ (__m128){ __w, 0.0f, 0.0f, 0.0f };
}
/// Constructs a 128-bit floating-point vector of [4 x float], with each
@ -2940,6 +3047,85 @@ _mm_movemask_ps(__m128 __a)
return __builtin_ia32_movmskps((__v4sf)__a);
}
/* Compare */
#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
#define _CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */
#define _CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */
#define _CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */
#define _CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */
#define _CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */
#define _CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */
#define _CMP_ORD_Q 0x07 /* Ordered (non-signaling) */
/// Compares each of the corresponding values of two 128-bit vectors of
/// [4 x float], using the operation specified by the immediate integer
/// operand.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
/// If either value in a comparison is NaN, comparisons that are ordered
/// return false, and comparisons that are unordered return true.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c);
/// \endcode
///
/// This intrinsic corresponds to the <c> (V)CMPPS </c> instruction.
///
/// \param a
/// A 128-bit vector of [4 x float].
/// \param b
/// A 128-bit vector of [4 x float].
/// \param c
/// An immediate integer operand, with bits [4:0] specifying which comparison
/// operation to use: \n
/// 0x00: Equal (ordered, non-signaling) \n
/// 0x01: Less-than (ordered, signaling) \n
/// 0x02: Less-than-or-equal (ordered, signaling) \n
/// 0x03: Unordered (non-signaling) \n
/// 0x04: Not-equal (unordered, non-signaling) \n
/// 0x05: Not-less-than (unordered, signaling) \n
/// 0x06: Not-less-than-or-equal (unordered, signaling) \n
/// 0x07: Ordered (non-signaling) \n
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
#define _mm_cmp_ps(a, b, c) \
((__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), (c)))
/// Compares each of the corresponding scalar values of two 128-bit
/// vectors of [4 x float], using the operation specified by the immediate
/// integer operand.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
/// If either value in a comparison is NaN, comparisons that are ordered
/// return false, and comparisons that are unordered return true.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c);
/// \endcode
///
/// This intrinsic corresponds to the <c> (V)CMPSS </c> instruction.
///
/// \param a
/// A 128-bit vector of [4 x float].
/// \param b
/// A 128-bit vector of [4 x float].
/// \param c
/// An immediate integer operand, with bits [4:0] specifying which comparison
/// operation to use: \n
/// 0x00: Equal (ordered, non-signaling) \n
/// 0x01: Less-than (ordered, signaling) \n
/// 0x02: Less-than-or-equal (ordered, signaling) \n
/// 0x03: Unordered (non-signaling) \n
/// 0x04: Not-equal (unordered, non-signaling) \n
/// 0x05: Not-less-than (unordered, signaling) \n
/// 0x06: Not-less-than-or-equal (unordered, signaling) \n
/// 0x07: Ordered (non-signaling) \n
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
#define _mm_cmp_ss(a, b, c) \
((__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), (c)))
#define _MM_ALIGN16 __attribute__((aligned(16)))

25
lib/include/yvals_core.h vendored Normal file
View File

@ -0,0 +1,25 @@
//===----- yvals_core.h - Internal MSVC STL core header -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Only include this if we are aiming for MSVC compatibility.
#ifndef _MSC_VER
#include_next <yvals_core.h>
#else
#ifndef __clang_yvals_core_h
#define __clang_yvals_core_h
#include_next <yvals_core.h>
#ifdef _STL_INTRIN_HEADER
#undef _STL_INTRIN_HEADER
#define _STL_INTRIN_HEADER <intrin0.h>
#endif
#endif
#endif

18
lib/include/zos_wrappers/builtins.h vendored Normal file
View File

@ -0,0 +1,18 @@
/*===---- builtins.h - z/Architecture Builtin Functions --------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __ZOS_WRAPPERS_BUILTINS_H
#define __ZOS_WRAPPERS_BUILTINS_H
#if defined(__MVS__)
#include_next <builtins.h>
#if defined(__VEC__)
#include <vecintrin.h>
#endif
#endif /* defined(__MVS__) */
#endif /* __ZOS_WRAPPERS_BUILTINS_H */

View File

@ -26,7 +26,7 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Iter, class _Sent, class _BinaryPredicate>
_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter
__adjacent_find(_Iter __first, _Sent __last, _BinaryPredicate&& __pred) {
if (__first == __last)
return __first;
@ -40,13 +40,13 @@ __adjacent_find(_Iter __first, _Sent __last, _BinaryPredicate&& __pred) {
}
template <class _ForwardIterator, class _BinaryPredicate>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
adjacent_find(_ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred) {
return std::__adjacent_find(std::move(__first), std::move(__last), __pred);
}
template <class _ForwardIterator>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
adjacent_find(_ForwardIterator __first, _ForwardIterator __last) {
return std::adjacent_find(std::move(__first), std::move(__last), __equal_to());
}

View File

@ -19,7 +19,7 @@
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _Predicate>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
all_of(_InputIterator __first, _InputIterator __last, _Predicate __pred) {
for (; __first != __last; ++__first)
if (!__pred(*__first))

View File

@ -19,7 +19,7 @@
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _Predicate>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
any_of(_InputIterator __first, _InputIterator __last, _Predicate __pred) {
for (; __first != __last; ++__first)
if (__pred(*__first))

View File

@ -22,14 +22,14 @@
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator, class _Tp, class _Compare>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
binary_search(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) {
__first = std::lower_bound<_ForwardIterator, _Tp, __comp_ref_type<_Compare> >(__first, __last, __value, __comp);
return __first != __last && !__comp(__value, *__first);
}
template <class _ForwardIterator, class _Tp>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
binary_search(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
return std::binary_search(__first, __last, __value, __less<>());
}

View File

@ -21,7 +21,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 17
template <class _Tp, class _Compare>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI constexpr const _Tp&
[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI constexpr const _Tp&
clamp(_LIBCPP_LIFETIMEBOUND const _Tp& __v,
_LIBCPP_LIFETIMEBOUND const _Tp& __lo,
_LIBCPP_LIFETIMEBOUND const _Tp& __hi,
@ -31,7 +31,7 @@ clamp(_LIBCPP_LIFETIMEBOUND const _Tp& __v,
}
template <class _Tp>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI constexpr const _Tp&
[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI constexpr const _Tp&
clamp(_LIBCPP_LIFETIMEBOUND const _Tp& __v,
_LIBCPP_LIFETIMEBOUND const _Tp& __lo,
_LIBCPP_LIFETIMEBOUND const _Tp& __hi) {

View File

@ -10,8 +10,7 @@
#define _LIBCPP___ALGORITHM_COMP_H
#include <__config>
#include <__type_traits/integral_constant.h>
#include <__type_traits/operation_traits.h>
#include <__type_traits/desugars_to.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
@ -27,7 +26,7 @@ struct __equal_to {
};
template <class _Tp, class _Up>
struct __desugars_to<__equal_tag, __equal_to, _Tp, _Up> : true_type {};
inline const bool __desugars_to_v<__equal_tag, __equal_to, _Tp, _Up> = true;
// The definition is required because __less is part of the ABI, but it's empty
// because all comparisons should be transparent.
@ -42,6 +41,9 @@ struct __less<void, void> {
}
};
template <class _Tp>
inline const bool __desugars_to_v<__less_tag, __less<>, _Tp, _Tp> = true;
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP___ALGORITHM_COMP_H

View File

@ -41,9 +41,9 @@ struct __debug_less {
}
template <class _LHS, class _RHS>
_LIBCPP_CONSTEXPR_SINCE_CXX14 inline _LIBCPP_HIDE_FROM_ABI decltype((void)std::declval<_Compare&>()(
std::declval<_LHS&>(), std::declval<_RHS&>()))
__do_compare_assert(int, _LHS& __l, _RHS& __r) {
_LIBCPP_CONSTEXPR_SINCE_CXX14 inline
_LIBCPP_HIDE_FROM_ABI decltype((void)std::declval<_Compare&>()(std::declval<_LHS&>(), std::declval<_RHS&>()))
__do_compare_assert(int, _LHS& __l, _RHS& __r) {
_LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(!__comp_(__l, __r), "Comparator does not induce a strict weak ordering");
(void)__l;
(void)__r;

View File

@ -32,7 +32,7 @@ template <class, class _InIter, class _Sent, class _OutIter>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> __copy(_InIter, _Sent, _OutIter);
template <class _AlgPolicy>
struct __copy_loop {
struct __copy_impl {
template <class _InIter, class _Sent, class _OutIter>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter>
operator()(_InIter __first, _Sent __last, _OutIter __result) const {
@ -94,9 +94,7 @@ struct __copy_loop {
__local_first = _Traits::__begin(++__segment_iterator);
}
}
};
struct __copy_trivial {
// At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer.
template <class _In, class _Out, __enable_if_t<__can_lower_copy_assignment_to_memmove<_In, _Out>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*>
@ -108,7 +106,7 @@ struct __copy_trivial {
template <class _AlgPolicy, class _InIter, class _Sent, class _OutIter>
pair<_InIter, _OutIter> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14
__copy(_InIter __first, _Sent __last, _OutIter __result) {
return std::__dispatch_copy_or_move<_AlgPolicy, __copy_loop<_AlgPolicy>, __copy_trivial>(
return std::__copy_move_unwrap_iters<__copy_impl<_AlgPolicy> >(
std::move(__first), std::move(__last), std::move(__result));
}

View File

@ -15,7 +15,7 @@
#include <__config>
#include <__iterator/segmented_iterator.h>
#include <__type_traits/common_type.h>
#include <__type_traits/is_copy_constructible.h>
#include <__type_traits/is_constructible.h>
#include <__utility/move.h>
#include <__utility/pair.h>
@ -33,7 +33,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InIter, _OutIter>
__copy_backward(_InIter __first, _Sent __last, _OutIter __result);
template <class _AlgPolicy>
struct __copy_backward_loop {
struct __copy_backward_impl {
template <class _InIter, class _Sent, class _OutIter>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter>
operator()(_InIter __first, _Sent __last, _OutIter __result) const {
@ -104,9 +104,7 @@ struct __copy_backward_loop {
__local_last = _Traits::__end(__segment_iterator);
}
}
};
struct __copy_backward_trivial {
// At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer.
template <class _In, class _Out, __enable_if_t<__can_lower_copy_assignment_to_memmove<_In, _Out>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*>
@ -118,7 +116,7 @@ struct __copy_backward_trivial {
template <class _AlgPolicy, class _BidirectionalIterator1, class _Sentinel, class _BidirectionalIterator2>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_BidirectionalIterator1, _BidirectionalIterator2>
__copy_backward(_BidirectionalIterator1 __first, _Sentinel __last, _BidirectionalIterator2 __result) {
return std::__dispatch_copy_or_move<_AlgPolicy, __copy_backward_loop<_AlgPolicy>, __copy_backward_trivial>(
return std::__copy_move_unwrap_iters<__copy_backward_impl<_AlgPolicy> >(
std::move(__first), std::move(__last), std::move(__result));
}

View File

@ -19,9 +19,8 @@
#include <__type_traits/enable_if.h>
#include <__type_traits/is_always_bitcastable.h>
#include <__type_traits/is_constant_evaluated.h>
#include <__type_traits/is_copy_constructible.h>
#include <__type_traits/is_constructible.h>
#include <__type_traits/is_trivially_assignable.h>
#include <__type_traits/is_trivially_copyable.h>
#include <__type_traits/is_volatile.h>
#include <__utility/move.h>
#include <__utility/pair.h>
@ -81,30 +80,17 @@ __copy_backward_trivial_impl(_In* __first, _In* __last, _Out* __result) {
// Iterator unwrapping and dispatching to the correct overload.
template <class _F1, class _F2>
struct __overload : _F1, _F2 {
using _F1::operator();
using _F2::operator();
};
template <class _InIter, class _Sent, class _OutIter, class = void>
struct __can_rewrap : false_type {};
template <class _InIter, class _Sent, class _OutIter>
struct __can_rewrap<_InIter,
_Sent,
_OutIter,
// Note that sentinels are always copy-constructible.
__enable_if_t< is_copy_constructible<_InIter>::value && is_copy_constructible<_OutIter>::value > >
: true_type {};
template <class _InIter, class _OutIter>
struct __can_rewrap
: integral_constant<bool, is_copy_constructible<_InIter>::value && is_copy_constructible<_OutIter>::value> {};
template <class _Algorithm,
class _InIter,
class _Sent,
class _OutIter,
__enable_if_t<__can_rewrap<_InIter, _Sent, _OutIter>::value, int> = 0>
__enable_if_t<__can_rewrap<_InIter, _OutIter>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 pair<_InIter, _OutIter>
__unwrap_and_dispatch(_InIter __first, _Sent __last, _OutIter __out_first) {
__copy_move_unwrap_iters(_InIter __first, _Sent __last, _OutIter __out_first) {
auto __range = std::__unwrap_range(__first, std::move(__last));
auto __result = _Algorithm()(std::move(__range.first), std::move(__range.second), std::__unwrap_iter(__out_first));
return std::make_pair(std::__rewrap_range<_Sent>(std::move(__first), std::move(__result.first)),
@ -115,24 +101,12 @@ template <class _Algorithm,
class _InIter,
class _Sent,
class _OutIter,
__enable_if_t<!__can_rewrap<_InIter, _Sent, _OutIter>::value, int> = 0>
__enable_if_t<!__can_rewrap<_InIter, _OutIter>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 pair<_InIter, _OutIter>
__unwrap_and_dispatch(_InIter __first, _Sent __last, _OutIter __out_first) {
__copy_move_unwrap_iters(_InIter __first, _Sent __last, _OutIter __out_first) {
return _Algorithm()(std::move(__first), std::move(__last), std::move(__out_first));
}
template <class _AlgPolicy,
class _NaiveAlgorithm,
class _OptimizedAlgorithm,
class _InIter,
class _Sent,
class _OutIter>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 pair<_InIter, _OutIter>
__dispatch_copy_or_move(_InIter __first, _Sent __last, _OutIter __out_first) {
using _Algorithm = __overload<_NaiveAlgorithm, _OptimizedAlgorithm>;
return std::__unwrap_and_dispatch<_Algorithm>(std::move(__first), std::move(__last), std::move(__out_first));
}
_LIBCPP_END_NAMESPACE_STD
_LIBCPP_POP_MACROS

View File

@ -79,7 +79,7 @@ __count(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __l
}
template <class _InputIterator, class _Tp>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __iter_diff_t<_InputIterator>
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __iter_diff_t<_InputIterator>
count(_InputIterator __first, _InputIterator __last, const _Tp& __value) {
__identity __proj;
return std::__count<_ClassicAlgPolicy>(__first, __last, __value, __proj);

View File

@ -20,9 +20,9 @@
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _Predicate>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
typename iterator_traits<_InputIterator>::difference_type
count_if(_InputIterator __first, _InputIterator __last, _Predicate __pred) {
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
typename iterator_traits<_InputIterator>::difference_type
count_if(_InputIterator __first, _InputIterator __last, _Predicate __pred) {
typename iterator_traits<_InputIterator>::difference_type __r(0);
for (; __first != __last; ++__first)
if (__pred(*__first))

View File

@ -18,12 +18,11 @@
#include <__iterator/distance.h>
#include <__iterator/iterator_traits.h>
#include <__string/constexpr_c_functions.h>
#include <__type_traits/desugars_to.h>
#include <__type_traits/enable_if.h>
#include <__type_traits/integral_constant.h>
#include <__type_traits/is_constant_evaluated.h>
#include <__type_traits/is_equality_comparable.h>
#include <__type_traits/is_volatile.h>
#include <__type_traits/operation_traits.h>
#include <__utility/move.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@ -47,7 +46,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 boo
template <class _Tp,
class _Up,
class _BinaryPredicate,
__enable_if_t<__desugars_to<__equal_tag, _BinaryPredicate, _Tp, _Up>::value && !is_volatile<_Tp>::value &&
__enable_if_t<__desugars_to_v<__equal_tag, _BinaryPredicate, _Tp, _Up> && !is_volatile<_Tp>::value &&
!is_volatile<_Up>::value && __libcpp_is_trivially_equality_comparable<_Tp, _Up>::value,
int> = 0>
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
@ -56,33 +55,19 @@ __equal_iter_impl(_Tp* __first1, _Tp* __last1, _Up* __first2, _BinaryPredicate&)
}
template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _BinaryPredicate __pred) {
return std::__equal_iter_impl(
std::__unwrap_iter(__first1), std::__unwrap_iter(__last1), std::__unwrap_iter(__first2), __pred);
}
template <class _InputIterator1, class _InputIterator2>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2) {
return std::equal(__first1, __last1, __first2, __equal_to());
}
#if _LIBCPP_STD_VER >= 14
template <class _BinaryPredicate, class _InputIterator1, class _InputIterator2>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
__equal(_InputIterator1 __first1,
_InputIterator1 __last1,
_InputIterator2 __first2,
_InputIterator2 __last2,
_BinaryPredicate __pred,
input_iterator_tag,
input_iterator_tag) {
for (; __first1 != __last1 && __first2 != __last2; ++__first1, (void)++__first2)
if (!__pred(*__first1, *__first2))
return false;
return __first1 == __last1 && __first2 == __last2;
}
template <class _Iter1, class _Sent1, class _Iter2, class _Sent2, class _Pred, class _Proj1, class _Proj2>
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_impl(
@ -101,7 +86,7 @@ template <class _Tp,
class _Pred,
class _Proj1,
class _Proj2,
__enable_if_t<__desugars_to<__equal_tag, _Pred, _Tp, _Up>::value && __is_identity<_Proj1>::value &&
__enable_if_t<__desugars_to_v<__equal_tag, _Pred, _Tp, _Up> && __is_identity<_Proj1>::value &&
__is_identity<_Proj2>::value && !is_volatile<_Tp>::value && !is_volatile<_Up>::value &&
__libcpp_is_trivially_equality_comparable<_Tp, _Up>::value,
int> = 0>
@ -110,17 +95,18 @@ __equal_impl(_Tp* __first1, _Tp* __last1, _Up* __first2, _Up*, _Pred&, _Proj1&,
return std::__constexpr_memcmp_equal(__first1, __first2, __element_count(__last1 - __first1));
}
template <class _BinaryPredicate, class _RandomAccessIterator1, class _RandomAccessIterator2>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
__equal(_RandomAccessIterator1 __first1,
_RandomAccessIterator1 __last1,
_RandomAccessIterator2 __first2,
_RandomAccessIterator2 __last2,
_BinaryPredicate __pred,
random_access_iterator_tag,
random_access_iterator_tag) {
if (std::distance(__first1, __last1) != std::distance(__first2, __last2))
return false;
template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
equal(_InputIterator1 __first1,
_InputIterator1 __last1,
_InputIterator2 __first2,
_InputIterator2 __last2,
_BinaryPredicate __pred) {
if constexpr (__has_random_access_iterator_category<_InputIterator1>::value &&
__has_random_access_iterator_category<_InputIterator2>::value) {
if (std::distance(__first1, __last1) != std::distance(__first2, __last2))
return false;
}
__identity __proj;
return std::__equal_impl(
std::__unwrap_iter(__first1),
@ -132,36 +118,13 @@ __equal(_RandomAccessIterator1 __first1,
__proj);
}
template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
equal(_InputIterator1 __first1,
_InputIterator1 __last1,
_InputIterator2 __first2,
_InputIterator2 __last2,
_BinaryPredicate __pred) {
return std::__equal<_BinaryPredicate&>(
__first1,
__last1,
__first2,
__last2,
__pred,
typename iterator_traits<_InputIterator1>::iterator_category(),
typename iterator_traits<_InputIterator2>::iterator_category());
template <class _InputIterator1, class _InputIterator2>
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2) {
return std::equal(__first1, __last1, __first2, __last2, __equal_to());
}
template <class _InputIterator1, class _InputIterator2>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2) {
return std::__equal(
__first1,
__last1,
__first2,
__last2,
__equal_to(),
typename iterator_traits<_InputIterator1>::iterator_category(),
typename iterator_traits<_InputIterator2>::iterator_category());
}
#endif
#endif // _LIBCPP_STD_VER >= 14
_LIBCPP_END_NAMESPACE_STD

View File

@ -23,7 +23,7 @@
#include <__iterator/iterator_traits.h>
#include <__iterator/next.h>
#include <__type_traits/is_callable.h>
#include <__type_traits/is_copy_constructible.h>
#include <__type_traits/is_constructible.h>
#include <__utility/move.h>
#include <__utility/pair.h>
@ -60,7 +60,7 @@ __equal_range(_Iter __first, _Sent __last, const _Tp& __value, _Compare&& __comp
}
template <class _ForwardIterator, class _Tp, class _Compare>
_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_ForwardIterator, _ForwardIterator>
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_ForwardIterator, _ForwardIterator>
equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) {
static_assert(__is_callable<_Compare, decltype(*__first), const _Tp&>::value, "The comparator has to be callable");
static_assert(is_copy_constructible<_ForwardIterator>::value, "Iterator has to be copy constructible");
@ -73,7 +73,7 @@ equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __valu
}
template <class _ForwardIterator, class _Tp>
_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_ForwardIterator, _ForwardIterator>
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_ForwardIterator, _ForwardIterator>
equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
return std::equal_range(std::move(__first), std::move(__last), __value, __less<>());
}

View File

@ -9,18 +9,74 @@
#ifndef _LIBCPP___ALGORITHM_FILL_N_H
#define _LIBCPP___ALGORITHM_FILL_N_H
#include <__algorithm/min.h>
#include <__config>
#include <__fwd/bit_reference.h>
#include <__iterator/iterator_traits.h>
#include <__memory/pointer_traits.h>
#include <__utility/convert_to_integral.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif
_LIBCPP_PUSH_MACROS
#include <__undef_macros>
_LIBCPP_BEGIN_NAMESPACE_STD
// fill_n isn't specialized for std::memset, because the compiler already optimizes the loop to a call to std::memset.
template <class _OutputIterator, class _Size, class _Tp>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
__fill_n(_OutputIterator __first, _Size __n, const _Tp& __value);
template <bool _FillVal, class _Cp>
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
__fill_n_bool(__bit_iterator<_Cp, false> __first, typename _Cp::size_type __n) {
using _It = __bit_iterator<_Cp, false>;
using __storage_type = typename _It::__storage_type;
const int __bits_per_word = _It::__bits_per_word;
// do first partial word
if (__first.__ctz_ != 0) {
__storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_);
__storage_type __dn = std::min(__clz_f, __n);
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
if (_FillVal)
*__first.__seg_ |= __m;
else
*__first.__seg_ &= ~__m;
__n -= __dn;
++__first.__seg_;
}
// do middle whole words
__storage_type __nw = __n / __bits_per_word;
std::__fill_n(std::__to_address(__first.__seg_), __nw, _FillVal ? static_cast<__storage_type>(-1) : 0);
__n -= __nw * __bits_per_word;
// do last partial word
if (__n > 0) {
__first.__seg_ += __nw;
__storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
if (_FillVal)
*__first.__seg_ |= __m;
else
*__first.__seg_ &= ~__m;
}
}
template <class _Cp, class _Size>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, false>
__fill_n(__bit_iterator<_Cp, false> __first, _Size __n, const bool& __value) {
if (__n > 0) {
if (__value)
std::__fill_n_bool<true>(__first, __n);
else
std::__fill_n_bool<false>(__first, __n);
}
return __first + __n;
}
template <class _OutputIterator, class _Size, class _Tp>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
__fill_n(_OutputIterator __first, _Size __n, const _Tp& __value) {
@ -37,4 +93,6 @@ fill_n(_OutputIterator __first, _Size __n, const _Tp& __value) {
_LIBCPP_END_NAMESPACE_STD
_LIBCPP_POP_MACROS
#endif // _LIBCPP___ALGORITHM_FILL_N_H

View File

@ -43,7 +43,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
// generic implementation
template <class _Iter, class _Sent, class _Tp, class _Proj>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iter
__find_impl(_Iter __first, _Sent __last, const _Tp& __value, _Proj& __proj) {
__find(_Iter __first, _Sent __last, const _Tp& __value, _Proj& __proj) {
for (; __first != __last; ++__first)
if (std::__invoke(__proj, *__first) == __value)
break;
@ -57,8 +57,7 @@ template <class _Tp,
__enable_if_t<__is_identity<_Proj>::value && __libcpp_is_trivially_equality_comparable<_Tp, _Up>::value &&
sizeof(_Tp) == 1,
int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp*
__find_impl(_Tp* __first, _Tp* __last, const _Up& __value, _Proj&) {
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp* __find(_Tp* __first, _Tp* __last, const _Up& __value, _Proj&) {
if (auto __ret = std::__constexpr_memchr(__first, __value, __last - __first))
return __ret;
return __last;
@ -71,8 +70,7 @@ template <class _Tp,
__enable_if_t<__is_identity<_Proj>::value && __libcpp_is_trivially_equality_comparable<_Tp, _Up>::value &&
sizeof(_Tp) == sizeof(wchar_t) && _LIBCPP_ALIGNOF(_Tp) >= _LIBCPP_ALIGNOF(wchar_t),
int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp*
__find_impl(_Tp* __first, _Tp* __last, const _Up& __value, _Proj&) {
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp* __find(_Tp* __first, _Tp* __last, const _Up& __value, _Proj&) {
if (auto __ret = std::__constexpr_wmemchr(__first, __value, __last - __first))
return __ret;
return __last;
@ -89,10 +87,10 @@ template <class _Tp,
is_signed<_Tp>::value == is_signed<_Up>::value,
int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp*
__find_impl(_Tp* __first, _Tp* __last, const _Up& __value, _Proj& __proj) {
__find(_Tp* __first, _Tp* __last, const _Up& __value, _Proj& __proj) {
if (__value < numeric_limits<_Tp>::min() || __value > numeric_limits<_Tp>::max())
return __last;
return std::__find_impl(__first, __last, _Tp(__value), __proj);
return std::__find(__first, __last, _Tp(__value), __proj);
}
// __bit_iterator implementation
@ -134,7 +132,7 @@ __find_bool(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n)
template <class _Cp, bool _IsConst, class _Tp, class _Proj, __enable_if_t<__is_identity<_Proj>::value, int> = 0>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, _IsConst>
__find_impl(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value, _Proj&) {
__find(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value, _Proj&) {
if (static_cast<bool>(__value))
return std::__find_bool<true>(__first, static_cast<typename _Cp::size_type>(__last - __first));
return std::__find_bool<false>(__first, static_cast<typename _Cp::size_type>(__last - __first));
@ -150,7 +148,7 @@ template <class _SegmentedIterator,
class _Proj,
__enable_if_t<__is_segmented_iterator<_SegmentedIterator>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _SegmentedIterator
__find_impl(_SegmentedIterator __first, _SegmentedIterator __last, const _Tp& __value, _Proj& __proj) {
__find(_SegmentedIterator __first, _SegmentedIterator __last, const _Tp& __value, _Proj& __proj) {
return std::__find_segment_if(std::move(__first), std::move(__last), __find_segment<_Tp>(__value), __proj);
}
@ -163,17 +161,17 @@ struct __find_segment {
template <class _InputIterator, class _Proj>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _InputIterator
operator()(_InputIterator __first, _InputIterator __last, _Proj& __proj) const {
return std::__find_impl(__first, __last, __value_, __proj);
return std::__find(__first, __last, __value_, __proj);
}
};
// public API
template <class _InputIterator, class _Tp>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
find(_InputIterator __first, _InputIterator __last, const _Tp& __value) {
__identity __proj;
return std::__rewrap_iter(
__first, std::__find_impl(std::__unwrap_iter(__first), std::__unwrap_iter(__last), __value, __proj));
__first, std::__find(std::__unwrap_iter(__first), std::__unwrap_iter(__last), __value, __proj));
}
_LIBCPP_END_NAMESPACE_STD

View File

@ -205,7 +205,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Fo
}
template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_end(
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_end(
_ForwardIterator1 __first1,
_ForwardIterator1 __last1,
_ForwardIterator2 __first2,
@ -215,7 +215,7 @@ _LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
}
template <class _ForwardIterator1, class _ForwardIterator2>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1
find_end(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2) {
return std::find_end(__first1, __last1, __first2, __last2, __equal_to());
}

View File

@ -35,7 +35,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator1 __find_fir
}
template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_first_of(
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_first_of(
_ForwardIterator1 __first1,
_ForwardIterator1 __last1,
_ForwardIterator2 __first2,
@ -45,7 +45,7 @@ _LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
}
template <class _ForwardIterator1, class _ForwardIterator2>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_first_of(
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_first_of(
_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2) {
return std::__find_first_of_ce(__first1, __last1, __first2, __last2, __equal_to());
}

View File

@ -19,7 +19,7 @@
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _Predicate>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
find_if(_InputIterator __first, _InputIterator __last, _Predicate __pred) {
for (; __first != __last; ++__first)
if (__pred(*__first))

View File

@ -19,7 +19,7 @@
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _Predicate>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
find_if_not(_InputIterator __first, _InputIterator __last, _Predicate __pred) {
for (; __first != __last; ++__first)
if (!__pred(*__first))

View File

@ -78,8 +78,7 @@ concept __indirectly_binary_left_foldable =
struct __fold_left_with_iter {
template <input_iterator _Ip, sentinel_for<_Ip> _Sp, class _Tp, __indirectly_binary_left_foldable<_Tp, _Ip> _Fp>
_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static constexpr auto
operator()(_Ip __first, _Sp __last, _Tp __init, _Fp __f) {
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr auto operator()(_Ip __first, _Sp __last, _Tp __init, _Fp __f) {
using _Up = decay_t<invoke_result_t<_Fp&, _Tp, iter_reference_t<_Ip>>>;
if (__first == __last) {
@ -95,7 +94,7 @@ struct __fold_left_with_iter {
}
template <input_range _Rp, class _Tp, __indirectly_binary_left_foldable<_Tp, iterator_t<_Rp>> _Fp>
_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static constexpr auto operator()(_Rp&& __r, _Tp __init, _Fp __f) {
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr auto operator()(_Rp&& __r, _Tp __init, _Fp __f) {
auto __result = operator()(ranges::begin(__r), ranges::end(__r), std::move(__init), std::ref(__f));
using _Up = decay_t<invoke_result_t<_Fp&, _Tp, range_reference_t<_Rp>>>;
@ -107,13 +106,12 @@ inline constexpr auto fold_left_with_iter = __fold_left_with_iter();
struct __fold_left {
template <input_iterator _Ip, sentinel_for<_Ip> _Sp, class _Tp, __indirectly_binary_left_foldable<_Tp, _Ip> _Fp>
_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static constexpr auto
operator()(_Ip __first, _Sp __last, _Tp __init, _Fp __f) {
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr auto operator()(_Ip __first, _Sp __last, _Tp __init, _Fp __f) {
return fold_left_with_iter(std::move(__first), std::move(__last), std::move(__init), std::ref(__f)).value;
}
template <input_range _Rp, class _Tp, __indirectly_binary_left_foldable<_Tp, iterator_t<_Rp>> _Fp>
_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static constexpr auto operator()(_Rp&& __r, _Tp __init, _Fp __f) {
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr auto operator()(_Rp&& __r, _Tp __init, _Fp __f) {
return fold_left_with_iter(ranges::begin(__r), ranges::end(__r), std::move(__init), std::ref(__f)).value;
}
};

View File

@ -47,7 +47,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __includes(
}
template <class _InputIterator1, class _InputIterator2, class _Compare>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
includes(_InputIterator1 __first1,
_InputIterator1 __last1,
_InputIterator2 __first2,
@ -67,7 +67,7 @@ includes(_InputIterator1 __first1,
}
template <class _InputIterator1, class _InputIterator2>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
includes(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2) {
return std::includes(std::move(__first1), std::move(__last1), std::move(__first2), std::move(__last2), __less<>());
}

View File

@ -114,8 +114,8 @@ _LIBCPP_HIDE_FROM_ABI void __buffered_inplace_merge(
for (_BidirectionalIterator __i = __middle; __i != __last;
__d.template __incr<value_type>(), (void)++__i, (void)++__p)
::new ((void*)__p) value_type(_IterOps<_AlgPolicy>::__iter_move(__i));
typedef __unconstrained_reverse_iterator<_BidirectionalIterator> _RBi;
typedef __unconstrained_reverse_iterator<value_type*> _Rv;
typedef reverse_iterator<_BidirectionalIterator> _RBi;
typedef reverse_iterator<value_type*> _Rv;
typedef __invert<_Compare> _Inverted;
std::__half_inplace_merge<_AlgPolicy>(
_Rv(__p), _Rv(__buff), _RBi(__middle), _RBi(__first), _RBi(__last), _Inverted(__comp));

View File

@ -22,13 +22,13 @@
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _RandomAccessIterator, class _Compare>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
is_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
return std::__is_heap_until(__first, __last, static_cast<__comp_ref_type<_Compare> >(__comp)) == __last;
}
template <class _RandomAccessIterator>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
is_heap(_RandomAccessIterator __first, _RandomAccessIterator __last) {
return std::is_heap(__first, __last, __less<>());
}

View File

@ -46,13 +46,13 @@ __is_heap_until(_RandomAccessIterator __first, _RandomAccessIterator __last, _Co
}
template <class _RandomAccessIterator, class _Compare>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator
is_heap_until(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
return std::__is_heap_until(__first, __last, static_cast<__comp_ref_type<_Compare> >(__comp));
}
template <class _RandomAccessIterator>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator
is_heap_until(_RandomAccessIterator __first, _RandomAccessIterator __last) {
return std::__is_heap_until(__first, __last, __less<>());
}

View File

@ -18,7 +18,7 @@
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _Predicate>
_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
is_partitioned(_InputIterator __first, _InputIterator __last, _Predicate __pred) {
for (; __first != __last; ++__first)
if (!__pred(*__first))

View File

@ -113,7 +113,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __is_permutation_impl(
// 2+1 iterators, predicate. Not used by range algorithms.
template <class _AlgPolicy, class _ForwardIterator1, class _Sentinel1, class _ForwardIterator2, class _BinaryPredicate>
_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __is_permutation(
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __is_permutation(
_ForwardIterator1 __first1, _Sentinel1 __last1, _ForwardIterator2 __first2, _BinaryPredicate&& __pred) {
// Shorten sequences as much as possible by lopping of any equal prefix.
for (; __first1 != __last1; ++__first1, (void)++__first2) {
@ -247,7 +247,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __is_permutation(
// 2+1 iterators, predicate
template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation(
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation(
_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _BinaryPredicate __pred) {
static_assert(__is_callable<_BinaryPredicate, decltype(*__first1), decltype(*__first2)>::value,
"The predicate has to be callable");
@ -257,7 +257,7 @@ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool i
// 2+1 iterators
template <class _ForwardIterator1, class _ForwardIterator2>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
is_permutation(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2) {
return std::is_permutation(__first1, __last1, __first2, __equal_to());
}
@ -266,7 +266,7 @@ is_permutation(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIt
// 2+2 iterators
template <class _ForwardIterator1, class _ForwardIterator2>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation(
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation(
_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2) {
return std::__is_permutation<_ClassicAlgPolicy>(
std::move(__first1),
@ -280,7 +280,7 @@ _LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
// 2+2 iterators, predicate
template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation(
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation(
_ForwardIterator1 __first1,
_ForwardIterator1 __last1,
_ForwardIterator2 __first2,

View File

@ -22,13 +22,13 @@
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator, class _Compare>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
is_sorted(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp) {
return std::__is_sorted_until<__comp_ref_type<_Compare> >(__first, __last, __comp) == __last;
}
template <class _ForwardIterator>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
is_sorted(_ForwardIterator __first, _ForwardIterator __last) {
return std::is_sorted(__first, __last, __less<>());
}

Some files were not shown because too many files have changed in this diff Show More