11#ifndef EIGEN_CONFIGURE_VECTORIZATION_H
12#define EIGEN_CONFIGURE_VECTORIZATION_H
// Alignment helper macros.
// When EIGEN_CUDACC is defined they map to __align__(n) and __alignof(x);
// the second pair maps to the standard alignas(n) and alignof(x).
// NOTE(review): the #else / #endif lines separating the two pairs are not
// present in this listing -- confirm against the original header.
33#if (defined EIGEN_CUDACC)
34#define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
35#define EIGEN_ALIGNOF(x) __alignof(x)
37#define EIGEN_ALIGN_TO_BOUNDARY(n) alignas(n)
38#define EIGEN_ALIGNOF(x) alignof(x)
// Select EIGEN_IDEAL_MAX_ALIGN_BYTES, the alignment (in bytes) this
// configuration would ideally use.
// With EIGEN_DONT_VECTORIZE defined, the visible values are 16 (directly
// after the EIGEN_GPUCC test) and 0.
// NOTE(review): the #else between those two values is not visible here.
42#if defined(EIGEN_DONT_VECTORIZE)
43#if defined(EIGEN_GPUCC)
46#define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
48#define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
// AVX512F is followed by a 64-byte value.
50#elif defined(__AVX512F__)
52#define EIGEN_IDEAL_MAX_ALIGN_BYTES 64
// NOTE(review): the condition guarding the 32-byte value is missing from
// this listing.
55#define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
// HVX with a 128-byte vector length is followed by a 128-byte value.
56#elif defined __HVX__ && (__HVX_LENGTH__ == 128)
57#define EIGEN_IDEAL_MAX_ALIGN_BYTES 128
// NOTE(review): presumably the fallback branch; its #else is not visible.
59#define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
// Minimum alignment, fixed at 16 bytes.
63#define EIGEN_MIN_ALIGN_BYTES 16
// Reject a contradictory configuration: static alignment is disabled through
// EIGEN_DONT_ALIGN_STATICALLY or EIGEN_DONT_ALIGN, yet
// EIGEN_MAX_STATIC_ALIGN_BYTES is defined with a positive value.
69#if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)) && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && \
70 EIGEN_MAX_STATIC_ALIGN_BYTES > 0
71#error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.
// When static alignment is disabled, drop any earlier definition and force
// EIGEN_MAX_STATIC_ALIGN_BYTES to 0.
76#if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)
77#ifdef EIGEN_MAX_STATIC_ALIGN_BYTES
78#undef EIGEN_MAX_STATIC_ALIGN_BYTES
80#define EIGEN_MAX_STATIC_ALIGN_BYTES 0
// If EIGEN_MAX_STATIC_ALIGN_BYTES is still undefined, derive a value from the
// compiler / architecture checks below.
83#ifndef EIGEN_MAX_STATIC_ALIGN_BYTES
// Flag a GNU-compatible compiler on an architecture outside the listed set
// (x86/x86-64, ARM/ARM64, PPC, IA64, MIPS). The flag has two visible values:
// 1 directly after the test, and 0.
// NOTE(review): the #else between the two definitions is not visible here.
93#if EIGEN_COMP_GNUC && \
94 !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64 || EIGEN_ARCH_MIPS)
95#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
97#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
// Stack alignment is wanted only when the flag above is 0, the compiler is
// not SunCC and the OS is not QNX.
101#if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT && !EIGEN_COMP_SUNCC && !EIGEN_OS_QNX
102#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
104#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
// Use the ideal alignment when stack alignment is wanted; the other visible
// definition is 0.
107#if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
108#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
110#define EIGEN_MAX_STATIC_ALIGN_BYTES 0
// Clamp the static limit to EIGEN_MAX_ALIGN_BYTES when that macro is already
// defined with a smaller value.
116#if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES < EIGEN_MAX_STATIC_ALIGN_BYTES
117#undef EIGEN_MAX_STATIC_ALIGN_BYTES
118#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
// With a static alignment of 0, define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
// unless it is already defined.
121#if EIGEN_MAX_STATIC_ALIGN_BYTES == 0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
122#define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
// Fixed-size alignment shorthands built on EIGEN_ALIGN_TO_BOUNDARY.
131#define EIGEN_ALIGN8 EIGEN_ALIGN_TO_BOUNDARY(8)
132#define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
133#define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
134#define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)
// EIGEN_ALIGN_MAX aligns to EIGEN_MAX_STATIC_ALIGN_BYTES when that value is
// positive; the second visible definition expands to nothing.
// NOTE(review): the #else / #endif lines are not present in this listing.
135#if EIGEN_MAX_STATIC_ALIGN_BYTES > 0
136#define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES)
138#define EIGEN_ALIGN_MAX
// Reject a contradictory configuration: EIGEN_DONT_ALIGN together with a
// positive EIGEN_MAX_ALIGN_BYTES.
143#if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES > 0
144#error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
// EIGEN_DONT_ALIGN forces EIGEN_MAX_ALIGN_BYTES to 0 (dropping any earlier
// definition); otherwise an undefined EIGEN_MAX_ALIGN_BYTES defaults to the
// ideal alignment.
147#ifdef EIGEN_DONT_ALIGN
148#ifdef EIGEN_MAX_ALIGN_BYTES
149#undef EIGEN_MAX_ALIGN_BYTES
151#define EIGEN_MAX_ALIGN_BYTES 0
152#elif !defined(EIGEN_MAX_ALIGN_BYTES)
153#define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
// EIGEN_DEFAULT_ALIGN_BYTES takes the ideal value when it exceeds
// EIGEN_MAX_ALIGN_BYTES; the other visible definition uses
// EIGEN_MAX_ALIGN_BYTES.
// NOTE(review): the #else between the two definitions is not visible here.
156#if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES
157#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
159#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
// EIGEN_UNALIGNED_VECTORIZE defaults to 1 unless already defined.
162#ifndef EIGEN_UNALIGNED_VECTORIZE
163#define EIGEN_UNALIGNED_VECTORIZE 1
// A maximum alignment of 0 defines EIGEN_DONT_VECTORIZE if it is not already
// defined.
170#if EIGEN_MAX_ALIGN_BYTES == 0
171#ifndef EIGEN_DONT_VECTORIZE
172#define EIGEN_DONT_VECTORIZE
// SSE2 detection flags.
// EIGEN_SSE2_ON_MSVC_2008_OR_LATER is defined when _M_IX86_FP is at least 2
// or the target is x86-64.
// NOTE(review): the conditions enclosing this test and the one guarding
// EIGEN_SSE2_ON_NON_MSVC are not visible in this listing.
181#if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64
182#define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
186#define EIGEN_SSE2_ON_NON_MSVC
// CPU SIMD selection: entered only when EIGEN_DONT_VECTORIZE and EIGEN_GPUCC
// are both undefined.
190#if !(defined(EIGEN_DONT_VECTORIZE) || defined(EIGEN_GPUCC))
// x86 branch: requires one of the two SSE2 detection flags.
192#if defined(EIGEN_SSE2_ON_NON_MSVC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
// EIGEN_VECTORIZE plus the SSE and SSE2 macros are defined first in this
// branch.
197#define EIGEN_VECTORIZE
198#define EIGEN_VECTORIZE_SSE
199#define EIGEN_VECTORIZE_SSE2
// SSE3 .. SSE4.2 macros.
// NOTE(review): the per-feature tests guarding each of these are not visible
// in this listing.
206#define EIGEN_VECTORIZE_SSE3
209#define EIGEN_VECTORIZE_SSSE3
212#define EIGEN_VECTORIZE_SSE4_1
215#define EIGEN_VECTORIZE_SSE4_2
// AVX is defined only when EIGEN_USE_SYCL is not; the SSE3 .. SSE4.2 macros
// follow it.
// NOTE(review): the enclosing AVX test is not visible here.
218#ifndef EIGEN_USE_SYCL
219#define EIGEN_VECTORIZE_AVX
221#define EIGEN_VECTORIZE_SSE3
222#define EIGEN_VECTORIZE_SSSE3
223#define EIGEN_VECTORIZE_SSE4_1
224#define EIGEN_VECTORIZE_SSE4_2
// AVX2 (again only without EIGEN_USE_SYCL) also defines AVX, followed by the
// SSE3 .. SSE4.2 macros.
227#ifndef EIGEN_USE_SYCL
228#define EIGEN_VECTORIZE_AVX2
229#define EIGEN_VECTORIZE_AVX
231#define EIGEN_VECTORIZE_SSE3
232#define EIGEN_VECTORIZE_SSSE3
233#define EIGEN_VECTORIZE_SSE4_1
234#define EIGEN_VECTORIZE_SSE4_2
// FMA: enabled when __FMA__ is defined, or on MSVC when __AVX2__ is defined.
236#if defined(__FMA__) || (EIGEN_COMP_MSVC && defined(__AVX2__))
239#define EIGEN_VECTORIZE_FMA
// AVX512F without EIGEN_VECTORIZE_FMA is a hard error.
// NOTE(review): the condition choosing between the two messages is not
// visible here.
241#if defined(__AVX512F__)
242#ifndef EIGEN_VECTORIZE_FMA
244#error Please add -mfma to your compiler flags: compiling with -mavx512f alone without SSE/AVX FMA is not supported (bug 1638).
246#error Please enable FMA in your compiler flags (e.g. -mfma): compiling with AVX512 alone without SSE/AVX FMA is not supported (bug 1638).
// AVX512 (only without EIGEN_USE_SYCL) is accompanied by the AVX2, AVX, FMA
// and SSE3 .. SSE4.2 macros.
249#ifndef EIGEN_USE_SYCL
250#define EIGEN_VECTORIZE_AVX512
251#define EIGEN_VECTORIZE_AVX2
252#define EIGEN_VECTORIZE_AVX
254#define EIGEN_VECTORIZE_FMA
255#define EIGEN_VECTORIZE_SSE3
256#define EIGEN_VECTORIZE_SSSE3
257#define EIGEN_VECTORIZE_SSE4_1
258#define EIGEN_VECTORIZE_SSE4_2
// AVX512 sub-extension macros, skipped under EIGEN_USE_SYCL.
// NOTE(review): the compiler-flag tests guarding each macro are not visible
// in this listing.
259#ifndef EIGEN_USE_SYCL
261#define EIGEN_VECTORIZE_AVX512DQ
264#define EIGEN_VECTORIZE_AVX512ER
267#define EIGEN_VECTORIZE_AVX512BF16
270#define EIGEN_VECTORIZE_AVX512VL
274#define EIGEN_VECTORIZE_AVX512FP16
// Per the messages, AVX512-FP16 without AVX512-VL is rejected.
// NOTE(review): the conditions leading to these errors are not visible here.
277#error Please add -mavx512vl to your compiler flags: compiling with -mavx512fp16 alone without AVX512-VL is not supported.
279#error Please enable AVX512-VL in your compiler flags (e.g. -mavx512vl): compiling with AVX512-FP16 alone without AVX512-VL is not supported.
// Workaround for Apple clang 11000033 with a minimum macOS version of 101500:
// the AVX, AVX2, FMA, AVX512, AVX512DQ and AVX512ER macros are undefined
// again. The string fragment below states the reason (broken assembly with
// AVX enabled on Xcode 11.[012]).
// NOTE(review): the directive that emits the string, and the #endif lines
// closing each #ifdef, are not present in this listing.
287#if (EIGEN_COMP_CLANGAPPLE == 11000033) && (__MAC_OS_X_VERSION_MIN_REQUIRED == 101500)
290#ifdef EIGEN_VECTORIZE_AVX
291#undef EIGEN_VECTORIZE_AVX
293 "Disabling AVX support: clang compiler shipped with XCode 11.[012] generates broken assembly with -macosx-version-min=10.15 and AVX enabled. "
294#ifdef EIGEN_VECTORIZE_AVX2
295#undef EIGEN_VECTORIZE_AVX2
297#ifdef EIGEN_VECTORIZE_FMA
298#undef EIGEN_VECTORIZE_FMA
300#ifdef EIGEN_VECTORIZE_AVX512
301#undef EIGEN_VECTORIZE_AVX512
303#ifdef EIGEN_VECTORIZE_AVX512DQ
304#undef EIGEN_VECTORIZE_AVX512DQ
306#ifdef EIGEN_VECTORIZE_AVX512ER
307#undef EIGEN_VECTORIZE_AVX512ER
// Pull in the x86 intrinsics headers.
// ICC >= 1110 and Emscripten include <immintrin.h> directly.
// NOTE(review): the #else separating that case from the per-feature includes
// below is not visible in this listing.
332#if EIGEN_COMP_ICC >= 1110 || EIGEN_COMP_EMSCRIPTEN
333#include <immintrin.h>
// <emmintrin.h> and <xmmintrin.h> are included without a visible guard.
336#include <emmintrin.h>
337#include <xmmintrin.h>
// Each further header is included only when the matching EIGEN_VECTORIZE_*
// macro is defined.
338#ifdef EIGEN_VECTORIZE_SSE3
339#include <pmmintrin.h>
341#ifdef EIGEN_VECTORIZE_SSSE3
342#include <tmmintrin.h>
344#ifdef EIGEN_VECTORIZE_SSE4_1
345#include <smmintrin.h>
347#ifdef EIGEN_VECTORIZE_SSE4_2
348#include <nmmintrin.h>
350#if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
351#include <immintrin.h>
// VSX branch: taken when __VSX__ is defined and __APPLE__ is not. Also
// defines EIGEN_VECTORIZE_FMA.
356#elif defined(__VSX__) && !defined(__APPLE__)
358#define EIGEN_VECTORIZE
359#define EIGEN_VECTORIZE_VSX 1
360#define EIGEN_VECTORIZE_FMA
// AltiVec branch: taken when __ALTIVEC__ is defined. Also defines
// EIGEN_VECTORIZE_FMA.
368#elif defined __ALTIVEC__
370#define EIGEN_VECTORIZE
371#define EIGEN_VECTORIZE_ALTIVEC
372#define EIGEN_VECTORIZE_FMA
// NEON branch: taken when __ARM_NEON or __ARM_NEON__ is defined and
// EIGEN_ARM64_USE_SVE is not.
380#elif ((defined __ARM_NEON) || (defined __ARM_NEON__)) && !(defined EIGEN_ARM64_USE_SVE)
382#define EIGEN_VECTORIZE
383#define EIGEN_VECTORIZE_NEON
388#elif (defined __ARM_FEATURE_SVE) && (defined EIGEN_ARM64_USE_SVE)
390#define EIGEN_VECTORIZE
391#define EIGEN_VECTORIZE_SVE
396#if defined __ARM_FEATURE_SVE_BITS
397#define EIGEN_ARM64_SVE_VL __ARM_FEATURE_SVE_BITS
399#error "Eigen requires a fixed SVE lector length but EIGEN_ARM64_SVE_VL is not set."
// ZVector branch: taken when __s390x__ and __VEC__ are both defined; includes
// <vecintrin.h>.
402#elif (defined __s390x__ && defined __VEC__)
404#define EIGEN_VECTORIZE
405#define EIGEN_VECTORIZE_ZVECTOR
406#include <vecintrin.h>
// MIPS MSA branch: taken when __mips_msa is defined. A little-endian byte
// order test precedes the macro definitions.
// NOTE(review): what happens when the byte-order test fails is not visible in
// this listing.
408#elif defined __mips_msa
412#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
418#define EIGEN_VECTORIZE
419#define EIGEN_VECTORIZE_MSA
// HVX branch: taken when __HVX__ is defined with __HVX_LENGTH__ == 128;
// includes <hexagon_types.h>.
423#elif defined __HVX__ && (__HVX_LENGTH__ == 128)
425#define EIGEN_VECTORIZE
426#define EIGEN_VECTORIZE_HVX
427#include <hexagon_types.h>
// NOTE(review): the body of this EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC test
// is not visible in this listing.
435#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
// __ARM_FEATURE_FMA defines EIGEN_VECTORIZE_FMA.
440#if defined(__ARM_FEATURE_FMA)
441#define EIGEN_VECTORIZE_FMA
// F16C: EIGEN_HAS_FP16_C is defined when __F16C__ is set, this is not a GPU
// compiler, and the compiler is either not strict clang or strict clang at
// least 3.8.0.
444#if defined(__F16C__) && !defined(EIGEN_GPUCC) && (!EIGEN_COMP_CLANG_STRICT || EIGEN_CLANG_STRICT_AT_LEAST(3, 8, 0))
446#define EIGEN_HAS_FP16_C
// NOTE(review): the guard around this include is not visible here.
454#include <immintrin.h>
// CUDA: define EIGEN_VECTORIZE_GPU and include <vector_types.h>. CUDA SDK
// versions >= 70500 define EIGEN_HAS_CUDA_FP16.
458#if defined EIGEN_CUDACC
459#define EIGEN_VECTORIZE_GPU
460#include <vector_types.h>
461#if EIGEN_CUDA_SDK_VER >= 70500
462#define EIGEN_HAS_CUDA_FP16
// The CUDA runtime and fp16 headers are included only when
// EIGEN_HAS_CUDA_FP16 is defined.
466#if defined(EIGEN_HAS_CUDA_FP16)
467#include <cuda_runtime_api.h>
468#include <cuda_fp16.h>
// HIP: define EIGEN_VECTORIZE_GPU, EIGEN_HAS_HIP_FP16 and EIGEN_HAS_HIP_BF16,
// and include the matching HIP headers.
471#if defined(EIGEN_HIPCC)
472#define EIGEN_VECTORIZE_GPU
473#include <hip/hip_vector_types.h>
474#define EIGEN_HAS_HIP_FP16
475#include <hip/hip_fp16.h>
476#define EIGEN_HAS_HIP_BF16
477#include <hip/hip_bfloat16.h>
// Project-local header included at this point; its contents are not visible
// here.
482#include "../InternalHeaderCheck.h"
486inline static const char *SimdInstructionSetsInUse(
void) {
487#if defined(EIGEN_VECTORIZE_AVX512)
488 return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
489#elif defined(EIGEN_VECTORIZE_AVX)
490 return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
491#elif defined(EIGEN_VECTORIZE_SSE4_2)
492 return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
493#elif defined(EIGEN_VECTORIZE_SSE4_1)
494 return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
495#elif defined(EIGEN_VECTORIZE_SSSE3)
496 return "SSE, SSE2, SSE3, SSSE3";
497#elif defined(EIGEN_VECTORIZE_SSE3)
498 return "SSE, SSE2, SSE3";
499#elif defined(EIGEN_VECTORIZE_SSE2)
501#elif defined(EIGEN_VECTORIZE_ALTIVEC)
503#elif defined(EIGEN_VECTORIZE_VSX)
505#elif defined(EIGEN_VECTORIZE_NEON)
507#elif defined(EIGEN_VECTORIZE_SVE)
509#elif defined(EIGEN_VECTORIZE_ZVECTOR)
510 return "S390X ZVECTOR";
511#elif defined(EIGEN_VECTORIZE_MSA)
Namespace containing all symbols from the Eigen library.
Definition Core:137