10#ifndef EIGEN_COMPLEX_AVX_H
11#define EIGEN_COMPLEX_AVX_H
14#include "../../InternalHeaderCheck.h"
// Constructors of Packet4cf (the enclosing struct header and the `v` member
// declaration are not part of this listing).
// Default constructor: empty body, `v` is not explicitly initialized here.
22 EIGEN_STRONG_INLINE Packet4cf() {}
// Wraps an existing __m256 register by copying it into `v`. Marked explicit,
// so a raw __m256 never converts to Packet4cf implicitly.
23 EIGEN_STRONG_INLINE
explicit Packet4cf(
const __m256& a) : v(a) {}
27#ifndef EIGEN_VECTORIZE_AVX512
// packet_traits specialization for std::complex<float>, derived from
// default_packet_traits. Only the typedefs are visible in this listing:
//   type - the packet type used for this scalar (Packet4cf)
//   half - the half-size packet type (Packet2cf)
29struct packet_traits<std::complex<float> > : default_packet_traits {
30 typedef Packet4cf type;
31 typedef Packet2cf half;
// unpacket_traits specialization describing Packet4cf:
//   type    - scalar element type (std::complex<float>)
//   half    - half-size packet (Packet2cf)
//   as_real - real-valued packet type associated with this packet (Packet8f)
// NOTE(review): the enum opener and other enumerators are missing from this
// listing; only the two masked load/store flags below are visible.
55struct unpacket_traits<Packet4cf> {
56 typedef std::complex<float> type;
57 typedef Packet2cf half;
58 typedef Packet8f as_real;
// Both masked-access flags are set to false for this packet type.
63 masked_load_available =
false,
64 masked_store_available =
false
// Complex addition: adds the underlying float registers of a and b with
// _mm256_add_ps and wraps the sum in a Packet4cf.
69EIGEN_STRONG_INLINE Packet4cf padd<Packet4cf>(
const Packet4cf& a,
const Packet4cf& b) {
70 return Packet4cf(_mm256_add_ps(a.v, b.v));
// Complex subtraction: computes a.v - b.v with _mm256_sub_ps and wraps the
// difference in a Packet4cf.
73EIGEN_STRONG_INLINE Packet4cf psub<Packet4cf>(
const Packet4cf& a,
const Packet4cf& b) {
74 return Packet4cf(_mm256_sub_ps(a.v, b.v));
// Negation: forwards the raw register a.v to the pnegate overload that accepts
// it (defined outside this listing) and wraps the result in a Packet4cf.
77EIGEN_STRONG_INLINE Packet4cf pnegate(
const Packet4cf& a) {
78 return Packet4cf(pnegate(a.v));
// Complex conjugate: XORs a.v with a mask whose only set bits are the sign
// bits (0x80000000) of the odd 32-bit lanes, flipping the sign of those lanes.
// _mm256_setr_epi32 lists lanes from lowest to highest, so lanes 1, 3, 5, 7
// are affected; with the (re, im) lane order produced by pset1 in this file,
// these are the imaginary parts.
81EIGEN_STRONG_INLINE Packet4cf pconj(
const Packet4cf& a) {
82 const __m256 mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x00000000, 0x80000000, 0x00000000, 0x80000000, 0x00000000,
83 0x80000000, 0x00000000, 0x80000000));
84 return Packet4cf(_mm256_xor_ps(a.v, mask));
// Complex multiplication of four complex<float> pairs.
// With a = (ar, ai) and b = (br, bi) in adjacent lanes:
//   tmp1   = (ar*br, ar*bi)  - even lanes of a duplicated, times b
//   tmp2   = (ai*bi, ai*br)  - odd lanes of a duplicated, times b with each
//                              adjacent lane pair swapped
//   result = (ar*br - ai*bi, ar*bi + ai*br)
88EIGEN_STRONG_INLINE Packet4cf pmul<Packet4cf>(
const Packet4cf& a,
const Packet4cf& b) {
89 __m256 tmp1 = _mm256_mul_ps(_mm256_moveldup_ps(a.v), b.v);
90 __m256 tmp2 = _mm256_mul_ps(_mm256_movehdup_ps(a.v), _mm256_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1)));
// addsub subtracts in even lanes and adds in odd lanes.
91 __m256 result = _mm256_addsub_ps(tmp1, tmp2);
92 return Packet4cf(result);
// Complex equality mask. Each float lane is first compared with the ordered,
// non-signaling equality predicate (_CMP_EQ_OQ). The mask is then ANDed with a
// copy of itself in which adjacent lanes are swapped (imm 0xb1), so both lanes
// of a complex element are all-ones only when both of its components compared
// equal.
96EIGEN_STRONG_INLINE Packet4cf pcmp_eq(
const Packet4cf& a,
const Packet4cf& b) {
97 __m256 eq = _mm256_cmp_ps(a.v, b.v, _CMP_EQ_OQ);
98 return Packet4cf(_mm256_and_ps(eq, _mm256_permute_ps(eq, 0xb1)));
// Forwards to ptrue for the real packet: a.v is rewrapped as Packet8f, passed
// to ptrue (defined outside this listing), and the result wrapped as Packet4cf.
102EIGEN_STRONG_INLINE Packet4cf ptrue<Packet4cf>(
const Packet4cf& a) {
103 return Packet4cf(ptrue(Packet8f(a.v)));
// Bitwise AND of the two underlying 256-bit registers.
106EIGEN_STRONG_INLINE Packet4cf pand<Packet4cf>(
const Packet4cf& a,
const Packet4cf& b) {
107 return Packet4cf(_mm256_and_ps(a.v, b.v));
// Bitwise OR of the two underlying 256-bit registers.
110EIGEN_STRONG_INLINE Packet4cf por<Packet4cf>(
const Packet4cf& a,
const Packet4cf& b) {
111 return Packet4cf(_mm256_or_ps(a.v, b.v));
// Bitwise XOR of the two underlying 256-bit registers.
114EIGEN_STRONG_INLINE Packet4cf pxor<Packet4cf>(
const Packet4cf& a,
const Packet4cf& b) {
115 return Packet4cf(_mm256_xor_ps(a.v, b.v));
// Computes a AND (NOT b). _mm256_andnot_ps complements its FIRST operand,
// which is why b.v is passed first and a.v second.
118EIGEN_STRONG_INLINE Packet4cf pandnot<Packet4cf>(
const Packet4cf& a,
const Packet4cf& b) {
119 return Packet4cf(_mm256_andnot_ps(b.v, a.v));
// Loads a Packet4cf starting at `from` by delegating to pload<Packet8f> on the
// address of the first element's real part (obtained via numext::real_ref).
// NOTE(review): this is the aligned variant, so `from` presumably has to meet
// Packet8f's alignment requirement; confirm against pload<Packet8f>.
123EIGEN_STRONG_INLINE Packet4cf pload<Packet4cf>(
const std::complex<float>* from) {
// EIGEN_DEBUG_ALIGNED_LOAD is a macro defined outside this listing.
124 EIGEN_DEBUG_ALIGNED_LOAD
return Packet4cf(pload<Packet8f>(&numext::real_ref(*from)));
// Unaligned counterpart of pload: delegates to ploadu<Packet8f> on the address
// of the first element's real part (obtained via numext::real_ref).
127EIGEN_STRONG_INLINE Packet4cf ploadu<Packet4cf>(
const std::complex<float>* from) {
// EIGEN_DEBUG_UNALIGNED_LOAD is a macro defined outside this listing.
128 EIGEN_DEBUG_UNALIGNED_LOAD
return Packet4cf(ploadu<Packet8f>(&numext::real_ref(*from)));
// Broadcasts one complex value into all four complex slots.
// _mm256_set_ps takes its arguments from the highest lane down to the lowest,
// so the resulting lane order (lowest first) is re, im, re, im, ...
132EIGEN_STRONG_INLINE Packet4cf pset1<Packet4cf>(
const std::complex<float>& from) {
133 const float re = std::real(from);
134 const float im = std::imag(from);
135 return Packet4cf(_mm256_set_ps(im, re, im, re, im, re, im, re));
// Builds a Packet4cf from two Packet2cf duplicated loads:
//   low 128 bits  <- ploaddup<Packet2cf>(from)
//   high 128 bits <- ploaddup<Packet2cf>(from + 1)
// NOTE(review): presumably yields {from[0], from[0], from[1], from[1]};
// this depends on ploaddup<Packet2cf>, defined outside this listing.
139EIGEN_STRONG_INLINE Packet4cf ploaddup<Packet4cf>(
const std::complex<float>* from) {
141 Packet2cf a = ploaddup<Packet2cf>(from);
142 Packet2cf b = ploaddup<Packet2cf>(from + 1);
// The cast leaves the upper half undefined; insertf128 then overwrites it with b.
143 return Packet4cf(_mm256_insertf128_ps(_mm256_castps128_ps256(a.v), b.v, 1));
// Stores the packet to `to` by delegating to the pstore overload that takes
// the address of the first element's real part (via numext::real_ref) and the
// raw register from.v.
// NOTE(review): aligned variant; `to` presumably has to satisfy the alignment
// required by that overload. EIGEN_DEBUG_ALIGNED_STORE is a macro defined
// outside this listing.
147EIGEN_STRONG_INLINE
void pstore<std::complex<float> >(std::complex<float>* to,
const Packet4cf& from) {
148 EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v);
// Unaligned counterpart of pstore: delegates to the pstoreu overload that takes
// the address of the first element's real part (via numext::real_ref) and the
// raw register from.v. EIGEN_DEBUG_UNALIGNED_STORE is a macro defined outside
// this listing.
151EIGEN_STRONG_INLINE
void pstoreu<std::complex<float> >(std::complex<float>* to,
const Packet4cf& from) {
152 EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v);
// Strided gather: builds a Packet4cf from from[0*stride], from[1*stride],
// from[2*stride] and from[3*stride].
// _mm256_set_ps lists lanes from highest to lowest, so element 0 lands in the
// lowest two lanes as (re, im) and element 3 in the highest two.
// NOTE(review): the remainder of the parameter list (the declaration of
// `stride`) is missing from this listing.
156EIGEN_DEVICE_FUNC
inline Packet4cf pgather<std::complex<float>, Packet4cf>(
const std::complex<float>* from,
158 return Packet4cf(_mm256_set_ps(std::imag(from[3 * stride]), std::real(from[3 * stride]), std::imag(from[2 * stride]),
159 std::real(from[2 * stride]), std::imag(from[1 * stride]), std::real(from[1 * stride]),
160 std::imag(from[0 * stride]), std::real(from[0 * stride])));
// Strided scatter of the four complex values held in `from`.
// The packet is split into its low and high 128-bit halves; from each half two
// std::complex<float> values are rebuilt by moving the wanted lane to position
// 0 with _mm_shuffle_ps and reading it with _mm_cvtss_f32 (lanes 0/1 form the
// first value, lanes 2/3 the second).
// NOTE(review): the rest of the parameter list and the left-hand sides of the
// four assignments are missing from this listing; presumably the destinations
// are to[stride * 0] .. to[stride * 3]. Confirm against the original file.
164EIGEN_DEVICE_FUNC
inline void pscatter<std::complex<float>, Packet4cf>(std::complex<float>* to,
const Packet4cf& from,
166 __m128 low = _mm256_extractf128_ps(from.v, 0);
168 std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 0)), _mm_cvtss_f32(_mm_shuffle_ps(low, low, 1)));
170 std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 2)), _mm_cvtss_f32(_mm_shuffle_ps(low, low, 3)));
172 __m128 high = _mm256_extractf128_ps(from.v, 1);
174 std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 0)), _mm_cvtss_f32(_mm_shuffle_ps(high, high, 1)));
176 std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 2)), _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3)));
// Returns the first complex element: takes the low 128 bits of a.v, wraps them
// as a Packet2cf and delegates to pfirst for that type.
180EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet4cf>(
const Packet4cf& a) {
181 return pfirst(Packet2cf(_mm256_castps256_ps128(a.v)));
// Reverses the order of the four complex elements.
// Each complex<float> occupies 64 bits, so the halves are viewed as __m128d:
//   1. within each 128-bit half, the two 64-bit elements are swapped
//      (_mm_shuffle_pd with imm 0x1);
//   2. the two halves are written back in swapped positions
//      (old low -> index 1, old high -> index 0).
185EIGEN_STRONG_INLINE Packet4cf preverse(
const Packet4cf& a) {
186 __m128 low = _mm256_extractf128_ps(a.v, 0);
187 __m128 high = _mm256_extractf128_ps(a.v, 1);
188 __m128d lowd = _mm_castps_pd(low);
189 __m128d highd = _mm_castps_pd(high);
190 low = _mm_castpd_ps(_mm_shuffle_pd(lowd, lowd, 0x1));
191 high = _mm_castpd_ps(_mm_shuffle_pd(highd, highd, 0x1));
// Start from zero; both halves are overwritten by the two inserts below.
192 __m256 result = _mm256_setzero_ps();
193 result = _mm256_insertf128_ps(result, low, 1);
194 result = _mm256_insertf128_ps(result, high, 0);
195 return Packet4cf(result);
// Sum reduction: adds the low and high 128-bit halves as two Packet2cf values,
// then delegates the remaining reduction to predux for Packet2cf.
199EIGEN_STRONG_INLINE std::complex<float> predux<Packet4cf>(
const Packet4cf& a) {
200 return predux(padd(Packet2cf(_mm256_extractf128_ps(a.v, 0)), Packet2cf(_mm256_extractf128_ps(a.v, 1))));
// Product reduction: multiplies the low and high 128-bit halves as two
// Packet2cf values (complex multiply via pmul), then delegates the remaining
// reduction to predux_mul for Packet2cf.
204EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet4cf>(
const Packet4cf& a) {
205 return predux_mul(pmul(Packet2cf(_mm256_extractf128_ps(a.v, 0)), Packet2cf(_mm256_extractf128_ps(a.v, 1))));
// Macro invocation for the (Packet4cf, Packet8f) pair. The macro is defined
// outside this listing.
// NOTE(review): presumably generates the mixed complex/real conj_helper
// specializations; confirm against the macro definition.
208EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf, Packet8f)
// Complex division: thin wrapper delegating to pdiv_complex (defined outside
// this listing).
211EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(
const Packet4cf& a,
const Packet4cf& b) {
212 return pdiv_complex(a, b);
// Swaps every adjacent pair of float lanes (selector _MM_SHUFFLE(2, 3, 0, 1)),
// which exchanges the two components of each complex element.
216EIGEN_STRONG_INLINE Packet4cf pcplxflip<Packet4cf>(
const Packet4cf& x) {
217 return Packet4cf(_mm256_shuffle_ps(x.v, x.v, _MM_SHUFFLE(2, 3, 0, 1)));
// Constructors of Packet2cd (the enclosing struct header and the `v` member
// declaration are not part of this listing).
// Default constructor: empty body, `v` is not explicitly initialized here.
222 EIGEN_STRONG_INLINE Packet2cd() {}
// Wraps an existing __m256d register by copying it into `v`. Marked explicit,
// so a raw __m256d never converts to Packet2cd implicitly.
223 EIGEN_STRONG_INLINE
explicit Packet2cd(
const __m256d& a) : v(a) {}
227#ifndef EIGEN_VECTORIZE_AVX512
// packet_traits specialization for std::complex<double>, derived from
// default_packet_traits. Only the typedefs are visible in this listing:
//   type - the packet type used for this scalar (Packet2cd)
//   half - the half-size packet type (Packet1cd)
229struct packet_traits<std::complex<double> > : default_packet_traits {
230 typedef Packet2cd type;
231 typedef Packet1cd half;
// unpacket_traits specialization describing Packet2cd:
//   type    - scalar element type (std::complex<double>)
//   half    - half-size packet (Packet1cd)
//   as_real - real-valued packet type associated with this packet (Packet4d)
// NOTE(review): the enum opener and other enumerators are missing from this
// listing; only the two masked load/store flags below are visible.
254struct unpacket_traits<Packet2cd> {
255 typedef std::complex<double> type;
256 typedef Packet1cd half;
257 typedef Packet4d as_real;
// Both masked-access flags are set to false for this packet type.
262 masked_load_available =
false,
263 masked_store_available =
false
// Complex addition: adds the underlying double registers of a and b with
// _mm256_add_pd and wraps the sum in a Packet2cd.
268EIGEN_STRONG_INLINE Packet2cd padd<Packet2cd>(
const Packet2cd& a,
const Packet2cd& b) {
269 return Packet2cd(_mm256_add_pd(a.v, b.v));
// Complex subtraction: computes a.v - b.v with _mm256_sub_pd and wraps the
// difference in a Packet2cd.
272EIGEN_STRONG_INLINE Packet2cd psub<Packet2cd>(
const Packet2cd& a,
const Packet2cd& b) {
273 return Packet2cd(_mm256_sub_pd(a.v, b.v));
// Negation: forwards the raw register a.v to the pnegate overload that accepts
// it (defined outside this listing) and wraps the result in a Packet2cd.
276EIGEN_STRONG_INLINE Packet2cd pnegate(
const Packet2cd& a) {
277 return Packet2cd(pnegate(a.v));
// Complex conjugate: XORs a.v with a mask that has only the sign bit of the
// odd double lanes (1 and 3) set, flipping the sign of those lanes.
// _mm256_set_epi32 lists 32-bit words from highest to lowest, so the two
// 0x80000000 words are words 7 and 3, i.e. the upper halves of doubles 3 and 1.
// With the (re, im) lane order used by pgather in this file, those are the
// imaginary parts.
280EIGEN_STRONG_INLINE Packet2cd pconj(
const Packet2cd& a) {
281 const __m256d mask = _mm256_castsi256_pd(_mm256_set_epi32(0x80000000, 0x0, 0x0, 0x0, 0x80000000, 0x0, 0x0, 0x0));
282 return Packet2cd(_mm256_xor_pd(a.v, mask));
// Complex multiplication of two complex<double> pairs.
// With a = (ar, ai) and b = (br, bi) in adjacent lanes:
//   tmp1 = (ar, ar)  - even lanes of a duplicated (imm 0x0)
//   even = (ar*br, ar*bi)
//   tmp2 = (ai, ai)  - odd lanes of a duplicated (imm 0xF)
//   tmp3 = (bi, br)  - lanes of b swapped within each 128-bit half (imm 0x5)
//   odd  = (ai*bi, ai*br)
//   result = (ar*br - ai*bi, ar*bi + ai*br)
286EIGEN_STRONG_INLINE Packet2cd pmul<Packet2cd>(
const Packet2cd& a,
const Packet2cd& b) {
287 __m256d tmp1 = _mm256_shuffle_pd(a.v, a.v, 0x0);
288 __m256d even = _mm256_mul_pd(tmp1, b.v);
289 __m256d tmp2 = _mm256_shuffle_pd(a.v, a.v, 0xF);
290 __m256d tmp3 = _mm256_shuffle_pd(b.v, b.v, 0x5);
291 __m256d odd = _mm256_mul_pd(tmp2, tmp3);
// addsub subtracts in even lanes and adds in odd lanes.
292 return Packet2cd(_mm256_addsub_pd(even, odd));
// Complex equality mask. Each double lane is first compared with the ordered,
// non-signaling equality predicate (_CMP_EQ_OQ). The mask is then ANDed (via
// the pand overload accepting raw __m256d values) with a copy of itself in
// which the lanes of each 128-bit half are swapped (imm 0x5), so both lanes of
// a complex element are all-ones only when both components compared equal.
296EIGEN_STRONG_INLINE Packet2cd pcmp_eq(
const Packet2cd& a,
const Packet2cd& b) {
297 __m256d eq = _mm256_cmp_pd(a.v, b.v, _CMP_EQ_OQ);
298 return Packet2cd(pand(eq, _mm256_permute_pd(eq, 0x5)));
// Forwards to ptrue for the real packet: a.v is rewrapped as Packet4d, passed
// to ptrue (defined outside this listing), and the result wrapped as Packet2cd.
302EIGEN_STRONG_INLINE Packet2cd ptrue<Packet2cd>(
const Packet2cd& a) {
303 return Packet2cd(ptrue(Packet4d(a.v)));
// Bitwise AND of the two underlying 256-bit registers.
306EIGEN_STRONG_INLINE Packet2cd pand<Packet2cd>(
const Packet2cd& a,
const Packet2cd& b) {
307 return Packet2cd(_mm256_and_pd(a.v, b.v));
// Bitwise OR of the two underlying 256-bit registers.
310EIGEN_STRONG_INLINE Packet2cd por<Packet2cd>(
const Packet2cd& a,
const Packet2cd& b) {
311 return Packet2cd(_mm256_or_pd(a.v, b.v));
// Bitwise XOR of the two underlying 256-bit registers.
314EIGEN_STRONG_INLINE Packet2cd pxor<Packet2cd>(
const Packet2cd& a,
const Packet2cd& b) {
315 return Packet2cd(_mm256_xor_pd(a.v, b.v));
// Computes a AND (NOT b). _mm256_andnot_pd complements its FIRST operand,
// which is why b.v is passed first and a.v second.
318EIGEN_STRONG_INLINE Packet2cd pandnot<Packet2cd>(
const Packet2cd& a,
const Packet2cd& b) {
319 return Packet2cd(_mm256_andnot_pd(b.v, a.v));
// Loads a Packet2cd starting at `from` by reinterpreting the pointer as
// const double* and delegating to pload<Packet4d>.
// NOTE(review): this is the aligned variant, so `from` presumably has to meet
// Packet4d's alignment requirement; confirm against pload<Packet4d>.
323EIGEN_STRONG_INLINE Packet2cd pload<Packet2cd>(
const std::complex<double>* from) {
// EIGEN_DEBUG_ALIGNED_LOAD is a macro defined outside this listing.
324 EIGEN_DEBUG_ALIGNED_LOAD
return Packet2cd(pload<Packet4d>((
const double*)from));
// Unaligned counterpart of pload: reinterprets `from` as const double* and
// delegates to ploadu<Packet4d>.
327EIGEN_STRONG_INLINE Packet2cd ploadu<Packet2cd>(
const std::complex<double>* from) {
// EIGEN_DEBUG_UNALIGNED_LOAD is a macro defined outside this listing.
328 EIGEN_DEBUG_UNALIGNED_LOAD
return Packet2cd(ploadu<Packet4d>((
const double*)from));
// Broadcasts one complex<double> into both complex slots.
// The address of `from` is reinterpreted (through const void*) as a pointer to
// a 128-bit __m128d, and _mm256_broadcast_pd loads those 128 bits and
// replicates them into both halves of the 256-bit register.
332EIGEN_STRONG_INLINE Packet2cd pset1<Packet2cd>(
const std::complex<double>& from) {
335 return Packet2cd(_mm256_broadcast_pd((
const __m128d*)(
const void*)&from));
// Duplicated load: with only two complex slots this is the same as
// broadcasting *from, so it simply forwards to pset1<Packet2cd>.
339EIGEN_STRONG_INLINE Packet2cd ploaddup<Packet2cd>(
const std::complex<double>* from) {
340 return pset1<Packet2cd>(*from);
// Stores the packet to `to` by reinterpreting the destination as double* and
// delegating to the pstore overload taking (double*, raw register).
// NOTE(review): aligned variant; `to` presumably has to satisfy the alignment
// required by that overload. EIGEN_DEBUG_ALIGNED_STORE is a macro defined
// outside this listing.
344EIGEN_STRONG_INLINE
void pstore<std::complex<double> >(std::complex<double>* to,
const Packet2cd& from) {
345 EIGEN_DEBUG_ALIGNED_STORE pstore((
double*)to, from.v);
// Unaligned counterpart of pstore: reinterprets `to` as double* and delegates
// to the pstoreu overload taking (double*, raw register).
// EIGEN_DEBUG_UNALIGNED_STORE is a macro defined outside this listing.
348EIGEN_STRONG_INLINE
void pstoreu<std::complex<double> >(std::complex<double>* to,
const Packet2cd& from) {
349 EIGEN_DEBUG_UNALIGNED_STORE pstoreu((
double*)to, from.v);
// Strided gather: builds a Packet2cd from from[0*stride] and from[1*stride].
// _mm256_set_pd lists lanes from highest to lowest, so element 0 lands in the
// two lowest lanes as (re, im) and element 1 in the two highest.
// NOTE(review): the remainder of the parameter list (the declaration of
// `stride`) is missing from this listing.
353EIGEN_DEVICE_FUNC
inline Packet2cd pgather<std::complex<double>, Packet2cd>(
const std::complex<double>* from,
355 return Packet2cd(_mm256_set_pd(std::imag(from[1 * stride]), std::real(from[1 * stride]), std::imag(from[0 * stride]),
356 std::real(from[0 * stride])));
// Strided scatter: writes the two complex values of `from` to to[stride * 0]
// and to[stride * 1].
// Each 128-bit half holds one complex value: lane 0 is read directly with
// _mm_cvtsd_f64, lane 1 is first moved to position 0 with _mm_shuffle_pd
// (imm 1) and then read the same way.
// NOTE(review): the remainder of the parameter list (the declaration of
// `stride`) is missing from this listing.
360EIGEN_DEVICE_FUNC
inline void pscatter<std::complex<double>, Packet2cd>(std::complex<double>* to,
const Packet2cd& from,
362 __m128d low = _mm256_extractf128_pd(from.v, 0);
363 to[stride * 0] = std::complex<double>(_mm_cvtsd_f64(low), _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1)));
364 __m128d high = _mm256_extractf128_pd(from.v, 1);
365 to[stride * 1] = std::complex<double>(_mm_cvtsd_f64(high), _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1)));
// Returns the first complex element: extracts the low 128 bits, spills them to
// a two-double stack buffer and builds the result from (res[0], res[1]).
369EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet2cd>(
const Packet2cd& a) {
370 __m128d low = _mm256_extractf128_pd(a.v, 0);
// _mm_store_pd is the aligned store, hence the EIGEN_ALIGN16 qualifier on the
// buffer (macro defined outside this listing; presumably 16-byte alignment).
371 EIGEN_ALIGN16
double res[2];
372 _mm_store_pd(res, low);
373 return std::complex<double>(res[0], res[1]);
// Reverses the two complex elements by swapping the 128-bit halves of a.v:
// with imm 1, _mm256_permute2f128_pd puts the source's high half in the low
// position and its low half in the high position.
377EIGEN_STRONG_INLINE Packet2cd preverse(
const Packet2cd& a) {
378 __m256d result = _mm256_permute2f128_pd(a.v, a.v, 1);
379 return Packet2cd(result);
// Sum reduction: adds the low and high 128-bit halves as two Packet1cd values
// and passes the sum to predux for Packet1cd.
383EIGEN_STRONG_INLINE std::complex<double> predux<Packet2cd>(
const Packet2cd& a) {
384 return predux(padd(Packet1cd(_mm256_extractf128_pd(a.v, 0)), Packet1cd(_mm256_extractf128_pd(a.v, 1))));
// Product reduction: multiplies the low and high 128-bit halves as two
// Packet1cd values (complex multiply via pmul) and extracts the result with
// predux for Packet1cd.
// NOTE(review): the final step calls predux, whereas the Packet4cf counterpart
// in this file calls predux_mul. Presumably equivalent because a Packet1cd
// holds a single complex value; confirm against the Packet1cd overloads.
388EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet2cd>(
const Packet2cd& a) {
389 return predux(pmul(Packet1cd(_mm256_extractf128_pd(a.v, 0)), Packet1cd(_mm256_extractf128_pd(a.v, 1))));
// Macro invocation for the (Packet2cd, Packet4d) pair. The macro is defined
// outside this listing.
// NOTE(review): presumably generates the mixed complex/real conj_helper
// specializations; confirm against the macro definition.
392EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cd, Packet4d)
// Complex division: thin wrapper delegating to pdiv_complex (defined outside
// this listing).
395EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(
const Packet2cd& a,
const Packet2cd& b) {
396 return pdiv_complex(a, b);
// Swaps the two double lanes inside each 128-bit half (imm 0x5), which
// exchanges the two components of each complex element.
400EIGEN_STRONG_INLINE Packet2cd pcplxflip<Packet2cd>(
const Packet2cd& x) {
401 return Packet2cd(_mm256_shuffle_pd(x.v, x.v, 0x5));
// In-place transpose of a 4x4 block of complex<float> (four Packet4cf rows).
// Each complex<float> is 64 bits wide, so the rows are reinterpreted as
// __m256d and moved around as whole 64-bit elements.
404EIGEN_DEVICE_FUNC
inline void ptranspose(PacketBlock<Packet4cf, 4>& kernel) {
405 __m256d P0 = _mm256_castps_pd(kernel.packet[0].v);
406 __m256d P1 = _mm256_castps_pd(kernel.packet[1].v);
407 __m256d P2 = _mm256_castps_pd(kernel.packet[2].v);
408 __m256d P3 = _mm256_castps_pd(kernel.packet[3].v);
// Interleave row pairs: imm 15 picks the odd elements of both rows,
// imm 0 picks the even elements.
//   T0 = [P0[1], P1[1], P0[3], P1[3]]   T1 = [P0[0], P1[0], P0[2], P1[2]]
//   T2 = [P2[1], P3[1], P2[3], P3[3]]   T3 = [P2[0], P3[0], P2[2], P3[2]]
410 __m256d T0 = _mm256_shuffle_pd(P0, P1, 15);
411 __m256d T1 = _mm256_shuffle_pd(P0, P1, 0);
412 __m256d T2 = _mm256_shuffle_pd(P2, P3, 15);
413 __m256d T3 = _mm256_shuffle_pd(P2, P3, 0);
// Combine 128-bit halves: imm 32 (0x20) takes the low halves of both
// operands, imm 49 (0x31) takes the high halves.
415 kernel.packet[1].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 32));
416 kernel.packet[3].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 49));
417 kernel.packet[0].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 32));
418 kernel.packet[2].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 49));
// In-place transpose of a 2x2 block of complex<double> (two Packet2cd rows).
// Each complex<double> fills one 128-bit half, so the transpose is a pure
// exchange of halves:
//   new row 0 = [row0.low, row1.low]    (imm 0 + (2 << 4))
//   new row 1 = [row0.high, row1.high]  (imm 1 + (3 << 4))
421EIGEN_DEVICE_FUNC
inline void ptranspose(PacketBlock<Packet2cd, 2>& kernel) {
// Row 0 is kept in a temporary because the next statement still reads the
// original packet[0].
422 __m256d tmp = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 0 + (2 << 4));
423 kernel.packet[1].v = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 1 + (3 << 4));
424 kernel.packet[0].v = tmp;
// Complex square root for Packet2cd: thin wrapper delegating to
// psqrt_complex<Packet2cd> (defined outside this listing).
428EIGEN_STRONG_INLINE Packet2cd psqrt<Packet2cd>(
const Packet2cd& a) {
429 return psqrt_complex<Packet2cd>(a);
// Complex square root for Packet4cf: thin wrapper delegating to
// psqrt_complex<Packet4cf> (defined outside this listing).
433EIGEN_STRONG_INLINE Packet4cf psqrt<Packet4cf>(
const Packet4cf& a) {
434 return psqrt_complex<Packet4cf>(a);
// Complex logarithm for Packet2cd: thin wrapper delegating to
// plog_complex<Packet2cd> (defined outside this listing).
438EIGEN_STRONG_INLINE Packet2cd plog<Packet2cd>(
const Packet2cd& a) {
439 return plog_complex<Packet2cd>(a);
// Complex logarithm for Packet4cf: thin wrapper delegating to
// plog_complex<Packet4cf> (defined outside this listing).
443EIGEN_STRONG_INLINE Packet4cf plog<Packet4cf>(
const Packet4cf& a) {
444 return plog_complex<Packet4cf>(a);
// Complex exponential for Packet4cf: thin wrapper delegating to
// pexp_complex<Packet4cf> (defined outside this listing).
448EIGEN_STRONG_INLINE Packet4cf pexp<Packet4cf>(
const Packet4cf& a) {
449 return pexp_complex<Packet4cf>(a);
Aligned32
Definition: Constants.h:238
Eigen
Namespace containing all symbols from the Eigen library.
Definition: Core:137