Eigen  3.4.90 (git rev 5a9f66fb35d03a4da9ef8976e67a61b30aa16dcf)
 
Loading...
Searching...
No Matches
AVX/Complex.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2014 Benoit Steiner (benoit.steiner.goog@gmail.com)
5//
6// This Source Code Form is subject to the terms of the Mozilla
7// Public License v. 2.0. If a copy of the MPL was not distributed
8// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
10#ifndef EIGEN_COMPLEX_AVX_H
11#define EIGEN_COMPLEX_AVX_H
12
13// IWYU pragma: private
14#include "../../InternalHeaderCheck.h"
15
16namespace Eigen {
17
18namespace internal {
19
20//---------- float ----------
21struct Packet4cf {
22 EIGEN_STRONG_INLINE Packet4cf() {}
23 EIGEN_STRONG_INLINE explicit Packet4cf(const __m256& a) : v(a) {}
24 __m256 v;
25};
26
27#ifndef EIGEN_VECTORIZE_AVX512
28template <>
29struct packet_traits<std::complex<float> > : default_packet_traits {
30 typedef Packet4cf type;
31 typedef Packet2cf half;
32 enum {
33 Vectorizable = 1,
34 AlignedOnScalar = 1,
35 size = 4,
36
37 HasAdd = 1,
38 HasSub = 1,
39 HasMul = 1,
40 HasDiv = 1,
41 HasNegate = 1,
42 HasSqrt = 1,
43 HasLog = 1,
44 HasExp = 1,
45 HasAbs = 0,
46 HasAbs2 = 0,
47 HasMin = 0,
48 HasMax = 0,
49 HasSetLinear = 0
50 };
51};
52#endif
53
54template <>
55struct unpacket_traits<Packet4cf> {
56 typedef std::complex<float> type;
57 typedef Packet2cf half;
58 typedef Packet8f as_real;
59 enum {
60 size = 4,
61 alignment = Aligned32,
62 vectorizable = true,
63 masked_load_available = false,
64 masked_store_available = false
65 };
66};
67
68template <>
69EIGEN_STRONG_INLINE Packet4cf padd<Packet4cf>(const Packet4cf& a, const Packet4cf& b) {
70 return Packet4cf(_mm256_add_ps(a.v, b.v));
71}
72template <>
73EIGEN_STRONG_INLINE Packet4cf psub<Packet4cf>(const Packet4cf& a, const Packet4cf& b) {
74 return Packet4cf(_mm256_sub_ps(a.v, b.v));
75}
76template <>
77EIGEN_STRONG_INLINE Packet4cf pnegate(const Packet4cf& a) {
78 return Packet4cf(pnegate(a.v));
79}
80template <>
81EIGEN_STRONG_INLINE Packet4cf pconj(const Packet4cf& a) {
82 const __m256 mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x00000000, 0x80000000, 0x00000000, 0x80000000, 0x00000000,
83 0x80000000, 0x00000000, 0x80000000));
84 return Packet4cf(_mm256_xor_ps(a.v, mask));
85}
86
87template <>
88EIGEN_STRONG_INLINE Packet4cf pmul<Packet4cf>(const Packet4cf& a, const Packet4cf& b) {
89 __m256 tmp1 = _mm256_mul_ps(_mm256_moveldup_ps(a.v), b.v);
90 __m256 tmp2 = _mm256_mul_ps(_mm256_movehdup_ps(a.v), _mm256_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1)));
91 __m256 result = _mm256_addsub_ps(tmp1, tmp2);
92 return Packet4cf(result);
93}
94
95template <>
96EIGEN_STRONG_INLINE Packet4cf pcmp_eq(const Packet4cf& a, const Packet4cf& b) {
97 __m256 eq = _mm256_cmp_ps(a.v, b.v, _CMP_EQ_OQ);
98 return Packet4cf(_mm256_and_ps(eq, _mm256_permute_ps(eq, 0xb1)));
99}
100
101template <>
102EIGEN_STRONG_INLINE Packet4cf ptrue<Packet4cf>(const Packet4cf& a) {
103 return Packet4cf(ptrue(Packet8f(a.v)));
104}
105template <>
106EIGEN_STRONG_INLINE Packet4cf pand<Packet4cf>(const Packet4cf& a, const Packet4cf& b) {
107 return Packet4cf(_mm256_and_ps(a.v, b.v));
108}
109template <>
110EIGEN_STRONG_INLINE Packet4cf por<Packet4cf>(const Packet4cf& a, const Packet4cf& b) {
111 return Packet4cf(_mm256_or_ps(a.v, b.v));
112}
113template <>
114EIGEN_STRONG_INLINE Packet4cf pxor<Packet4cf>(const Packet4cf& a, const Packet4cf& b) {
115 return Packet4cf(_mm256_xor_ps(a.v, b.v));
116}
117template <>
118EIGEN_STRONG_INLINE Packet4cf pandnot<Packet4cf>(const Packet4cf& a, const Packet4cf& b) {
119 return Packet4cf(_mm256_andnot_ps(b.v, a.v));
120}
121
122template <>
123EIGEN_STRONG_INLINE Packet4cf pload<Packet4cf>(const std::complex<float>* from) {
124 EIGEN_DEBUG_ALIGNED_LOAD return Packet4cf(pload<Packet8f>(&numext::real_ref(*from)));
125}
126template <>
127EIGEN_STRONG_INLINE Packet4cf ploadu<Packet4cf>(const std::complex<float>* from) {
128 EIGEN_DEBUG_UNALIGNED_LOAD return Packet4cf(ploadu<Packet8f>(&numext::real_ref(*from)));
129}
130
131template <>
132EIGEN_STRONG_INLINE Packet4cf pset1<Packet4cf>(const std::complex<float>& from) {
133 const float re = std::real(from);
134 const float im = std::imag(from);
135 return Packet4cf(_mm256_set_ps(im, re, im, re, im, re, im, re));
136}
137
138template <>
139EIGEN_STRONG_INLINE Packet4cf ploaddup<Packet4cf>(const std::complex<float>* from) {
140 // FIXME The following might be optimized using _mm256_movedup_pd
141 Packet2cf a = ploaddup<Packet2cf>(from);
142 Packet2cf b = ploaddup<Packet2cf>(from + 1);
143 return Packet4cf(_mm256_insertf128_ps(_mm256_castps128_ps256(a.v), b.v, 1));
144}
145
146template <>
147EIGEN_STRONG_INLINE void pstore<std::complex<float> >(std::complex<float>* to, const Packet4cf& from) {
148 EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v);
149}
150template <>
151EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet4cf& from) {
152 EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v);
153}
154
155template <>
156EIGEN_DEVICE_FUNC inline Packet4cf pgather<std::complex<float>, Packet4cf>(const std::complex<float>* from,
157 Index stride) {
158 return Packet4cf(_mm256_set_ps(std::imag(from[3 * stride]), std::real(from[3 * stride]), std::imag(from[2 * stride]),
159 std::real(from[2 * stride]), std::imag(from[1 * stride]), std::real(from[1 * stride]),
160 std::imag(from[0 * stride]), std::real(from[0 * stride])));
161}
162
163template <>
164EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet4cf>(std::complex<float>* to, const Packet4cf& from,
165 Index stride) {
166 __m128 low = _mm256_extractf128_ps(from.v, 0);
167 to[stride * 0] =
168 std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 0)), _mm_cvtss_f32(_mm_shuffle_ps(low, low, 1)));
169 to[stride * 1] =
170 std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 2)), _mm_cvtss_f32(_mm_shuffle_ps(low, low, 3)));
171
172 __m128 high = _mm256_extractf128_ps(from.v, 1);
173 to[stride * 2] =
174 std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 0)), _mm_cvtss_f32(_mm_shuffle_ps(high, high, 1)));
175 to[stride * 3] =
176 std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 2)), _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3)));
177}
178
179template <>
180EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet4cf>(const Packet4cf& a) {
181 return pfirst(Packet2cf(_mm256_castps256_ps128(a.v)));
182}
183
184template <>
185EIGEN_STRONG_INLINE Packet4cf preverse(const Packet4cf& a) {
186 __m128 low = _mm256_extractf128_ps(a.v, 0);
187 __m128 high = _mm256_extractf128_ps(a.v, 1);
188 __m128d lowd = _mm_castps_pd(low);
189 __m128d highd = _mm_castps_pd(high);
190 low = _mm_castpd_ps(_mm_shuffle_pd(lowd, lowd, 0x1));
191 high = _mm_castpd_ps(_mm_shuffle_pd(highd, highd, 0x1));
192 __m256 result = _mm256_setzero_ps();
193 result = _mm256_insertf128_ps(result, low, 1);
194 result = _mm256_insertf128_ps(result, high, 0);
195 return Packet4cf(result);
196}
197
198template <>
199EIGEN_STRONG_INLINE std::complex<float> predux<Packet4cf>(const Packet4cf& a) {
200 return predux(padd(Packet2cf(_mm256_extractf128_ps(a.v, 0)), Packet2cf(_mm256_extractf128_ps(a.v, 1))));
201}
202
203template <>
204EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet4cf>(const Packet4cf& a) {
205 return predux_mul(pmul(Packet2cf(_mm256_extractf128_ps(a.v, 0)), Packet2cf(_mm256_extractf128_ps(a.v, 1))));
206}
207
// NOTE(review): EIGEN_MAKE_CONJ_HELPER_CPLX_REAL is defined outside this file; presumably it generates the
// conj_helper specializations for products mixing Packet4cf with the real packet Packet8f -- confirm.
208EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf, Packet8f)
209
210template <>
211EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b) {
212 return pdiv_complex(a, b);
213}
214
215template <>
216EIGEN_STRONG_INLINE Packet4cf pcplxflip<Packet4cf>(const Packet4cf& x) {
217 return Packet4cf(_mm256_shuffle_ps(x.v, x.v, _MM_SHUFFLE(2, 3, 0, 1)));
218}
219
220//---------- double ----------
221struct Packet2cd {
222 EIGEN_STRONG_INLINE Packet2cd() {}
223 EIGEN_STRONG_INLINE explicit Packet2cd(const __m256d& a) : v(a) {}
224 __m256d v;
225};
226
227#ifndef EIGEN_VECTORIZE_AVX512
228template <>
229struct packet_traits<std::complex<double> > : default_packet_traits {
230 typedef Packet2cd type;
231 typedef Packet1cd half;
232 enum {
233 Vectorizable = 1,
234 AlignedOnScalar = 0,
235 size = 2,
236
237 HasAdd = 1,
238 HasSub = 1,
239 HasMul = 1,
240 HasDiv = 1,
241 HasNegate = 1,
242 HasSqrt = 1,
243 HasLog = 1,
244 HasAbs = 0,
245 HasAbs2 = 0,
246 HasMin = 0,
247 HasMax = 0,
248 HasSetLinear = 0
249 };
250};
251#endif
252
253template <>
254struct unpacket_traits<Packet2cd> {
255 typedef std::complex<double> type;
256 typedef Packet1cd half;
257 typedef Packet4d as_real;
258 enum {
259 size = 2,
260 alignment = Aligned32,
261 vectorizable = true,
262 masked_load_available = false,
263 masked_store_available = false
264 };
265};
266
267template <>
268EIGEN_STRONG_INLINE Packet2cd padd<Packet2cd>(const Packet2cd& a, const Packet2cd& b) {
269 return Packet2cd(_mm256_add_pd(a.v, b.v));
270}
271template <>
272EIGEN_STRONG_INLINE Packet2cd psub<Packet2cd>(const Packet2cd& a, const Packet2cd& b) {
273 return Packet2cd(_mm256_sub_pd(a.v, b.v));
274}
275template <>
276EIGEN_STRONG_INLINE Packet2cd pnegate(const Packet2cd& a) {
277 return Packet2cd(pnegate(a.v));
278}
279template <>
280EIGEN_STRONG_INLINE Packet2cd pconj(const Packet2cd& a) {
281 const __m256d mask = _mm256_castsi256_pd(_mm256_set_epi32(0x80000000, 0x0, 0x0, 0x0, 0x80000000, 0x0, 0x0, 0x0));
282 return Packet2cd(_mm256_xor_pd(a.v, mask));
283}
284
285template <>
286EIGEN_STRONG_INLINE Packet2cd pmul<Packet2cd>(const Packet2cd& a, const Packet2cd& b) {
287 __m256d tmp1 = _mm256_shuffle_pd(a.v, a.v, 0x0);
288 __m256d even = _mm256_mul_pd(tmp1, b.v);
289 __m256d tmp2 = _mm256_shuffle_pd(a.v, a.v, 0xF);
290 __m256d tmp3 = _mm256_shuffle_pd(b.v, b.v, 0x5);
291 __m256d odd = _mm256_mul_pd(tmp2, tmp3);
292 return Packet2cd(_mm256_addsub_pd(even, odd));
293}
294
295template <>
296EIGEN_STRONG_INLINE Packet2cd pcmp_eq(const Packet2cd& a, const Packet2cd& b) {
297 __m256d eq = _mm256_cmp_pd(a.v, b.v, _CMP_EQ_OQ);
298 return Packet2cd(pand(eq, _mm256_permute_pd(eq, 0x5)));
299}
300
301template <>
302EIGEN_STRONG_INLINE Packet2cd ptrue<Packet2cd>(const Packet2cd& a) {
303 return Packet2cd(ptrue(Packet4d(a.v)));
304}
305template <>
306EIGEN_STRONG_INLINE Packet2cd pand<Packet2cd>(const Packet2cd& a, const Packet2cd& b) {
307 return Packet2cd(_mm256_and_pd(a.v, b.v));
308}
309template <>
310EIGEN_STRONG_INLINE Packet2cd por<Packet2cd>(const Packet2cd& a, const Packet2cd& b) {
311 return Packet2cd(_mm256_or_pd(a.v, b.v));
312}
313template <>
314EIGEN_STRONG_INLINE Packet2cd pxor<Packet2cd>(const Packet2cd& a, const Packet2cd& b) {
315 return Packet2cd(_mm256_xor_pd(a.v, b.v));
316}
317template <>
318EIGEN_STRONG_INLINE Packet2cd pandnot<Packet2cd>(const Packet2cd& a, const Packet2cd& b) {
319 return Packet2cd(_mm256_andnot_pd(b.v, a.v));
320}
321
322template <>
323EIGEN_STRONG_INLINE Packet2cd pload<Packet2cd>(const std::complex<double>* from) {
324 EIGEN_DEBUG_ALIGNED_LOAD return Packet2cd(pload<Packet4d>((const double*)from));
325}
326template <>
327EIGEN_STRONG_INLINE Packet2cd ploadu<Packet2cd>(const std::complex<double>* from) {
328 EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cd(ploadu<Packet4d>((const double*)from));
329}
330
331template <>
332EIGEN_STRONG_INLINE Packet2cd pset1<Packet2cd>(const std::complex<double>& from) {
333 // in case casting to a __m128d* is really not safe, then we can still fallback to this version: (much slower though)
334 // return Packet2cd(_mm256_loadu2_m128d((const double*)&from,(const double*)&from));
335 return Packet2cd(_mm256_broadcast_pd((const __m128d*)(const void*)&from));
336}
337
338template <>
339EIGEN_STRONG_INLINE Packet2cd ploaddup<Packet2cd>(const std::complex<double>* from) {
340 return pset1<Packet2cd>(*from);
341}
342
343template <>
344EIGEN_STRONG_INLINE void pstore<std::complex<double> >(std::complex<double>* to, const Packet2cd& from) {
345 EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v);
346}
347template <>
348EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double>* to, const Packet2cd& from) {
349 EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v);
350}
351
352template <>
353EIGEN_DEVICE_FUNC inline Packet2cd pgather<std::complex<double>, Packet2cd>(const std::complex<double>* from,
354 Index stride) {
355 return Packet2cd(_mm256_set_pd(std::imag(from[1 * stride]), std::real(from[1 * stride]), std::imag(from[0 * stride]),
356 std::real(from[0 * stride])));
357}
358
359template <>
360EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet2cd>(std::complex<double>* to, const Packet2cd& from,
361 Index stride) {
362 __m128d low = _mm256_extractf128_pd(from.v, 0);
363 to[stride * 0] = std::complex<double>(_mm_cvtsd_f64(low), _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1)));
364 __m128d high = _mm256_extractf128_pd(from.v, 1);
365 to[stride * 1] = std::complex<double>(_mm_cvtsd_f64(high), _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1)));
366}
367
368template <>
369EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet2cd>(const Packet2cd& a) {
370 __m128d low = _mm256_extractf128_pd(a.v, 0);
371 EIGEN_ALIGN16 double res[2];
372 _mm_store_pd(res, low);
373 return std::complex<double>(res[0], res[1]);
374}
375
376template <>
377EIGEN_STRONG_INLINE Packet2cd preverse(const Packet2cd& a) {
378 __m256d result = _mm256_permute2f128_pd(a.v, a.v, 1);
379 return Packet2cd(result);
380}
381
382template <>
383EIGEN_STRONG_INLINE std::complex<double> predux<Packet2cd>(const Packet2cd& a) {
384 return predux(padd(Packet1cd(_mm256_extractf128_pd(a.v, 0)), Packet1cd(_mm256_extractf128_pd(a.v, 1))));
385}
386
387template <>
388EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet2cd>(const Packet2cd& a) {
389 return predux(pmul(Packet1cd(_mm256_extractf128_pd(a.v, 0)), Packet1cd(_mm256_extractf128_pd(a.v, 1))));
390}
391
// NOTE(review): EIGEN_MAKE_CONJ_HELPER_CPLX_REAL is defined outside this file; presumably it generates the
// conj_helper specializations for products mixing Packet2cd with the real packet Packet4d -- confirm.
392EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cd, Packet4d)
393
394template <>
395EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b) {
396 return pdiv_complex(a, b);
397}
398
399template <>
400EIGEN_STRONG_INLINE Packet2cd pcplxflip<Packet2cd>(const Packet2cd& x) {
401 return Packet2cd(_mm256_shuffle_pd(x.v, x.v, 0x5));
402}
403
404EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4cf, 4>& kernel) {
405 __m256d P0 = _mm256_castps_pd(kernel.packet[0].v);
406 __m256d P1 = _mm256_castps_pd(kernel.packet[1].v);
407 __m256d P2 = _mm256_castps_pd(kernel.packet[2].v);
408 __m256d P3 = _mm256_castps_pd(kernel.packet[3].v);
409
410 __m256d T0 = _mm256_shuffle_pd(P0, P1, 15);
411 __m256d T1 = _mm256_shuffle_pd(P0, P1, 0);
412 __m256d T2 = _mm256_shuffle_pd(P2, P3, 15);
413 __m256d T3 = _mm256_shuffle_pd(P2, P3, 0);
414
415 kernel.packet[1].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 32));
416 kernel.packet[3].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 49));
417 kernel.packet[0].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 32));
418 kernel.packet[2].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 49));
419}
420
421EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2cd, 2>& kernel) {
422 __m256d tmp = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 0 + (2 << 4));
423 kernel.packet[1].v = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 1 + (3 << 4));
424 kernel.packet[0].v = tmp;
425}
426
427template <>
428EIGEN_STRONG_INLINE Packet2cd psqrt<Packet2cd>(const Packet2cd& a) {
429 return psqrt_complex<Packet2cd>(a);
430}
431
432template <>
433EIGEN_STRONG_INLINE Packet4cf psqrt<Packet4cf>(const Packet4cf& a) {
434 return psqrt_complex<Packet4cf>(a);
435}
436
437template <>
438EIGEN_STRONG_INLINE Packet2cd plog<Packet2cd>(const Packet2cd& a) {
439 return plog_complex<Packet2cd>(a);
440}
441
442template <>
443EIGEN_STRONG_INLINE Packet4cf plog<Packet4cf>(const Packet4cf& a) {
444 return plog_complex<Packet4cf>(a);
445}
446
447template <>
448EIGEN_STRONG_INLINE Packet4cf pexp<Packet4cf>(const Packet4cf& a) {
449 return pexp_complex<Packet4cf>(a);
450}
451
452} // end namespace internal
453
454} // end namespace Eigen
455
456#endif // EIGEN_COMPLEX_AVX_H
@ Aligned32
Definition Constants.h:238
Namespace containing all symbols from the Eigen library.
Definition Core:137