Eigen  3.4.90 (git rev 5a9f66fb35d03a4da9ef8976e67a61b30aa16dcf)
 
Loading...
Searching...
No Matches
SSE/Complex.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
5//
6// This Source Code Form is subject to the terms of the Mozilla
7// Public License v. 2.0. If a copy of the MPL was not distributed
8// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
10#ifndef EIGEN_COMPLEX_SSE_H
11#define EIGEN_COMPLEX_SSE_H
12
13// IWYU pragma: private
14#include "../../InternalHeaderCheck.h"
15
16namespace Eigen {
17
18namespace internal {
19
20//---------- float ----------
21struct Packet2cf {
22 EIGEN_STRONG_INLINE Packet2cf() {}
23 EIGEN_STRONG_INLINE explicit Packet2cf(const __m128& a) : v(a) {}
24 Packet4f v;
25};
26
27// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
28// to leverage AVX instructions.
29#ifndef EIGEN_VECTORIZE_AVX
30template <>
31struct packet_traits<std::complex<float> > : default_packet_traits {
32 typedef Packet2cf type;
33 typedef Packet2cf half;
34 enum {
35 Vectorizable = 1,
36 AlignedOnScalar = 1,
37 size = 2,
38
39 HasAdd = 1,
40 HasSub = 1,
41 HasMul = 1,
42 HasDiv = 1,
43 HasNegate = 1,
44 HasSqrt = 1,
45 HasLog = 1,
46 HasExp = 1,
47 HasAbs = 0,
48 HasAbs2 = 0,
49 HasMin = 0,
50 HasMax = 0,
51 HasSetLinear = 0,
52 HasBlend = 1
53 };
54};
55#endif
56
57template <>
58struct unpacket_traits<Packet2cf> {
59 typedef std::complex<float> type;
60 typedef Packet2cf half;
61 typedef Packet4f as_real;
62 enum {
63 size = 2,
64 alignment = Aligned16,
65 vectorizable = true,
66 masked_load_available = false,
67 masked_store_available = false
68 };
69};
70
71template <>
72EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
73 return Packet2cf(_mm_add_ps(a.v, b.v));
74}
75template <>
76EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
77 return Packet2cf(_mm_sub_ps(a.v, b.v));
78}
79
80template <>
81EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) {
82 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000, 0x80000000, 0x80000000, 0x80000000));
83 return Packet2cf(_mm_xor_ps(a.v, mask));
84}
85template <>
86EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) {
87 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000, 0x80000000, 0x00000000, 0x80000000));
88 return Packet2cf(_mm_xor_ps(a.v, mask));
89}
90
91template <>
92EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
93#ifdef EIGEN_VECTORIZE_SSE3
94 return Packet2cf(_mm_addsub_ps(_mm_mul_ps(_mm_moveldup_ps(a.v), b.v),
95 _mm_mul_ps(_mm_movehdup_ps(a.v), vec4f_swizzle1(b.v, 1, 0, 3, 2))));
96 // return Packet2cf(_mm_addsub_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
97 // _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
98 // vec4f_swizzle1(b.v, 1, 0, 3, 2))));
99#else
100 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000, 0x00000000, 0x80000000, 0x00000000));
101 return Packet2cf(
102 _mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
103 _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3), vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
104#endif
105}
106
107template <>
108EIGEN_STRONG_INLINE Packet2cf ptrue<Packet2cf>(const Packet2cf& a) {
109 return Packet2cf(ptrue(Packet4f(a.v)));
110}
111template <>
112EIGEN_STRONG_INLINE Packet2cf pand<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
113 return Packet2cf(_mm_and_ps(a.v, b.v));
114}
115template <>
116EIGEN_STRONG_INLINE Packet2cf por<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
117 return Packet2cf(_mm_or_ps(a.v, b.v));
118}
119template <>
120EIGEN_STRONG_INLINE Packet2cf pxor<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
121 return Packet2cf(_mm_xor_ps(a.v, b.v));
122}
123template <>
124EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
125 return Packet2cf(_mm_andnot_ps(b.v, a.v));
126}
127
128template <>
129EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) {
130 EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>(&numext::real_ref(*from)));
131}
132template <>
133EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) {
134 EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>(&numext::real_ref(*from)));
135}
136
137template <>
138EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from) {
139 const float re = std::real(from);
140 const float im = std::imag(from);
141 return Packet2cf(_mm_set_ps(im, re, im, re));
142}
143
144template <>
145EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) {
146 return pset1<Packet2cf>(*from);
147}
148
149template <>
150EIGEN_STRONG_INLINE void pstore<std::complex<float> >(std::complex<float>* to, const Packet2cf& from) {
151 EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), Packet4f(from.v));
152}
153template <>
154EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet2cf& from) {
155 EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), Packet4f(from.v));
156}
157
158template <>
159EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from,
160 Index stride) {
161 return Packet2cf(_mm_set_ps(std::imag(from[1 * stride]), std::real(from[1 * stride]), std::imag(from[0 * stride]),
162 std::real(from[0 * stride])));
163}
164
165template <>
166EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from,
167 Index stride) {
168 to[stride * 0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 0)),
169 _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 1)));
170 to[stride * 1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 2)),
171 _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));
172}
173
174template <>
175EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float>* addr) {
176 _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0);
177}
178
179template <>
180EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a) {
181 alignas(alignof(__m64)) std::complex<float> res;
182 _mm_storel_pi((__m64*)&res, a.v);
183 return res;
184}
185
186template <>
187EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) {
188 return Packet2cf(_mm_castpd_ps(preverse(Packet2d(_mm_castps_pd(a.v)))));
189}
190
191template <>
192EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a) {
193 return pfirst(Packet2cf(_mm_add_ps(a.v, _mm_movehl_ps(a.v, a.v))));
194}
195
196template <>
197EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a) {
198 return pfirst(pmul(a, Packet2cf(_mm_movehl_ps(a.v, a.v))));
199}
200
201EIGEN_STRONG_INLINE Packet2cf pcplxflip /* <Packet2cf> */ (const Packet2cf& x) {
202 return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2));
203}
204
205EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf, Packet4f)
206
207template <>
208EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
209 return pdiv_complex(a, b);
210}
211
212//---------- double ----------
213struct Packet1cd {
214 EIGEN_STRONG_INLINE Packet1cd() {}
215 EIGEN_STRONG_INLINE explicit Packet1cd(const __m128d& a) : v(a) {}
216 Packet2d v;
217};
218
219// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
220// to leverage AVX instructions.
221#ifndef EIGEN_VECTORIZE_AVX
222template <>
223struct packet_traits<std::complex<double> > : default_packet_traits {
224 typedef Packet1cd type;
225 typedef Packet1cd half;
226 enum {
227 Vectorizable = 1,
228 AlignedOnScalar = 0,
229 size = 1,
230
231 HasAdd = 1,
232 HasSub = 1,
233 HasMul = 1,
234 HasDiv = 1,
235 HasNegate = 1,
236 HasSqrt = 1,
237 HasLog = 1,
238 HasAbs = 0,
239 HasAbs2 = 0,
240 HasMin = 0,
241 HasMax = 0,
242 HasSetLinear = 0
243 };
244};
245#endif
246
247template <>
248struct unpacket_traits<Packet1cd> {
249 typedef std::complex<double> type;
250 typedef Packet1cd half;
251 typedef Packet2d as_real;
252 enum {
253 size = 1,
254 alignment = Aligned16,
255 vectorizable = true,
256 masked_load_available = false,
257 masked_store_available = false
258 };
259};
260
261template <>
262EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
263 return Packet1cd(_mm_add_pd(a.v, b.v));
264}
265template <>
266EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
267 return Packet1cd(_mm_sub_pd(a.v, b.v));
268}
269template <>
270EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) {
271 return Packet1cd(pnegate(Packet2d(a.v)));
272}
273template <>
274EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) {
275 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000, 0x0, 0x0, 0x0));
276 return Packet1cd(_mm_xor_pd(a.v, mask));
277}
278
279template <>
280EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
281#ifdef EIGEN_VECTORIZE_SSE3
282 return Packet1cd(_mm_addsub_pd(_mm_mul_pd(_mm_movedup_pd(a.v), b.v),
283 _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1), vec2d_swizzle1(b.v, 1, 0))));
284#else
285 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x0, 0x0, 0x80000000, 0x0));
286 return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
287 _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1), vec2d_swizzle1(b.v, 1, 0)), mask)));
288#endif
289}
290
291template <>
292EIGEN_STRONG_INLINE Packet1cd ptrue<Packet1cd>(const Packet1cd& a) {
293 return Packet1cd(ptrue(Packet2d(a.v)));
294}
295template <>
296EIGEN_STRONG_INLINE Packet1cd pand<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
297 return Packet1cd(_mm_and_pd(a.v, b.v));
298}
299template <>
300EIGEN_STRONG_INLINE Packet1cd por<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
301 return Packet1cd(_mm_or_pd(a.v, b.v));
302}
303template <>
304EIGEN_STRONG_INLINE Packet1cd pxor<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
305 return Packet1cd(_mm_xor_pd(a.v, b.v));
306}
307template <>
308EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
309 return Packet1cd(_mm_andnot_pd(b.v, a.v));
310}
311
312// FIXME force unaligned load, this is a temporary fix
313template <>
314EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from) {
315 EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from));
316}
317template <>
318EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) {
319 EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from));
320}
321template <>
322EIGEN_STRONG_INLINE Packet1cd
323pset1<Packet1cd>(const std::complex<double>& from) { /* here we really have to use unaligned loads :( */
324 return ploadu<Packet1cd>(&from);
325}
326
327template <>
328EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) {
329 return pset1<Packet1cd>(*from);
330}
331
332// FIXME force unaligned store, this is a temporary fix
333template <>
334EIGEN_STRONG_INLINE void pstore<std::complex<double> >(std::complex<double>* to, const Packet1cd& from) {
335 EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v));
336}
337template <>
338EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double>* to, const Packet1cd& from) {
339 EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v));
340}
341
342template <>
343EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double>* addr) {
344 _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0);
345}
346
347template <>
348EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a) {
349 EIGEN_ALIGN16 double res[2];
350 _mm_store_pd(res, a.v);
351 return std::complex<double>(res[0], res[1]);
352}
353
354template <>
355EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) {
356 return a;
357}
358
359template <>
360EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) {
361 return pfirst(a);
362}
363
364template <>
365EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) {
366 return pfirst(a);
367}
368
369EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd, Packet2d)
370
371template <>
372EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
373 return pdiv_complex(a, b);
374}
375
376EIGEN_STRONG_INLINE Packet1cd pcplxflip /* <Packet1cd> */ (const Packet1cd& x) {
377 return Packet1cd(preverse(Packet2d(x.v)));
378}
379
380EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2cf, 2>& kernel) {
381 __m128d w1 = _mm_castps_pd(kernel.packet[0].v);
382 __m128d w2 = _mm_castps_pd(kernel.packet[1].v);
383
384 __m128 tmp = _mm_castpd_ps(_mm_unpackhi_pd(w1, w2));
385 kernel.packet[0].v = _mm_castpd_ps(_mm_unpacklo_pd(w1, w2));
386 kernel.packet[1].v = tmp;
387}
388
389template <>
390EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b) {
391 __m128 eq = _mm_cmpeq_ps(a.v, b.v);
392 return Packet2cf(pand<Packet4f>(eq, vec4f_swizzle1(eq, 1, 0, 3, 2)));
393}
394
395template <>
396EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b) {
397 __m128d eq = _mm_cmpeq_pd(a.v, b.v);
398 return Packet1cd(pand<Packet2d>(eq, vec2d_swizzle1(eq, 1, 0)));
399}
400
401template <>
402EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket,
403 const Packet2cf& elsePacket) {
404 __m128d result = pblend<Packet2d>(ifPacket, _mm_castps_pd(thenPacket.v), _mm_castps_pd(elsePacket.v));
405 return Packet2cf(_mm_castpd_ps(result));
406}
407
408template <>
409EIGEN_STRONG_INLINE Packet1cd psqrt<Packet1cd>(const Packet1cd& a) {
410 return psqrt_complex<Packet1cd>(a);
411}
412
413template <>
414EIGEN_STRONG_INLINE Packet2cf psqrt<Packet2cf>(const Packet2cf& a) {
415 return psqrt_complex<Packet2cf>(a);
416}
417
418template <>
419EIGEN_STRONG_INLINE Packet1cd plog<Packet1cd>(const Packet1cd& a) {
420 return plog_complex<Packet1cd>(a);
421}
422
423template <>
424EIGEN_STRONG_INLINE Packet2cf plog<Packet2cf>(const Packet2cf& a) {
425 return plog_complex<Packet2cf>(a);
426}
427
428template <>
429EIGEN_STRONG_INLINE Packet2cf pexp<Packet2cf>(const Packet2cf& a) {
430 return pexp_complex<Packet2cf>(a);
431}
432
433} // end namespace internal
434} // end namespace Eigen
435
436#endif // EIGEN_COMPLEX_SSE_H
@ Aligned16
Definition Constants.h:237
Namespace containing all symbols from the Eigen library.
Definition Core:137