Eigen  3.4.90 (git rev 5a9f66fb35d03a4da9ef8976e67a61b30aa16dcf)
 
Loading...
Searching...
No Matches
NEON/Complex.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2010 Gael Guennebaud <[email protected]>
5// Copyright (C) 2010 Konstantinos Margaritis <[email protected]>
6//
7// This Source Code Form is subject to the terms of the Mozilla
8// Public License v. 2.0. If a copy of the MPL was not distributed
9// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10
11#ifndef EIGEN_COMPLEX_NEON_H
12#define EIGEN_COMPLEX_NEON_H
13
14// IWYU pragma: private
15#include "../../InternalHeaderCheck.h"
16
17namespace Eigen {
18
19namespace internal {
20
21inline uint32x4_t p4ui_CONJ_XOR() {
22// See bug 1325, clang fails to call vld1q_u64.
23#if EIGEN_COMP_CLANG || EIGEN_COMP_CASTXML
24 uint32x4_t ret = {0x00000000, 0x80000000, 0x00000000, 0x80000000};
25 return ret;
26#else
27 static const uint32_t conj_XOR_DATA[] = {0x00000000, 0x80000000, 0x00000000, 0x80000000};
28 return vld1q_u32(conj_XOR_DATA);
29#endif
30}
31
32inline uint32x2_t p2ui_CONJ_XOR() {
33 static const uint32_t conj_XOR_DATA[] = {0x00000000, 0x80000000};
34 return vld1_u32(conj_XOR_DATA);
35}
36
37//---------- float ----------
38
39struct Packet1cf {
40 EIGEN_STRONG_INLINE Packet1cf() {}
41 EIGEN_STRONG_INLINE explicit Packet1cf(const Packet2f& a) : v(a) {}
42 Packet2f v;
43};
44struct Packet2cf {
45 EIGEN_STRONG_INLINE Packet2cf() {}
46 EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
47 Packet4f v;
48};
49
50template <>
51struct packet_traits<std::complex<float> > : default_packet_traits {
52 typedef Packet2cf type;
53 typedef Packet1cf half;
54 enum {
55 Vectorizable = 1,
56 AlignedOnScalar = 1,
57 size = 2,
58
59 HasAdd = 1,
60 HasSub = 1,
61 HasMul = 1,
62 HasDiv = 1,
63 HasNegate = 1,
64 HasSqrt = 1,
65 HasLog = 1,
66 HasExp = 1,
67 HasAbs = 0,
68 HasAbs2 = 0,
69 HasMin = 0,
70 HasMax = 0,
71 HasSetLinear = 0
72 };
73};
74
75template <>
76struct unpacket_traits<Packet1cf> {
77 typedef std::complex<float> type;
78 typedef Packet1cf half;
79 typedef Packet2f as_real;
80 enum {
81 size = 1,
82 alignment = Aligned16,
83 vectorizable = true,
84 masked_load_available = false,
85 masked_store_available = false
86 };
87};
88template <>
89struct unpacket_traits<Packet2cf> {
90 typedef std::complex<float> type;
91 typedef Packet1cf half;
92 typedef Packet4f as_real;
93 enum {
94 size = 2,
95 alignment = Aligned16,
96 vectorizable = true,
97 masked_load_available = false,
98 masked_store_available = false
99 };
100};
101
102template <>
103EIGEN_STRONG_INLINE Packet1cf pcast<float, Packet1cf>(const float& a) {
104 return Packet1cf(vset_lane_f32(a, vdup_n_f32(0.f), 0));
105}
106template <>
107EIGEN_STRONG_INLINE Packet2cf pcast<Packet2f, Packet2cf>(const Packet2f& a) {
108 return Packet2cf(vreinterpretq_f32_u64(vmovl_u32(vreinterpret_u32_f32(a))));
109}
110
111template <>
112EIGEN_STRONG_INLINE Packet1cf pset1<Packet1cf>(const std::complex<float>& from) {
113 return Packet1cf(vld1_f32(reinterpret_cast<const float*>(&from)));
114}
115template <>
116EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from) {
117 const float32x2_t r64 = vld1_f32(reinterpret_cast<const float*>(&from));
118 return Packet2cf(vcombine_f32(r64, r64));
119}
120
121template <>
122EIGEN_STRONG_INLINE Packet1cf padd<Packet1cf>(const Packet1cf& a, const Packet1cf& b) {
123 return Packet1cf(padd<Packet2f>(a.v, b.v));
124}
125template <>
126EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
127 return Packet2cf(padd<Packet4f>(a.v, b.v));
128}
129
130template <>
131EIGEN_STRONG_INLINE Packet1cf psub<Packet1cf>(const Packet1cf& a, const Packet1cf& b) {
132 return Packet1cf(psub<Packet2f>(a.v, b.v));
133}
134template <>
135EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
136 return Packet2cf(psub<Packet4f>(a.v, b.v));
137}
138
139template <>
140EIGEN_STRONG_INLINE Packet1cf pnegate(const Packet1cf& a) {
141 return Packet1cf(pnegate<Packet2f>(a.v));
142}
143template <>
144EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) {
145 return Packet2cf(pnegate<Packet4f>(a.v));
146}
147
148template <>
149EIGEN_STRONG_INLINE Packet1cf pconj(const Packet1cf& a) {
150 const Packet2ui b = Packet2ui(vreinterpret_u32_f32(a.v));
151 return Packet1cf(vreinterpret_f32_u32(veor_u32(b, p2ui_CONJ_XOR())));
152}
153template <>
154EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) {
155 const Packet4ui b = Packet4ui(vreinterpretq_u32_f32(a.v));
156 return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR())));
157}
158
159template <>
160EIGEN_STRONG_INLINE Packet1cf pmul<Packet1cf>(const Packet1cf& a, const Packet1cf& b) {
161 Packet2f v1, v2;
162
163 // Get the real values of a | a1_re | a1_re |
164 v1 = vdup_lane_f32(a.v, 0);
165 // Get the imag values of a | a1_im | a1_im |
166 v2 = vdup_lane_f32(a.v, 1);
167 // Multiply the real a with b
168 v1 = vmul_f32(v1, b.v);
169 // Multiply the imag a with b
170 v2 = vmul_f32(v2, b.v);
171 // Conjugate v2
172 v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR()));
173 // Swap real/imag elements in v2.
174 v2 = vrev64_f32(v2);
175 // Add and return the result
176 return Packet1cf(vadd_f32(v1, v2));
177}
178template <>
179EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
180 Packet4f v1, v2;
181
182 // Get the real values of a | a1_re | a1_re | a2_re | a2_re |
183 v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0));
184 // Get the imag values of a | a1_im | a1_im | a2_im | a2_im |
185 v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1));
186 // Multiply the real a with b
187 v1 = vmulq_f32(v1, b.v);
188 // Multiply the imag a with b
189 v2 = vmulq_f32(v2, b.v);
190 // Conjugate v2
191 v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR()));
192 // Swap real/imag elements in v2.
193 v2 = vrev64q_f32(v2);
194 // Add and return the result
195 return Packet2cf(vaddq_f32(v1, v2));
196}
197
198template <>
199EIGEN_STRONG_INLINE Packet1cf pcmp_eq(const Packet1cf& a, const Packet1cf& b) {
200 // Compare real and imaginary parts of a and b to get the mask vector:
201 // [re(a[0])==re(b[0]), im(a[0])==im(b[0])]
202 Packet2f eq = pcmp_eq<Packet2f>(a.v, b.v);
203 // Swap real/imag elements in the mask in to get:
204 // [im(a[0])==im(b[0]), re(a[0])==re(b[0])]
205 Packet2f eq_swapped = vrev64_f32(eq);
206 // Return re(a)==re(b) && im(a)==im(b) by computing bitwise AND of eq and eq_swapped
207 return Packet1cf(pand<Packet2f>(eq, eq_swapped));
208}
209template <>
210EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b) {
211 // Compare real and imaginary parts of a and b to get the mask vector:
212 // [re(a[0])==re(b[0]), im(a[0])==im(b[0]), re(a[1])==re(b[1]), im(a[1])==im(b[1])]
213 Packet4f eq = pcmp_eq<Packet4f>(a.v, b.v);
214 // Swap real/imag elements in the mask in to get:
215 // [im(a[0])==im(b[0]), re(a[0])==re(b[0]), im(a[1])==im(b[1]), re(a[1])==re(b[1])]
216 Packet4f eq_swapped = vrev64q_f32(eq);
217 // Return re(a)==re(b) && im(a)==im(b) by computing bitwise AND of eq and eq_swapped
218 return Packet2cf(pand<Packet4f>(eq, eq_swapped));
219}
220
221template <>
222EIGEN_STRONG_INLINE Packet1cf pand<Packet1cf>(const Packet1cf& a, const Packet1cf& b) {
223 return Packet1cf(vreinterpret_f32_u32(vand_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v))));
224}
225template <>
226EIGEN_STRONG_INLINE Packet2cf pand<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
227 return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v))));
228}
229
230template <>
231EIGEN_STRONG_INLINE Packet1cf por<Packet1cf>(const Packet1cf& a, const Packet1cf& b) {
232 return Packet1cf(vreinterpret_f32_u32(vorr_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v))));
233}
234template <>
235EIGEN_STRONG_INLINE Packet2cf por<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
236 return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v))));
237}
238
239template <>
240EIGEN_STRONG_INLINE Packet1cf pxor<Packet1cf>(const Packet1cf& a, const Packet1cf& b) {
241 return Packet1cf(vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v))));
242}
243template <>
244EIGEN_STRONG_INLINE Packet2cf pxor<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
245 return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v))));
246}
247
248template <>
249EIGEN_STRONG_INLINE Packet1cf pandnot<Packet1cf>(const Packet1cf& a, const Packet1cf& b) {
250 return Packet1cf(vreinterpret_f32_u32(vbic_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v))));
251}
252template <>
253EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
254 return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v))));
255}
256
257template <>
258EIGEN_STRONG_INLINE Packet1cf pload<Packet1cf>(const std::complex<float>* from) {
259 EIGEN_DEBUG_ALIGNED_LOAD return Packet1cf(pload<Packet2f>((const float*)from));
260}
261template <>
262EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) {
263 EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>(reinterpret_cast<const float*>(from)));
264}
265
266template <>
267EIGEN_STRONG_INLINE Packet1cf ploadu<Packet1cf>(const std::complex<float>* from) {
268 EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cf(ploadu<Packet2f>((const float*)from));
269}
270template <>
271EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) {
272 EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>(reinterpret_cast<const float*>(from)));
273}
274
275template <>
276EIGEN_STRONG_INLINE Packet1cf ploaddup<Packet1cf>(const std::complex<float>* from) {
277 return pset1<Packet1cf>(*from);
278}
279template <>
280EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) {
281 return pset1<Packet2cf>(*from);
282}
283
284template <>
285EIGEN_STRONG_INLINE void pstore<std::complex<float> >(std::complex<float>* to, const Packet1cf& from) {
286 EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v);
287}
288template <>
289EIGEN_STRONG_INLINE void pstore<std::complex<float> >(std::complex<float>* to, const Packet2cf& from) {
290 EIGEN_DEBUG_ALIGNED_STORE pstore(reinterpret_cast<float*>(to), from.v);
291}
292
293template <>
294EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet1cf& from) {
295 EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v);
296}
297template <>
298EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet2cf& from) {
299 EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<float*>(to), from.v);
300}
301
302template <>
303EIGEN_DEVICE_FUNC inline Packet1cf pgather<std::complex<float>, Packet1cf>(const std::complex<float>* from,
304 Index stride) {
305 const Packet2f tmp = vdup_n_f32(std::real(from[0 * stride]));
306 return Packet1cf(vset_lane_f32(std::imag(from[0 * stride]), tmp, 1));
307}
308template <>
309EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from,
310 Index stride) {
311 Packet4f res = vdupq_n_f32(std::real(from[0 * stride]));
312 res = vsetq_lane_f32(std::imag(from[0 * stride]), res, 1);
313 res = vsetq_lane_f32(std::real(from[1 * stride]), res, 2);
314 res = vsetq_lane_f32(std::imag(from[1 * stride]), res, 3);
315 return Packet2cf(res);
316}
317
318template <>
319EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet1cf>(std::complex<float>* to, const Packet1cf& from,
320 Index stride) {
321 to[stride * 0] = std::complex<float>(vget_lane_f32(from.v, 0), vget_lane_f32(from.v, 1));
322}
323template <>
324EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from,
325 Index stride) {
326 to[stride * 0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1));
327 to[stride * 1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
328}
329
330template <>
331EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float>* addr) {
332 EIGEN_ARM_PREFETCH(reinterpret_cast<const float*>(addr));
333}
334
335template <>
336EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet1cf>(const Packet1cf& a) {
337 EIGEN_ALIGN16 std::complex<float> x;
338 vst1_f32(reinterpret_cast<float*>(&x), a.v);
339 return x;
340}
341template <>
342EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a) {
343 EIGEN_ALIGN16 std::complex<float> x[2];
344 vst1q_f32(reinterpret_cast<float*>(x), a.v);
345 return x[0];
346}
347
348template <>
349EIGEN_STRONG_INLINE Packet1cf preverse(const Packet1cf& a) {
350 return a;
351}
352template <>
353EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) {
354 return Packet2cf(vcombine_f32(vget_high_f32(a.v), vget_low_f32(a.v)));
355}
356
357template <>
358EIGEN_STRONG_INLINE Packet1cf pcplxflip<Packet1cf>(const Packet1cf& a) {
359 return Packet1cf(vrev64_f32(a.v));
360}
361template <>
362EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a) {
363 return Packet2cf(vrev64q_f32(a.v));
364}
365
366template <>
367EIGEN_STRONG_INLINE std::complex<float> predux<Packet1cf>(const Packet1cf& a) {
368 std::complex<float> s;
369 vst1_f32((float*)&s, a.v);
370 return s;
371}
372template <>
373EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a) {
374 std::complex<float> s;
375 vst1_f32(reinterpret_cast<float*>(&s), vadd_f32(vget_low_f32(a.v), vget_high_f32(a.v)));
376 return s;
377}
378
379template <>
380EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet1cf>(const Packet1cf& a) {
381 std::complex<float> s;
382 vst1_f32((float*)&s, a.v);
383 return s;
384}
385template <>
386EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a) {
387 float32x2_t a1, a2, v1, v2, prod;
388 std::complex<float> s;
389
390 a1 = vget_low_f32(a.v);
391 a2 = vget_high_f32(a.v);
392 // Get the real values of a | a1_re | a1_re | a2_re | a2_re |
393 v1 = vdup_lane_f32(a1, 0);
394 // Get the real values of a | a1_im | a1_im | a2_im | a2_im |
395 v2 = vdup_lane_f32(a1, 1);
396 // Multiply the real a with b
397 v1 = vmul_f32(v1, a2);
398 // Multiply the imag a with b
399 v2 = vmul_f32(v2, a2);
400 // Conjugate v2
401 v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR()));
402 // Swap real/imag elements in v2.
403 v2 = vrev64_f32(v2);
404 // Add v1, v2
405 prod = vadd_f32(v1, v2);
406
407 vst1_f32(reinterpret_cast<float*>(&s), prod);
408
409 return s;
410}
411
// Macro invocations pairing each complex float packet with its real counterpart.
// NOTE(review): the macro is defined outside this file; presumably it generates
// conj_helper specializations for mixed complex/real products -- confirm there.
412EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cf, Packet2f)
413EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf, Packet4f)
414
415template <>
416EIGEN_STRONG_INLINE Packet1cf pdiv<Packet1cf>(const Packet1cf& a, const Packet1cf& b) {
417 return pdiv_complex(a, b);
418}
419template <>
420EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
421 return pdiv_complex(a, b);
422}
423
424EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet1cf, 1>& /*kernel*/) {}
425EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2cf, 2>& kernel) {
426 Packet4f tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v));
427 kernel.packet[0].v = vcombine_f32(vget_low_f32(kernel.packet[0].v), vget_low_f32(kernel.packet[1].v));
428 kernel.packet[1].v = tmp;
429}
430
431template <>
432EIGEN_STRONG_INLINE Packet1cf psqrt<Packet1cf>(const Packet1cf& a) {
433 return psqrt_complex<Packet1cf>(a);
434}
435
436template <>
437EIGEN_STRONG_INLINE Packet2cf psqrt<Packet2cf>(const Packet2cf& a) {
438 return psqrt_complex<Packet2cf>(a);
439}
440
441template <>
442EIGEN_STRONG_INLINE Packet1cf plog<Packet1cf>(const Packet1cf& a) {
443 return plog_complex(a);
444}
445
446template <>
447EIGEN_STRONG_INLINE Packet2cf plog<Packet2cf>(const Packet2cf& a) {
448 return plog_complex(a);
449}
450
451template <>
452EIGEN_STRONG_INLINE Packet1cf pexp<Packet1cf>(const Packet1cf& a) {
453 return pexp_complex(a);
454}
455
456template <>
457EIGEN_STRONG_INLINE Packet2cf pexp<Packet2cf>(const Packet2cf& a) {
458 return pexp_complex(a);
459}
460
461//---------- double ----------
462#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
463
// XOR mask used by the Packet1cd routines below: lane 0 is zero and lane 1 has
// only bit 63 set, so XOR-ing it into a vector of two doubles flips the top
// (sign) bit of the second lane only.
464// See bug 1325, clang fails to call vld1q_u64.
465#if EIGEN_COMP_CLANG || EIGEN_COMP_CASTXML || EIGEN_COMP_CPE
// On these compilers the vector is brace-initialized directly.
466static uint64x2_t p2ul_CONJ_XOR = {0x0, 0x8000000000000000};
467#else
// Otherwise the mask is loaded from a constant array with vld1q_u64.
468const uint64_t p2ul_conj_XOR_DATA[] = {0x0, 0x8000000000000000};
469static uint64x2_t p2ul_CONJ_XOR = vld1q_u64(p2ul_conj_XOR_DATA);
470#endif
471
472struct Packet1cd {
473 EIGEN_STRONG_INLINE Packet1cd() {}
474 EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
475 Packet2d v;
476};
477
478template <>
479struct packet_traits<std::complex<double> > : default_packet_traits {
480 typedef Packet1cd type;
481 typedef Packet1cd half;
482 enum {
483 Vectorizable = 1,
484 AlignedOnScalar = 0,
485 size = 1,
486
487 HasAdd = 1,
488 HasSub = 1,
489 HasMul = 1,
490 HasDiv = 1,
491 HasNegate = 1,
492 HasSqrt = 1,
493 HasLog = 1,
494 HasAbs = 0,
495 HasAbs2 = 0,
496 HasMin = 0,
497 HasMax = 0,
498 HasSetLinear = 0
499 };
500};
501
502template <>
503struct unpacket_traits<Packet1cd> {
504 typedef std::complex<double> type;
505 typedef Packet1cd half;
506 typedef Packet2d as_real;
507 enum {
508 size = 1,
509 alignment = Aligned16,
510 vectorizable = true,
511 masked_load_available = false,
512 masked_store_available = false
513 };
514};
515
516template <>
517EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from) {
518 EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>(reinterpret_cast<const double*>(from)));
519}
520
521template <>
522EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) {
523 EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>(reinterpret_cast<const double*>(from)));
524}
525
526template <>
527EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from) {
528 /* here we really have to use unaligned loads :( */
529 return ploadu<Packet1cd>(&from);
530}
531
532template <>
533EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
534 return Packet1cd(padd<Packet2d>(a.v, b.v));
535}
536
537template <>
538EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
539 return Packet1cd(psub<Packet2d>(a.v, b.v));
540}
541
542template <>
543EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) {
544 return Packet1cd(pnegate<Packet2d>(a.v));
545}
546
547template <>
548EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) {
549 return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR)));
550}
551
552template <>
553EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
554 Packet2d v1, v2;
555
556 // Get the real values of a
557 v1 = vdupq_lane_f64(vget_low_f64(a.v), 0);
558 // Get the imag values of a
559 v2 = vdupq_lane_f64(vget_high_f64(a.v), 0);
560 // Multiply the real a with b
561 v1 = vmulq_f64(v1, b.v);
562 // Multiply the imag a with b
563 v2 = vmulq_f64(v2, b.v);
564 // Conjugate v2
565 v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR));
566 // Swap real/imag elements in v2.
567 v2 = preverse<Packet2d>(v2);
568 // Add and return the result
569 return Packet1cd(vaddq_f64(v1, v2));
570}
571
572template <>
573EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b) {
574 // Compare real and imaginary parts of a and b to get the mask vector:
575 // [re(a)==re(b), im(a)==im(b)]
576 Packet2d eq = pcmp_eq<Packet2d>(a.v, b.v);
577 // Swap real/imag elements in the mask in to get:
578 // [im(a)==im(b), re(a)==re(b)]
579 Packet2d eq_swapped = vreinterpretq_f64_u32(vrev64q_u32(vreinterpretq_u32_f64(eq)));
580 // Return re(a)==re(b) & im(a)==im(b) by computing bitwise AND of eq and eq_swapped
581 return Packet1cd(pand<Packet2d>(eq, eq_swapped));
582}
583
584template <>
585EIGEN_STRONG_INLINE Packet1cd pand<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
586 return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.v), vreinterpretq_u64_f64(b.v))));
587}
588
589template <>
590EIGEN_STRONG_INLINE Packet1cd por<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
591 return Packet1cd(vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.v), vreinterpretq_u64_f64(b.v))));
592}
593
594template <>
595EIGEN_STRONG_INLINE Packet1cd pxor<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
596 return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), vreinterpretq_u64_f64(b.v))));
597}
598
599template <>
600EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
601 return Packet1cd(vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a.v), vreinterpretq_u64_f64(b.v))));
602}
603
604template <>
605EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) {
606 return pset1<Packet1cd>(*from);
607}
608
609template <>
610EIGEN_STRONG_INLINE void pstore<std::complex<double> >(std::complex<double>* to, const Packet1cd& from) {
611 EIGEN_DEBUG_ALIGNED_STORE pstore(reinterpret_cast<double*>(to), from.v);
612}
613
614template <>
615EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double>* to, const Packet1cd& from) {
616 EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), from.v);
617}
618
619template <>
620EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double>* addr) {
621 EIGEN_ARM_PREFETCH(reinterpret_cast<const double*>(addr));
622}
623
624template <>
625EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from,
626 Index stride) {
627 Packet2d res = pset1<Packet2d>(0.0);
628 res = vsetq_lane_f64(std::real(from[0 * stride]), res, 0);
629 res = vsetq_lane_f64(std::imag(from[0 * stride]), res, 1);
630 return Packet1cd(res);
631}
632
633template <>
634EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from,
635 Index stride) {
636 to[stride * 0] = std::complex<double>(vgetq_lane_f64(from.v, 0), vgetq_lane_f64(from.v, 1));
637}
638
639template <>
640EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a) {
641 EIGEN_ALIGN16 std::complex<double> res;
642 pstore<std::complex<double> >(&res, a);
643 return res;
644}
645
646template <>
647EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) {
648 return a;
649}
650
651template <>
652EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) {
653 return pfirst(a);
654}
655
656template <>
657EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) {
658 return pfirst(a);
659}
660
// Macro invocation pairing the complex double packet with its real counterpart.
// NOTE(review): the macro is defined outside this file; presumably it generates
// conj_helper specializations for mixed complex/real products -- confirm there.
661EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd, Packet2d)
662
663template <>
664EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
665 return pdiv_complex(a, b);
666}
667
668EIGEN_STRONG_INLINE Packet1cd pcplxflip /*<Packet1cd>*/ (const Packet1cd& x) {
669 return Packet1cd(preverse(Packet2d(x.v)));
670}
671
672EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd, 2>& kernel) {
673 Packet2d tmp = vcombine_f64(vget_high_f64(kernel.packet[0].v), vget_high_f64(kernel.packet[1].v));
674 kernel.packet[0].v = vcombine_f64(vget_low_f64(kernel.packet[0].v), vget_low_f64(kernel.packet[1].v));
675 kernel.packet[1].v = tmp;
676}
677
678template <>
679EIGEN_STRONG_INLINE Packet1cd psqrt<Packet1cd>(const Packet1cd& a) {
680 return psqrt_complex<Packet1cd>(a);
681}
682
683template <>
684EIGEN_STRONG_INLINE Packet1cd plog<Packet1cd>(const Packet1cd& a) {
685 return plog_complex(a);
686}
687
688#endif // EIGEN_ARCH_ARM64
689
690} // end namespace internal
691
692} // end namespace Eigen
693
694#endif // EIGEN_COMPLEX_NEON_H
@ Aligned16
Definition Constants.h:237
Namespace containing all symbols from the Eigen library.
Definition Core:137