Eigen  3.4.90 (git rev 5a9f66fb35d03a4da9ef8976e67a61b30aa16dcf)
 
Loading...
Searching...
No Matches
GenericPacketMath.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2008 Gael Guennebaud <[email protected]>
5// Copyright (C) 2006-2008 Benoit Jacob <[email protected]>
6//
7// This Source Code Form is subject to the terms of the Mozilla
8// Public License v. 2.0. If a copy of the MPL was not distributed
9// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10
11#ifndef EIGEN_GENERIC_PACKET_MATH_H
12#define EIGEN_GENERIC_PACKET_MATH_H
13
14// IWYU pragma: private
15#include "./InternalHeaderCheck.h"
16
17namespace Eigen {
18
19namespace internal {
20
29#ifndef EIGEN_DEBUG_ALIGNED_LOAD
30#define EIGEN_DEBUG_ALIGNED_LOAD
31#endif
32
33#ifndef EIGEN_DEBUG_UNALIGNED_LOAD
34#define EIGEN_DEBUG_UNALIGNED_LOAD
35#endif
36
37#ifndef EIGEN_DEBUG_ALIGNED_STORE
38#define EIGEN_DEBUG_ALIGNED_STORE
39#endif
40
41#ifndef EIGEN_DEBUG_UNALIGNED_STORE
42#define EIGEN_DEBUG_UNALIGNED_STORE
43#endif
44
45struct default_packet_traits {
46 enum {
47 // Ops that are implemented for most types.
48 HasAdd = 1,
49 HasSub = 1,
50 HasShift = 1,
51 HasMul = 1,
52 HasNegate = 1,
53 HasAbs = 1,
54 HasAbs2 = 1,
55 HasMin = 1,
56 HasMax = 1,
57 HasConj = 1,
58 HasSetLinear = 1,
59 HasSign = 1,
60 // By default, the nearest integer functions (rint, round, floor, ceil, trunc) are enabled for all scalar and packet
61 // types
62 HasRound = 1,
63
64 HasArg = 0,
65 HasAbsDiff = 0,
66 HasBlend = 0,
67 // This flag is used to indicate whether packet comparison is supported.
68 // pcmp_eq, pcmp_lt and pcmp_le should be defined for it to be true.
69 HasCmp = 0,
70
71 HasDiv = 0,
72 HasReciprocal = 0,
73 HasSqrt = 0,
74 HasRsqrt = 0,
75 HasExp = 0,
76 HasExpm1 = 0,
77 HasLog = 0,
78 HasLog1p = 0,
79 HasLog10 = 0,
80 HasPow = 0,
81 HasSin = 0,
82 HasCos = 0,
83 HasTan = 0,
84 HasASin = 0,
85 HasACos = 0,
86 HasATan = 0,
87 HasATanh = 0,
88 HasSinh = 0,
89 HasCosh = 0,
90 HasTanh = 0,
91 HasLGamma = 0,
92 HasDiGamma = 0,
93 HasZeta = 0,
94 HasPolygamma = 0,
95 HasErf = 0,
96 HasErfc = 0,
97 HasNdtri = 0,
98 HasBessel = 0,
99 HasIGamma = 0,
100 HasIGammaDerA = 0,
101 HasGammaSampleDerAlpha = 0,
102 HasIGammac = 0,
103 HasBetaInc = 0
104 };
105};
106
107template <typename T>
108struct packet_traits : default_packet_traits {
109 typedef T type;
110 typedef T half;
111 enum {
112 Vectorizable = 0,
113 size = 1,
114 AlignedOnScalar = 0,
115 };
116 enum {
117 HasAdd = 0,
118 HasSub = 0,
119 HasMul = 0,
120 HasNegate = 0,
121 HasAbs = 0,
122 HasAbs2 = 0,
123 HasMin = 0,
124 HasMax = 0,
125 HasConj = 0,
126 HasSetLinear = 0
127 };
128};
129
130template <typename T>
131struct packet_traits<const T> : packet_traits<T> {};
132
133template <typename T>
134struct unpacket_traits {
135 typedef T type;
136 typedef T half;
137 typedef typename numext::get_integer_by_size<sizeof(T)>::signed_type integer_packet;
138 enum {
139 size = 1,
140 alignment = alignof(T),
141 vectorizable = false,
142 masked_load_available = false,
143 masked_store_available = false
144 };
145};
146
147template <typename T>
148struct unpacket_traits<const T> : unpacket_traits<T> {};
149
153template <typename Packet>
154struct is_scalar {
155 using Scalar = typename unpacket_traits<Packet>::type;
156 enum { value = internal::is_same<Packet, Scalar>::value };
157};
158
159// automatically and succinctly define combinations of pcast<SrcPacket,TgtPacket> when
160// 1) the packets are the same type, or
161// 2) the packets differ only in sign.
162// In both of these cases, preinterpret (bit_cast) is equivalent to pcast (static_cast)
163template <typename SrcPacket, typename TgtPacket,
164 bool Scalar = is_scalar<SrcPacket>::value && is_scalar<TgtPacket>::value>
165struct is_degenerate_helper : is_same<SrcPacket, TgtPacket> {};
166template <>
167struct is_degenerate_helper<int8_t, uint8_t, true> : std::true_type {};
168template <>
169struct is_degenerate_helper<int16_t, uint16_t, true> : std::true_type {};
170template <>
171struct is_degenerate_helper<int32_t, uint32_t, true> : std::true_type {};
172template <>
173struct is_degenerate_helper<int64_t, uint64_t, true> : std::true_type {};
174
175template <typename SrcPacket, typename TgtPacket>
176struct is_degenerate_helper<SrcPacket, TgtPacket, false> {
177 using SrcScalar = typename unpacket_traits<SrcPacket>::type;
178 static constexpr int SrcSize = unpacket_traits<SrcPacket>::size;
179 using TgtScalar = typename unpacket_traits<TgtPacket>::type;
180 static constexpr int TgtSize = unpacket_traits<TgtPacket>::size;
181 static constexpr bool value = is_degenerate_helper<SrcScalar, TgtScalar, true>::value && (SrcSize == TgtSize);
182};
183
184// is_degenerate<T1,T2>::value == is_degenerate<T2,T1>::value
185template <typename SrcPacket, typename TgtPacket>
186struct is_degenerate {
187 static constexpr bool value =
188 is_degenerate_helper<SrcPacket, TgtPacket>::value || is_degenerate_helper<TgtPacket, SrcPacket>::value;
189};
190
191template <typename Packet>
192struct is_half {
193 using Scalar = typename unpacket_traits<Packet>::type;
194 static constexpr int Size = unpacket_traits<Packet>::size;
195 using DefaultPacket = typename packet_traits<Scalar>::type;
196 static constexpr int DefaultSize = unpacket_traits<DefaultPacket>::size;
197 static constexpr bool value = Size < DefaultSize;
198};
199
200template <typename Src, typename Tgt>
201struct type_casting_traits {
202 enum {
203 VectorizedCast =
204 is_degenerate<Src, Tgt>::value && packet_traits<Src>::Vectorizable && packet_traits<Tgt>::Vectorizable,
205 SrcCoeffRatio = 1,
206 TgtCoeffRatio = 1
207 };
208};
209
210// provides a succint template to define vectorized casting traits with respect to the largest accessible packet types
211template <typename Src, typename Tgt>
212struct vectorized_type_casting_traits {
213 enum : int {
214 DefaultSrcPacketSize = packet_traits<Src>::size,
215 DefaultTgtPacketSize = packet_traits<Tgt>::size,
216 VectorizedCast = 1,
217 SrcCoeffRatio = plain_enum_max(DefaultTgtPacketSize / DefaultSrcPacketSize, 1),
218 TgtCoeffRatio = plain_enum_max(DefaultSrcPacketSize / DefaultTgtPacketSize, 1)
219 };
220};
221
224template <typename T, int unique_id = 0>
225struct eigen_packet_wrapper {
226 EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
227 EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
228 EIGEN_ALWAYS_INLINE eigen_packet_wrapper() = default;
229 EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T& v) : m_val(v) {}
230 EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T& v) {
231 m_val = v;
232 return *this;
233 }
234
235 T m_val;
236};
237
238template <typename Target, typename Packet, bool IsSame = is_same<Target, Packet>::value>
239struct preinterpret_generic;
240
241template <typename Target, typename Packet>
242struct preinterpret_generic<Target, Packet, false> {
243 // the packets are not the same, attempt scalar bit_cast
244 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Target run(const Packet& a) {
245 return numext::bit_cast<Target, Packet>(a);
246 }
247};
248
249template <typename Packet>
250struct preinterpret_generic<Packet, Packet, true> {
251 // the packets are the same type: do nothing
252 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& a) { return a; }
253};
254
256template <typename Target, typename Packet>
257EIGEN_DEVICE_FUNC inline Target preinterpret(const Packet& a) {
258 return preinterpret_generic<Target, Packet>::run(a);
259}
260
261template <typename SrcPacket, typename TgtPacket, bool Degenerate = is_degenerate<SrcPacket, TgtPacket>::value,
262 bool TgtIsHalf = is_half<TgtPacket>::value>
263struct pcast_generic;
264
265template <typename SrcPacket, typename TgtPacket>
266struct pcast_generic<SrcPacket, TgtPacket, false, false> {
267 // the packets are not degenerate: attempt scalar static_cast
268 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) {
269 return cast_impl<SrcPacket, TgtPacket>::run(a);
270 }
271};
272
273template <typename Packet>
274struct pcast_generic<Packet, Packet, true, false> {
275 // the packets are the same: do nothing
276 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& a) { return a; }
277};
278
279template <typename SrcPacket, typename TgtPacket, bool TgtIsHalf>
280struct pcast_generic<SrcPacket, TgtPacket, true, TgtIsHalf> {
281 // the packets are degenerate: preinterpret is equivalent to pcast
282 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) { return preinterpret<TgtPacket>(a); }
283};
284
286template <typename SrcPacket, typename TgtPacket>
287EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a) {
288 return pcast_generic<SrcPacket, TgtPacket>::run(a);
289}
290template <typename SrcPacket, typename TgtPacket>
291EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b) {
292 return pcast_generic<SrcPacket, TgtPacket>::run(a, b);
293}
294template <typename SrcPacket, typename TgtPacket>
295EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b, const SrcPacket& c,
296 const SrcPacket& d) {
297 return pcast_generic<SrcPacket, TgtPacket>::run(a, b, c, d);
298}
299template <typename SrcPacket, typename TgtPacket>
300EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b, const SrcPacket& c, const SrcPacket& d,
301 const SrcPacket& e, const SrcPacket& f, const SrcPacket& g,
302 const SrcPacket& h) {
303 return pcast_generic<SrcPacket, TgtPacket>::run(a, b, c, d, e, f, g, h);
304}
305
306template <typename SrcPacket, typename TgtPacket>
307struct pcast_generic<SrcPacket, TgtPacket, false, true> {
308 // TgtPacket is a half packet of some other type
309 // perform cast and truncate result
310 using DefaultTgtPacket = typename is_half<TgtPacket>::DefaultPacket;
311 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) {
312 return preinterpret<TgtPacket>(pcast<SrcPacket, DefaultTgtPacket>(a));
313 }
314};
315
317template <typename Packet>
318EIGEN_DEVICE_FUNC inline Packet padd(const Packet& a, const Packet& b) {
319 return a + b;
320}
321// Avoid compiler warning for boolean algebra.
322template <>
323EIGEN_DEVICE_FUNC inline bool padd(const bool& a, const bool& b) {
324 return a || b;
325}
326
331template <typename Packet>
332EIGEN_DEVICE_FUNC inline std::enable_if_t<unpacket_traits<Packet>::masked_fpops_available, Packet> padd(
333 const Packet& a, const Packet& b, typename unpacket_traits<Packet>::mask_t umask);
334
336template <typename Packet>
337EIGEN_DEVICE_FUNC inline Packet psub(const Packet& a, const Packet& b) {
338 return a - b;
339}
340
342template <typename Packet>
343EIGEN_DEVICE_FUNC inline Packet pnegate(const Packet& a) {
344 EIGEN_STATIC_ASSERT((!is_same<typename unpacket_traits<Packet>::type, bool>::value),
345 NEGATE IS NOT DEFINED FOR BOOLEAN TYPES)
346 return numext::negate(a);
347}
348
350template <typename Packet>
351EIGEN_DEVICE_FUNC inline Packet pconj(const Packet& a) {
352 return numext::conj(a);
353}
354
356template <typename Packet>
357EIGEN_DEVICE_FUNC inline Packet pmul(const Packet& a, const Packet& b) {
358 return a * b;
359}
360// Avoid compiler warning for boolean algebra.
361template <>
362EIGEN_DEVICE_FUNC inline bool pmul(const bool& a, const bool& b) {
363 return a && b;
364}
365
367template <typename Packet>
368EIGEN_DEVICE_FUNC inline Packet pdiv(const Packet& a, const Packet& b) {
369 return a / b;
370}
371
372// In the generic case, memset to all one bits.
373template <typename Packet, typename EnableIf = void>
374struct ptrue_impl {
375 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
376 Packet b;
377 memset(static_cast<void*>(&b), 0xff, sizeof(Packet));
378 return b;
379 }
380};
381
382// For booleans, we can only directly set a valid `bool` value to avoid UB.
383template <>
384struct ptrue_impl<bool, void> {
385 static EIGEN_DEVICE_FUNC inline bool run(const bool& /*a*/) { return true; }
386};
387
388// For non-trivial scalars, set to Scalar(1) (i.e. a non-zero value).
389// Although this is technically not a valid bitmask, the scalar path for pselect
390// uses a comparison to zero, so this should still work in most cases. We don't
391// have another option, since the scalar type requires initialization.
392template <typename T>
393struct ptrue_impl<T, std::enable_if_t<is_scalar<T>::value && NumTraits<T>::RequireInitialization>> {
394 static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) { return T(1); }
395};
396
398template <typename Packet>
399EIGEN_DEVICE_FUNC inline Packet ptrue(const Packet& a) {
400 return ptrue_impl<Packet>::run(a);
401}
402
403// In the general case, memset to zero.
404template <typename Packet, typename EnableIf = void>
405struct pzero_impl {
406 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
407 Packet b;
408 memset(static_cast<void*>(&b), 0x00, sizeof(Packet));
409 return b;
410 }
411};
412
413// For scalars, explicitly set to Scalar(0), since the underlying representation
414// for zero may not consist of all-zero bits.
415template <typename T>
416struct pzero_impl<T, std::enable_if_t<is_scalar<T>::value>> {
417 static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) { return T(0); }
418};
419
421template <typename Packet>
422EIGEN_DEVICE_FUNC inline Packet pzero(const Packet& a) {
423 return pzero_impl<Packet>::run(a);
424}
425
427template <typename Packet>
428EIGEN_DEVICE_FUNC inline Packet pcmp_le(const Packet& a, const Packet& b) {
429 return a <= b ? ptrue(a) : pzero(a);
430}
431
433template <typename Packet>
434EIGEN_DEVICE_FUNC inline Packet pcmp_lt(const Packet& a, const Packet& b) {
435 return a < b ? ptrue(a) : pzero(a);
436}
437
439template <typename Packet>
440EIGEN_DEVICE_FUNC inline Packet pcmp_eq(const Packet& a, const Packet& b) {
441 return a == b ? ptrue(a) : pzero(a);
442}
443
445template <typename Packet>
446EIGEN_DEVICE_FUNC inline Packet pcmp_lt_or_nan(const Packet& a, const Packet& b) {
447 return a >= b ? pzero(a) : ptrue(a);
448}
449
450template <typename T>
451struct bit_and {
452 EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a & b; }
453};
454
455template <typename T>
456struct bit_or {
457 EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a | b; }
458};
459
460template <typename T>
461struct bit_xor {
462 EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a ^ b; }
463};
464
465template <typename T>
466struct bit_not {
467 EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a) const { return ~a; }
468};
469
470template <>
471struct bit_and<bool> {
472 EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const {
473 return a && b;
474 }
475};
476
477template <>
478struct bit_or<bool> {
479 EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const {
480 return a || b;
481 }
482};
483
484template <>
485struct bit_xor<bool> {
486 EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const {
487 return a != b;
488 }
489};
490
491template <>
492struct bit_not<bool> {
493 EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE bool operator()(const bool& a) const { return !a; }
494};
495
496// Use operators &, |, ^, ~.
497template <typename T>
498struct operator_bitwise_helper {
499 EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) { return bit_and<T>()(a, b); }
500 EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return bit_or<T>()(a, b); }
501 EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) { return bit_xor<T>()(a, b); }
502 EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return bit_not<T>()(a); }
503};
504
505// Apply binary operations byte-by-byte
506template <typename T>
507struct bytewise_bitwise_helper {
508 EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) {
509 return binary(a, b, bit_and<unsigned char>());
510 }
511 EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return binary(a, b, bit_or<unsigned char>()); }
512 EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) {
513 return binary(a, b, bit_xor<unsigned char>());
514 }
515 EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return unary(a, bit_not<unsigned char>()); }
516
517 private:
518 template <typename Op>
519 EIGEN_DEVICE_FUNC static inline T unary(const T& a, Op op) {
520 const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
521 T c;
522 unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
523 for (size_t i = 0; i < sizeof(T); ++i) {
524 *c_ptr++ = op(*a_ptr++);
525 }
526 return c;
527 }
528
529 template <typename Op>
530 EIGEN_DEVICE_FUNC static inline T binary(const T& a, const T& b, Op op) {
531 const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
532 const unsigned char* b_ptr = reinterpret_cast<const unsigned char*>(&b);
533 T c;
534 unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
535 for (size_t i = 0; i < sizeof(T); ++i) {
536 *c_ptr++ = op(*a_ptr++, *b_ptr++);
537 }
538 return c;
539 }
540};
541
542// In the general case, use byte-by-byte manipulation.
543template <typename T, typename EnableIf = void>
544struct bitwise_helper : public bytewise_bitwise_helper<T> {};
545
546// For integers or non-trivial scalars, use binary operators.
547template <typename T>
548struct bitwise_helper<T, typename std::enable_if_t<is_scalar<T>::value &&
549 (NumTraits<T>::IsInteger || NumTraits<T>::RequireInitialization)>>
550 : public operator_bitwise_helper<T> {};
551
553template <typename Packet>
554EIGEN_DEVICE_FUNC inline Packet pand(const Packet& a, const Packet& b) {
555 return bitwise_helper<Packet>::bitwise_and(a, b);
556}
557
559template <typename Packet>
560EIGEN_DEVICE_FUNC inline Packet por(const Packet& a, const Packet& b) {
561 return bitwise_helper<Packet>::bitwise_or(a, b);
562}
563
565template <typename Packet>
566EIGEN_DEVICE_FUNC inline Packet pxor(const Packet& a, const Packet& b) {
567 return bitwise_helper<Packet>::bitwise_xor(a, b);
568}
569
571template <typename Packet>
572EIGEN_DEVICE_FUNC inline Packet pnot(const Packet& a) {
573 return bitwise_helper<Packet>::bitwise_not(a);
574}
575
577template <typename Packet>
578EIGEN_DEVICE_FUNC inline Packet pandnot(const Packet& a, const Packet& b) {
579 return pand(a, pnot(b));
580}
581
583template <typename Packet>
584EIGEN_DEVICE_FUNC inline Packet pisnan(const Packet& a) {
585 return pandnot(ptrue(a), pcmp_eq(a, a));
586}
587
588// In the general case, use bitwise select.
589template <typename Packet, typename EnableIf = void>
590struct pselect_impl {
591 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
592 return por(pand(a, mask), pandnot(b, mask));
593 }
594};
595
596// For scalars, use ternary select.
597template <typename Packet>
598struct pselect_impl<Packet, std::enable_if_t<is_scalar<Packet>::value>> {
599 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
600 return numext::equal_strict(mask, Packet(0)) ? b : a;
601 }
602};
603
605template <typename Packet>
606EIGEN_DEVICE_FUNC inline Packet pselect(const Packet& mask, const Packet& a, const Packet& b) {
607 return pselect_impl<Packet>::run(mask, a, b);
608}
609
610template <>
611EIGEN_DEVICE_FUNC inline bool pselect<bool>(const bool& cond, const bool& a, const bool& b) {
612 return cond ? a : b;
613}
614
617template <int NaNPropagation>
618struct pminmax_impl {
619 template <typename Packet, typename Op>
620 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
621 return op(a, b);
622 }
623};
624
627template <>
628struct pminmax_impl<PropagateNaN> {
629 template <typename Packet, typename Op>
630 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
631 Packet not_nan_mask_a = pcmp_eq(a, a);
632 Packet not_nan_mask_b = pcmp_eq(b, b);
633 return pselect(not_nan_mask_a, pselect(not_nan_mask_b, op(a, b), b), a);
634 }
635};
636
640template <>
641struct pminmax_impl<PropagateNumbers> {
642 template <typename Packet, typename Op>
643 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
644 Packet not_nan_mask_a = pcmp_eq(a, a);
645 Packet not_nan_mask_b = pcmp_eq(b, b);
646 return pselect(not_nan_mask_a, pselect(not_nan_mask_b, op(a, b), a), b);
647 }
648};
649
650#define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) [](const Type& a, const Type& b) { return Func(a, b); }
651
654template <typename Packet>
655EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
656 return numext::mini(a, b);
657}
658
661template <int NaNPropagation, typename Packet>
662EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
663 return pminmax_impl<NaNPropagation>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmin<Packet>)));
664}
665
668template <typename Packet>
669EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
670 return numext::maxi(a, b);
671}
672
675template <int NaNPropagation, typename Packet>
676EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
677 return pminmax_impl<NaNPropagation>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmax<Packet>)));
678}
679
681template <typename Packet>
682EIGEN_DEVICE_FUNC inline Packet pabs(const Packet& a) {
683 return numext::abs(a);
684}
685template <>
686EIGEN_DEVICE_FUNC inline unsigned int pabs(const unsigned int& a) {
687 return a;
688}
689template <>
690EIGEN_DEVICE_FUNC inline unsigned long pabs(const unsigned long& a) {
691 return a;
692}
693template <>
694EIGEN_DEVICE_FUNC inline unsigned long long pabs(const unsigned long long& a) {
695 return a;
696}
697
699template <typename Packet>
700EIGEN_DEVICE_FUNC inline Packet paddsub(const Packet& a, const Packet& b) {
701 return pselect(peven_mask(a), padd(a, b), psub(a, b));
702}
703
705template <typename Packet>
706EIGEN_DEVICE_FUNC inline Packet parg(const Packet& a) {
707 using numext::arg;
708 return arg(a);
709}
710
712template <int N, typename T>
713EIGEN_DEVICE_FUNC inline T parithmetic_shift_right(const T& a) {
714 return numext::arithmetic_shift_right(a, N);
715}
716
718template <int N, typename T>
719EIGEN_DEVICE_FUNC inline T plogical_shift_right(const T& a) {
720 return numext::logical_shift_right(a, N);
721}
722
724template <int N, typename T>
725EIGEN_DEVICE_FUNC inline T plogical_shift_left(const T& a) {
726 return numext::logical_shift_left(a, N);
727}
728
732template <typename Packet>
733EIGEN_DEVICE_FUNC inline Packet pfrexp(const Packet& a, Packet& exponent) {
734 int exp;
735 EIGEN_USING_STD(frexp);
736 Packet result = static_cast<Packet>(frexp(a, &exp));
737 exponent = static_cast<Packet>(exp);
738 return result;
739}
740
744template <typename Packet>
745EIGEN_DEVICE_FUNC inline Packet pldexp(const Packet& a, const Packet& exponent) {
746 EIGEN_USING_STD(ldexp)
747 return static_cast<Packet>(ldexp(a, static_cast<int>(exponent)));
748}
749
751template <typename Packet>
752EIGEN_DEVICE_FUNC inline Packet pabsdiff(const Packet& a, const Packet& b) {
753 return pselect(pcmp_lt(a, b), psub(b, a), psub(a, b));
754}
755
757template <typename Packet>
758EIGEN_DEVICE_FUNC inline Packet pload(const typename unpacket_traits<Packet>::type* from) {
759 return *from;
760}
761
766template <typename Packet>
767EIGEN_DEVICE_FUNC inline Packet pload_partial(const typename unpacket_traits<Packet>::type* from, const Index n,
768 const Index offset = 0) {
769 const Index packet_size = unpacket_traits<Packet>::size;
770 eigen_assert(n + offset <= packet_size && "number of elements plus offset will read past end of packet");
771 typedef typename unpacket_traits<Packet>::type Scalar;
772 EIGEN_ALIGN_MAX Scalar elements[packet_size] = {Scalar(0)};
773 for (Index i = offset; i < numext::mini(n + offset, packet_size); i++) {
774 elements[i] = from[i - offset];
775 }
776 return pload<Packet>(elements);
777}
778
780template <typename Packet>
781EIGEN_DEVICE_FUNC inline Packet ploadu(const typename unpacket_traits<Packet>::type* from) {
782 return *from;
783}
784
787template <typename Packet>
788EIGEN_DEVICE_FUNC inline Packet ploadu_partial(const typename unpacket_traits<Packet>::type* from, const Index n,
789 const Index offset = 0) {
790 const Index packet_size = unpacket_traits<Packet>::size;
791 eigen_assert(n + offset <= packet_size && "number of elements plus offset will read past end of packet");
792 typedef typename unpacket_traits<Packet>::type Scalar;
793 EIGEN_ALIGN_MAX Scalar elements[packet_size] = {Scalar(0)};
794 for (Index i = offset; i < numext::mini(n + offset, packet_size); i++) {
795 elements[i] = from[i - offset];
796 }
797 return pload<Packet>(elements);
798}
799
804template <typename Packet>
805EIGEN_DEVICE_FUNC inline std::enable_if_t<unpacket_traits<Packet>::masked_load_available, Packet> ploadu(
806 const typename unpacket_traits<Packet>::type* from, typename unpacket_traits<Packet>::mask_t umask);
807
809template <typename Packet>
810EIGEN_DEVICE_FUNC inline Packet pset1(const typename unpacket_traits<Packet>::type& a) {
811 return a;
812}
813
815template <typename Packet, typename BitsType>
816EIGEN_DEVICE_FUNC inline Packet pset1frombits(BitsType a);
817
819template <typename Packet>
820EIGEN_DEVICE_FUNC inline Packet pload1(const typename unpacket_traits<Packet>::type* a) {
821 return pset1<Packet>(*a);
822}
823
829template <typename Packet>
830EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ploaddup(const typename unpacket_traits<Packet>::type* from) {
831 return *from;
832}
833
840template <typename Packet>
841EIGEN_DEVICE_FUNC inline Packet ploadquad(const typename unpacket_traits<Packet>::type* from) {
842 return pload1<Packet>(from);
843}
844
854template <typename Packet>
855EIGEN_DEVICE_FUNC inline void pbroadcast4(const typename unpacket_traits<Packet>::type* a, Packet& a0, Packet& a1,
856 Packet& a2, Packet& a3) {
857 a0 = pload1<Packet>(a + 0);
858 a1 = pload1<Packet>(a + 1);
859 a2 = pload1<Packet>(a + 2);
860 a3 = pload1<Packet>(a + 3);
861}
862
870template <typename Packet>
871EIGEN_DEVICE_FUNC inline void pbroadcast2(const typename unpacket_traits<Packet>::type* a, Packet& a0, Packet& a1) {
872 a0 = pload1<Packet>(a + 0);
873 a1 = pload1<Packet>(a + 1);
874}
875
877template <typename Packet>
878EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet plset(const typename unpacket_traits<Packet>::type& a) {
879 return a;
880}
881
884template <typename Packet>
885EIGEN_DEVICE_FUNC inline Packet peven_mask(const Packet& /*a*/) {
886 typedef typename unpacket_traits<Packet>::type Scalar;
887 const size_t n = unpacket_traits<Packet>::size;
888 EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
889 for (size_t i = 0; i < n; ++i) {
890 memset(elements + i, ((i & 1) == 0 ? 0xff : 0), sizeof(Scalar));
891 }
892 return ploadu<Packet>(elements);
893}
894
896template <typename Scalar, typename Packet>
897EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from) {
898 (*to) = from;
899}
900
904template <typename Scalar, typename Packet>
905EIGEN_DEVICE_FUNC inline void pstore_partial(Scalar* to, const Packet& from, const Index n, const Index offset = 0) {
906 const Index packet_size = unpacket_traits<Packet>::size;
907 eigen_assert(n + offset <= packet_size && "number of elements plus offset will write past end of packet");
908 EIGEN_ALIGN_MAX Scalar elements[packet_size];
909 pstore<Scalar>(elements, from);
910 for (Index i = 0; i < numext::mini(n, packet_size - offset); i++) {
911 to[i] = elements[i + offset];
912 }
913}
914
916template <typename Scalar, typename Packet>
917EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from) {
918 (*to) = from;
919}
920
922template <typename Scalar, typename Packet>
923EIGEN_DEVICE_FUNC inline void pstoreu_partial(Scalar* to, const Packet& from, const Index n, const Index offset = 0) {
924 const Index packet_size = unpacket_traits<Packet>::size;
925 eigen_assert(n + offset <= packet_size && "number of elements plus offset will write past end of packet");
926 EIGEN_ALIGN_MAX Scalar elements[packet_size];
927 pstore<Scalar>(elements, from);
928 for (Index i = 0; i < numext::mini(n, packet_size - offset); i++) {
929 to[i] = elements[i + offset];
930 }
931}
932
937template <typename Scalar, typename Packet>
938EIGEN_DEVICE_FUNC inline std::enable_if_t<unpacket_traits<Packet>::masked_store_available, void> pstoreu(
939 Scalar* to, const Packet& from, typename unpacket_traits<Packet>::mask_t umask);
940
941template <typename Scalar, typename Packet>
942EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/) {
943 return ploadu<Packet>(from);
944}
945
946template <typename Scalar, typename Packet>
947EIGEN_DEVICE_FUNC inline Packet pgather_partial(const Scalar* from, Index stride, const Index n) {
948 const Index packet_size = unpacket_traits<Packet>::size;
949 EIGEN_ALIGN_MAX Scalar elements[packet_size] = {Scalar(0)};
950 for (Index i = 0; i < numext::mini(n, packet_size); i++) {
951 elements[i] = from[i * stride];
952 }
953 return pload<Packet>(elements);
954}
955
956template <typename Scalar, typename Packet>
957EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/) {
958 pstore(to, from);
959}
960
961template <typename Scalar, typename Packet>
962EIGEN_DEVICE_FUNC inline void pscatter_partial(Scalar* to, const Packet& from, Index stride, const Index n) {
963 const Index packet_size = unpacket_traits<Packet>::size;
964 EIGEN_ALIGN_MAX Scalar elements[packet_size];
965 pstore<Scalar>(elements, from);
966 for (Index i = 0; i < numext::mini(n, packet_size); i++) {
967 to[i * stride] = elements[i];
968 }
969}
970
972template <typename Scalar>
973EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr) {
974#if defined(EIGEN_HIP_DEVICE_COMPILE)
975 // do nothing
976#elif defined(EIGEN_CUDA_ARCH)
977#if defined(__LP64__) || EIGEN_OS_WIN64
978 // 64-bit pointer operand constraint for inlined asm
979 asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
980#else
981 // 32-bit pointer operand constraint for inlined asm
982 asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr));
983#endif
984#elif (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC)
985 __builtin_prefetch(addr);
986#endif
987}
988
990template <typename Packet>
991EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a) {
992 return a;
993}
994
996template <typename Packet>
997EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a) {
998 return Packet(numext::imag(a), numext::real(a));
999}
1000
1001/**************************
1002 * Special math functions
1003 ***************************/
1004
1006template <typename Packet>
1007EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin(const Packet& a) {
1008 EIGEN_USING_STD(sin);
1009 return sin(a);
1010}
1011
1013template <typename Packet>
1014EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos(const Packet& a) {
1015 EIGEN_USING_STD(cos);
1016 return cos(a);
1017}
1018
1020template <typename Packet>
1021EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptan(const Packet& a) {
1022 EIGEN_USING_STD(tan);
1023 return tan(a);
1024}
1025
1027template <typename Packet>
1028EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasin(const Packet& a) {
1029 EIGEN_USING_STD(asin);
1030 return asin(a);
1031}
1032
1034template <typename Packet>
1035EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacos(const Packet& a) {
1036 EIGEN_USING_STD(acos);
1037 return acos(a);
1038}
1039
1041template <typename Packet>
1042EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psinh(const Packet& a) {
1043 EIGEN_USING_STD(sinh);
1044 return sinh(a);
1045}
1046
1048template <typename Packet>
1049EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcosh(const Packet& a) {
1050 EIGEN_USING_STD(cosh);
1051 return cosh(a);
1052}
1053
1055template <typename Packet>
1056EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan(const Packet& a) {
1057 EIGEN_USING_STD(atan);
1058 return atan(a);
1059}
1060
1062template <typename Packet>
1063EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh(const Packet& a) {
1064 EIGEN_USING_STD(tanh);
1065 return tanh(a);
1066}
1067
1069template <typename Packet>
1070EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh(const Packet& a) {
1071 EIGEN_USING_STD(atanh);
1072 return atanh(a);
1073}
1074
1076template <typename Packet>
1077EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet& a) {
1078 EIGEN_USING_STD(exp);
1079 return exp(a);
1080}
1081
1083template <typename Packet>
1084EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexpm1(const Packet& a) {
1085 return numext::expm1(a);
1086}
1087
1089template <typename Packet>
1090EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet& a) {
1091 EIGEN_USING_STD(log);
1092 return log(a);
1093}
1094
1096template <typename Packet>
1097EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog1p(const Packet& a) {
1098 return numext::log1p(a);
1099}
1100
1102template <typename Packet>
1103EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog10(const Packet& a) {
1104 EIGEN_USING_STD(log10);
1105 return log10(a);
1106}
1107
1109template <typename Packet>
1110EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2(const Packet& a) {
1111 using Scalar = typename internal::unpacket_traits<Packet>::type;
1112 using RealScalar = typename NumTraits<Scalar>::Real;
1113 return pmul(pset1<Packet>(Scalar(RealScalar(EIGEN_LOG2E))), plog(a));
1114}
1115
1117template <typename Packet>
1118EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psqrt(const Packet& a) {
1119 return numext::sqrt(a);
1120}
1121
1123template <typename Packet>
1124EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcbrt(const Packet& a) {
1125 return numext::cbrt(a);
1126}
1127
1128template <typename Packet, bool IsScalar = is_scalar<Packet>::value,
1129 bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger>
1130struct nearest_integer_packetop_impl {
1131 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_floor(const Packet& x) { return numext::floor(x); }
1132 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_ceil(const Packet& x) { return numext::ceil(x); }
1133 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_rint(const Packet& x) { return numext::rint(x); }
1134 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_round(const Packet& x) { return numext::round(x); }
1135 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_trunc(const Packet& x) { return numext::trunc(x); }
1136};
1137
1139template <typename Packet>
1140EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pround(const Packet& a) {
1141 return nearest_integer_packetop_impl<Packet>::run_round(a);
1142}
1143
1145template <typename Packet>
1146EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pfloor(const Packet& a) {
1147 return nearest_integer_packetop_impl<Packet>::run_floor(a);
1148}
1149
1152template <typename Packet>
1153EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet print(const Packet& a) {
1154 return nearest_integer_packetop_impl<Packet>::run_rint(a);
1155}
1156
1158template <typename Packet>
1159EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pceil(const Packet& a) {
1160 return nearest_integer_packetop_impl<Packet>::run_ceil(a);
1161}
1162
1164template <typename Packet>
1165EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ptrunc(const Packet& a) {
1166 return nearest_integer_packetop_impl<Packet>::run_trunc(a);
1167}
1168
1169template <typename Packet, typename EnableIf = void>
1170struct psign_impl {
1171 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a) { return numext::sign(a); }
1172};
1173
1175template <typename Packet>
1176EIGEN_DEVICE_FUNC inline Packet psign(const Packet& a) {
1177 return psign_impl<Packet>::run(a);
1178}
1179
1180template <>
1181EIGEN_DEVICE_FUNC inline bool psign(const bool& a) {
1182 return a;
1183}
1184
1186template <typename Packet>
1187EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a) {
1188 return a;
1189}
1190
1195template <typename Packet>
1196EIGEN_DEVICE_FUNC inline std::conditional_t<(unpacket_traits<Packet>::size % 8) == 0,
1197 typename unpacket_traits<Packet>::half, Packet>
1198predux_half_dowto4(const Packet& a) {
1199 return a;
1200}
1201
1202// Slow generic implementation of Packet reduction.
1203template <typename Packet, typename Op>
1204EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_helper(const Packet& a, Op op) {
1205 typedef typename unpacket_traits<Packet>::type Scalar;
1206 const size_t n = unpacket_traits<Packet>::size;
1207 EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
1208 pstoreu<Scalar>(elements, a);
1209 for (size_t k = n / 2; k > 0; k /= 2) {
1210 for (size_t i = 0; i < k; ++i) {
1211 elements[i] = op(elements[i], elements[i + k]);
1212 }
1213 }
1214 return elements[0];
1215}
1216
1218template <typename Packet>
1219EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a) {
1220 return a;
1221}
1222
1224template <typename Packet>
1225EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a) {
1226 typedef typename unpacket_traits<Packet>::type Scalar;
1227 return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmul<Scalar>)));
1228}
1229
1231template <typename Packet>
1232EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a) {
1233 typedef typename unpacket_traits<Packet>::type Scalar;
1234 return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<PropagateFast, Scalar>)));
1235}
1236
1237template <int NaNPropagation, typename Packet>
1238EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a) {
1239 typedef typename unpacket_traits<Packet>::type Scalar;
1240 return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<NaNPropagation, Scalar>)));
1241}
1242
1244template <typename Packet>
1245EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a) {
1246 typedef typename unpacket_traits<Packet>::type Scalar;
1247 return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<PropagateFast, Scalar>)));
1248}
1249
1250template <int NaNPropagation, typename Packet>
1251EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a) {
1252 typedef typename unpacket_traits<Packet>::type Scalar;
1253 return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<NaNPropagation, Scalar>)));
1254}
1255
1256#undef EIGEN_BINARY_OP_NAN_PROPAGATION
1257
1261// not needed yet
1262// template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_all(const Packet& a)
1263// { return bool(a); }
1264
1268template <typename Packet>
1269EIGEN_DEVICE_FUNC inline bool predux_any(const Packet& a) {
1270 // Dirty but generic implementation where "true" is assumed to be non 0 and all the sames.
1271 // It is expected that "true" is either:
1272 // - Scalar(1)
1273 // - bits full of ones (NaN for floats),
1274 // - or first bit equals to 1 (1 for ints, smallest denormal for floats).
1275 // For all these cases, taking the sum is just fine, and this boils down to a no-op for scalars.
1276 typedef typename unpacket_traits<Packet>::type Scalar;
1277 return numext::not_equal_strict(predux(a), Scalar(0));
1278}
1279
1280/***************************************************************************
1281 * The following functions might not have to be overwritten for vectorized types
1282 ***************************************************************************/
1283
1284// FMA instructions.
1286template <typename Packet>
1287EIGEN_DEVICE_FUNC inline Packet pmadd(const Packet& a, const Packet& b, const Packet& c) {
1288 return padd(pmul(a, b), c);
1289}
1290
1292template <typename Packet>
1293EIGEN_DEVICE_FUNC inline Packet pmsub(const Packet& a, const Packet& b, const Packet& c) {
1294 return psub(pmul(a, b), c);
1295}
1296
1298template <typename Packet>
1299EIGEN_DEVICE_FUNC inline Packet pnmadd(const Packet& a, const Packet& b, const Packet& c) {
1300 return psub(c, pmul(a, b));
1301}
1302
1304template <typename Packet>
1305EIGEN_DEVICE_FUNC inline Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) {
1306 return pnegate(pmadd(a, b, c));
1307}
1308
1311// NOTE: this function must really be templated on the packet type (think about different packet types for the same
1312// scalar type)
1313template <typename Packet>
1314inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a) {
1315 pstore(to, pset1<Packet>(a));
1316}
1317
1320template <typename Packet, int Alignment>
1321EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from) {
1322 if (Alignment >= unpacket_traits<Packet>::alignment)
1323 return pload<Packet>(from);
1324 else
1325 return ploadu<Packet>(from);
1326}
1327
1330template <typename Packet, int Alignment>
1331EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_partial(const typename unpacket_traits<Packet>::type* from,
1332 const Index n, const Index offset = 0) {
1333 if (Alignment >= unpacket_traits<Packet>::alignment)
1334 return pload_partial<Packet>(from, n, offset);
1335 else
1336 return ploadu_partial<Packet>(from, n, offset);
1337}
1338
1341template <typename Scalar, typename Packet, int Alignment>
1342EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from) {
1343 if (Alignment >= unpacket_traits<Packet>::alignment)
1344 pstore(to, from);
1345 else
1346 pstoreu(to, from);
1347}
1348
1351template <typename Scalar, typename Packet, int Alignment>
1352EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret_partial(Scalar* to, const Packet& from, const Index n,
1353 const Index offset = 0) {
1354 if (Alignment >= unpacket_traits<Packet>::alignment)
1355 pstore_partial(to, from, n, offset);
1356 else
1357 pstoreu_partial(to, from, n, offset);
1358}
1359
1365template <typename Packet, int LoadMode>
1366EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from) {
1367 return ploadt<Packet, LoadMode>(from);
1368}
1369
1370/***************************************************************************
1371 * Fast complex products (GCC generates a function call which is very slow)
1372 ***************************************************************************/
1373
1374// Eigen+CUDA does not support complexes.
1375#if !defined(EIGEN_GPUCC)
1376
1377template <>
1378inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b) {
1379 return std::complex<float>(a.real() * b.real() - a.imag() * b.imag(), a.imag() * b.real() + a.real() * b.imag());
1380}
1381
1382template <>
1383inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b) {
1384 return std::complex<double>(a.real() * b.real() - a.imag() * b.imag(), a.imag() * b.real() + a.real() * b.imag());
1385}
1386
1387#endif
1388
1389/***************************************************************************
1390 * PacketBlock, that is a collection of N packets where the number of words
1391 * in the packet is a multiple of N.
1392 ***************************************************************************/
1393template <typename Packet, int N = unpacket_traits<Packet>::size>
1394struct PacketBlock {
1395 Packet packet[N];
1396};
1397
1398template <typename Packet>
1399EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet, 1>& /*kernel*/) {
1400 // Nothing to do in the scalar case, i.e. a 1x1 matrix.
1401}
1402
1403/***************************************************************************
1404 * Selector, i.e. vector of N boolean values used to select (i.e. blend)
1405 * words from 2 packets.
1406 ***************************************************************************/
1407template <size_t N>
1408struct Selector {
1409 bool select[N];
1410};
1411
1412template <typename Packet>
1413EIGEN_DEVICE_FUNC inline Packet pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket,
1414 const Packet& thenPacket, const Packet& elsePacket) {
1415 return ifPacket.select[0] ? thenPacket : elsePacket;
1416}
1417
1419template <typename Packet>
1420EIGEN_DEVICE_FUNC inline Packet preciprocal(const Packet& a) {
1421 using Scalar = typename unpacket_traits<Packet>::type;
1422 return pdiv(pset1<Packet>(Scalar(1)), a);
1423}
1424
1426template <typename Packet>
1427EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet prsqrt(const Packet& a) {
1428 return preciprocal<Packet>(psqrt(a));
1429}
1430
1431template <typename Packet, bool IsScalar = is_scalar<Packet>::value,
1432 bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger>
1433struct psignbit_impl;
1434template <typename Packet, bool IsInteger>
1435struct psignbit_impl<Packet, true, IsInteger> {
1436 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Packet run(const Packet& a) { return numext::signbit(a); }
1437};
1438template <typename Packet>
1439struct psignbit_impl<Packet, false, false> {
1440 // generic implementation if not specialized in PacketMath.h
1441 // slower than arithmetic shift
1442 typedef typename unpacket_traits<Packet>::type Scalar;
1443 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static Packet run(const Packet& a) {
1444 const Packet cst_pos_one = pset1<Packet>(Scalar(1));
1445 const Packet cst_neg_one = pset1<Packet>(Scalar(-1));
1446 return pcmp_eq(por(pand(a, cst_neg_one), cst_pos_one), cst_neg_one);
1447 }
1448};
1449template <typename Packet>
1450struct psignbit_impl<Packet, false, true> {
1451 // generic implementation for integer packets
1452 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Packet run(const Packet& a) { return pcmp_lt(a, pzero(a)); }
1453};
1455template <typename Packet>
1456EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr Packet psignbit(const Packet& a) {
1457 return psignbit_impl<Packet>::run(a);
1458}
1459
1461template <typename Packet, std::enable_if_t<is_scalar<Packet>::value, int> = 0>
1462EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet patan2(const Packet& y, const Packet& x) {
1463 return numext::atan2(y, x);
1464}
1465
1467template <typename Packet, std::enable_if_t<!is_scalar<Packet>::value, int> = 0>
1468EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet patan2(const Packet& y, const Packet& x) {
1469 typedef typename internal::unpacket_traits<Packet>::type Scalar;
1470
1471 // See https://en.cppreference.com/w/cpp/numeric/math/atan2
1472 // for how corner cases are supposed to be handled according to the
1473 // IEEE floating-point standard (IEC 60559).
1474 const Packet kSignMask = pset1<Packet>(-Scalar(0));
1475 const Packet kZero = pzero(x);
1476 const Packet kOne = pset1<Packet>(Scalar(1));
1477 const Packet kPi = pset1<Packet>(Scalar(EIGEN_PI));
1478
1479 const Packet x_has_signbit = psignbit(x);
1480 const Packet y_signmask = pand(y, kSignMask);
1481 const Packet x_signmask = pand(x, kSignMask);
1482 const Packet result_signmask = pxor(y_signmask, x_signmask);
1483 const Packet shift = por(pand(x_has_signbit, kPi), y_signmask);
1484
1485 const Packet x_and_y_are_same = pcmp_eq(pabs(x), pabs(y));
1486 const Packet x_and_y_are_zero = pcmp_eq(por(x, y), kZero);
1487
1488 Packet arg = pdiv(y, x);
1489 arg = pselect(x_and_y_are_same, por(kOne, result_signmask), arg);
1490 arg = pselect(x_and_y_are_zero, result_signmask, arg);
1491
1492 Packet result = patan(arg);
1493 result = padd(result, shift);
1494 return result;
1495}
1496
1498template <typename Packet, std::enable_if_t<is_scalar<Packet>::value, int> = 0>
1499EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pcarg(const Packet& a) {
1500 return Packet(numext::arg(a));
1501}
1502
1504template <typename Packet, std::enable_if_t<!is_scalar<Packet>::value, int> = 0>
1505EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pcarg(const Packet& a) {
1506 EIGEN_STATIC_ASSERT(NumTraits<typename unpacket_traits<Packet>::type>::IsComplex,
1507 THIS METHOD IS FOR COMPLEX TYPES ONLY)
1508 using RealPacket = typename unpacket_traits<Packet>::as_real;
1509 // a // r i r i ...
1510 RealPacket aflip = pcplxflip(a).v; // i r i r ...
1511 RealPacket result = patan2(aflip, a.v); // atan2 crap atan2 crap ...
1512 return (Packet)pand(result, peven_mask(result)); // atan2 0 atan2 0 ...
1513}
1514
1515} // end namespace internal
1516
1517} // end namespace Eigen
1518
1519#endif // EIGEN_GENERIC_PACKET_MATH_H
@ PropagateNaN
Definition Constants.h:342
@ PropagateNumbers
Definition Constants.h:344
Namespace containing all symbols from the Eigen library.
Definition Core:137
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_cosh_op< typename Derived::Scalar >, const Derived > cosh(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_cos_op< typename Derived::Scalar >, const Derived > cos(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_atanh_op< typename Derived::Scalar >, const Derived > atanh(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_exp_op< typename Derived::Scalar >, const Derived > exp(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_tan_op< typename Derived::Scalar >, const Derived > tan(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_atan_op< typename Derived::Scalar >, const Derived > atan(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_log_op< typename Derived::Scalar >, const Derived > log(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_sin_op< typename Derived::Scalar >, const Derived > sin(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_asin_op< typename Derived::Scalar >, const Derived > asin(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_tanh_op< typename Derived::Scalar >, const Derived > tanh(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_acos_op< typename Derived::Scalar >, const Derived > acos(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_sinh_op< typename Derived::Scalar >, const Derived > sinh(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_log10_op< typename Derived::Scalar >, const Derived > log10(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_arg_op< typename Derived::Scalar >, const Derived > arg(const Eigen::ArrayBase< Derived > &x)