Eigen  3.4.90 (git rev 5a9f66fb35d03a4da9ef8976e67a61b30aa16dcf)
 
Loading...
Searching...
No Matches
MSA/PacketMath.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2018 Wave Computing, Inc.
5// Written by:
6// Chris Larsen
7// Alexey Frunze ([email protected])
8//
9// This Source Code Form is subject to the terms of the Mozilla
10// Public License v. 2.0. If a copy of the MPL was not distributed
11// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
12
13#ifndef EIGEN_PACKET_MATH_MSA_H
14#define EIGEN_PACKET_MATH_MSA_H
15
16#include <iostream>
17#include <string>
18
19// IWYU pragma: private
20#include "../../InternalHeaderCheck.h"
21
22namespace Eigen {
23
24namespace internal {
25
// Architecture defaults for MSA. Each one is only defined here when the user
// (or an earlier header) has not already provided a value.
#if !defined(EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD)
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
#endif

#if !defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD)
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
#endif

#if !defined(EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS)
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
#endif
37
// Optional call tracing for the MSA primitives. Change `#if 0` to `#if 1` to
// make every primitive print file:line:function the first time it executes;
// otherwise EIGEN_MSA_DEBUG expands to nothing.
#if 0
#define EIGEN_MSA_DEBUG                                                             \
  static bool eigenMsaFirstCall = true;                                             \
  do {                                                                              \
    if (eigenMsaFirstCall) {                                                        \
      std::cout << __FILE__ << ':' << __LINE__ << ':' << __FUNCTION__ << std::endl; \
      eigenMsaFirstCall = false;                                                    \
    }                                                                               \
  } while (0)
#else
#define EIGEN_MSA_DEBUG
#endif
50
51#define EIGEN_MSA_SHF_I8(a, b, c, d) (((d) << 6) | ((c) << 4) | ((b) << 2) | (a))
52
53typedef v4f32 Packet4f;
54typedef v4i32 Packet4i;
55typedef v4u32 Packet4ui;
56
57#define EIGEN_DECLARE_CONST_Packet4f(NAME, X) const Packet4f p4f_##NAME = {X, X, X, X}
58#define EIGEN_DECLARE_CONST_Packet4i(NAME, X) const Packet4i p4i_##NAME = {X, X, X, X}
59#define EIGEN_DECLARE_CONST_Packet4ui(NAME, X) const Packet4ui p4ui_##NAME = {X, X, X, X}
60
61inline std::ostream& operator<<(std::ostream& os, const Packet4f& value) {
62 os << "[ " << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << " ]";
63 return os;
64}
65
66inline std::ostream& operator<<(std::ostream& os, const Packet4i& value) {
67 os << "[ " << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << " ]";
68 return os;
69}
70
71inline std::ostream& operator<<(std::ostream& os, const Packet4ui& value) {
72 os << "[ " << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << " ]";
73 return os;
74}
75
76template <>
77struct packet_traits<float> : default_packet_traits {
78 typedef Packet4f type;
79 typedef Packet4f half; // Packet2f intrinsics not implemented yet
80 enum {
81 Vectorizable = 1,
82 AlignedOnScalar = 1,
83 size = 4,
84 // FIXME check the Has*
85 HasDiv = 1,
86 HasSin = EIGEN_FAST_MATH,
87 HasCos = EIGEN_FAST_MATH,
88 HasTanh = EIGEN_FAST_MATH,
89 HasErf = EIGEN_FAST_MATH,
90 HasLog = 1,
91 HasExp = 1,
92 HasSqrt = 1,
93 HasRsqrt = 1,
94 HasBlend = 1
95 };
96};
97
98template <>
99struct packet_traits<int32_t> : default_packet_traits {
100 typedef Packet4i type;
101 typedef Packet4i half; // Packet2i intrinsics not implemented yet
102 enum {
103 Vectorizable = 1,
104 AlignedOnScalar = 1,
105 size = 4,
106 // FIXME check the Has*
107 HasDiv = 1,
108 HasBlend = 1
109 };
110};
111
112template <>
113struct unpacket_traits<Packet4f> {
114 typedef float type;
115 enum {
116 size = 4,
117 alignment = Aligned16,
118 vectorizable = true,
119 masked_load_available = false,
120 masked_store_available = false
121 };
122 typedef Packet4f half;
123};
124
125template <>
126struct unpacket_traits<Packet4i> {
127 typedef int32_t type;
128 enum {
129 size = 4,
130 alignment = Aligned16,
131 vectorizable = true,
132 masked_load_available = false,
133 masked_store_available = false
134 };
135 typedef Packet4i half;
136};
137
138template <>
139EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
140 EIGEN_MSA_DEBUG;
141
142 Packet4f v = {from, from, from, from};
143 return v;
144}
145
146template <>
147EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int32_t& from) {
148 EIGEN_MSA_DEBUG;
149
150 return __builtin_msa_fill_w(from);
151}
152
153template <>
154EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float* from) {
155 EIGEN_MSA_DEBUG;
156
157 float f = *from;
158 Packet4f v = {f, f, f, f};
159 return v;
160}
161
162template <>
163EIGEN_STRONG_INLINE Packet4i pload1<Packet4i>(const int32_t* from) {
164 EIGEN_MSA_DEBUG;
165
166 return __builtin_msa_fill_w(*from);
167}
168
169template <>
170EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) {
171 EIGEN_MSA_DEBUG;
172
173 return __builtin_msa_fadd_w(a, b);
174}
175
176template <>
177EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) {
178 EIGEN_MSA_DEBUG;
179
180 return __builtin_msa_addv_w(a, b);
181}
182
183template <>
184EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) {
185 EIGEN_MSA_DEBUG;
186
187 static const Packet4f countdown = {0.0f, 1.0f, 2.0f, 3.0f};
188 return padd(pset1<Packet4f>(a), countdown);
189}
190
191template <>
192EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int32_t& a) {
193 EIGEN_MSA_DEBUG;
194
195 static const Packet4i countdown = {0, 1, 2, 3};
196 return padd(pset1<Packet4i>(a), countdown);
197}
198
199template <>
200EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) {
201 EIGEN_MSA_DEBUG;
202
203 return __builtin_msa_fsub_w(a, b);
204}
205
206template <>
207EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) {
208 EIGEN_MSA_DEBUG;
209
210 return __builtin_msa_subv_w(a, b);
211}
212
213template <>
214EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) {
215 EIGEN_MSA_DEBUG;
216
217 return (Packet4f)__builtin_msa_bnegi_w((v4u32)a, 31);
218}
219
220template <>
221EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) {
222 EIGEN_MSA_DEBUG;
223
224 return __builtin_msa_addvi_w((v4i32)__builtin_msa_nori_b((v16u8)a, 0), 1);
225}
226
227template <>
228EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) {
229 EIGEN_MSA_DEBUG;
230
231 return a;
232}
233
234template <>
235EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) {
236 EIGEN_MSA_DEBUG;
237
238 return a;
239}
240
241template <>
242EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) {
243 EIGEN_MSA_DEBUG;
244
245 return __builtin_msa_fmul_w(a, b);
246}
247
248template <>
249EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) {
250 EIGEN_MSA_DEBUG;
251
252 return __builtin_msa_mulv_w(a, b);
253}
254
255template <>
256EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) {
257 EIGEN_MSA_DEBUG;
258
259 return __builtin_msa_fdiv_w(a, b);
260}
261
262template <>
263EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& a, const Packet4i& b) {
264 EIGEN_MSA_DEBUG;
265
266 return __builtin_msa_div_s_w(a, b);
267}
268
269template <>
270EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
271 EIGEN_MSA_DEBUG;
272
273 return __builtin_msa_fmadd_w(c, a, b);
274}
275
276template <>
277EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) {
278 EIGEN_MSA_DEBUG;
279
280 // Use "asm" construct to avoid __builtin_msa_maddv_w GNU C bug.
281 Packet4i value = c;
282 __asm__("maddv.w %w[value], %w[a], %w[b]\n"
283 // Outputs
284 : [value] "+f"(value)
285 // Inputs
286 : [a] "f"(a), [b] "f"(b));
287 return value;
288}
289
290template <>
291EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) {
292 EIGEN_MSA_DEBUG;
293
294 return (Packet4f)__builtin_msa_and_v((v16u8)a, (v16u8)b);
295}
296
297template <>
298EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) {
299 EIGEN_MSA_DEBUG;
300
301 return (Packet4i)__builtin_msa_and_v((v16u8)a, (v16u8)b);
302}
303
304template <>
305EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) {
306 EIGEN_MSA_DEBUG;
307
308 return (Packet4f)__builtin_msa_or_v((v16u8)a, (v16u8)b);
309}
310
311template <>
312EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) {
313 EIGEN_MSA_DEBUG;
314
315 return (Packet4i)__builtin_msa_or_v((v16u8)a, (v16u8)b);
316}
317
318template <>
319EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) {
320 EIGEN_MSA_DEBUG;
321
322 return (Packet4f)__builtin_msa_xor_v((v16u8)a, (v16u8)b);
323}
324
325template <>
326EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) {
327 EIGEN_MSA_DEBUG;
328
329 return (Packet4i)__builtin_msa_xor_v((v16u8)a, (v16u8)b);
330}
331
332template <>
333EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) {
334 EIGEN_MSA_DEBUG;
335
336 return pand(a, (Packet4f)__builtin_msa_xori_b((v16u8)b, 255));
337}
338
339template <>
340EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) {
341 EIGEN_MSA_DEBUG;
342
343 return pand(a, (Packet4i)__builtin_msa_xori_b((v16u8)b, 255));
344}
345
346template <>
347EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) {
348 EIGEN_MSA_DEBUG;
349
350#if EIGEN_FAST_MATH
351 // This prefers numbers to NaNs.
352 return __builtin_msa_fmin_w(a, b);
353#else
354 // This prefers NaNs to numbers.
355 Packet4i aNaN = __builtin_msa_fcun_w(a, a);
356 Packet4i aMinOrNaN = por(__builtin_msa_fclt_w(a, b), aNaN);
357 return (Packet4f)__builtin_msa_bsel_v((v16u8)aMinOrNaN, (v16u8)b, (v16u8)a);
358#endif
359}
360
361template <>
362EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) {
363 EIGEN_MSA_DEBUG;
364
365 return __builtin_msa_min_s_w(a, b);
366}
367
368template <>
369EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) {
370 EIGEN_MSA_DEBUG;
371
372#if EIGEN_FAST_MATH
373 // This prefers numbers to NaNs.
374 return __builtin_msa_fmax_w(a, b);
375#else
376 // This prefers NaNs to numbers.
377 Packet4i aNaN = __builtin_msa_fcun_w(a, a);
378 Packet4i aMaxOrNaN = por(__builtin_msa_fclt_w(b, a), aNaN);
379 return (Packet4f)__builtin_msa_bsel_v((v16u8)aMaxOrNaN, (v16u8)b, (v16u8)a);
380#endif
381}
382
383template <>
384EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) {
385 EIGEN_MSA_DEBUG;
386
387 return __builtin_msa_max_s_w(a, b);
388}
389
390template <>
391EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) {
392 EIGEN_MSA_DEBUG;
393
394 EIGEN_DEBUG_ALIGNED_LOAD return (Packet4f)__builtin_msa_ld_w(const_cast<float*>(from), 0);
395}
396
397template <>
398EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int32_t* from) {
399 EIGEN_MSA_DEBUG;
400
401 EIGEN_DEBUG_ALIGNED_LOAD return __builtin_msa_ld_w(const_cast<int32_t*>(from), 0);
402}
403
404template <>
405EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) {
406 EIGEN_MSA_DEBUG;
407
408 EIGEN_DEBUG_UNALIGNED_LOAD return (Packet4f)__builtin_msa_ld_w(const_cast<float*>(from), 0);
409}
410
411template <>
412EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int32_t* from) {
413 EIGEN_MSA_DEBUG;
414
415 EIGEN_DEBUG_UNALIGNED_LOAD return (Packet4i)__builtin_msa_ld_w(const_cast<int32_t*>(from), 0);
416}
417
418template <>
419EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from) {
420 EIGEN_MSA_DEBUG;
421
422 float f0 = from[0], f1 = from[1];
423 Packet4f v0 = {f0, f0, f0, f0};
424 Packet4f v1 = {f1, f1, f1, f1};
425 return (Packet4f)__builtin_msa_ilvr_d((v2i64)v1, (v2i64)v0);
426}
427
428template <>
429EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int32_t* from) {
430 EIGEN_MSA_DEBUG;
431
432 int32_t i0 = from[0], i1 = from[1];
433 Packet4i v0 = {i0, i0, i0, i0};
434 Packet4i v1 = {i1, i1, i1, i1};
435 return (Packet4i)__builtin_msa_ilvr_d((v2i64)v1, (v2i64)v0);
436}
437
438template <>
439EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) {
440 EIGEN_MSA_DEBUG;
441
442 EIGEN_DEBUG_ALIGNED_STORE __builtin_msa_st_w((Packet4i)from, to, 0);
443}
444
445template <>
446EIGEN_STRONG_INLINE void pstore<int32_t>(int32_t* to, const Packet4i& from) {
447 EIGEN_MSA_DEBUG;
448
449 EIGEN_DEBUG_ALIGNED_STORE __builtin_msa_st_w(from, to, 0);
450}
451
452template <>
453EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) {
454 EIGEN_MSA_DEBUG;
455
456 EIGEN_DEBUG_UNALIGNED_STORE __builtin_msa_st_w((Packet4i)from, to, 0);
457}
458
459template <>
460EIGEN_STRONG_INLINE void pstoreu<int32_t>(int32_t* to, const Packet4i& from) {
461 EIGEN_MSA_DEBUG;
462
463 EIGEN_DEBUG_UNALIGNED_STORE __builtin_msa_st_w(from, to, 0);
464}
465
466template <>
467EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride) {
468 EIGEN_MSA_DEBUG;
469
470 float f = *from;
471 Packet4f v = {f, f, f, f};
472 v[1] = from[stride];
473 v[2] = from[2 * stride];
474 v[3] = from[3 * stride];
475 return v;
476}
477
478template <>
479EIGEN_DEVICE_FUNC inline Packet4i pgather<int32_t, Packet4i>(const int32_t* from, Index stride) {
480 EIGEN_MSA_DEBUG;
481
482 int32_t i = *from;
483 Packet4i v = {i, i, i, i};
484 v[1] = from[stride];
485 v[2] = from[2 * stride];
486 v[3] = from[3 * stride];
487 return v;
488}
489
490template <>
491EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride) {
492 EIGEN_MSA_DEBUG;
493
494 *to = from[0];
495 to += stride;
496 *to = from[1];
497 to += stride;
498 *to = from[2];
499 to += stride;
500 *to = from[3];
501}
502
503template <>
504EIGEN_DEVICE_FUNC inline void pscatter<int32_t, Packet4i>(int32_t* to, const Packet4i& from, Index stride) {
505 EIGEN_MSA_DEBUG;
506
507 *to = from[0];
508 to += stride;
509 *to = from[1];
510 to += stride;
511 *to = from[2];
512 to += stride;
513 *to = from[3];
514}
515
516template <>
517EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) {
518 EIGEN_MSA_DEBUG;
519
520 __builtin_prefetch(addr);
521}
522
523template <>
524EIGEN_STRONG_INLINE void prefetch<int32_t>(const int32_t* addr) {
525 EIGEN_MSA_DEBUG;
526
527 __builtin_prefetch(addr);
528}
529
530template <>
531EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) {
532 EIGEN_MSA_DEBUG;
533
534 return a[0];
535}
536
537template <>
538EIGEN_STRONG_INLINE int32_t pfirst<Packet4i>(const Packet4i& a) {
539 EIGEN_MSA_DEBUG;
540
541 return a[0];
542}
543
544template <>
545EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) {
546 EIGEN_MSA_DEBUG;
547
548 return (Packet4f)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(3, 2, 1, 0));
549}
550
551template <>
552EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) {
553 EIGEN_MSA_DEBUG;
554
555 return __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(3, 2, 1, 0));
556}
557
558template <>
559EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) {
560 EIGEN_MSA_DEBUG;
561
562 return (Packet4f)__builtin_msa_bclri_w((v4u32)a, 31);
563}
564
565template <>
566EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) {
567 EIGEN_MSA_DEBUG;
568
569 Packet4i zero = __builtin_msa_ldi_w(0);
570 return __builtin_msa_add_a_w(zero, a);
571}
572
573template <>
574EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a) {
575 EIGEN_MSA_DEBUG;
576
577 Packet4f s = padd(a, (Packet4f)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
578 s = padd(s, (Packet4f)__builtin_msa_shf_w((v4i32)s, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
579 return s[0];
580}
581
582template <>
583EIGEN_STRONG_INLINE int32_t predux<Packet4i>(const Packet4i& a) {
584 EIGEN_MSA_DEBUG;
585
586 Packet4i s = padd(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
587 s = padd(s, __builtin_msa_shf_w(s, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
588 return s[0];
589}
590
591// Other reduction functions:
592// mul
593template <>
594EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a) {
595 EIGEN_MSA_DEBUG;
596
597 Packet4f p = pmul(a, (Packet4f)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
598 p = pmul(p, (Packet4f)__builtin_msa_shf_w((v4i32)p, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
599 return p[0];
600}
601
602template <>
603EIGEN_STRONG_INLINE int32_t predux_mul<Packet4i>(const Packet4i& a) {
604 EIGEN_MSA_DEBUG;
605
606 Packet4i p = pmul(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
607 p = pmul(p, __builtin_msa_shf_w(p, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
608 return p[0];
609}
610
611// min
612template <>
613EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a) {
614 EIGEN_MSA_DEBUG;
615
616 // Swap 64-bit halves of a.
617 Packet4f swapped = (Packet4f)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
618#if !EIGEN_FAST_MATH
619 // Detect presence of NaNs from pairs a[0]-a[2] and a[1]-a[3] as two 32-bit
620 // masks of all zeroes/ones in low 64 bits.
621 v16u8 unord = (v16u8)__builtin_msa_fcun_w(a, swapped);
622 // Combine the two masks into one: 64 ones if no NaNs, otherwise 64 zeroes.
623 unord = (v16u8)__builtin_msa_ceqi_d((v2i64)unord, 0);
624#endif
625 // Continue with min computation.
626 Packet4f v = __builtin_msa_fmin_w(a, swapped);
627 v = __builtin_msa_fmin_w(v, (Packet4f)__builtin_msa_shf_w((Packet4i)v, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
628#if !EIGEN_FAST_MATH
629 // Based on the mask select between v and 4 qNaNs.
630 v16u8 qnans = (v16u8)__builtin_msa_fill_w(0x7FC00000);
631 v = (Packet4f)__builtin_msa_bsel_v(unord, qnans, (v16u8)v);
632#endif
633 return v[0];
634}
635
636template <>
637EIGEN_STRONG_INLINE int32_t predux_min<Packet4i>(const Packet4i& a) {
638 EIGEN_MSA_DEBUG;
639
640 Packet4i m = pmin(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
641 m = pmin(m, __builtin_msa_shf_w(m, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
642 return m[0];
643}
644
645// max
646template <>
647EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a) {
648 EIGEN_MSA_DEBUG;
649
650 // Swap 64-bit halves of a.
651 Packet4f swapped = (Packet4f)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
652#if !EIGEN_FAST_MATH
653 // Detect presence of NaNs from pairs a[0]-a[2] and a[1]-a[3] as two 32-bit
654 // masks of all zeroes/ones in low 64 bits.
655 v16u8 unord = (v16u8)__builtin_msa_fcun_w(a, swapped);
656 // Combine the two masks into one: 64 ones if no NaNs, otherwise 64 zeroes.
657 unord = (v16u8)__builtin_msa_ceqi_d((v2i64)unord, 0);
658#endif
659 // Continue with max computation.
660 Packet4f v = __builtin_msa_fmax_w(a, swapped);
661 v = __builtin_msa_fmax_w(v, (Packet4f)__builtin_msa_shf_w((Packet4i)v, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
662#if !EIGEN_FAST_MATH
663 // Based on the mask select between v and 4 qNaNs.
664 v16u8 qnans = (v16u8)__builtin_msa_fill_w(0x7FC00000);
665 v = (Packet4f)__builtin_msa_bsel_v(unord, qnans, (v16u8)v);
666#endif
667 return v[0];
668}
669
670template <>
671EIGEN_STRONG_INLINE int32_t predux_max<Packet4i>(const Packet4i& a) {
672 EIGEN_MSA_DEBUG;
673
674 Packet4i m = pmax(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
675 m = pmax(m, __builtin_msa_shf_w(m, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
676 return m[0];
677}
678
679inline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet4f, 4>& value) {
680 os << "[ " << value.packet[0] << "," << std::endl
681 << " " << value.packet[1] << "," << std::endl
682 << " " << value.packet[2] << "," << std::endl
683 << " " << value.packet[3] << " ]";
684 return os;
685}
686
687EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f, 4>& kernel) {
688 EIGEN_MSA_DEBUG;
689
690 v4i32 tmp1, tmp2, tmp3, tmp4;
691
692 tmp1 = __builtin_msa_ilvr_w((v4i32)kernel.packet[1], (v4i32)kernel.packet[0]);
693 tmp2 = __builtin_msa_ilvr_w((v4i32)kernel.packet[3], (v4i32)kernel.packet[2]);
694 tmp3 = __builtin_msa_ilvl_w((v4i32)kernel.packet[1], (v4i32)kernel.packet[0]);
695 tmp4 = __builtin_msa_ilvl_w((v4i32)kernel.packet[3], (v4i32)kernel.packet[2]);
696
697 kernel.packet[0] = (Packet4f)__builtin_msa_ilvr_d((v2i64)tmp2, (v2i64)tmp1);
698 kernel.packet[1] = (Packet4f)__builtin_msa_ilvod_d((v2i64)tmp2, (v2i64)tmp1);
699 kernel.packet[2] = (Packet4f)__builtin_msa_ilvr_d((v2i64)tmp4, (v2i64)tmp3);
700 kernel.packet[3] = (Packet4f)__builtin_msa_ilvod_d((v2i64)tmp4, (v2i64)tmp3);
701}
702
703inline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet4i, 4>& value) {
704 os << "[ " << value.packet[0] << "," << std::endl
705 << " " << value.packet[1] << "," << std::endl
706 << " " << value.packet[2] << "," << std::endl
707 << " " << value.packet[3] << " ]";
708 return os;
709}
710
711EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4i, 4>& kernel) {
712 EIGEN_MSA_DEBUG;
713
714 v4i32 tmp1, tmp2, tmp3, tmp4;
715
716 tmp1 = __builtin_msa_ilvr_w(kernel.packet[1], kernel.packet[0]);
717 tmp2 = __builtin_msa_ilvr_w(kernel.packet[3], kernel.packet[2]);
718 tmp3 = __builtin_msa_ilvl_w(kernel.packet[1], kernel.packet[0]);
719 tmp4 = __builtin_msa_ilvl_w(kernel.packet[3], kernel.packet[2]);
720
721 kernel.packet[0] = (Packet4i)__builtin_msa_ilvr_d((v2i64)tmp2, (v2i64)tmp1);
722 kernel.packet[1] = (Packet4i)__builtin_msa_ilvod_d((v2i64)tmp2, (v2i64)tmp1);
723 kernel.packet[2] = (Packet4i)__builtin_msa_ilvr_d((v2i64)tmp4, (v2i64)tmp3);
724 kernel.packet[3] = (Packet4i)__builtin_msa_ilvod_d((v2i64)tmp4, (v2i64)tmp3);
725}
726
727template <>
728EIGEN_STRONG_INLINE Packet4f psqrt(const Packet4f& a) {
729 EIGEN_MSA_DEBUG;
730
731 return __builtin_msa_fsqrt_w(a);
732}
733
734template <>
735EIGEN_STRONG_INLINE Packet4f prsqrt(const Packet4f& a) {
736 EIGEN_MSA_DEBUG;
737
738#if EIGEN_FAST_MATH
739 return __builtin_msa_frsqrt_w(a);
740#else
741 Packet4f ones = __builtin_msa_ffint_s_w(__builtin_msa_ldi_w(1));
742 return pdiv(ones, psqrt(a));
743#endif
744}
745
746template <>
747EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) {
748 Packet4f v = a;
749 int32_t old_mode, new_mode;
750 asm volatile(
751 "cfcmsa %[old_mode], $1\n"
752 "ori %[new_mode], %[old_mode], 3\n" // 3 = round towards -INFINITY.
753 "ctcmsa $1, %[new_mode]\n"
754 "frint.w %w[v], %w[v]\n"
755 "ctcmsa $1, %[old_mode]\n"
756 : // outputs
757 [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
758 [v] "+f"(v)
759 : // inputs
760 : // clobbers
761 );
762 return v;
763}
764
765template <>
766EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) {
767 Packet4f v = a;
768 int32_t old_mode, new_mode;
769 asm volatile(
770 "cfcmsa %[old_mode], $1\n"
771 "ori %[new_mode], %[old_mode], 3\n"
772 "xori %[new_mode], %[new_mode], 1\n" // 2 = round towards +INFINITY.
773 "ctcmsa $1, %[new_mode]\n"
774 "frint.w %w[v], %w[v]\n"
775 "ctcmsa $1, %[old_mode]\n"
776 : // outputs
777 [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
778 [v] "+f"(v)
779 : // inputs
780 : // clobbers
781 );
782 return v;
783}
784
785template <>
786EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) {
787 Packet4f v = a;
788 int32_t old_mode, new_mode;
789 asm volatile(
790 "cfcmsa %[old_mode], $1\n"
791 "ori %[new_mode], %[old_mode], 3\n"
792 "xori %[new_mode], %[new_mode], 3\n" // 0 = round to nearest, ties to even.
793 "ctcmsa $1, %[new_mode]\n"
794 "frint.w %w[v], %w[v]\n"
795 "ctcmsa $1, %[old_mode]\n"
796 : // outputs
797 [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
798 [v] "+f"(v)
799 : // inputs
800 : // clobbers
801 );
802 return v;
803}
804
805template <>
806EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket,
807 const Packet4f& elsePacket) {
808 Packet4ui select = {ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3]};
809 Packet4i mask = __builtin_msa_ceqi_w((Packet4i)select, 0);
810 return (Packet4f)__builtin_msa_bsel_v((v16u8)mask, (v16u8)thenPacket, (v16u8)elsePacket);
811}
812
813template <>
814EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket,
815 const Packet4i& elsePacket) {
816 Packet4ui select = {ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3]};
817 Packet4i mask = __builtin_msa_ceqi_w((Packet4i)select, 0);
818 return (Packet4i)__builtin_msa_bsel_v((v16u8)mask, (v16u8)thenPacket, (v16u8)elsePacket);
819}
820
821//---------- double ----------
822
823typedef v2f64 Packet2d;
824typedef v2i64 Packet2l;
825typedef v2u64 Packet2ul;
826
827#define EIGEN_DECLARE_CONST_Packet2d(NAME, X) const Packet2d p2d_##NAME = {X, X}
828#define EIGEN_DECLARE_CONST_Packet2l(NAME, X) const Packet2l p2l_##NAME = {X, X}
829#define EIGEN_DECLARE_CONST_Packet2ul(NAME, X) const Packet2ul p2ul_##NAME = {X, X}
830
831inline std::ostream& operator<<(std::ostream& os, const Packet2d& value) {
832 os << "[ " << value[0] << ", " << value[1] << " ]";
833 return os;
834}
835
836inline std::ostream& operator<<(std::ostream& os, const Packet2l& value) {
837 os << "[ " << value[0] << ", " << value[1] << " ]";
838 return os;
839}
840
841inline std::ostream& operator<<(std::ostream& os, const Packet2ul& value) {
842 os << "[ " << value[0] << ", " << value[1] << " ]";
843 return os;
844}
845
846template <>
847struct packet_traits<double> : default_packet_traits {
848 typedef Packet2d type;
849 typedef Packet2d half;
850 enum {
851 Vectorizable = 1,
852 AlignedOnScalar = 1,
853 size = 2,
854 // FIXME check the Has*
855 HasDiv = 1,
856 HasExp = 1,
857 HasSqrt = 1,
858 HasRsqrt = 1,
859 HasBlend = 1
860 };
861};
862
863template <>
864struct unpacket_traits<Packet2d> {
865 typedef double type;
866 enum {
867 size = 2,
868 alignment = Aligned16,
869 vectorizable = true,
870 masked_load_available = false,
871 masked_store_available = false
872 };
873 typedef Packet2d half;
874};
875
876template <>
877EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
878 EIGEN_MSA_DEBUG;
879
880 Packet2d value = {from, from};
881 return value;
882}
883
884template <>
885EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) {
886 EIGEN_MSA_DEBUG;
887
888 return __builtin_msa_fadd_d(a, b);
889}
890
891template <>
892EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) {
893 EIGEN_MSA_DEBUG;
894
895 static const Packet2d countdown = {0.0, 1.0};
896 return padd(pset1<Packet2d>(a), countdown);
897}
898
899template <>
900EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) {
901 EIGEN_MSA_DEBUG;
902
903 return __builtin_msa_fsub_d(a, b);
904}
905
906template <>
907EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) {
908 EIGEN_MSA_DEBUG;
909
910 return (Packet2d)__builtin_msa_bnegi_d((v2u64)a, 63);
911}
912
913template <>
914EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) {
915 EIGEN_MSA_DEBUG;
916
917 return a;
918}
919
920template <>
921EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) {
922 EIGEN_MSA_DEBUG;
923
924 return __builtin_msa_fmul_d(a, b);
925}
926
927template <>
928EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) {
929 EIGEN_MSA_DEBUG;
930
931 return __builtin_msa_fdiv_d(a, b);
932}
933
934template <>
935EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) {
936 EIGEN_MSA_DEBUG;
937
938 return __builtin_msa_fmadd_d(c, a, b);
939}
940
// Bitwise logical operations are not available for floating-point vectors, so the operands
// are reinterpreted as byte vectors (v16u8) and combined with the MSA bitwise intrinsics.
943template <>
944EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) {
945 EIGEN_MSA_DEBUG;
946
947 return (Packet2d)__builtin_msa_and_v((v16u8)a, (v16u8)b);
948}
949
950template <>
951EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) {
952 EIGEN_MSA_DEBUG;
953
954 return (Packet2d)__builtin_msa_or_v((v16u8)a, (v16u8)b);
955}
956
957template <>
958EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) {
959 EIGEN_MSA_DEBUG;
960
961 return (Packet2d)__builtin_msa_xor_v((v16u8)a, (v16u8)b);
962}
963
964template <>
965EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) {
966 EIGEN_MSA_DEBUG;
967
968 return pand(a, (Packet2d)__builtin_msa_xori_b((v16u8)b, 255));
969}
970
971template <>
972EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) {
973 EIGEN_MSA_DEBUG;
974
975 EIGEN_DEBUG_UNALIGNED_LOAD return (Packet2d)__builtin_msa_ld_d(const_cast<double*>(from), 0);
976}
977
978template <>
979EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) {
980 EIGEN_MSA_DEBUG;
981
982#if EIGEN_FAST_MATH
983 // This prefers numbers to NaNs.
984 return __builtin_msa_fmin_d(a, b);
985#else
986 // This prefers NaNs to numbers.
987 v2i64 aNaN = __builtin_msa_fcun_d(a, a);
988 v2i64 aMinOrNaN = por(__builtin_msa_fclt_d(a, b), aNaN);
989 return (Packet2d)__builtin_msa_bsel_v((v16u8)aMinOrNaN, (v16u8)b, (v16u8)a);
990#endif
991}
992
993template <>
994EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) {
995 EIGEN_MSA_DEBUG;
996
997#if EIGEN_FAST_MATH
998 // This prefers numbers to NaNs.
999 return __builtin_msa_fmax_d(a, b);
1000#else
1001 // This prefers NaNs to numbers.
1002 v2i64 aNaN = __builtin_msa_fcun_d(a, a);
1003 v2i64 aMaxOrNaN = por(__builtin_msa_fclt_d(b, a), aNaN);
1004 return (Packet2d)__builtin_msa_bsel_v((v16u8)aMaxOrNaN, (v16u8)b, (v16u8)a);
1005#endif
1006}
1007
1008template <>
1009EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) {
1010 EIGEN_MSA_DEBUG;
1011
1012 EIGEN_DEBUG_UNALIGNED_LOAD return (Packet2d)__builtin_msa_ld_d(const_cast<double*>(from), 0);
1013}
1014
1015template <>
1016EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from) {
1017 EIGEN_MSA_DEBUG;
1018
1019 Packet2d value = {*from, *from};
1020 return value;
1021}
1022
1023template <>
1024EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) {
1025 EIGEN_MSA_DEBUG;
1026
1027 EIGEN_DEBUG_ALIGNED_STORE __builtin_msa_st_d((v2i64)from, to, 0);
1028}
1029
1030template <>
1031EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) {
1032 EIGEN_MSA_DEBUG;
1033
1034 EIGEN_DEBUG_UNALIGNED_STORE __builtin_msa_st_d((v2i64)from, to, 0);
1035}
1036
1037template <>
1038EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride) {
1039 EIGEN_MSA_DEBUG;
1040
1041 Packet2d value;
1042 value[0] = *from;
1043 from += stride;
1044 value[1] = *from;
1045 return value;
1046}
1047
1048template <>
1049EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride) {
1050 EIGEN_MSA_DEBUG;
1051
1052 *to = from[0];
1053 to += stride;
1054 *to = from[1];
1055}
1056
1057template <>
1058EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) {
1059 EIGEN_MSA_DEBUG;
1060
1061 __builtin_prefetch(addr);
1062}
1063
1064template <>
1065EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) {
1066 EIGEN_MSA_DEBUG;
1067
1068 return a[0];
1069}
1070
1071template <>
1072EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) {
1073 EIGEN_MSA_DEBUG;
1074
1075 return (Packet2d)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
1076}
1077
1078template <>
1079EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) {
1080 EIGEN_MSA_DEBUG;
1081
1082 return (Packet2d)__builtin_msa_bclri_d((v2u64)a, 63);
1083}
1084
1085template <>
1086EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) {
1087 EIGEN_MSA_DEBUG;
1088
1089 Packet2d s = padd(a, preverse(a));
1090 return s[0];
1091}
1092
1093// Other reduction functions:
1094// mul
1095template <>
1096EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) {
1097 EIGEN_MSA_DEBUG;
1098
1099 Packet2d p = pmul(a, preverse(a));
1100 return p[0];
1101}
1102
1103// min
1104template <>
1105EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a) {
1106 EIGEN_MSA_DEBUG;
1107
1108#if EIGEN_FAST_MATH
1109 Packet2d swapped = (Packet2d)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
1110 Packet2d v = __builtin_msa_fmin_d(a, swapped);
1111 return v[0];
1112#else
1113 double a0 = a[0], a1 = a[1];
1114 return ((numext::isnan)(a0) || a0 < a1) ? a0 : a1;
1115#endif
1116}
1117
1118// max
1119template <>
1120EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a) {
1121 EIGEN_MSA_DEBUG;
1122
1123#if EIGEN_FAST_MATH
1124 Packet2d swapped = (Packet2d)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
1125 Packet2d v = __builtin_msa_fmax_d(a, swapped);
1126 return v[0];
1127#else
1128 double a0 = a[0], a1 = a[1];
1129 return ((numext::isnan)(a0) || a0 > a1) ? a0 : a1;
1130#endif
1131}
1132
1133template <>
1134EIGEN_STRONG_INLINE Packet2d psqrt(const Packet2d& a) {
1135 EIGEN_MSA_DEBUG;
1136
1137 return __builtin_msa_fsqrt_d(a);
1138}
1139
1140template <>
1141EIGEN_STRONG_INLINE Packet2d prsqrt(const Packet2d& a) {
1142 EIGEN_MSA_DEBUG;
1143
1144#if EIGEN_FAST_MATH
1145 return __builtin_msa_frsqrt_d(a);
1146#else
1147 Packet2d ones = __builtin_msa_ffint_s_d(__builtin_msa_ldi_d(1));
1148 return pdiv(ones, psqrt(a));
1149#endif
1150}
1151
1152inline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet2d, 2>& value) {
1153 os << "[ " << value.packet[0] << "," << std::endl << " " << value.packet[1] << " ]";
1154 return os;
1155}
1156
1157EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2d, 2>& kernel) {
1158 EIGEN_MSA_DEBUG;
1159
1160 Packet2d trn1 = (Packet2d)__builtin_msa_ilvev_d((v2i64)kernel.packet[1], (v2i64)kernel.packet[0]);
1161 Packet2d trn2 = (Packet2d)__builtin_msa_ilvod_d((v2i64)kernel.packet[1], (v2i64)kernel.packet[0]);
1162 kernel.packet[0] = trn1;
1163 kernel.packet[1] = trn2;
1164}
1165
1166template <>
1167EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) {
1168 Packet2d v = a;
1169 int32_t old_mode, new_mode;
1170 asm volatile(
1171 "cfcmsa %[old_mode], $1\n"
1172 "ori %[new_mode], %[old_mode], 3\n" // 3 = round towards -INFINITY.
1173 "ctcmsa $1, %[new_mode]\n"
1174 "frint.d %w[v], %w[v]\n"
1175 "ctcmsa $1, %[old_mode]\n"
1176 : // outputs
1177 [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
1178 [v] "+f"(v)
1179 : // inputs
1180 : // clobbers
1181 );
1182 return v;
1183}
1184
1185template <>
1186EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) {
1187 Packet2d v = a;
1188 int32_t old_mode, new_mode;
1189 asm volatile(
1190 "cfcmsa %[old_mode], $1\n"
1191 "ori %[new_mode], %[old_mode], 3\n"
1192 "xori %[new_mode], %[new_mode], 1\n" // 2 = round towards +INFINITY.
1193 "ctcmsa $1, %[new_mode]\n"
1194 "frint.d %w[v], %w[v]\n"
1195 "ctcmsa $1, %[old_mode]\n"
1196 : // outputs
1197 [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
1198 [v] "+f"(v)
1199 : // inputs
1200 : // clobbers
1201 );
1202 return v;
1203}
1204
1205template <>
1206EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) {
1207 Packet2d v = a;
1208 int32_t old_mode, new_mode;
1209 asm volatile(
1210 "cfcmsa %[old_mode], $1\n"
1211 "ori %[new_mode], %[old_mode], 3\n"
1212 "xori %[new_mode], %[new_mode], 3\n" // 0 = round to nearest, ties to even.
1213 "ctcmsa $1, %[new_mode]\n"
1214 "frint.d %w[v], %w[v]\n"
1215 "ctcmsa $1, %[old_mode]\n"
1216 : // outputs
1217 [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
1218 [v] "+f"(v)
1219 : // inputs
1220 : // clobbers
1221 );
1222 return v;
1223}
1224
1225template <>
1226EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket,
1227 const Packet2d& elsePacket) {
1228 Packet2ul select = {ifPacket.select[0], ifPacket.select[1]};
1229 Packet2l mask = __builtin_msa_ceqi_d((Packet2l)select, 0);
1230 return (Packet2d)__builtin_msa_bsel_v((v16u8)mask, (v16u8)thenPacket, (v16u8)elsePacket);
1231}
1232
1233} // end namespace internal
1234
1235} // end namespace Eigen
1236
1237#endif // EIGEN_PACKET_MATH_MSA_H
@ Aligned16
Definition Constants.h:237
Namespace containing all symbols from the Eigen library.
Definition Core:137