Eigen  3.4.90 (git rev 5a9f66fb35d03a4da9ef8976e67a61b30aa16dcf)
 
Loading...
Searching...
No Matches
SYCL/PacketMath.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Mehdi Goli Codeplay Software Ltd.
5// Ralph Potter Codeplay Software Ltd.
6// Luke Iwanski Codeplay Software Ltd.
7// Contact: <[email protected]>
8//
9// This Source Code Form is subject to the terms of the Mozilla
10// Public License v. 2.0. If a copy of the MPL was not distributed
11// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
12
13/*****************************************************************
14 * PacketMath.h
15 *
16 * \brief:
17 * PacketMath
18 *
19 *****************************************************************/
20
21#ifndef EIGEN_PACKET_MATH_SYCL_H
22#define EIGEN_PACKET_MATH_SYCL_H
23#include <type_traits>
24
25// IWYU pragma: private
26#include "../../InternalHeaderCheck.h"
27
28namespace Eigen {
29
30namespace internal {
31#ifdef SYCL_DEVICE_ONLY
32#define SYCL_PLOAD(packet_type, AlignedType) \
33 template <> \
34 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType<packet_type>( \
35 const typename unpacket_traits<packet_type>::type* from) { \
36 auto ptr = \
37 cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>( \
38 from); \
39 packet_type res{}; \
40 res.load(0, ptr); \
41 return res; \
42 }
43
44SYCL_PLOAD(cl::sycl::cl_float4, u)
45SYCL_PLOAD(cl::sycl::cl_float4, )
46SYCL_PLOAD(cl::sycl::cl_double2, u)
47SYCL_PLOAD(cl::sycl::cl_double2, )
48#undef SYCL_PLOAD
49
50template <>
51EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 pload<cl::sycl::cl_half8>(
52 const typename unpacket_traits<cl::sycl::cl_half8>::type* from) {
53 auto ptr =
54 cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>(
55 reinterpret_cast<const cl::sycl::cl_half*>(from));
56 cl::sycl::cl_half8 res{};
57 res.load(0, ptr);
58 return res;
59}
60
61template <>
62EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 ploadu<cl::sycl::cl_half8>(
63 const typename unpacket_traits<cl::sycl::cl_half8>::type* from) {
64 auto ptr =
65 cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>(
66 reinterpret_cast<const cl::sycl::cl_half*>(from));
67 cl::sycl::cl_half8 res{};
68 res.load(0, ptr);
69 return res;
70}
71
72#define SYCL_PSTORE(scalar, packet_type, alignment) \
73 template <> \
74 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment(scalar* to, const packet_type& from) { \
75 auto ptr = \
76 cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>( \
77 to); \
78 from.store(0, ptr); \
79 }
80
81SYCL_PSTORE(float, cl::sycl::cl_float4, )
82SYCL_PSTORE(float, cl::sycl::cl_float4, u)
83SYCL_PSTORE(double, cl::sycl::cl_double2, )
84SYCL_PSTORE(double, cl::sycl::cl_double2, u)
85#undef SYCL_PSTORE
86
87template <>
88EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoreu(Eigen::half* to, const cl::sycl::cl_half8& from) {
89 auto ptr =
90 cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>(
91 reinterpret_cast<cl::sycl::cl_half*>(to));
92 from.store(0, ptr);
93}
94
95template <>
96EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore(Eigen::half* to, const cl::sycl::cl_half8& from) {
97 auto ptr =
98 cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>(
99 reinterpret_cast<cl::sycl::cl_half*>(to));
100 from.store(0, ptr);
101}
102
103#define SYCL_PSET1(packet_type) \
104 template <> \
105 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pset1<packet_type>( \
106 const typename unpacket_traits<packet_type>::type& from) { \
107 return packet_type(from); \
108 }
109
110// global space
111SYCL_PSET1(cl::sycl::cl_half8)
112SYCL_PSET1(cl::sycl::cl_float4)
113SYCL_PSET1(cl::sycl::cl_double2)
114
115#undef SYCL_PSET1
116
117template <typename packet_type>
118struct get_base_packet {
119 template <typename sycl_multi_pointer>
120 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type get_ploaddup(sycl_multi_pointer) {}
121
122 template <typename sycl_multi_pointer>
123 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type get_pgather(sycl_multi_pointer, Index) {}
124};
125
126template <>
127struct get_base_packet<cl::sycl::cl_half8> {
128 template <typename sycl_multi_pointer>
129 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_half8 get_ploaddup(sycl_multi_pointer from) {
130 return cl::sycl::cl_half8(static_cast<cl::sycl::half>(from[0]), static_cast<cl::sycl::half>(from[0]),
131 static_cast<cl::sycl::half>(from[1]), static_cast<cl::sycl::half>(from[1]),
132 static_cast<cl::sycl::half>(from[2]), static_cast<cl::sycl::half>(from[2]),
133 static_cast<cl::sycl::half>(from[3]), static_cast<cl::sycl::half>(from[3]));
134 }
135 template <typename sycl_multi_pointer>
136 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_half8 get_pgather(sycl_multi_pointer from, Index stride) {
137 return cl::sycl::cl_half8(
138 static_cast<cl::sycl::half>(from[0 * stride]), static_cast<cl::sycl::half>(from[1 * stride]),
139 static_cast<cl::sycl::half>(from[2 * stride]), static_cast<cl::sycl::half>(from[3 * stride]),
140 static_cast<cl::sycl::half>(from[4 * stride]), static_cast<cl::sycl::half>(from[5 * stride]),
141 static_cast<cl::sycl::half>(from[6 * stride]), static_cast<cl::sycl::half>(from[7 * stride]));
142 }
143
144 template <typename sycl_multi_pointer>
145 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(sycl_multi_pointer to, const cl::sycl::cl_half8& from,
146 Index stride) {
147 auto tmp = stride;
148 to[0] = Eigen::half(from.s0());
149 to[tmp] = Eigen::half(from.s1());
150 to[tmp += stride] = Eigen::half(from.s2());
151 to[tmp += stride] = Eigen::half(from.s3());
152 to[tmp += stride] = Eigen::half(from.s4());
153 to[tmp += stride] = Eigen::half(from.s5());
154 to[tmp += stride] = Eigen::half(from.s6());
155 to[tmp += stride] = Eigen::half(from.s7());
156 }
157 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_half8 set_plset(const cl::sycl::half& a) {
158 return cl::sycl::cl_half8(static_cast<cl::sycl::half>(a), static_cast<cl::sycl::half>(a + 1),
159 static_cast<cl::sycl::half>(a + 2), static_cast<cl::sycl::half>(a + 3),
160 static_cast<cl::sycl::half>(a + 4), static_cast<cl::sycl::half>(a + 5),
161 static_cast<cl::sycl::half>(a + 6), static_cast<cl::sycl::half>(a + 7));
162 }
163};
164
165template <>
166struct get_base_packet<cl::sycl::cl_float4> {
167 template <typename sycl_multi_pointer>
168 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_ploaddup(sycl_multi_pointer from) {
169 return cl::sycl::cl_float4(from[0], from[0], from[1], from[1]);
170 }
171 template <typename sycl_multi_pointer>
172 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_pgather(sycl_multi_pointer from, Index stride) {
173 return cl::sycl::cl_float4(from[0 * stride], from[1 * stride], from[2 * stride], from[3 * stride]);
174 }
175
176 template <typename sycl_multi_pointer>
177 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(sycl_multi_pointer to, const cl::sycl::cl_float4& from,
178 Index stride) {
179 auto tmp = stride;
180 to[0] = from.x();
181 to[tmp] = from.y();
182 to[tmp += stride] = from.z();
183 to[tmp += stride] = from.w();
184 }
185 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 set_plset(const float& a) {
186 return cl::sycl::cl_float4(static_cast<float>(a), static_cast<float>(a + 1), static_cast<float>(a + 2),
187 static_cast<float>(a + 3));
188 }
189};
190
191template <>
192struct get_base_packet<cl::sycl::cl_double2> {
193 template <typename sycl_multi_pointer>
194 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_ploaddup(const sycl_multi_pointer from) {
195 return cl::sycl::cl_double2(from[0], from[0]);
196 }
197
198 template <typename sycl_multi_pointer, typename Index>
199 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_pgather(const sycl_multi_pointer from,
200 Index stride) {
201 return cl::sycl::cl_double2(from[0 * stride], from[1 * stride]);
202 }
203
204 template <typename sycl_multi_pointer>
205 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(sycl_multi_pointer to,
206 const cl::sycl::cl_double2& from, Index stride) {
207 to[0] = from.x();
208 to[stride] = from.y();
209 }
210
211 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 set_plset(const double& a) {
212 return cl::sycl::cl_double2(static_cast<double>(a), static_cast<double>(a + 1));
213 }
214};
215
216#define SYCL_PLOAD_DUP_SPECILIZE(packet_type) \
217 template <> \
218 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup<packet_type>( \
219 const typename unpacket_traits<packet_type>::type* from) { \
220 return get_base_packet<packet_type>::get_ploaddup(from); \
221 }
222
223SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_half8)
224SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_float4)
225SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_double2)
226
227#undef SYCL_PLOAD_DUP_SPECILIZE
228
229#define SYCL_PLSET(packet_type) \
230 template <> \
231 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type plset<packet_type>( \
232 const typename unpacket_traits<packet_type>::type& a) { \
233 return get_base_packet<packet_type>::set_plset(a); \
234 }
235SYCL_PLSET(cl::sycl::cl_float4)
236SYCL_PLSET(cl::sycl::cl_double2)
237#undef SYCL_PLSET
238
239template <>
240EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 plset<cl::sycl::cl_half8>(
241 const typename unpacket_traits<cl::sycl::cl_half8>::type& a) {
242 return get_base_packet<cl::sycl::cl_half8>::set_plset((const cl::sycl::half&)a);
243}
244
245#define SYCL_PGATHER_SPECILIZE(scalar, packet_type) \
246 template <> \
247 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pgather<scalar, packet_type>( \
248 const typename unpacket_traits<packet_type>::type* from, Index stride) { \
249 return get_base_packet<packet_type>::get_pgather(from, stride); \
250 }
251
252SYCL_PGATHER_SPECILIZE(Eigen::half, cl::sycl::cl_half8)
253SYCL_PGATHER_SPECILIZE(float, cl::sycl::cl_float4)
254SYCL_PGATHER_SPECILIZE(double, cl::sycl::cl_double2)
255#undef SYCL_PGATHER_SPECILIZE
256
257#define SYCL_PSCATTER_SPECILIZE(scalar, packet_type) \
258 template <> \
259 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<scalar, packet_type>( \
260 typename unpacket_traits<packet_type>::type * to, const packet_type& from, Index stride) { \
261 get_base_packet<packet_type>::set_pscatter(to, from, stride); \
262 }
263
264SYCL_PSCATTER_SPECILIZE(Eigen::half, cl::sycl::cl_half8)
265SYCL_PSCATTER_SPECILIZE(float, cl::sycl::cl_float4)
266SYCL_PSCATTER_SPECILIZE(double, cl::sycl::cl_double2)
267
268#undef SYCL_PSCATTER_SPECILIZE
269
270#define SYCL_PMAD(packet_type) \
271 template <> \
272 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pmadd(const packet_type& a, const packet_type& b, \
273 const packet_type& c) { \
274 return cl::sycl::mad(a, b, c); \
275 }
276
277SYCL_PMAD(cl::sycl::cl_half8)
278SYCL_PMAD(cl::sycl::cl_float4)
279SYCL_PMAD(cl::sycl::cl_double2)
280#undef SYCL_PMAD
281
282template <>
283EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half pfirst<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
284 return Eigen::half(a.s0());
285}
286template <>
287EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float pfirst<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
288 return a.x();
289}
290template <>
291EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double pfirst<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
292 return a.x();
293}
294
295template <>
296EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half predux<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
297 return Eigen::half(a.s0() + a.s1() + a.s2() + a.s3() + a.s4() + a.s5() + a.s6() + a.s7());
298}
299
300template <>
301EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
302 return a.x() + a.y() + a.z() + a.w();
303}
304
305template <>
306EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
307 return a.x() + a.y();
308}
309
310template <>
311EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half predux_max<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
312 return Eigen::half(cl::sycl::fmax(cl::sycl::fmax(cl::sycl::fmax(a.s0(), a.s1()), cl::sycl::fmax(a.s2(), a.s3())),
313 cl::sycl::fmax(cl::sycl::fmax(a.s4(), a.s5()), cl::sycl::fmax(a.s6(), a.s7()))));
314}
315template <>
316EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_max<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
317 return cl::sycl::fmax(cl::sycl::fmax(a.x(), a.y()), cl::sycl::fmax(a.z(), a.w()));
318}
319template <>
320EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_max<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
321 return cl::sycl::fmax(a.x(), a.y());
322}
323
324template <>
325EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half predux_min<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
326 return Eigen::half(cl::sycl::fmin(cl::sycl::fmin(cl::sycl::fmin(a.s0(), a.s1()), cl::sycl::fmin(a.s2(), a.s3())),
327 cl::sycl::fmin(cl::sycl::fmin(a.s4(), a.s5()), cl::sycl::fmin(a.s6(), a.s7()))));
328}
329template <>
330EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_min<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
331 return cl::sycl::fmin(cl::sycl::fmin(a.x(), a.y()), cl::sycl::fmin(a.z(), a.w()));
332}
333template <>
334EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_min<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
335 return cl::sycl::fmin(a.x(), a.y());
336}
337
338template <>
339EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half predux_mul<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
340 return Eigen::half(a.s0() * a.s1() * a.s2() * a.s3() * a.s4() * a.s5() * a.s6() * a.s7());
341}
342template <>
343EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_mul<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
344 return a.x() * a.y() * a.z() * a.w();
345}
346template <>
347EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_mul<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
348 return a.x() * a.y();
349}
350
351template <>
352EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 pabs<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
353 return cl::sycl::cl_half8(cl::sycl::fabs(a.s0()), cl::sycl::fabs(a.s1()), cl::sycl::fabs(a.s2()),
354 cl::sycl::fabs(a.s3()), cl::sycl::fabs(a.s4()), cl::sycl::fabs(a.s5()),
355 cl::sycl::fabs(a.s6()), cl::sycl::fabs(a.s7()));
356}
357template <>
358EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pabs<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
359 return cl::sycl::cl_float4(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()), cl::sycl::fabs(a.z()),
360 cl::sycl::fabs(a.w()));
361}
362template <>
363EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2 pabs<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
364 return cl::sycl::cl_double2(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()));
365}
366
367template <typename Packet>
368EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_le(const Packet& a, const Packet& b) {
369 return (a <= b).template as<Packet>();
370}
371
372template <typename Packet>
373EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_lt(const Packet& a, const Packet& b) {
374 return (a < b).template as<Packet>();
375}
376
377template <typename Packet>
378EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_eq(const Packet& a, const Packet& b) {
379 return (a == b).template as<Packet>();
380}
381
382#define SYCL_PCMP(OP, TYPE) \
383 template <> \
384 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE TYPE pcmp_##OP<TYPE>(const TYPE& a, const TYPE& b) { \
385 return sycl_pcmp_##OP<TYPE>(a, b); \
386 }
387
388SYCL_PCMP(le, cl::sycl::cl_half8)
389SYCL_PCMP(lt, cl::sycl::cl_half8)
390SYCL_PCMP(eq, cl::sycl::cl_half8)
391SYCL_PCMP(le, cl::sycl::cl_float4)
392SYCL_PCMP(lt, cl::sycl::cl_float4)
393SYCL_PCMP(eq, cl::sycl::cl_float4)
394SYCL_PCMP(le, cl::sycl::cl_double2)
395SYCL_PCMP(lt, cl::sycl::cl_double2)
396SYCL_PCMP(eq, cl::sycl::cl_double2)
397#undef SYCL_PCMP
398
399EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(PacketBlock<cl::sycl::cl_half8, 8>& kernel) {
400 cl::sycl::cl_half tmp = kernel.packet[0].s1();
401 kernel.packet[0].s1() = kernel.packet[1].s0();
402 kernel.packet[1].s0() = tmp;
403
404 tmp = kernel.packet[0].s2();
405 kernel.packet[0].s2() = kernel.packet[2].s0();
406 kernel.packet[2].s0() = tmp;
407
408 tmp = kernel.packet[0].s3();
409 kernel.packet[0].s3() = kernel.packet[3].s0();
410 kernel.packet[3].s0() = tmp;
411
412 tmp = kernel.packet[0].s4();
413 kernel.packet[0].s4() = kernel.packet[4].s0();
414 kernel.packet[4].s0() = tmp;
415
416 tmp = kernel.packet[0].s5();
417 kernel.packet[0].s5() = kernel.packet[5].s0();
418 kernel.packet[5].s0() = tmp;
419
420 tmp = kernel.packet[0].s6();
421 kernel.packet[0].s6() = kernel.packet[6].s0();
422 kernel.packet[6].s0() = tmp;
423
424 tmp = kernel.packet[0].s7();
425 kernel.packet[0].s7() = kernel.packet[7].s0();
426 kernel.packet[7].s0() = tmp;
427
428 tmp = kernel.packet[1].s2();
429 kernel.packet[1].s2() = kernel.packet[2].s1();
430 kernel.packet[2].s1() = tmp;
431
432 tmp = kernel.packet[1].s3();
433 kernel.packet[1].s3() = kernel.packet[3].s1();
434 kernel.packet[3].s1() = tmp;
435
436 tmp = kernel.packet[1].s4();
437 kernel.packet[1].s4() = kernel.packet[4].s1();
438 kernel.packet[4].s1() = tmp;
439
440 tmp = kernel.packet[1].s5();
441 kernel.packet[1].s5() = kernel.packet[5].s1();
442 kernel.packet[5].s1() = tmp;
443
444 tmp = kernel.packet[1].s6();
445 kernel.packet[1].s6() = kernel.packet[6].s1();
446 kernel.packet[6].s1() = tmp;
447
448 tmp = kernel.packet[1].s7();
449 kernel.packet[1].s7() = kernel.packet[7].s1();
450 kernel.packet[7].s1() = tmp;
451
452 tmp = kernel.packet[2].s3();
453 kernel.packet[2].s3() = kernel.packet[3].s2();
454 kernel.packet[3].s2() = tmp;
455
456 tmp = kernel.packet[2].s4();
457 kernel.packet[2].s4() = kernel.packet[4].s2();
458 kernel.packet[4].s2() = tmp;
459
460 tmp = kernel.packet[2].s5();
461 kernel.packet[2].s5() = kernel.packet[5].s2();
462 kernel.packet[5].s2() = tmp;
463
464 tmp = kernel.packet[2].s6();
465 kernel.packet[2].s6() = kernel.packet[6].s2();
466 kernel.packet[6].s2() = tmp;
467
468 tmp = kernel.packet[2].s7();
469 kernel.packet[2].s7() = kernel.packet[7].s2();
470 kernel.packet[7].s2() = tmp;
471
472 tmp = kernel.packet[3].s4();
473 kernel.packet[3].s4() = kernel.packet[4].s3();
474 kernel.packet[4].s3() = tmp;
475
476 tmp = kernel.packet[3].s5();
477 kernel.packet[3].s5() = kernel.packet[5].s3();
478 kernel.packet[5].s3() = tmp;
479
480 tmp = kernel.packet[3].s6();
481 kernel.packet[3].s6() = kernel.packet[6].s3();
482 kernel.packet[6].s3() = tmp;
483
484 tmp = kernel.packet[3].s7();
485 kernel.packet[3].s7() = kernel.packet[7].s3();
486 kernel.packet[7].s3() = tmp;
487
488 tmp = kernel.packet[4].s5();
489 kernel.packet[4].s5() = kernel.packet[5].s4();
490 kernel.packet[5].s4() = tmp;
491
492 tmp = kernel.packet[4].s6();
493 kernel.packet[4].s6() = kernel.packet[6].s4();
494 kernel.packet[6].s4() = tmp;
495
496 tmp = kernel.packet[4].s7();
497 kernel.packet[4].s7() = kernel.packet[7].s4();
498 kernel.packet[7].s4() = tmp;
499
500 tmp = kernel.packet[5].s6();
501 kernel.packet[5].s6() = kernel.packet[6].s5();
502 kernel.packet[6].s5() = tmp;
503
504 tmp = kernel.packet[5].s7();
505 kernel.packet[5].s7() = kernel.packet[7].s5();
506 kernel.packet[7].s5() = tmp;
507
508 tmp = kernel.packet[6].s7();
509 kernel.packet[6].s7() = kernel.packet[7].s6();
510 kernel.packet[7].s6() = tmp;
511}
512
513EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(PacketBlock<cl::sycl::cl_float4, 4>& kernel) {
514 float tmp = kernel.packet[0].y();
515 kernel.packet[0].y() = kernel.packet[1].x();
516 kernel.packet[1].x() = tmp;
517
518 tmp = kernel.packet[0].z();
519 kernel.packet[0].z() = kernel.packet[2].x();
520 kernel.packet[2].x() = tmp;
521
522 tmp = kernel.packet[0].w();
523 kernel.packet[0].w() = kernel.packet[3].x();
524 kernel.packet[3].x() = tmp;
525
526 tmp = kernel.packet[1].z();
527 kernel.packet[1].z() = kernel.packet[2].y();
528 kernel.packet[2].y() = tmp;
529
530 tmp = kernel.packet[1].w();
531 kernel.packet[1].w() = kernel.packet[3].y();
532 kernel.packet[3].y() = tmp;
533
534 tmp = kernel.packet[2].w();
535 kernel.packet[2].w() = kernel.packet[3].z();
536 kernel.packet[3].z() = tmp;
537}
538
539EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(PacketBlock<cl::sycl::cl_double2, 2>& kernel) {
540 double tmp = kernel.packet[0].y();
541 kernel.packet[0].y() = kernel.packet[1].x();
542 kernel.packet[1].x() = tmp;
543}
544
545template <>
546EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 pblend(
547 const Selector<unpacket_traits<cl::sycl::cl_half8>::size>& ifPacket, const cl::sycl::cl_half8& thenPacket,
548 const cl::sycl::cl_half8& elsePacket) {
549 cl::sycl::cl_short8 condition(ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1, ifPacket.select[2] ? 0 : -1,
550 ifPacket.select[3] ? 0 : -1, ifPacket.select[4] ? 0 : -1, ifPacket.select[5] ? 0 : -1,
551 ifPacket.select[6] ? 0 : -1, ifPacket.select[7] ? 0 : -1);
552 return cl::sycl::select(thenPacket, elsePacket, condition);
553}
554
555template <>
556EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pblend(
557 const Selector<unpacket_traits<cl::sycl::cl_float4>::size>& ifPacket, const cl::sycl::cl_float4& thenPacket,
558 const cl::sycl::cl_float4& elsePacket) {
559 cl::sycl::cl_int4 condition(ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1, ifPacket.select[2] ? 0 : -1,
560 ifPacket.select[3] ? 0 : -1);
561 return cl::sycl::select(thenPacket, elsePacket, condition);
562}
563
564template <>
565inline cl::sycl::cl_double2 pblend(const Selector<unpacket_traits<cl::sycl::cl_double2>::size>& ifPacket,
566 const cl::sycl::cl_double2& thenPacket, const cl::sycl::cl_double2& elsePacket) {
567 cl::sycl::cl_long2 condition(ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1);
568 return cl::sycl::select(thenPacket, elsePacket, condition);
569}
570#endif // SYCL_DEVICE_ONLY
571
572} // end namespace internal
573
574} // end namespace Eigen
575
576#endif // EIGEN_PACKET_MATH_SYCL_H
Namespace containing all symbols from the Eigen library.
Definition Core:137