21#ifndef EIGEN_PACKET_MATH_SYCL_H
22#define EIGEN_PACKET_MATH_SYCL_H
26#include "../../InternalHeaderCheck.h"
31#ifdef SYCL_DEVICE_ONLY
32#define SYCL_PLOAD(packet_type, AlignedType) \
34 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType<packet_type>( \
35 const typename unpacket_traits<packet_type>::type* from) { \
37 cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>( \
44SYCL_PLOAD(cl::sycl::cl_float4, u)
45SYCL_PLOAD(cl::sycl::cl_float4, )
46SYCL_PLOAD(cl::sycl::cl_double2, u)
47SYCL_PLOAD(cl::sycl::cl_double2, )
51EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 pload<cl::sycl::cl_half8>(
52 const typename unpacket_traits<cl::sycl::cl_half8>::type* from) {
54 cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>(
55 reinterpret_cast<const cl::sycl::cl_half*
>(from));
56 cl::sycl::cl_half8 res{};
62EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 ploadu<cl::sycl::cl_half8>(
63 const typename unpacket_traits<cl::sycl::cl_half8>::type* from) {
65 cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>(
66 reinterpret_cast<const cl::sycl::cl_half*
>(from));
67 cl::sycl::cl_half8 res{};
72#define SYCL_PSTORE(scalar, packet_type, alignment) \
74 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment(scalar* to, const packet_type& from) { \
76 cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>( \
81SYCL_PSTORE(
float, cl::sycl::cl_float4, )
82SYCL_PSTORE(
float, cl::sycl::cl_float4, u)
83SYCL_PSTORE(
double, cl::sycl::cl_double2, )
84SYCL_PSTORE(
double, cl::sycl::cl_double2, u)
88EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
void pstoreu(Eigen::half* to,
const cl::sycl::cl_half8& from) {
90 cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>(
91 reinterpret_cast<cl::sycl::cl_half*
>(to));
96EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
void pstore(Eigen::half* to,
const cl::sycl::cl_half8& from) {
98 cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>(
99 reinterpret_cast<cl::sycl::cl_half*
>(to));
103#define SYCL_PSET1(packet_type) \
105 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pset1<packet_type>( \
106 const typename unpacket_traits<packet_type>::type& from) { \
107 return packet_type(from); \
111SYCL_PSET1(cl::sycl::cl_half8)
112SYCL_PSET1(cl::sycl::cl_float4)
113SYCL_PSET1(cl::sycl::cl_double2)
117template <
typename packet_type>
118struct get_base_packet {
119 template <
typename sycl_multi_po
inter>
120 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type get_ploaddup(sycl_multi_pointer) {}
122 template <
typename sycl_multi_po
inter>
123 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type get_pgather(sycl_multi_pointer, Index) {}
127struct get_base_packet<cl::sycl::cl_half8> {
128 template <
typename sycl_multi_po
inter>
129 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_half8 get_ploaddup(sycl_multi_pointer from) {
130 return cl::sycl::cl_half8(
static_cast<cl::sycl::half
>(from[0]),
static_cast<cl::sycl::half
>(from[0]),
131 static_cast<cl::sycl::half
>(from[1]),
static_cast<cl::sycl::half
>(from[1]),
132 static_cast<cl::sycl::half
>(from[2]),
static_cast<cl::sycl::half
>(from[2]),
133 static_cast<cl::sycl::half
>(from[3]),
static_cast<cl::sycl::half
>(from[3]));
135 template <
typename sycl_multi_po
inter>
136 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_half8 get_pgather(sycl_multi_pointer from, Index stride) {
137 return cl::sycl::cl_half8(
138 static_cast<cl::sycl::half
>(from[0 * stride]),
static_cast<cl::sycl::half
>(from[1 * stride]),
139 static_cast<cl::sycl::half
>(from[2 * stride]),
static_cast<cl::sycl::half
>(from[3 * stride]),
140 static_cast<cl::sycl::half
>(from[4 * stride]),
static_cast<cl::sycl::half
>(from[5 * stride]),
141 static_cast<cl::sycl::half
>(from[6 * stride]),
static_cast<cl::sycl::half
>(from[7 * stride]));
144 template <
typename sycl_multi_po
inter>
145 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void set_pscatter(sycl_multi_pointer to,
const cl::sycl::cl_half8& from,
148 to[0] = Eigen::half(from.s0());
149 to[tmp] = Eigen::half(from.s1());
150 to[tmp += stride] = Eigen::half(from.s2());
151 to[tmp += stride] = Eigen::half(from.s3());
152 to[tmp += stride] = Eigen::half(from.s4());
153 to[tmp += stride] = Eigen::half(from.s5());
154 to[tmp += stride] = Eigen::half(from.s6());
155 to[tmp += stride] = Eigen::half(from.s7());
157 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_half8 set_plset(
const cl::sycl::half& a) {
158 return cl::sycl::cl_half8(
static_cast<cl::sycl::half
>(a),
static_cast<cl::sycl::half
>(a + 1),
159 static_cast<cl::sycl::half
>(a + 2),
static_cast<cl::sycl::half
>(a + 3),
160 static_cast<cl::sycl::half
>(a + 4),
static_cast<cl::sycl::half
>(a + 5),
161 static_cast<cl::sycl::half
>(a + 6),
static_cast<cl::sycl::half
>(a + 7));
166struct get_base_packet<cl::sycl::cl_float4> {
167 template <
typename sycl_multi_po
inter>
168 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_ploaddup(sycl_multi_pointer from) {
169 return cl::sycl::cl_float4(from[0], from[0], from[1], from[1]);
171 template <
typename sycl_multi_po
inter>
172 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_pgather(sycl_multi_pointer from, Index stride) {
173 return cl::sycl::cl_float4(from[0 * stride], from[1 * stride], from[2 * stride], from[3 * stride]);
176 template <
typename sycl_multi_po
inter>
177 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void set_pscatter(sycl_multi_pointer to,
const cl::sycl::cl_float4& from,
182 to[tmp += stride] = from.z();
183 to[tmp += stride] = from.w();
185 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 set_plset(
const float& a) {
186 return cl::sycl::cl_float4(
static_cast<float>(a),
static_cast<float>(a + 1),
static_cast<float>(a + 2),
187 static_cast<float>(a + 3));
192struct get_base_packet<cl::sycl::cl_double2> {
193 template <
typename sycl_multi_po
inter>
194 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_ploaddup(
const sycl_multi_pointer from) {
195 return cl::sycl::cl_double2(from[0], from[0]);
198 template <
typename sycl_multi_po
inter,
typename Index>
199 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_pgather(
const sycl_multi_pointer from,
201 return cl::sycl::cl_double2(from[0 * stride], from[1 * stride]);
204 template <
typename sycl_multi_po
inter>
205 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void set_pscatter(sycl_multi_pointer to,
206 const cl::sycl::cl_double2& from, Index stride) {
208 to[stride] = from.y();
211 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 set_plset(
const double& a) {
212 return cl::sycl::cl_double2(
static_cast<double>(a),
static_cast<double>(a + 1));
216#define SYCL_PLOAD_DUP_SPECILIZE(packet_type) \
218 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup<packet_type>( \
219 const typename unpacket_traits<packet_type>::type* from) { \
220 return get_base_packet<packet_type>::get_ploaddup(from); \
223SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_half8)
224SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_float4)
225SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_double2)
227#undef SYCL_PLOAD_DUP_SPECILIZE
229#define SYCL_PLSET(packet_type) \
231 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type plset<packet_type>( \
232 const typename unpacket_traits<packet_type>::type& a) { \
233 return get_base_packet<packet_type>::set_plset(a); \
235SYCL_PLSET(cl::sycl::cl_float4)
236SYCL_PLSET(cl::sycl::cl_double2)
240EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 plset<cl::sycl::cl_half8>(
241 const typename unpacket_traits<cl::sycl::cl_half8>::type& a) {
242 return get_base_packet<cl::sycl::cl_half8>::set_plset((
const cl::sycl::half&)a);
245#define SYCL_PGATHER_SPECILIZE(scalar, packet_type) \
247 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pgather<scalar, packet_type>( \
248 const typename unpacket_traits<packet_type>::type* from, Index stride) { \
249 return get_base_packet<packet_type>::get_pgather(from, stride); \
252SYCL_PGATHER_SPECILIZE(Eigen::half, cl::sycl::cl_half8)
253SYCL_PGATHER_SPECILIZE(
float, cl::sycl::cl_float4)
254SYCL_PGATHER_SPECILIZE(
double, cl::sycl::cl_double2)
255#undef SYCL_PGATHER_SPECILIZE
257#define SYCL_PSCATTER_SPECILIZE(scalar, packet_type) \
259 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<scalar, packet_type>( \
260 typename unpacket_traits<packet_type>::type * to, const packet_type& from, Index stride) { \
261 get_base_packet<packet_type>::set_pscatter(to, from, stride); \
264SYCL_PSCATTER_SPECILIZE(Eigen::half, cl::sycl::cl_half8)
265SYCL_PSCATTER_SPECILIZE(
float, cl::sycl::cl_float4)
266SYCL_PSCATTER_SPECILIZE(
double, cl::sycl::cl_double2)
268#undef SYCL_PSCATTER_SPECILIZE
270#define SYCL_PMAD(packet_type) \
272 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pmadd(const packet_type& a, const packet_type& b, \
273 const packet_type& c) { \
274 return cl::sycl::mad(a, b, c); \
277SYCL_PMAD(cl::sycl::cl_half8)
278SYCL_PMAD(cl::sycl::cl_float4)
279SYCL_PMAD(cl::sycl::cl_double2)
283EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half pfirst<cl::sycl::cl_half8>(
const cl::sycl::cl_half8& a) {
284 return Eigen::half(a.s0());
287EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float pfirst<cl::sycl::cl_float4>(
const cl::sycl::cl_float4& a) {
291EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double pfirst<cl::sycl::cl_double2>(
const cl::sycl::cl_double2& a) {
296EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half predux<cl::sycl::cl_half8>(
const cl::sycl::cl_half8& a) {
297 return Eigen::half(a.s0() + a.s1() + a.s2() + a.s3() + a.s4() + a.s5() + a.s6() + a.s7());
301EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float predux<cl::sycl::cl_float4>(
const cl::sycl::cl_float4& a) {
302 return a.x() + a.y() + a.z() + a.w();
306EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double predux<cl::sycl::cl_double2>(
const cl::sycl::cl_double2& a) {
307 return a.x() + a.y();
311EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half predux_max<cl::sycl::cl_half8>(
const cl::sycl::cl_half8& a) {
312 return Eigen::half(cl::sycl::fmax(cl::sycl::fmax(cl::sycl::fmax(a.s0(), a.s1()), cl::sycl::fmax(a.s2(), a.s3())),
313 cl::sycl::fmax(cl::sycl::fmax(a.s4(), a.s5()), cl::sycl::fmax(a.s6(), a.s7()))));
316EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float predux_max<cl::sycl::cl_float4>(
const cl::sycl::cl_float4& a) {
317 return cl::sycl::fmax(cl::sycl::fmax(a.x(), a.y()), cl::sycl::fmax(a.z(), a.w()));
320EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double predux_max<cl::sycl::cl_double2>(
const cl::sycl::cl_double2& a) {
321 return cl::sycl::fmax(a.x(), a.y());
325EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half predux_min<cl::sycl::cl_half8>(
const cl::sycl::cl_half8& a) {
326 return Eigen::half(cl::sycl::fmin(cl::sycl::fmin(cl::sycl::fmin(a.s0(), a.s1()), cl::sycl::fmin(a.s2(), a.s3())),
327 cl::sycl::fmin(cl::sycl::fmin(a.s4(), a.s5()), cl::sycl::fmin(a.s6(), a.s7()))));
330EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float predux_min<cl::sycl::cl_float4>(
const cl::sycl::cl_float4& a) {
331 return cl::sycl::fmin(cl::sycl::fmin(a.x(), a.y()), cl::sycl::fmin(a.z(), a.w()));
334EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double predux_min<cl::sycl::cl_double2>(
const cl::sycl::cl_double2& a) {
335 return cl::sycl::fmin(a.x(), a.y());
339EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half predux_mul<cl::sycl::cl_half8>(
const cl::sycl::cl_half8& a) {
340 return Eigen::half(a.s0() * a.s1() * a.s2() * a.s3() * a.s4() * a.s5() * a.s6() * a.s7());
343EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float predux_mul<cl::sycl::cl_float4>(
const cl::sycl::cl_float4& a) {
344 return a.x() * a.y() * a.z() * a.w();
347EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double predux_mul<cl::sycl::cl_double2>(
const cl::sycl::cl_double2& a) {
348 return a.x() * a.y();
352EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 pabs<cl::sycl::cl_half8>(
const cl::sycl::cl_half8& a) {
353 return cl::sycl::cl_half8(cl::sycl::fabs(a.s0()), cl::sycl::fabs(a.s1()), cl::sycl::fabs(a.s2()),
354 cl::sycl::fabs(a.s3()), cl::sycl::fabs(a.s4()), cl::sycl::fabs(a.s5()),
355 cl::sycl::fabs(a.s6()), cl::sycl::fabs(a.s7()));
358EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pabs<cl::sycl::cl_float4>(
const cl::sycl::cl_float4& a) {
359 return cl::sycl::cl_float4(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()), cl::sycl::fabs(a.z()),
360 cl::sycl::fabs(a.w()));
363EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2 pabs<cl::sycl::cl_double2>(
const cl::sycl::cl_double2& a) {
364 return cl::sycl::cl_double2(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()));
367template <
typename Packet>
368EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_le(
const Packet& a,
const Packet& b) {
369 return (a <= b).template as<Packet>();
372template <
typename Packet>
373EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_lt(
const Packet& a,
const Packet& b) {
374 return (a < b).template as<Packet>();
377template <
typename Packet>
378EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_eq(
const Packet& a,
const Packet& b) {
379 return (a == b).template as<Packet>();
382#define SYCL_PCMP(OP, TYPE) \
384 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE TYPE pcmp_##OP<TYPE>(const TYPE& a, const TYPE& b) { \
385 return sycl_pcmp_##OP<TYPE>(a, b); \
388SYCL_PCMP(le, cl::sycl::cl_half8)
389SYCL_PCMP(lt, cl::sycl::cl_half8)
390SYCL_PCMP(eq, cl::sycl::cl_half8)
391SYCL_PCMP(le, cl::sycl::cl_float4)
392SYCL_PCMP(lt, cl::sycl::cl_float4)
393SYCL_PCMP(eq, cl::sycl::cl_float4)
394SYCL_PCMP(le, cl::sycl::cl_double2)
395SYCL_PCMP(lt, cl::sycl::cl_double2)
396SYCL_PCMP(eq, cl::sycl::cl_double2)
399EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
void ptranspose(PacketBlock<cl::sycl::cl_half8, 8>& kernel) {
400 cl::sycl::cl_half tmp = kernel.packet[0].s1();
401 kernel.packet[0].s1() = kernel.packet[1].s0();
402 kernel.packet[1].s0() = tmp;
404 tmp = kernel.packet[0].s2();
405 kernel.packet[0].s2() = kernel.packet[2].s0();
406 kernel.packet[2].s0() = tmp;
408 tmp = kernel.packet[0].s3();
409 kernel.packet[0].s3() = kernel.packet[3].s0();
410 kernel.packet[3].s0() = tmp;
412 tmp = kernel.packet[0].s4();
413 kernel.packet[0].s4() = kernel.packet[4].s0();
414 kernel.packet[4].s0() = tmp;
416 tmp = kernel.packet[0].s5();
417 kernel.packet[0].s5() = kernel.packet[5].s0();
418 kernel.packet[5].s0() = tmp;
420 tmp = kernel.packet[0].s6();
421 kernel.packet[0].s6() = kernel.packet[6].s0();
422 kernel.packet[6].s0() = tmp;
424 tmp = kernel.packet[0].s7();
425 kernel.packet[0].s7() = kernel.packet[7].s0();
426 kernel.packet[7].s0() = tmp;
428 tmp = kernel.packet[1].s2();
429 kernel.packet[1].s2() = kernel.packet[2].s1();
430 kernel.packet[2].s1() = tmp;
432 tmp = kernel.packet[1].s3();
433 kernel.packet[1].s3() = kernel.packet[3].s1();
434 kernel.packet[3].s1() = tmp;
436 tmp = kernel.packet[1].s4();
437 kernel.packet[1].s4() = kernel.packet[4].s1();
438 kernel.packet[4].s1() = tmp;
440 tmp = kernel.packet[1].s5();
441 kernel.packet[1].s5() = kernel.packet[5].s1();
442 kernel.packet[5].s1() = tmp;
444 tmp = kernel.packet[1].s6();
445 kernel.packet[1].s6() = kernel.packet[6].s1();
446 kernel.packet[6].s1() = tmp;
448 tmp = kernel.packet[1].s7();
449 kernel.packet[1].s7() = kernel.packet[7].s1();
450 kernel.packet[7].s1() = tmp;
452 tmp = kernel.packet[2].s3();
453 kernel.packet[2].s3() = kernel.packet[3].s2();
454 kernel.packet[3].s2() = tmp;
456 tmp = kernel.packet[2].s4();
457 kernel.packet[2].s4() = kernel.packet[4].s2();
458 kernel.packet[4].s2() = tmp;
460 tmp = kernel.packet[2].s5();
461 kernel.packet[2].s5() = kernel.packet[5].s2();
462 kernel.packet[5].s2() = tmp;
464 tmp = kernel.packet[2].s6();
465 kernel.packet[2].s6() = kernel.packet[6].s2();
466 kernel.packet[6].s2() = tmp;
468 tmp = kernel.packet[2].s7();
469 kernel.packet[2].s7() = kernel.packet[7].s2();
470 kernel.packet[7].s2() = tmp;
472 tmp = kernel.packet[3].s4();
473 kernel.packet[3].s4() = kernel.packet[4].s3();
474 kernel.packet[4].s3() = tmp;
476 tmp = kernel.packet[3].s5();
477 kernel.packet[3].s5() = kernel.packet[5].s3();
478 kernel.packet[5].s3() = tmp;
480 tmp = kernel.packet[3].s6();
481 kernel.packet[3].s6() = kernel.packet[6].s3();
482 kernel.packet[6].s3() = tmp;
484 tmp = kernel.packet[3].s7();
485 kernel.packet[3].s7() = kernel.packet[7].s3();
486 kernel.packet[7].s3() = tmp;
488 tmp = kernel.packet[4].s5();
489 kernel.packet[4].s5() = kernel.packet[5].s4();
490 kernel.packet[5].s4() = tmp;
492 tmp = kernel.packet[4].s6();
493 kernel.packet[4].s6() = kernel.packet[6].s4();
494 kernel.packet[6].s4() = tmp;
496 tmp = kernel.packet[4].s7();
497 kernel.packet[4].s7() = kernel.packet[7].s4();
498 kernel.packet[7].s4() = tmp;
500 tmp = kernel.packet[5].s6();
501 kernel.packet[5].s6() = kernel.packet[6].s5();
502 kernel.packet[6].s5() = tmp;
504 tmp = kernel.packet[5].s7();
505 kernel.packet[5].s7() = kernel.packet[7].s5();
506 kernel.packet[7].s5() = tmp;
508 tmp = kernel.packet[6].s7();
509 kernel.packet[6].s7() = kernel.packet[7].s6();
510 kernel.packet[7].s6() = tmp;
513EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
void ptranspose(PacketBlock<cl::sycl::cl_float4, 4>& kernel) {
514 float tmp = kernel.packet[0].y();
515 kernel.packet[0].y() = kernel.packet[1].x();
516 kernel.packet[1].x() = tmp;
518 tmp = kernel.packet[0].z();
519 kernel.packet[0].z() = kernel.packet[2].x();
520 kernel.packet[2].x() = tmp;
522 tmp = kernel.packet[0].w();
523 kernel.packet[0].w() = kernel.packet[3].x();
524 kernel.packet[3].x() = tmp;
526 tmp = kernel.packet[1].z();
527 kernel.packet[1].z() = kernel.packet[2].y();
528 kernel.packet[2].y() = tmp;
530 tmp = kernel.packet[1].w();
531 kernel.packet[1].w() = kernel.packet[3].y();
532 kernel.packet[3].y() = tmp;
534 tmp = kernel.packet[2].w();
535 kernel.packet[2].w() = kernel.packet[3].z();
536 kernel.packet[3].z() = tmp;
539EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
void ptranspose(PacketBlock<cl::sycl::cl_double2, 2>& kernel) {
540 double tmp = kernel.packet[0].y();
541 kernel.packet[0].y() = kernel.packet[1].x();
542 kernel.packet[1].x() = tmp;
546EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 pblend(
547 const Selector<unpacket_traits<cl::sycl::cl_half8>::size>& ifPacket,
const cl::sycl::cl_half8& thenPacket,
548 const cl::sycl::cl_half8& elsePacket) {
549 cl::sycl::cl_short8 condition(ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1, ifPacket.select[2] ? 0 : -1,
550 ifPacket.select[3] ? 0 : -1, ifPacket.select[4] ? 0 : -1, ifPacket.select[5] ? 0 : -1,
551 ifPacket.select[6] ? 0 : -1, ifPacket.select[7] ? 0 : -1);
552 return cl::sycl::select(thenPacket, elsePacket, condition);
556EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pblend(
557 const Selector<unpacket_traits<cl::sycl::cl_float4>::size>& ifPacket,
const cl::sycl::cl_float4& thenPacket,
558 const cl::sycl::cl_float4& elsePacket) {
559 cl::sycl::cl_int4 condition(ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1, ifPacket.select[2] ? 0 : -1,
560 ifPacket.select[3] ? 0 : -1);
561 return cl::sycl::select(thenPacket, elsePacket, condition);
565inline cl::sycl::cl_double2 pblend(
const Selector<unpacket_traits<cl::sycl::cl_double2>::size>& ifPacket,
566 const cl::sycl::cl_double2& thenPacket,
const cl::sycl::cl_double2& elsePacket) {
567 cl::sycl::cl_long2 condition(ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1);
568 return cl::sycl::select(thenPacket, elsePacket, condition);
Namespace containing all symbols from the Eigen library.
Definition Core:137