10#ifndef EIGEN_TYPE_CASTING_SSE_H
11#define EIGEN_TYPE_CASTING_SSE_H
14#include "../../InternalHeaderCheck.h"
// Cast-traits specializations for the scalar pairs handled by this file. Each
// one simply inherits from vectorized_type_casting_traits for the same pair.
// NOTE(review): the number fused onto the start of every line looks like a
// line number from a rendered listing, and neither the `template <>` lines nor
// the `#endif`s closing the two guards below are visible in this view —
// confirm against the original header before relying on this text.
// Guard: the specializations below are skipped when EIGEN_VECTORIZE_AVX is defined.
20#ifndef EIGEN_VECTORIZE_AVX
// float <-> bool
22struct type_casting_traits<float, bool> : vectorized_type_casting_traits<float, bool> {};
24struct type_casting_traits<bool, float> : vectorized_type_casting_traits<bool, float> {};
// float <-> int
27struct type_casting_traits<float, int> : vectorized_type_casting_traits<float, int> {};
29struct type_casting_traits<int, float> : vectorized_type_casting_traits<int, float> {};
// float <-> double
32struct type_casting_traits<float, double> : vectorized_type_casting_traits<float, double> {};
34struct type_casting_traits<double, float> : vectorized_type_casting_traits<double, float> {};
// double <-> int
37struct type_casting_traits<double, int> : vectorized_type_casting_traits<double, int> {};
39struct type_casting_traits<int, double> : vectorized_type_casting_traits<int, double> {};
// Nested guard: the 64-bit integer pairs are additionally skipped when
// EIGEN_VECTORIZE_AVX2 is defined.
41#ifndef EIGEN_VECTORIZE_AVX2
// double <-> int64_t
43struct type_casting_traits<double, int64_t> : vectorized_type_casting_traits<double, int64_t> {};
45struct type_casting_traits<int64_t, double> : vectorized_type_casting_traits<int64_t, double> {};
50EIGEN_STRONG_INLINE Packet16b pcast<Packet4f, Packet16b>(
const Packet4f& a,
const Packet4f& b,
const Packet4f& c,
52 __m128 zero = pzero(a);
53 __m128 nonzero_a = _mm_cmpneq_ps(a, zero);
54 __m128 nonzero_b = _mm_cmpneq_ps(b, zero);
55 __m128 nonzero_c = _mm_cmpneq_ps(c, zero);
56 __m128 nonzero_d = _mm_cmpneq_ps(d, zero);
57 __m128i ab_bytes = _mm_packs_epi32(_mm_castps_si128(nonzero_a), _mm_castps_si128(nonzero_b));
58 __m128i cd_bytes = _mm_packs_epi32(_mm_castps_si128(nonzero_c), _mm_castps_si128(nonzero_d));
59 __m128i merged = _mm_packs_epi16(ab_bytes, cd_bytes);
60 return _mm_and_si128(merged, _mm_set1_epi8(1));
64EIGEN_STRONG_INLINE Packet4f pcast<Packet16b, Packet4f>(
const Packet16b& a) {
65 const __m128 cst_one = _mm_set_ps1(1.0f);
66#ifdef EIGEN_VECTORIZE_SSE4_1
67 __m128i a_extended = _mm_cvtepi8_epi32(a);
68 __m128i abcd = _mm_cmpeq_epi32(a_extended, _mm_setzero_si128());
70 __m128i abcd_efhg_ijkl_mnop = _mm_cmpeq_epi8(a, _mm_setzero_si128());
71 __m128i aabb_ccdd_eeff_gghh = _mm_unpacklo_epi8(abcd_efhg_ijkl_mnop, abcd_efhg_ijkl_mnop);
72 __m128i abcd = _mm_unpacklo_epi8(aabb_ccdd_eeff_gghh, aabb_ccdd_eeff_gghh);
74 __m128 result = _mm_andnot_ps(_mm_castsi128_ps(abcd), cst_one);
79EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(
const Packet4f& a) {
80 return _mm_cvttps_epi32(a);
84EIGEN_STRONG_INLINE Packet4i pcast<Packet2d, Packet4i>(
const Packet2d& a,
const Packet2d& b) {
85 return _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(_mm_cvttpd_epi32(a)), _mm_castsi128_ps(_mm_cvttpd_epi32(b)),
86 (1 << 2) | (1 << 6)));
// Casts two doubles to two 64-bit integers.
// NOTE(review): two alternative return statements are visible below, but the
// preprocessor lines that choose between them (and the closing brace) are not
// visible in this view; the numbers fused onto the line starts look like
// listing line numbers. Confirm the selecting condition against the original
// header.
90EIGEN_STRONG_INLINE Packet2l pcast<Packet2d, Packet2l>(
const Packet2d& a) {
// Variant 1: convert each lane with the truncating scalar intrinsic
// _mm_cvttsd_si64. preverse(a) presumably swaps the two lanes so the upper
// element becomes reachable as the low one — verify against preverse.
92 return _mm_set_epi64x(_mm_cvttsd_si64(preverse(a)), _mm_cvttsd_si64(a));
// Variant 2: same lane order, but each element is read with pfirst and
// converted with a plain static_cast to int64_t.
94 return _mm_set_epi64x(
static_cast<int64_t
>(pfirst(preverse(a))),
static_cast<int64_t
>(pfirst(a)));
// Casts two 64-bit integers to two doubles, one element at a time.
// NOTE(review): no statement that writes `a` into `aux` is visible in this
// view (the fused listing numbers jump from 100 to 102), and the closing brace
// is missing as well — presumably the packet is stored into `aux` on the
// missing line; confirm against the original header.
99EIGEN_STRONG_INLINE Packet2d pcast<Packet2l, Packet2d>(
const Packet2l& a) {
// 16-byte aligned scratch buffer holding the two integer lanes.
100 EIGEN_ALIGN16 int64_t aux[2];
// _mm_set_pd takes the high lane first, so aux[1] ends up in lane 1 and
// aux[0] in lane 0.
102 return _mm_set_pd(
static_cast<double>(aux[1]),
static_cast<double>(aux[0]));
106EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(
const Packet4i& a) {
107 return _mm_cvtepi32_ps(a);
111EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(
const Packet2d& a,
const Packet2d& b) {
112 return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
116EIGEN_STRONG_INLINE Packet2d pcast<Packet4i, Packet2d>(
const Packet4i& a) {
118 return _mm_cvtepi32_pd(a);
122EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(
const Packet4f& a) {
124 return _mm_cvtps_pd(a);
128EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet4f>(
const Packet4f& a) {
129 return _mm_castps_pd(a);
133EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f, Packet2d>(
const Packet2d& a) {
134 return _mm_castpd_ps(a);
138EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet4f>(
const Packet4f& a) {
139 return _mm_castps_si128(a);
143EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f, Packet4i>(
const Packet4i& a) {
144 return _mm_castsi128_ps(a);
148EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet4i>(
const Packet4i& a) {
149 return _mm_castsi128_pd(a);
153EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet2l>(
const Packet2l& a) {
154 return _mm_castsi128_pd(a);
157EIGEN_STRONG_INLINE Packet2l preinterpret<Packet2l, Packet2d>(
const Packet2d& a) {
158 return _mm_castpd_si128(a);
162EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet2d>(
const Packet2d& a) {
163 return _mm_castpd_si128(a);
// Reinterpretation of a signed 32-bit integer packet as an unsigned one.
// NOTE(review): only the signature is visible in this view; the function body,
// the closing brace and any `template <>` line are missing, and the number
// fused onto the first token looks like a listing line number. Confirm the
// implementation against the original header.
167EIGEN_STRONG_INLINE Packet4ui preinterpret<Packet4ui, Packet4i>(
const Packet4i& a) {
// Reinterpretation of an unsigned 32-bit integer packet as a signed one.
// NOTE(review): only the signature is visible in this view; the function body,
// the closing brace and any `template <>` line are missing, and the number
// fused onto the first token looks like a listing line number. Confirm the
// implementation against the original header.
172EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet4ui>(
const Packet4ui& a) {
// Cast traits for the Eigen::half -> float conversion.
// NOTE(review): only the opening of the struct is visible in this view; its
// members and closing brace are missing, so the trait values cannot be
// documented from here. Confirm against the original header.
181struct type_casting_traits<
Eigen::half, float> {
189template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4h, Packet4f>(
const Packet4h& a) {
190 __int64_t a64 = _mm_cvtm64_si64(a.x);
191 Eigen::half h = raw_uint16_to_half(
static_cast<unsigned short>(a64));
192 float f1 =
static_cast<float>(h);
193 h = raw_uint16_to_half(
static_cast<unsigned short>(a64 >> 16));
194 float f2 =
static_cast<float>(h);
195 h = raw_uint16_to_half(
static_cast<unsigned short>(a64 >> 32));
196 float f3 =
static_cast<float>(h);
197 h = raw_uint16_to_half(
static_cast<unsigned short>(a64 >> 48));
198 float f4 =
static_cast<float>(h);
199 return _mm_set_ps(f4, f3, f2, f1);
// Cast traits for the float -> Eigen::half conversion.
// NOTE(review): only the opening of the struct is visible in this view; its
// members and closing brace are missing, so the trait values cannot be
// documented from here. Confirm against the original header.
203struct type_casting_traits<float,
Eigen::half> {
// Casts a packet of four floats to a packet of four half-precision values by
// converting each element individually.
// NOTE(review): several lines are missing from this view — nothing visible
// writes `a` into `aux`, `result` is used without a visible declaration, and
// there is no visible return or closing brace. The numbers fused onto the line
// starts look like listing line numbers (213, 218-219 and 221 are absent).
// Confirm against the original header.
211template<> EIGEN_STRONG_INLINE Packet4h pcast<Packet4f, Packet4h>(
const Packet4f& a) {
// 16-byte aligned scratch buffer for the four float lanes.
212 EIGEN_ALIGN16
float aux[4];
// Construct one Eigen::half from each float element.
214 Eigen::half h0(aux[0]);
215 Eigen::half h1(aux[1]);
216 Eigen::half h2(aux[2]);
217 Eigen::half h3(aux[3]);
// _mm_set_pi16 takes its arguments from the highest 16-bit lane down to the
// lowest, so h0 lands in lane 0. The `.x` member is read from each half
// value — presumably its raw 16-bit storage; verify against Eigen::half.
220 result.x = _mm_set_pi16(h3.x, h2.x, h1.x, h0.x);
Namespace containing all symbols from the Eigen library.
Definition Core:137