Eigen  3.4.90 (git rev 5a9f66fb35d03a4da9ef8976e67a61b30aa16dcf)
 
Loading...
Searching...
No Matches
GPU/TypeCasting.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2016 Benoit Steiner <[email protected]>
5//
6// This Source Code Form is subject to the terms of the Mozilla
7// Public License v. 2.0. If a copy of the MPL was not distributed
8// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
10#ifndef EIGEN_TYPE_CASTING_GPU_H
11#define EIGEN_TYPE_CASTING_GPU_H
12
13// IWYU pragma: private
14#include "../../InternalHeaderCheck.h"
15
16namespace Eigen {
17
18namespace internal {
19
20#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \
21 (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
22
23template <>
24struct type_casting_traits<Eigen::half, float> {
25 enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
26};
27
28template <>
29EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) {
30 float2 r1 = __half22float2(a);
31 float2 r2 = __half22float2(b);
32 return make_float4(r1.x, r1.y, r2.x, r2.y);
33}
34
35template <>
36EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pcast<float4, Packet4h2>(const float4& a, const float4& b) {
37 Packet4h2 r;
38 half2* r_alias = reinterpret_cast<half2*>(&r);
39 r_alias[0] = __floats2half2_rn(a.x, a.y);
40 r_alias[1] = __floats2half2_rn(a.z, a.w);
41 r_alias[2] = __floats2half2_rn(b.x, b.y);
42 r_alias[3] = __floats2half2_rn(b.z, b.w);
43 return r;
44}
45
46template <>
47struct type_casting_traits<float, Eigen::half> {
48 enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
49};
50
51template <>
52EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<Packet4h2, float4>(const Packet4h2& a) {
53 // Simply discard the second half of the input
54 float4 r;
55 const half2* a_alias = reinterpret_cast<const half2*>(&a);
56 float2 r1 = __half22float2(a_alias[0]);
57 float2 r2 = __half22float2(a_alias[1]);
58 r.x = static_cast<float>(r1.x);
59 r.y = static_cast<float>(r1.y);
60 r.z = static_cast<float>(r2.x);
61 r.w = static_cast<float>(r2.y);
62 return r;
63}
64
65template <>
66EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
67 // Simply discard the second half of the input
68 return __floats2half2_rn(a.x, a.y);
69}
70
71#endif
72
73} // end namespace internal
74
75} // end namespace Eigen
76
77#endif // EIGEN_TYPE_CASTING_GPU_H
Namespace containing all symbols from the Eigen library.
Definition Core:137