Eigen  3.4.90 (git rev 5a9f66fb35d03a4da9ef8976e67a61b30aa16dcf)
 
Loading...
Searching...
No Matches
arch/SSE/MathFunctions.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2007 Julien Pommier
5// Copyright (C) 2009 Gael Guennebaud <[email protected]>
6//
7// This Source Code Form is subject to the terms of the Mozilla
8// Public License v. 2.0. If a copy of the MPL was not distributed
9// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10
11/* The sin and cos functions of this file come from
12 * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
13 */
14
15#ifndef EIGEN_MATH_FUNCTIONS_SSE_H
16#define EIGEN_MATH_FUNCTIONS_SSE_H
17
18// IWYU pragma: private
19#include "../../InternalHeaderCheck.h"
20
21namespace Eigen {
22
23namespace internal {
24
// Invoke the generic math-function instantiation macros for the SSE
// single-precision (Packet4f) and double-precision (Packet2d) packet types.
// NOTE(review): both macros are defined outside this file; presumably they
// emit the packet specializations of the generic math functions for the
// given packet type -- confirm against their definition.
25EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet4f)
26EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(Packet2d)
27
28// Notice that for newer processors, it is counterproductive to use Newton
29// iteration for square root. In particular, Skylake and Zen2 processors
30// have approximately doubled throughput of the _mm_sqrt_ps instruction
31// compared to their predecessors.
32template <>
33EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f psqrt<Packet4f>(const Packet4f& x) {
34 return _mm_sqrt_ps(x);
35}
36template <>
37EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d psqrt<Packet2d>(const Packet2d& x) {
38 return _mm_sqrt_pd(x);
39}
40template <>
41EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16b psqrt<Packet16b>(const Packet16b& x) {
42 return x;
43}
44
45#if EIGEN_FAST_MATH
46// Even on Skylake, using Newton iteration is a win for reciprocal square root.
47template <>
48EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f prsqrt<Packet4f>(const Packet4f& x) {
49 return generic_rsqrt_newton_step<Packet4f, /*Steps=*/1>::run(x, _mm_rsqrt_ps(x));
50}
51
52#ifdef EIGEN_VECTORIZE_FMA
53// Trying to speed up reciprocal using Newton-Raphson is counterproductive
54// unless FMA is available. Without FMA pdiv(pset1<Packet>(Scalar(1),a)) is
55// 30% faster.
56template <>
57EIGEN_STRONG_INLINE Packet4f preciprocal<Packet4f>(const Packet4f& x) {
58 return generic_reciprocal_newton_step<Packet4f, /*Steps=*/1>::run(x, _mm_rcp_ps(x));
59}
60#endif
61
62#endif
63
64} // end namespace internal
65
66namespace numext {
67
68template <>
69EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float sqrt(const float& x) {
70 return internal::pfirst(internal::Packet4f(_mm_sqrt_ss(_mm_set_ss(x))));
71}
72
73template <>
74EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double sqrt(const double& x) {
75#if EIGEN_COMP_GNUC_STRICT
76 // This works around a GCC bug generating poor code for _mm_sqrt_pd
77 // See https://gitlab.com/libeigen/eigen/commit/8dca9f97e38970
78 return internal::pfirst(internal::Packet2d(__builtin_ia32_sqrtsd(_mm_set_sd(x))));
79#else
80 return internal::pfirst(internal::Packet2d(_mm_sqrt_pd(_mm_set_sd(x))));
81#endif
82}
83
84} // namespace numext
85
86} // end namespace Eigen
87
88#endif // EIGEN_MATH_FUNCTIONS_SSE_H
Namespace containing all symbols from the Eigen library.
Definition Core:137