2#include "../../InternalHeaderCheck.h"
7#if EIGEN_ARCH_ARM && EIGEN_COMP_CLANG
// Specialization of gebp_traits for float * float on Architecture::NEON with
// GEBPPacketFull. Anything not redefined below is inherited from the
// Architecture::Generic specialization of the same traits class.
// NOTE(review): this listing looks like an extracted fragment; closing braces
// and a few source lines are not visible here.
13struct gebp_traits<float, float, false, false, Architecture::NEON, GEBPPacketFull>
14 : gebp_traits<float, float, false, false, Architecture::Generic, GEBPPacketFull> {
// acc: accumulates r += c * alpha with one vmla.f32 on quad (q) registers.
//   c, alpha - read-only vector inputs of the asm statement.
//   r        - read-write operand; holds the accumulator on entry and the
//              result on exit.
// The asm statement is declared volatile, so the compiler must keep it.
// NOTE(review): presumably the hand-written instruction is there to control
// register allocation under Clang on 32-bit ARM - confirm against upstream.
15 EIGEN_STRONG_INLINE
void acc(
const AccPacket& c,
const ResPacket& alpha, ResPacket& r)
const {
18 asm volatile(
"vmla.f32 %q[r], %q[c], %q[alpha]" : [r]
"+w"(r) : [c]
"w"(c), [alpha]
"w"(alpha) :);
// madd overload taking a plain Packet4f right-hand side. The fourth argument
// and the lane argument are unnamed, hence unused by this overload.
// NOTE(review): the body of this overload is not visible in this excerpt.
21 template <
typename LaneIdType>
22 EIGEN_STRONG_INLINE
void madd(
const Packet4f& a,
const Packet4f& b, Packet4f& c, Packet4f&,
const LaneIdType&)
const {
// madd overload taking a QuadPacket right-hand side: picks the element for
// the requested lane with b.get(lane) and forwards it, together with the
// remaining arguments, to madd.
26 template <
typename LaneIdType>
27 EIGEN_STRONG_INLINE
void madd(
const Packet4f& a,
const QuadPacket<Packet4f>& b, Packet4f& c, Packet4f& tmp,
28 const LaneIdType& lane)
const {
29 madd(a, b.get(lane), c, tmp, lane);
// Fallback value for EIGEN_NEON_GEBP_NR. It only takes effect when the macro
// has not been defined earlier, so an earlier definition wins. The macro is
// used as the nr enumerator of the gebp_traits specializations in this file.
37#ifndef EIGEN_NEON_GEBP_NR
38#define EIGEN_NEON_GEBP_NR 8
// Specialization of gebp_traits for float * float on Architecture::NEON with
// GEBPPacketFull, built on top of the Architecture::Generic specialization.
// The right-hand side is handled either as one scalar (RhsPacket) or as four
// floats packed in one float32x4_t (RhsPacketx4).
// NOTE(review): this listing looks like an extracted fragment; closing braces,
// preprocessor else/endif lines and some template headers are not visible.
42struct gebp_traits<float, float, false, false, Architecture::NEON, GEBPPacketFull>
43 : gebp_traits<float, float, false, false, Architecture::Generic, GEBPPacketFull> {
// A single rhs coefficient is kept as a plain scalar.
44 typedef float RhsPacket;
// Four rhs coefficients are kept together in one 128-bit vector.
45 typedef float32x4_t RhsPacketx4;
// nr takes its value from EIGEN_NEON_GEBP_NR.
46 enum { nr = EIGEN_NEON_GEBP_NR };
// loadRhs (scalar): copies the single value b[0] into dest.
47 EIGEN_STRONG_INLINE
void loadRhs(
const RhsScalar* b, RhsPacket& dest)
const { dest = *b; }
// loadRhs (x4): loads the four consecutive floats b[0..3] with vld1q_f32.
49 EIGEN_STRONG_INLINE
void loadRhs(
const RhsScalar* b, RhsPacketx4& dest)
const { dest = vld1q_f32(b); }
// updateRhs (scalar): re-reads b[0] into dest.
51 EIGEN_STRONG_INLINE
void updateRhs(
const RhsScalar* b, RhsPacket& dest)
const { dest = *b; }
// updateRhs (x4): intentionally empty; both arguments are ignored.
53 EIGEN_STRONG_INLINE
void updateRhs(
const RhsScalar*, RhsPacketx4&)
const {}
// loadRhsQuad: identical to the scalar loadRhs, to which it delegates.
55 EIGEN_STRONG_INLINE
void loadRhsQuad(
const RhsScalar* b, RhsPacket& dest)
const { loadRhs(b, dest); }
// madd (scalar rhs): c = c + a * b, with the scalar b applied to every lane
// of a through vfmaq_n_f32. The fourth argument is unnamed and unused.
57 EIGEN_STRONG_INLINE
void madd(
const LhsPacket& a,
const RhsPacket& b, AccPacket& c, RhsPacket& ,
58 const FixedInt<0>&)
const {
59 c = vfmaq_n_f32(c, a, b);
// madd (x4 rhs): the FixedInt<K> tag selects which of the four rhs values is
// used; each overload forwards to madd_helper<K>. The fourth argument is
// unnamed and unused.
64 EIGEN_STRONG_INLINE
void madd(
const LhsPacket& a,
const RhsPacketx4& b, AccPacket& c, RhsPacket& ,
65 const FixedInt<0>&)
const {
66 madd_helper<0>(a, b, c);
68 EIGEN_STRONG_INLINE
void madd(
const LhsPacket& a,
const RhsPacketx4& b, AccPacket& c, RhsPacket& ,
69 const FixedInt<1>&)
const {
70 madd_helper<1>(a, b, c);
72 EIGEN_STRONG_INLINE
void madd(
const LhsPacket& a,
const RhsPacketx4& b, AccPacket& c, RhsPacket& ,
73 const FixedInt<2>&)
const {
74 madd_helper<2>(a, b, c);
76 EIGEN_STRONG_INLINE
void madd(
const LhsPacket& a,
const RhsPacketx4& b, AccPacket& c, RhsPacket& ,
77 const FixedInt<3>&)
const {
78 madd_helper<3>(a, b, c);
// madd_helper: c += a * b[LaneID], i.e. a fused multiply-add of a by one
// selected element of the rhs vector.
// NOTE(review): LaneID is presumably a template parameter; its declaration
// and the conditions that pick one of the four asm statements are not
// visible in this excerpt.
83 EIGEN_STRONG_INLINE
void madd_helper(
const LhsPacket& a,
const RhsPacketx4& b, AccPacket& c)
const {
// Code guarded by EIGEN_GNUC_STRICT_LESS_THAN(9, 0, 0): one inline-asm fmla
// (by element) per lane index 0..3. In each statement c is a read-write
// operand and a, b are inputs.
// NOTE(review): presumably a workaround for compilers older than GCC 9 -
// confirm against the macro definition.
84#if EIGEN_GNUC_STRICT_LESS_THAN(9, 0, 0)
89 asm(
"fmla %0.4s, %1.4s, %2.s[0]\n" :
"+w"(c) :
"w"(a),
"w"(b) :);
91 asm(
"fmla %0.4s, %1.4s, %2.s[1]\n" :
"+w"(c) :
"w"(a),
"w"(b) :);
93 asm(
"fmla %0.4s, %1.4s, %2.s[2]\n" :
"+w"(c) :
"w"(a),
"w"(b) :);
95 asm(
"fmla %0.4s, %1.4s, %2.s[3]\n" :
"+w"(c) :
"w"(a),
"w"(b) :);
// Intrinsic form of the same operation, taking the lane index directly.
97 c = vfmaq_laneq_f32(c, a, b, LaneID);
// Specialization of gebp_traits for double * double on Architecture::NEON,
// built on top of the Architecture::Generic specialization. The right-hand
// side is handled either as one scalar (RhsPacket) or as four doubles split
// over two float64x2_t members B_0 and B_1 (RhsPacketx4).
// NOTE(review): this listing looks like an extracted fragment; closing braces,
// the declaration that wraps B_0/B_1, preprocessor else/endif lines and the
// first lane condition of each chain are not visible.
103struct gebp_traits<double, double, false, false, Architecture::NEON>
104 : gebp_traits<double, double, false, false, Architecture::Generic> {
// A single rhs coefficient is kept as a plain scalar.
105 typedef double RhsPacket;
// nr takes its value from EIGEN_NEON_GEBP_NR.
106 enum { nr = EIGEN_NEON_GEBP_NR };
// Storage for four rhs doubles: B_0 receives b[0..1] and B_1 receives b[2..3]
// (see the x4 loadRhs below).
108 float64x2_t B_0, B_1;
// loadRhs (scalar): copies the single value b[0] into dest.
111 EIGEN_STRONG_INLINE
void loadRhs(
const RhsScalar* b, RhsPacket& dest)
const { dest = *b; }
// loadRhs (x4): loads b[0..1] into dest.B_0 and b[2..3] into dest.B_1, so b
// must point at four readable doubles.
113 EIGEN_STRONG_INLINE
void loadRhs(
const RhsScalar* b, RhsPacketx4& dest)
const {
114 dest.B_0 = vld1q_f64(b);
115 dest.B_1 = vld1q_f64(b + 2);
// updateRhs (scalar): same as the scalar loadRhs, to which it delegates.
118 EIGEN_STRONG_INLINE
void updateRhs(
const RhsScalar* b, RhsPacket& dest)
const { loadRhs(b, dest); }
// updateRhs (x4): intentionally empty; both arguments are ignored.
120 EIGEN_STRONG_INLINE
void updateRhs(
const RhsScalar*, RhsPacketx4&)
const {}
// loadRhsQuad: identical to the scalar loadRhs, to which it delegates.
122 EIGEN_STRONG_INLINE
void loadRhsQuad(
const RhsScalar* b, RhsPacket& dest)
const { loadRhs(b, dest); }
// madd (scalar rhs): c = c + a * b, with the scalar b applied to every lane
// of a through vfmaq_n_f64. The fourth argument is unnamed and unused.
124 EIGEN_STRONG_INLINE
void madd(
const LhsPacket& a,
const RhsPacket& b, AccPacket& c, RhsPacket& ,
125 const FixedInt<0>&)
const {
126 c = vfmaq_n_f64(c, a, b);
// madd (x4 rhs): the FixedInt<K> tag selects which of the four rhs values is
// used; each overload forwards to madd_helper<K>. The fourth argument is
// unnamed and unused.
132 EIGEN_STRONG_INLINE
void madd(
const LhsPacket& a,
const RhsPacketx4& b, AccPacket& c, RhsPacket& ,
133 const FixedInt<0>&)
const {
134 madd_helper<0>(a, b, c);
136 EIGEN_STRONG_INLINE
void madd(
const LhsPacket& a,
const RhsPacketx4& b, AccPacket& c, RhsPacket& ,
137 const FixedInt<1>&)
const {
138 madd_helper<1>(a, b, c);
140 EIGEN_STRONG_INLINE
void madd(
const LhsPacket& a,
const RhsPacketx4& b, AccPacket& c, RhsPacket& ,
141 const FixedInt<2>&)
const {
142 madd_helper<2>(a, b, c);
144 EIGEN_STRONG_INLINE
void madd(
const LhsPacket& a,
const RhsPacketx4& b, AccPacket& c, RhsPacket& ,
145 const FixedInt<3>&)
const {
146 madd_helper<3>(a, b, c);
// madd_helper<LaneID>: c += a * (rhs value number LaneID). Because each
// float64x2_t holds two doubles, the visible branches map LaneID 1 to lane 1
// of B_0, LaneID 2 to lane 0 of B_1 and LaneID 3 to lane 1 of B_1; the
// statement preceding the first else-if uses lane 0 of B_0.
150 template <
int LaneID>
151 EIGEN_STRONG_INLINE
void madd_helper(
const LhsPacket& a,
const RhsPacketx4& b, AccPacket& c)
const {
// Code guarded by EIGEN_GNUC_STRICT_LESS_THAN(9, 0, 0): inline-asm fmla (by
// element) with c as read-write operand and a plus one half of b as inputs.
// NOTE(review): presumably a workaround for compilers older than GCC 9 -
// confirm against the macro definition.
152#if EIGEN_GNUC_STRICT_LESS_THAN(9, 0, 0)
157 asm(
"fmla %0.2d, %1.2d, %2.d[0]\n" :
"+w"(c) :
"w"(a),
"w"(b.B_0) :);
158 else if (LaneID == 1)
159 asm(
"fmla %0.2d, %1.2d, %2.d[1]\n" :
"+w"(c) :
"w"(a),
"w"(b.B_0) :);
160 else if (LaneID == 2)
161 asm(
"fmla %0.2d, %1.2d, %2.d[0]\n" :
"+w"(c) :
"w"(a),
"w"(b.B_1) :);
162 else if (LaneID == 3)
163 asm(
"fmla %0.2d, %1.2d, %2.d[1]\n" :
"+w"(c) :
"w"(a),
"w"(b.B_1) :);
// Intrinsic form of the same selection, using vfmaq_laneq_f64 with a literal
// lane index of 0 or 1 on the matching half of b.
166 c = vfmaq_laneq_f64(c, a, b.B_0, 0);
167 else if (LaneID == 1)
168 c = vfmaq_laneq_f64(c, a, b.B_0, 1);
169 else if (LaneID == 2)
170 c = vfmaq_laneq_f64(c, a, b.B_1, 0);
171 else if (LaneID == 3)
172 c = vfmaq_laneq_f64(c, a, b.B_1, 1);
181#if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC && EIGEN_COMP_CLANG
// Specialization of gebp_traits for half * half on Architecture::NEON, built
// on top of the Architecture::Generic specialization. The right-hand side is
// handled either as one scalar half (RhsPacket) or as four halves packed in
// one float16x4_t (RhsPacketx4).
// NOTE(review): this listing looks like an extracted fragment; closing braces
// are not visible here.
184struct gebp_traits<half, half, false, false, Architecture::NEON>
185 : gebp_traits<half, half, false, false, Architecture::Generic> {
// A single rhs coefficient is kept as a plain scalar.
186 typedef half RhsPacket;
// Four rhs coefficients are kept together in one 64-bit vector.
187 typedef float16x4_t RhsPacketx4;
// Four-lane half vector type accepted by the second scalar madd overload.
188 typedef float16x4_t PacketHalf;
// nr takes its value from EIGEN_NEON_GEBP_NR.
189 enum { nr = EIGEN_NEON_GEBP_NR };
// loadRhs (scalar): copies the single value b[0] into dest.
191 EIGEN_STRONG_INLINE
void loadRhs(
const RhsScalar* b, RhsPacket& dest)
const { dest = *b; }
// loadRhs (x4): loads the four consecutive halves b[0..3] with vld1_f16. The
// pointer is cast to const __fp16* because that is what the intrinsic takes.
// NOTE(review): assumes RhsScalar is layout-compatible with __fp16 - confirm.
193 EIGEN_STRONG_INLINE
void loadRhs(
const RhsScalar* b, RhsPacketx4& dest)
const { dest = vld1_f16((
const __fp16*)b); }
// updateRhs (scalar): re-reads b[0] into dest.
195 EIGEN_STRONG_INLINE
void updateRhs(
const RhsScalar* b, RhsPacket& dest)
const { dest = *b; }
// updateRhs (x4): intentionally empty; both arguments are ignored.
197 EIGEN_STRONG_INLINE
void updateRhs(
const RhsScalar*, RhsPacketx4&)
const {}
// loadRhsQuad: not supported for this specialization. Both arguments are
// ignored and the only visible statement is an eigen_assert that always
// fails with an explanatory message.
199 EIGEN_STRONG_INLINE
void loadRhsQuad(
const RhsScalar*, RhsPacket&)
const {
202 eigen_assert(
false &&
"Cannot loadRhsQuad for a scalar RHS.");
// madd (scalar rhs, LhsPacket/AccPacket operands): c = c + a * b with the
// scalar b applied to every lane, through vfmaq_n_f16. The fourth argument is
// unnamed and unused.
205 EIGEN_STRONG_INLINE
void madd(
const LhsPacket& a,
const RhsPacket& b, AccPacket& c, RhsPacket& ,
206 const FixedInt<0>&)
const {
207 c = vfmaq_n_f16(c, a, b);
// madd (scalar rhs, PacketHalf operands): same operation on the four-lane
// float16x4_t type, through vfma_n_f16.
209 EIGEN_STRONG_INLINE
void madd(
const PacketHalf& a,
const RhsPacket& b, PacketHalf& c, RhsPacket& ,
210 const FixedInt<0>&)
const {
211 c = vfma_n_f16(c, a, b);
// madd (x4 rhs): the FixedInt<K> tag selects which of the four rhs values is
// used; each overload forwards to madd_helper<K>. The fourth argument is
// unnamed and unused.
216 EIGEN_STRONG_INLINE
void madd(
const LhsPacket& a,
const RhsPacketx4& b, AccPacket& c, RhsPacket& ,
217 const FixedInt<0>&)
const {
218 madd_helper<0>(a, b, c);
220 EIGEN_STRONG_INLINE
void madd(
const LhsPacket& a,
const RhsPacketx4& b, AccPacket& c, RhsPacket& ,
221 const FixedInt<1>&)
const {
222 madd_helper<1>(a, b, c);
224 EIGEN_STRONG_INLINE
void madd(
const LhsPacket& a,
const RhsPacketx4& b, AccPacket& c, RhsPacket& ,
225 const FixedInt<2>&)
const {
226 madd_helper<2>(a, b, c);
228 EIGEN_STRONG_INLINE
void madd(
const LhsPacket& a,
const RhsPacketx4& b, AccPacket& c, RhsPacket& ,
229 const FixedInt<3>&)
const {
230 madd_helper<3>(a, b, c);
// madd_helper<LaneID>: c += a * b[LaneID], using vfmaq_lane_f16 so that the
// lane of the four-element rhs vector is chosen at compile time.
234 template <
int LaneID>
235 EIGEN_STRONG_INLINE
void madd_helper(
const LhsPacket& a,
const RhsPacketx4& b, AccPacket& c)
const {
236 c = vfmaq_lane_f16(c, a, b, LaneID);
Namespace containing all symbols from the Eigen library.
Definition Core:137