#ifndef OPENCV_HAL_VSX_HPP
#define OPENCV_HAL_VSX_HPP

#include "opencv2/core/utility.hpp"

#define CV_SIMD128_64F 1
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
struct v_uint8x16
{
    typedef uchar lane_type;
    enum { nlanes = 16 };
    vec_uchar16 val;

    explicit v_uint8x16(const vec_uchar16& v) : val(v)
    {}
    v_uint8x16()
    {}
    v_uint8x16(vec_bchar16 v) : val(vec_uchar16_c(v))
    {}
    v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7,
               uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15)
        : val(vec_uchar16_set(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15))
    {}

    static inline v_uint8x16 zero() { return v_uint8x16(vec_uchar16_z); }

    uchar get0() const
    { return vec_extract(val, 0); }
};
struct v_int8x16
{
    typedef schar lane_type;
    enum { nlanes = 16 };
    vec_char16 val;

    explicit v_int8x16(const vec_char16& v) : val(v)
    {}
    v_int8x16()
    {}
    v_int8x16(vec_bchar16 v) : val(vec_char16_c(v))
    {}
    v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7,
              schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15)
        : val(vec_char16_set(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15))
    {}

    static inline v_int8x16 zero() { return v_int8x16(vec_char16_z); }

    schar get0() const
    { return vec_extract(val, 0); }
};
struct v_uint16x8
{
    typedef ushort lane_type;
    enum { nlanes = 8 };
    vec_ushort8 val;

    explicit v_uint16x8(const vec_ushort8& v) : val(v)
    {}
    v_uint16x8()
    {}
    v_uint16x8(vec_bshort8 v) : val(vec_ushort8_c(v))
    {}
    v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7)
        : val(vec_ushort8_set(v0, v1, v2, v3, v4, v5, v6, v7))
    {}

    static inline v_uint16x8 zero() { return v_uint16x8(vec_ushort8_z); }

    ushort get0() const
    { return vec_extract(val, 0); }
};
struct v_int16x8
{
    typedef short lane_type;
    enum { nlanes = 8 };
    vec_short8 val;

    explicit v_int16x8(const vec_short8& v) : val(v)
    {}
    v_int16x8()
    {}
    v_int16x8(vec_bshort8 v) : val(vec_short8_c(v))
    {}
    v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7)
        : val(vec_short8_set(v0, v1, v2, v3, v4, v5, v6, v7))
    {}

    static inline v_int16x8 zero() { return v_int16x8(vec_short8_z); }

    short get0() const
    { return vec_extract(val, 0); }
};
struct v_uint32x4
{
    typedef unsigned lane_type;
    enum { nlanes = 4 };
    vec_uint4 val;

    explicit v_uint32x4(const vec_uint4& v) : val(v)
    {}
    v_uint32x4()
    {}
    v_uint32x4(vec_bint4 v) : val(vec_uint4_c(v))
    {}
    v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) : val(vec_uint4_set(v0, v1, v2, v3))
    {}

    static inline v_uint32x4 zero() { return v_uint32x4(vec_uint4_z); }

    unsigned get0() const
    { return vec_extract(val, 0); }
};
struct v_int32x4
{
    typedef int lane_type;
    enum { nlanes = 4 };
    vec_int4 val;

    explicit v_int32x4(const vec_int4& v) : val(v)
    {}
    v_int32x4()
    {}
    v_int32x4(vec_bint4 v) : val(vec_int4_c(v))
    {}
    v_int32x4(int v0, int v1, int v2, int v3) : val(vec_int4_set(v0, v1, v2, v3))
    {}

    static inline v_int32x4 zero() { return v_int32x4(vec_int4_z); }

    int get0() const
    { return vec_extract(val, 0); }
};
struct v_float32x4
{
    typedef float lane_type;
    enum { nlanes = 4 };
    vec_float4 val;

    explicit v_float32x4(const vec_float4& v) : val(v)
    {}
    v_float32x4()
    {}
    v_float32x4(vec_bint4 v) : val(vec_float4_c(v))
    {}
    v_float32x4(float v0, float v1, float v2, float v3) : val(vec_float4_set(v0, v1, v2, v3))
    {}

    static inline v_float32x4 zero() { return v_float32x4(vec_float4_z); }

    float get0() const
    { return vec_extract(val, 0); }
};
struct v_uint64x2
{
    typedef uint64 lane_type;
    enum { nlanes = 2 };
    vec_udword2 val;

    explicit v_uint64x2(const vec_udword2& v) : val(v)
    {}
    v_uint64x2()
    {}
    v_uint64x2(vec_bdword2 v) : val(vec_udword2_c(v))
    {}
    v_uint64x2(uint64 v0, uint64 v1) : val(vec_udword2_set(v0, v1))
    {}

    static inline v_uint64x2 zero() { return v_uint64x2(vec_udword2_z); }

    uint64 get0() const
    { return vec_extract(val, 0); }
};
struct v_int64x2
{
    typedef int64 lane_type;
    enum { nlanes = 2 };
    vec_dword2 val;

    explicit v_int64x2(const vec_dword2& v) : val(v)
    {}
    v_int64x2()
    {}
    v_int64x2(vec_bdword2 v) : val(vec_dword2_c(v))
    {}
    v_int64x2(int64 v0, int64 v1) : val(vec_dword2_set(v0, v1))
    {}

    static inline v_int64x2 zero() { return v_int64x2(vec_dword2_z); }

    int64 get0() const
    { return vec_extract(val, 0); }
};
struct v_float64x2
{
    typedef double lane_type;
    enum { nlanes = 2 };
    vec_double2 val;

    explicit v_float64x2(const vec_double2& v) : val(v)
    {}
    v_float64x2()
    {}
    v_float64x2(vec_bdword2 v) : val(vec_double2_c(v))
    {}
    v_float64x2(double v0, double v1) : val(vec_double2_set(v0, v1))
    {}

    static inline v_float64x2 zero() { return v_float64x2(vec_double2_z); }

    double get0() const
    { return vec_extract(val, 0); }
};
#define OPENCV_HAL_IMPL_VSX_EXTRACT_N(_Tpvec, _Tp) \
template<int i> inline _Tp v_extract_n(VSX_UNUSED(_Tpvec v)) { return vec_extract(v.val, i); }

OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_uint8x16, uchar)
OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_int8x16, schar)
OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_uint16x8, ushort)
OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_int16x8, short)
OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_uint32x4, uint)
OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_int32x4, int)
OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_uint64x2, uint64)
OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_int64x2, int64)
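// Usage sketch (illustrative): the lane index is a compile-time constant.
//   v_int32x4 v(10, 20, 30, 40);
//   int lane2 = v_extract_n<2>(v);   // 30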
#define OPENCV_HAL_IMPL_VSX_INITVEC(_Tpvec, _Tp, suffix, cast)                       \
inline _Tpvec v_setzero_##suffix() { return _Tpvec(vec_splats((_Tp)0)); }            \
inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(vec_splats((_Tp)v));}         \
template<typename _Tpvec0> inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0 &a) \
{ return _Tpvec((cast)a.val); }

OPENCV_HAL_IMPL_VSX_INITVEC(v_uint8x16, uchar, u8, vec_uchar16)
OPENCV_HAL_IMPL_VSX_INITVEC(v_int8x16, schar, s8, vec_char16)
OPENCV_HAL_IMPL_VSX_INITVEC(v_uint16x8, ushort, u16, vec_ushort8)
OPENCV_HAL_IMPL_VSX_INITVEC(v_int16x8, short, s16, vec_short8)
OPENCV_HAL_IMPL_VSX_INITVEC(v_uint32x4, uint, u32, vec_uint4)
OPENCV_HAL_IMPL_VSX_INITVEC(v_int32x4, int, s32, vec_int4)
OPENCV_HAL_IMPL_VSX_INITVEC(v_uint64x2, uint64, u64, vec_udword2)
OPENCV_HAL_IMPL_VSX_INITVEC(v_int64x2, int64, s64, vec_dword2)
OPENCV_HAL_IMPL_VSX_INITVEC(v_float32x4, float, f32, vec_float4)
OPENCV_HAL_IMPL_VSX_INITVEC(v_float64x2, double, f64, vec_double2)
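// For reference, OPENCV_HAL_IMPL_VSX_INITVEC(v_uint8x16, uchar, u8, vec_uchar16) expands to roughly:
//   inline v_uint8x16 v_setzero_u8() { return v_uint8x16(vec_splats((uchar)0)); }
//   inline v_uint8x16 v_setall_u8(uchar v) { return v_uint8x16(vec_splats((uchar)v)); }
//   template<typename _Tpvec0> inline v_uint8x16 v_reinterpret_as_u8(const _Tpvec0 &a)
//   { return v_uint8x16((vec_uchar16)a.val); }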
#define OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, ld, ld_a, st, st_a)          \
inline _Tpvec v_load(const _Tp* ptr)                                              \
{ return _Tpvec(ld(0, ptr)); }                                                    \
inline _Tpvec v_load_aligned(VSX_UNUSED(const _Tp* ptr))                          \
{ return _Tpvec(ld_a(0, ptr)); }                                                  \
inline _Tpvec v_load_low(const _Tp* ptr)                                          \
{ return _Tpvec(vec_ld_l8(ptr)); }                                                \
inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1)                     \
{ return _Tpvec(vec_mergesqh(vec_ld_l8(ptr0), vec_ld_l8(ptr1))); }                \
inline void v_store(_Tp* ptr, const _Tpvec& a)                                    \
{ st(a.val, 0, ptr); }                                                            \
inline void v_store_aligned(VSX_UNUSED(_Tp* ptr), const _Tpvec& a)                \
{ st_a(a.val, 0, ptr); }                                                          \
inline void v_store_aligned_nocache(VSX_UNUSED(_Tp* ptr), const _Tpvec& a)        \
{ st_a(a.val, 0, ptr); }                                                          \
inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode)               \
{ if(mode == hal::STORE_UNALIGNED) st(a.val, 0, ptr); else st_a(a.val, 0, ptr); } \
inline void v_store_low(_Tp* ptr, const _Tpvec& a)                                \
{ vec_st_l8(a.val, ptr); }                                                        \
inline void v_store_high(_Tp* ptr, const _Tpvec& a)                               \
{ vec_st_h8(a.val, ptr); }
// The pointer may or may not be 16-byte aligned, so separate loaders are selected per compiler.
#ifdef CV_COMPILER_VSX_BROKEN_ALIGNED
    #define OPENCV_HAL_IMPL_VSX_LOADSTORE(_Tpvec, _Tp) \
    OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, vsx_ld, vsx_ld, vsx_st, vsx_st)
#else
    #define OPENCV_HAL_IMPL_VSX_LOADSTORE(_Tpvec, _Tp) \
    OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, vsx_ld, vec_ld, vsx_st, vec_st)
#endif

OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint8x16, uchar)
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int8x16, schar)
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint16x8, ushort)
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int16x8, short)
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint32x4, uint)
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int32x4, int)

OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_float64x2, double, vsx_ld, vsx_ld, vsx_st, vsx_st)
OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_uint64x2, uint64, vsx_ld2, vsx_ld2, vsx_st2, vsx_st2)
OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_int64x2, int64, vsx_ld2, vsx_ld2, vsx_st2, vsx_st2)
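// Illustrative usage of the generated loaders/stores (s32 shown; other suffixes are analogous):
//   int buf[4] = {1, 2, 3, 4}, out[4];
//   v_int32x4 x = v_load(buf);            // unaligned load
//   v_int32x4 y = v_setall_s32(2);
//   v_store(out, x + y);                  // out = {3, 4, 5, 6}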
#define OPENCV_HAL_IMPL_VSX_INTERLEAVE(_Tp, _Tpvec)                         \
inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, _Tpvec& b)       \
{ vec_ld_deinterleave(ptr, a.val, b.val); }                                 \
inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a,                  \
                                _Tpvec& b, _Tpvec& c)                       \
{ vec_ld_deinterleave(ptr, a.val, b.val, c.val); }                          \
inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, _Tpvec& b,       \
                                _Tpvec& c, _Tpvec& d)                       \
{ vec_ld_deinterleave(ptr, a.val, b.val, c.val, d.val); }                   \
inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b,  \
                               hal::StoreMode =hal::STORE_UNALIGNED)        \
{ vec_st_interleave(a.val, b.val, ptr); }                                   \
inline void v_store_interleave(_Tp* ptr, const _Tpvec& a,                   \
                               const _Tpvec& b, const _Tpvec& c,            \
                               hal::StoreMode =hal::STORE_UNALIGNED)        \
{ vec_st_interleave(a.val, b.val, c.val, ptr); }                            \
inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b,  \
                               const _Tpvec& c, const _Tpvec& d,            \
                               hal::StoreMode =hal::STORE_UNALIGNED)        \
{ vec_st_interleave(a.val, b.val, c.val, d.val, ptr); }

OPENCV_HAL_IMPL_VSX_INTERLEAVE(uchar, v_uint8x16)
OPENCV_HAL_IMPL_VSX_INTERLEAVE(schar, v_int8x16)
OPENCV_HAL_IMPL_VSX_INTERLEAVE(ushort, v_uint16x8)
OPENCV_HAL_IMPL_VSX_INTERLEAVE(short, v_int16x8)
OPENCV_HAL_IMPL_VSX_INTERLEAVE(uint, v_uint32x4)
OPENCV_HAL_IMPL_VSX_INTERLEAVE(int, v_int32x4)
OPENCV_HAL_IMPL_VSX_INTERLEAVE(int64, v_int64x2)
OPENCV_HAL_IMPL_VSX_INTERLEAVE(uint64, v_uint64x2)
/* Expand lower/upper halves to the wider element type */
#define OPENCV_HAL_IMPL_VSX_EXPAND(_Tpvec, _Tpwvec, _Tp, fl, fh) \
inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1)  \
{                                                                \
    b0.val = fh(a.val);                                          \
    b1.val = fl(a.val);                                          \
}                                                                \
inline _Tpwvec v_expand_low(const _Tpvec& a)                     \
{ return _Tpwvec(fh(a.val)); }                                   \
inline _Tpwvec v_expand_high(const _Tpvec& a)                    \
{ return _Tpwvec(fl(a.val)); }                                   \
inline _Tpwvec v_load_expand(const _Tp* ptr)                     \
{ return _Tpwvec(fh(vec_ld_l8(ptr))); }
// Load a single 4-byte word into the low part of a vector register.
#if !defined(CV_COMPILER_VSX_BROKEN_ASM)
    #define _LXSIWZX(out, ptr, T) __asm__ ("lxsiwzx %x0, 0, %1\r\n" : "=wa"(out) : "r" (ptr) : "memory");
#else
    #define _LXSIWZX(out, ptr, T) out = (T)vec_udword2_sp(*(uint32_t*)(ptr));
#endif

inline v_uint32x4 v_load_expand_q(const uchar* ptr)
{
    // zero-extend each of the low 4 bytes to 32 bits
    vec_uchar16 pmu = {8, 12, 12, 12, 9, 12, 12, 12, 10, 12, 12, 12, 11, 12, 12, 12};
    vec_uchar16 out;

    _LXSIWZX(out, ptr, vec_uchar16);
    out = vec_perm(out, out, pmu);
    return v_uint32x4((vec_uint4)out);
}

inline v_int32x4 v_load_expand_q(const schar* ptr)
{
    vec_char16 out;
    vec_short8 outs;
    vec_int4 outw;

    _LXSIWZX(out, ptr, vec_char16);
    outs = vec_unpackl(out);
    outw = vec_unpackh(outs);
    return v_int32x4(outw);
}
/* Pack (narrowing), with optional rounding right shift */
#define OPENCV_HAL_IMPL_VSX_PACK(_Tpvec, _Tp, _Tpwvec, _Tpvn, _Tpdel, sfnc, pkfnc, addfnc, pack) \
inline _Tpvec v_##pack(const _Tpwvec& a, const _Tpwvec& b)                                       \
{                                                                                                \
    return _Tpvec(pkfnc(a.val, b.val));                                                          \
}                                                                                                \
inline void v_##pack##_store(_Tp* ptr, const _Tpwvec& a)                                         \
{                                                                                                \
    vec_st_l8(pkfnc(a.val, a.val), ptr);                                                         \
}                                                                                                \
template<int n>                                                                                  \
inline _Tpvec v_rshr_##pack(const _Tpwvec& a, const _Tpwvec& b)                                  \
{                                                                                                \
    const __vector _Tpvn vn = vec_splats((_Tpvn)n);                                              \
    const __vector _Tpdel delta = vec_splats((_Tpdel)((_Tpdel)1 << (n-1)));                      \
    return _Tpvec(pkfnc(sfnc(addfnc(a.val, delta), vn), sfnc(addfnc(b.val, delta), vn)));        \
}                                                                                                \
template<int n>                                                                                  \
inline void v_rshr_##pack##_store(_Tp* ptr, const _Tpwvec& a)                                    \
{                                                                                                \
    const __vector _Tpvn vn = vec_splats((_Tpvn)n);                                              \
    const __vector _Tpdel delta = vec_splats((_Tpdel)((_Tpdel)1 << (n-1)));                      \
    vec_st_l8(pkfnc(sfnc(addfnc(a.val, delta), vn), delta), ptr);                                \
}
OPENCV_HAL_IMPL_VSX_PACK(v_uint8x16, uchar, v_uint16x8, unsigned short, unsigned short,
                         vec_sr, vec_packs, vec_adds, pack)
OPENCV_HAL_IMPL_VSX_PACK(v_int8x16, schar, v_int16x8, unsigned short, short,
                         vec_sra, vec_packs, vec_adds, pack)

OPENCV_HAL_IMPL_VSX_PACK(v_uint16x8, ushort, v_uint32x4, unsigned int, unsigned int,
                         vec_sr, vec_packs, vec_add, pack)
OPENCV_HAL_IMPL_VSX_PACK(v_int16x8, short, v_int32x4, unsigned int, int,
                         vec_sra, vec_packs, vec_add, pack)

OPENCV_HAL_IMPL_VSX_PACK(v_uint32x4, uint, v_uint64x2, unsigned long long, unsigned long long,
                         vec_sr, vec_pack, vec_add, pack)
OPENCV_HAL_IMPL_VSX_PACK(v_int32x4, int, v_int64x2, unsigned long long, long long,
                         vec_sra, vec_pack, vec_add, pack)

OPENCV_HAL_IMPL_VSX_PACK(v_uint8x16, uchar, v_int16x8, unsigned short, short,
                         vec_sra, vec_packsu, vec_adds, pack_u)
OPENCV_HAL_IMPL_VSX_PACK(v_uint16x8, ushort, v_int32x4, unsigned int, int,
                         vec_sra, vec_packsu, vec_add, pack_u)
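// v_rshr_pack<n> narrows with rounding: each wide lane becomes (x + (1 << (n-1))) >> n,
// then the two inputs are packed (saturating where pkfnc is vec_packs/vec_packsu).
// Illustrative, assuming the s32 -> s16 instantiation above:
//   v_int32x4 a(256, 257, 511, 512), b = a;
//   v_int16x8 r = v_rshr_pack<8>(a, b);   // lanes {1, 1, 2, 2, 1, 1, 2, 2}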
/** Pack boolean values **/
inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b)
{
    vec_uchar16 ab = vec_pack(a.val, b.val);
    return v_uint8x16(ab);
}

inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b,
                           const v_uint32x4& c, const v_uint32x4& d)
{
    vec_ushort8 ab = vec_pack(a.val, b.val);
    vec_ushort8 cd = vec_pack(c.val, d.val);
    return v_uint8x16(vec_pack(ab, cd));
}

inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c,
                           const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f,
                           const v_uint64x2& g, const v_uint64x2& h)
{
    vec_uint4 ab = vec_pack(a.val, b.val);
    vec_uint4 cd = vec_pack(c.val, d.val);
    vec_uint4 ef = vec_pack(e.val, f.val);
    vec_uint4 gh = vec_pack(g.val, h.val);

    vec_ushort8 abcd = vec_pack(ab, cd);
    vec_ushort8 efgh = vec_pack(ef, gh);
    return v_uint8x16(vec_pack(abcd, efgh));
}
template<typename _Tpvec>
inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1)
{
    b0.val = vec_mergeh(a0.val, a1.val);
    b1.val = vec_mergel(a0.val, a1.val);
}

template<typename _Tpvec>
inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b)
{ return _Tpvec(vec_mergesql(a.val, b.val)); }

template<typename _Tpvec>
inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b)
{ return _Tpvec(vec_mergesqh(a.val, b.val)); }

template<typename _Tpvec>
inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d)
{
    c.val = vec_mergesqh(a.val, b.val);
    d.val = vec_mergesql(a.val, b.val);
}
#define OPENCV_HAL_IMPL_VSX_BIN_OP(bin_op, _Tpvec, intrin)       \
inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(intrin(a.val, b.val)); }                         \
inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b)   \
{ a.val = intrin(a.val, b.val); return a; }
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint8x16, vec_adds)
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint8x16, vec_subs)
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int8x16, vec_adds)
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int8x16, vec_subs)
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint16x8, vec_adds)
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint16x8, vec_subs)
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int16x8, vec_adds)
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int16x8, vec_subs)
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint32x4, vec_add)
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint32x4, vec_sub)
OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_uint32x4, vec_mul)
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int32x4, vec_add)
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int32x4, vec_sub)
OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_int32x4, vec_mul)
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint64x2, vec_add)
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint64x2, vec_sub)
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int64x2, vec_add)
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int64x2, vec_sub)
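// For reference, OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int32x4, vec_add) expands to roughly:
//   inline v_int32x4 operator + (const v_int32x4& a, const v_int32x4& b)
//   { return v_int32x4(vec_add(a.val, b.val)); }
//   inline v_int32x4& operator += (v_int32x4& a, const v_int32x4& b)
//   { a.val = vec_add(a.val, b.val); return a; }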
/* Saturating multiply via expand-and-pack */
#define OPENCV_HAL_IMPL_VSX_MUL_SAT(_Tpvec, _Tpwvec)        \
inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \
{                                                           \
    _Tpwvec c, d;                                           \
    v_mul_expand(a, b, c, d);                               \
    return v_pack(c, d);                                    \
}                                                           \
inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b)     \
{ a = a * b; return a; }
template<typename Tvec, typename Twvec>
inline void v_mul_expand(const Tvec& a, const Tvec& b, Twvec& c, Twvec& d)
{
    Twvec p0 = Twvec(vec_mule(a.val, b.val));
    Twvec p1 = Twvec(vec_mulo(a.val, b.val));
    v_zip(p0, p1, c, d);
}

inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b)
{
    vec_int4 p0 = vec_mule(a.val, b.val);
    vec_int4 p1 = vec_mulo(a.val, b.val);
    static const vec_uchar16 perm = {2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31};
    return v_int16x8(vec_perm(vec_short8_c(p0), vec_short8_c(p1), perm));
}

inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b)
{
    vec_uint4 p0 = vec_mule(a.val, b.val);
    vec_uint4 p1 = vec_mulo(a.val, b.val);
    static const vec_uchar16 perm = {2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31};
    return v_uint16x8(vec_perm(vec_ushort8_c(p0), vec_ushort8_c(p1), perm));
}
#define OPENCV_HAL_IMPL_VSX_BIN_FUNC(func, intrin)   \
template<typename _Tpvec>                            \
inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(intrin(a.val, b.val)); }

OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_add_wrap, vec_add)
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_sub_wrap, vec_sub)
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_mul_wrap, vec_mul)
/* Bit shifts: logical right shift for unsigned types, arithmetic for signed types */
#define OPENCV_HAL_IMPL_VSX_SHIFT_OP(_Tpvec, shr, splfunc) \
inline _Tpvec operator << (const _Tpvec& a, int imm)       \
{ return _Tpvec(vec_sl(a.val, splfunc(imm))); }            \
inline _Tpvec operator >> (const _Tpvec& a, int imm)       \
{ return _Tpvec(shr(a.val, splfunc(imm))); }               \
template<int imm> inline _Tpvec v_shl(const _Tpvec& a)     \
{ return _Tpvec(vec_sl(a.val, splfunc(imm))); }            \
template<int imm> inline _Tpvec v_shr(const _Tpvec& a)     \
{ return _Tpvec(shr(a.val, splfunc(imm))); }

OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint8x16, vec_sr, vec_uchar16_sp)
OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint16x8, vec_sr, vec_ushort8_sp)
OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint32x4, vec_sr, vec_uint4_sp)
OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint64x2, vec_sr, vec_udword2_sp)
OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int8x16, vec_sra, vec_uchar16_sp)
OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int16x8, vec_sra, vec_ushort8_sp)
OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int32x4, vec_sra, vec_uint4_sp)
OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int64x2, vec_sra, vec_udword2_sp)
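// Illustrative usage with the u32 instantiation above:
//   v_uint32x4 a = v_setall_u32(8);
//   v_uint32x4 b = a >> 2;          // runtime shift amount, lanes = 2
//   v_uint32x4 c = v_shl<1>(a);     // compile-time shift amount, lanes = 16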
/* Bitwise logic */
#define OPENCV_HAL_IMPL_VSX_LOGIC_OP(_Tpvec)   \
OPENCV_HAL_IMPL_VSX_BIN_OP(&, _Tpvec, vec_and) \
OPENCV_HAL_IMPL_VSX_BIN_OP(|, _Tpvec, vec_or)  \
OPENCV_HAL_IMPL_VSX_BIN_OP(^, _Tpvec, vec_xor) \
inline _Tpvec operator ~ (const _Tpvec& a)     \
{ return _Tpvec(vec_not(a.val)); }
/* Per-lane select: mask lanes are all-ones (true) or all-zeros (false) */
#define OPENCV_HAL_IMPL_VSX_SELECT(_Tpvec, cast)                             \
inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(vec_sel(b.val, a.val, cast(mask.val))); }

OPENCV_HAL_IMPL_VSX_SELECT(v_uint8x16, vec_bchar16_c)
OPENCV_HAL_IMPL_VSX_SELECT(v_int8x16, vec_bchar16_c)
OPENCV_HAL_IMPL_VSX_SELECT(v_uint16x8, vec_bshort8_c)
OPENCV_HAL_IMPL_VSX_SELECT(v_int16x8, vec_bshort8_c)
OPENCV_HAL_IMPL_VSX_SELECT(v_uint32x4, vec_bint4_c)
OPENCV_HAL_IMPL_VSX_SELECT(v_int32x4, vec_bint4_c)
OPENCV_HAL_IMPL_VSX_SELECT(v_float32x4, vec_bint4_c)
OPENCV_HAL_IMPL_VSX_SELECT(v_float64x2, vec_bdword2_c)
/* Comparison */
#define OPENCV_HAL_IMPL_VSX_INT_CMP_OP(_Tpvec)                \
inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b)  \
{ return _Tpvec(vec_cmpeq(a.val, b.val)); }                   \
inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b)  \
{ return _Tpvec(vec_cmpne(a.val, b.val)); }                   \
inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b)   \
{ return _Tpvec(vec_cmplt(a.val, b.val)); }                   \
inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b)   \
{ return _Tpvec(vec_cmpgt(a.val, b.val)); }                   \
inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b)  \
{ return _Tpvec(vec_cmple(a.val, b.val)); }                   \
inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b)  \
{ return _Tpvec(vec_cmpge(a.val, b.val)); }
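// Each comparison yields a per-lane mask (all bits set where the predicate holds), which
// pairs naturally with v_select above. Illustrative, assuming the macro is instantiated
// for v_int32x4:
//   v_int32x4 a(1, 5, 3, 7), b(4, 4, 4, 4);
//   v_int32x4 m = a > b;              // {0, -1, 0, -1}
//   v_int32x4 r = v_select(m, a, b);  // per-lane max: {4, 5, 4, 7}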
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_min, vec_min)
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_max, vec_max)
/** Rotate (whole-register element shift) **/
#define OPENCV_IMPL_VSX_ROTATE(_Tpvec, suffix, shf, cast)                    \
template<int imm>                                                            \
inline _Tpvec v_rotate_##suffix(const _Tpvec& a)                             \
{                                                                            \
    const int wd = imm * sizeof(typename _Tpvec::lane_type);                 \
    if (wd > 15)                                                             \
        return _Tpvec::zero();                                               \
    return _Tpvec((cast)shf(vec_uchar16_c(a.val), vec_uchar16_sp(wd << 3))); \
}

#define OPENCV_IMPL_VSX_ROTATE_LR(_Tpvec, cast)     \
OPENCV_IMPL_VSX_ROTATE(_Tpvec, left, vec_slo, cast) \
OPENCV_IMPL_VSX_ROTATE(_Tpvec, right, vec_sro, cast)

OPENCV_IMPL_VSX_ROTATE_LR(v_uint8x16, vec_uchar16)
OPENCV_IMPL_VSX_ROTATE_LR(v_int8x16, vec_char16)
OPENCV_IMPL_VSX_ROTATE_LR(v_uint16x8, vec_ushort8)
OPENCV_IMPL_VSX_ROTATE_LR(v_int16x8, vec_short8)
OPENCV_IMPL_VSX_ROTATE_LR(v_uint32x4, vec_uint4)
OPENCV_IMPL_VSX_ROTATE_LR(v_int32x4, vec_int4)
OPENCV_IMPL_VSX_ROTATE_LR(v_uint64x2, vec_udword2)
OPENCV_IMPL_VSX_ROTATE_LR(v_int64x2, vec_dword2)
template<int imm, typename _Tpvec>
inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b)
{
    enum { CV_SHIFT = 16 - imm * (sizeof(typename _Tpvec::lane_type)) };
    if (CV_SHIFT == 16)
        return a;
#ifdef __IBMCPP__
    return _Tpvec(vec_sld(b.val, a.val, CV_SHIFT & 15));
#else
    return _Tpvec(vec_sld(b.val, a.val, CV_SHIFT));
#endif
}

template<int imm, typename _Tpvec>
inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b)
{
    enum { CV_SHIFT = imm * (sizeof(typename _Tpvec::lane_type)) };
    if (CV_SHIFT == 16)
        return b;
    return _Tpvec(vec_sld(a.val, b.val, CV_SHIFT));
}
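// Two-register rotate sketch: lanes shifted out of `a` are refilled from `b`, e.g.
//   v_int32x4 a(1, 2, 3, 4), b(5, 6, 7, 8);
//   v_rotate_right<1>(a, b);   // {2, 3, 4, 5}
//   v_rotate_left<1>(a, b);    // {8, 1, 2, 3}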
#define OPENCV_IMPL_VSX_ROTATE_64_2RG(_Tpvec, suffix, rg1, rg2)   \
template<int imm>                                                 \
inline _Tpvec v_rotate_##suffix(const _Tpvec& a, const _Tpvec& b) \
{                                                                 \
    if (imm == 1)                                                 \
        return _Tpvec(vec_permi(rg1.val, rg2.val, 2));            \
    return imm ? b : a;                                           \
}

#define OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(_Tpvec)   \
OPENCV_IMPL_VSX_ROTATE_64_2RG(_Tpvec, left, b, a)  \
OPENCV_IMPL_VSX_ROTATE_64_2RG(_Tpvec, right, a, b)

OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_int64x2)
/* Reverse element order */
inline v_uint8x16 v_reverse(const v_uint8x16& a)
{
    static const vec_uchar16 perm = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
    vec_uchar16 vec = (vec_uchar16)a.val;
    return v_uint8x16(vec_perm(vec, vec, perm));
}

inline v_int8x16 v_reverse(const v_int8x16& a)
{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); }

inline v_uint16x8 v_reverse(const v_uint16x8& a)
{
    static const vec_uchar16 perm = {14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1};
    vec_uchar16 vec = (vec_uchar16)a.val;
    return v_reinterpret_as_u16(v_uint8x16(vec_perm(vec, vec, perm)));
}

inline v_int16x8 v_reverse(const v_int16x8& a)
{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); }

inline v_uint32x4 v_reverse(const v_uint32x4& a)
{
    static const vec_uchar16 perm = {12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3};
    vec_uchar16 vec = (vec_uchar16)a.val;
    return v_reinterpret_as_u32(v_uint8x16(vec_perm(vec, vec, perm)));
}

inline v_int32x4 v_reverse(const v_int32x4& a)
{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); }

inline v_float32x4 v_reverse(const v_float32x4& a)
{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); }

inline v_uint64x2 v_reverse(const v_uint64x2& a)
{
    static const vec_uchar16 perm = {8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7};
    vec_uchar16 vec = (vec_uchar16)a.val;
    return v_reinterpret_as_u64(v_uint8x16(vec_perm(vec, vec, perm)));
}

inline v_int64x2 v_reverse(const v_int64x2& a)
{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); }

inline v_float64x2 v_reverse(const v_float64x2& a)
{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); }
template<int s, typename _Tpvec>
inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
{ return v_rotate_right<s>(a, b); }
/* Reduce: horizontal sum */
inline uint v_reduce_sum(const v_uint8x16& a)
{
    const vec_uint4 zero4 = vec_uint4_z;
    vec_uint4 sum4 = vec_sum4s(a.val, zero4);
    return (uint)vec_extract(vec_sums(vec_int4_c(sum4), vec_int4_c(zero4)), 3);
}

inline int v_reduce_sum(const v_int8x16& a)
{
    const vec_int4 zero4 = vec_int4_z;
    vec_int4 sum4 = vec_sum4s(a.val, zero4);
    return (int)vec_extract(vec_sums(sum4, zero4), 3);
}

inline int v_reduce_sum(const v_int16x8& a)
{
    const vec_int4 zero = vec_int4_z;
    return saturate_cast<int>(vec_extract(vec_sums(vec_sum4s(a.val, zero), zero), 3));
}

inline uint v_reduce_sum(const v_uint16x8& a)
{
    const vec_int4 v4 = vec_int4_c(vec_unpackhu(vec_adds(a.val, vec_sld(a.val, a.val, 8))));
    return saturate_cast<uint>(vec_extract(vec_sums(v4, vec_int4_z), 3));
}
#define OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(_Tpvec, _Tpvec2, scalartype, suffix, func) \
inline scalartype v_reduce_##suffix(const _Tpvec& a)                               \
{                                                                                  \
    const _Tpvec2 rs = func(a.val, vec_sld(a.val, a.val, 8));                      \
    return vec_extract(func(rs, vec_sld(rs, rs, 4)), 0);                           \
}
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_uint32x4, vec_uint4, uint, sum, vec_add)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_uint32x4, vec_uint4, uint, max, vec_max)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_uint32x4, vec_uint4, uint, min, vec_min)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_int32x4, vec_int4, int, sum, vec_add)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_int32x4, vec_int4, int, max, vec_max)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_int32x4, vec_int4, int, min, vec_min)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, sum, vec_add)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, max, vec_max)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, min, vec_min)
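// Illustrative: each v_reduce_* folds all four lanes with the supplied op, e.g.
//   v_int32x4 v(1, 2, 3, 4);
//   v_reduce_sum(v);   // 10
//   v_reduce_max(v);   // 4
//   v_reduce_min(v);   // 1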
inline uint64 v_reduce_sum(const v_uint64x2& a)
{ return vec_extract(vec_add(a.val, vec_permi(a.val, a.val, 3)), 0); }

inline int64 v_reduce_sum(const v_int64x2& a)
{ return vec_extract(vec_add(a.val, vec_permi(a.val, a.val, 3)), 0); }

inline double v_reduce_sum(const v_float64x2& a)
{ return vec_extract(vec_add(a.val, vec_permi(a.val, a.val, 3)), 0); }
#define OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(_Tpvec, _Tpvec2, scalartype, suffix, func) \
859
inline scalartype v_reduce_##suffix(const _Tpvec& a) \
861
_Tpvec2 rs = func(a.val, vec_sld(a.val, a.val, 8)); \
862
rs = func(rs, vec_sld(rs, rs, 4)); \
863
return vec_extract(func(rs, vec_sld(rs, rs, 2)), 0); \
865OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(
v_uint16x8, vec_ushort8, ushort,
max, vec_max)
866OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(
v_uint16x8, vec_ushort8, ushort,
min, vec_min)
867OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(
v_int16x8, vec_short8,
short,
max, vec_max)
868OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(
v_int16x8, vec_short8,
short,
min, vec_min)
#define OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(_Tpvec, _Tpvec2, scalartype, suffix, func) \
inline scalartype v_reduce_##suffix(const _Tpvec& a)                                \
{                                                                                   \
    _Tpvec2 rs = func(a.val, vec_sld(a.val, a.val, 8));                             \
    rs = func(rs, vec_sld(rs, rs, 4));                                              \
    rs = func(rs, vec_sld(rs, rs, 2));                                              \
    return vec_extract(func(rs, vec_sld(rs, rs, 1)), 0);                            \
}
OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(v_uint8x16, vec_uchar16, uchar, max, vec_max)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(v_uint8x16, vec_uchar16, uchar, min, vec_min)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(v_int8x16, vec_char16, schar, max, vec_max)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(v_int8x16, vec_char16, schar, min, vec_min)
inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
                                 const v_float32x4& c, const v_float32x4& d)
{
    vec_float4 ac = vec_add(vec_mergel(a.val, c.val), vec_mergeh(a.val, c.val));
    ac = vec_add(ac, vec_sld(ac, ac, 8));

    vec_float4 bd = vec_add(vec_mergel(b.val, d.val), vec_mergeh(b.val, d.val));
    bd = vec_add(bd, vec_sld(bd, bd, 8));
    return v_float32x4(vec_mergeh(ac, bd));
}
/* Sum of absolute differences */
inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b)
{
    const vec_uint4 zero4 = vec_uint4_z;
    vec_uint4 sum4 = vec_sum4s(vec_absd(a.val, b.val), zero4);
    return (unsigned)vec_extract(vec_sums(vec_int4_c(sum4), vec_int4_c(zero4)), 3);
}

inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b)
{
    const vec_int4 zero4 = vec_int4_z;
    vec_char16 ad = vec_abss(vec_subs(a.val, b.val));
    vec_int4 sum4 = vec_sum4s(ad, zero4);
    return (unsigned)vec_extract(vec_sums(sum4, zero4), 3);
}

inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b)
{
    vec_ushort8 ad = vec_absd(a.val, b.val);
    VSX_UNUSED(vec_int4) sum = vec_sums(vec_int4_c(vec_unpackhu(ad)) + vec_int4_c(vec_unpacklu(ad)), vec_int4_z);
    return (unsigned)vec_extract(sum, 3);
}

inline unsigned v_reduce_sad(const v_int16x8& a, const v_int16x8& b)
{
    const vec_int4 zero4 = vec_int4_z;
    vec_short8 ad = vec_abss(vec_subs(a.val, b.val));
    vec_int4 sum4 = vec_sum4s(ad, zero4);
    return (unsigned)vec_extract(vec_sums(sum4, zero4), 3);
}

inline unsigned v_reduce_sad(const v_uint32x4& a, const v_uint32x4& b)
{
    const vec_uint4 ad = vec_absd(a.val, b.val);
    const vec_uint4 rd = vec_add(ad, vec_sld(ad, ad, 8));
    return vec_extract(vec_add(rd, vec_sld(rd, rd, 4)), 0);
}

inline unsigned v_reduce_sad(const v_int32x4& a, const v_int32x4& b)
{
    vec_int4 ad = vec_abss(vec_sub(a.val, b.val));
    return (unsigned)vec_extract(vec_sums(ad, vec_int4_z), 3);
}

inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b)
{
    const vec_float4 ad = vec_abs(vec_sub(a.val, b.val));
    const vec_float4 rd = vec_add(ad, vec_sld(ad, ad, 8));
    return vec_extract(vec_add(rd, vec_sld(rd, rd, 4)), 0);
}
/* Sign mask */
inline int v_signmask(const v_int8x16& a)
{
    static const vec_uchar16 qperm = {120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0};
    return vec_extract((vec_int4)vec_vbpermq(v_reinterpret_as_u8(a).val, qperm), 2);
}

inline int v_signmask(const v_int16x8& a)
{
    static const vec_uchar16 qperm = {112, 96, 80, 64, 48, 32, 16, 0, 128, 128, 128, 128, 128, 128, 128, 128};
    return vec_extract((vec_int4)vec_vbpermq(v_reinterpret_as_u8(a).val, qperm), 2);
}
inline int v_signmask(const v_uint16x8& a)
{ return v_signmask(v_reinterpret_as_s16(a)); }

inline int v_signmask(const v_int32x4& a)
{
    static const vec_uchar16 qperm = {96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128};
    return vec_extract((vec_int4)vec_vbpermq(v_reinterpret_as_u8(a).val, qperm), 2);
}
inline int v_signmask(const v_uint32x4& a)
{ return v_signmask(v_reinterpret_as_s32(a)); }
inline int v_signmask(const v_float32x4& a)
{ return v_signmask(v_reinterpret_as_s32(a)); }

inline int v_signmask(const v_int64x2& a)
{
    VSX_UNUSED(const vec_dword2) sv = vec_sr(a.val, vec_udword2_sp(63));
    return (int)vec_extract(sv, 0) | (int)vec_extract(sv, 1) << 1;
}
inline int v_signmask(const v_uint64x2& a)
{ return v_signmask(v_reinterpret_as_s64(a)); }
inline int v_signmask(const v_float64x2& a)
{ return v_signmask(v_reinterpret_as_s64(a)); }
template<typename _Tpvec>
inline bool v_check_all(const _Tpvec& a)
{ return vec_all_lt(a.val, _Tpvec::zero().val); }

template<typename _Tpvec>
inline bool v_check_any(const _Tpvec& a)
{ return vec_any_lt(a.val, _Tpvec::zero().val); }
#define OPENCV_HAL_IMPL_VSX_MULADD(_Tpvec)                                  \
inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b)                 \
{ return _Tpvec(vec_sqrt(vec_madd(a.val, a.val, vec_mul(b.val, b.val)))); } \
inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b)             \
{ return _Tpvec(vec_madd(a.val, a.val, vec_mul(b.val, b.val))); }           \
inline _Tpvec v_fma(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c)      \
{ return _Tpvec(vec_madd(a.val, b.val, c.val)); }                           \
inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c)   \
{ return _Tpvec(vec_madd(a.val, b.val, c.val)); }
inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
{ return a * b + c; }
/** Absolute values **/
inline v_uint8x16 v_abs(const v_int8x16& x)
{ return v_uint8x16(vec_uchar16_c(vec_abs(x.val))); }

inline v_uint16x8 v_abs(const v_int16x8& x)
{ return v_uint16x8(vec_ushort8_c(vec_abs(x.val))); }

inline v_uint32x4 v_abs(const v_int32x4& x)
{ return v_uint32x4(vec_uint4_c(vec_abs(x.val))); }
/** Absolute difference **/
// unsigned types: generic implementation via vec_absd
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_absdiff, vec_absd)

inline v_uint8x16 v_absdiff(const v_int8x16& a, const v_int8x16& b)
{ return v_reinterpret_as_u8(v_sub_wrap(v_max(a, b), v_min(a, b))); }
inline v_uint16x8 v_absdiff(const v_int16x8& a, const v_int16x8& b)
{ return v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b))); }
inline v_uint32x4 v_absdiff(const v_int32x4& a, const v_int32x4& b)
{ return v_reinterpret_as_u32(v_max(a, b) - v_min(a, b)); }

inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b)
{ return v_abs(a - b); }
inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b)
{ return v_abs(a - b); }

/** Saturating absolute difference **/
inline v_int8x16 v_absdiffs(const v_int8x16& a, const v_int8x16& b)
{ return v_int8x16(vec_abss(vec_subs(a.val, b.val))); }
inline v_int16x8 v_absdiffs(const v_int16x8& a, const v_int16x8& b)
{ return v_int16x8(vec_abss(vec_subs(a.val, b.val))); }
/** Rounding **/
inline v_int32x4 v_round(const v_float32x4& a)
{ return v_int32x4(vec_cts(vec_rint(a.val))); }

inline v_int32x4 v_round(const v_float64x2& a)
{ return v_int32x4(vec_mergesqo(vec_ctso(vec_rint(a.val)), vec_int4_z)); }

inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b)
{ return v_int32x4(vec_mergesqo(vec_ctso(vec_rint(a.val)), vec_ctso(vec_rint(b.val)))); }

inline v_int32x4 v_floor(const v_float32x4& a)
{ return v_int32x4(vec_cts(vec_floor(a.val))); }

inline v_int32x4 v_floor(const v_float64x2& a)
{ return v_int32x4(vec_mergesqo(vec_ctso(vec_floor(a.val)), vec_int4_z)); }

inline v_int32x4 v_ceil(const v_float32x4& a)
{ return v_int32x4(vec_cts(vec_ceil(a.val))); }

inline v_int32x4 v_ceil(const v_float64x2& a)
{ return v_int32x4(vec_mergesqo(vec_ctso(vec_ceil(a.val)), vec_int4_z)); }

inline v_int32x4 v_trunc(const v_float64x2& a)
{ return v_int32x4(vec_mergesqo(vec_ctso(a.val), vec_int4_z)); }

/** To float **/
inline v_float32x4 v_cvt_f32(const v_float64x2& a)
{ return v_float32x4(vec_mergesqo(vec_cvfo(a.val), vec_float4_z)); }

inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b)
{ return v_float32x4(vec_mergesqo(vec_cvfo(a.val), vec_cvfo(b.val))); }

inline v_float64x2 v_cvt_f64(const v_int32x4& a)
{ return v_float64x2(vec_ctdo(vec_mergeh(a.val, a.val))); }

inline v_float64x2 v_cvt_f64_high(const v_int32x4& a)
{ return v_float64x2(vec_ctdo(vec_mergel(a.val, a.val))); }

inline v_float64x2 v_cvt_f64(const v_float32x4& a)
{ return v_float64x2(vec_cvfo(vec_mergeh(a.val, a.val))); }

inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
{ return v_float64x2(vec_cvfo(vec_mergel(a.val, a.val))); }
/* Lookup table access */
inline v_int8x16 v_lut(const schar* tab, const int* idx)
{
    return v_int8x16(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]], tab[idx[4]], tab[idx[5]], tab[idx[6]], tab[idx[7]],
                     tab[idx[8]], tab[idx[9]], tab[idx[10]], tab[idx[11]], tab[idx[12]], tab[idx[13]], tab[idx[14]], tab[idx[15]]);
}
inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx)
{
    return v_reinterpret_as_s8(v_int16x8(*(const short*)(tab+idx[0]), *(const short*)(tab+idx[1]), *(const short*)(tab+idx[2]), *(const short*)(tab+idx[3]),
                                         *(const short*)(tab+idx[4]), *(const short*)(tab+idx[5]), *(const short*)(tab+idx[6]), *(const short*)(tab+idx[7])));
}
inline v_int8x16 v_lut_quads(const schar* tab, const int* idx)
{
    return v_reinterpret_as_s8(v_int32x4(*(const int*)(tab+idx[0]), *(const int*)(tab+idx[1]), *(const int*)(tab+idx[2]), *(const int*)(tab+idx[3])));
}
inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((const schar*)tab, idx)); }
inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((const schar*)tab, idx)); }
inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((const schar*)tab, idx)); }
inline v_int16x8 v_lut(const short* tab, const int* idx)
{
    return v_int16x8(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]], tab[idx[4]], tab[idx[5]], tab[idx[6]], tab[idx[7]]);
}
inline v_int16x8 v_lut_pairs(const short* tab, const int* idx)
{
    return v_reinterpret_as_s16(v_int32x4(*(const int*)(tab + idx[0]), *(const int*)(tab + idx[1]), *(const int*)(tab + idx[2]), *(const int*)(tab + idx[3])));
}
inline v_int16x8 v_lut_quads(const short* tab, const int* idx)
{
    return v_reinterpret_as_s16(v_int64x2(*(const int64*)(tab + idx[0]), *(const int64*)(tab + idx[1])));
}
inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((const short*)tab, idx)); }
inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((const short*)tab, idx)); }
inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((const short*)tab, idx)); }
inline v_int32x4 v_lut(const int* tab, const int* idx)
{
    return v_int32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]);
}
inline v_int32x4 v_lut_pairs(const int* tab, const int* idx)
{
    return v_reinterpret_as_s32(v_int64x2(*(const int64*)(tab + idx[0]), *(const int64*)(tab + idx[1])));
}
inline v_int32x4 v_lut_quads(const int* tab, const int* idx)
{
    return v_int32x4(vsx_ld(0, tab + idx[0]));
}
inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((const int*)tab, idx)); }
inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((const int*)tab, idx)); }
inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((const int*)tab, idx)); }
inline v_int64x2 v_lut(const int64_t* tab, const int* idx)
{
    return v_int64x2(tab[idx[0]], tab[idx[1]]);
}
inline v_int64x2 v_lut_pairs(const int64_t* tab, const int* idx)
{
    return v_int64x2(vsx_ld2(0, tab + idx[0]));
}
inline v_uint64x2 v_lut(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); }
inline v_uint64x2 v_lut_pairs(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); }
inline v_float32x4 v_lut(const float* tab, const int* idx)
{
    return v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]);
}
inline v_float32x4 v_lut_pairs(const float* tab, const int* idx) { return v_reinterpret_as_f32(v_lut_pairs((const int*)tab, idx)); }
inline v_float32x4 v_lut_quads(const float* tab, const int* idx) { return v_load(tab + *idx); }
inline v_float64x2 v_lut(const double* tab, const int* idx)
{
    return v_float64x2(tab[idx[0]], tab[idx[1]]);
}
inline v_float64x2 v_lut_pairs(const double* tab, const int* idx) { return v_load(tab + *idx); }
inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec)
{
    const int idx[4] = {
        vec_extract(idxvec.val, 0),
        vec_extract(idxvec.val, 1),
        vec_extract(idxvec.val, 2),
        vec_extract(idxvec.val, 3)
    };
    return v_int32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]);
}

inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec)
{
    const int idx[4] = {
        vec_extract(idxvec.val, 0),
        vec_extract(idxvec.val, 1),
        vec_extract(idxvec.val, 2),
        vec_extract(idxvec.val, 3)
    };
    return v_uint32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]);
}

inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec)
{
    const int idx[4] = {
        vec_extract(idxvec.val, 0),
        vec_extract(idxvec.val, 1),
        vec_extract(idxvec.val, 2),
        vec_extract(idxvec.val, 3)
    };
    return v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]);
}

inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec)
{
    const int idx[2] = {
        vec_extract(idxvec.val, 0),
        vec_extract(idxvec.val, 1)
    };
    return v_float64x2(tab[idx[0]], tab[idx[1]]);
}
inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y)
{
    vec_float4 xy0 = vec_ld_l8(tab + vec_extract(idxvec.val, 0));
    vec_float4 xy1 = vec_ld_l8(tab + vec_extract(idxvec.val, 1));
    vec_float4 xy2 = vec_ld_l8(tab + vec_extract(idxvec.val, 2));
    vec_float4 xy3 = vec_ld_l8(tab + vec_extract(idxvec.val, 3));
    vec_float4 xy02 = vec_mergeh(xy0, xy2);
    vec_float4 xy13 = vec_mergeh(xy1, xy3);
    x.val = vec_mergeh(xy02, xy13);
    y.val = vec_mergel(xy02, xy13);
}

inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y)
{
    vec_double2 xy0 = vsx_ld(vec_extract(idxvec.val, 0), tab);
    vec_double2 xy1 = vsx_ld(vec_extract(idxvec.val, 1), tab);
    x.val = vec_mergeh(xy0, xy1);
    y.val = vec_mergel(xy0, xy1);
}
inline v_int8x16 v_interleave_pairs(const v_int8x16& vec)
{
    static const vec_uchar16 perm = {0, 2, 1, 3, 4, 6, 5, 7, 8, 10, 9, 11, 12, 14, 13, 15};
    return v_int8x16(vec_perm(vec.val, vec.val, perm));
}
inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec)
{ return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); }

inline v_int8x16 v_interleave_quads(const v_int8x16& vec)
{
    static const vec_uchar16 perm = {0, 4, 1, 5, 2, 6, 3, 7, 8, 12, 9, 13, 10, 14, 11, 15};
    return v_int8x16(vec_perm(vec.val, vec.val, perm));
}
inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec)
{ return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); }

inline v_int16x8 v_interleave_pairs(const v_int16x8& vec)
{
    static const vec_uchar16 perm = {0,1, 4,5, 2,3, 6,7, 8,9, 12,13, 10,11, 14,15};
    return v_int16x8(vec_perm(vec.val, vec.val, perm));
}
inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec)
{ return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); }

inline v_int16x8 v_interleave_quads(const v_int16x8& vec)
{
    static const vec_uchar16 perm = {0,1, 8,9, 2,3, 10,11, 4,5, 12,13, 6,7, 14,15};
    return v_int16x8(vec_perm(vec.val, vec.val, perm));
}
inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec)
{ return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); }

inline v_int32x4 v_interleave_pairs(const v_int32x4& vec)
{
    static const vec_uchar16 perm = {0,1,2,3, 8,9,10,11, 4,5,6,7, 12,13,14,15};
    return v_int32x4(vec_perm(vec.val, vec.val, perm));
}
inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec)
{ return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); }
inline v_float32x4 v_interleave_pairs(const v_float32x4& vec)
{ return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); }

inline v_int8x16 v_pack_triplets(const v_int8x16& vec)
{
    static const vec_uchar16 perm = {0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15, 15, 15, 15};
    return v_int8x16(vec_perm(vec.val, vec.val, perm));
}
inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec)
{ return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); }

inline v_int16x8 v_pack_triplets(const v_int16x8& vec)
{
    static const vec_uchar16 perm = {0,1, 2,3, 4,5, 8,9, 10,11, 12,13, 14,15, 14,15};
    return v_int16x8(vec_perm(vec.val, vec.val, perm));
}
inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec)
{ return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); }
/* FP16 support */
inline v_float32x4 v_load_expand(const float16_t* ptr)
{
    vec_ushort8 vf16 = vec_ld_l8((const ushort*)ptr);
#if CV_VSX3 && defined(vec_extract_fp_from_shorth)
    return v_float32x4(vec_extract_fp_from_shorth(vf16));
#elif CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM)
    vec_float4 vf32;
    __asm__ __volatile__ ("xvcvhpsp %x0,%x1" : "=wa" (vf32) : "wa" (vec_mergeh(vf16, vf16)));
    return v_float32x4(vf32);
#else
    const vec_int4 z = vec_int4_z, delta = vec_int4_sp(0x38000000);
    const vec_int4 signmask = vec_int4_sp(0x80000000);
    const vec_int4 maxexp = vec_int4_sp(0x7c000000);
    const vec_float4 deltaf = vec_float4_c(vec_int4_sp(0x38800000));

    vec_int4 bits = vec_int4_c(vec_mergeh(vec_short8_c(z), vec_short8_c(vf16)));
    vec_int4 e = vec_and(bits, maxexp), sign = vec_and(bits, signmask);
    vec_int4 t = vec_add(vec_sr(vec_xor(bits, sign), vec_uint4_sp(3)), delta);
    vec_int4 zt = vec_int4_c(vec_sub(vec_float4_c(vec_add(t, vec_int4_sp(1 << 23))), deltaf));

    t = vec_add(t, vec_and(delta, vec_cmpeq(maxexp, e)));
    vec_bint4 zmask = vec_cmpeq(e, z);
    vec_int4 ft = vec_sel(t, zt, zmask);
    return v_float32x4(vec_float4_c(vec_or(ft, sign)));
#endif
}
inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
{
#if CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM)
    vec_ushort8 vf16;
    __asm__ __volatile__ ("xvcvsphp %x0,%x1" : "=wa" (vf16) : "wa" (v.val));
    vec_st_l8(vec_mergesqe(vf16, vf16), ptr);
#else
    const vec_int4 signmask = vec_int4_sp(0x80000000);
    const vec_int4 rval = vec_int4_sp(0x3f000000);

    vec_int4 t = vec_int4_c(v.val);
    vec_int4 sign = vec_sra(vec_and(t, signmask), vec_uint4_sp(16));
    t = vec_and(vec_nor(signmask, signmask), t);

    vec_bint4 finitemask = vec_cmpgt(vec_int4_sp(0x47800000), t);
    vec_bint4 isnan = vec_cmpgt(t, vec_int4_sp(0x7f800000));
    vec_int4 naninf = vec_sel(vec_int4_sp(0x7c00), vec_int4_sp(0x7e00), isnan);
    vec_bint4 tinymask = vec_cmpgt(vec_int4_sp(0x38800000), t);
    vec_int4 tt = vec_int4_c(vec_add(vec_float4_c(t), vec_float4_c(rval)));
    tt = vec_sub(tt, rval);
    vec_int4 odd = vec_and(vec_sr(t, vec_uint4_sp(13)), vec_int4_sp(1));
    vec_int4 nt = vec_add(t, vec_int4_sp(0xc8000fff));
    nt = vec_sr(vec_add(nt, odd), vec_uint4_sp(13));
    t = vec_sel(nt, tt, tinymask);
    t = vec_sel(naninf, t, finitemask);
    t = vec_or(t, sign);
    vec_st_l8(vec_packs(t, t), ptr);
#endif
}
inline void v_cleanup() {}
/* Dot product */
// 16 >> 32
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
{ return v_int32x4(vec_msum(a.val, b.val, vec_int4_z)); }
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
{ return v_int32x4(vec_msum(a.val, b.val, c.val)); }

// 32 >> 64
inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b)
{
    vec_dword2 even = vec_mule(a.val, b.val);
    vec_dword2 odd = vec_mulo(a.val, b.val);
    return v_int64x2(vec_add(even, odd));
}

// 8 >> 32
inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c)
{ return v_uint32x4(vec_msum(a.val, b.val, c.val)); }
inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b)
{ return v_uint32x4(vec_msum(a.val, b.val, vec_uint4_z)); }

inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b)
{
    const vec_ushort8 eight = vec_ushort8_sp(8);
    vec_short8 a0 = vec_sra((vec_short8)vec_sld(a.val, a.val, 1), eight);
    vec_short8 a1 = vec_sra((vec_short8)a.val, eight);
    vec_short8 b0 = vec_sra((vec_short8)vec_sld(b.val, b.val, 1), eight);
    vec_short8 b1 = vec_sra((vec_short8)b.val, eight);
    return v_int32x4(vec_msum(a0, b0, vec_msum(a1, b1, vec_int4_z)));
}

inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c)
{
    const vec_ushort8 eight = vec_ushort8_sp(8);
    vec_short8 a0 = vec_sra((vec_short8)vec_sld(a.val, a.val, 1), eight);
    vec_short8 a1 = vec_sra((vec_short8)a.val, eight);
    vec_short8 b0 = vec_sra((vec_short8)vec_sld(b.val, b.val, 1), eight);
    vec_short8 b1 = vec_sra((vec_short8)b.val, eight);
    return v_int32x4(vec_msum(a0, b0, vec_msum(a1, b1, c.val)));
}

// 16 >> 64
inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b)
{
    const vec_uint4 zero = vec_uint4_z;
    vec_uint4 even = vec_mule(a.val, b.val);
    vec_uint4 odd = vec_mulo(a.val, b.val);
    vec_udword2 e0 = (vec_udword2)vec_mergee(even, zero);
    vec_udword2 e1 = (vec_udword2)vec_mergeo(even, zero);
    vec_udword2 o0 = (vec_udword2)vec_mergee(odd, zero);
    vec_udword2 o1 = (vec_udword2)vec_mergeo(odd, zero);
    vec_udword2 s0 = vec_add(e0, o0);
    vec_udword2 s1 = vec_add(e1, o1);
    return v_uint64x2(vec_add(s0, s1));
}

inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b)
{
    v_int32x4 prod = v_dotprod(a, b);
    v_int64x2 c, d;
    v_expand(prod, c, d);
    return v_int64x2(vec_add(vec_mergeh(c.val, d.val), vec_mergel(c.val, d.val)));
}

/* Fast dot product */
// 16 >> 32
inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
{ return v_int32x4(vec_msum(a.val, b.val, vec_int4_z)) + c; }

// 8 >> 32
inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c)
{ return v_uint32x4(vec_msum(a.val, b.val, vec_uint4_z)) + c; }

inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b)
{
    vec_short8 a0 = vec_unpackh(a.val);
    vec_short8 a1 = vec_unpackl(a.val);
    vec_short8 b0 = vec_unpackh(b.val);
    vec_short8 b1 = vec_unpackl(b.val);
    return v_int32x4(vec_msum(a0, b0, vec_msum(a1, b1, vec_int4_z)));
}
inline v_float32x4 v_matmul(const v_float32x4& v,
                            const v_float32x4& m0, const v_float32x4& m1,
                            const v_float32x4& m2, const v_float32x4& m3)
{
    const vec_float4 v0 = vec_splat(v.val, 0);
    const vec_float4 v1 = vec_splat(v.val, 1);
    const vec_float4 v2 = vec_splat(v.val, 2);
    VSX_UNUSED(const vec_float4) v3 = vec_splat(v.val, 3);
    return v_float32x4(vec_madd(v0, m0.val, vec_madd(v1, m1.val, vec_madd(v2, m2.val, vec_mul(v3, m3.val)))));
}

inline v_float32x4 v_matmuladd(const v_float32x4& v,
                               const v_float32x4& m0, const v_float32x4& m1,
                               const v_float32x4& m2, const v_float32x4& a)
{
    const vec_float4 v0 = vec_splat(v.val, 0);
    const vec_float4 v1 = vec_splat(v.val, 1);
    const vec_float4 v2 = vec_splat(v.val, 2);
    return v_float32x4(vec_madd(v0, m0.val, vec_madd(v1, m1.val, vec_madd(v2, m2.val, a.val))));
}
#define OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(_Tpvec, _Tpvec2)                  \
inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1,             \
                           const _Tpvec& a2, const _Tpvec& a3,             \
                           _Tpvec& b0, _Tpvec& b1, _Tpvec& b2, _Tpvec& b3) \
{                                                                          \
    _Tpvec2 a02 = vec_mergeh(a0.val, a2.val);                              \
    _Tpvec2 a13 = vec_mergeh(a1.val, a3.val);                              \
    b0.val = vec_mergeh(a02, a13);                                         \
    b1.val = vec_mergel(a02, a13);                                         \
    a02 = vec_mergel(a0.val, a2.val);                                      \
    a13 = vec_mergel(a1.val, a3.val);                                      \
    b2.val = vec_mergeh(a02, a13);                                         \
    b3.val = vec_mergel(a02, a13);                                         \
}
OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_uint32x4, vec_uint4)
OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_int32x4, vec_int4)
OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_float32x4, vec_float4)
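// Usage sketch: transpose a 4x4 block held in four row vectors.
//   v_float32x4 r0(1, 2, 3, 4), r1(5, 6, 7, 8), r2(9, 10, 11, 12), r3(13, 14, 15, 16);
//   v_float32x4 c0, c1, c2, c3;
//   v_transpose4x4(r0, r1, r2, r3, c0, c1, c2, c3);   // c0 == {1, 5, 9, 13}, etc.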
template<int i, typename Tvec>
inline Tvec v_broadcast_element(const Tvec& v)
{ return Tvec(vec_splat(v.val, i)); }
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END