45
#ifndef OPENCV_HAL_INTRIN_CPP_HPP
46
#define OPENCV_HAL_INTRIN_CPP_HPP
51
#include "opencv2/core/saturate.hpp"
54
#define CV_SIMD128_CPP 1
55
#if defined(CV_FORCE_SIMD128_CPP)
57
#define CV_SIMD128_64F 1
59
#if defined(CV_DOXYGEN)
61
#define CV_SIMD128_64F 1
63
#define CV_SIMD256_64F 1
65
#define CV_SIMD512_64F 1
76CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
368
template<
typename
_Tp,
int
n>
struct
v_reg
371
typedef
_Tp lane_type;
379
/** Load constructor: fills all n lanes from consecutive elements starting at @p ptr. */
explicit v_reg(const _Tp* ptr)
{
    // Reads ptr[0] .. ptr[n-1], so the caller must supply at least n readable elements.
    for (int lane = 0; lane < n; ++lane)
        s[lane] = ptr[lane];
}
384
/** Two-value constructor: s0 is stored in lane 0 and s1 in lane 1. */
v_reg(_Tp s0, _Tp s1)
{
    s[0] = s0;
    s[1] = s1;
}
389
/** Four-value constructor: argument sK is stored in lane K (K = 0..3). */
v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3)
{
    s[0] = s0;
    s[1] = s1;
    s[2] = s2;
    s[3] = s3;
}
394
/** Eight-value constructor: argument sK is stored in lane K (K = 0..7). */
v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3,
      _Tp s4, _Tp s5, _Tp s6, _Tp s7)
{
    s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3;
    s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7;
}
404
/** Sixteen-value constructor: argument sK is stored in lane K (K = 0..15). */
v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3,
      _Tp s4, _Tp s5, _Tp s6, _Tp s7,
      _Tp s8, _Tp s9, _Tp s10, _Tp s11,
      _Tp s12, _Tp s13, _Tp s14, _Tp s15)
{
    s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3;
    s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7;
    s[8] = s8; s[9] = s9; s[10] = s10; s[11] = s11;
    s[12] = s12; s[13] = s13; s[14] = s14; s[15] = s15;
}
423
for(
int
i = 0; i < n; i++ )
436
/** Returns the value held in lane 0. */
_Tp get0() const
{
    return s[0];
}
439
/** Returns the value held in lane @p i. The index is used as-is; no range check is performed. */
_Tp get(const int i) const
{
    return s[i];
}
440
v_reg<_Tp, n> high()
const
444
for( i = 0; i < n/2; i++ )
452
static
v_reg<_Tp, n> zero()
455
for(
int
i = 0; i < n; i++ )
460
static
v_reg<_Tp, n> all(_Tp s)
463
for(
int
i = 0; i < n; i++ )
468
template<
typename
_Tp2,
int
n2> v_reg<_Tp2, n2> reinterpret_as()
const
470
size_t
bytes =
std::min(
sizeof(_Tp2)*n2,
sizeof(_Tp)*n);
472
std::memcpy(&c.s[0], &s[0], bytes);
476
v_reg& operator=(
const
v_reg<_Tp, n> & r)
478
for(
int
i = 0; i < n; i++ )
561
simdmax_width = simd512_width
563
simdmax_width = simd256_width
565
simdmax_width = simd128_width
620
// Invokes macro_name(type, <extra args>) once for each integer lane type:
// uchar, schar, ushort, short, unsigned, int, uint64, int64.
// The last line deliberately has no trailing line-continuation, so the macro
// never absorbs whatever line happens to follow it.
#define CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, ...) \
    __CV_EXPAND(macro_name(uchar, __VA_ARGS__)) \
    __CV_EXPAND(macro_name(schar, __VA_ARGS__)) \
    __CV_EXPAND(macro_name(ushort, __VA_ARGS__)) \
    __CV_EXPAND(macro_name(short, __VA_ARGS__)) \
    __CV_EXPAND(macro_name(unsigned, __VA_ARGS__)) \
    __CV_EXPAND(macro_name(int, __VA_ARGS__)) \
    __CV_EXPAND(macro_name(uint64, __VA_ARGS__)) \
    __CV_EXPAND(macro_name(int64, __VA_ARGS__))
630
// Invokes macro_name(type, <extra args>) once for each floating-point lane type:
// float and double.
// The last line deliberately has no trailing line-continuation, so the macro
// never absorbs whatever line happens to follow it.
#define CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, ...) \
    __CV_EXPAND(macro_name(float, __VA_ARGS__)) \
    __CV_EXPAND(macro_name(double, __VA_ARGS__))
634
// Invokes macro_name(type, <extra args>) for every lane type by chaining the
// integer-type expansion and the floating-point-type expansion.
// The last line deliberately has no trailing line-continuation, so the macro
// never absorbs whatever line happens to follow it.
#define CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(macro_name, ...) \
    CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, __VA_ARGS__) \
    CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, __VA_ARGS__)
638
#define CV__HAL_INTRIN_IMPL_BIN_OP_(_Tp, bin_op) \
639
template<int n> inline \
640
v_reg<_Tp, n> operator bin_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
643
for( int i = 0; i < n; i++ ) \
644
c.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
647
template<int n> inline \
648
v_reg<_Tp, n>& operator bin_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
650
for( int i = 0; i < n; i++ ) \
651
a.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
655
// Instantiates CV__HAL_INTRIN_IMPL_BIN_OP_ for the given operator token over
// every lane type (integer and floating-point) via the ALL_TYPES expander.
#define CV__HAL_INTRIN_IMPL_BIN_OP(bin_op) CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(CV__HAL_INTRIN_IMPL_BIN_OP_, bin_op)
657CV__HAL_INTRIN_IMPL_BIN_OP(+)
658CV__HAL_INTRIN_IMPL_BIN_OP(-)
659CV__HAL_INTRIN_IMPL_BIN_OP(*)
660CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(CV__HAL_INTRIN_IMPL_BIN_OP_, /)
662
#define CV__HAL_INTRIN_IMPL_BIT_OP_(_Tp, bit_op) \
663
template<int n> CV_INLINE \
664
v_reg<_Tp, n> operator bit_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
667
typedef typename V_TypeTraits<_Tp>::int_type itype; \
668
for( int i = 0; i < n; i++ ) \
669
c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \
670
V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \
673
template<int n> CV_INLINE \
674
v_reg<_Tp, n>& operator bit_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
676
typedef typename V_TypeTraits<_Tp>::int_type itype; \
677
for( int i = 0; i < n; i++ ) \
678
a.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \
679
V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \
683
// Instantiates CV__HAL_INTRIN_IMPL_BIT_OP_ for the given bitwise operator token,
// first over the integer lane types and then over the floating-point lane types.
#define CV__HAL_INTRIN_IMPL_BIT_OP(bit_op) \
    CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op) \
    CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op)
688CV__HAL_INTRIN_IMPL_BIT_OP(&)
689CV__HAL_INTRIN_IMPL_BIT_OP(|)
690CV__HAL_INTRIN_IMPL_BIT_OP(^)
692
#define CV__HAL_INTRIN_IMPL_BITWISE_NOT_(_Tp, dummy) \
693
template<int n> CV_INLINE \
694
v_reg<_Tp, n> operator ~ (const v_reg<_Tp, n>& a) \
697
for( int i = 0; i < n; i++ ) \
698
c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int(~V_TypeTraits<_Tp>::reinterpret_int(a.s[i])); \
702CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(CV__HAL_INTRIN_IMPL_BITWISE_NOT_, ~)
709
#define OPENCV_HAL_IMPL_MATH_FUNC(func, cfunc, _Tp2) \
710
template<typename _Tp, int n> inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a) \
713
for( int i = 0; i < n; i++ ) \
714
c.s[i] = cfunc(a.s[i]); \
738
#define OPENCV_HAL_IMPL_MINMAX_FUNC(func, cfunc) \
739
template<typename _Tp, int n> inline v_reg<_Tp, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
742
for( int i = 0; i < n; i++ ) \
743
c.s[i] = cfunc(a.s[i], b.s[i]); \
749
#define OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(func, cfunc) \
750
template<typename _Tp, int n> inline _Tp func(const v_reg<_Tp, n>& a) \
753
for( int i = 1; i < n; i++ ) \
754
c = cfunc(c, a.s[i]); \
768OPENCV_HAL_IMPL_MINMAX_FUNC(v_min,
std::min)
780OPENCV_HAL_IMPL_MINMAX_FUNC(v_max,
std::max)
789OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min,
std::min)
798OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max,
std::max)
800
static
const
unsigned
char
popCountTable[] =
802
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
803
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
804
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
805
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
806
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
807
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
808
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
809
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
810
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
811
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
812
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
813
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
814
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
815
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
816
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
817
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
826
template<
typename
_Tp,
int
n>
830
for
(
int
i = 0; i < n*(int)
sizeof(_Tp); i++)
831
b.s[i/
sizeof(_Tp)] += popCountTable[v_reinterpret_as_u8(a).s[i]];
837
template<
typename
_Tp,
int
n>
838
inline
void
v_minmax(
const
v_reg<_Tp, n>& a,
const
v_reg<_Tp, n>& b,
839
v_reg<_Tp, n>& minval, v_reg<_Tp, n>& maxval )
841
for(
int
i = 0; i < n; i++ )
843
minval.s[i] =
std::min(a.s[i], b.s[i]);
844
maxval.s[i] =
std::max(a.s[i], b.s[i]);
851
#define OPENCV_HAL_IMPL_CMP_OP(cmp_op) \
852
template<typename _Tp, int n> \
853
inline v_reg<_Tp, n> operator cmp_op(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
855
typedef typename V_TypeTraits<_Tp>::int_type itype; \
857
for( int i = 0; i < n; i++ ) \
858
c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)-(int)(a.s[i] cmp_op b.s[i])); \
897
for
(
int
i = 0; i < n; i++)
902
inline
v_reg<double, n>
v_not_nan(
const
v_reg<double, n>& a)
904
typedef
typename
V_TypeTraits<double>::int_type itype;
906
for
(
int
i = 0; i < n; i++)
907
c.s[i] = V_TypeTraits<double>::reinterpret_from_int((itype)-(
int)(a.s[i] == a.s[i]));
913
#define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2) \
914
template<typename _Tp, int n> \
915
inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
917
typedef _Tp2 rtype; \
919
for( int i = 0; i < n; i++ ) \
920
c.s[i] = cast_op(a.s[i] bin_op b.s[i]); \
940
/** Absolute difference of two scalars.
 *
 * @param a first value
 * @param b second value
 * @return a - b when a > b, otherwise b - a
 */
template<typename T>
inline T _absdiff(T a, T b)
{
    // Always subtract the smaller operand from the larger one, so the
    // subtraction never goes below zero (this matters for unsigned T).
    return a > b ? a - b : b - a;
}
955
template<
typename
_Tp,
int
n>
960
const
rtype mask = (rtype)(std::numeric_limits<_Tp>::is_signed ? (1 << (
sizeof(rtype)*8 - 1)) : 0);
961
for(
int
i = 0; i < n; i++ )
963
rtype ua = a.s[i] ^ mask;
964
rtype ub = b.s[i] ^ mask;
965
c.s[i] = _absdiff(ua, ub);
976
for(
int
i = 0; i < c.nlanes; i++ )
977
c.s[i] = _absdiff(a.s[i], b.s[i]);
987
for(
int
i = 0; i < c.nlanes; i++ )
988
c.s[i] = _absdiff(a.s[i], b.s[i]);
996
template<
typename
_Tp,
int
n>
1000
for(
int
i = 0; i < n; i++)
1001
c.s[i] = saturate_cast<_Tp>(
std::abs(a.s[i] - b.s[i]));
1009
template<
typename
_Tp,
int
n>
1013
for(
int
i = 0; i < n; i++ )
1022
template<
typename
_Tp,
int
n>
1026
for(
int
i = 0; i < n; i++ )
1027
c.s[i] =
std::sqrt(a.s[i]*a.s[i] + b.s[i]*b.s[i]);
1035
template<
typename
_Tp,
int
n>
1039
for(
int
i = 0; i < n; i++ )
1040
c.s[i] = a.s[i]*a.s[i] + b.s[i]*b.s[i];
1048
template<
typename
_Tp,
int
n>
1053
for(
int
i = 0; i < n; i++ )
1054
d.s[i] = a.s[i]*b.s[i] + c.s[i];
1059
template<
typename
_Tp,
int
n>
1063
return
v_fma(a, b, c);
1079
template<
typename
_Tp,
int
n>
inline
v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1083
v_reg<w_type, n/2> c;
1084
for(
int
i = 0; i < (n/2); i++ )
1085
c.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1];
1100
template<
typename
_Tp,
int
n>
inline
v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1105
v_reg<w_type, n/2> s;
1106
for(
int
i = 0; i < (n/2); i++ )
1107
s.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1] + c.s[i];
1118
template<
typename
_Tp,
int
n>
inline
v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1126
template<
typename
_Tp,
int
n>
inline
v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1144
template<
typename
_Tp,
int
n>
inline
v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
1148
v_reg<q_type, n/4> s;
1149
for(
int
i = 0; i < (n/4); i++ )
1150
s.s[i] = (q_type)a.s[i*4 ]*b.s[i*4 ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] +
1151
(q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3];
1166
template<
typename
_Tp,
int
n>
inline
v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
1171
v_reg<q_type, n/4> s;
1172
for(
int
i = 0; i < (n/4); i++ )
1173
s.s[i] = (q_type)a.s[i*4 ]*b.s[i*4 ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] +
1174
(q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3] + c.s[i];
1187
template<
typename
_Tp,
int
n>
inline
v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
1195
template<
typename
_Tp,
int
n>
inline
v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
1224
for(
int
i = 0; i < (n/2); i++ )
1226
c.s[i] = (w_type)a.s[i]*b.s[i];
1227
d.s[i] = (w_type)a.s[i+(n/2)]*b.s[i+(n/2)];
1240
for
(
int
i = 0; i < n; i++)
1241
c.s[i] = (_Tp)(((w_type)a.s[i] * b.s[i]) >>
sizeof(_Tp)*8);
1246
template<
typename
_Tp,
int
n>
inline
void
v_hsum(
const
v_reg<_Tp, n>& a,
1247
v_reg<
typename
V_TypeTraits<_Tp>::w_type, n/2>& c)
1249
typedef
typename
V_TypeTraits<_Tp>::w_type w_type;
1250
for(
int
i = 0; i < (n/2); i++ )
1252
c.s[i] = (w_type)a.s[i*2] + a.s[i*2+1];
1259
#define OPENCV_HAL_IMPL_SHIFT_OP(shift_op) \
1260
template<typename _Tp, int n> inline v_reg<_Tp, n> operator shift_op(const v_reg<_Tp, n>& a, int imm) \
1263
for( int i = 0; i < n; i++ ) \
1264
c.s[i] = (_Tp)(a.s[i] shift_op imm); \
1280
#define OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(suffix,opA,opB) \
1281
template<int imm, typename _Tp, int n> inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a) \
1284
for (int i = 0; i < n; i++) \
1286
int sIndex = i opA imm; \
1287
if (0 <= sIndex && sIndex < n) \
1289
b.s[i] = a.s[sIndex]; \
1298
template<int imm, typename _Tp, int n> inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
1301
for (int i = 0; i < n; i++) \
1303
int aIndex = i opA imm; \
1304
int bIndex = i opA imm opB n; \
1305
if (0 <= bIndex && bIndex < n) \
1307
c.s[i] = b.s[bIndex]; \
1309
else if (0 <= aIndex && aIndex < n) \
1311
c.s[i] = a.s[aIndex]; \
1341
for(
int
i = 1; i < n; i++ )
1360
for(
int
i = 0; i < (n/4); i++)
1362
r.s[i*4 + 0] = a.s[i*4 + 0] + a.s[i*4 + 1] + a.s[i*4 + 2] + a.s[i*4 + 3];
1363
r.s[i*4 + 1] = b.s[i*4 + 0] + b.s[i*4 + 1] + b.s[i*4 + 2] + b.s[i*4 + 3];
1364
r.s[i*4 + 2] = c.s[i*4 + 0] + c.s[i*4 + 1] + c.s[i*4 + 2] + c.s[i*4 + 3];
1365
r.s[i*4 + 3] = d.s[i*4 + 0] + d.s[i*4 + 1] + d.s[i*4 + 2] + d.s[i*4 + 3];
1380
for
(
int
i = 1; i < n; i++)
1381
c += _absdiff(a.s[i], b.s[i]);
1398
for(
int
i = 0; i < n; i++ )
1414
for
(
int
i = 0; i < n; i++)
1426
for(
int
i = 0; i < n; i++ )
1438
for(
int
i = 0; i < n; i++ )
1458
typedef
typename
Traits::int_type int_type;
1460
for(
int
i = 0; i < n; i++ )
1462
int_type m = Traits::reinterpret_int(mask.s[i]);
1464
c.s[i] = m ? a.s[i] : b.s[i];
1481
for(
int
i = 0; i < (n/2); i++ )
1484
b1.s[i] = a.s[i+(n/2)];
1497
template<
typename
_Tp,
int
n>
1498
inline
v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1502
for(
int
i = 0; i < (n/2); i++ )
1516
template<
typename
_Tp,
int
n>
1517
inline
v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1521
for(
int
i = 0; i < (n/2); i++ )
1522
b.s[i] = a.s[i+(n/2)];
1527
template<
typename
_Tp,
int
n>
inline
v_reg<typename V_TypeTraits<_Tp>::int_type, n>
1528
v_reinterpret_as_int(
const
v_reg<_Tp, n>& a)
1530
v_reg<typename V_TypeTraits<_Tp>::int_type, n> c;
1531
for(
int
i = 0; i < n; i++ )
1532
c.s[i] = V_TypeTraits<_Tp>::reinterpret_int(a.s[i]);
1536
template<
typename
_Tp,
int
n>
inline
v_reg<typename V_TypeTraits<_Tp>::uint_type, n>
1537
v_reinterpret_as_uint(
const
v_reg<_Tp, n>& a)
1539
v_reg<typename V_TypeTraits<_Tp>::uint_type, n> c;
1540
for(
int
i = 0; i < n; i++ )
1541
c.s[i] = V_TypeTraits<_Tp>::reinterpret_uint(a.s[i]);
1561
for( i = 0; i < n/2; i++ )
1563
b0.s[i*2] = a0.s[i];
1564
b0.s[i*2+1] = a1.s[i];
1568
b1.s[i*2-n] = a0.s[i];
1569
b1.s[i*2-n+1] = a1.s[i];
1586
template<
typename
_Tp>
1589
#if CV_STRONG_ALIGNMENT
1592
return
v_reg<_Tp, simd128_width /
sizeof(_Tp)>(ptr);
1610
template<
typename
_Tp>
1611
inline
v_reg<_Tp, simd256_width /
sizeof(_Tp)> v256_load(
const
_Tp* ptr)
1613
#if CV_STRONG_ALIGNMENT
1616
return
v_reg<_Tp, simd256_width /
sizeof(_Tp)>(ptr);
1635
template<
typename
_Tp>
1636
inline
v_reg<_Tp, simd512_width /
sizeof(_Tp)> v512_load(
const
_Tp* ptr)
1638
#if CV_STRONG_ALIGNMENT
1641
return
v_reg<_Tp, simd512_width /
sizeof(_Tp)>(ptr);
1651
template<
typename
_Tp>
1655
return
v_reg<_Tp, simd128_width /
sizeof(_Tp)>(ptr);
1666
template<
typename
_Tp>
1667
inline
v_reg<_Tp, simd256_width /
sizeof(_Tp)> v256_load_aligned(
const
_Tp* ptr)
1670
return
v_reg<_Tp, simd256_width /
sizeof(_Tp)>(ptr);
1682
template<
typename
_Tp>
1683
inline
v_reg<_Tp, simd512_width /
sizeof(_Tp)> v512_load_aligned(
const
_Tp* ptr)
1686
return
v_reg<_Tp, simd512_width /
sizeof(_Tp)>(ptr);
1701
template<
typename
_Tp>
1704
#if CV_STRONG_ALIGNMENT
1707
v_reg<_Tp, simd128_width /
sizeof(_Tp)> c;
1708
for(
int
i = 0; i < c.nlanes/2; i++ )
1728
template<
typename
_Tp>
1729
inline
v_reg<_Tp, simd256_width /
sizeof(_Tp)> v256_load_low(
const
_Tp* ptr)
1731
#if CV_STRONG_ALIGNMENT
1734
v_reg<_Tp, simd256_width /
sizeof(_Tp)> c;
1735
for
(
int
i = 0; i < c.nlanes / 2; i++)
1756
template<
typename
_Tp>
1757
inline
v_reg<_Tp, simd512_width /
sizeof(_Tp)> v512_load_low(
const
_Tp* ptr)
1759
#if CV_STRONG_ALIGNMENT
1762
v_reg<_Tp, simd512_width /
sizeof(_Tp)> c;
1763
for
(
int
i = 0; i < c.nlanes / 2; i++)
1783
template<
typename
_Tp>
1786
#if CV_STRONG_ALIGNMENT
1790
v_reg<_Tp, simd128_width /
sizeof(_Tp)> c;
1791
for(
int
i = 0; i < c.nlanes/2; i++ )
1794
c.s[i+c.nlanes/2] = hiptr[i];
1813
template<
typename
_Tp>
1814
inline
v_reg<_Tp, simd256_width /
sizeof(_Tp)> v256_load_halves(
const
_Tp* loptr,
const
_Tp* hiptr)
1816
#if CV_STRONG_ALIGNMENT
1820
v_reg<_Tp, simd256_width /
sizeof(_Tp)> c;
1821
for
(
int
i = 0; i < c.nlanes / 2; i++)
1824
c.s[i + c.nlanes / 2] = hiptr[i];
1844
template<
typename
_Tp>
1845
inline
v_reg<_Tp, simd512_width /
sizeof(_Tp)> v512_load_halves(
const
_Tp* loptr,
const
_Tp* hiptr)
1847
#if CV_STRONG_ALIGNMENT
1851
v_reg<_Tp, simd512_width /
sizeof(_Tp)> c;
1852
for
(
int
i = 0; i < c.nlanes / 2; i++)
1855
c.s[i + c.nlanes / 2] = hiptr[i];
1873
template<
typename
_Tp>
1874
inline
v_reg<typename V_TypeTraits<_Tp>::w_type, simd128_width /
sizeof(
typename
V_TypeTraits<_Tp>::w_type)>
1877
#if CV_STRONG_ALIGNMENT
1881
v_reg<w_type, simd128_width /
sizeof(w_type)> c;
1882
for(
int
i = 0; i < c.nlanes; i++ )
1903
template<
typename
_Tp>
1904
inline
v_reg<typename V_TypeTraits<_Tp>::w_type, simd256_width /
sizeof(
typename
V_TypeTraits<_Tp>::w_type)>
1905v256_load_expand(
const
_Tp* ptr)
1907
#if CV_STRONG_ALIGNMENT
1910
typedef
typename
V_TypeTraits<_Tp>::w_type w_type;
1911
v_reg<w_type, simd256_width /
sizeof(w_type)> c;
1912
for
(
int
i = 0; i < c.nlanes; i++)
1934
template<
typename
_Tp>
1935
inline
v_reg<typename V_TypeTraits<_Tp>::w_type, simd512_width /
sizeof(
typename
V_TypeTraits<_Tp>::w_type)>
1936v512_load_expand(
const
_Tp* ptr)
1938
#if CV_STRONG_ALIGNMENT
1941
typedef
typename
V_TypeTraits<_Tp>::w_type w_type;
1942
v_reg<w_type, simd512_width /
sizeof(w_type)> c;
1943
for
(
int
i = 0; i < c.nlanes; i++)
1962
template<
typename
_Tp>
1963
inline
v_reg<typename V_TypeTraits<_Tp>::q_type, simd128_width /
sizeof(
typename
V_TypeTraits<_Tp>::q_type)>
1966
#if CV_STRONG_ALIGNMENT
1970
v_reg<q_type, simd128_width /
sizeof(q_type)> c;
1971
for(
int
i = 0; i < c.nlanes; i++ )
1991
template<
typename
_Tp>
1992
inline
v_reg<typename V_TypeTraits<_Tp>::q_type, simd256_width /
sizeof(
typename
V_TypeTraits<_Tp>::q_type)>
1993v256_load_expand_q(
const
_Tp* ptr)
1995
#if CV_STRONG_ALIGNMENT
1998
typedef
typename
V_TypeTraits<_Tp>::q_type q_type;
1999
v_reg<q_type, simd256_width /
sizeof(q_type)> c;
2000
for
(
int
i = 0; i < c.nlanes; i++)
2021
template<
typename
_Tp>
2022
inline
v_reg<typename V_TypeTraits<_Tp>::q_type, simd512_width /
sizeof(
typename
V_TypeTraits<_Tp>::q_type)>
2023v512_load_expand_q(
const
_Tp* ptr)
2025
#if CV_STRONG_ALIGNMENT
2028
typedef
typename
V_TypeTraits<_Tp>::q_type q_type;
2029
v_reg<q_type, simd512_width /
sizeof(q_type)> c;
2030
for
(
int
i = 0; i < c.nlanes; i++)
2049
#if CV_STRONG_ALIGNMENT
2053
for( i = i2 = 0; i < n; i++, i2 += 2 )
2071
#if CV_STRONG_ALIGNMENT
2075
for( i = i3 = 0; i < n; i++, i3 += 3 )
2091
template<
typename
_Tp,
int
n>
2096
#if CV_STRONG_ALIGNMENT
2100
for( i = i4 = 0; i < n; i++, i4 += 4 )
2117
template<
typename
_Tp,
int
n>
2120
hal::StoreMode
=hal::STORE_UNALIGNED)
2122
#if CV_STRONG_ALIGNMENT
2126
for( i = i2 = 0; i < n; i++, i2 += 2 )
2141
template<
typename
_Tp,
int
n>
2144
hal::StoreMode
=hal::STORE_UNALIGNED)
2146
#if CV_STRONG_ALIGNMENT
2150
for( i = i3 = 0; i < n; i++, i3 += 3 )
2169
hal::StoreMode
=hal::STORE_UNALIGNED)
2171
#if CV_STRONG_ALIGNMENT
2175
for( i = i4 = 0; i < n; i++, i4 += 4 )
2192
template<
typename
_Tp,
int
n>
2195
#if CV_STRONG_ALIGNMENT
2198
for(
int
i = 0; i < n; i++ )
2202
template<
typename
_Tp,
int
n>
2203
inline
void
v_store(_Tp* ptr,
const
v_reg<_Tp, n>& a, hal::StoreMode
)
2205
#if CV_STRONG_ALIGNMENT
2218
template<
typename
_Tp,
int
n>
2221
#if CV_STRONG_ALIGNMENT
2224
for(
int
i = 0; i < (n/2); i++ )
2235
template<
typename
_Tp,
int
n>
2238
#if CV_STRONG_ALIGNMENT
2241
for(
int
i = 0; i < (n/2); i++ )
2242
ptr[i] = a.s[i+(n/2)];
2253
template<
typename
_Tp,
int
n>
2260
template<
typename
_Tp,
int
n>
2261
inline
void
v_store_aligned_nocache(_Tp* ptr,
const
v_reg<_Tp, n>& a)
2267
template<
typename
_Tp,
int
n>
2268
inline
void
v_store_aligned(_Tp* ptr,
const
v_reg<_Tp, n>& a, hal::StoreMode
)
2284
template<
typename
_Tp,
int
n>
2288
for(
int
i = 0; i < (n/2); i++ )
2291
c.s[i+(n/2)] = b.s[i];
2306
template<
typename
_Tp,
int
n>
2310
for(
int
i = 0; i < (n/2); i++ )
2312
c.s[i] = a.s[i+(n/2)];
2313
c.s[i+(n/2)] = b.s[i+(n/2)];
2324
template<
typename
_Tp,
int
n>
2328
for(
int
i = 0; i < (n/2); i++ )
2331
low.s[i+(n/2)] = b.s[i];
2332
high.s[i] = a.s[i+(n/2)];
2333
high.s[i+(n/2)] = b.s[i+(n/2)];
2345
template<
typename
_Tp,
int
n>
2349
for(
int
i = 0; i < n; i++ )
2350
c.s[i] = a.s[n-i-1];
2373
template<
int
s,
typename
_Tp,
int
n>
2377
const
int
shift = n - s;
2379
for
(; i < shift; ++i)
2382
r.s[i] = b.s[i-shift];
2399
template<
int
s,
typename
_Tp,
int
n>
2415
template<
int
i,
typename
_Tp,
int
n>
2430
for(
int
i = 0; i < n; i++ )
2439
for(
int
i = 0; i < n; i++ )
2455
for(
int
i = 0; i < n; i++ )
2468
for(
int
i = 0; i < n; i++ )
2481
for(
int
i = 0; i < n; i++ )
2482
c.s[i] = (
int)(a.s[i]);
2490
for(
int
i = 0; i < n; i++ )
2502
for(
int
i = 0; i < n; i++ )
2514
for(
int
i = 0; i < n; i++ )
2526
for(
int
i = 0; i < n; i++ )
2528
c.s[i] = (int)(a.s[i]);
2540
for(
int
i = 0; i < n; i++ )
2541
c.s[i] = (
float)a.s[i];
2551
for(
int
i = 0; i < n; i++ )
2553
c.s[i] = (float)a.s[i];
2565
for(
int
i = 0; i < n; i++ )
2567
c.s[i] = (float)a.s[i];
2568
c.s[i+n] = (
float)b.s[i];
2578
v_reg<double, (n/2)> c;
2579
for(
int
i = 0; i < (n/2); i++ )
2580
c.s[i] = (
double)a.s[i];
2589
v_reg<double, (n/2)> c;
2590
for(
int
i = 0; i < (n/2); i++ )
2591
c.s[i] = (
double)a.s[i + (n/2)];
2600
v_reg<double, (n/2)> c;
2601
for(
int
i = 0; i < (n/2); i++ )
2602
c.s[i] = (
double)a.s[i];
2611
v_reg<double, (n/2)> c;
2612
for(
int
i = 0; i < (n/2); i++ )
2613
c.s[i] = (
double)a.s[i + (n/2)];
2623
for(
int
i = 0; i < n; i++ )
2624
c.s[i] = (
double)a.s[i];
2629
template<
typename
_Tp>
inline
v_reg<_Tp, simd128_width /
sizeof(_Tp)> v_lut(
const
_Tp* tab,
const
int* idx)
2631
v_reg<_Tp, simd128_width /
sizeof(_Tp)> c;
2632
for
(
int
i = 0; i < c.nlanes; i++)
2633
c.s[i] = tab[idx[i]];
2636
template<
typename
_Tp>
inline
v_reg<_Tp, simd128_width /
sizeof(_Tp)> v_lut_pairs(
const
_Tp* tab,
const
int* idx)
2638
v_reg<_Tp, simd128_width /
sizeof(_Tp)> c;
2639
for
(
int
i = 0; i < c.nlanes; i++)
2640
c.s[i] = tab[idx[i / 2] + i % 2];
2643
template<
typename
_Tp>
inline
v_reg<_Tp, simd128_width /
sizeof(_Tp)> v_lut_quads(
const
_Tp* tab,
const
int* idx)
2645
v_reg<_Tp, simd128_width /
sizeof(_Tp)> c;
2646
for
(
int
i = 0; i < c.nlanes; i++)
2647
c.s[i] = tab[idx[i / 4] + i % 4];
2651
template<
int
n>
inline
v_reg<int, n> v_lut(
const
int* tab,
const
v_reg<int, n>& idx)
2654
for(
int
i = 0; i < n; i++ )
2655
c.s[i] = tab[idx.s[i]];
2659
template<
int
n>
inline
v_reg<unsigned, n> v_lut(
const
unsigned* tab,
const
v_reg<int, n>& idx)
2662
for
(
int
i = 0; i < n; i++)
2663
c.s[i] = tab[idx.s[i]];
2667
template<
int
n>
inline
v_reg<float, n> v_lut(
const
float* tab,
const
v_reg<int, n>& idx)
2670
for(
int
i = 0; i < n; i++ )
2671
c.s[i] = tab[idx.s[i]];
2675
template<
int
n>
inline
v_reg<double, n/2> v_lut(
const
double* tab,
const
v_reg<int, n>& idx)
2677
v_reg<double, n/2> c;
2678
for(
int
i = 0; i < n/2; i++ )
2679
c.s[i] = tab[idx.s[i]];
2684
template<
int
n>
inline
void
v_lut_deinterleave(
const
float* tab,
const
v_reg<int, n>& idx,
2685
v_reg<float, n>& x, v_reg<float, n>& y)
2687
for(
int
i = 0; i < n; i++ )
2695
template<
int
n>
inline
void
v_lut_deinterleave(
const
double* tab,
const
v_reg<int, n*2>& idx,
2696
v_reg<double, n>& x, v_reg<double, n>& y)
2698
for(
int
i = 0; i < n; i++ )
2706
template<
typename
_Tp,
int
n>
inline
v_reg<_Tp, n> v_interleave_pairs(
const
v_reg<_Tp, n>& vec)
2709
for
(
int
i = 0; i < n/4; i++)
2711
c.s[4*i ] = vec.s[4*i ];
2712
c.s[4*i+1] = vec.s[4*i+2];
2713
c.s[4*i+2] = vec.s[4*i+1];
2714
c.s[4*i+3] = vec.s[4*i+3];
2719
template<
typename
_Tp,
int
n>
inline
v_reg<_Tp, n> v_interleave_quads(
const
v_reg<_Tp, n>& vec)
2722
for
(
int
i = 0; i < n/8; i++)
2724
c.s[8*i ] = vec.s[8*i ];
2725
c.s[8*i+1] = vec.s[8*i+4];
2726
c.s[8*i+2] = vec.s[8*i+1];
2727
c.s[8*i+3] = vec.s[8*i+5];
2728
c.s[8*i+4] = vec.s[8*i+2];
2729
c.s[8*i+5] = vec.s[8*i+6];
2730
c.s[8*i+6] = vec.s[8*i+3];
2731
c.s[8*i+7] = vec.s[8*i+7];
2736
template<
typename
_Tp,
int
n>
inline
v_reg<_Tp, n> v_pack_triplets(
const
v_reg<_Tp, n>& vec)
2739
for
(
int
i = 0; i < n/4; i++)
2741
c.s[3*i ] = vec.s[4*i ];
2742
c.s[3*i+1] = vec.s[4*i+1];
2743
c.s[3*i+2] = vec.s[4*i+2];
2763
template<
typename
_Tp,
int
n>
2769
for
(
int
i = 0; i < n / 4; i++)
2771
b0.s[0 + i*4] = a0.s[0 + i*4]; b0.s[1 + i*4] = a1.s[0 + i*4];
2772
b0.s[2 + i*4] = a2.s[0 + i*4]; b0.s[3 + i*4] = a3.s[0 + i*4];
2773
b1.s[0 + i*4] = a0.s[1 + i*4]; b1.s[1 + i*4] = a1.s[1 + i*4];
2774
b1.s[2 + i*4] = a2.s[1 + i*4]; b1.s[3 + i*4] = a3.s[1 + i*4];
2775
b2.s[0 + i*4] = a0.s[2 + i*4]; b2.s[1 + i*4] = a1.s[2 + i*4];
2776
b2.s[2 + i*4] = a2.s[2 + i*4]; b2.s[3 + i*4] = a3.s[2 + i*4];
2777
b3.s[0 + i*4] = a0.s[3 + i*4]; b3.s[1 + i*4] = a1.s[3 + i*4];
2778
b3.s[2 + i*4] = a2.s[3 + i*4]; b3.s[3 + i*4] = a3.s[3 + i*4];
2784
// Defines the free function <prefix>_setzero_<suffix>(), which returns _Tpvec::zero().
#define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, prefix, suffix) \
    inline _Tpvec prefix##_setzero_##suffix() { return _Tpvec::zero(); }
2830
// Defines the free function <prefix>_setall_<suffix>(val), which returns _Tpvec::all(val).
#define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, prefix, suffix) \
    inline _Tpvec prefix##_setall_##suffix(_Tp val) { return _Tpvec::all(val); }
2876
// Defines v_reinterpret_as_<suffix>(a): forwards to a.reinterpret_as<>() with lane
// type _Tp and a lane count of n0*sizeof(_Tp0)/sizeof(_Tp), i.e. the source byte
// size divided by the new lane size.
#define OPENCV_HAL_IMPL_C_REINTERPRET(_Tp, suffix) \
    template<typename _Tp0, int n0> inline v_reg<_Tp, n0*sizeof(_Tp0)/sizeof(_Tp)> \
    v_reinterpret_as_##suffix(const v_reg<_Tp0, n0>& a) \
    { return a.template reinterpret_as<_Tp, n0*sizeof(_Tp0)/sizeof(_Tp)>(); }
2898
// Defines v_shl<shift>(a) for lane type _Tp: the shift amount is a template
// argument and the result is a << shift.
#define OPENCV_HAL_IMPL_C_SHIFTL(_Tp) \
    template<int shift, int n> inline v_reg<_Tp, n> v_shl(const v_reg<_Tp, n>& a) \
    { return a << shift; }
2915
// Defines v_shr<shift>(a) for lane type _Tp: the shift amount is a template
// argument and the result is a >> shift.
#define OPENCV_HAL_IMPL_C_SHIFTR(_Tp) \
    template<int shift, int n> inline v_reg<_Tp, n> v_shr(const v_reg<_Tp, n>& a) \
    { return a >> shift; }
2932
#define OPENCV_HAL_IMPL_C_RSHIFTR(_Tp) \
2933
template<int shift, int n> inline v_reg<_Tp, n> v_rshr(const v_reg<_Tp, n>& a) \
2936
for( int i = 0; i < n; i++ ) \
2937
c.s[i] = (_Tp)((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \
2954
#define OPENCV_HAL_IMPL_C_PACK(_Tp, _Tpn, pack_suffix, cast) \
2955
template<int n> inline v_reg<_Tpn, 2*n> v_##pack_suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
2957
v_reg<_Tpn, 2*n> c; \
2958
for( int i = 0; i < n; i++ ) \
2960
c.s[i] = cast<_Tpn>(a.s[i]); \
2961
c.s[i+n] = cast<_Tpn>(b.s[i]); \
2989
#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tp, _Tpn, pack_suffix, cast) \
2990
template<int shift, int n> inline v_reg<_Tpn, 2*n> v_rshr_##pack_suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
2992
v_reg<_Tpn, 2*n> c; \
2993
for( int i = 0; i < n; i++ ) \
2995
c.s[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \
2996
c.s[i+n] = cast<_Tpn>((b.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \
3024
#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tp, _Tpn, pack_suffix, cast) \
3025
template<int n> inline void v_##pack_suffix##_store(_Tpn* ptr, const v_reg<_Tp, n>& a) \
3027
for( int i = 0; i < n; i++ ) \
3028
ptr[i] = cast<_Tpn>(a.s[i]); \
3054
#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tp, _Tpn, pack_suffix, cast) \
3055
template<int shift, int n> inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const v_reg<_Tp, n>& a) \
3057
for( int i = 0; i < n; i++ ) \
3058
ptr[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \
3083
template<
typename
_Tpm,
typename
_Tp,
int
n>
3084
inline
void
_pack_b(_Tpm* mptr,
const
v_reg<_Tp, n>& a,
const
v_reg<_Tp, n>& b)
3086
for
(
int
i = 0; i < n; ++i)
3088
mptr[i] = (_Tpm)a.s[i];
3089
mptr[i + n] = (_Tpm)b.s[i];
3117
_pack_b(mask.s, a, b);
3141
_pack_b(mask.s, a, b);
3142
_pack_b(mask.s + 2*n, c, d);
3172
_pack_b(mask.s, a, b);
3173
_pack_b(mask.s + 2*n, c, d);
3174
_pack_b(mask.s + 4*n, e, f);
3175
_pack_b(mask.s + 6*n, g, h);
3201
for
(
int
i = 0; i < n / 4; i++)
3203
res.s[0 + i*4] = v.s[0 + i*4] * a.s[0 + i*4] + v.s[1 + i*4] * b.s[0 + i*4] + v.s[2 + i*4] * c.s[0 + i*4] + v.s[3 + i*4] * d.s[0 + i*4];
3204
res.s[1 + i*4] = v.s[0 + i*4] * a.s[1 + i*4] + v.s[1 + i*4] * b.s[1 + i*4] + v.s[2 + i*4] * c.s[1 + i*4] + v.s[3 + i*4] * d.s[1 + i*4];
3205
res.s[2 + i*4] = v.s[0 + i*4] * a.s[2 + i*4] + v.s[1 + i*4] * b.s[2 + i*4] + v.s[2 + i*4] * c.s[2 + i*4] + v.s[3 + i*4] * d.s[2 + i*4];
3206
res.s[3 + i*4] = v.s[0 + i*4] * a.s[3 + i*4] + v.s[1 + i*4] * b.s[3 + i*4] + v.s[2 + i*4] * c.s[3 + i*4] + v.s[3 + i*4] * d.s[3 + i*4];
3231
for
(
int
i = 0; i < n / 4; i++)
3233
res.s[0 + i * 4] = v.s[0 + i * 4] * a.s[0 + i * 4] + v.s[1 + i * 4] * b.s[0 + i * 4] + v.s[2 + i * 4] * c.s[0 + i * 4] + d.s[0 + i * 4];
3234
res.s[1 + i * 4] = v.s[0 + i * 4] * a.s[1 + i * 4] + v.s[1 + i * 4] * b.s[1 + i * 4] + v.s[2 + i * 4] * c.s[1 + i * 4] + d.s[1 + i * 4];
3235
res.s[2 + i * 4] = v.s[0 + i * 4] * a.s[2 + i * 4] + v.s[1 + i * 4] * b.s[2 + i * 4] + v.s[2 + i * 4] * c.s[2 + i * 4] + d.s[2 + i * 4];
3236
res.s[3 + i * 4] = v.s[0 + i * 4] * a.s[3 + i * 4] + v.s[1 + i * 4] * b.s[3 + i * 4] + v.s[2 + i * 4] * c.s[3 + i * 4] + d.s[3 + i * 4];
3242
template<
int
n>
inline
v_reg<double, n/2>
v_dotprod_expand(
const
v_reg<int, n>& a,
const
v_reg<int, n>& b)
3244
template<
int
n>
inline
v_reg<double, n/2>
v_dotprod_expand(
const
v_reg<int, n>& a,
const
v_reg<int, n>& b,
3245
const
v_reg<double, n/2>& c)
3248
template<
int
n>
inline
v_reg<double, n/2>
v_dotprod_expand_fast(
const
v_reg<int, n>& a,
const
v_reg<int, n>& b)
3250
template<
int
n>
inline
v_reg<double, n/2>
v_dotprod_expand_fast(
const
v_reg<int, n>& a,
const
v_reg<int, n>& b,
3251
const
v_reg<double, n/2>& c)
3256
inline
v_reg<float, simd128_width /
sizeof(float)>
3259
v_reg<float, simd128_width /
sizeof(float)> v;
3260
for(
int
i = 0; i < v.nlanes; i++ )
3267
inline
v_reg<float, simd256_width /
sizeof(float)>
3268v256_load_expand(
const
float16_t* ptr)
3270
v_reg<float, simd256_width /
sizeof(float)> v;
3271
for
(
int
i = 0; i < v.nlanes; i++)
3279
inline
v_reg<float, simd512_width /
sizeof(float)>
3280v512_load_expand(
const
float16_t* ptr)
3282
v_reg<float, simd512_width /
sizeof(float)> v;
3283
for
(
int
i = 0; i < v.nlanes; i++)
3291
template<
int
n>
inline
void
3292v_pack_store(float16_t* ptr,
const
v_reg<float, n>& v)
3294
for(
int
i = 0; i < v.nlanes; i++ )
3296
ptr[i] = float16_t(v.s[i]);
3300
/** Empty function: performs no work in this implementation. */
inline void v_cleanup()
{
    // Intentionally empty.
}
3302
/** Empty function: performs no work in this implementation. */
inline void v256_cleanup()
{
    // Intentionally empty.
}
3305
/** Empty function: performs no work in this implementation. */
inline void v512_cleanup()
{
    // Intentionally empty.
}
3311CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
3315
#if !defined(CV_DOXYGEN)
CV_EXPORTS_W void max(InputArray src1, InputArray src2, OutputArray dst)
Calculates per-element maximum of two arrays or an array and a scalar.
CV_EXPORTS_W void sqrt(InputArray src, OutputArray dst)
Calculates a square root of array elements.
CV_EXPORTS_W void exp(InputArray src, OutputArray dst)
Calculates the exponent of every array element.
CV_EXPORTS_W void min(InputArray src1, InputArray src2, OutputArray dst)
Calculates per-element minimum of two arrays or an array and a scalar.
CV_EXPORTS_W void log(InputArray src, OutputArray dst)
Calculates the natural logarithm of every array element.
#define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, prefix, suffix)
Helper macro
Definition:
intrin_cpp.hpp:2830
#define OPENCV_HAL_IMPL_C_RSHIFTR(_Tp)
Helper macro
Definition:
intrin_cpp.hpp:2932
#define OPENCV_HAL_IMPL_C_SHIFTR(_Tp)
Helper macro
Definition:
intrin_cpp.hpp:2915
#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tp, _Tpn, pack_suffix, cast)
Helper macro
Definition:
intrin_cpp.hpp:2989
#define OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(suffix, opA, opB)
Helper macro that generates the v_rotate_<suffix> function templates
Definition:
intrin_cpp.hpp:1280
#define OPENCV_HAL_IMPL_CMP_OP(cmp_op)
Helper macro
Definition:
intrin_cpp.hpp:851
OPENCV_HAL_IMPL_MATH_FUNC(v_abs,(typename V_TypeTraits< _Tp >::abs_type) std::abs, typename V_TypeTraits< _Tp >::abs_type) static const unsigned char popCountTable[]
Square root of elements
#define OPENCV_HAL_IMPL_C_SHIFTL(_Tp)
Helper macro
Definition:
intrin_cpp.hpp:2898
#define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, prefix, suffix)
Helper macro
Definition:
intrin_cpp.hpp:2784
#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tp, _Tpn, pack_suffix, cast)
Helper macro
Definition:
intrin_cpp.hpp:3054
#define OPENCV_HAL_IMPL_C_REINTERPRET(_Tp, suffix)
Helper macro
Definition:
intrin_cpp.hpp:2876
#define OPENCV_HAL_IMPL_C_PACK(_Tp, _Tpn, pack_suffix, cast)
Helper macro
Definition:
intrin_cpp.hpp:2954
#define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2)
Helper macro
Definition:
intrin_cpp.hpp:913
#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tp, _Tpn, pack_suffix, cast)
Helper macro
Definition:
intrin_cpp.hpp:3024
#define OPENCV_HAL_IMPL_SHIFT_OP(shift_op)
Helper macro
Definition:
intrin_cpp.hpp:1259
bool v_check_any(const v_reg< _Tp, n > &a)
Check if any of packed values is less than zero
Definition:
intrin_cpp.hpp:1436
v_reg< _Tp, n > v_combine_high(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Combine vector from last elements of two vectors
Definition:
intrin_cpp.hpp:2307
v_reg< float, n > v_matmul(const v_reg< float, n > &v, const v_reg< float, n > &a, const v_reg< float, n > &b, const v_reg< float, n > &c, const v_reg< float, n > &d)
Matrix multiplication
Definition:
intrin_cpp.hpp:3196
v_reg< int, n > v_round(const v_reg< float, n > &a)
Round elements
Definition:
intrin_cpp.hpp:2427
CV_INLINE v_reg< _Tp, n > operator|(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Bitwise OR
v_reg< schar, 16 > v_int8x16
Sixteen 8-bit signed integer values
Definition:
intrin_cpp.hpp:490
v_reg< uchar, 16 > v_uint8x16
Sixteen 8-bit unsigned integer values
Definition:
intrin_cpp.hpp:488
void v_store_high(_Tp *ptr, const v_reg< _Tp, n > &a)
Store data to memory (higher half)
Definition:
intrin_cpp.hpp:2236
int v_signmask(const v_reg< _Tp, n > &a)
Get negative values mask
Definition:
intrin_cpp.hpp:1395
void v_zip(const v_reg< _Tp, n > &a0, const v_reg< _Tp, n > &a1, v_reg< _Tp, n > &b0, v_reg< _Tp, n > &b1)
Interleave two vectors
Definition:
intrin_cpp.hpp:1557
v_reg< int64, 2 > v_int64x2
Two 64-bit signed integer values
Definition:
intrin_cpp.hpp:506
void v_store(_Tp *ptr, const v_reg< _Tp, n > &a)
Store data to memory
Definition:
intrin_cpp.hpp:2193
v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > v_dotprod_expand(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Dot product of elements and expand
Definition:
intrin_cpp.hpp:1145
V_TypeTraits< typename V_TypeTraits< _Tp >::abs_type >::sum_type v_reduce_sad(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Sum absolute differences of values
Definition:
intrin_cpp.hpp:1377
v_reg< int, n > v_ceil(const v_reg< float, n > &a)
Ceil elements
Definition:
intrin_cpp.hpp:2465
v_reg< ushort, 8 > v_uint16x8
Eight 16-bit unsigned integer values
Definition:
intrin_cpp.hpp:492
CV_INLINE v_reg< _Tp, n > operator&(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Bitwise AND
void v_store_low(_Tp *ptr, const v_reg< _Tp, n > &a)
Store data to memory (lower half)
Definition:
intrin_cpp.hpp:2219
v_reg< int, n > v_floor(const v_reg< float, n > &a)
Floor elements
Definition:
intrin_cpp.hpp:2452
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > v_dotprod(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Dot product of elements
Definition:
intrin_cpp.hpp:1080
int v_scan_forward(const v_reg< _Tp, n > &a)
Get first negative lane index
Definition:
intrin_cpp.hpp:1412
v_reg< _Tp, n > v_reverse(const v_reg< _Tp, n > &a)
Vector reverse order
Definition:
intrin_cpp.hpp:2346
v_reg< typename V_TypeTraits< _Tp >::w_type, simd128_width/sizeof(typename V_TypeTraits< _Tp >::w_type)> v_load_expand(const _Tp *ptr)
Load register contents from memory with double expand
Definition:
intrin_cpp.hpp:1875
v_reg< int, 4 > v_int32x4
Four 32-bit signed integer values
Definition:
intrin_cpp.hpp:498
v_reg< typename V_TypeTraits< _Tp >::abs_type, n > v_absdiff(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Absolute difference
Definition:
intrin_cpp.hpp:956
V_TypeTraits< _Tp >::sum_type v_reduce_sum(const v_reg< _Tp, n > &a)
Sum packed values
Definition:
intrin_cpp.hpp:1338
v_reg< _Tp, n > v_muladd(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c)
A synonym for v_fma
Definition:
intrin_cpp.hpp:1060
v_reg< _Tp, n > v_sqr_magnitude(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Square of the magnitude
Definition:
intrin_cpp.hpp:1036
v_reg< int, n > v_trunc(const v_reg< float, n > &a)
Truncate elements
Definition:
intrin_cpp.hpp:2478
v_reg< unsigned, 4 > v_uint32x4
Four 32-bit unsigned integer values
Definition:
intrin_cpp.hpp:496
CV_INLINE v_reg< _Tp, n > operator/(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Divide values
v_reg< _Tp, n > v_invsqrt(const v_reg< _Tp, n > &a)
Inverse square root
Definition:
intrin_cpp.hpp:1010
v_reg< _Tp, n > v_magnitude(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Magnitude
Definition:
intrin_cpp.hpp:1023
v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > v_dotprod_expand_fast(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Fast Dot product of elements and expand
Definition:
intrin_cpp.hpp:1188
CV_INLINE v_reg< double,(n/2)> v_cvt_f64_high(const v_reg< int, n > &a)
Convert to double high part of vector
Definition:
intrin_cpp.hpp:2587
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load_low(const _Tp *ptr)
Load 64 bits of data to lower part (high part is undefined).
Definition:
intrin_cpp.hpp:1702
void v_recombine(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, v_reg< _Tp, n > &low, v_reg< _Tp, n > &high)
Combine two vectors from lower and higher parts of two other vectors
Definition:
intrin_cpp.hpp:2325
v_reg< float, n > v_reduce_sum4(const v_reg< float, n > &a, const v_reg< float, n > &b, const v_reg< float, n > &c, const v_reg< float, n > &d)
Sums all elements of each input vector, returns the vector of sums
Definition:
intrin_cpp.hpp:1356
void v_mul_expand(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &c, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &d)
Multiply and expand
Definition:
intrin_cpp.hpp:1219
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load_aligned(const _Tp *ptr)
Load register contents from memory (aligned)
Definition:
intrin_cpp.hpp:1652
v_reg< _Tp, n > v_broadcast_element(const v_reg< _Tp, n > &a)
Broadcast i-th element of vector
Definition:
intrin_cpp.hpp:2416
v_reg< _Tp, n > v_select(const v_reg< _Tp, n > &mask, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Per-element select (blend operation)
Definition:
intrin_cpp.hpp:1454
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load(const _Tp *ptr)
Load register contents from memory
Definition:
intrin_cpp.hpp:1587
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > v_expand_low(const v_reg< _Tp, n > &a)
Expand lower values to the wider pack type
Definition:
intrin_cpp.hpp:1499
CV_INLINE v_reg< _Tp, n > operator~(const v_reg< _Tp, n > &a)
Bitwise NOT
CV_INLINE v_reg< double, n/2 > v_cvt_f64(const v_reg< int, n > &a)
Convert lower half to double
Definition:
intrin_cpp.hpp:2576
v_reg< typename V_TypeTraits< _Tp >::q_type, simd128_width/sizeof(typename V_TypeTraits< _Tp >::q_type)> v_load_expand_q(const _Tp *ptr)
Load register contents from memory with quad expand
Definition:
intrin_cpp.hpp:1964
void v_expand(const v_reg< _Tp, n > &a, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &b0, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &b1)
Expand values to the wider pack type
Definition:
intrin_cpp.hpp:1477
v_reg< uchar, 2 *n > v_pack_b(const v_reg< ushort, n > &a, const v_reg< ushort, n > &b)
Pack boolean values (for 16-bit boolean values)
Definition:
intrin_cpp.hpp:3114
v_reg< _Tp, n > v_fma(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c)
Multiply and add
Definition:
intrin_cpp.hpp:1049
CV_INLINE v_reg< _Tp, n > operator^(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Bitwise XOR
void v_store_interleave(_Tp *ptr, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, hal::StoreMode=hal::STORE_UNALIGNED)
Interleave and store (2 channels)
Definition:
intrin_cpp.hpp:2118
void v_transpose4x4(v_reg< _Tp, n > &a0, const v_reg< _Tp, n > &a1, const v_reg< _Tp, n > &a2, const v_reg< _Tp, n > &a3, v_reg< _Tp, n > &b0, v_reg< _Tp, n > &b1, v_reg< _Tp, n > &b2, v_reg< _Tp, n > &b3)
Transpose 4x4 matrix
Definition:
intrin_cpp.hpp:2764
v_reg< _Tp, n > v_absdiffs(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Saturating absolute difference
Definition:
intrin_cpp.hpp:997
v_reg< uint64, 2 > v_uint64x2
Two 64-bit unsigned integer values
Definition:
intrin_cpp.hpp:504
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > v_expand_high(const v_reg< _Tp, n > &a)
Expand higher values to the wider pack type
Definition:
intrin_cpp.hpp:1518
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > v_dotprod_fast(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Fast Dot product of elements
Definition:
intrin_cpp.hpp:1119
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load_halves(const _Tp *loptr, const _Tp *hiptr)
Load register contents from two memory blocks
Definition:
intrin_cpp.hpp:1784
v_reg< _Tp, n > v_mul_hi(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Multiply and extract high part
Definition:
intrin_cpp.hpp:1236
v_reg< _Tp, n > v_combine_low(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Combine vector from first elements of two vectors
Definition:
intrin_cpp.hpp:2285
v_reg< float, 4 > v_float32x4
Four 32-bit floating point values (single precision)
Definition:
intrin_cpp.hpp:500
v_reg< float, n > v_cvt_f32(const v_reg< int, n > &a)
Convert to float
Definition:
intrin_cpp.hpp:2537
bool v_check_all(const v_reg< _Tp, n > &a)
Check if all packed values are less than zero
Definition:
intrin_cpp.hpp:1424
v_reg< float, n > v_matmuladd(const v_reg< float, n > &v, const v_reg< float, n > &a, const v_reg< float, n > &b, const v_reg< float, n > &c, const v_reg< float, n > &d)
Matrix multiplication and add
Definition:
intrin_cpp.hpp:3226
_Tp v_extract_n(const v_reg< _Tp, n > &v)
Vector extract
Definition:
intrin_cpp.hpp:2400
v_reg< float, n > v_not_nan(const v_reg< float, n > &a)
Not-NaN check (per element)
Definition:
intrin_cpp.hpp:893
v_reg< typename V_TypeTraits< _Tp >::abs_type, n > v_popcount(const v_reg< _Tp, n > &a)
Count the 1 bits in the vector lanes and return result as corresponding unsigned type
Definition:
intrin_cpp.hpp:827
void v_store_aligned(_Tp *ptr, const v_reg< _Tp, n > &a)
Store data to memory (aligned)
Definition:
intrin_cpp.hpp:2254
v_reg< short, 8 > v_int16x8
Eight 16-bit signed integer values
Definition:
intrin_cpp.hpp:494
v_reg< double, 2 > v_float64x2
Two 64-bit floating point values (double precision)
Definition:
intrin_cpp.hpp:502
v_reg< _Tp, n > v_extract(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Vector extract
Definition:
intrin_cpp.hpp:2374
void v_load_deinterleave(const _Tp *ptr, v_reg< _Tp, n > &a, v_reg< _Tp, n > &b)
Load and deinterleave (2 channels)
Definition:
intrin_cpp.hpp:2046
softfloat abs(softfloat a)
Absolute value
Definition:
softfloat.hpp:444
CV_INLINE int cvRound(double value)
Rounds floating-point number to the nearest integer
Definition:
fast_math.hpp:200
CV_INLINE int cvCeil(double value)
Rounds floating-point number to the nearest integer not smaller than the original.
Definition:
fast_math.hpp:254
static _Tp saturate_cast(uchar v)
Template function for accurate conversion from one primitive type to another.
Definition:
saturate.hpp:80
CV_INLINE int cvFloor(double value)
Rounds floating-point number to the nearest integer not larger than the original.
Definition:
fast_math.hpp:234
static bool isAligned(const T &data)
Alignment check of passed values
Definition:
utility.hpp:517
#define CV_Assert(expr)
Checks a condition at runtime and throws exception if it fails
Definition:
base.hpp:342
#define CV_DbgAssert(expr)
Definition:
base.hpp:375
Quat< T > cos(const Quat< T > &q)
Quat< T > sin(const Quat< T > &q)
"black box" representation of the file storage associated with a file on disk.
Definition:
aruco.hpp:75
Definition:
intrin.hpp:104
Definition:
intrin_cpp.hpp:369
_Tp get0() const
Access first value
Definition:
intrin_cpp.hpp:436
v_reg(const v_reg< _Tp, n > &r)
Copy constructor
Definition:
intrin_cpp.hpp:421
v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, _Tp s4, _Tp s5, _Tp s6, _Tp s7, _Tp s8, _Tp s9, _Tp s10, _Tp s11, _Tp s12, _Tp s13, _Tp s14, _Tp s15)
Constructor
Definition:
intrin_cpp.hpp:404
v_reg(const _Tp *ptr)
Constructor
Definition:
intrin_cpp.hpp:379
v_reg()
Default constructor
Definition:
intrin_cpp.hpp:418
v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, _Tp s4, _Tp s5, _Tp s6, _Tp s7)
Constructor
Definition:
intrin_cpp.hpp:394
v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3)
Constructor
Definition:
intrin_cpp.hpp:389
v_reg(_Tp s0, _Tp s1)
Constructor
Definition:
intrin_cpp.hpp:384