45#ifndef OPENCV_HAL_INTRIN_CPP_HPP
46#define OPENCV_HAL_INTRIN_CPP_HPP
51#include "opencv2/core/saturate.hpp"
54#define CV_SIMD128_CPP 1
55#if defined(CV_FORCE_SIMD128_CPP)
57#define CV_SIMD128_64F 1
59#if defined(CV_DOXYGEN)
61#define CV_SIMD128_64F 1
63#define CV_SIMD256_64F 1
65#define CV_SIMD512_64F 1
76CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
368template<
typename _Tp,
int n>
struct v_reg
371 typedef _Tp lane_type;
// Load constructor: copies n consecutive elements from ptr into lanes s[0..n-1].
// Exactly n elements are read, so ptr must reference at least n readable values.
// Declared explicit, so a raw pointer is never converted to a v_reg implicitly.
379 explicit v_reg(
const _Tp* ptr) {
for(
int i = 0; i < n; i++ ) s[i] = ptr[i]; }
// Two-value constructor: stores s0 and s1 in lanes 0 and 1. No other lane is written,
// so this is presumably intended for n == 2 only (TODO confirm against the full class).
384 v_reg(_Tp s0, _Tp s1) { s[0] = s0; s[1] = s1; }
// Four-value constructor: stores s0..s3 in lanes 0..3. No other lane is written,
// so this is presumably intended for n == 4 only (TODO confirm against the full class).
389 v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3) { s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; }
394 v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3,
395 _Tp s4, _Tp s5, _Tp s6, _Tp s7)
397 s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3;
398 s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7;
404 v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3,
405 _Tp s4, _Tp s5, _Tp s6, _Tp s7,
406 _Tp s8, _Tp s9, _Tp s10, _Tp s11,
407 _Tp s12, _Tp s13, _Tp s14, _Tp s15)
409 s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3;
410 s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7;
411 s[8] = s8; s[9] = s9; s[10] = s10; s[11] = s11;
412 s[12] = s12; s[13] = s13; s[14] = s14; s[15] = s15;
423 for(
int i = 0; i < n; i++ )
// Returns the value of the first lane, s[0], by value; does not modify the register.
436 _Tp
get0()
const {
return s[0]; }
// Returns the value of lane i, s[i], by value; does not modify the register.
// No range check is performed here: i must already satisfy 0 <= i < n.
439 _Tp get(
const int i)
const {
return s[i]; }
440 v_reg<_Tp, n> high()
const
444 for( i = 0; i < n/2; i++ )
452 static v_reg<_Tp, n> zero()
455 for(
int i = 0; i < n; i++ )
460 static v_reg<_Tp, n> all(_Tp s)
463 for(
int i = 0; i < n; i++ )
468 template<
typename _Tp2,
int n2> v_reg<_Tp2, n2> reinterpret_as()
const
470 size_t bytes =
std::min(
sizeof(_Tp2)*n2,
sizeof(_Tp)*n);
472 std::memcpy(&c.s[0], &s[0], bytes);
476 v_reg& operator=(
const v_reg<_Tp, n> & r)
478 for(
int i = 0; i < n; i++ )
561 simdmax_width = simd512_width
563 simdmax_width = simd256_width
565 simdmax_width = simd128_width
// Helper macro: invokes macro_name once for each integer lane type
// (uchar, schar, ushort, short, unsigned, int, uint64, int64), passing the type
// as the first argument and forwarding the variadic arguments unchanged.
// Each call is wrapped in __CV_EXPAND, which is defined outside this chunk
// (presumably to force a correct __VA_ARGS__ expansion -- TODO confirm).
// NOTE: the last line ends with a continuation backslash, so whatever line
// follows is spliced into the macro body; keep an empty line directly after it.
620#define CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, ...) \
621__CV_EXPAND(macro_name(uchar, __VA_ARGS__)) \
622__CV_EXPAND(macro_name(schar, __VA_ARGS__)) \
623__CV_EXPAND(macro_name(ushort, __VA_ARGS__)) \
624__CV_EXPAND(macro_name(short, __VA_ARGS__)) \
625__CV_EXPAND(macro_name(unsigned, __VA_ARGS__)) \
626__CV_EXPAND(macro_name(int, __VA_ARGS__)) \
627__CV_EXPAND(macro_name(uint64, __VA_ARGS__)) \
628__CV_EXPAND(macro_name(int64, __VA_ARGS__)) \
// Helper macro: invokes macro_name once for each floating-point lane type
// (float, double), passing the type as the first argument and forwarding the
// variadic arguments unchanged.
// NOTE: the last line ends with a continuation backslash, so whatever line
// follows is spliced into the macro body; keep an empty line directly after it.
630#define CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, ...) \
631__CV_EXPAND(macro_name(float, __VA_ARGS__)) \
632__CV_EXPAND(macro_name(double, __VA_ARGS__)) \
// Helper macro: applies macro_name to every supported lane type by chaining the
// integer-type expansion and then the floating-point-type expansion, with the
// same variadic arguments passed to both.
// NOTE: the last line ends with a continuation backslash, so whatever line
// follows is spliced into the macro body; keep an empty line directly after it.
634#define CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(macro_name, ...) \
635CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, __VA_ARGS__) \
636CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, __VA_ARGS__) \
638#define CV__HAL_INTRIN_IMPL_BIN_OP_(_Tp, bin_op) \
639template<int n> inline \
640v_reg<_Tp, n> operator bin_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
643 for( int i = 0; i < n; i++ ) \
644 c.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
647template<int n> inline \
648v_reg<_Tp, n>& operator bin_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
650 for( int i = 0; i < n; i++ ) \
651 a.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
// Helper macro: instantiates CV__HAL_INTRIN_IMPL_BIN_OP_ (operator bin_op and
// operator bin_op=, both storing saturate_cast<_Tp> of the per-lane result)
// for every lane type listed by CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES.
655#define CV__HAL_INTRIN_IMPL_BIN_OP(bin_op) CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(CV__HAL_INTRIN_IMPL_BIN_OP_, bin_op)
// Element-wise +, - and * (plus the compound forms) for all integer and
// floating-point lane types.
657CV__HAL_INTRIN_IMPL_BIN_OP(+)
658CV__HAL_INTRIN_IMPL_BIN_OP(-)
659CV__HAL_INTRIN_IMPL_BIN_OP(*)
// Element-wise / is instantiated for float and double only; no integer
// division operator is generated by this line.
660CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(CV__HAL_INTRIN_IMPL_BIN_OP_, /)
662#define CV__HAL_INTRIN_IMPL_BIT_OP_(_Tp, bit_op) \
663template<int n> CV_INLINE \
664v_reg<_Tp, n> operator bit_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
667 typedef typename V_TypeTraits<_Tp>::int_type itype; \
668 for( int i = 0; i < n; i++ ) \
669 c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \
670 V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \
673template<int n> CV_INLINE \
674v_reg<_Tp, n>& operator bit_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
676 typedef typename V_TypeTraits<_Tp>::int_type itype; \
677 for( int i = 0; i < n; i++ ) \
678 a.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \
679 V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \
// Helper macro: instantiates CV__HAL_INTRIN_IMPL_BIT_OP_ (operator bit_op and
// operator bit_op=) for the integer lane types and for the floating-point lane
// types. The per-type implementation applies bit_op to the lanes' integer
// representation (V_TypeTraits<_Tp>::reinterpret_int) and converts the result
// back with reinterpret_from_int, which is what makes the float/double
// instantiations possible.
683#define CV__HAL_INTRIN_IMPL_BIT_OP(bit_op) \
684CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op) \
685CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op)
// Bitwise AND, OR and XOR (plus the compound forms) for every lane type.
688CV__HAL_INTRIN_IMPL_BIT_OP(&)
689CV__HAL_INTRIN_IMPL_BIT_OP(|)
690CV__HAL_INTRIN_IMPL_BIT_OP(^)
692#define CV__HAL_INTRIN_IMPL_BITWISE_NOT_(_Tp, dummy) \
693template<int n> CV_INLINE \
694v_reg<_Tp, n> operator ~ (const v_reg<_Tp, n>& a) \
697 for( int i = 0; i < n; i++ ) \
698 c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int(~V_TypeTraits<_Tp>::reinterpret_int(a.s[i])); \
702CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(CV__HAL_INTRIN_IMPL_BITWISE_NOT_, ~)
709#define OPENCV_HAL_IMPL_MATH_FUNC(func, cfunc, _Tp2) \
710template<typename _Tp, int n> inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a) \
713 for( int i = 0; i < n; i++ ) \
714 c.s[i] = cfunc(a.s[i]); \
738#define OPENCV_HAL_IMPL_MINMAX_FUNC(func, cfunc) \
739template<typename _Tp, int n> inline v_reg<_Tp, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
742 for( int i = 0; i < n; i++ ) \
743 c.s[i] = cfunc(a.s[i], b.s[i]); \
749#define OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(func, cfunc) \
750template<typename _Tp, int n> inline _Tp func(const v_reg<_Tp, n>& a) \
753 for( int i = 1; i < n; i++ ) \
754 c = cfunc(c, a.s[i]); \
// v_min(a, b): per-lane minimum of two vectors, each lane computed with std::min.
768OPENCV_HAL_IMPL_MINMAX_FUNC(v_min,
std::min)
// v_max(a, b): per-lane maximum of two vectors, each lane computed with std::max.
780OPENCV_HAL_IMPL_MINMAX_FUNC(v_max,
std::max)
// v_reduce_min(a): reduces the lanes of one vector to a single _Tp value by
// folding them with std::min.
789OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min,
std::min)
// v_reduce_max(a): reduces the lanes of one vector to a single _Tp value by
// folding them with std::max.
798OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max,
std::max)
800static const unsigned char popCountTable[] =
802 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
803 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
804 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
805 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
806 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
807 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
808 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
809 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
810 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
811 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
812 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
813 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
814 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
815 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
816 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
817 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
826template<
typename _Tp,
int n>
830 for (
int i = 0; i < n*(int)
sizeof(_Tp); i++)
831 b.s[i/
sizeof(_Tp)] += popCountTable[v_reinterpret_as_u8(a).s[i]];
837template<
typename _Tp,
int n>
838inline void v_minmax(
const v_reg<_Tp, n>& a,
const v_reg<_Tp, n>& b,
839 v_reg<_Tp, n>& minval, v_reg<_Tp, n>& maxval )
841 for(
int i = 0; i < n; i++ )
843 minval.s[i] =
std::min(a.s[i], b.s[i]);
844 maxval.s[i] =
std::max(a.s[i], b.s[i]);
851#define OPENCV_HAL_IMPL_CMP_OP(cmp_op) \
852template<typename _Tp, int n> \
853inline v_reg<_Tp, n> operator cmp_op(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
855 typedef typename V_TypeTraits<_Tp>::int_type itype; \
857 for( int i = 0; i < n; i++ ) \
858 c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)-(int)(a.s[i] cmp_op b.s[i])); \
897 for (
int i = 0; i < n; i++)
902inline v_reg<double, n>
v_not_nan(
const v_reg<double, n>& a)
904 typedef typename V_TypeTraits<double>::int_type itype;
906 for (
int i = 0; i < n; i++)
907 c.s[i] = V_TypeTraits<double>::reinterpret_from_int((itype)-(
int)(a.s[i] == a.s[i]));
913#define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2) \
914template<typename _Tp, int n> \
915inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
917 typedef _Tp2 rtype; \
919 for( int i = 0; i < n; i++ ) \
920 c.s[i] = cast_op(a.s[i] bin_op b.s[i]); \
940template<
typename T>
inline T _absdiff(T a, T b)
942 return a > b ? a - b : b - a;
955template<
typename _Tp,
int n>
960 const rtype mask = (rtype)(std::numeric_limits<_Tp>::is_signed ? (1 << (
sizeof(rtype)*8 - 1)) : 0);
961 for(
int i = 0; i < n; i++ )
963 rtype ua = a.s[i] ^ mask;
964 rtype ub = b.s[i] ^ mask;
965 c.s[i] = _absdiff(ua, ub);
976 for(
int i = 0; i < c.nlanes; i++ )
977 c.s[i] = _absdiff(a.s[i], b.s[i]);
987 for(
int i = 0; i < c.nlanes; i++ )
988 c.s[i] = _absdiff(a.s[i], b.s[i]);
996template<
typename _Tp,
int n>
1000 for(
int i = 0; i < n; i++)
1001 c.s[i] = saturate_cast<_Tp>(
std::abs(a.s[i] - b.s[i]));
1009template<
typename _Tp,
int n>
1013 for(
int i = 0; i < n; i++ )
1022template<
typename _Tp,
int n>
1026 for(
int i = 0; i < n; i++ )
1027 c.s[i] =
std::sqrt(a.s[i]*a.s[i] + b.s[i]*b.s[i]);
1035template<
typename _Tp,
int n>
1039 for(
int i = 0; i < n; i++ )
1040 c.s[i] = a.s[i]*a.s[i] + b.s[i]*b.s[i];
1048template<
typename _Tp,
int n>
1053 for(
int i = 0; i < n; i++ )
1054 d.s[i] = a.s[i]*b.s[i] + c.s[i];
1059template<
typename _Tp,
int n>
1063 return v_fma(a, b, c);
1079template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1083 v_reg<w_type, n/2> c;
1084 for(
int i = 0; i < (n/2); i++ )
1085 c.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1];
1100template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1105 v_reg<w_type, n/2> s;
1106 for(
int i = 0; i < (n/2); i++ )
1107 s.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1] + c.s[i];
1118template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1126template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1144template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
1148 v_reg<q_type, n/4> s;
1149 for(
int i = 0; i < (n/4); i++ )
1150 s.s[i] = (q_type)a.s[i*4 ]*b.s[i*4 ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] +
1151 (q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3];
1166template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
1171 v_reg<q_type, n/4> s;
1172 for(
int i = 0; i < (n/4); i++ )
1173 s.s[i] = (q_type)a.s[i*4 ]*b.s[i*4 ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] +
1174 (q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3] + c.s[i];
1187template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
1195template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
1224 for(
int i = 0; i < (n/2); i++ )
1226 c.s[i] = (w_type)a.s[i]*b.s[i];
1227 d.s[i] = (w_type)a.s[i+(n/2)]*b.s[i+(n/2)];
1240 for (
int i = 0; i < n; i++)
1241 c.s[i] = (_Tp)(((w_type)a.s[i] * b.s[i]) >>
sizeof(_Tp)*8);
1246template<
typename _Tp,
int n>
inline void v_hsum(
const v_reg<_Tp, n>& a,
1247 v_reg<
typename V_TypeTraits<_Tp>::w_type, n/2>& c)
1249 typedef typename V_TypeTraits<_Tp>::w_type w_type;
1250 for(
int i = 0; i < (n/2); i++ )
1252 c.s[i] = (w_type)a.s[i*2] + a.s[i*2+1];
1259#define OPENCV_HAL_IMPL_SHIFT_OP(shift_op) \
1260template<typename _Tp, int n> inline v_reg<_Tp, n> operator shift_op(const v_reg<_Tp, n>& a, int imm) \
1263 for( int i = 0; i < n; i++ ) \
1264 c.s[i] = (_Tp)(a.s[i] shift_op imm); \
1280#define OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(suffix,opA,opB) \
1281template<int imm, typename _Tp, int n> inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a) \
1284 for (int i = 0; i < n; i++) \
1286 int sIndex = i opA imm; \
1287 if (0 <= sIndex && sIndex < n) \
1289 b.s[i] = a.s[sIndex]; \
1298template<int imm, typename _Tp, int n> inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
1301 for (int i = 0; i < n; i++) \
1303 int aIndex = i opA imm; \
1304 int bIndex = i opA imm opB n; \
1305 if (0 <= bIndex && bIndex < n) \
1307 c.s[i] = b.s[bIndex]; \
1309 else if (0 <= aIndex && aIndex < n) \
1311 c.s[i] = a.s[aIndex]; \
1341 for(
int i = 1; i < n; i++ )
1360 for(
int i = 0; i < (n/4); i++)
1362 r.s[i*4 + 0] = a.s[i*4 + 0] + a.s[i*4 + 1] + a.s[i*4 + 2] + a.s[i*4 + 3];
1363 r.s[i*4 + 1] = b.s[i*4 + 0] + b.s[i*4 + 1] + b.s[i*4 + 2] + b.s[i*4 + 3];
1364 r.s[i*4 + 2] = c.s[i*4 + 0] + c.s[i*4 + 1] + c.s[i*4 + 2] + c.s[i*4 + 3];
1365 r.s[i*4 + 3] = d.s[i*4 + 0] + d.s[i*4 + 1] + d.s[i*4 + 2] + d.s[i*4 + 3];
1380 for (
int i = 1; i < n; i++)
1381 c += _absdiff(a.s[i], b.s[i]);
1398 for(
int i = 0; i < n; i++ )
1414 for (
int i = 0; i < n; i++)
1426 for(
int i = 0; i < n; i++ )
1438 for(
int i = 0; i < n; i++ )
1458 typedef typename Traits::int_type int_type;
1460 for(
int i = 0; i < n; i++ )
1462 int_type m = Traits::reinterpret_int(mask.s[i]);
1464 c.s[i] = m ? a.s[i] : b.s[i];
1481 for(
int i = 0; i < (n/2); i++ )
1484 b1.s[i] = a.s[i+(n/2)];
1497template<
typename _Tp,
int n>
1498inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1502 for(
int i = 0; i < (n/2); i++ )
1516template<
typename _Tp,
int n>
1517inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1521 for(
int i = 0; i < (n/2); i++ )
1522 b.s[i] = a.s[i+(n/2)];
1527template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::int_type, n>
1528 v_reinterpret_as_int(
const v_reg<_Tp, n>& a)
1530 v_reg<typename V_TypeTraits<_Tp>::int_type, n> c;
1531 for(
int i = 0; i < n; i++ )
1532 c.s[i] = V_TypeTraits<_Tp>::reinterpret_int(a.s[i]);
1536template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::uint_type, n>
1537 v_reinterpret_as_uint(
const v_reg<_Tp, n>& a)
1539 v_reg<typename V_TypeTraits<_Tp>::uint_type, n> c;
1540 for(
int i = 0; i < n; i++ )
1541 c.s[i] = V_TypeTraits<_Tp>::reinterpret_uint(a.s[i]);
1561 for( i = 0; i < n/2; i++ )
1563 b0.s[i*2] = a0.s[i];
1564 b0.s[i*2+1] = a1.s[i];
1568 b1.s[i*2-n] = a0.s[i];
1569 b1.s[i*2-n+1] = a1.s[i];
1586template<
typename _Tp>
1589#if CV_STRONG_ALIGNMENT
1592 return v_reg<_Tp, simd128_width /
sizeof(_Tp)>(ptr);
1610template<
typename _Tp>
1611inline v_reg<_Tp, simd256_width /
sizeof(_Tp)> v256_load(
const _Tp* ptr)
1613#if CV_STRONG_ALIGNMENT
1616 return v_reg<_Tp, simd256_width /
sizeof(_Tp)>(ptr);
1635template<
typename _Tp>
1636inline v_reg<_Tp, simd512_width /
sizeof(_Tp)> v512_load(
const _Tp* ptr)
1638#if CV_STRONG_ALIGNMENT
1641 return v_reg<_Tp, simd512_width /
sizeof(_Tp)>(ptr);
1651template<
typename _Tp>
1655 return v_reg<_Tp, simd128_width /
sizeof(_Tp)>(ptr);
1666template<
typename _Tp>
1667inline v_reg<_Tp, simd256_width /
sizeof(_Tp)> v256_load_aligned(
const _Tp* ptr)
1670 return v_reg<_Tp, simd256_width /
sizeof(_Tp)>(ptr);
1682template<
typename _Tp>
1683inline v_reg<_Tp, simd512_width /
sizeof(_Tp)> v512_load_aligned(
const _Tp* ptr)
1686 return v_reg<_Tp, simd512_width /
sizeof(_Tp)>(ptr);
1701template<
typename _Tp>
1704#if CV_STRONG_ALIGNMENT
1707 v_reg<_Tp, simd128_width /
sizeof(_Tp)> c;
1708 for(
int i = 0; i < c.nlanes/2; i++ )
1728template<
typename _Tp>
1729inline v_reg<_Tp, simd256_width /
sizeof(_Tp)> v256_load_low(
const _Tp* ptr)
1731#if CV_STRONG_ALIGNMENT
1734 v_reg<_Tp, simd256_width /
sizeof(_Tp)> c;
1735 for (
int i = 0; i < c.nlanes / 2; i++)
1756template<
typename _Tp>
1757inline v_reg<_Tp, simd512_width /
sizeof(_Tp)> v512_load_low(
const _Tp* ptr)
1759#if CV_STRONG_ALIGNMENT
1762 v_reg<_Tp, simd512_width /
sizeof(_Tp)> c;
1763 for (
int i = 0; i < c.nlanes / 2; i++)
1783template<
typename _Tp>
1786#if CV_STRONG_ALIGNMENT
1790 v_reg<_Tp, simd128_width /
sizeof(_Tp)> c;
1791 for(
int i = 0; i < c.nlanes/2; i++ )
1794 c.s[i+c.nlanes/2] = hiptr[i];
1813template<
typename _Tp>
1814inline v_reg<_Tp, simd256_width /
sizeof(_Tp)> v256_load_halves(
const _Tp* loptr,
const _Tp* hiptr)
1816#if CV_STRONG_ALIGNMENT
1820 v_reg<_Tp, simd256_width /
sizeof(_Tp)> c;
1821 for (
int i = 0; i < c.nlanes / 2; i++)
1824 c.s[i + c.nlanes / 2] = hiptr[i];
1844template<
typename _Tp>
1845inline v_reg<_Tp, simd512_width /
sizeof(_Tp)> v512_load_halves(
const _Tp* loptr,
const _Tp* hiptr)
1847#if CV_STRONG_ALIGNMENT
1851 v_reg<_Tp, simd512_width /
sizeof(_Tp)> c;
1852 for (
int i = 0; i < c.nlanes / 2; i++)
1855 c.s[i + c.nlanes / 2] = hiptr[i];
1873template<
typename _Tp>
1874inline v_reg<typename V_TypeTraits<_Tp>::w_type, simd128_width /
sizeof(
typename V_TypeTraits<_Tp>::w_type)>
1877#if CV_STRONG_ALIGNMENT
1881 v_reg<w_type, simd128_width /
sizeof(w_type)> c;
1882 for(
int i = 0; i < c.nlanes; i++ )
1903template<
typename _Tp>
1904inline v_reg<typename V_TypeTraits<_Tp>::w_type, simd256_width /
sizeof(
typename V_TypeTraits<_Tp>::w_type)>
1905v256_load_expand(
const _Tp* ptr)
1907#if CV_STRONG_ALIGNMENT
1910 typedef typename V_TypeTraits<_Tp>::w_type w_type;
1911 v_reg<w_type, simd256_width /
sizeof(w_type)> c;
1912 for (
int i = 0; i < c.nlanes; i++)
1934template<
typename _Tp>
1935inline v_reg<typename V_TypeTraits<_Tp>::w_type, simd512_width /
sizeof(
typename V_TypeTraits<_Tp>::w_type)>
1936v512_load_expand(
const _Tp* ptr)
1938#if CV_STRONG_ALIGNMENT
1941 typedef typename V_TypeTraits<_Tp>::w_type w_type;
1942 v_reg<w_type, simd512_width /
sizeof(w_type)> c;
1943 for (
int i = 0; i < c.nlanes; i++)
1962template<
typename _Tp>
1963inline v_reg<typename V_TypeTraits<_Tp>::q_type, simd128_width /
sizeof(
typename V_TypeTraits<_Tp>::q_type)>
1966#if CV_STRONG_ALIGNMENT
1970 v_reg<q_type, simd128_width /
sizeof(q_type)> c;
1971 for(
int i = 0; i < c.nlanes; i++ )
1991template<
typename _Tp>
1992inline v_reg<typename V_TypeTraits<_Tp>::q_type, simd256_width /
sizeof(
typename V_TypeTraits<_Tp>::q_type)>
1993v256_load_expand_q(
const _Tp* ptr)
1995#if CV_STRONG_ALIGNMENT
1998 typedef typename V_TypeTraits<_Tp>::q_type q_type;
1999 v_reg<q_type, simd256_width /
sizeof(q_type)> c;
2000 for (
int i = 0; i < c.nlanes; i++)
2021template<
typename _Tp>
2022inline v_reg<typename V_TypeTraits<_Tp>::q_type, simd512_width /
sizeof(
typename V_TypeTraits<_Tp>::q_type)>
2023v512_load_expand_q(
const _Tp* ptr)
2025#if CV_STRONG_ALIGNMENT
2028 typedef typename V_TypeTraits<_Tp>::q_type q_type;
2029 v_reg<q_type, simd512_width /
sizeof(q_type)> c;
2030 for (
int i = 0; i < c.nlanes; i++)
2049#if CV_STRONG_ALIGNMENT
2053 for( i = i2 = 0; i < n; i++, i2 += 2 )
2071#if CV_STRONG_ALIGNMENT
2075 for( i = i3 = 0; i < n; i++, i3 += 3 )
2091template<
typename _Tp,
int n>
2096#if CV_STRONG_ALIGNMENT
2100 for( i = i4 = 0; i < n; i++, i4 += 4 )
2117template<
typename _Tp,
int n>
2120 hal::StoreMode =hal::STORE_UNALIGNED)
2122#if CV_STRONG_ALIGNMENT
2126 for( i = i2 = 0; i < n; i++, i2 += 2 )
2141template<
typename _Tp,
int n>
2144 hal::StoreMode =hal::STORE_UNALIGNED)
2146#if CV_STRONG_ALIGNMENT
2150 for( i = i3 = 0; i < n; i++, i3 += 3 )
2169 hal::StoreMode =hal::STORE_UNALIGNED)
2171#if CV_STRONG_ALIGNMENT
2175 for( i = i4 = 0; i < n; i++, i4 += 4 )
2192template<
typename _Tp,
int n>
2195#if CV_STRONG_ALIGNMENT
2198 for(
int i = 0; i < n; i++ )
2202template<
typename _Tp,
int n>
2203inline void v_store(_Tp* ptr,
const v_reg<_Tp, n>& a, hal::StoreMode )
2205#if CV_STRONG_ALIGNMENT
2218template<
typename _Tp,
int n>
2221#if CV_STRONG_ALIGNMENT
2224 for(
int i = 0; i < (n/2); i++ )
2235template<
typename _Tp,
int n>
2238#if CV_STRONG_ALIGNMENT
2241 for(
int i = 0; i < (n/2); i++ )
2242 ptr[i] = a.s[i+(n/2)];
2253template<
typename _Tp,
int n>
2260template<
typename _Tp,
int n>
2261inline void v_store_aligned_nocache(_Tp* ptr,
const v_reg<_Tp, n>& a)
2267template<
typename _Tp,
int n>
2268inline void v_store_aligned(_Tp* ptr,
const v_reg<_Tp, n>& a, hal::StoreMode )
2284template<
typename _Tp,
int n>
2288 for(
int i = 0; i < (n/2); i++ )
2291 c.s[i+(n/2)] = b.s[i];
2306template<
typename _Tp,
int n>
2310 for(
int i = 0; i < (n/2); i++ )
2312 c.s[i] = a.s[i+(n/2)];
2313 c.s[i+(n/2)] = b.s[i+(n/2)];
2324template<
typename _Tp,
int n>
2328 for(
int i = 0; i < (n/2); i++ )
2331 low.s[i+(n/2)] = b.s[i];
2332 high.s[i] = a.s[i+(n/2)];
2333 high.s[i+(n/2)] = b.s[i+(n/2)];
2345template<
typename _Tp,
int n>
2349 for(
int i = 0; i < n; i++ )
2350 c.s[i] = a.s[n-i-1];
2373template<
int s,
typename _Tp,
int n>
2377 const int shift = n - s;
2379 for (; i < shift; ++i)
2382 r.s[i] = b.s[i-shift];
2399template<
int s,
typename _Tp,
int n>
2415template<
int i,
typename _Tp,
int n>
2430 for(
int i = 0; i < n; i++ )
2439 for(
int i = 0; i < n; i++ )
2455 for(
int i = 0; i < n; i++ )
2468 for(
int i = 0; i < n; i++ )
2481 for(
int i = 0; i < n; i++ )
2482 c.s[i] = (
int)(a.s[i]);
2490 for(
int i = 0; i < n; i++ )
2502 for(
int i = 0; i < n; i++ )
2514 for(
int i = 0; i < n; i++ )
2526 for(
int i = 0; i < n; i++ )
2528 c.s[i] = (int)(a.s[i]);
2540 for(
int i = 0; i < n; i++ )
2541 c.s[i] = (
float)a.s[i];
2551 for(
int i = 0; i < n; i++ )
2553 c.s[i] = (float)a.s[i];
2565 for(
int i = 0; i < n; i++ )
2567 c.s[i] = (float)a.s[i];
2568 c.s[i+n] = (
float)b.s[i];
2578 v_reg<double, (n/2)> c;
2579 for(
int i = 0; i < (n/2); i++ )
2580 c.s[i] = (
double)a.s[i];
2589 v_reg<double, (n/2)> c;
2590 for(
int i = 0; i < (n/2); i++ )
2591 c.s[i] = (
double)a.s[i + (n/2)];
2600 v_reg<double, (n/2)> c;
2601 for(
int i = 0; i < (n/2); i++ )
2602 c.s[i] = (
double)a.s[i];
2611 v_reg<double, (n/2)> c;
2612 for(
int i = 0; i < (n/2); i++ )
2613 c.s[i] = (
double)a.s[i + (n/2)];
2623 for(
int i = 0; i < n; i++ )
2624 c.s[i] = (
double)a.s[i];
2629template<
typename _Tp>
inline v_reg<_Tp, simd128_width /
sizeof(_Tp)> v_lut(
const _Tp* tab,
const int* idx)
2631 v_reg<_Tp, simd128_width /
sizeof(_Tp)> c;
2632 for (
int i = 0; i < c.nlanes; i++)
2633 c.s[i] = tab[idx[i]];
2636template<
typename _Tp>
inline v_reg<_Tp, simd128_width /
sizeof(_Tp)> v_lut_pairs(
const _Tp* tab,
const int* idx)
2638 v_reg<_Tp, simd128_width /
sizeof(_Tp)> c;
2639 for (
int i = 0; i < c.nlanes; i++)
2640 c.s[i] = tab[idx[i / 2] + i % 2];
2643template<
typename _Tp>
inline v_reg<_Tp, simd128_width /
sizeof(_Tp)> v_lut_quads(
const _Tp* tab,
const int* idx)
2645 v_reg<_Tp, simd128_width /
sizeof(_Tp)> c;
2646 for (
int i = 0; i < c.nlanes; i++)
2647 c.s[i] = tab[idx[i / 4] + i % 4];
2651template<
int n>
inline v_reg<int, n> v_lut(
const int* tab,
const v_reg<int, n>& idx)
2654 for(
int i = 0; i < n; i++ )
2655 c.s[i] = tab[idx.s[i]];
2659template<
int n>
inline v_reg<unsigned, n> v_lut(
const unsigned* tab,
const v_reg<int, n>& idx)
2662 for (
int i = 0; i < n; i++)
2663 c.s[i] = tab[idx.s[i]];
2667template<
int n>
inline v_reg<float, n> v_lut(
const float* tab,
const v_reg<int, n>& idx)
2670 for(
int i = 0; i < n; i++ )
2671 c.s[i] = tab[idx.s[i]];
2675template<
int n>
inline v_reg<double, n/2> v_lut(
const double* tab,
const v_reg<int, n>& idx)
2677 v_reg<double, n/2> c;
2678 for(
int i = 0; i < n/2; i++ )
2679 c.s[i] = tab[idx.s[i]];
2684template<
int n>
inline void v_lut_deinterleave(
const float* tab,
const v_reg<int, n>& idx,
2685 v_reg<float, n>& x, v_reg<float, n>& y)
2687 for(
int i = 0; i < n; i++ )
2695template<
int n>
inline void v_lut_deinterleave(
const double* tab,
const v_reg<int, n*2>& idx,
2696 v_reg<double, n>& x, v_reg<double, n>& y)
2698 for(
int i = 0; i < n; i++ )
2706template<
typename _Tp,
int n>
inline v_reg<_Tp, n> v_interleave_pairs(
const v_reg<_Tp, n>& vec)
2709 for (
int i = 0; i < n/4; i++)
2711 c.s[4*i ] = vec.s[4*i ];
2712 c.s[4*i+1] = vec.s[4*i+2];
2713 c.s[4*i+2] = vec.s[4*i+1];
2714 c.s[4*i+3] = vec.s[4*i+3];
2719template<
typename _Tp,
int n>
inline v_reg<_Tp, n> v_interleave_quads(
const v_reg<_Tp, n>& vec)
2722 for (
int i = 0; i < n/8; i++)
2724 c.s[8*i ] = vec.s[8*i ];
2725 c.s[8*i+1] = vec.s[8*i+4];
2726 c.s[8*i+2] = vec.s[8*i+1];
2727 c.s[8*i+3] = vec.s[8*i+5];
2728 c.s[8*i+4] = vec.s[8*i+2];
2729 c.s[8*i+5] = vec.s[8*i+6];
2730 c.s[8*i+6] = vec.s[8*i+3];
2731 c.s[8*i+7] = vec.s[8*i+7];
2736template<
typename _Tp,
int n>
inline v_reg<_Tp, n> v_pack_triplets(
const v_reg<_Tp, n>& vec)
2739 for (
int i = 0; i < n/4; i++)
2741 c.s[3*i ] = vec.s[4*i ];
2742 c.s[3*i+1] = vec.s[4*i+1];
2743 c.s[3*i+2] = vec.s[4*i+2];
2763template<
typename _Tp,
int n>
2769 for (
int i = 0; i < n / 4; i++)
2771 b0.s[0 + i*4] = a0.s[0 + i*4]; b0.s[1 + i*4] = a1.s[0 + i*4];
2772 b0.s[2 + i*4] = a2.s[0 + i*4]; b0.s[3 + i*4] = a3.s[0 + i*4];
2773 b1.s[0 + i*4] = a0.s[1 + i*4]; b1.s[1 + i*4] = a1.s[1 + i*4];
2774 b1.s[2 + i*4] = a2.s[1 + i*4]; b1.s[3 + i*4] = a3.s[1 + i*4];
2775 b2.s[0 + i*4] = a0.s[2 + i*4]; b2.s[1 + i*4] = a1.s[2 + i*4];
2776 b2.s[2 + i*4] = a2.s[2 + i*4]; b2.s[3 + i*4] = a3.s[2 + i*4];
2777 b3.s[0 + i*4] = a0.s[3 + i*4]; b3.s[1 + i*4] = a1.s[3 + i*4];
2778 b3.s[2 + i*4] = a2.s[3 + i*4]; b3.s[3 + i*4] = a3.s[3 + i*4];
// Helper macro: defines the inline function <prefix>_setzero_<suffix>(), which
// takes no arguments and returns _Tpvec::zero().
//   _Tpvec - vector type whose static zero() is called
//   prefix - leading part of the generated function name
//   suffix - trailing part of the generated function name
2784#define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, prefix, suffix) \
2785inline _Tpvec prefix##_setzero_##suffix() { return _Tpvec::zero(); }
// Helper macro: defines the inline function <prefix>_setall_<suffix>(_Tp val),
// which returns _Tpvec::all(val).
//   _Tpvec - vector type whose static all() is called
//   _Tp    - scalar type of the val parameter
//   prefix - leading part of the generated function name
//   suffix - trailing part of the generated function name
2830#define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, prefix, suffix) \
2831inline _Tpvec prefix##_setall_##suffix(_Tp val) { return _Tpvec::all(val); }
// Helper macro: defines v_reinterpret_as_<suffix>(a), which forwards to
// a.reinterpret_as<_Tp, N>() for any source vector v_reg<_Tp0, n0>.
// The result lane count N = n0*sizeof(_Tp0)/sizeof(_Tp) keeps the total size in
// bytes equal to the source's whenever that division is exact.
//   _Tp    - lane type of the resulting vector
//   suffix - trailing part of the generated function name
// The ".template" keyword is required because reinterpret_as is a member
// template called on an object of dependent type.
2876#define OPENCV_HAL_IMPL_C_REINTERPRET(_Tp, suffix) \
2877template<typename _Tp0, int n0> inline v_reg<_Tp, n0*sizeof(_Tp0)/sizeof(_Tp)> \
2878 v_reinterpret_as_##suffix(const v_reg<_Tp0, n0>& a) \
2879{ return a.template reinterpret_as<_Tp, n0*sizeof(_Tp0)/sizeof(_Tp)>(); }
// Helper macro: defines v_shl<shift>(a) for vectors with lane type _Tp.
// The shift amount is a template argument and the body simply returns
// a << shift, i.e. it delegates to the v_reg operator<< taking an int.
2898#define OPENCV_HAL_IMPL_C_SHIFTL(_Tp) \
2899template<int shift, int n> inline v_reg<_Tp, n> v_shl(const v_reg<_Tp, n>& a) \
2900{ return a << shift; }
// Helper macro: defines v_shr<shift>(a) for vectors with lane type _Tp.
// The shift amount is a template argument and the body simply returns
// a >> shift, i.e. it delegates to the v_reg operator>> taking an int.
// No rounding term is added here (compare the separate v_rshr helper).
2915#define OPENCV_HAL_IMPL_C_SHIFTR(_Tp) \
2916template<int shift, int n> inline v_reg<_Tp, n> v_shr(const v_reg<_Tp, n>& a) \
2917{ return a >> shift; }
2932#define OPENCV_HAL_IMPL_C_RSHIFTR(_Tp) \
2933template<int shift, int n> inline v_reg<_Tp, n> v_rshr(const v_reg<_Tp, n>& a) \
2936 for( int i = 0; i < n; i++ ) \
2937 c.s[i] = (_Tp)((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \
2954#define OPENCV_HAL_IMPL_C_PACK(_Tp, _Tpn, pack_suffix, cast) \
2955template<int n> inline v_reg<_Tpn, 2*n> v_##pack_suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
2957 v_reg<_Tpn, 2*n> c; \
2958 for( int i = 0; i < n; i++ ) \
2960 c.s[i] = cast<_Tpn>(a.s[i]); \
2961 c.s[i+n] = cast<_Tpn>(b.s[i]); \
2989#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tp, _Tpn, pack_suffix, cast) \
2990template<int shift, int n> inline v_reg<_Tpn, 2*n> v_rshr_##pack_suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
2992 v_reg<_Tpn, 2*n> c; \
2993 for( int i = 0; i < n; i++ ) \
2995 c.s[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \
2996 c.s[i+n] = cast<_Tpn>((b.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \
3024#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tp, _Tpn, pack_suffix, cast) \
3025template<int n> inline void v_##pack_suffix##_store(_Tpn* ptr, const v_reg<_Tp, n>& a) \
3027 for( int i = 0; i < n; i++ ) \
3028 ptr[i] = cast<_Tpn>(a.s[i]); \
3054#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tp, _Tpn, pack_suffix, cast) \
3055template<int shift, int n> inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const v_reg<_Tp, n>& a) \
3057 for( int i = 0; i < n; i++ ) \
3058 ptr[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \
3083template<
typename _Tpm,
typename _Tp,
int n>
3084inline void _pack_b(_Tpm* mptr,
const v_reg<_Tp, n>& a,
const v_reg<_Tp, n>& b)
3086 for (
int i = 0; i < n; ++i)
3088 mptr[i] = (_Tpm)a.s[i];
3089 mptr[i + n] = (_Tpm)b.s[i];
3117 _pack_b(mask.s, a, b);
3141 _pack_b(mask.s, a, b);
3142 _pack_b(mask.s + 2*n, c, d);
3172 _pack_b(mask.s, a, b);
3173 _pack_b(mask.s + 2*n, c, d);
3174 _pack_b(mask.s + 4*n, e, f);
3175 _pack_b(mask.s + 6*n, g, h);
3201 for (
int i = 0; i < n / 4; i++)
3203 res.s[0 + i*4] = v.s[0 + i*4] * a.s[0 + i*4] + v.s[1 + i*4] * b.s[0 + i*4] + v.s[2 + i*4] * c.s[0 + i*4] + v.s[3 + i*4] * d.s[0 + i*4];
3204 res.s[1 + i*4] = v.s[0 + i*4] * a.s[1 + i*4] + v.s[1 + i*4] * b.s[1 + i*4] + v.s[2 + i*4] * c.s[1 + i*4] + v.s[3 + i*4] * d.s[1 + i*4];
3205 res.s[2 + i*4] = v.s[0 + i*4] * a.s[2 + i*4] + v.s[1 + i*4] * b.s[2 + i*4] + v.s[2 + i*4] * c.s[2 + i*4] + v.s[3 + i*4] * d.s[2 + i*4];
3206 res.s[3 + i*4] = v.s[0 + i*4] * a.s[3 + i*4] + v.s[1 + i*4] * b.s[3 + i*4] + v.s[2 + i*4] * c.s[3 + i*4] + v.s[3 + i*4] * d.s[3 + i*4];
3231 for (
int i = 0; i < n / 4; i++)
3233 res.s[0 + i * 4] = v.s[0 + i * 4] * a.s[0 + i * 4] + v.s[1 + i * 4] * b.s[0 + i * 4] + v.s[2 + i * 4] * c.s[0 + i * 4] + d.s[0 + i * 4];
3234 res.s[1 + i * 4] = v.s[0 + i * 4] * a.s[1 + i * 4] + v.s[1 + i * 4] * b.s[1 + i * 4] + v.s[2 + i * 4] * c.s[1 + i * 4] + d.s[1 + i * 4];
3235 res.s[2 + i * 4] = v.s[0 + i * 4] * a.s[2 + i * 4] + v.s[1 + i * 4] * b.s[2 + i * 4] + v.s[2 + i * 4] * c.s[2 + i * 4] + d.s[2 + i * 4];
3236 res.s[3 + i * 4] = v.s[0 + i * 4] * a.s[3 + i * 4] + v.s[1 + i * 4] * b.s[3 + i * 4] + v.s[2 + i * 4] * c.s[3 + i * 4] + d.s[3 + i * 4];
3242template<
int n>
inline v_reg<double, n/2>
v_dotprod_expand(
const v_reg<int, n>& a,
const v_reg<int, n>& b)
3244template<
int n>
inline v_reg<double, n/2>
v_dotprod_expand(
const v_reg<int, n>& a,
const v_reg<int, n>& b,
3245 const v_reg<double, n/2>& c)
3248template<
int n>
inline v_reg<double, n/2>
v_dotprod_expand_fast(
const v_reg<int, n>& a,
const v_reg<int, n>& b)
3250template<
int n>
inline v_reg<double, n/2>
v_dotprod_expand_fast(
const v_reg<int, n>& a,
const v_reg<int, n>& b,
3251 const v_reg<double, n/2>& c)
3256inline v_reg<float, simd128_width /
sizeof(float)>
3259 v_reg<float, simd128_width /
sizeof(float)> v;
3260 for(
int i = 0; i < v.nlanes; i++ )
3267inline v_reg<float, simd256_width /
sizeof(float)>
3268v256_load_expand(
const float16_t* ptr)
3270 v_reg<float, simd256_width /
sizeof(float)> v;
3271 for (
int i = 0; i < v.nlanes; i++)
3279inline v_reg<float, simd512_width /
sizeof(float)>
3280v512_load_expand(
const float16_t* ptr)
3282 v_reg<float, simd512_width /
sizeof(float)> v;
3283 for (
int i = 0; i < v.nlanes; i++)
3291template<
int n>
inline void
3292v_pack_store(float16_t* ptr,
const v_reg<float, n>& v)
3294 for(
int i = 0; i < v.nlanes; i++ )
3296 ptr[i] = float16_t(v.s[i]);
// Intentionally empty: this implementation has nothing to clean up.
// Presumably kept so callers can use the same API as other backends (TODO confirm).
3300inline void v_cleanup() {}
// Intentionally empty: 256-bit counterpart of v_cleanup(); nothing to clean up here.
3302inline void v256_cleanup() {}
// Intentionally empty: 512-bit counterpart of v_cleanup(); nothing to clean up here.
3305inline void v512_cleanup() {}
3311CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
3315#if !defined(CV_DOXYGEN)
CV_EXPORTS_W void max(InputArray src1, InputArray src2, OutputArray dst)
Calculates per-element maximum of two arrays or an array and a scalar.
CV_EXPORTS_W void sqrt(InputArray src, OutputArray dst)
Calculates a square root of array elements.
CV_EXPORTS_W void exp(InputArray src, OutputArray dst)
Calculates the exponent of every array element.
CV_EXPORTS_W void min(InputArray src1, InputArray src2, OutputArray dst)
Calculates per-element minimum of two arrays or an array and a scalar.
CV_EXPORTS_W void log(InputArray src, OutputArray dst)
Calculates the natural logarithm of every array element.
#define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, prefix, suffix)
Helper macro
Definition: intrin_cpp.hpp:2830
#define OPENCV_HAL_IMPL_C_RSHIFTR(_Tp)
Helper macro
Definition: intrin_cpp.hpp:2932
#define OPENCV_HAL_IMPL_C_SHIFTR(_Tp)
Helper macro
Definition: intrin_cpp.hpp:2915
#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tp, _Tpn, pack_suffix, cast)
Helper macro
Definition: intrin_cpp.hpp:2989
#define OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(suffix, opA, opB)
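Helper macro: defines v_rotate_<suffix>, which shifts whole elements between lanes (an element rotation, not a bitwise shift)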
Definition: intrin_cpp.hpp:1280
#define OPENCV_HAL_IMPL_CMP_OP(cmp_op)
Helper macro
Definition: intrin_cpp.hpp:851
OPENCV_HAL_IMPL_MATH_FUNC(v_abs,(typename V_TypeTraits< _Tp >::abs_type) std::abs, typename V_TypeTraits< _Tp >::abs_type) static const unsigned char popCountTable[]
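Absolute value of elements (v_abs); the popCountTable declaration fused onto the signature above is a separate lookup table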
#define OPENCV_HAL_IMPL_C_SHIFTL(_Tp)
Helper macro
Definition: intrin_cpp.hpp:2898
#define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, prefix, suffix)
Helper macro
Definition: intrin_cpp.hpp:2784
#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tp, _Tpn, pack_suffix, cast)
Helper macro
Definition: intrin_cpp.hpp:3054
#define OPENCV_HAL_IMPL_C_REINTERPRET(_Tp, suffix)
Helper macro
Definition: intrin_cpp.hpp:2876
#define OPENCV_HAL_IMPL_C_PACK(_Tp, _Tpn, pack_suffix, cast)
Helper macro
Definition: intrin_cpp.hpp:2954
#define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2)
Helper macro
Definition: intrin_cpp.hpp:913
#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tp, _Tpn, pack_suffix, cast)
Helper macro
Definition: intrin_cpp.hpp:3024
#define OPENCV_HAL_IMPL_SHIFT_OP(shift_op)
Helper macro
Definition: intrin_cpp.hpp:1259
bool v_check_any(const v_reg< _Tp, n > &a)
Check if any of packed values is less than zero
Definition: intrin_cpp.hpp:1436
v_reg< _Tp, n > v_combine_high(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Combine vector from last elements of two vectors
Definition: intrin_cpp.hpp:2307
v_reg< float, n > v_matmul(const v_reg< float, n > &v, const v_reg< float, n > &a, const v_reg< float, n > &b, const v_reg< float, n > &c, const v_reg< float, n > &d)
Matrix multiplication
Definition: intrin_cpp.hpp:3196
v_reg< int, n > v_round(const v_reg< float, n > &a)
Round elements
Definition: intrin_cpp.hpp:2427
CV_INLINE v_reg< _Tp, n > operator|(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Bitwise OR
v_reg< schar, 16 > v_int8x16
Sixteen 8-bit signed integer values
Definition: intrin_cpp.hpp:490
v_reg< uchar, 16 > v_uint8x16
Sixteen 8-bit unsigned integer values
Definition: intrin_cpp.hpp:488
void v_store_high(_Tp *ptr, const v_reg< _Tp, n > &a)
Store data to memory (higher half)
Definition: intrin_cpp.hpp:2236
int v_signmask(const v_reg< _Tp, n > &a)
Get negative values mask
Definition: intrin_cpp.hpp:1395
void v_zip(const v_reg< _Tp, n > &a0, const v_reg< _Tp, n > &a1, v_reg< _Tp, n > &b0, v_reg< _Tp, n > &b1)
Interleave two vectors
Definition: intrin_cpp.hpp:1557
v_reg< int64, 2 > v_int64x2
Two 64-bit signed integer values
Definition: intrin_cpp.hpp:506
void v_store(_Tp *ptr, const v_reg< _Tp, n > &a)
Store data to memory
Definition: intrin_cpp.hpp:2193
v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > v_dotprod_expand(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Dot product of elements and expand
Definition: intrin_cpp.hpp:1145
V_TypeTraits< typename V_TypeTraits< _Tp >::abs_type >::sum_type v_reduce_sad(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Sum absolute differences of values
Definition: intrin_cpp.hpp:1377
v_reg< int, n > v_ceil(const v_reg< float, n > &a)
Ceil elements
Definition: intrin_cpp.hpp:2465
v_reg< ushort, 8 > v_uint16x8
Eight 16-bit unsigned integer values
Definition: intrin_cpp.hpp:492
CV_INLINE v_reg< _Tp, n > operator&(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Bitwise AND
void v_store_low(_Tp *ptr, const v_reg< _Tp, n > &a)
Store data to memory (lower half)
Definition: intrin_cpp.hpp:2219
v_reg< int, n > v_floor(const v_reg< float, n > &a)
Floor elements
Definition: intrin_cpp.hpp:2452
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > v_dotprod(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Dot product of elements
Definition: intrin_cpp.hpp:1080
int v_scan_forward(const v_reg< _Tp, n > &a)
Get first negative lane index
Definition: intrin_cpp.hpp:1412
v_reg< _Tp, n > v_reverse(const v_reg< _Tp, n > &a)
Vector reverse order
Definition: intrin_cpp.hpp:2346
v_reg< typename V_TypeTraits< _Tp >::w_type, simd128_width/sizeof(typename V_TypeTraits< _Tp >::w_type)> v_load_expand(const _Tp *ptr)
Load register contents from memory with double expand
Definition: intrin_cpp.hpp:1875
v_reg< int, 4 > v_int32x4
Four 32-bit signed integer values
Definition: intrin_cpp.hpp:498
v_reg< typename V_TypeTraits< _Tp >::abs_type, n > v_absdiff(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Absolute difference
Definition: intrin_cpp.hpp:956
V_TypeTraits< _Tp >::sum_type v_reduce_sum(const v_reg< _Tp, n > &a)
Sum packed values
Definition: intrin_cpp.hpp:1338
v_reg< _Tp, n > v_muladd(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c)
A synonym for v_fma
Definition: intrin_cpp.hpp:1060
v_reg< _Tp, n > v_sqr_magnitude(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Square of the magnitude
Definition: intrin_cpp.hpp:1036
v_reg< int, n > v_trunc(const v_reg< float, n > &a)
Truncate elements
Definition: intrin_cpp.hpp:2478
v_reg< unsigned, 4 > v_uint32x4
Four 32-bit unsigned integer values
Definition: intrin_cpp.hpp:496
CV_INLINE v_reg< _Tp, n > operator/(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Divide values
v_reg< _Tp, n > v_invsqrt(const v_reg< _Tp, n > &a)
Inverse square root
Definition: intrin_cpp.hpp:1010
v_reg< _Tp, n > v_magnitude(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Magnitude
Definition: intrin_cpp.hpp:1023
v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > v_dotprod_expand_fast(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Fast Dot product of elements and expand
Definition: intrin_cpp.hpp:1188
CV_INLINE v_reg< double,(n/2)> v_cvt_f64_high(const v_reg< int, n > &a)
Convert the high part of the vector to double
Definition: intrin_cpp.hpp:2587
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load_low(const _Tp *ptr)
Load 64 bits of data to lower part (high part is undefined).
Definition: intrin_cpp.hpp:1702
void v_recombine(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, v_reg< _Tp, n > &low, v_reg< _Tp, n > &high)
Combine two vectors from lower and higher parts of two other vectors
Definition: intrin_cpp.hpp:2325
v_reg< float, n > v_reduce_sum4(const v_reg< float, n > &a, const v_reg< float, n > &b, const v_reg< float, n > &c, const v_reg< float, n > &d)
Sums all elements of each input vector, returns the vector of sums
Definition: intrin_cpp.hpp:1356
void v_mul_expand(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &c, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &d)
Multiply and expand
Definition: intrin_cpp.hpp:1219
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load_aligned(const _Tp *ptr)
Load register contents from memory (aligned)
Definition: intrin_cpp.hpp:1652
v_reg< _Tp, n > v_broadcast_element(const v_reg< _Tp, n > &a)
Broadcast i-th element of vector
Definition: intrin_cpp.hpp:2416
v_reg< _Tp, n > v_select(const v_reg< _Tp, n > &mask, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Per-element select (blend operation)
Definition: intrin_cpp.hpp:1454
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load(const _Tp *ptr)
Load register contents from memory
Definition: intrin_cpp.hpp:1587
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > v_expand_low(const v_reg< _Tp, n > &a)
Expand lower values to the wider pack type
Definition: intrin_cpp.hpp:1499
CV_INLINE v_reg< _Tp, n > operator~(const v_reg< _Tp, n > &a)
Bitwise NOT
CV_INLINE v_reg< double, n/2 > v_cvt_f64(const v_reg< int, n > &a)
Convert lower half to double
Definition: intrin_cpp.hpp:2576
v_reg< typename V_TypeTraits< _Tp >::q_type, simd128_width/sizeof(typename V_TypeTraits< _Tp >::q_type)> v_load_expand_q(const _Tp *ptr)
Load register contents from memory with quad expand
Definition: intrin_cpp.hpp:1964
void v_expand(const v_reg< _Tp, n > &a, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &b0, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &b1)
Expand values to the wider pack type
Definition: intrin_cpp.hpp:1477
v_reg< uchar, 2 *n > v_pack_b(const v_reg< ushort, n > &a, const v_reg< ushort, n > &b)
For 16-bit boolean values
Definition: intrin_cpp.hpp:3114
v_reg< _Tp, n > v_fma(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c)
Multiply and add
Definition: intrin_cpp.hpp:1049
CV_INLINE v_reg< _Tp, n > operator^(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Bitwise XOR
void v_store_interleave(_Tp *ptr, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, hal::StoreMode=hal::STORE_UNALIGNED)
Interleave and store (2 channels)
Definition: intrin_cpp.hpp:2118
void v_transpose4x4(v_reg< _Tp, n > &a0, const v_reg< _Tp, n > &a1, const v_reg< _Tp, n > &a2, const v_reg< _Tp, n > &a3, v_reg< _Tp, n > &b0, v_reg< _Tp, n > &b1, v_reg< _Tp, n > &b2, v_reg< _Tp, n > &b3)
Transpose 4x4 matrix
Definition: intrin_cpp.hpp:2764
v_reg< _Tp, n > v_absdiffs(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Saturating absolute difference
Definition: intrin_cpp.hpp:997
v_reg< uint64, 2 > v_uint64x2
Two 64-bit unsigned integer values
Definition: intrin_cpp.hpp:504
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > v_expand_high(const v_reg< _Tp, n > &a)
Expand higher values to the wider pack type
Definition: intrin_cpp.hpp:1518
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > v_dotprod_fast(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Fast Dot product of elements
Definition: intrin_cpp.hpp:1119
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load_halves(const _Tp *loptr, const _Tp *hiptr)
Load register contents from two memory blocks
Definition: intrin_cpp.hpp:1784
v_reg< _Tp, n > v_mul_hi(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Multiply and extract high part
Definition: intrin_cpp.hpp:1236
v_reg< _Tp, n > v_combine_low(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Combine vector from first elements of two vectors
Definition: intrin_cpp.hpp:2285
v_reg< float, 4 > v_float32x4
Four 32-bit floating point values (single precision)
Definition: intrin_cpp.hpp:500
v_reg< float, n > v_cvt_f32(const v_reg< int, n > &a)
Convert to float
Definition: intrin_cpp.hpp:2537
bool v_check_all(const v_reg< _Tp, n > &a)
Check if all packed values are less than zero
Definition: intrin_cpp.hpp:1424
v_reg< float, n > v_matmuladd(const v_reg< float, n > &v, const v_reg< float, n > &a, const v_reg< float, n > &b, const v_reg< float, n > &c, const v_reg< float, n > &d)
Matrix multiplication and add
Definition: intrin_cpp.hpp:3226
_Tp v_extract_n(const v_reg< _Tp, n > &v)
Vector extract
Definition: intrin_cpp.hpp:2400
v_reg< float, n > v_not_nan(const v_reg< float, n > &a)
Per-element check that the value is not NaN
Definition: intrin_cpp.hpp:893
v_reg< typename V_TypeTraits< _Tp >::abs_type, n > v_popcount(const v_reg< _Tp, n > &a)
Count the 1 bits in the vector lanes and return result as corresponding unsigned type
Definition: intrin_cpp.hpp:827
void v_store_aligned(_Tp *ptr, const v_reg< _Tp, n > &a)
Store data to memory (aligned)
Definition: intrin_cpp.hpp:2254
v_reg< short, 8 > v_int16x8
Eight 16-bit signed integer values
Definition: intrin_cpp.hpp:494
v_reg< double, 2 > v_float64x2
Two 64-bit floating point values (double precision)
Definition: intrin_cpp.hpp:502
v_reg< _Tp, n > v_extract(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Vector extract
Definition: intrin_cpp.hpp:2374
void v_load_deinterleave(const _Tp *ptr, v_reg< _Tp, n > &a, v_reg< _Tp, n > &b)
Load and deinterleave (2 channels)
Definition: intrin_cpp.hpp:2046
softfloat abs(softfloat a)
Absolute value
Definition: softfloat.hpp:444
CV_INLINE int cvRound(double value)
Rounds floating-point number to the nearest integer
Definition: fast_math.hpp:200
CV_INLINE int cvCeil(double value)
Rounds floating-point number to the nearest integer not smaller than the original.
Definition: fast_math.hpp:254
static _Tp saturate_cast(uchar v)
Template function for accurate conversion from one primitive type to another.
Definition: saturate.hpp:80
CV_INLINE int cvFloor(double value)
Rounds floating-point number to the nearest integer not larger than the original.
Definition: fast_math.hpp:234
static bool isAligned(const T &data)
Alignment check of passed values
Definition: utility.hpp:517
#define CV_Assert(expr)
Checks a condition at runtime and throws exception if it fails
Definition: base.hpp:342
#define CV_DbgAssert(expr)
Definition: base.hpp:375
Quat< T > cos(const Quat< T > &q)
Quat< T > sin(const Quat< T > &q)
"black box" representation of the file storage associated with a file on disk.
Definition: aruco.hpp:75
Definition: intrin.hpp:104
Definition: intrin_cpp.hpp:369
_Tp get0() const
Access first value
Definition: intrin_cpp.hpp:436
v_reg(const v_reg< _Tp, n > &r)
Copy constructor
Definition: intrin_cpp.hpp:421
v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, _Tp s4, _Tp s5, _Tp s6, _Tp s7, _Tp s8, _Tp s9, _Tp s10, _Tp s11, _Tp s12, _Tp s13, _Tp s14, _Tp s15)
Constructor
Definition: intrin_cpp.hpp:404
v_reg(const _Tp *ptr)
Constructor
Definition: intrin_cpp.hpp:379
v_reg()
Default constructor
Definition: intrin_cpp.hpp:418
v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, _Tp s4, _Tp s5, _Tp s6, _Tp s7)
Constructor
Definition: intrin_cpp.hpp:394
v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3)
Constructor
Definition: intrin_cpp.hpp:389
v_reg(_Tp s0, _Tp s1)
Constructor
Definition: intrin_cpp.hpp:384