cpp/ja/intrin__cpp_8hpp_source.html

/*M///////////////////////////////////////////////////////////////////////////////////////


//


//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.


//


//  By downloading, copying, installing or using the software you agree to this license.


//  If you do not agree to this license, do not download, install,


//  copy or use the software.


//


//


//                          License Agreement


//                For Open Source Computer Vision Library


//


// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.


// Copyright (C) 2009, Willow Garage Inc., all rights reserved.


// Copyright (C) 2013, OpenCV Foundation, all rights reserved.


// Copyright (C) 2015, Itseez Inc., all rights reserved.


// Third party copyrights are property of their respective owners.


//


// Redistribution and use in source and binary forms, with or without modification,


// are permitted provided that the following conditions are met:


//


//   * Redistribution's of source code must retain the above copyright notice,


//     this list of conditions and the following disclaimer.


//


//   * Redistribution's in binary form must reproduce the above copyright notice,


//     this list of conditions and the following disclaimer in the documentation


//     and/or other materials provided with the distribution.


//


//   * The name of the copyright holders may not be used to endorse or promote products


//     derived from this software without specific prior written permission.


//


// This software is provided by the copyright holders and contributors "as is" and


// any express or implied warranties, including, but not limited to, the implied


// warranties of merchantability and fitness for a particular purpose are disclaimed.


// In no event shall the Intel Corporation or contributors be liable for any direct,


// indirect, incidental, special, exemplary, or consequential damages


// (including, but not limited to, procurement of substitute goods or services;


// loss of use, data, or profits; or business interruption) however caused


// and on any theory of liability, whether in contract, strict liability,


// or tort (including negligence or otherwise) arising in any way out of


// the use of this software, even if advised of the possibility of such damage.


//


//M*/


#ifndef OPENCV_HAL_INTRIN_CPP_HPP


#define OPENCV_HAL_INTRIN_CPP_HPP


#include <limits>


#include <cstring>


#include <algorithm>


#include "opencv2/core/saturate.hpp"


#define CV_SIMD128_CPP 1


#if defined(CV_FORCE_SIMD128_CPP)


#define CV_SIMD128 1


#define CV_SIMD128_64F 1


#endif


#if defined(CV_DOXYGEN)


#define CV_SIMD128 1


#define CV_SIMD128_64F 1


#define CV_SIMD256 1


#define CV_SIMD256_64F 1


#define CV_SIMD512 1


#define CV_SIMD512_64F 1


#else


#define CV_SIMD256 0

// Explicitly disable SIMD256 and SIMD512 support for scalar intrinsic implementation


#define CV_SIMD512 0

// to avoid warnings during compilation


#endif


namespace

cv


{


#ifndef CV_DOXYGEN


CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN


#endif


/** @brief Fixed-size vector register emulated with a plain array of lanes.

@tparam _Tp lane (element) type
@tparam n   number of lanes held in the register */
template<typename _Tp, int n> struct v_reg
{
    typedef _Tp lane_type;
    enum { nlanes = n };

    /** @brief Fills all n lanes from consecutive elements starting at ptr. */
    explicit v_reg(const _Tp* ptr)
    {
        for( int lane = 0; lane < n; ++lane )
            s[lane] = ptr[lane];
    }

    /** @brief Assigns lanes 0..1 from the two arguments. */
    v_reg(_Tp s0, _Tp s1)
    {
        s[0] = s0;
        s[1] = s1;
    }

    /** @brief Assigns lanes 0..3 from the four arguments. */
    v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3)
    {
        s[0] = s0;
        s[1] = s1;
        s[2] = s2;
        s[3] = s3;
    }

    /** @brief Assigns lanes 0..7 from the eight arguments. */
    v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3,
          _Tp s4, _Tp s5, _Tp s6, _Tp s7)
    {
        s[0] = s0;  s[1] = s1;
        s[2] = s2;  s[3] = s3;
        s[4] = s4;  s[5] = s5;
        s[6] = s6;  s[7] = s7;
    }

    /** @brief Assigns lanes 0..15 from the sixteen arguments. */
    v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3,
          _Tp s4, _Tp s5, _Tp s6, _Tp s7,
          _Tp s8, _Tp s9, _Tp s10, _Tp s11,
          _Tp s12, _Tp s13, _Tp s14, _Tp s15)
    {
        s[0] = s0;    s[1] = s1;
        s[2] = s2;    s[3] = s3;
        s[4] = s4;    s[5] = s5;
        s[6] = s6;    s[7] = s7;
        s[8] = s8;    s[9] = s9;
        s[10] = s10;  s[11] = s11;
        s[12] = s12;  s[13] = s13;
        s[14] = s14;  s[15] = s15;
    }

    /** @brief Default constructor; the lanes are not initialized. */
    v_reg() {}

    /** @brief Copy constructor: copies every lane of r. */
    v_reg(const v_reg<_Tp, n> & r)
    {
        for( int lane = 0; lane < n; ++lane )
            s[lane] = r.s[lane];
    }

    /** @brief Returns lane 0. */
    _Tp get0() const
    {
        return s[0];
    }

    /** @brief Returns lane i (no bounds checking is performed). */
    _Tp get(const int i) const
    {
        return s[i];
    }

    /** @brief Returns a register whose lower half holds this register's upper
    half and whose upper half is set to 0. */
    v_reg<_Tp, n> high() const
    {
        const int half = n/2;
        v_reg<_Tp, n> res;
        for( int k = 0; k < half; ++k )
        {
            res.s[k] = s[half + k];
            res.s[half + k] = 0;
        }
        return res;
    }

    /** @brief Returns a register with every lane set to (_Tp)0. */
    static v_reg<_Tp, n> zero()
    {
        return all((_Tp)0);
    }

    /** @brief Returns a register with every lane set to s. */
    static v_reg<_Tp, n> all(_Tp s)
    {
        v_reg<_Tp, n> res;
        for( int lane = 0; lane < n; ++lane )
            res.s[lane] = s;
        return res;
    }

    /** @brief Byte-wise copy into a register of another lane type/count.

    Only min(sizeof(_Tp2)*n2, sizeof(_Tp)*n) bytes are copied; any remaining
    bytes of the result are left as default-constructed. */
    template<typename _Tp2, int n2> v_reg<_Tp2, n2> reinterpret_as() const
    {
        const size_t dstBytes = sizeof(_Tp2)*n2;
        const size_t srcBytes = sizeof(_Tp)*n;
        v_reg<_Tp2, n2> res;
        std::memcpy(&res.s[0], &s[0], std::min(dstBytes, srcBytes));
        return res;
    }

    /** @brief Lane-by-lane assignment from r. */
    v_reg& operator=(const v_reg<_Tp, n> & r)
    {
        for( int lane = 0; lane < n; ++lane )
            s[lane] = r.s[lane];
        return *this;
    }

    _Tp s[n]; // lane storage
};


// 128-bit register aliases: lane type x lane count.
typedef v_reg<uchar, 16>   v_uint8x16;   // 16 lanes of uchar
typedef v_reg<schar, 16>   v_int8x16;    // 16 lanes of schar
typedef v_reg<ushort, 8>   v_uint16x8;   // 8 lanes of ushort
typedef v_reg<short, 8>    v_int16x8;    // 8 lanes of short
typedef v_reg<unsigned, 4> v_uint32x4;   // 4 lanes of unsigned
typedef v_reg<int, 4>      v_int32x4;    // 4 lanes of int
typedef v_reg<float, 4>    v_float32x4;  // 4 lanes of float
typedef v_reg<double, 2>   v_float64x2;  // 2 lanes of double
typedef v_reg<uint64, 2>   v_uint64x2;   // 2 lanes of uint64
typedef v_reg<int64, 2>    v_int64x2;    // 2 lanes of int64


#if CV_SIMD256
// 256-bit register aliases (only when CV_SIMD256 is enabled).
typedef v_reg<uchar, 32>   v_uint8x32;   // 32 lanes of uchar
typedef v_reg<schar, 32>   v_int8x32;    // 32 lanes of schar
typedef v_reg<ushort, 16>  v_uint16x16;  // 16 lanes of ushort
typedef v_reg<short, 16>   v_int16x16;   // 16 lanes of short
typedef v_reg<unsigned, 8> v_uint32x8;   // 8 lanes of unsigned
typedef v_reg<int, 8>      v_int32x8;    // 8 lanes of int
typedef v_reg<float, 8>    v_float32x8;  // 8 lanes of float
typedef v_reg<double, 4>   v_float64x4;  // 4 lanes of double
typedef v_reg<uint64, 4>   v_uint64x4;   // 4 lanes of uint64
typedef v_reg<int64, 4>    v_int64x4;    // 4 lanes of int64
#endif


#if CV_SIMD512
// 512-bit register aliases (only when CV_SIMD512 is enabled).
typedef v_reg<uchar, 64>    v_uint8x64;    // 64 lanes of uchar
typedef v_reg<schar, 64>    v_int8x64;     // 64 lanes of schar
typedef v_reg<ushort, 32>   v_uint16x32;   // 32 lanes of ushort
typedef v_reg<short, 32>    v_int16x32;    // 32 lanes of short
typedef v_reg<unsigned, 16> v_uint32x16;   // 16 lanes of unsigned
typedef v_reg<int, 16>      v_int32x16;    // 16 lanes of int
typedef v_reg<float, 16>    v_float32x16;  // 16 lanes of float
typedef v_reg<double, 8>    v_float64x8;   // 8 lanes of double
typedef v_reg<uint64, 8>    v_uint64x8;    // 8 lanes of uint64
typedef v_reg<int64, 8>     v_int64x8;     // 8 lanes of int64
#endif


// Register widths in bytes. simdmax_width is the widest width enabled by
// the CV_SIMD256 / CV_SIMD512 switches.
enum
{
    simd128_width = 16,
#if CV_SIMD256
    simd256_width = 32,
#endif
#if CV_SIMD512
    simd512_width = 64,
    simdmax_width = simd512_width
#elif CV_SIMD256
    simdmax_width = simd256_width
#else
    simdmax_width = simd128_width
#endif
};


// Generic declarations of the arithmetic and bitwise operators on v_reg.
// Each binary operator is paired with its compound-assignment form.

/** @brief Element-wise addition. */
template<typename _Tp, int n> CV_INLINE
v_reg<_Tp, n> operator+(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE
v_reg<_Tp, n>& operator+=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);

/** @brief Element-wise subtraction. */
template<typename _Tp, int n> CV_INLINE
v_reg<_Tp, n> operator-(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE
v_reg<_Tp, n>& operator-=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);

/** @brief Element-wise multiplication. */
template<typename _Tp, int n> CV_INLINE
v_reg<_Tp, n> operator*(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE
v_reg<_Tp, n>& operator*=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);

/** @brief Element-wise division. */
template<typename _Tp, int n> CV_INLINE
v_reg<_Tp, n> operator/(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE
v_reg<_Tp, n>& operator/=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);

/** @brief Bitwise AND. */
template<typename _Tp, int n> CV_INLINE
v_reg<_Tp, n> operator&(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE
v_reg<_Tp, n>& operator&=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);

/** @brief Bitwise OR. */
template<typename _Tp, int n> CV_INLINE
v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE
v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);

/** @brief Bitwise XOR. */
template<typename _Tp, int n> CV_INLINE
v_reg<_Tp, n> operator^(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
template<typename _Tp, int n> CV_INLINE
v_reg<_Tp, n>& operator^=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);

/** @brief Bitwise NOT. */
template<typename _Tp, int n> CV_INLINE
v_reg<_Tp, n> operator~(const v_reg<_Tp, n>& a);


#ifndef CV_DOXYGEN


// Expands macro_name(<type>, args...) once for each integer lane type.
#define CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, ...) \
    __CV_EXPAND(macro_name(uchar, __VA_ARGS__)) \
    __CV_EXPAND(macro_name(schar, __VA_ARGS__)) \
    __CV_EXPAND(macro_name(ushort, __VA_ARGS__)) \
    __CV_EXPAND(macro_name(short, __VA_ARGS__)) \
    __CV_EXPAND(macro_name(unsigned, __VA_ARGS__)) \
    __CV_EXPAND(macro_name(int, __VA_ARGS__)) \
    __CV_EXPAND(macro_name(uint64, __VA_ARGS__)) \
    __CV_EXPAND(macro_name(int64, __VA_ARGS__))


// Expands macro_name(<type>, args...) once for float and once for double.
#define CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, ...) \
    __CV_EXPAND(macro_name(float, __VA_ARGS__)) \
    __CV_EXPAND(macro_name(double, __VA_ARGS__))


// Expands macro_name for the integer lane types followed by the floating-point ones.
#define CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(macro_name, ...) \
    CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, __VA_ARGS__) \
    CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, __VA_ARGS__)


// Defines `a bin_op b` and `a bin_op= b` for v_reg<_Tp, n>. Each lane result
// is passed through saturate_cast<_Tp>.
#define CV__HAL_INTRIN_IMPL_BIN_OP_(_Tp, bin_op) \
template<int n> inline \
v_reg<_Tp, n> operator bin_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
{ \
    v_reg<_Tp, n> res; \
    for( int k = 0; k < n; ++k ) \
        res.s[k] = saturate_cast<_Tp>(a.s[k] bin_op b.s[k]); \
    return res; \
} \
template<int n> inline \
v_reg<_Tp, n>& operator bin_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
{ \
    for( int k = 0; k < n; ++k ) \
        a.s[k] = saturate_cast<_Tp>(a.s[k] bin_op b.s[k]); \
    return a; \
}

// Instantiates the operator pair above for every lane type.
#define CV__HAL_INTRIN_IMPL_BIN_OP(bin_op) CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(CV__HAL_INTRIN_IMPL_BIN_OP_, bin_op)

CV__HAL_INTRIN_IMPL_BIN_OP(+)
CV__HAL_INTRIN_IMPL_BIN_OP(-)
CV__HAL_INTRIN_IMPL_BIN_OP(*)
// Division is generated for the floating-point lane types only.
CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(CV__HAL_INTRIN_IMPL_BIN_OP_, /)


// Defines `a bit_op b` and `a bit_op= b` for v_reg<_Tp, n>. Each lane is
// converted with V_TypeTraits<_Tp>::reinterpret_int, combined, and converted
// back with reinterpret_from_int.
#define CV__HAL_INTRIN_IMPL_BIT_OP_(_Tp, bit_op) \
template<int n> CV_INLINE \
v_reg<_Tp, n> operator bit_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
{ \
    typedef typename V_TypeTraits<_Tp>::int_type itype; \
    v_reg<_Tp, n> res; \
    for( int k = 0; k < n; ++k ) \
        res.s[k] = V_TypeTraits<_Tp>::reinterpret_from_int( \
            (itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[k]) bit_op \
                    V_TypeTraits<_Tp>::reinterpret_int(b.s[k]))); \
    return res; \
} \
template<int n> CV_INLINE \
v_reg<_Tp, n>& operator bit_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
{ \
    typedef typename V_TypeTraits<_Tp>::int_type itype; \
    for( int k = 0; k < n; ++k ) \
        a.s[k] = V_TypeTraits<_Tp>::reinterpret_from_int( \
            (itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[k]) bit_op \
                    V_TypeTraits<_Tp>::reinterpret_int(b.s[k]))); \
    return a; \
}

// Instantiates the bitwise operator pair for integer and floating-point lane types.
#define CV__HAL_INTRIN_IMPL_BIT_OP(bit_op) \
CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op) \
CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op) /* TODO: FIXIT remove this after masks refactoring */

CV__HAL_INTRIN_IMPL_BIT_OP(&)
CV__HAL_INTRIN_IMPL_BIT_OP(|)
CV__HAL_INTRIN_IMPL_BIT_OP(^)


// Defines operator~ for v_reg<_Tp, n>. The second macro argument is unused.
// Instantiated below for the integer lane types only.
#define CV__HAL_INTRIN_IMPL_BITWISE_NOT_(_Tp, dummy) \
template<int n> CV_INLINE \
v_reg<_Tp, n> operator ~ (const v_reg<_Tp, n>& a) \
{ \
    v_reg<_Tp, n> res; \
    for( int k = 0; k < n; ++k ) \
        res.s[k] = V_TypeTraits<_Tp>::reinterpret_from_int(~V_TypeTraits<_Tp>::reinterpret_int(a.s[k])); \
    return res; \
}

CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(CV__HAL_INTRIN_IMPL_BITWISE_NOT_, ~)


#endif

// !CV_DOXYGEN


// Generates func(a): applies cfunc to each lane of `a` and returns the
// results in a register with lane type _Tp2.
#define OPENCV_HAL_IMPL_MATH_FUNC(func, cfunc, _Tp2) \
template<typename _Tp, int n> inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a) \
{ \
    v_reg<_Tp2, n> res; \
    for( int k = 0; k < n; ++k ) \
        res.s[k] = cfunc(a.s[k]); \
    return res; \
}

/** @brief Per-lane square root (std::sqrt). */
OPENCV_HAL_IMPL_MATH_FUNC(v_sqrt, std::sqrt, _Tp)

/** @brief Per-lane sine (std::sin). */
OPENCV_HAL_IMPL_MATH_FUNC(v_sin, std::sin, _Tp)
/** @brief Per-lane cosine (std::cos). */
OPENCV_HAL_IMPL_MATH_FUNC(v_cos, std::cos, _Tp)
/** @brief Per-lane exponential (std::exp). */
OPENCV_HAL_IMPL_MATH_FUNC(v_exp, std::exp, _Tp)
/** @brief Per-lane natural logarithm (std::log). */
OPENCV_HAL_IMPL_MATH_FUNC(v_log, std::log, _Tp)

/** @brief Per-lane absolute value (std::abs), cast to V_TypeTraits<_Tp>::abs_type. */
OPENCV_HAL_IMPL_MATH_FUNC(v_abs, (typename V_TypeTraits<_Tp>::abs_type)std::abs,
                          typename V_TypeTraits<_Tp>::abs_type)


// Generates func(a, b): applies cfunc to each pair of corresponding lanes.
#define OPENCV_HAL_IMPL_MINMAX_FUNC(func, cfunc) \
template<typename _Tp, int n> inline v_reg<_Tp, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
{ \
    v_reg<_Tp, n> res; \
    for( int k = 0; k < n; ++k ) \
        res.s[k] = cfunc(a.s[k], b.s[k]); \
    return res; \
}

// Generates func(a): folds cfunc over all lanes, starting from lane 0.
#define OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(func, cfunc) \
template<typename _Tp, int n> inline _Tp func(const v_reg<_Tp, n>& a) \
{ \
    _Tp acc = a.s[0]; \
    for( int k = 1; k < n; ++k ) \
        acc = cfunc(acc, a.s[k]); \
    return acc; \
}

/** @brief Per-lane minimum of two registers. */
OPENCV_HAL_IMPL_MINMAX_FUNC(v_min, std::min)

/** @brief Per-lane maximum of two registers. */
OPENCV_HAL_IMPL_MINMAX_FUNC(v_max, std::max)

/** @brief Smallest lane value of a register. */
OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min, std::min)

/** @brief Largest lane value of a register. */
OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max, std::max)


// Lookup table: popCountTable[v] is the number of set bits in the byte value v.
// Each row covers 16 consecutive byte values.
static const unsigned char popCountTable[] =
{
    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, // 0x00 - 0x0f
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x10 - 0x1f
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x20 - 0x2f
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x30 - 0x3f
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x40 - 0x4f
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x50 - 0x5f
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x60 - 0x6f
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0x70 - 0x7f
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x80 - 0x8f
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x90 - 0x9f
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0xa0 - 0xaf
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0xb0 - 0xbf
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0xc0 - 0xcf
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0xd0 - 0xdf
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0xe0 - 0xef
    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, // 0xf0 - 0xff
};


/** @brief Counts the set bits in every lane of a register.

Each byte of the register is looked up in popCountTable and the counts are
accumulated into the lane that the byte belongs to.
@param a input register
@return register with V_TypeTraits<_Tp>::abs_type lanes holding the per-lane bit counts */
template<typename _Tp, int n>
inline v_reg<typename V_TypeTraits<_Tp>::abs_type, n> v_popcount(const v_reg<_Tp, n>& a)
{
    typedef v_reg<typename V_TypeTraits<_Tp>::abs_type, n> result_type;
    result_type b = result_type::zero();

    // Snapshot the raw bytes of `a` once; previously a byte view was rebuilt
    // through v_reinterpret_as_u8(a) on every loop iteration.
    unsigned char bytes[n*sizeof(_Tp)];
    std::memcpy(bytes, &a.s[0], sizeof(bytes));

    for (int i = 0; i < n*(int)sizeof(_Tp); i++)
        b.s[i/sizeof(_Tp)] += popCountTable[bytes[i]];
    return b;
}


/** @brief Computes the per-lane minimum and maximum of two registers.

@param a      first input register
@param b      second input register
@param minval receives std::min(a, b) for each lane
@param maxval receives std::max(a, b) for each lane

The inputs of a lane are read before either output is written, so minval and
maxval may refer to the same objects as a and b (e.g. v_minmax(a, b, a, b)). */
template<typename _Tp, int n>
inline void v_minmax( const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
                      v_reg<_Tp, n>& minval, v_reg<_Tp, n>& maxval )
{
    for( int i = 0; i < n; i++ )
    {
        // Copy the operands first: writing minval.s[i] must not change what
        // the max computation sees when an output aliases an input.
        const _Tp lhs = a.s[i];
        const _Tp rhs = b.s[i];
        minval.s[i] = std::min(lhs, rhs);
        maxval.s[i] = std::max(lhs, rhs);
    }
}


// Generates a per-lane comparison operator. For each lane the boolean result
// is negated as an int (0 or -1), cast to V_TypeTraits<_Tp>::int_type and
// converted to _Tp with reinterpret_from_int.
#define OPENCV_HAL_IMPL_CMP_OP(cmp_op) \
template<typename _Tp, int n> \
inline v_reg<_Tp, n> operator cmp_op(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
{ \
    typedef typename V_TypeTraits<_Tp>::int_type itype; \
    v_reg<_Tp, n> res; \
    for( int k = 0; k < n; ++k ) \
        res.s[k] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)-(int)(a.s[k] cmp_op b.s[k])); \
    return res; \
}

/** @brief Per-lane less-than comparison. */
OPENCV_HAL_IMPL_CMP_OP(<)

/** @brief Per-lane greater-than comparison. */
OPENCV_HAL_IMPL_CMP_OP(>)

/** @brief Per-lane less-than-or-equal comparison. */
OPENCV_HAL_IMPL_CMP_OP(<=)

/** @brief Per-lane greater-than-or-equal comparison. */
OPENCV_HAL_IMPL_CMP_OP(>=)

/** @brief Per-lane equality comparison. */
OPENCV_HAL_IMPL_CMP_OP(==)

/** @brief Per-lane inequality comparison. */
OPENCV_HAL_IMPL_CMP_OP(!=)


/** @brief Builds a per-lane mask from the self-comparison a == a (float lanes).

A lane that compares equal to itself yields the mask value produced from -1,
any other lane yields the value produced from 0. */
template<int n>
inline v_reg<float, n> v_not_nan(const v_reg<float, n>& a)
{
    typedef typename V_TypeTraits<float>::int_type itype;
    v_reg<float, n> mask;
    for( int k = 0; k < n; ++k )
        mask.s[k] = V_TypeTraits<float>::reinterpret_from_int((itype)-(int)(a.s[k] == a.s[k]));
    return mask;
}


/** @brief Builds a per-lane mask from the self-comparison a == a (double lanes).

A lane that compares equal to itself yields the mask value produced from -1,
any other lane yields the value produced from 0. */
template<int n>
inline v_reg<double, n> v_not_nan(const v_reg<double, n>& a)
{
    typedef typename V_TypeTraits<double>::int_type itype;
    v_reg<double, n> mask;
    for( int k = 0; k < n; ++k )
        mask.s[k] = V_TypeTraits<double>::reinterpret_from_int((itype)-(int)(a.s[k] == a.s[k]));
    return mask;
}


// Generates func(a, b): computes `a bin_op b` per lane and converts the
// result with cast_op into a register with lane type _Tp2.
#define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2) \
template<typename _Tp, int n> \
inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
{ \
    typedef _Tp2 rtype; \
    v_reg<rtype, n> res; \
    for( int k = 0; k < n; ++k ) \
        res.s[k] = cast_op(a.s[k] bin_op b.s[k]); \
    return res; \
}

/** @brief Per-lane addition with a plain cast to _Tp (no saturation). */
OPENCV_HAL_IMPL_ARITHM_OP(v_add_wrap, +, (_Tp), _Tp)

/** @brief Per-lane subtraction with a plain cast to _Tp (no saturation). */
OPENCV_HAL_IMPL_ARITHM_OP(v_sub_wrap, -, (_Tp), _Tp)

/** @brief Per-lane multiplication with a plain cast to _Tp (no saturation). */
OPENCV_HAL_IMPL_ARITHM_OP(v_mul_wrap, *, (_Tp), _Tp)


/** @brief Returns the larger argument minus the smaller one (b - a when a > b is false). */
template<typename T> inline T _absdiff(T a, T b)
{
    if( a > b )
        return a - b;
    return b - a;
}


/** @brief Per-lane absolute difference returned in V_TypeTraits<_Tp>::abs_type lanes.

For signed lane types both operands are XOR-ed with the sign-bit mask of the
result type before the difference is taken; for unsigned lane types the mask
is 0.
@param a first input register
@param b second input register
@return register holding _absdiff of the masked operands for every lane */
template<typename _Tp, int n>
inline v_reg<typename V_TypeTraits<_Tp>::abs_type, n> v_absdiff(const v_reg<_Tp, n>& a, const v_reg<_Tp, n> & b)
{
    typedef typename V_TypeTraits<_Tp>::abs_type rtype;
    v_reg<rtype, n> c;
    // Shift a value of type rtype rather than the int literal 1, so the shift
    // stays in range when rtype is 32 or 64 bits wide.
    const rtype mask = std::numeric_limits<_Tp>::is_signed
                     ? (rtype)((rtype)1 << (sizeof(rtype)*8 - 1))
                     : (rtype)0;
    for( int i = 0; i < n; i++ )
    {
        rtype ua = a.s[i] ^ mask;
        rtype ub = b.s[i] ^ mask;
        c.s[i] = _absdiff(ua, ub);
    }
    return c;
}


/** @brief Per-lane absolute difference for float registers (via _absdiff). */
template<int n>
inline v_reg<float, n> v_absdiff(const v_reg<float, n>& a, const v_reg<float, n>& b)
{
    typedef v_reg<float, n> reg_type;
    reg_type res;
    for( int k = 0; k < reg_type::nlanes; ++k )
        res.s[k] = _absdiff(a.s[k], b.s[k]);
    return res;
}


/** @brief Per-lane absolute difference for double registers (via _absdiff). */
template<int n>
inline v_reg<double, n> v_absdiff(const v_reg<double, n>& a, const v_reg<double, n>& b)
{
    typedef v_reg<double, n> reg_type;
    reg_type res;
    for( int k = 0; k < reg_type::nlanes; ++k )
        res.s[k] = _absdiff(a.s[k], b.s[k]);
    return res;
}


/** @brief Per-lane std::abs(a - b), converted to _Tp with saturate_cast. */
template<typename _Tp, int n>
inline v_reg<_Tp, n> v_absdiffs(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
{
    v_reg<_Tp, n> res;
    for( int k = 0; k < n; ++k )
        res.s[k] = saturate_cast<_Tp>(std::abs(a.s[k] - b.s[k]));
    return res;
}


/** @brief Per-lane inverse square root: 1.f / std::sqrt(a). */
template<typename _Tp, int n>
inline v_reg<_Tp, n> v_invsqrt(const v_reg<_Tp, n>& a)
{
    v_reg<_Tp, n> res;
    for( int k = 0; k < n; ++k )
        res.s[k] = 1.f/std::sqrt(a.s[k]);
    return res;
}


/** @brief Per-lane magnitude: std::sqrt(a*a + b*b). */
template<typename _Tp, int n>
inline v_reg<_Tp, n> v_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
{
    v_reg<_Tp, n> res;
    for( int k = 0; k < n; ++k )
        res.s[k] = std::sqrt(a.s[k]*a.s[k] + b.s[k]*b.s[k]);
    return res;
}


/** @brief Per-lane squared magnitude: a*a + b*b. */
template<typename _Tp, int n>
inline v_reg<_Tp, n> v_sqr_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
{
    v_reg<_Tp, n> res;
    for( int k = 0; k < n; ++k )
        res.s[k] = a.s[k]*a.s[k] + b.s[k]*b.s[k];
    return res;
}


/** @brief Per-lane multiply-add: a*b + c, evaluated as a plain expression. */
template<typename _Tp, int n>
inline v_reg<_Tp, n> v_fma(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
                           const v_reg<_Tp, n>& c)
{
    v_reg<_Tp, n> res;
    for( int k = 0; k < n; ++k )
        res.s[k] = a.s[k]*b.s[k] + c.s[k];
    return res;
}


/** @brief Alias for v_fma(a, b, c). */
template<typename _Tp, int n>
inline v_reg<_Tp, n> v_muladd(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
                              const v_reg<_Tp, n>& c)
{
    const v_reg<_Tp, n> res = v_fma(a, b, c);
    return res;
}


/** @brief Dot product of adjacent lane pairs.

Output lane k is a[2k]*b[2k] + a[2k+1]*b[2k+1], with the left operand of each
product cast to V_TypeTraits<_Tp>::w_type. */
template<typename _Tp, int n>
inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
{
    typedef typename V_TypeTraits<_Tp>::w_type w_type;
    v_reg<w_type, n/2> res;
    for( int k = 0; k < (n/2); ++k )
    {
        const int even = k*2;
        const int odd = k*2+1;
        res.s[k] = (w_type)a.s[even]*b.s[even] + (w_type)a.s[odd]*b.s[odd];
    }
    return res;
}


/** @brief Dot product of adjacent lane pairs plus an accumulator.

Output lane k is a[2k]*b[2k] + a[2k+1]*b[2k+1] + c[k], with the left operand
of each product cast to V_TypeTraits<_Tp>::w_type. */
template<typename _Tp, int n>
inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
          const v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2>& c)
{
    typedef typename V_TypeTraits<_Tp>::w_type w_type;
    v_reg<w_type, n/2> res;
    for( int k = 0; k < (n/2); ++k )
    {
        const int even = k*2;
        const int odd = k*2+1;
        res.s[k] = (w_type)a.s[even]*b.s[even] + (w_type)a.s[odd]*b.s[odd] + c.s[k];
    }
    return res;
}


/** @brief Forwards to v_dotprod(a, b). */
template<typename _Tp, int n>
inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
v_dotprod_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
{
    return v_dotprod(a, b);
}


/** @brief Forwards to v_dotprod(a, b, c). */
template<typename _Tp, int n>
inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
v_dotprod_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
               const v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2>& c)
{
    return v_dotprod(a, b, c);
}


/** @brief Dot product of groups of four adjacent lanes.

Output lane k sums a[4k+j]*b[4k+j] for j = 0..3, with the left operand of
each product cast to V_TypeTraits<_Tp>::q_type. */
template<typename _Tp, int n>
inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
v_dotprod_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
{
    typedef typename V_TypeTraits<_Tp>::q_type q_type;
    v_reg<q_type, n/4> res;
    for( int k = 0; k < (n/4); ++k )
    {
        const int j = k*4;
        res.s[k] = (q_type)a.s[j    ]*b.s[j    ] + (q_type)a.s[j + 1]*b.s[j + 1] +
                   (q_type)a.s[j + 2]*b.s[j + 2] + (q_type)a.s[j + 3]*b.s[j + 3];
    }
    return res;
}


/** @brief Dot product of groups of four adjacent lanes plus an accumulator.

Output lane k sums a[4k+j]*b[4k+j] for j = 0..3 and then adds c[k]; the left
operand of each product is cast to V_TypeTraits<_Tp>::q_type. */
template<typename _Tp, int n>
inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
v_dotprod_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
                 const v_reg<typename V_TypeTraits<_Tp>::q_type, n / 4>& c)
{
    typedef typename V_TypeTraits<_Tp>::q_type q_type;
    v_reg<q_type, n/4> res;
    for( int k = 0; k < (n/4); ++k )
    {
        const int j = k*4;
        res.s[k] = (q_type)a.s[j    ]*b.s[j    ] + (q_type)a.s[j + 1]*b.s[j + 1] +
                   (q_type)a.s[j + 2]*b.s[j + 2] + (q_type)a.s[j + 3]*b.s[j + 3] + c.s[k];
    }
    return res;
}


/** @brief Forwards to v_dotprod_expand(a, b). */
template<typename _Tp, int n>
inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
v_dotprod_expand_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
{
    return v_dotprod_expand(a, b);
}


/** @brief Forwards to v_dotprod_expand(a, b, c). */
template<typename _Tp, int n>
inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
v_dotprod_expand_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
                      const v_reg<typename V_TypeTraits<_Tp>::q_type, n / 4>& c)
{
    return v_dotprod_expand(a, b, c);
}


/** @brief Widening per-lane multiplication.

@param a first input register
@param b second input register
@param c receives the products of the lower n/2 lanes
@param d receives the products of the upper n/2 lanes

The left operand of each product is cast to V_TypeTraits<_Tp>::w_type. */
template<typename _Tp, int n>
inline void v_mul_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
                         v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& c,
                         v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& d)
{
    typedef typename V_TypeTraits<_Tp>::w_type w_type;
    const int half = n/2;
    for( int k = 0; k < half; ++k )
    {
        c.s[k] = (w_type)a.s[k]*b.s[k];
        d.s[k] = (w_type)a.s[k+half]*b.s[k+half];
    }
}


/** @brief Per-lane product computed in V_TypeTraits<_Tp>::w_type, shifted
right by sizeof(_Tp)*8 bits and cast back to _Tp. */
template<typename _Tp, int n>
inline v_reg<_Tp, n> v_mul_hi(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
{
    typedef typename V_TypeTraits<_Tp>::w_type w_type;
    v_reg<_Tp, n> res;
    for( int k = 0; k < n; ++k )
        res.s[k] = (_Tp)(((w_type)a.s[k] * b.s[k]) >> sizeof(_Tp)*8);
    return res;
}


/** @brief Sums adjacent lane pairs: c[k] = (w_type)a[2k] + a[2k+1].

@param a input register
@param c output register with n/2 lanes of V_TypeTraits<_Tp>::w_type */
template<typename _Tp, int n>
inline void v_hsum(const v_reg<_Tp, n>& a,
                   v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& c)
{
    typedef typename V_TypeTraits<_Tp>::w_type w_type;
    for( int k = 0; k < (n/2); ++k )
        c.s[k] = (w_type)a.s[k*2] + a.s[k*2+1];
}


// Generates a per-lane shift operator taking a run-time shift amount `imm`.
// The shifted value is cast back to _Tp.
#define OPENCV_HAL_IMPL_SHIFT_OP(shift_op) \
template<typename _Tp, int n> inline v_reg<_Tp, n> operator shift_op(const v_reg<_Tp, n>& a, int imm) \
{ \
    v_reg<_Tp, n> res; \
    for( int k = 0; k < n; ++k ) \
        res.s[k] = (_Tp)(a.s[k] shift_op imm); \
    return res; \
}

/** @brief Per-lane left shift. */
OPENCV_HAL_IMPL_SHIFT_OP(<< )

/** @brief Per-lane right shift. */
OPENCV_HAL_IMPL_SHIFT_OP(>> )


// Generates v_rotate_<suffix> in two forms:
//  * one register:  output lane k takes a[k opA imm] when that index is in
//    [0, n), otherwise 0;
//  * two registers: output lane k takes b[k opA imm opB n] when that index is
//    in [0, n), else a[k opA imm] when that index is in [0, n), else 0.
#define OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(suffix,opA,opB) \
template<int imm, typename _Tp, int n> inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a) \
{ \
    v_reg<_Tp, n> res; \
    for (int k = 0; k < n; ++k) \
    { \
        const int src = k opA imm; \
        const bool inside = (0 <= src) && (src < n); \
        if (inside) \
            res.s[k] = a.s[src]; \
        else \
            res.s[k] = 0; \
    } \
    return res; \
} \
template<int imm, typename _Tp, int n> inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
{ \
    v_reg<_Tp, n> res; \
    for (int k = 0; k < n; ++k) \
    { \
        const int fromA = k opA imm; \
        const int fromB = k opA imm opB n; \
        if (0 <= fromB && fromB < n) \
            res.s[k] = b.s[fromB]; \
        else if (0 <= fromA && fromA < n) \
            res.s[k] = a.s[fromA]; \
        else \
            res.s[k] = 0; \
    } \
    return res; \
}

/** @brief Lane rotation to the left (source index k - imm). */
OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(left,  -, +)

/** @brief Lane rotation to the right (source index k + imm). */
OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(right, +, -)


/** @brief Sums all lanes of a register into V_TypeTraits<_Tp>::sum_type. */
template<typename _Tp, int n>
inline typename V_TypeTraits<_Tp>::sum_type v_reduce_sum(const v_reg<_Tp, n>& a)
{
    typedef typename V_TypeTraits<_Tp>::sum_type sum_type;
    sum_type total = a.s[0];
    for( int k = 1; k < n; ++k )
        total += a.s[k];
    return total;
}


/** @brief Sums groups of four lanes from each of four float registers.

For every group g of four lanes, output lanes 4g..4g+3 hold the sums of that
group taken from a, b, c and d respectively. */
template<int n>
inline v_reg<float, n> v_reduce_sum4(const v_reg<float, n>& a, const v_reg<float, n>& b,
                                     const v_reg<float, n>& c, const v_reg<float, n>& d)
{
    v_reg<float, n> res;
    for( int g = 0; g < (n/4); ++g )
    {
        const int j = g*4;
        res.s[j + 0] = a.s[j + 0] + a.s[j + 1] + a.s[j + 2] + a.s[j + 3];
        res.s[j + 1] = b.s[j + 0] + b.s[j + 1] + b.s[j + 2] + b.s[j + 3];
        res.s[j + 2] = c.s[j + 0] + c.s[j + 1] + c.s[j + 2] + c.s[j + 3];
        res.s[j + 3] = d.s[j + 0] + d.s[j + 1] + d.s[j + 2] + d.s[j + 3];
    }
    return res;
}


/** @brief Sum of per-lane _absdiff(a, b), accumulated in the sum_type of
V_TypeTraits<_Tp>::abs_type. */
template<typename _Tp, int n>
inline typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type
v_reduce_sad(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
{
    typedef typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type sum_type;
    sum_type total = _absdiff(a.s[0], b.s[0]);
    for( int k = 1; k < n; ++k )
        total += _absdiff(a.s[k], b.s[k]);
    return total;
}


/** @brief Packs the per-lane sign tests into an int.

Bit k of the result is set when V_TypeTraits<_Tp>::reinterpret_int(a[k]) is
negative. */
template<typename _Tp, int n>
inline int v_signmask(const v_reg<_Tp, n>& a)
{
    int bits = 0;
    for( int k = 0; k < n; ++k )
    {
        const bool negative = V_TypeTraits<_Tp>::reinterpret_int(a.s[k]) < 0;
        bits |= negative << k;
    }
    return bits;
}


/** @brief Returns the index of the first lane whose reinterpret_int value is
negative, or 0 when no lane qualifies. */
template<typename _Tp, int n>
inline int v_scan_forward(const v_reg<_Tp, n>& a)
{
    int k = 0;
    while( k < n && !(V_TypeTraits<_Tp>::reinterpret_int(a.s[k]) < 0) )
        ++k;
    return k < n ? k : 0;
}


/** @brief Returns true when reinterpret_int of every lane is negative. */
template<typename _Tp, int n>
inline bool v_check_all(const v_reg<_Tp, n>& a)
{
    bool allNegative = true;
    for( int k = 0; allNegative && k < n; ++k )
        allNegative = V_TypeTraits<_Tp>::reinterpret_int(a.s[k]) < 0;
    return allNegative;
}


/** @brief Returns true when reinterpret_int of at least one lane is negative. */
template<typename _Tp, int n>
inline bool v_check_any(const v_reg<_Tp, n>& a)
{
    bool anyNegative = false;
    for( int k = 0; !anyNegative && k < n; ++k )
        anyNegative = V_TypeTraits<_Tp>::reinterpret_int(a.s[k]) < 0;
    return anyNegative;
}


/** @brief Per-lane selection: picks a[k] where the mask lane is non-zero, b[k] otherwise.

@param mask selector register; each lane is expected to be all zeros or all
            ones (checked with CV_DbgAssert)
@param a    values taken where the mask lane is set
@param b    values taken where the mask lane is zero */
template<typename _Tp, int n>
inline v_reg<_Tp, n> v_select(const v_reg<_Tp, n>& mask,
                              const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
{
    typedef V_TypeTraits<_Tp> Traits;
    typedef typename Traits::int_type int_type;
    v_reg<_Tp, n> res;
    for( int k = 0; k < n; ++k )
    {
        int_type m = Traits::reinterpret_int(mask.s[k]);
        CV_DbgAssert(m == 0 || m == (~(int_type)0));  // restrict mask values: 0 or 0xff/0xffff/etc
        if( m )
            res.s[k] = a.s[k];
        else
            res.s[k] = b.s[k];
    }
    return res;
}


/** @brief Splits a register into two registers of V_TypeTraits<_Tp>::w_type lanes.

@param a  input register
@param b0 receives the lower n/2 lanes of a
@param b1 receives the upper n/2 lanes of a */
template<typename _Tp, int n>
inline void v_expand(const v_reg<_Tp, n>& a,
                     v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& b0,
                     v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& b1)
{
    const int half = n/2;
    for( int k = 0; k < half; ++k )
    {
        b0.s[k] = a.s[k];
        b1.s[k] = a.s[k+half];
    }
}


/** @brief Returns the lower n/2 lanes of a, converted to V_TypeTraits<_Tp>::w_type lanes. */
template<typename _Tp, int n>
inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
v_expand_low(const v_reg<_Tp, n>& a)
{
    typedef v_reg<typename V_TypeTraits<_Tp>::w_type, n/2> wide_reg;
    wide_reg res;
    for( int k = 0; k < (n/2); ++k )
        res.s[k] = a.s[k];
    return res;
}


/** @brief Returns the upper n/2 lanes of a, converted to V_TypeTraits<_Tp>::w_type lanes. */
template<typename _Tp, int n>
inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
v_expand_high(const v_reg<_Tp, n>& a)
{
    typedef v_reg<typename V_TypeTraits<_Tp>::w_type, n/2> wide_reg;
    const int half = n/2;
    wide_reg res;
    for( int k = 0; k < half; ++k )
        res.s[k] = a.s[k+half];
    return res;
}


/** @brief Applies V_TypeTraits<_Tp>::reinterpret_int to every lane and returns
the results as V_TypeTraits<_Tp>::int_type lanes. */
template<typename _Tp, int n>
inline v_reg<typename V_TypeTraits<_Tp>::int_type, n>
v_reinterpret_as_int(const v_reg<_Tp, n>& a)
{
    typedef v_reg<typename V_TypeTraits<_Tp>::int_type, n> int_reg;
    int_reg res;
    for( int k = 0; k < n; ++k )
        res.s[k] = V_TypeTraits<_Tp>::reinterpret_int(a.s[k]);
    return res;
}


/** @brief Applies V_TypeTraits<_Tp>::reinterpret_uint to every lane and returns
the results as V_TypeTraits<_Tp>::uint_type lanes. */
template<typename _Tp, int n>
inline v_reg<typename V_TypeTraits<_Tp>::uint_type, n>
v_reinterpret_as_uint(const v_reg<_Tp, n>& a)
{
    typedef v_reg<typename V_TypeTraits<_Tp>::uint_type, n> uint_reg;
    uint_reg res;
    for( int k = 0; k < n; ++k )
        res.s[k] = V_TypeTraits<_Tp>::reinterpret_uint(a.s[k]);
    return res;
}


template<typename
_Tp,
int
n>
inline
void
v_zip(
const
v_reg<_Tp, n>& a0,
const
v_reg<_Tp, n>& a1,


v_reg<_Tp, n>& b0,
v_reg<_Tp, n>& b1 )


{


int
i;


for( i = 0; i < n/2; i++ )


{


b0.s[i*2] = a0.s[i];


b0.s[i*2+1] = a1.s[i];


}


for( ; i < n; i++ )


{


b1.s[i*2-n] = a0.s[i];


b1.s[i*2-n+1] = a1.s[i];


}


}


/** @brief Loads a 128-bit register from memory.

@param ptr pointer to at least simd128_width / sizeof(_Tp) elements; under
           CV_STRONG_ALIGNMENT it is asserted to be aligned to sizeof(_Tp) */
template<typename _Tp>
inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_load(const _Tp* ptr)
{
    typedef v_reg<_Tp, simd128_width / sizeof(_Tp)> reg_type;
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    return reg_type(ptr);
}


#if CV_SIMD256
/** @brief Loads a 256-bit register from memory.

@param ptr pointer to at least simd256_width / sizeof(_Tp) elements; under
           CV_STRONG_ALIGNMENT it is asserted to be aligned to sizeof(_Tp) */
template<typename _Tp>
inline v_reg<_Tp, simd256_width / sizeof(_Tp)> v256_load(const _Tp* ptr)
{
    typedef v_reg<_Tp, simd256_width / sizeof(_Tp)> reg_type;
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    return reg_type(ptr);
}
#endif


#if CV_SIMD512
/** @brief Loads a 512-bit register from memory.

@param ptr pointer to at least simd512_width / sizeof(_Tp) elements; under
           CV_STRONG_ALIGNMENT it is asserted to be aligned to sizeof(_Tp) */
template<typename _Tp>
inline v_reg<_Tp, simd512_width / sizeof(_Tp)> v512_load(const _Tp* ptr)
{
    typedef v_reg<_Tp, simd512_width / sizeof(_Tp)> reg_type;
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    return reg_type(ptr);
}
#endif


/** @brief Loads a 128-bit register from memory aligned to the register size.

@param ptr pointer asserted (CV_Assert) to be aligned to the size of the
           returned register type */
template<typename _Tp>
inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_load_aligned(const _Tp* ptr)
{
    typedef v_reg<_Tp, simd128_width / sizeof(_Tp)> reg_type;
    CV_Assert(isAligned<sizeof(reg_type)>(ptr));
    return reg_type(ptr);
}


#if CV_SIMD256
/** @brief Loads a 256-bit register from memory aligned to the register size.

@param ptr pointer asserted (CV_Assert) to be aligned to the size of the
           returned register type */
template<typename _Tp>
inline v_reg<_Tp, simd256_width / sizeof(_Tp)> v256_load_aligned(const _Tp* ptr)
{
    typedef v_reg<_Tp, simd256_width / sizeof(_Tp)> reg_type;
    CV_Assert(isAligned<sizeof(reg_type)>(ptr));
    return reg_type(ptr);
}
#endif


#if CV_SIMD512
/** @brief Loads a 512-bit register from memory aligned to the register size.

@param ptr pointer asserted (CV_Assert) to be aligned to the size of the
           returned register type */
template<typename _Tp>
inline v_reg<_Tp, simd512_width / sizeof(_Tp)> v512_load_aligned(const _Tp* ptr)
{
    typedef v_reg<_Tp, simd512_width / sizeof(_Tp)> reg_type;
    CV_Assert(isAligned<sizeof(reg_type)>(ptr));
    return reg_type(ptr);
}
#endif


/** @brief Loads the lower half of a 128-bit register from memory.

Only the first nlanes/2 lanes are written; the upper lanes are left as
default-constructed.
@param ptr pointer to at least nlanes/2 elements */
template<typename _Tp>
inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_load_low(const _Tp* ptr)
{
    typedef v_reg<_Tp, simd128_width / sizeof(_Tp)> reg_type;
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    reg_type res;
    const int half = reg_type::nlanes/2;
    for( int k = 0; k < half; ++k )
        res.s[k] = ptr[k];
    return res;
}


#if CV_SIMD256
/** @brief Loads the lower half of a 256-bit register from memory.

Only the first nlanes/2 lanes are written; the upper lanes are left as
default-constructed.
@param ptr pointer to at least nlanes/2 elements */
template<typename _Tp>
inline v_reg<_Tp, simd256_width / sizeof(_Tp)> v256_load_low(const _Tp* ptr)
{
    typedef v_reg<_Tp, simd256_width / sizeof(_Tp)> reg_type;
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    reg_type res;
    const int half = reg_type::nlanes / 2;
    for( int k = 0; k < half; ++k )
        res.s[k] = ptr[k];
    return res;
}
#endif


#if CV_SIMD512
/** @brief Loads the lower half of a 512-bit register from memory.

Only the first nlanes/2 lanes are written; the upper lanes are left as
default-constructed.
@param ptr pointer to at least nlanes/2 elements */
template<typename _Tp>
inline v_reg<_Tp, simd512_width / sizeof(_Tp)> v512_load_low(const _Tp* ptr)
{
    typedef v_reg<_Tp, simd512_width / sizeof(_Tp)> reg_type;
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    reg_type res;
    const int half = reg_type::nlanes / 2;
    for( int k = 0; k < half; ++k )
        res.s[k] = ptr[k];
    return res;
}
#endif


/** @brief Builds a 128-bit register from two separate memory blocks.

@param loptr source of the lower nlanes/2 lanes
@param hiptr source of the upper nlanes/2 lanes */
template<typename _Tp>
inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_load_halves(const _Tp* loptr, const _Tp* hiptr)
{
    typedef v_reg<_Tp, simd128_width / sizeof(_Tp)> reg_type;
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(loptr));
    CV_Assert(isAligned<sizeof(_Tp)>(hiptr));
#endif
    reg_type res;
    const int half = reg_type::nlanes/2;
    for( int k = 0; k < half; ++k )
    {
        res.s[k] = loptr[k];
        res.s[k+half] = hiptr[k];
    }
    return res;
}


#if CV_SIMD256
/** @brief Builds a 256-bit register from two separate memory blocks.

@param loptr source of the lower nlanes/2 lanes
@param hiptr source of the upper nlanes/2 lanes */
template<typename _Tp>
inline v_reg<_Tp, simd256_width / sizeof(_Tp)> v256_load_halves(const _Tp* loptr, const _Tp* hiptr)
{
    typedef v_reg<_Tp, simd256_width / sizeof(_Tp)> reg_type;
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(loptr));
    CV_Assert(isAligned<sizeof(_Tp)>(hiptr));
#endif
    reg_type res;
    const int half = reg_type::nlanes / 2;
    for( int k = 0; k < half; ++k )
    {
        res.s[k] = loptr[k];
        res.s[k + half] = hiptr[k];
    }
    return res;
}
#endif


#if CV_SIMD512
/** @brief Builds a 512-bit register from two separate memory blocks.

@param loptr source of the lower nlanes/2 lanes
@param hiptr source of the upper nlanes/2 lanes */
template<typename _Tp>
inline v_reg<_Tp, simd512_width / sizeof(_Tp)> v512_load_halves(const _Tp* loptr, const _Tp* hiptr)
{
    typedef v_reg<_Tp, simd512_width / sizeof(_Tp)> reg_type;
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(loptr));
    CV_Assert(isAligned<sizeof(_Tp)>(hiptr));
#endif
    reg_type res;
    const int half = reg_type::nlanes / 2;
    for( int k = 0; k < half; ++k )
    {
        res.s[k] = loptr[k];
        res.s[k + half] = hiptr[k];
    }
    return res;
}
#endif


/** @brief Loads elements of type _Tp into a 128-bit register of
V_TypeTraits<_Tp>::w_type lanes, one source element per lane.

@param ptr pointer to at least as many elements as the result has lanes */
template<typename _Tp>
inline v_reg<typename V_TypeTraits<_Tp>::w_type, simd128_width / sizeof(typename V_TypeTraits<_Tp>::w_type)>
v_load_expand(const _Tp* ptr)
{
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    typedef typename V_TypeTraits<_Tp>::w_type w_type;
    typedef v_reg<w_type, simd128_width / sizeof(w_type)> wide_reg;
    wide_reg res;
    for( int k = 0; k < wide_reg::nlanes; ++k )
        res.s[k] = ptr[k];
    return res;
}


#if CV_SIMD256
/** @brief Loads elements of type _Tp into a 256-bit register of
V_TypeTraits<_Tp>::w_type lanes, one source element per lane.

@param ptr pointer to at least as many elements as the result has lanes */
template<typename _Tp>
inline v_reg<typename V_TypeTraits<_Tp>::w_type, simd256_width / sizeof(typename V_TypeTraits<_Tp>::w_type)>
v256_load_expand(const _Tp* ptr)
{
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    typedef typename V_TypeTraits<_Tp>::w_type w_type;
    typedef v_reg<w_type, simd256_width / sizeof(w_type)> wide_reg;
    wide_reg res;
    for( int k = 0; k < wide_reg::nlanes; ++k )
        res.s[k] = ptr[k];
    return res;
}
#endif


#if CV_SIMD512


/** @brief Load consecutive elements and convert each one to V_TypeTraits<_Tp>::w_type.

As many elements are read from @p ptr as the result has lanes
(simd512_width / sizeof(w_type)).
*/
template<typename _Tp>
inline v_reg<typename V_TypeTraits<_Tp>::w_type, simd512_width / sizeof(typename V_TypeTraits<_Tp>::w_type)>
v512_load_expand(const _Tp* ptr)
{
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    typedef typename V_TypeTraits<_Tp>::w_type wide_t;
    v_reg<wide_t, simd512_width / sizeof(wide_t)> expanded;
    const int lanes = expanded.nlanes;
    for (int k = 0; k < lanes; ++k)
        expanded.s[k] = ptr[k];
    return expanded;
}


#endif


/** @brief Load consecutive elements and convert each one to V_TypeTraits<_Tp>::q_type.

As many elements are read from @p ptr as the result has lanes
(simd128_width / sizeof(q_type)).
*/
template<typename _Tp>
inline v_reg<typename V_TypeTraits<_Tp>::q_type, simd128_width / sizeof(typename V_TypeTraits<_Tp>::q_type)>
v_load_expand_q(const _Tp* ptr)
{
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    typedef typename V_TypeTraits<_Tp>::q_type quad_t;
    v_reg<quad_t, simd128_width / sizeof(quad_t)> expanded;
    const int lanes = expanded.nlanes;
    for (int k = 0; k < lanes; ++k)
        expanded.s[k] = ptr[k];
    return expanded;
}


#if CV_SIMD256


/** @brief Load consecutive elements and convert each one to V_TypeTraits<_Tp>::q_type.

As many elements are read from @p ptr as the result has lanes
(simd256_width / sizeof(q_type)).
*/
template<typename _Tp>
inline v_reg<typename V_TypeTraits<_Tp>::q_type, simd256_width / sizeof(typename V_TypeTraits<_Tp>::q_type)>
v256_load_expand_q(const _Tp* ptr)
{
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    typedef typename V_TypeTraits<_Tp>::q_type quad_t;
    v_reg<quad_t, simd256_width / sizeof(quad_t)> expanded;
    const int lanes = expanded.nlanes;
    for (int k = 0; k < lanes; ++k)
        expanded.s[k] = ptr[k];
    return expanded;
}


#endif


#if CV_SIMD512


/** @brief Load consecutive elements and convert each one to V_TypeTraits<_Tp>::q_type.

As many elements are read from @p ptr as the result has lanes
(simd512_width / sizeof(q_type)).
*/
template<typename _Tp>
inline v_reg<typename V_TypeTraits<_Tp>::q_type, simd512_width / sizeof(typename V_TypeTraits<_Tp>::q_type)>
v512_load_expand_q(const _Tp* ptr)
{
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    typedef typename V_TypeTraits<_Tp>::q_type quad_t;
    v_reg<quad_t, simd512_width / sizeof(quad_t)> expanded;
    const int lanes = expanded.nlanes;
    for (int k = 0; k < lanes; ++k)
        expanded.s[k] = ptr[k];
    return expanded;
}


#endif


/** @brief Load 2*n interleaved elements and split them into two registers.

Element 2*k goes to lane k of @p a and element 2*k+1 to lane k of @p b.
*/
template<typename _Tp, int n>
inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, v_reg<_Tp, n>& b)
{
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    for (int lane = 0; lane < n; ++lane)
    {
        const _Tp* group = ptr + 2 * lane;
        a.s[lane] = group[0];
        b.s[lane] = group[1];
    }
}


/** @brief Load 3*n interleaved elements and split them into three registers.

Elements 3*k, 3*k+1 and 3*k+2 go to lane k of @p a, @p b and @p c respectively.
*/
template<typename _Tp, int n>
inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, v_reg<_Tp, n>& b, v_reg<_Tp, n>& c)
{
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    for (int lane = 0; lane < n; ++lane)
    {
        const _Tp* group = ptr + 3 * lane;
        a.s[lane] = group[0];
        b.s[lane] = group[1];
        c.s[lane] = group[2];
    }
}


/** @brief Load 4*n interleaved elements and split them into four registers.

Elements 4*k .. 4*k+3 go to lane k of @p a, @p b, @p c and @p d respectively.
*/
template<typename _Tp, int n>
inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, v_reg<_Tp, n>& b,
                                v_reg<_Tp, n>& c, v_reg<_Tp, n>& d)
{
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    for (int lane = 0; lane < n; ++lane)
    {
        const _Tp* group = ptr + 4 * lane;
        a.s[lane] = group[0];
        b.s[lane] = group[1];
        c.s[lane] = group[2];
        d.s[lane] = group[3];
    }
}


/** @brief Store two registers to memory with their lanes interleaved.

Lane k of @p a is written to ptr[2*k] and lane k of @p b to ptr[2*k+1].
The store-mode argument is accepted but not used by this implementation.
*/
template<typename _Tp, int n>
inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
                                const v_reg<_Tp, n>& b,
                                hal::StoreMode /*mode*/=hal::STORE_UNALIGNED)
{
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    for (int lane = 0; lane < n; ++lane)
    {
        _Tp* group = ptr + 2 * lane;
        group[0] = a.s[lane];
        group[1] = b.s[lane];
    }
}


/** @brief Store three registers to memory with their lanes interleaved.

Lane k of @p a, @p b and @p c is written to ptr[3*k], ptr[3*k+1] and ptr[3*k+2].
The store-mode argument is accepted but not used by this implementation.
*/
template<typename _Tp, int n>
inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
                                const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c,
                                hal::StoreMode /*mode*/=hal::STORE_UNALIGNED)
{
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    for (int lane = 0; lane < n; ++lane)
    {
        _Tp* group = ptr + 3 * lane;
        group[0] = a.s[lane];
        group[1] = b.s[lane];
        group[2] = c.s[lane];
    }
}


/** @brief Store four registers to memory with their lanes interleaved.

Lane k of @p a, @p b, @p c and @p d is written to ptr[4*k] .. ptr[4*k+3].
The store-mode argument is accepted but not used by this implementation.
*/
template<typename _Tp, int n>
inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
                                const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c,
                                const v_reg<_Tp, n>& d,
                                hal::StoreMode /*mode*/=hal::STORE_UNALIGNED)
{
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    for (int lane = 0; lane < n; ++lane)
    {
        _Tp* group = ptr + 4 * lane;
        group[0] = a.s[lane];
        group[1] = b.s[lane];
        group[2] = c.s[lane];
        group[3] = d.s[lane];
    }
}


/** @brief Copy all n lanes of @p a to consecutive elements starting at @p ptr.

With CV_STRONG_ALIGNMENT enabled, @p ptr is asserted to be aligned to sizeof(_Tp).
*/
template<typename _Tp, int n>
inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a)
{
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    const _Tp* lanes = a.s;
    for (int k = 0; k < n; ++k)
    {
        ptr[k] = lanes[k];
    }
}


/** @brief Store overload taking a hal::StoreMode.

The mode argument is ignored; after the optional alignment assertion the work
is forwarded to the two-argument v_store.
*/
template<typename _Tp, int n>
inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/)
{
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    // Same element-wise copy as the mode-less overload.
    v_store(ptr, a);
}


/** @brief Copy the first n/2 lanes of @p a to consecutive elements starting at @p ptr. */
template<typename _Tp, int n>
inline void v_store_low(_Tp* ptr, const v_reg<_Tp, n>& a)
{
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    const int half = n / 2;
    for (int k = 0; k < half; ++k)
    {
        ptr[k] = a.s[k];
    }
}


/** @brief Copy lanes n/2 .. 2*(n/2)-1 of @p a to consecutive elements starting at @p ptr. */
template<typename _Tp, int n>
inline void v_store_high(_Tp* ptr, const v_reg<_Tp, n>& a)
{
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    const int half = n / 2;
    for (int k = 0; k < half; ++k)
    {
        ptr[k] = a.s[half + k];
    }
}


/** @brief Store all lanes of @p a to @p ptr, which must be aligned to the register size.

The alignment to sizeof(v_reg<_Tp, n>) is asserted unconditionally; the copy
itself is delegated to v_store.
*/
template<typename _Tp, int n>
inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a)
{
    CV_Assert(isAligned<sizeof(v_reg<_Tp, n>)>(ptr));
    // Plain element-wise store once the alignment requirement is verified.
    v_store(ptr, a);
}


/** @brief Aligned store variant; in this implementation it behaves exactly like v_store_aligned.

The alignment to sizeof(v_reg<_Tp, n>) is asserted, then v_store performs the copy.
*/
template<typename _Tp, int n>
inline void v_store_aligned_nocache(_Tp* ptr, const v_reg<_Tp, n>& a)
{
    CV_Assert(isAligned<sizeof(v_reg<_Tp, n>)>(ptr));
    // No cache-bypassing is done here; this is an ordinary store.
    v_store(ptr, a);
}


/** @brief Aligned store overload taking a hal::StoreMode.

The mode argument is ignored. The alignment to sizeof(v_reg<_Tp, n>) is asserted,
then v_store performs the copy.
*/
template<typename _Tp, int n>
inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/)
{
    CV_Assert(isAligned<sizeof(v_reg<_Tp, n>)>(ptr));
    // The requested store mode makes no difference for this implementation.
    v_store(ptr, a);
}


/** @brief Build a register from the first n/2 lanes of @p a followed by the first n/2 lanes of @p b. */
template<typename _Tp, int n>
inline v_reg<_Tp, n> v_combine_low(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
{
    const int half = n / 2;
    v_reg<_Tp, n> merged;
    for (int k = 0; k < half; ++k)
    {
        merged.s[k] = a.s[k];
        merged.s[half + k] = b.s[k];
    }
    return merged;
}


/** @brief Build a register from the upper n/2 lanes of @p a followed by the upper n/2 lanes of @p b.

"Upper" means the lanes starting at index n/2.
*/
template<typename _Tp, int n>
inline v_reg<_Tp, n> v_combine_high(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
{
    const int half = n / 2;
    v_reg<_Tp, n> merged;
    for (int k = 0; k < half; ++k)
    {
        merged.s[k] = a.s[half + k];
        merged.s[half + k] = b.s[half + k];
    }
    return merged;
}


/** @brief Produce both half-combinations of @p a and @p b in one pass.

@p low receives the first n/2 lanes of a then of b; @p high receives the lanes
starting at n/2 of a then of b. The four writes per iteration are kept in the
order low/low/high/high.
*/
template<typename _Tp, int n>
inline void v_recombine(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
                        v_reg<_Tp, n>& low, v_reg<_Tp, n>& high)
{
    const int half = n / 2;
    for (int k = 0; k < half; ++k)
    {
        const int upper = half + k;
        low.s[k] = a.s[k];
        low.s[upper] = b.s[k];
        high.s[k] = a.s[upper];
        high.s[upper] = b.s[upper];
    }
}


/** @brief Return a copy of @p a with the lane order reversed (lane k takes lane n-1-k). */
template<typename _Tp, int n>
inline v_reg<_Tp, n> v_reverse(const v_reg<_Tp, n>& a)
{
    v_reg<_Tp, n> flipped;
    const int last = n - 1;
    for (int k = 0; k < n; ++k)
    {
        flipped.s[k] = a.s[last - k];
    }
    return flipped;
}


/** @brief Extract n lanes from the concatenation of @p a and @p b, starting at lane s of @p a.

The first n-s result lanes are a.s[s..n-1]; the remaining lanes are taken from
the start of @p b.
*/
template<int s, typename _Tp, int n>
inline v_reg<_Tp, n> v_extract(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
{
    v_reg<_Tp, n> window;
    const int fromA = n - s;   // number of lanes supplied by a
    int k = 0;
    while (k < fromA)
    {
        window.s[k] = a.s[k + s];
        ++k;
    }
    // Continue with the same index so the remaining lanes are filled from b.
    while (k < n)
    {
        window.s[k] = b.s[k - fromA];
        ++k;
    }
    return window;
}


/** @brief Return the value of lane s of @p v.

The index is checked with a debug assertion only.
*/
template<int s, typename _Tp, int n>
inline _Tp v_extract_n(const v_reg<_Tp, n>& v)
{
    CV_DbgAssert(s >= 0 && s < n);
    const _Tp picked = v.s[s];
    return picked;
}


/** @brief Return a register built by v_reg<_Tp, n>::all() from lane i of @p a.

The index is checked with a debug assertion only.
*/
template<int i, typename _Tp, int n>
inline v_reg<_Tp, n> v_broadcast_element(const v_reg<_Tp, n>& a)
{
    CV_DbgAssert(i >= 0 && i < n);
    typedef v_reg<_Tp, n> vec_t;
    return vec_t::all(a.s[i]);
}


/** @brief Convert each float lane to int with cvRound. */
template<int n> inline v_reg<int, n> v_round(const v_reg<float, n>& a)
{
    v_reg<int, n> rounded;
    for (int k = 0; k < n; ++k)
    {
        rounded.s[k] = cvRound(a.s[k]);
    }
    return rounded;
}


/** @brief Round two double registers with cvRound into one int register.

Lanes 0..n-1 come from @p a and lanes n..2n-1 from @p b.
*/
template<int n> inline v_reg<int, n*2> v_round(const v_reg<double, n>& a, const v_reg<double, n>& b)
{
    v_reg<int, n*2> rounded;
    for (int k = 0; k < n; ++k)
    {
        rounded.s[k] = cvRound(a.s[k]);
        rounded.s[n + k] = cvRound(b.s[k]);
    }
    return rounded;
}


/** @brief Convert each float lane to int with cvFloor. */
template<int n> inline v_reg<int, n> v_floor(const v_reg<float, n>& a)
{
    v_reg<int, n> floored;
    for (int k = 0; k < n; ++k)
    {
        floored.s[k] = cvFloor(a.s[k]);
    }
    return floored;
}


/** @brief Convert each float lane to int with cvCeil. */
template<int n> inline v_reg<int, n> v_ceil(const v_reg<float, n>& a)
{
    v_reg<int, n> ceiled;
    for (int k = 0; k < n; ++k)
    {
        ceiled.s[k] = cvCeil(a.s[k]);
    }
    return ceiled;
}


/** @brief Convert each float lane to int with a plain float-to-int cast. */
template<int n> inline v_reg<int, n> v_trunc(const v_reg<float, n>& a)
{
    v_reg<int, n> truncated;
    for (int k = 0; k < n; ++k)
    {
        truncated.s[k] = static_cast<int>(a.s[k]);
    }
    return truncated;
}


/** @brief Round a double register with cvRound into the lower half of an int register.

Lanes n..2n-1 of the result are set to zero.
*/
template<int n> inline v_reg<int, n*2> v_round(const v_reg<double, n>& a)
{
    v_reg<int, n*2> rounded;
    for (int k = 0; k < n; ++k)
    {
        rounded.s[k] = cvRound(a.s[k]);
        rounded.s[n + k] = 0;
    }
    return rounded;
}


/** @brief Apply cvFloor to a double register, filling the lower half of an int register.

Lanes n..2n-1 of the result are set to zero.
*/
template<int n> inline v_reg<int, n*2> v_floor(const v_reg<double, n>& a)
{
    v_reg<int, n*2> floored;
    for (int k = 0; k < n; ++k)
    {
        floored.s[k] = cvFloor(a.s[k]);
        floored.s[n + k] = 0;
    }
    return floored;
}


/** @brief Apply cvCeil to a double register, filling the lower half of an int register.

Lanes n..2n-1 of the result are set to zero.
*/
template<int n> inline v_reg<int, n*2> v_ceil(const v_reg<double, n>& a)
{
    v_reg<int, n*2> ceiled;
    for (int k = 0; k < n; ++k)
    {
        ceiled.s[k] = cvCeil(a.s[k]);
        ceiled.s[n + k] = 0;
    }
    return ceiled;
}


/** @brief Cast each double lane to int, filling the lower half of an int register.

Lanes n..2n-1 of the result are set to zero.
*/
template<int n> inline v_reg<int, n*2> v_trunc(const v_reg<double, n>& a)
{
    v_reg<int, n*2> truncated;
    for (int k = 0; k < n; ++k)
    {
        truncated.s[k] = static_cast<int>(a.s[k]);
        truncated.s[n + k] = 0;
    }
    return truncated;
}


/** @brief Convert each int lane to float. */
template<int n> inline v_reg<float, n> v_cvt_f32(const v_reg<int, n>& a)
{
    v_reg<float, n> converted;
    for (int k = 0; k < n; ++k)
    {
        converted.s[k] = static_cast<float>(a.s[k]);
    }
    return converted;
}


/** @brief Convert a double register to float, filling the lower half of the result.

Lanes n..2n-1 of the result are set to zero.
*/
template<int n> inline v_reg<float, n*2> v_cvt_f32(const v_reg<double, n>& a)
{
    v_reg<float, n*2> converted;
    for (int k = 0; k < n; ++k)
    {
        converted.s[k] = static_cast<float>(a.s[k]);
        converted.s[n + k] = 0;
    }
    return converted;
}


/** @brief Convert two double registers to one float register.

Lanes 0..n-1 come from @p a and lanes n..2n-1 from @p b.
*/
template<int n> inline v_reg<float, n*2> v_cvt_f32(const v_reg<double, n>& a, const v_reg<double, n>& b)
{
    v_reg<float, n*2> converted;
    for (int k = 0; k < n; ++k)
    {
        converted.s[k] = static_cast<float>(a.s[k]);
        converted.s[n + k] = static_cast<float>(b.s[k]);
    }
    return converted;
}


/** @brief Convert the first n/2 int lanes of @p a to double. */
template<int n> CV_INLINE v_reg<double, n/2> v_cvt_f64(const v_reg<int, n>& a)
{
    const int half = n / 2;
    v_reg<double, (n/2)> converted;
    for (int k = 0; k < half; ++k)
    {
        converted.s[k] = static_cast<double>(a.s[k]);
    }
    return converted;
}


/** @brief Convert the int lanes of @p a starting at index n/2 to double. */
template<int n> CV_INLINE v_reg<double, (n/2)> v_cvt_f64_high(const v_reg<int, n>& a)
{
    const int half = n / 2;
    v_reg<double, (n/2)> converted;
    for (int k = 0; k < half; ++k)
    {
        converted.s[k] = static_cast<double>(a.s[half + k]);
    }
    return converted;
}


/** @brief Convert the first n/2 float lanes of @p a to double. */
template<int n> CV_INLINE v_reg<double, (n/2)> v_cvt_f64(const v_reg<float, n>& a)
{
    const int half = n / 2;
    v_reg<double, (n/2)> converted;
    for (int k = 0; k < half; ++k)
    {
        converted.s[k] = static_cast<double>(a.s[k]);
    }
    return converted;
}


/** @brief Convert the float lanes of @p a starting at index n/2 to double. */
template<int n> CV_INLINE v_reg<double, (n/2)> v_cvt_f64_high(const v_reg<float, n>& a)
{
    const int half = n / 2;
    v_reg<double, (n/2)> converted;
    for (int k = 0; k < half; ++k)
    {
        converted.s[k] = static_cast<double>(a.s[half + k]);
    }
    return converted;
}


/** @brief Convert each int64 lane to double. */
template<int n> CV_INLINE v_reg<double, n> v_cvt_f64(const v_reg<int64, n>& a)
{
    v_reg<double, n> converted;
    for (int k = 0; k < n; ++k)
    {
        converted.s[k] = static_cast<double>(a.s[k]);
    }
    return converted;
}


/** @brief Gather table entries: lane k of the result is tab[idx[k]].

One index is read from @p idx for every lane of the result
(simd128_width / sizeof(_Tp) lanes).
*/
template<typename _Tp> inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_lut(const _Tp* tab, const int* idx)
{
    v_reg<_Tp, simd128_width / sizeof(_Tp)> gathered;
    const int lanes = gathered.nlanes;
    for (int k = 0; k < lanes; ++k)
    {
        const int at = idx[k];
        gathered.s[k] = tab[at];
    }
    return gathered;
}


/** @brief Gather pairs of adjacent table entries.

Lane k of the result is tab[idx[k / 2] + k % 2], so each index selects two
consecutive table elements.
*/
template<typename _Tp> inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_lut_pairs(const _Tp* tab, const int* idx)
{
    v_reg<_Tp, simd128_width / sizeof(_Tp)> gathered;
    const int lanes = gathered.nlanes;
    for (int k = 0; k < lanes; ++k)
    {
        const int base = idx[k / 2];
        const int within = k % 2;
        gathered.s[k] = tab[base + within];
    }
    return gathered;
}


/** @brief Gather groups of up to four adjacent table entries.

Lane k of the result is tab[idx[k / 4] + k % 4].
*/
template<typename _Tp> inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_lut_quads(const _Tp* tab, const int* idx)
{
    v_reg<_Tp, simd128_width / sizeof(_Tp)> gathered;
    const int lanes = gathered.nlanes;
    for (int k = 0; k < lanes; ++k)
    {
        const int base = idx[k / 4];
        const int within = k % 4;
        gathered.s[k] = tab[base + within];
    }
    return gathered;
}


/** @brief Gather int table entries using a register of indices: result lane k is tab[idx.s[k]]. */
template<int n> inline v_reg<int, n> v_lut(const int* tab, const v_reg<int, n>& idx)
{
    v_reg<int, n> gathered;
    for (int k = 0; k < n; ++k)
    {
        const int at = idx.s[k];
        gathered.s[k] = tab[at];
    }
    return gathered;
}


/** @brief Gather unsigned table entries using a register of indices.

Result lane k is tab[idx.s[k]].

@param tab table of unsigned values to read from
@param idx register holding one table index per lane
@return register of the gathered unsigned values

@note The accumulator is declared with the return type v_reg<unsigned, n>.
It used to be v_reg<int, n>, which does not match the declared return type.
*/
template<int n> inline v_reg<unsigned, n> v_lut(const unsigned* tab, const v_reg<int, n>& idx)
{
    v_reg<unsigned, n> c;
    for (int i = 0; i < n; i++)
        c.s[i] = tab[idx.s[i]];
    return c;
}


/** @brief Gather float table entries using a register of indices: result lane k is tab[idx.s[k]]. */
template<int n> inline v_reg<float, n> v_lut(const float* tab, const v_reg<int, n>& idx)
{
    v_reg<float, n> gathered;
    for (int k = 0; k < n; ++k)
    {
        const int at = idx.s[k];
        gathered.s[k] = tab[at];
    }
    return gathered;
}


/** @brief Gather double table entries using the first n/2 indices of @p idx.

Result lane k is tab[idx.s[k]] for k in 0..n/2-1; the remaining indices are not used.
*/
template<int n> inline v_reg<double, n/2> v_lut(const double* tab, const v_reg<int, n>& idx)
{
    const int half = n / 2;
    v_reg<double, n/2> gathered;
    for (int k = 0; k < half; ++k)
    {
        const int at = idx.s[k];
        gathered.s[k] = tab[at];
    }
    return gathered;
}


/** @brief Gather adjacent float pairs from a table and split them into two registers.

For every lane k, x.s[k] = tab[idx.s[k]] and y.s[k] = tab[idx.s[k] + 1].
*/
template<int n> inline void v_lut_deinterleave(const float* tab, const v_reg<int, n>& idx,
                                               v_reg<float, n>& x, v_reg<float, n>& y)
{
    for (int k = 0; k < n; ++k)
    {
        const float* entry = tab + idx.s[k];
        x.s[k] = entry[0];
        y.s[k] = entry[1];
    }
}


/** @brief Gather adjacent double pairs from a table and split them into two registers.

Only the first n indices of @p idx (which holds 2*n) are used:
x.s[k] = tab[idx.s[k]] and y.s[k] = tab[idx.s[k] + 1].
*/
template<int n> inline void v_lut_deinterleave(const double* tab, const v_reg<int, n*2>& idx,
                                               v_reg<double, n>& x, v_reg<double, n>& y)
{
    for (int k = 0; k < n; ++k)
    {
        const double* entry = tab + idx.s[k];
        x.s[k] = entry[0];
        y.s[k] = entry[1];
    }
}


/** @brief Within every group of four lanes, swap the two middle lanes.

Group lanes (0,1,2,3) become (0,2,1,3). Lanes beyond 4*(n/4) are not written.
*/
template<typename _Tp, int n> inline v_reg<_Tp, n> v_interleave_pairs(const v_reg<_Tp, n>& vec)
{
    const int order[4] = { 0, 2, 1, 3 };   // source lane for each destination lane
    v_reg<_Tp, n> shuffled;
    for (int g = 0; g < n/4; ++g)
    {
        const int base = 4 * g;
        for (int j = 0; j < 4; ++j)
            shuffled.s[base + j] = vec.s[base + order[j]];
    }
    return shuffled;
}


/** @brief Within every group of eight lanes, interleave the first four lanes with the last four.

Group lanes (0..7) become (0,4,1,5,2,6,3,7). Lanes beyond 8*(n/8) are not written.
*/
template<typename _Tp, int n> inline v_reg<_Tp, n> v_interleave_quads(const v_reg<_Tp, n>& vec)
{
    const int order[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };   // source lane for each destination lane
    v_reg<_Tp, n> shuffled;
    for (int g = 0; g < n/8; ++g)
    {
        const int base = 8 * g;
        for (int j = 0; j < 8; ++j)
            shuffled.s[base + j] = vec.s[base + order[j]];
    }
    return shuffled;
}


/** @brief Drop every fourth lane and pack the remaining triplets contiguously.

For each group g of four input lanes, lanes 4g..4g+2 are copied to output lanes
3g..3g+2. Output lanes from 3*(n/4) onward are not written by this function.
*/
template<typename _Tp, int n> inline v_reg<_Tp, n> v_pack_triplets(const v_reg<_Tp, n>& vec)
{
    v_reg<_Tp, n> packed;
    for (int g = 0; g < n/4; ++g)
    {
        const int dst = 3 * g;
        const int src = 4 * g;
        for (int j = 0; j < 3; ++j)
            packed.s[dst + j] = vec.s[src + j];
    }
    return packed;
}


template<typename
_Tp,
int
n>


inline
void
v_transpose4x4(
v_reg<_Tp, n>& a0,
const
v_reg<_Tp, n>& a1,


const
v_reg<_Tp, n>& a2,
const
v_reg<_Tp, n>& a3,


v_reg<_Tp, n>& b0,
v_reg<_Tp, n>& b1,


v_reg<_Tp, n>& b2,
v_reg<_Tp, n>& b3 )


{


for
(int
i = 0; i < n / 4; i++)


{


b0.s[0 + i*4] = a0.s[0 + i*4]; b0.s[1 + i*4] = a1.s[0 + i*4];


b0.s[2 + i*4] = a2.s[0 + i*4]; b0.s[3 + i*4] = a3.s[0 + i*4];


b1.s[0 + i*4] = a0.s[1 + i*4]; b1.s[1 + i*4] = a1.s[1 + i*4];


b1.s[2 + i*4] = a2.s[1 + i*4]; b1.s[3 + i*4] = a3.s[1 + i*4];


b2.s[0 + i*4] = a0.s[2 + i*4]; b2.s[1 + i*4] = a1.s[2 + i*4];


b2.s[2 + i*4] = a2.s[2 + i*4]; b2.s[3 + i*4] = a3.s[2 + i*4];


b3.s[0 + i*4] = a0.s[3 + i*4]; b3.s[1 + i*4] = a1.s[3 + i*4];


b3.s[2 + i*4] = a2.s[3 + i*4]; b3.s[3 + i*4] = a3.s[3 + i*4];


}


}


//! Helper macro: defines prefix##_setzero_##suffix(), which returns _Tpvec::zero().
//! It is instantiated below for the 128-bit vector types and, when enabled, the 256/512-bit ones.
#define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, prefix, suffix) \


inline _Tpvec prefix##_setzero_##suffix() { return _Tpvec::zero(); }


OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x16, v, u8)


OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x16, v, s8)


OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x8, v, u16)


OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x8, v, s16)


OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x4, v, u32)


OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x4, v, s32)


OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x4, v, f32)


OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x2, v, f64)


OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x2, v, u64)


OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x2, v, s64)


// v256_setzero_* variants, only when CV_SIMD256 is enabled.
#if CV_SIMD256


OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x32, v256, u8)


OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x32, v256, s8)


OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x16, v256, u16)


OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x16, v256, s16)


OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x8, v256, u32)


OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x8, v256, s32)


OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x8, v256, f32)


OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x4, v256, f64)


OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x4, v256, u64)


OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x4, v256, s64)


#endif


// v512_setzero_* variants, only when CV_SIMD512 is enabled.
#if CV_SIMD512


OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x64, v512, u8)


OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x64, v512, s8)


OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x32, v512, u16)


OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x32, v512, s16)


OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x16, v512, u32)


OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x16, v512, s32)


OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x16, v512, f32)


OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x8, v512, f64)


OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x8, v512, u64)


OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x8, v512, s64)


#endif


//! Helper macro: defines prefix##_setall_##suffix(_Tp val), which returns _Tpvec::all(val).
//! It is instantiated below for the 128-bit vector types and, when enabled, the 256/512-bit ones.
#define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, prefix, suffix) \


inline _Tpvec prefix##_setall_##suffix(_Tp val) { return _Tpvec::all(val); }


OPENCV_HAL_IMPL_C_INIT_VAL(v_uint8x16, uchar, v, u8)


OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x16, schar, v, s8)


OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x8, ushort, v, u16)


OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x8,
short, v, s16)


OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x4,
unsigned, v, u32)


OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x4,
int, v, s32)


OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x4,
float, v, f32)


OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x2,
double, v, f64)


OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x2, uint64, v, u64)


OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x2, int64, v, s64)


// v256_setall_* variants, only when CV_SIMD256 is enabled.
#if CV_SIMD256


OPENCV_HAL_IMPL_C_INIT_VAL(v_uint8x32, uchar, v256, u8)


OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x32, schar, v256, s8)


OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x16, ushort, v256, u16)


OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x16,
short, v256, s16)


OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x8,
unsigned, v256, u32)


OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x8,
int, v256, s32)


OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x8,
float, v256, f32)


OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x4,
double, v256, f64)


OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x4, uint64, v256, u64)


OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x4, int64, v256, s64)


#endif


// v512_setall_* variants, only when CV_SIMD512 is enabled.
#if CV_SIMD512


OPENCV_HAL_IMPL_C_INIT_VAL(v_uint8x64, uchar, v512, u8)


OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x64, schar, v512, s8)


OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x32, ushort, v512, u16)


OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x32,
short, v512, s16)


OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x16,
unsigned, v512, u32)


OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x16,
int, v512, s32)


OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x16,
float, v512, f32)


OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x8,
double, v512, f64)


OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x8, uint64, v512, u64)


OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x8, int64, v512, s64)


#endif


//! Helper macro: defines v_reinterpret_as_##suffix(a), which forwards to
//! a.reinterpret_as<_Tp, m>() with m = n0*sizeof(_Tp0)/sizeof(_Tp), i.e. the
//! lane count is rescaled by the ratio of the element sizes.
#define OPENCV_HAL_IMPL_C_REINTERPRET(_Tp, suffix) \


template<typename _Tp0, int n0> inline v_reg<_Tp, n0*sizeof(_Tp0)/sizeof(_Tp)> \


v_reinterpret_as_##suffix(const v_reg<_Tp0, n0>& a) \


{ return a.template reinterpret_as<_Tp, n0*sizeof(_Tp0)/sizeof(_Tp)>(); }


OPENCV_HAL_IMPL_C_REINTERPRET(uchar, u8)


OPENCV_HAL_IMPL_C_REINTERPRET(schar, s8)


OPENCV_HAL_IMPL_C_REINTERPRET(ushort, u16)


OPENCV_HAL_IMPL_C_REINTERPRET(short, s16)


OPENCV_HAL_IMPL_C_REINTERPRET(unsigned, u32)


OPENCV_HAL_IMPL_C_REINTERPRET(int, s32)


OPENCV_HAL_IMPL_C_REINTERPRET(float, f32)


OPENCV_HAL_IMPL_C_REINTERPRET(double, f64)


OPENCV_HAL_IMPL_C_REINTERPRET(uint64, u64)


OPENCV_HAL_IMPL_C_REINTERPRET(int64, s64)


//! Helper macro: defines v_shl<shift>(a) for element type _Tp as `a << shift`,
//! i.e. a left shift by a compile-time amount using the register's operator<<.
#define OPENCV_HAL_IMPL_C_SHIFTL(_Tp) \


template<int shift, int n> inline v_reg<_Tp, n> v_shl(const v_reg<_Tp, n>& a) \


{ return a << shift; }


OPENCV_HAL_IMPL_C_SHIFTL(ushort)


OPENCV_HAL_IMPL_C_SHIFTL(short)


OPENCV_HAL_IMPL_C_SHIFTL(unsigned)


OPENCV_HAL_IMPL_C_SHIFTL(int)


OPENCV_HAL_IMPL_C_SHIFTL(uint64)


OPENCV_HAL_IMPL_C_SHIFTL(int64)


//! Helper macro: defines v_shr<shift>(a) for element type _Tp as `a >> shift`,
//! i.e. a right shift by a compile-time amount using the register's operator>>.
#define OPENCV_HAL_IMPL_C_SHIFTR(_Tp) \


template<int shift, int n> inline v_reg<_Tp, n> v_shr(const v_reg<_Tp, n>& a) \


{ return a >> shift; }


OPENCV_HAL_IMPL_C_SHIFTR(ushort)


OPENCV_HAL_IMPL_C_SHIFTR(short)


OPENCV_HAL_IMPL_C_SHIFTR(unsigned)


OPENCV_HAL_IMPL_C_SHIFTR(int)


OPENCV_HAL_IMPL_C_SHIFTR(uint64)


OPENCV_HAL_IMPL_C_SHIFTR(int64)


//! Helper macro: defines v_rshr<shift>(a) for element type _Tp, a rounding right shift.
//! Each lane has (_Tp)1 << (shift - 1) added before it is shifted right by `shift`,
//! and the result is cast back to _Tp.
//! NOTE(review): shift == 0 would evaluate `1 << -1`; callers presumably use shift >= 1 - confirm.
#define OPENCV_HAL_IMPL_C_RSHIFTR(_Tp) \


template<int shift, int n> inline v_reg<_Tp, n> v_rshr(const v_reg<_Tp, n>& a) \


{ \


v_reg<_Tp, n> c; \


for( int i = 0; i < n; i++ ) \


c.s[i] = (_Tp)((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \


return c; \


}


OPENCV_HAL_IMPL_C_RSHIFTR(ushort)


OPENCV_HAL_IMPL_C_RSHIFTR(short)


OPENCV_HAL_IMPL_C_RSHIFTR(unsigned)


OPENCV_HAL_IMPL_C_RSHIFTR(int)


OPENCV_HAL_IMPL_C_RSHIFTR(uint64)


OPENCV_HAL_IMPL_C_RSHIFTR(int64)


//! Helper macro: defines v_##pack_suffix(a, b), packing two registers of _Tp into one
//! register of _Tpn with twice the lane count. Each lane is converted with cast<_Tpn>;
//! lanes 0..n-1 come from a and lanes n..2n-1 from b.
//! The instantiations below use saturate_cast for the 16/32-bit sources and
//! static_cast for the 64-bit sources.
#define OPENCV_HAL_IMPL_C_PACK(_Tp, _Tpn, pack_suffix, cast) \


template<int n> inline v_reg<_Tpn, 2*n> v_##pack_suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \


{ \


v_reg<_Tpn, 2*n> c; \


for( int i = 0; i < n; i++ ) \


{ \


c.s[i] = cast<_Tpn>(a.s[i]); \


c.s[i+n] = cast<_Tpn>(b.s[i]); \


} \


return c; \


}


OPENCV_HAL_IMPL_C_PACK(ushort, uchar, pack,
saturate_cast)


OPENCV_HAL_IMPL_C_PACK(short, schar, pack,
saturate_cast)


OPENCV_HAL_IMPL_C_PACK(unsigned, ushort, pack,
saturate_cast)


OPENCV_HAL_IMPL_C_PACK(int,
short, pack,
saturate_cast)


OPENCV_HAL_IMPL_C_PACK(uint64,
unsigned, pack, static_cast)


OPENCV_HAL_IMPL_C_PACK(int64,
int, pack, static_cast)


OPENCV_HAL_IMPL_C_PACK(short, uchar, pack_u,
saturate_cast)


OPENCV_HAL_IMPL_C_PACK(int, ushort, pack_u,
saturate_cast)


//! Helper macro: defines v_rshr_##pack_suffix<shift>(a, b), a rounding right shift followed
//! by packing. Each lane has (_Tp)1 << (shift - 1) added, is shifted right by `shift`,
//! and is then converted with cast<_Tpn>; lanes 0..n-1 come from a and lanes n..2n-1 from b.
#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tp, _Tpn, pack_suffix, cast) \


template<int shift, int n> inline v_reg<_Tpn, 2*n> v_rshr_##pack_suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \


{ \


v_reg<_Tpn, 2*n> c; \


for( int i = 0; i < n; i++ ) \


{ \


c.s[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \


c.s[i+n] = cast<_Tpn>((b.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \


} \


return c; \


}


OPENCV_HAL_IMPL_C_RSHR_PACK(ushort, uchar, pack,
saturate_cast)


OPENCV_HAL_IMPL_C_RSHR_PACK(short, schar, pack,
saturate_cast)


OPENCV_HAL_IMPL_C_RSHR_PACK(unsigned, ushort, pack,
saturate_cast)


OPENCV_HAL_IMPL_C_RSHR_PACK(int,
short, pack,
saturate_cast)


OPENCV_HAL_IMPL_C_RSHR_PACK(uint64,
unsigned, pack, static_cast)


OPENCV_HAL_IMPL_C_RSHR_PACK(int64,
int, pack, static_cast)


OPENCV_HAL_IMPL_C_RSHR_PACK(short, uchar, pack_u,
saturate_cast)


OPENCV_HAL_IMPL_C_RSHR_PACK(int, ushort, pack_u,
saturate_cast)


//! Helper macro: defines v_##pack_suffix##_store(ptr, a), which converts each of the n lanes
//! of a with cast<_Tpn> and writes the results to ptr[0..n-1].
#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tp, _Tpn, pack_suffix, cast) \


template<int n> inline void v_##pack_suffix##_store(_Tpn* ptr, const v_reg<_Tp, n>& a) \


{ \


for( int i = 0; i < n; i++ ) \


ptr[i] = cast<_Tpn>(a.s[i]); \


}


OPENCV_HAL_IMPL_C_PACK_STORE(ushort, uchar, pack,
saturate_cast)


OPENCV_HAL_IMPL_C_PACK_STORE(short, schar, pack,
saturate_cast)


OPENCV_HAL_IMPL_C_PACK_STORE(unsigned, ushort, pack,
saturate_cast)


OPENCV_HAL_IMPL_C_PACK_STORE(int,
short, pack,
saturate_cast)


OPENCV_HAL_IMPL_C_PACK_STORE(uint64,
unsigned, pack, static_cast)


OPENCV_HAL_IMPL_C_PACK_STORE(int64,
int, pack, static_cast)


OPENCV_HAL_IMPL_C_PACK_STORE(short, uchar, pack_u,
saturate_cast)


OPENCV_HAL_IMPL_C_PACK_STORE(int, ushort, pack_u,
saturate_cast)


//! Helper macro: defines v_rshr_##pack_suffix##_store<shift>(ptr, a). Each lane of a has
//! (_Tp)1 << (shift - 1) added, is shifted right by `shift`, converted with cast<_Tpn>,
//! and written to ptr[0..n-1].
#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tp, _Tpn, pack_suffix, cast) \


template<int shift, int n> inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const v_reg<_Tp, n>& a) \


{ \


for( int i = 0; i < n; i++ ) \


ptr[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \


}


OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(ushort, uchar, pack,
saturate_cast)


OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(short, schar, pack,
saturate_cast)


OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(unsigned, ushort, pack,
saturate_cast)


OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(int,
short, pack,
saturate_cast)


OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(uint64,
unsigned, pack, static_cast)


OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(int64,
int, pack, static_cast)


OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(short, uchar, pack_u,
saturate_cast)


OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(int, ushort, pack_u,
saturate_cast)


/** @brief Internal helper: cast the lanes of two registers to _Tpm and write them to memory.

Lane k of @p a is written to mptr[k] and lane k of @p b to mptr[k + n].
*/
template<typename _Tpm, typename _Tp, int n>
inline void _pack_b(_Tpm* mptr, const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
{
    _Tpm* upper = mptr + n;
    for (int k = 0; k < n; ++k)
    {
        mptr[k] = (_Tpm)a.s[k];
        upper[k] = (_Tpm)b.s[k];
    }
}


/** @brief Pack two ushort registers into one uchar register by plain casting (via _pack_b).

Lanes 0..n-1 come from @p a and lanes n..2n-1 from @p b.
*/
template<int n> inline v_reg<uchar, 2*n> v_pack_b(const v_reg<ushort, n>& a, const v_reg<ushort, n>& b)
{
    v_reg<uchar, 2*n> packed;
    uchar* out = packed.s;
    _pack_b(out, a, b);
    return packed;
}


/** @brief Pack four unsigned registers into one uchar register by plain casting (via _pack_b).

The result holds the lanes of @p a, @p b, @p c and @p d in that order, n lanes each.
*/
template<int n> inline v_reg<uchar, 4*n> v_pack_b(const v_reg<unsigned, n>& a, const v_reg<unsigned, n>& b,
                                                  const v_reg<unsigned, n>& c, const v_reg<unsigned, n>& d)
{
    v_reg<uchar, 4*n> packed;
    uchar* out = packed.s;
    _pack_b(out, a, b);
    _pack_b(out + 2*n, c, d);
    return packed;
}


/** @brief Pack eight uint64 registers into one uchar register by plain casting (via _pack_b).

The result holds the lanes of @p a .. @p h in that order, n lanes each.
*/
template<int n> inline v_reg<uchar, 8*n> v_pack_b(const v_reg<uint64, n>& a, const v_reg<uint64, n>& b,
                                                  const v_reg<uint64, n>& c, const v_reg<uint64, n>& d,
                                                  const v_reg<uint64, n>& e, const v_reg<uint64, n>& f,
                                                  const v_reg<uint64, n>& g, const v_reg<uint64, n>& h)
{
    v_reg<uchar, 8*n> packed;
    uchar* out = packed.s;
    _pack_b(out, a, b);
    _pack_b(out + 2*n, c, d);
    _pack_b(out + 4*n, e, f);
    _pack_b(out + 6*n, g, h);
    return packed;
}


/** @brief Per 4-lane group, compute v0*a + v1*b + v2*c + v3*d.

Within each group of four lanes, v0..v3 are the four lanes of @p v and the
products are summed left to right, lane by lane of @p a, @p b, @p c and @p d.
*/
template<int n>
inline v_reg<float, n> v_matmul(const v_reg<float, n>& v,
                                const v_reg<float, n>& a, const v_reg<float, n>& b,
                                const v_reg<float, n>& c, const v_reg<float, n>& d)
{
    v_reg<float, n> res;
    for (int g = 0; g < n / 4; ++g)
    {
        const int o = g * 4;
        const float v0 = v.s[o], v1 = v.s[o + 1], v2 = v.s[o + 2], v3 = v.s[o + 3];
        for (int j = 0; j < 4; ++j)
        {
            res.s[o + j] = v0 * a.s[o + j] + v1 * b.s[o + j] + v2 * c.s[o + j] + v3 * d.s[o + j];
        }
    }
    return res;
}


/** @brief Per 4-lane group, compute v0*a + v1*b + v2*c + d.

Within each group of four lanes, v0..v2 are the first three lanes of @p v; the
fourth lane of @p v is not used and @p d is added unscaled.
*/
template<int n>
inline v_reg<float, n> v_matmuladd(const v_reg<float, n>& v,
                                   const v_reg<float, n>& a, const v_reg<float, n>& b,
                                   const v_reg<float, n>& c, const v_reg<float, n>& d)
{
    v_reg<float, n> res;
    for (int g = 0; g < n / 4; ++g)
    {
        const int o = g * 4;
        const float v0 = v.s[o], v1 = v.s[o + 1], v2 = v.s[o + 2];
        for (int j = 0; j < 4; ++j)
        {
            res.s[o + j] = v0 * a.s[o + j] + v1 * b.s[o + j] + v2 * c.s[o + j] + d.s[o + j];
        }
    }
    return res;
}


/** @brief Double-precision dot product of two int registers.

Computes v_fma(lo(a), lo(b), hi(a) * hi(b)), where lo/hi are the halves of the
inputs converted to double by v_cvt_f64 / v_cvt_f64_high.
*/
template<int n> inline v_reg<double, n/2> v_dotprod_expand(const v_reg<int, n>& a, const v_reg<int, n>& b)
{
    return v_fma(v_cvt_f64(a), v_cvt_f64(b),
                 v_cvt_f64_high(a) * v_cvt_f64_high(b));
}


/** @brief Double-precision dot product of two int registers with an accumulator.

Computes v_fma(lo(a), lo(b), v_fma(hi(a), hi(b), c)), where lo/hi are the halves
of the inputs converted to double by v_cvt_f64 / v_cvt_f64_high.
*/
template<int n> inline v_reg<double, n/2> v_dotprod_expand(const v_reg<int, n>& a, const v_reg<int, n>& b,
                                                           const v_reg<double, n/2>& c)
{
    return v_fma(v_cvt_f64(a), v_cvt_f64(b),
                 v_fma(v_cvt_f64_high(a), v_cvt_f64_high(b), c));
}


/** @brief "Fast" int-to-double dot product; here it simply forwards to v_dotprod_expand(a, b). */
template<int n> inline v_reg<double, n/2> v_dotprod_expand_fast(const v_reg<int, n>& a, const v_reg<int, n>& b)
{
    return v_dotprod_expand(a, b);
}


/** @brief "Fast" int-to-double dot product with accumulator; forwards to v_dotprod_expand(a, b, c). */
template<int n> inline v_reg<double, n/2> v_dotprod_expand_fast(const v_reg<int, n>& a, const v_reg<int, n>& b,
                                                                const v_reg<double, n/2>& c)
{
    return v_dotprod_expand(a, b, c);
}


/** @brief Load float16_t elements and convert each one to float.

Reads simd128_width / sizeof(float) consecutive elements from @p ptr.
*/
inline v_reg<float, simd128_width / sizeof(float)>
v_load_expand(const float16_t* ptr)
{
    v_reg<float, simd128_width / sizeof(float)> widened;
    const int lanes = widened.nlanes;
    for (int k = 0; k < lanes; ++k)
        widened.s[k] = ptr[k];
    return widened;
}


#if CV_SIMD256


/** @brief Load float16_t elements and convert each one to float.

Reads simd256_width / sizeof(float) consecutive elements from @p ptr.
*/
inline v_reg<float, simd256_width / sizeof(float)>
v256_load_expand(const float16_t* ptr)
{
    v_reg<float, simd256_width / sizeof(float)> widened;
    const int lanes = widened.nlanes;
    for (int k = 0; k < lanes; ++k)
        widened.s[k] = ptr[k];
    return widened;
}


#endif


#if CV_SIMD512


/** @brief Load float16_t elements and convert each one to float.

Reads simd512_width / sizeof(float) consecutive elements from @p ptr.
*/
inline v_reg<float, simd512_width / sizeof(float)>
v512_load_expand(const float16_t* ptr)
{
    v_reg<float, simd512_width / sizeof(float)> widened;
    const int lanes = widened.nlanes;
    for (int k = 0; k < lanes; ++k)
        widened.s[k] = ptr[k];
    return widened;
}


#endif


/** @brief Convert every float lane of @p v to float16_t and store the results at @p ptr. */
template<int n> inline void
v_pack_store(float16_t* ptr, const v_reg<float, n>& v)
{
    const int lanes = v.nlanes;
    for (int k = 0; k < lanes; ++k)
        ptr[k] = float16_t(v.s[k]);
}


/** @brief Cleanup hook of the intrinsics API; intentionally a no-op in this implementation. */
inline void v_cleanup()
{
}

#if CV_SIMD256
/** @brief Cleanup hook for the v256 API; intentionally a no-op. */
inline void v256_cleanup()
{
}
#endif

#if CV_SIMD512
/** @brief Cleanup hook for the v512 API; intentionally a no-op. */
inline void v512_cleanup()
{
}
#endif


#ifndef CV_DOXYGEN


CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END


#endif


}


#if !defined(CV_DOXYGEN)


#undef CV_SIMD256


#undef CV_SIMD512


#endif


#endif


cv::max

CV_EXPORTS_W void max(InputArray src1, InputArray src2, OutputArray dst)

Calculates per-element maximum of two arrays or an array and a scalar.


cv::sqrt

CV_EXPORTS_W void sqrt(InputArray src, OutputArray dst)

Calculates a square root of array elements.


cv::exp

CV_EXPORTS_W void exp(InputArray src, OutputArray dst)

Calculates the exponent of every array element.


cv::min

CV_EXPORTS_W void min(InputArray src1, InputArray src2, OutputArray dst)

Calculates per-element minimum of two arrays or an array and a scalar.


cv::log

CV_EXPORTS_W void log(InputArray src, OutputArray dst)

Calculates the natural logarithm of every array element.


OPENCV_HAL_IMPL_C_INIT_VAL

#define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, prefix, suffix)

Helper macro


Definition:
intrin_cpp.hpp:2830


OPENCV_HAL_IMPL_C_RSHIFTR

#define OPENCV_HAL_IMPL_C_RSHIFTR(_Tp)

Helper macro


Definition:
intrin_cpp.hpp:2932


OPENCV_HAL_IMPL_C_SHIFTR

#define OPENCV_HAL_IMPL_C_SHIFTR(_Tp)

Helper macro


Definition:
intrin_cpp.hpp:2915


OPENCV_HAL_IMPL_C_RSHR_PACK

#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tp, _Tpn, pack_suffix, cast)

Helper macro


Definition:
intrin_cpp.hpp:2989


OPENCV_HAL_IMPL_ROTATE_SHIFT_OP

#define OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(suffix, opA, opB)

Bitwise shift left


Definition:
intrin_cpp.hpp:1280


OPENCV_HAL_IMPL_CMP_OP

#define OPENCV_HAL_IMPL_CMP_OP(cmp_op)

Helper macro


Definition:
intrin_cpp.hpp:851


cv::OPENCV_HAL_IMPL_MATH_FUNC

OPENCV_HAL_IMPL_MATH_FUNC(v_abs,(typename V_TypeTraits< _Tp >::abs_type) std::abs, typename V_TypeTraits< _Tp >::abs_type) static const unsigned char popCountTable[]

Square root of elements


OPENCV_HAL_IMPL_C_SHIFTL

#define OPENCV_HAL_IMPL_C_SHIFTL(_Tp)

Helper macro


Definition:
intrin_cpp.hpp:2898


OPENCV_HAL_IMPL_C_INIT_ZERO

#define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, prefix, suffix)

Helper macro


Definition:
intrin_cpp.hpp:2784


OPENCV_HAL_IMPL_C_RSHR_PACK_STORE

#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tp, _Tpn, pack_suffix, cast)

Helper macro


Definition:
intrin_cpp.hpp:3054


OPENCV_HAL_IMPL_C_REINTERPRET

#define OPENCV_HAL_IMPL_C_REINTERPRET(_Tp, suffix)

Helper macro


Definition:
intrin_cpp.hpp:2876


OPENCV_HAL_IMPL_C_PACK

#define OPENCV_HAL_IMPL_C_PACK(_Tp, _Tpn, pack_suffix, cast)

Helper macro


Definition:
intrin_cpp.hpp:2954


OPENCV_HAL_IMPL_ARITHM_OP

#define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2)

Helper macro


Definition:
intrin_cpp.hpp:913


OPENCV_HAL_IMPL_C_PACK_STORE

#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tp, _Tpn, pack_suffix, cast)

Helper macro


Definition:
intrin_cpp.hpp:3024


OPENCV_HAL_IMPL_SHIFT_OP

#define OPENCV_HAL_IMPL_SHIFT_OP(shift_op)

Helper macro


Definition:
intrin_cpp.hpp:1259


cv::v_check_any

bool v_check_any(const v_reg< _Tp, n > &a)

Check whether any of the packed values is less than zero


Definition:
intrin_cpp.hpp:1436


cv::v_combine_high

v_reg< _Tp, n > v_combine_high(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

Combine vector from last elements of two vectors


Definition:
intrin_cpp.hpp:2307


cv::v_matmul

v_reg< float, n > v_matmul(const v_reg< float, n > &v, const v_reg< float, n > &a, const v_reg< float, n > &b, const v_reg< float, n > &c, const v_reg< float, n > &d)

Matrix multiplication


Definition:
intrin_cpp.hpp:3196


cv::v_round

v_reg< int, n > v_round(const v_reg< float, n > &a)

Round elements


Definition:
intrin_cpp.hpp:2427


cv::operator|

CV_INLINE v_reg< _Tp, n > operator|(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

Bitwise OR


cv::v_int8x16

v_reg< schar, 16 > v_int8x16

Sixteen 8-bit signed integer values


Definition:
intrin_cpp.hpp:490


cv::v_uint8x16

v_reg< uchar, 16 > v_uint8x16

Sixteen 8-bit unsigned integer values


Definition:
intrin_cpp.hpp:488


cv::v_store_high

void v_store_high(_Tp *ptr, const v_reg< _Tp, n > &a)

Store data to memory (higher half)


Definition:
intrin_cpp.hpp:2236


cv::v_signmask

int v_signmask(const v_reg< _Tp, n > &a)

Get negative values mask


Definition:
intrin_cpp.hpp:1395


cv::v_zip

void v_zip(const v_reg< _Tp, n > &a0, const v_reg< _Tp, n > &a1, v_reg< _Tp, n > &b0, v_reg< _Tp, n > &b1)

Interleave two vectors


Definition:
intrin_cpp.hpp:1557


cv::v_int64x2

v_reg< int64, 2 > v_int64x2

Two 64-bit signed integer values


Definition:
intrin_cpp.hpp:506


cv::v_store

void v_store(_Tp *ptr, const v_reg< _Tp, n > &a)

Store data to memory


Definition:
intrin_cpp.hpp:2193


cv::v_dotprod_expand

v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > v_dotprod_expand(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

Dot product of elements and expand


Definition:
intrin_cpp.hpp:1145


cv::v_reduce_sad

V_TypeTraits< typename V_TypeTraits< _Tp >::abs_type >::sum_type v_reduce_sad(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

Sum absolute differences of values


Definition:
intrin_cpp.hpp:1377


cv::v_ceil

v_reg< int, n > v_ceil(const v_reg< float, n > &a)

Ceil elements


Definition:
intrin_cpp.hpp:2465


cv::v_uint16x8

v_reg< ushort, 8 > v_uint16x8

Eight 16-bit unsigned integer values


Definition:
intrin_cpp.hpp:492


cv::operator&

CV_INLINE v_reg< _Tp, n > operator&(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

Bitwise AND


cv::v_store_low

void v_store_low(_Tp *ptr, const v_reg< _Tp, n > &a)

Store data to memory (lower half)


Definition:
intrin_cpp.hpp:2219


cv::v_floor

v_reg< int, n > v_floor(const v_reg< float, n > &a)

Floor elements


Definition:
intrin_cpp.hpp:2452


cv::v_dotprod

v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > v_dotprod(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

Dot product of elements


Definition:
intrin_cpp.hpp:1080


cv::v_scan_forward

int v_scan_forward(const v_reg< _Tp, n > &a)

Get first negative lane index


Definition:
intrin_cpp.hpp:1412


cv::v_reverse

v_reg< _Tp, n > v_reverse(const v_reg< _Tp, n > &a)

Vector reverse order


Definition:
intrin_cpp.hpp:2346


cv::v_load_expand

v_reg< typename V_TypeTraits< _Tp >::w_type, simd128_width/sizeof(typename V_TypeTraits< _Tp >::w_type)> v_load_expand(const _Tp *ptr)

Load register contents from memory with double expand


Definition:
intrin_cpp.hpp:1875


cv::v_int32x4

v_reg< int, 4 > v_int32x4

Four 32-bit signed integer values


Definition:
intrin_cpp.hpp:498


cv::v_absdiff

v_reg< typename V_TypeTraits< _Tp >::abs_type, n > v_absdiff(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

Absolute difference


Definition:
intrin_cpp.hpp:956


cv::v_reduce_sum

V_TypeTraits< _Tp >::sum_type v_reduce_sum(const v_reg< _Tp, n > &a)

Sum packed values


Definition:
intrin_cpp.hpp:1338


cv::v_muladd

v_reg< _Tp, n > v_muladd(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c)

A synonym for v_fma


Definition:
intrin_cpp.hpp:1060


cv::v_sqr_magnitude

v_reg< _Tp, n > v_sqr_magnitude(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

Square of the magnitude


Definition:
intrin_cpp.hpp:1036


cv::v_trunc

v_reg< int, n > v_trunc(const v_reg< float, n > &a)

Truncate elements


Definition:
intrin_cpp.hpp:2478


cv::v_uint32x4

v_reg< unsigned, 4 > v_uint32x4

Four 32-bit unsigned integer values


Definition:
intrin_cpp.hpp:496


cv::operator/

CV_INLINE v_reg< _Tp, n > operator/(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

Divide values


cv::v_invsqrt

v_reg< _Tp, n > v_invsqrt(const v_reg< _Tp, n > &a)

Inverse square root


Definition:
intrin_cpp.hpp:1010


cv::v_magnitude

v_reg< _Tp, n > v_magnitude(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

Magnitude


Definition:
intrin_cpp.hpp:1023


cv::v_dotprod_expand_fast

v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > v_dotprod_expand_fast(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

Fast Dot product of elements and expand


Definition:
intrin_cpp.hpp:1188


cv::v_cvt_f64_high

CV_INLINE v_reg< double,(n/2)> v_cvt_f64_high(const v_reg< int, n > &a)

Convert to double high part of vector


Definition:
intrin_cpp.hpp:2587


cv::v_load_low

v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load_low(const _Tp *ptr)

Load 64 bits of data to lower part (high part is undefined).


Definition:
intrin_cpp.hpp:1702


cv::v_recombine

void v_recombine(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, v_reg< _Tp, n > &low, v_reg< _Tp, n > &high)

Combine two vectors from lower and higher parts of two other vectors


Definition:
intrin_cpp.hpp:2325


cv::v_reduce_sum4

v_reg< float, n > v_reduce_sum4(const v_reg< float, n > &a, const v_reg< float, n > &b, const v_reg< float, n > &c, const v_reg< float, n > &d)

Sums all elements of each input vector, returns the vector of sums


Definition:
intrin_cpp.hpp:1356


cv::v_mul_expand

void v_mul_expand(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &c, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &d)

Multiply and expand


Definition:
intrin_cpp.hpp:1219


cv::v_load_aligned

v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load_aligned(const _Tp *ptr)

Load register contents from memory (aligned)


Definition:
intrin_cpp.hpp:1652


cv::v_broadcast_element

v_reg< _Tp, n > v_broadcast_element(const v_reg< _Tp, n > &a)

Broadcast i-th element of vector


Definition:
intrin_cpp.hpp:2416


cv::v_select

v_reg< _Tp, n > v_select(const v_reg< _Tp, n > &mask, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

Per-element select (blend operation)


Definition:
intrin_cpp.hpp:1454


cv::v_load

v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load(const _Tp *ptr)

Load register contents from memory


Definition:
intrin_cpp.hpp:1587


cv::v_expand_low

v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > v_expand_low(const v_reg< _Tp, n > &a)

Expand lower values to the wider pack type


Definition:
intrin_cpp.hpp:1499


cv::operator~

CV_INLINE v_reg< _Tp, n > operator~(const v_reg< _Tp, n > &a)

Bitwise NOT


cv::v_cvt_f64

CV_INLINE v_reg< double, n/2 > v_cvt_f64(const v_reg< int, n > &a)

Convert lower half to double


Definition:
intrin_cpp.hpp:2576


cv::v_load_expand_q

v_reg< typename V_TypeTraits< _Tp >::q_type, simd128_width/sizeof(typename V_TypeTraits< _Tp >::q_type)> v_load_expand_q(const _Tp *ptr)

Load register contents from memory with quad expand


Definition:
intrin_cpp.hpp:1964


cv::v_expand

void v_expand(const v_reg< _Tp, n > &a, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &b0, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &b1)

Expand values to the wider pack type


Definition:
intrin_cpp.hpp:1477


cv::v_pack_b

v_reg< uchar, 2 *n > v_pack_b(const v_reg< ushort, n > &a, const v_reg< ushort, n > &b)

For 16-bit boolean values


Definition:
intrin_cpp.hpp:3114


cv::v_fma

v_reg< _Tp, n > v_fma(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c)

Multiply and add


Definition:
intrin_cpp.hpp:1049


cv::operator^

CV_INLINE v_reg< _Tp, n > operator^(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

Bitwise XOR


cv::v_store_interleave

void v_store_interleave(_Tp *ptr, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, hal::StoreMode=hal::STORE_UNALIGNED)

Interleave and store (2 channels)


Definition:
intrin_cpp.hpp:2118


cv::v_transpose4x4

void v_transpose4x4(v_reg< _Tp, n > &a0, const v_reg< _Tp, n > &a1, const v_reg< _Tp, n > &a2, const v_reg< _Tp, n > &a3, v_reg< _Tp, n > &b0, v_reg< _Tp, n > &b1, v_reg< _Tp, n > &b2, v_reg< _Tp, n > &b3)

Transpose 4x4 matrix


Definition:
intrin_cpp.hpp:2764


cv::v_absdiffs

v_reg< _Tp, n > v_absdiffs(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

Saturating absolute difference


Definition:
intrin_cpp.hpp:997


cv::v_uint64x2

v_reg< uint64, 2 > v_uint64x2

Two 64-bit unsigned integer values


Definition:
intrin_cpp.hpp:504


cv::v_expand_high

v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > v_expand_high(const v_reg< _Tp, n > &a)

Expand higher values to the wider pack type


Definition:
intrin_cpp.hpp:1518


cv::v_dotprod_fast

v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > v_dotprod_fast(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

Fast Dot product of elements


Definition:
intrin_cpp.hpp:1119


cv::v_load_halves

v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load_halves(const _Tp *loptr, const _Tp *hiptr)

Load register contents from two memory blocks


Definition:
intrin_cpp.hpp:1784


cv::v_mul_hi

v_reg< _Tp, n > v_mul_hi(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

Multiply and extract high part


Definition:
intrin_cpp.hpp:1236


cv::v_combine_low

v_reg< _Tp, n > v_combine_low(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

Combine vector from first elements of two vectors


Definition:
intrin_cpp.hpp:2285


cv::v_float32x4

v_reg< float, 4 > v_float32x4

Four 32-bit floating point values (single precision)


Definition:
intrin_cpp.hpp:500


cv::v_cvt_f32

v_reg< float, n > v_cvt_f32(const v_reg< int, n > &a)

Convert to float


Definition:
intrin_cpp.hpp:2537


cv::v_check_all

bool v_check_all(const v_reg< _Tp, n > &a)

Check if all packed values are less than zero


Definition:
intrin_cpp.hpp:1424


cv::v_matmuladd

v_reg< float, n > v_matmuladd(const v_reg< float, n > &v, const v_reg< float, n > &a, const v_reg< float, n > &b, const v_reg< float, n > &c, const v_reg< float, n > &d)

Matrix multiplication and add


Definition:
intrin_cpp.hpp:3226


cv::v_extract_n

_Tp v_extract_n(const v_reg< _Tp, n > &v)

Vector extract


Definition:
intrin_cpp.hpp:2400


cv::v_not_nan

v_reg< float, n > v_not_nan(const v_reg< float, n > &a)

Not-NaN check (per element)


Definition:
intrin_cpp.hpp:893


cv::v_popcount

v_reg< typename V_TypeTraits< _Tp >::abs_type, n > v_popcount(const v_reg< _Tp, n > &a)

Count the 1 bits in the vector lanes and return result as corresponding unsigned type


Definition:
intrin_cpp.hpp:827


cv::v_store_aligned

void v_store_aligned(_Tp *ptr, const v_reg< _Tp, n > &a)

Store data to memory (aligned)


Definition:
intrin_cpp.hpp:2254


cv::v_int16x8

v_reg< short, 8 > v_int16x8

Eight 16-bit signed integer values


Definition:
intrin_cpp.hpp:494


cv::v_float64x2

v_reg< double, 2 > v_float64x2

Two 64-bit floating point values (double precision)


Definition:
intrin_cpp.hpp:502


cv::v_extract

v_reg< _Tp, n > v_extract(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

Vector extract


Definition:
intrin_cpp.hpp:2374


cv::v_load_deinterleave

void v_load_deinterleave(const _Tp *ptr, v_reg< _Tp, n > &a, v_reg< _Tp, n > &b)

Load and deinterleave (2 channels)


Definition:
intrin_cpp.hpp:2046


cv::abs

softfloat abs(softfloat a)

Absolute value


Definition:
softfloat.hpp:444


cvRound

CV_INLINE int cvRound(double value)

Rounds floating-point number to the nearest integer


Definition:
fast_math.hpp:200


cvCeil

CV_INLINE int cvCeil(double value)

Rounds floating-point number to the nearest integer not smaller than the original.


Definition:
fast_math.hpp:254


cv::saturate_cast

static _Tp saturate_cast(uchar v)

Template function for accurate conversion from one primitive type to another.


Definition:
saturate.hpp:80


cvFloor

CV_INLINE int cvFloor(double value)

Rounds floating-point number to the nearest integer not larger than the original.


Definition:
fast_math.hpp:234


cv::isAligned

static bool isAligned(const T &data)

Alignment check of passed values


Definition:
utility.hpp:517


CV_Assert

#define CV_Assert(expr)

Checks a condition at runtime and throws exception if it fails


Definition:
base.hpp:342


CV_DbgAssert

#define CV_DbgAssert(expr)


Definition:
base.hpp:375


cv::cos

Quat< T > cos(const Quat< T > &q)


cv::sin

Quat< T > sin(const Quat< T > &q)


cv

"black box" representation of the file storage associated with a file on disk.


Definition:
aruco.hpp:75


cv::V_TypeTraits


Definition:
intrin.hpp:104


cv::v_reg


Definition:
intrin_cpp.hpp:369


cv::v_reg::get0

_Tp get0() const

Access first value


Definition:
intrin_cpp.hpp:436


cv::v_reg::v_reg

v_reg(const v_reg< _Tp, n > &r)

Copy constructor


Definition:
intrin_cpp.hpp:421


cv::v_reg::v_reg

v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, _Tp s4, _Tp s5, _Tp s6, _Tp s7, _Tp s8, _Tp s9, _Tp s10, _Tp s11, _Tp s12, _Tp s13, _Tp s14, _Tp s15)

Constructor


Definition:
intrin_cpp.hpp:404


cv::v_reg::v_reg

v_reg(const _Tp *ptr)

Constructor


Definition:
intrin_cpp.hpp:379


cv::v_reg::v_reg

v_reg()

Default constructor


Definition:
intrin_cpp.hpp:418


cv::v_reg::v_reg

v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, _Tp s4, _Tp s5, _Tp s6, _Tp s7)

Constructor


Definition:
intrin_cpp.hpp:394


cv::v_reg::v_reg

v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3)

Constructor


Definition:
intrin_cpp.hpp:389


cv::v_reg::v_reg

v_reg(_Tp s0, _Tp s1)

Constructor


Definition:
intrin_cpp.hpp:384