モジュール
	Private implementation helpers

クラス
struct	cv::v_reg< _Tp, n >

マクロ定義
#define	CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, ...)

#define	CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, ...)

#define	CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(macro_name, ...)

#define	CV__HAL_INTRIN_IMPL_BIN_OP_(_Tp, bin_op)

#define	CV__HAL_INTRIN_IMPL_BIN_OP(bin_op) CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(CV__HAL_INTRIN_IMPL_BIN_OP_, bin_op)

#define	CV__HAL_INTRIN_IMPL_BIT_OP_(_Tp, bit_op)

#define	CV__HAL_INTRIN_IMPL_BIT_OP(bit_op)

#define	CV__HAL_INTRIN_IMPL_BITWISE_NOT_(_Tp, dummy)

型定義
typedef v_reg< uchar, 16 >	cv::v_uint8x16
	Sixteen 8-bit unsigned integer values

typedef v_reg< schar, 16 >	cv::v_int8x16
	Sixteen 8-bit signed integer values

typedef v_reg< ushort, 8 >	cv::v_uint16x8
	Eight 16-bit unsigned integer values

typedef v_reg< short, 8 >	cv::v_int16x8
	Eight 16-bit signed integer values

typedef v_reg< unsigned, 4 >	cv::v_uint32x4
	Four 32-bit unsigned integer values

typedef v_reg< int, 4 >	cv::v_int32x4
	Four 32-bit signed integer values

typedef v_reg< float, 4 >	cv::v_float32x4
	Four 32-bit floating point values (single precision)

typedef v_reg< double, 2 >	cv::v_float64x2
	Two 64-bit floating point values (double precision)

typedef v_reg< uint64, 2 >	cv::v_uint64x2
	Two 64-bit unsigned integer values

typedef v_reg< int64, 2 >	cv::v_int64x2
	Two 64-bit signed integer values

列挙型
enum	{ simd128_width = 16 , simdmax_width = simd128_width }

関数
void	CV__SIMD_NAMESPACE::vx_cleanup ()
	SIMD processing state cleanup call

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n >	cv::operator+ (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Add values [詳解]

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n > &	cv::operator+= (v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n >	cv::operator- (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Subtract values [詳解]

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n > &	cv::operator-= (v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n >	cv::operator* (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Multiply values [詳解]

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n > &	cv::operator*= (v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n >	cv::operator/ (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Divide values [詳解]

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n > &	cv::operator/= (v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n >	cv::operator& (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Bitwise AND [詳解]

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n > &	cv::operator&= (v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n >	cv::operator| (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Bitwise OR [詳解]

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n > &	cv::operator|= (v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n >	cv::operator^ (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Bitwise XOR [詳解]

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n > &	cv::operator^= (v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n >	cv::operator~ (const v_reg< _Tp, n > &a)
	Bitwise NOT [詳解]

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::abs_type, n >	cv::v_popcount (const v_reg< _Tp, n > &a)
	Count the 1 bits in the vector lanes and return result as corresponding unsigned type [詳解]

template<int n>
v_reg< float, n >	cv::v_not_nan (const v_reg< float, n > &a)
	Check which lanes are not NaN [詳解]

template<int n>
v_reg< double, n >	cv::v_not_nan (const v_reg< double, n > &a)

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::abs_type, n >	cv::v_absdiff (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Absolute difference [詳解]

template<int n>
v_reg< float, n >	cv::v_absdiff (const v_reg< float, n > &a, const v_reg< float, n > &b)

template<int n>
v_reg< double, n >	cv::v_absdiff (const v_reg< double, n > &a, const v_reg< double, n > &b)

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_absdiffs (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Saturating absolute difference [詳解]

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_invsqrt (const v_reg< _Tp, n > &a)
	Inverse square root [詳解]

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_magnitude (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Magnitude [詳解]

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_sqr_magnitude (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Square of the magnitude [詳解]

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_fma (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c)
	Multiply and add [詳解]

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_muladd (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c)
	A synonym for v_fma

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 >	cv::v_dotprod (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Dot product of elements [詳解]

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 >	cv::v_dotprod (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &c)
	Dot product of elements [詳解]

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 >	cv::v_dotprod_fast (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Fast Dot product of elements [詳解]

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 >	cv::v_dotprod_fast (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &c)
	Fast Dot product of elements [詳解]

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 >	cv::v_dotprod_expand (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Dot product of elements and expand [詳解]

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 >	cv::v_dotprod_expand (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > &c)
	Dot product of elements [詳解]

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 >	cv::v_dotprod_expand_fast (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Fast Dot product of elements and expand [詳解]

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 >	cv::v_dotprod_expand_fast (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > &c)
	Fast Dot product of elements [詳解]

template<typename _Tp , int n>
void	cv::v_mul_expand (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &c, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &d)
	Multiply and expand [詳解]

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_mul_hi (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Multiply and extract high part [詳解]

template<typename _Tp , int n>
V_TypeTraits< _Tp >::sum_type	cv::v_reduce_sum (const v_reg< _Tp, n > &a)
	Sum packed values [詳解]

template<int n>
v_reg< float, n >	cv::v_reduce_sum4 (const v_reg< float, n > &a, const v_reg< float, n > &b, const v_reg< float, n > &c, const v_reg< float, n > &d)
	Sums all elements of each input vector, returns the vector of sums [詳解]

template<typename _Tp , int n>
V_TypeTraits< typename V_TypeTraits< _Tp >::abs_type >::sum_type	cv::v_reduce_sad (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Sum absolute differences of values [詳解]

template<typename _Tp , int n>
int	cv::v_signmask (const v_reg< _Tp, n > &a)
	Get negative values mask [詳解]

template<typename _Tp , int n>
int	cv::v_scan_forward (const v_reg< _Tp, n > &a)
	Get first negative lane index [詳解]

template<typename _Tp , int n>
bool	cv::v_check_all (const v_reg< _Tp, n > &a)
	Check if all packed values are less than zero [詳解]

template<typename _Tp , int n>
bool	cv::v_check_any (const v_reg< _Tp, n > &a)
	Check if any of packed values is less than zero [詳解]

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_select (const v_reg< _Tp, n > &mask, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Per-element select (blend operation) [詳解]

template<typename _Tp , int n>
void	cv::v_expand (const v_reg< _Tp, n > &a, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &b0, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &b1)
	Expand values to the wider pack type [詳解]

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 >	cv::v_expand_low (const v_reg< _Tp, n > &a)
	Expand lower values to the wider pack type [詳解]

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 >	cv::v_expand_high (const v_reg< _Tp, n > &a)
	Expand higher values to the wider pack type [詳解]

template<typename _Tp , int n>
void	cv::v_zip (const v_reg< _Tp, n > &a0, const v_reg< _Tp, n > &a1, v_reg< _Tp, n > &b0, v_reg< _Tp, n > &b1)
	Interleave two vectors [詳解]

template<typename _Tp >
v_reg< _Tp, simd128_width/sizeof(_Tp)>	cv::v_load (const _Tp *ptr)
	Load register contents from memory [詳解]

template<typename _Tp >
v_reg< _Tp, simd128_width/sizeof(_Tp)>	cv::v_load_aligned (const _Tp *ptr)
	Load register contents from memory (aligned) [詳解]

template<typename _Tp >
v_reg< _Tp, simd128_width/sizeof(_Tp)>	cv::v_load_low (const _Tp *ptr)
	Load 64-bits of data to lower part (high part is undefined). [詳解]

template<typename _Tp >
v_reg< _Tp, simd128_width/sizeof(_Tp)>	cv::v_load_halves (const _Tp *loptr, const _Tp *hiptr)
	Load register contents from two memory blocks [詳解]

template<typename _Tp >
v_reg< typename V_TypeTraits< _Tp >::w_type, simd128_width/sizeof(typename V_TypeTraits< _Tp >::w_type)>	cv::v_load_expand (const _Tp *ptr)
	Load register contents from memory with double expand [詳解]

template<typename _Tp >
v_reg< typename V_TypeTraits< _Tp >::q_type, simd128_width/sizeof(typename V_TypeTraits< _Tp >::q_type)>	cv::v_load_expand_q (const _Tp *ptr)
	Load register contents from memory with quad expand [詳解]

template<typename _Tp , int n>
void	cv::v_load_deinterleave (const _Tp *ptr, v_reg< _Tp, n > &a, v_reg< _Tp, n > &b)
	Load and deinterleave (2 channels) [詳解]

template<typename _Tp , int n>
void	cv::v_load_deinterleave (const _Tp *ptr, v_reg< _Tp, n > &a, v_reg< _Tp, n > &b, v_reg< _Tp, n > &c)
	Load and deinterleave (3 channels) [詳解]

template<typename _Tp , int n>
void	cv::v_load_deinterleave (const _Tp *ptr, v_reg< _Tp, n > &a, v_reg< _Tp, n > &b, v_reg< _Tp, n > &c, v_reg< _Tp, n > &d)
	Load and deinterleave (4 channels) [詳解]

template<typename _Tp , int n>
void	cv::v_store_interleave (_Tp *ptr, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, hal::StoreMode=hal::STORE_UNALIGNED)
	Interleave and store (2 channels) [詳解]

template<typename _Tp , int n>
void	cv::v_store_interleave (_Tp *ptr, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c, hal::StoreMode=hal::STORE_UNALIGNED)
	Interleave and store (3 channels) [詳解]

template<typename _Tp , int n>
void	cv::v_store_interleave (_Tp *ptr, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c, const v_reg< _Tp, n > &d, hal::StoreMode=hal::STORE_UNALIGNED)
	Interleave and store (4 channels) [詳解]

template<typename _Tp , int n>
void	cv::v_store (_Tp *ptr, const v_reg< _Tp, n > &a)
	Store data to memory [詳解]

template<typename _Tp , int n>
void	cv::v_store (_Tp *ptr, const v_reg< _Tp, n > &a, hal::StoreMode)

template<typename _Tp , int n>
void	cv::v_store_low (_Tp *ptr, const v_reg< _Tp, n > &a)
	Store data to memory (lower half) [詳解]

template<typename _Tp , int n>
void	cv::v_store_high (_Tp *ptr, const v_reg< _Tp, n > &a)
	Store data to memory (higher half) [詳解]

template<typename _Tp , int n>
void	cv::v_store_aligned (_Tp *ptr, const v_reg< _Tp, n > &a)
	Store data to memory (aligned) [詳解]

template<typename _Tp , int n>
void	cv::v_store_aligned_nocache (_Tp *ptr, const v_reg< _Tp, n > &a)

template<typename _Tp , int n>
void	cv::v_store_aligned (_Tp *ptr, const v_reg< _Tp, n > &a, hal::StoreMode)

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_combine_low (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Combine vector from first elements of two vectors [詳解]

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_combine_high (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Combine vector from last elements of two vectors [詳解]

template<typename _Tp , int n>
void	cv::v_recombine (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, v_reg< _Tp, n > &low, v_reg< _Tp, n > &high)
	Combine two vectors from lower and higher parts of two other vectors [詳解]

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_reverse (const v_reg< _Tp, n > &a)
	Vector reverse order [詳解]

template<int s, typename _Tp , int n>
v_reg< _Tp, n >	cv::v_extract (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Vector extract [詳解]

template<int s, typename _Tp , int n>
_Tp	cv::v_extract_n (const v_reg< _Tp, n > &v)
	Vector extract [詳解]

template<int i, typename _Tp , int n>
v_reg< _Tp, n >	cv::v_broadcast_element (const v_reg< _Tp, n > &a)
	Broadcast i-th element of vector [詳解]

template<int n>
v_reg< int, n >	cv::v_round (const v_reg< float, n > &a)
	Round elements [詳解]

template<int n>
v_reg< int, n *2 >	cv::v_round (const v_reg< double, n > &a, const v_reg< double, n > &b)

template<int n>
v_reg< int, n >	cv::v_floor (const v_reg< float, n > &a)
	Floor elements [詳解]

template<int n>
v_reg< int, n >	cv::v_ceil (const v_reg< float, n > &a)
	Ceil elements [詳解]

template<int n>
v_reg< int, n >	cv::v_trunc (const v_reg< float, n > &a)
	Truncate elements [詳解]

template<int n>
v_reg< int, n *2 >	cv::v_round (const v_reg< double, n > &a)

template<int n>
v_reg< int, n *2 >	cv::v_floor (const v_reg< double, n > &a)

template<int n>
v_reg< int, n *2 >	cv::v_ceil (const v_reg< double, n > &a)

template<int n>
v_reg< int, n *2 >	cv::v_trunc (const v_reg< double, n > &a)

template<int n>
v_reg< float, n >	cv::v_cvt_f32 (const v_reg< int, n > &a)
	Convert to float [詳解]

template<int n>
v_reg< float, n *2 >	cv::v_cvt_f32 (const v_reg< double, n > &a)
	Convert lower half to float [詳解]

template<int n>
v_reg< float, n *2 >	cv::v_cvt_f32 (const v_reg< double, n > &a, const v_reg< double, n > &b)
	Convert to float [詳解]

template<int n>
CV_INLINE v_reg< double, n/2 >	cv::v_cvt_f64 (const v_reg< int, n > &a)
	Convert lower half to double [詳解]

template<int n>
CV_INLINE v_reg< double,(n/2)>	cv::v_cvt_f64_high (const v_reg< int, n > &a)
	Convert to double high part of vector [詳解]

template<int n>
CV_INLINE v_reg< double,(n/2)>	cv::v_cvt_f64 (const v_reg< float, n > &a)
	Convert lower half to double [詳解]

template<int n>
CV_INLINE v_reg< double,(n/2)>	cv::v_cvt_f64_high (const v_reg< float, n > &a)
	Convert to double high part of vector [詳解]

template<int n>
CV_INLINE v_reg< double, n >	cv::v_cvt_f64 (const v_reg< int64, n > &a)
	Convert to double [詳解]

template<typename _Tp >
v_reg< _Tp, simd128_width/sizeof(_Tp)>	cv::v_lut (const _Tp *tab, const int *idx)

template<typename _Tp >
v_reg< _Tp, simd128_width/sizeof(_Tp)>	cv::v_lut_pairs (const _Tp *tab, const int *idx)

template<typename _Tp >
v_reg< _Tp, simd128_width/sizeof(_Tp)>	cv::v_lut_quads (const _Tp *tab, const int *idx)

template<int n>
v_reg< int, n >	cv::v_lut (const int *tab, const v_reg< int, n > &idx)

template<int n>
v_reg< unsigned, n >	cv::v_lut (const unsigned *tab, const v_reg< int, n > &idx)

template<int n>
v_reg< float, n >	cv::v_lut (const float *tab, const v_reg< int, n > &idx)

template<int n>
v_reg< double, n/2 >	cv::v_lut (const double *tab, const v_reg< int, n > &idx)

template<int n>
void	cv::v_lut_deinterleave (const float *tab, const v_reg< int, n > &idx, v_reg< float, n > &x, v_reg< float, n > &y)

template<int n>
void	cv::v_lut_deinterleave (const double *tab, const v_reg< int, n *2 > &idx, v_reg< double, n > &x, v_reg< double, n > &y)

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_interleave_pairs (const v_reg< _Tp, n > &vec)

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_interleave_quads (const v_reg< _Tp, n > &vec)

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_pack_triplets (const v_reg< _Tp, n > &vec)

template<typename _Tp , int n>
void	cv::v_transpose4x4 (v_reg< _Tp, n > &a0, const v_reg< _Tp, n > &a1, const v_reg< _Tp, n > &a2, const v_reg< _Tp, n > &a3, v_reg< _Tp, n > &b0, v_reg< _Tp, n > &b1, v_reg< _Tp, n > &b2, v_reg< _Tp, n > &b3)
	Transpose 4x4 matrix [詳解]

template<int n>
v_reg< float, n >	cv::v_matmul (const v_reg< float, n > &v, const v_reg< float, n > &a, const v_reg< float, n > &b, const v_reg< float, n > &c, const v_reg< float, n > &d)
	Matrix multiplication [詳解]

template<int n>
v_reg< float, n >	cv::v_matmuladd (const v_reg< float, n > &v, const v_reg< float, n > &a, const v_reg< float, n > &b, const v_reg< float, n > &c, const v_reg< float, n > &d)
	Matrix multiplication and add [詳解]

template<int n>
v_reg< double, n/2 >	cv::v_dotprod_expand (const v_reg< int, n > &a, const v_reg< int, n > &b)

template<int n>
v_reg< double, n/2 >	cv::v_dotprod_expand (const v_reg< int, n > &a, const v_reg< int, n > &b, const v_reg< double, n/2 > &c)

template<int n>
v_reg< double, n/2 >	cv::v_dotprod_expand_fast (const v_reg< int, n > &a, const v_reg< int, n > &b)

template<int n>
v_reg< double, n/2 >	cv::v_dotprod_expand_fast (const v_reg< int, n > &a, const v_reg< int, n > &b, const v_reg< double, n/2 > &c)

v_reg< float, simd128_width/sizeof(float)>	cv::v_load_expand (const float16_t *ptr)

template<int n>
void	cv::v_pack_store (float16_t *ptr, const v_reg< float, n > &v)

void	cv::v_cleanup ()

Wide init with value
Create maximum available capacity vector with elements set to a specific value
v_uint8	CV__SIMD_NAMESPACE::vx_setall_u8 (uchar v)

v_int8	CV__SIMD_NAMESPACE::vx_setall_s8 (schar v)

v_uint16	CV__SIMD_NAMESPACE::vx_setall_u16 (ushort v)

v_int16	CV__SIMD_NAMESPACE::vx_setall_s16 (short v)

v_int32	CV__SIMD_NAMESPACE::vx_setall_s32 (int v)

v_uint32	CV__SIMD_NAMESPACE::vx_setall_u32 (unsigned v)

v_float32	CV__SIMD_NAMESPACE::vx_setall_f32 (float v)

v_int64	CV__SIMD_NAMESPACE::vx_setall_s64 (int64 v)

v_uint64	CV__SIMD_NAMESPACE::vx_setall_u64 (uint64 v)

Wide init with zero
Create maximum available capacity vector with elements set to zero
v_uint8	CV__SIMD_NAMESPACE::vx_setzero_u8 ()

v_int8	CV__SIMD_NAMESPACE::vx_setzero_s8 ()

v_uint16	CV__SIMD_NAMESPACE::vx_setzero_u16 ()

v_int16	CV__SIMD_NAMESPACE::vx_setzero_s16 ()

v_int32	CV__SIMD_NAMESPACE::vx_setzero_s32 ()

v_uint32	CV__SIMD_NAMESPACE::vx_setzero_u32 ()

v_float32	CV__SIMD_NAMESPACE::vx_setzero_f32 ()

v_int64	CV__SIMD_NAMESPACE::vx_setzero_s64 ()

v_uint64	CV__SIMD_NAMESPACE::vx_setzero_u64 ()

Wide load from memory
Load maximum available capacity register contents from memory
v_uint8	CV__SIMD_NAMESPACE::vx_load (const uchar *ptr)

v_int8	CV__SIMD_NAMESPACE::vx_load (const schar *ptr)

v_uint16	CV__SIMD_NAMESPACE::vx_load (const ushort *ptr)

v_int16	CV__SIMD_NAMESPACE::vx_load (const short *ptr)

v_int32	CV__SIMD_NAMESPACE::vx_load (const int *ptr)

v_uint32	CV__SIMD_NAMESPACE::vx_load (const unsigned *ptr)

v_float32	CV__SIMD_NAMESPACE::vx_load (const float *ptr)

v_int64	CV__SIMD_NAMESPACE::vx_load (const int64 *ptr)

v_uint64	CV__SIMD_NAMESPACE::vx_load (const uint64 *ptr)

Wide load from memory(aligned)
Load maximum available capacity register contents from memory(aligned)
v_uint8	CV__SIMD_NAMESPACE::vx_load_aligned (const uchar *ptr)

v_int8	CV__SIMD_NAMESPACE::vx_load_aligned (const schar *ptr)

v_uint16	CV__SIMD_NAMESPACE::vx_load_aligned (const ushort *ptr)

v_int16	CV__SIMD_NAMESPACE::vx_load_aligned (const short *ptr)

v_int32	CV__SIMD_NAMESPACE::vx_load_aligned (const int *ptr)

v_uint32	CV__SIMD_NAMESPACE::vx_load_aligned (const unsigned *ptr)

v_float32	CV__SIMD_NAMESPACE::vx_load_aligned (const float *ptr)

v_int64	CV__SIMD_NAMESPACE::vx_load_aligned (const int64 *ptr)

v_uint64	CV__SIMD_NAMESPACE::vx_load_aligned (const uint64 *ptr)

Wide load lower half from memory
Load lower half of maximum available capacity register from memory
v_uint8	CV__SIMD_NAMESPACE::vx_load_low (const uchar *ptr)

v_int8	CV__SIMD_NAMESPACE::vx_load_low (const schar *ptr)

v_uint16	CV__SIMD_NAMESPACE::vx_load_low (const ushort *ptr)

v_int16	CV__SIMD_NAMESPACE::vx_load_low (const short *ptr)

v_int32	CV__SIMD_NAMESPACE::vx_load_low (const int *ptr)

v_uint32	CV__SIMD_NAMESPACE::vx_load_low (const unsigned *ptr)

v_float32	CV__SIMD_NAMESPACE::vx_load_low (const float *ptr)

v_int64	CV__SIMD_NAMESPACE::vx_load_low (const int64 *ptr)

v_uint64	CV__SIMD_NAMESPACE::vx_load_low (const uint64 *ptr)

Wide load halves from memory
Load maximum available capacity register contents from two memory blocks
v_uint8	CV__SIMD_NAMESPACE::vx_load_halves (const uchar *ptr0, const uchar *ptr1)

v_int8	CV__SIMD_NAMESPACE::vx_load_halves (const schar *ptr0, const schar *ptr1)

v_uint16	CV__SIMD_NAMESPACE::vx_load_halves (const ushort *ptr0, const ushort *ptr1)

v_int16	CV__SIMD_NAMESPACE::vx_load_halves (const short *ptr0, const short *ptr1)

v_int32	CV__SIMD_NAMESPACE::vx_load_halves (const int *ptr0, const int *ptr1)

v_uint32	CV__SIMD_NAMESPACE::vx_load_halves (const unsigned *ptr0, const unsigned *ptr1)

v_float32	CV__SIMD_NAMESPACE::vx_load_halves (const float *ptr0, const float *ptr1)

v_int64	CV__SIMD_NAMESPACE::vx_load_halves (const int64 *ptr0, const int64 *ptr1)

v_uint64	CV__SIMD_NAMESPACE::vx_load_halves (const uint64 *ptr0, const uint64 *ptr1)

Wide LUT of elements
Load maximum available capacity register contents with array elements by provided indexes
v_uint8	CV__SIMD_NAMESPACE::vx_lut (const uchar *ptr, const int *idx)

v_int8	CV__SIMD_NAMESPACE::vx_lut (const schar *ptr, const int *idx)

v_uint16	CV__SIMD_NAMESPACE::vx_lut (const ushort *ptr, const int *idx)

v_int16	CV__SIMD_NAMESPACE::vx_lut (const short *ptr, const int *idx)

v_int32	CV__SIMD_NAMESPACE::vx_lut (const int *ptr, const int *idx)

v_uint32	CV__SIMD_NAMESPACE::vx_lut (const unsigned *ptr, const int *idx)

v_float32	CV__SIMD_NAMESPACE::vx_lut (const float *ptr, const int *idx)

v_int64	CV__SIMD_NAMESPACE::vx_lut (const int64 *ptr, const int *idx)

v_uint64	CV__SIMD_NAMESPACE::vx_lut (const uint64 *ptr, const int *idx)

Wide LUT of element pairs
Load maximum available capacity register contents with array element pairs by provided indexes
v_uint8	CV__SIMD_NAMESPACE::vx_lut_pairs (const uchar *ptr, const int *idx)

v_int8	CV__SIMD_NAMESPACE::vx_lut_pairs (const schar *ptr, const int *idx)

v_uint16	CV__SIMD_NAMESPACE::vx_lut_pairs (const ushort *ptr, const int *idx)

v_int16	CV__SIMD_NAMESPACE::vx_lut_pairs (const short *ptr, const int *idx)

v_int32	CV__SIMD_NAMESPACE::vx_lut_pairs (const int *ptr, const int *idx)

v_uint32	CV__SIMD_NAMESPACE::vx_lut_pairs (const unsigned *ptr, const int *idx)

v_float32	CV__SIMD_NAMESPACE::vx_lut_pairs (const float *ptr, const int *idx)

v_int64	CV__SIMD_NAMESPACE::vx_lut_pairs (const int64 *ptr, const int *idx)

v_uint64	CV__SIMD_NAMESPACE::vx_lut_pairs (const uint64 *ptr, const int *idx)

Wide LUT of element quads
Load maximum available capacity register contents with array element quads by provided indexes
v_uint8	CV__SIMD_NAMESPACE::vx_lut_quads (const uchar *ptr, const int *idx)

v_int8	CV__SIMD_NAMESPACE::vx_lut_quads (const schar *ptr, const int *idx)

v_uint16	CV__SIMD_NAMESPACE::vx_lut_quads (const ushort *ptr, const int *idx)

v_int16	CV__SIMD_NAMESPACE::vx_lut_quads (const short *ptr, const int *idx)

v_int32	CV__SIMD_NAMESPACE::vx_lut_quads (const int *ptr, const int *idx)

v_uint32	CV__SIMD_NAMESPACE::vx_lut_quads (const unsigned *ptr, const int *idx)

v_float32	CV__SIMD_NAMESPACE::vx_lut_quads (const float *ptr, const int *idx)

Wide load with double expansion
Load maximum available capacity register contents from memory with double expand
v_uint16	CV__SIMD_NAMESPACE::vx_load_expand (const uchar *ptr)

v_int16	CV__SIMD_NAMESPACE::vx_load_expand (const schar *ptr)

v_uint32	CV__SIMD_NAMESPACE::vx_load_expand (const ushort *ptr)

v_int32	CV__SIMD_NAMESPACE::vx_load_expand (const short *ptr)

v_int64	CV__SIMD_NAMESPACE::vx_load_expand (const int *ptr)

v_uint64	CV__SIMD_NAMESPACE::vx_load_expand (const unsigned *ptr)

v_float32	CV__SIMD_NAMESPACE::vx_load_expand (const float16_t *ptr)

Wide load with quad expansion
Load maximum available capacity register contents from memory with quad expand
v_uint32	CV__SIMD_NAMESPACE::vx_load_expand_q (const uchar *ptr)

v_int32	CV__SIMD_NAMESPACE::vx_load_expand_q (const schar *ptr)

Pack boolean values
Pack boolean values from multiple vectors to one unsigned 8-bit integer vector
覚え書き
Must provide valid boolean values to guarantee the same result for all architectures.
template<int n>
v_reg< uchar, 2 *n >	cv::v_pack_b (const v_reg< ushort, n > &a, const v_reg< ushort, n > &b)
	For 16-bit boolean values [詳解]

template<int n>
v_reg< uchar, 4 *n >	cv::v_pack_b (const v_reg< unsigned, n > &a, const v_reg< unsigned, n > &b, const v_reg< unsigned, n > &c, const v_reg< unsigned, n > &d)

template<int n>
v_reg< uchar, 8 *n >	cv::v_pack_b (const v_reg< uint64, n > &a, const v_reg< uint64, n > &b, const v_reg< uint64, n > &c, const v_reg< uint64, n > &d, const v_reg< uint64, n > &e, const v_reg< uint64, n > &f, const v_reg< uint64, n > &g, const v_reg< uint64, n > &h)

詳解

"Universal intrinsics" is a set of types and functions intended to simplify vectorization of code on different platforms. Currently a few different SIMD extensions on different architectures are supported. Support for 128-bit registers of various types is implemented for a wide range of architectures including x86(SSE/SSE2/SSE4.2), ARM(NEON), PowerPC(VSX), MIPS(MSA). 256-bit registers are supported on x86(AVX2) and 512-bit registers are supported on x86(AVX512). If no SIMD extension is available during compilation, a fallback C++ implementation of the intrinsics is chosen and the code works as expected, although it may be slower.

Types

There are several types representing vector registers of packed values; each type is implemented as a structure based on one SIMD register.

cv::v_uint8 and cv::v_int8: 8-bit integer values (unsigned/signed) - char
cv::v_uint16 and cv::v_int16: 16-bit integer values (unsigned/signed) - short
cv::v_uint32 and cv::v_int32: 32-bit integer values (unsigned/signed) - int
cv::v_uint64 and cv::v_int64: 64-bit integer values (unsigned/signed) - int64
cv::v_float32: 32-bit floating point values (signed) - float
cv::v_float64: 64-bit floating point values (signed) - double

The exact bit length (and number of values) of the listed types is deduced at compile time and depends on the SIMD capabilities of the architecture chosen as available during compilation of the library. All the types contain an nlanes enumeration that gives the exact number of values in the type.

In case the exact bit length of the type is important it is possible to use specific fixed length register types.

There are several types representing 128-bit registers.

cv::v_uint8x16 and cv::v_int8x16: sixteen 8-bit integer values (unsigned/signed) - char
cv::v_uint16x8 and cv::v_int16x8: eight 16-bit integer values (unsigned/signed) - short
cv::v_uint32x4 and cv::v_int32x4: four 32-bit integer values (unsigned/signed) - int
cv::v_uint64x2 and cv::v_int64x2: two 64-bit integer values (unsigned/signed) - int64
cv::v_float32x4: four 32-bit floating point values (signed) - float
cv::v_float64x2: two 64-bit floating point values (signed) - double

There are several types representing 256-bit registers.

cv::v_uint8x32 and cv::v_int8x32: thirty two 8-bit integer values (unsigned/signed) - char
cv::v_uint16x16 and cv::v_int16x16: sixteen 16-bit integer values (unsigned/signed) - short
cv::v_uint32x8 and cv::v_int32x8: eight 32-bit integer values (unsigned/signed) - int
cv::v_uint64x4 and cv::v_int64x4: four 64-bit integer values (unsigned/signed) - int64
cv::v_float32x8: eight 32-bit floating point values (signed) - float
cv::v_float64x4: four 64-bit floating point values (signed) - double

覚え書き: 256-bit registers are at the moment implemented for the AVX2 SIMD extension only; if you want to use this type directly, don't forget to check the CV_SIMD256 preprocessor definition:
#if CV_SIMD256

//...

#endif

There are several types representing 512-bit registers.

cv::v_uint8x64 and cv::v_int8x64: sixty four 8-bit integer values (unsigned/signed) - char
cv::v_uint16x32 and cv::v_int16x32: thirty two 16-bit integer values (unsigned/signed) - short
cv::v_uint32x16 and cv::v_int32x16: sixteen 32-bit integer values (unsigned/signed) - int
cv::v_uint64x8 and cv::v_int64x8: eight 64-bit integer values (unsigned/signed) - int64
cv::v_float32x16: sixteen 32-bit floating point values (signed) - float
cv::v_float64x8: eight 64-bit floating point values (signed) - double
覚え書き
512-bit registers are currently implemented for the AVX512 SIMD extension only; if you want to use these types directly, don't forget to check the CV_SIMD512 preprocessor definition.

cv::v_float64x2 is not implemented in the NEON variant; if you want to use this type, don't forget to check the CV_SIMD128_64F preprocessor definition.

Load and store operations

These operations allow you to set the contents of a register explicitly or by loading it from a memory block, and to save the contents of a register to a memory block.

There are variable size register load operations that provide result of maximum available size depending on chosen platform capabilities.

Constructors: from memory,
Other create methods: vx_setall_s8, vx_setall_u8, ..., vx_setzero_u8, vx_setzero_s8, ...
Memory load operations: vx_load, vx_load_aligned, vx_load_low, vx_load_halves,
Memory operations with expansion of values: vx_load_expand, vx_load_expand_q

Also there are fixed size register load/store operations.

For 128 bit registers

Constructors: from memory, from two values, ...
Other create methods: v_setall_s8, v_setall_u8, ..., v_setzero_u8, v_setzero_s8, ...
Memory load operations: v_load, v_load_aligned, v_load_low, v_load_halves,
Memory operations with expansion of values: v_load_expand, v_load_expand_q

For 256 bit registers(check CV_SIMD256 preprocessor definition)

Constructors: from memory, from four values, ...
Other create methods: v256_setall_s8, v256_setall_u8, ..., v256_setzero_u8, v256_setzero_s8, ...
Memory load operations: v256_load, v256_load_aligned, v256_load_low, v256_load_halves,
Memory operations with expansion of values: v256_load_expand, v256_load_expand_q

For 512 bit registers(check CV_SIMD512 preprocessor definition)

Constructors: from memory, from eight values, ...
Other create methods: v512_setall_s8, v512_setall_u8, ..., v512_setzero_u8, v512_setzero_s8, ...
Memory load operations: v512_load, v512_load_aligned, v512_load_low, v512_load_halves,
Memory operations with expansion of values: v512_load_expand, v512_load_expand_q

Store to memory operations are similar across different platform capabilities: v_store, v_store_aligned, v_store_high, v_store_low

Value reordering

These operations allow to reorder or recombine elements in one or multiple vectors.

Interleave, deinterleave (2, 3 and 4 channels): v_load_deinterleave, v_store_interleave
Expand: v_expand, v_expand_low, v_expand_high
Pack: v_pack, v_pack_u, v_pack_b, v_rshr_pack, v_rshr_pack_u, v_pack_store, v_pack_u_store, v_rshr_pack_store, v_rshr_pack_u_store
Recombine: v_zip, v_recombine, v_combine_low, v_combine_high
Reverse: v_reverse
Extract: v_extract

Arithmetic, bitwise and comparison operations

Element-wise binary and unary operations.

Arithmetics: +, -, *, /, v_mul_expand
Non-saturating arithmetics: v_add_wrap, v_sub_wrap
Bitwise shifts: <<, >>, v_shl, v_shr
Bitwise logic: &, |, ^, ~
Comparison: >, >=, <, <=, ==, !=
min/max: v_min, v_max

Reduce and mask

Most of these operations return only one value.

Reduce: v_reduce_min, v_reduce_max, v_reduce_sum, v_popcount
Mask: v_signmask, v_check_all, v_check_any, v_select

Other math

Some frequent operations: v_sqrt, v_invsqrt, v_magnitude, v_sqr_magnitude
Absolute values: v_abs, v_absdiff, v_absdiffs

Conversions

Different type conversions and casts:

Rounding: v_round, v_floor, v_ceil, v_trunc,
To float: v_cvt_f32, v_cvt_f64
Reinterpret: v_reinterpret_as_u8, v_reinterpret_as_s8, ...

Matrix operations

In these operations vectors represent matrix rows/columns: v_dotprod, v_dotprod_fast, v_dotprod_expand, v_dotprod_expand_fast, v_matmul, v_transpose4x4

Usability

Most operations are implemented only for a subset of the available types; the following matrices show the applicability of the different operations to the types.

Regular integers:

Operations\Types	uint 8	int 8	uint 16	int 16	uint 32	int 32
load, store	x	x	x	x	x	x
interleave	x	x	x	x	x	x
expand	x	x	x	x	x	x
expand_low	x	x	x	x	x	x
expand_high	x	x	x	x	x	x
expand_q	x	x
add, sub	x	x	x	x	x	x
add_wrap, sub_wrap	x	x	x	x
mul_wrap	x	x	x	x
mul	x	x	x	x	x	x
mul_expand	x	x	x	x	x
compare	x	x	x	x	x	x
shift			x	x	x	x
dotprod				x		x
dotprod_fast				x		x
dotprod_expand	x	x	x	x		x
dotprod_expand_fast	x	x	x	x		x
logical	x	x	x	x	x	x
min, max	x	x	x	x	x	x
absdiff	x	x	x	x	x	x
absdiffs		x		x
reduce	x	x	x	x	x	x
mask	x	x	x	x	x	x
pack	x	x	x	x	x	x
pack_u	x		x
pack_b	x
unpack	x	x	x	x	x	x
extract	x	x	x	x	x	x
rotate (lanes)	x	x	x	x	x	x
cvt_flt32						x
cvt_flt64						x
transpose4x4					x	x
reverse	x	x	x	x	x	x
extract_n	x	x	x	x	x	x
broadcast_element					x	x

Big integers:

Operations\Types	uint 64	int 64
load, store	x	x
add, sub	x	x
shift	x	x
logical	x	x
reverse	x	x
extract	x	x
rotate (lanes)	x	x
cvt_flt64		x
extract_n	x	x

Floating point:

Operations\Types	float 32	float 64
load, store	x	x
interleave	x
add, sub	x	x
mul	x	x
div	x	x
compare	x	x
min, max	x	x
absdiff	x	x
reduce	x
mask	x	x
unpack	x	x
cvt_flt32		x
cvt_flt64	x
sqrt, abs	x	x
float math	x	x
transpose4x4	x
extract	x	x
rotate (lanes)	x	x
reverse	x	x
extract_n	x	x
broadcast_element	x

マクロ定義詳解

◆ CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES

#define CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES	(	macro_name,
		...
	)

値:

CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, __VA_ARGS__) \

CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, __VA_ARGS__) \

◆ CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES

#define CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES	(	macro_name,
		...
	)

値:

__CV_EXPAND(macro_name(float, __VA_ARGS__)) \

__CV_EXPAND(macro_name(double, __VA_ARGS__)) \

◆ CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES

#define CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES	(	macro_name,
		...
	)

値:

__CV_EXPAND(macro_name(uchar, __VA_ARGS__)) \
__CV_EXPAND(macro_name(schar, __VA_ARGS__)) \
__CV_EXPAND(macro_name(ushort, __VA_ARGS__)) \
__CV_EXPAND(macro_name(short, __VA_ARGS__)) \
__CV_EXPAND(macro_name(unsigned, __VA_ARGS__)) \
__CV_EXPAND(macro_name(int, __VA_ARGS__)) \
__CV_EXPAND(macro_name(uint64, __VA_ARGS__)) \
__CV_EXPAND(macro_name(int64, __VA_ARGS__)) \

◆ CV__HAL_INTRIN_IMPL_BIN_OP_

#define CV__HAL_INTRIN_IMPL_BIN_OP_	(	_Tp,
		bin_op
	)

値:

template<int n> inline \
v_reg<_Tp, n> operator bin_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
{ \
    v_reg<_Tp, n> c; \
    for( int i = 0; i < n; i++ ) \
        c.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
    return c; \
} \
template<int n> inline \
v_reg<_Tp, n>& operator bin_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
{ \
    for( int i = 0; i < n; i++ ) \
        a.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
    return a; \
}

◆ CV__HAL_INTRIN_IMPL_BIT_OP

#define CV__HAL_INTRIN_IMPL_BIT_OP ( bit_op )

値:

CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op) \

CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op) /* TODO: FIXIT remove this after masks refactoring */

◆ CV__HAL_INTRIN_IMPL_BIT_OP_

#define CV__HAL_INTRIN_IMPL_BIT_OP_	(	_Tp,
		bit_op
	)

値:

template<int n> CV_INLINE \
v_reg<_Tp, n> operator bit_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
{ \
    v_reg<_Tp, n> c; \
    typedef typename V_TypeTraits<_Tp>::int_type itype; \
    for( int i = 0; i < n; i++ ) \
        c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \
                                                        V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \
    return c; \
} \
template<int n> CV_INLINE \
v_reg<_Tp, n>& operator bit_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
{ \
    typedef typename V_TypeTraits<_Tp>::int_type itype; \
    for( int i = 0; i < n; i++ ) \
        a.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \
                                                        V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \
    return a; \
}

◆ CV__HAL_INTRIN_IMPL_BITWISE_NOT_

#define CV__HAL_INTRIN_IMPL_BITWISE_NOT_	(	_Tp,
		dummy
	)

値:

template<int n> CV_INLINE \
v_reg<_Tp, n> operator ~ (const v_reg<_Tp, n>& a) \
{ \
    v_reg<_Tp, n> c; \
    for( int i = 0; i < n; i++ ) \
        c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int(~V_TypeTraits<_Tp>::reinterpret_int(a.s[i])); \
    return c; \
} \

関数詳解

◆ operator&()

template<typename _Tp , int n>

CV_INLINE v_reg< _Tp, n > cv::operator&	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

Bitwise AND

Only for integer types.

◆ operator*()

template<typename _Tp , int n>

CV_INLINE v_reg< _Tp, n > cv::operator*	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

Multiply values

For 16- and 32-bit integer types and floating types.

◆ operator+()

template<typename _Tp , int n>

CV_INLINE v_reg< _Tp, n > cv::operator+	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

Add values

For all types.

◆ operator-()

template<typename _Tp , int n>

CV_INLINE v_reg< _Tp, n > cv::operator-	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

Subtract values

For all types.

◆ operator/()

template<typename _Tp , int n>

CV_INLINE v_reg< _Tp, n > cv::operator/	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

Divide values

For floating types only.

◆ operator^()

template<typename _Tp , int n>

CV_INLINE v_reg< _Tp, n > cv::operator^	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

Bitwise XOR

Only for integer types.

◆ operator|()

template<typename _Tp , int n>

CV_INLINE v_reg< _Tp, n > cv::operator|	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

Bitwise OR

Only for integer types.

◆ operator~()

template<typename _Tp , int n>

CV_INLINE v_reg< _Tp, n > cv::operator~ ( const v_reg< _Tp, n > & a )

Bitwise NOT

Only for integer types.

◆ v_absdiff() [1/3]

template<typename _Tp , int n>

v_reg< typename V_TypeTraits< _Tp >::abs_type, n > cv::v_absdiff	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

Add values without saturation

For 8- and 16-bit integer values.

Subtract values without saturation

For 8- and 16-bit integer values.

Multiply values without saturation

For 8- and 16-bit integer values.

Absolute difference

Returns $ |a - b| $ converted to corresponding unsigned type. Example:

v_int32x4 a, b; // {1, 2, 3, 4} and {4, 3, 2, 1}

v_uint32x4 c = v_absdiff(a, b); // result is {3, 1, 1, 3}

cv::v_absdiff

v_reg< typename V_TypeTraits< _Tp >::abs_type, n > v_absdiff(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

Add values without saturation

Definition: intrin_cpp.hpp:956

cv::v_int32x4

Definition: intrin_rvv.hpp:390

cv::v_uint32x4

Definition: intrin_rvv.hpp:358

For 8-, 16-, 32-bit integer source types.

◆ v_absdiff() [2/3]

template<int n>

v_reg< double, n > cv::v_absdiff	(	const v_reg< double, n > &	a,
		const v_reg< double, n > &	b
	)

inline

これはオーバーロードされたメンバ関数です。利便性のために用意されています。元の関数との違いは引き数のみです。

For 64-bit floating point values

◆ v_absdiff() [3/3]

template<int n>

v_reg< float, n > cv::v_absdiff	(	const v_reg< float, n > &	a,
		const v_reg< float, n > &	b
	)

inline

これはオーバーロードされたメンバ関数です。利便性のために用意されています。元の関数との違いは引き数のみです。

For 32-bit floating point values

◆ v_absdiffs()

template<typename _Tp , int n>

v_reg< _Tp, n > cv::v_absdiffs	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

Saturating absolute difference

Returns $ saturate(|a - b|) $ . For 8-, 16-bit signed integer source types.

◆ v_broadcast_element()

template<int i, typename _Tp , int n>

v_reg< _Tp, n > cv::v_broadcast_element ( const v_reg< _Tp, n > & a )

inline

Broadcast i-th element of vector

Scheme:

{ v[0] v[1] v[2] ... v[SZ] } => { v[i], v[i], v[i] ... v[i] }

Restriction: 0 <= i < nlanes. Supported types: 32-bit integers and floats (s32/u32/f32).

◆ v_ceil() [1/2]

template<int n>

v_reg< int, n *2 > cv::v_ceil ( const v_reg< double, n > & a )

inline

これはオーバーロードされたメンバ関数です。利便性のために用意されています。元の関数との違いは引き数のみです。

◆ v_ceil() [2/2]

template<int n>

v_reg< int, n > cv::v_ceil ( const v_reg< float, n > & a )

inline

Ceil elements

Ceil each value. Input type is float vector ==> output type is int vector.

覚え書き: Only for floating point types.

◆ v_check_all()

template<typename _Tp , int n>

bool cv::v_check_all ( const v_reg< _Tp, n > & a )

inline

Check if all packed values are less than zero

Unsigned values will be cast to signed: uchar 254 => char -2.

◆ v_check_any()

template<typename _Tp , int n>

bool cv::v_check_any ( const v_reg< _Tp, n > & a )

inline

Check if any of packed values is less than zero

Unsigned values will be cast to signed: uchar 254 => char -2.

◆ v_combine_high()

template<typename _Tp , int n>

v_reg< _Tp, n > cv::v_combine_high	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

Combine vector from last elements of two vectors

Scheme:

  {A1 A2 A3 A4}
  {B1 B2 B3 B4}
---------------
  {A3 A4 B3 B4}

For all types except 64-bit.

◆ v_combine_low()

template<typename _Tp , int n>

v_reg< _Tp, n > cv::v_combine_low	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

Combine vector from first elements of two vectors

Scheme:

  {A1 A2 A3 A4}
  {B1 B2 B3 B4}
---------------
  {A1 A2 B1 B2}

For all types except 64-bit.

◆ v_cvt_f32() [1/3]

template<int n>

v_reg< float, n *2 > cv::v_cvt_f32 ( const v_reg< double, n > & a )

inline

Convert lower half to float

Supported input type is cv::v_float64.

◆ v_cvt_f32() [2/3]

template<int n>

v_reg< float, n *2 > cv::v_cvt_f32	(	const v_reg< double, n > &	a,
		const v_reg< double, n > &	b
	)

inline

Convert to float

Supported input type is cv::v_float64.

◆ v_cvt_f32() [3/3]

template<int n>

v_reg< float, n > cv::v_cvt_f32 ( const v_reg< int, n > & a )

inline

Convert to float

Supported input type is cv::v_int32.

◆ v_cvt_f64() [1/3]

template<int n>

CV_INLINE v_reg< double,(n/2)> cv::v_cvt_f64 ( const v_reg< float, n > & a )

Convert lower half to double

Supported input type is cv::v_float32.

◆ v_cvt_f64() [2/3]

template<int n>

CV_INLINE v_reg< double, n/2 > cv::v_cvt_f64 ( const v_reg< int, n > & a )

Convert lower half to double

Supported input type is cv::v_int32.

◆ v_cvt_f64() [3/3]

template<int n>

CV_INLINE v_reg< double, n > cv::v_cvt_f64 ( const v_reg< int64, n > & a )

Convert to double

Supported input type is cv::v_int64.

◆ v_cvt_f64_high() [1/2]

template<int n>

CV_INLINE v_reg< double,(n/2)> cv::v_cvt_f64_high ( const v_reg< float, n > & a )

Convert to double high part of vector

Supported input type is cv::v_float32.

◆ v_cvt_f64_high() [2/2]

template<int n>

CV_INLINE v_reg< double,(n/2)> cv::v_cvt_f64_high ( const v_reg< int, n > & a )

Convert to double high part of vector

Supported input type is cv::v_int32.

◆ v_dotprod() [1/2]

template<typename _Tp , int n>

v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > cv::v_dotprod	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

Dot product of elements

Multiply values in two registers and sum adjacent result pairs.

Scheme:

  {A1 A2 ...} // 16-bit
x {B1 B2 ...} // 16-bit
-------------
{A1B1+A2B2 ...} // 32-bit

◆ v_dotprod() [2/2]

template<typename _Tp , int n>

v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > cv::v_dotprod	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		const v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &	c
	)

inline

Dot product of elements

Same as cv::v_dotprod, but add a third element to the sum of adjacent pairs. Scheme:

  {A1 A2 ...} // 16-bit
x {B1 B2 ...} // 16-bit
-------------
  {A1B1+A2B2+C1 ...} // 32-bit

◆ v_dotprod_expand() [1/2]

template<typename _Tp , int n>

v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > cv::v_dotprod_expand	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

Dot product of elements and expand

Multiply values in two registers and expand the sum of adjacent result pairs.

Scheme:

  {A1 A2 A3 A4 ...} // 8-bit
x {B1 B2 B3 B4 ...} // 8-bit
-------------
  {A1B1+A2B2+A3B3+A4B4 ...} // 32-bit

◆ v_dotprod_expand() [2/2]

template<typename _Tp , int n>

v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > cv::v_dotprod_expand	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		const v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > &	c
	)

inline

Dot product of elements

Same as cv::v_dotprod_expand, but add a third element to the sum of adjacent pairs. Scheme:

  {A1 A2 A3 A4 ...} // 8-bit
x {B1 B2 B3 B4 ...} // 8-bit
-------------
  {A1B1+A2B2+A3B3+A4B4+C1 ...} // 32-bit

◆ v_dotprod_expand_fast() [1/2]

template<typename _Tp , int n>

v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > cv::v_dotprod_expand_fast	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

Fast Dot product of elements and expand

Multiply values in two registers and expand the sum of adjacent result pairs.

Same as cv::v_dotprod_expand, but it may sum the result pairs in a different order on some platforms. This intrinsic can be used when only the sum across all lanes matters, and it should yield better performance on the affected platforms.

◆ v_dotprod_expand_fast() [2/2]

template<typename _Tp , int n>

v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > cv::v_dotprod_expand_fast	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		const v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > &	c
	)

inline

Fast Dot product of elements

Same as cv::v_dotprod_expand_fast, but add a third element to the sum of adjacent pairs.

◆ v_dotprod_fast() [1/2]

template<typename _Tp , int n>

v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > cv::v_dotprod_fast	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

Fast Dot product of elements

Same as cv::v_dotprod, but it may sum the result pairs in a different order on some platforms. This intrinsic can be used when only the sum across all lanes matters, and it should yield better performance on the affected platforms.

◆ v_dotprod_fast() [2/2]

template<typename _Tp , int n>

v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > cv::v_dotprod_fast	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		const v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &	c
	)

inline

Fast Dot product of elements

Same as cv::v_dotprod_fast, but add a third element to the sum of adjacent pairs.

◆ v_expand()

template<typename _Tp , int n>

void cv::v_expand	(	const v_reg< _Tp, n > &	a,
		v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &	b0,
		v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &	b1
	)

inline

Expand values to the wider pack type

Copy contents of register to two registers with 2x wider pack type. Scheme:

int32x4 int64x2 int64x2

{A B C D} ==> {A B} , {C D}

◆ v_expand_high()

template<typename _Tp , int n>

v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > cv::v_expand_high ( const v_reg< _Tp, n > & a )

inline

Expand higher values to the wider pack type

Same as cv::v_expand_low, but expand higher half of the vector instead.

Scheme:

int32x4 int64x2

{A B C D} ==> {C D}

◆ v_expand_low()

template<typename _Tp , int n>

v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > cv::v_expand_low ( const v_reg< _Tp, n > & a )

inline

Expand lower values to the wider pack type

Same as cv::v_expand, but return lower half of the vector.

Scheme:

int32x4 int64x2

{A B C D} ==> {A B}

◆ v_extract()

template<int s, typename _Tp , int n>

v_reg< _Tp, n > cv::v_extract	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

Vector extract

Scheme:

  {A1 A2 A3 A4}
  {B1 B2 B3 B4}
========================
shift = 1  {A2 A3 A4 B1}
shift = 2  {A3 A4 B1 B2}
shift = 3  {A4 B1 B2 B3}

Restriction: 0 <= shift < nlanes

Usage:

v_int32x4 a, b, c;

c = v_extract<2>(a, b);

For all types.

◆ v_extract_n()

template<int s, typename _Tp , int n>

_Tp cv::v_extract_n ( const v_reg< _Tp, n > & v )

inline

Vector extract

Scheme: Return the s-th element of v. Restriction: 0 <= s < nlanes

Usage:

v_int32x4 a;
int r;
r = v_extract_n<2>(a);

For all types.

◆ v_floor() [1/2]

template<int n>

v_reg< int, n *2 > cv::v_floor ( const v_reg< double, n > & a )

inline

これはオーバーロードされたメンバ関数です。利便性のために用意されています。元の関数との違いは引き数のみです。

◆ v_floor() [2/2]

template<int n>

v_reg< int, n > cv::v_floor ( const v_reg< float, n > & a )

inline

Floor elements

Floor each value. Input type is float vector ==> output type is int vector.

覚え書き: Only for floating point types.

◆ v_fma()

template<typename _Tp , int n>

v_reg< _Tp, n > cv::v_fma	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		const v_reg< _Tp, n > &	c
	)

inline

Multiply and add

Returns $ a*b + c $. For floating point types and signed 32-bit int only.

◆ v_invsqrt()

template<typename _Tp , int n>

v_reg< _Tp, n > cv::v_invsqrt ( const v_reg< _Tp, n > & a )

inline

Inversed square root

Returns $ 1/sqrt(a) $. For floating point types only.

◆ v_load()

template<typename _Tp >

v_reg< _Tp, simd128_width/sizeof(_Tp)> cv::v_load ( const _Tp * ptr )

inline

Load register contents from memory

引数

ptr	pointer to memory block with data

戻り値: register object

覚え書き: Returned type will be detected from passed pointer type, for example uchar ==> cv::v_uint8x16, int ==> cv::v_int32x4, etc.; Use vx_load version to get maximum available register length result; Alignment requirement: if CV_STRONG_ALIGNMENT=1 then passed pointer must be aligned (sizeof(lane type) should be enough). Do not cast pointer types without runtime check for pointer alignment (like uchar* => int*).

◆ v_load_aligned()

template<typename _Tp >

v_reg< _Tp, simd128_width/sizeof(_Tp)> cv::v_load_aligned ( const _Tp * ptr )

inline

Load register contents from memory (aligned)

similar to cv::v_load, but source memory block should be aligned (to 16-byte boundary in case of SIMD128, 32-byte - SIMD256, etc)

覚え書き: Use vx_load_aligned version to get maximum available register length result

◆ v_load_deinterleave() [1/3]

template<typename _Tp , int n>

void cv::v_load_deinterleave	(	const _Tp *	ptr,
		v_reg< _Tp, n > &	a,
		v_reg< _Tp, n > &	b
	)

inline

Load and deinterleave (2 channels)

Load data from memory deinterleave and store to 2 registers. Scheme:

{A1 B1 A2 B2 ...} ==> {A1 A2 ...}, {B1 B2 ...}

For all types except 64-bit.

◆ v_load_deinterleave() [2/3]

template<typename _Tp , int n>

void cv::v_load_deinterleave	(	const _Tp *	ptr,
		v_reg< _Tp, n > &	a,
		v_reg< _Tp, n > &	b,
		v_reg< _Tp, n > &	c
	)

inline

Load and deinterleave (3 channels)

Load data from memory deinterleave and store to 3 registers. Scheme:

{A1 B1 C1 A2 B2 C2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}

For all types except 64-bit.

◆ v_load_deinterleave() [3/3]

template<typename _Tp , int n>

void cv::v_load_deinterleave	(	const _Tp *	ptr,
		v_reg< _Tp, n > &	a,
		v_reg< _Tp, n > &	b,
		v_reg< _Tp, n > &	c,
		v_reg< _Tp, n > &	d
	)

inline

Load and deinterleave (4 channels)

Load data from memory deinterleave and store to 4 registers. Scheme:

{A1 B1 C1 D1 A2 B2 C2 D2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...}

For all types except 64-bit.

◆ v_load_expand()

template<typename _Tp >

v_reg< typename V_TypeTraits< _Tp >::w_type, simd128_width/sizeof(typename V_TypeTraits< _Tp >::w_type)> cv::v_load_expand ( const _Tp * ptr )

inline

Load register contents from memory with double expand

Same as cv::v_load, but result pack type will be 2x wider than memory type.

short buf[4] = {1, 2, 3, 4}; // type is int16

v_int32x4 r = v_load_expand(buf); // r = {1, 2, 3, 4} - type is int32

cv::v_load_expand

v_reg< typename V_TypeTraits< _Tp >::w_type, simd128_width/sizeof(typename V_TypeTraits< _Tp >::w_type)> v_load_expand(const _Tp *ptr)

Load register contents from memory with double expand

Definition: intrin_cpp.hpp:1875

cv::v_int32x4

v_reg< int, 4 > v_int32x4

Four 32-bit signed integer values

Definition: intrin_cpp.hpp:498

For 8-, 16-, 32-bit integer source types.

覚え書き: Use vx_load_expand version to get maximum available register length result

◆ v_load_expand_q()

template<typename _Tp >

v_reg< typename V_TypeTraits< _Tp >::q_type, simd128_width/sizeof(typename V_TypeTraits< _Tp >::q_type)> cv::v_load_expand_q ( const _Tp * ptr )

inline

Load register contents from memory with quad expand

Same as cv::v_load_expand, but result type is 4 times wider than source.

char buf[4] = {1, 2, 3, 4}; // type is int8

v_int32x4 r = v_load_expand_q(buf); // r = {1, 2, 3, 4} - type is int32

cv::v_load_expand_q

v_reg< typename V_TypeTraits< _Tp >::q_type, simd128_width/sizeof(typename V_TypeTraits< _Tp >::q_type)> v_load_expand_q(const _Tp *ptr)

Load register contents from memory with quad expand

Definition: intrin_cpp.hpp:1964

For 8-bit integer source types.

覚え書き: Use vx_load_expand_q version to get maximum available register length result

◆ v_load_halves()

template<typename _Tp >

v_reg< _Tp, simd128_width/sizeof(_Tp)> cv::v_load_halves	(	const _Tp *	loptr,
		const _Tp *	hiptr
	)

inline

Load register contents from two memory blocks

引数

loptr	memory block containing data for first half (0..n/2)
hiptr	memory block containing data for second half (n/2..n)

int lo[2] = { 1, 2 }, hi[2] = { 3, 4 };

v_int32x4 r = v_load_halves(lo, hi);

cv::v_load_halves

v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load_halves(const _Tp *loptr, const _Tp *hiptr)

Load register contents from two memory blocks

Definition: intrin_cpp.hpp:1784

覚え書き: Use vx_load_halves version to get maximum available register length result

◆ v_load_low()

template<typename _Tp >

v_reg< _Tp, simd128_width/sizeof(_Tp)> cv::v_load_low ( const _Tp * ptr )

inline

Load 64-bits of data to lower part (high part is undefined).

引数

ptr	memory block containing data for first half (0..n/2)

int lo[2] = { 1, 2 };

v_int32x4 r = v_load_low(lo);

cv::v_load_low

v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load_low(const _Tp *ptr)

Load 64-bits of data to lower part (high part is undefined).

Definition: intrin_cpp.hpp:1702

覚え書き: Use vx_load_low version to get maximum available register length result

◆ v_magnitude()

template<typename _Tp , int n>

v_reg< _Tp, n > cv::v_magnitude	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

Magnitude

Returns $ sqrt(a^2 + b^2) $. For floating point types only.

◆ v_matmul()

template<int n>

v_reg< float, n > cv::v_matmul	(	const v_reg< float, n > &	v,
		const v_reg< float, n > &	a,
		const v_reg< float, n > &	b,
		const v_reg< float, n > &	c,
		const v_reg< float, n > &	d
	)

inline

Matrix multiplication

Scheme:

{A0 A1 A2 A3}   |V0|
{B0 B1 B2 B3}   |V1|
{C0 C1 C2 C3}   |V2|
{D0 D1 D2 D3} x |V3|
====================
{R0 R1 R2 R3}, where:
R0 = A0V0 + B0V1 + C0V2 + D0V3,
R1 = A1V0 + B1V1 + C1V2 + D1V3
...

◆ v_matmuladd()

template<int n>

v_reg< float, n > cv::v_matmuladd	(	const v_reg< float, n > &	v,
		const v_reg< float, n > &	a,
		const v_reg< float, n > &	b,
		const v_reg< float, n > &	c,
		const v_reg< float, n > &	d
	)

inline

Matrix multiplication and add

Scheme:

{A0 A1 A2 A3}   |V0|   |D0|
{B0 B1 B2 B3}   |V1|   |D1|
{C0 C1 C2 C3} x |V2| + |D2|
====================   |D3|
{R0 R1 R2 R3}, where:
R0 = A0V0 + B0V1 + C0V2 + D0,
R1 = A1V0 + B1V1 + C1V2 + D1
...

◆ v_mul_expand()

template<typename _Tp , int n>

void cv::v_mul_expand	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &	c,
		v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &	d
	)

inline

Multiply and expand

Multiply the values of two registers and store the results in two registers with a wider pack type. Scheme:

  {A B C D} // 32-bit
x {E F G H} // 32-bit
---------------
{AE BF}         // 64-bit
        {CG DH} // 64-bit

Example:

v_uint32x4 a, b; // {1,2,3,4} and {2,2,2,2}
v_uint64x2 c, d; // results
v_mul_expand(a, b, c, d); // c, d = {2,4}, {6, 8}

Implemented only for 16- and unsigned 32-bit source types (v_int16x8, v_uint16x8, v_uint32x4).

◆ v_mul_hi()

template<typename _Tp , int n>

v_reg< _Tp, n > cv::v_mul_hi	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

Multiply and extract high part

Multiply the values of two registers and store the high part of the results. Implemented only for 16-bit source types (v_int16x8, v_uint16x8). Returns $ a*b >> 16 $.

◆ v_not_nan()

template<int n>

v_reg< float, n > cv::v_not_nan ( const v_reg< float, n > & a )

inline

Less-than comparison

For all types except 64-bit integer values.

Greater-than comparison

For all types except 64-bit integer values.

Less-than or equal comparison

For all types except 64-bit integer values.

Greater-than or equal comparison

For all types except 64-bit integer values.

Equal comparison

For all types except 64-bit integer values.

Not equal comparison

For all types except 64-bit integer values.

◆ v_pack_b() [1/3]

template<int n>

v_reg< uchar, 8 *n > cv::v_pack_b	(	const v_reg< uint64, n > &	a,
		const v_reg< uint64, n > &	b,
		const v_reg< uint64, n > &	c,
		const v_reg< uint64, n > &	d,
		const v_reg< uint64, n > &	e,
		const v_reg< uint64, n > &	f,
		const v_reg< uint64, n > &	g,
		const v_reg< uint64, n > &	h
	)

inline

これはオーバーロードされたメンバ関数です。利便性のために用意されています。元の関数との違いは引き数のみです。 For 64-bit boolean values

Scheme:

a  {0xFFFF.. 0}
b  {0 0xFFFF..}
c  {0xFFFF.. 0}
d  {0 0xFFFF..}
 
e  {0xFFFF.. 0}
f  {0xFFFF.. 0}
g  {0 0xFFFF..}
h  {0 0xFFFF..}
===============
{
   0xFF 0 0 0xFF 0xFF 0 0 0xFF
   0xFF 0 0xFF 0 0 0xFF 0 0xFF
}

◆ v_pack_b() [2/3]

template<int n>

v_reg< uchar, 4 *n > cv::v_pack_b	(	const v_reg< unsigned, n > &	a,
		const v_reg< unsigned, n > &	b,
		const v_reg< unsigned, n > &	c,
		const v_reg< unsigned, n > &	d
	)

inline

これはオーバーロードされたメンバ関数です。利便性のために用意されています。元の関数との違いは引き数のみです。 For 32-bit boolean values

Scheme:

a  {0xFFFF.. 0 0 0xFFFF..}
b  {0 0xFFFF.. 0xFFFF.. 0}
c  {0xFFFF.. 0 0xFFFF.. 0}
d  {0 0xFFFF.. 0 0xFFFF..}
===============
{
   0xFF 0 0 0xFF 0 0xFF 0xFF 0
   0xFF 0 0xFF 0 0 0xFF 0 0xFF
}

◆ v_pack_b() [3/3]

template<int n>

v_reg< uchar, 2 *n > cv::v_pack_b	(	const v_reg< ushort, n > &	a,
		const v_reg< ushort, n > &	b
	)

inline

For 16-bit boolean values

Scheme:

a  {0xFFFF 0 0 0xFFFF 0 0xFFFF 0xFFFF 0}
b  {0xFFFF 0 0xFFFF 0 0 0xFFFF 0 0xFFFF}
===============
{
   0xFF 0 0 0xFF 0 0xFF 0xFF 0
   0xFF 0 0xFF 0 0 0xFF 0 0xFF
}

◆ v_popcount()

template<typename _Tp , int n>

v_reg< typename V_TypeTraits< _Tp >::abs_type, n > cv::v_popcount ( const v_reg< _Tp, n > & a )

inline

Count the 1 bits in the vector lanes and return result as corresponding unsigned type

Scheme:

{A1 A2 A3 ...} => {popcount(A1), popcount(A2), popcount(A3), ...}

For all integer types.

◆ v_recombine()

template<typename _Tp , int n>

void cv::v_recombine	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		v_reg< _Tp, n > &	low,
		v_reg< _Tp, n > &	high
	)

inline

Combine two vectors from lower and higher parts of two other vectors

low = cv::v_combine_low(a, b);

high = cv::v_combine_high(a, b);

cv::v_combine_high

v_reg< _Tp, n > v_combine_high(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

Combine vector from last elements of two vectors

Definition: intrin_cpp.hpp:2307

cv::v_combine_low

v_reg< _Tp, n > v_combine_low(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

Combine vector from first elements of two vectors

Definition: intrin_cpp.hpp:2285

◆ v_reduce_sad()

template<typename _Tp , int n>

V_TypeTraits< typename V_TypeTraits< _Tp >::abs_type >::sum_type cv::v_reduce_sad	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

Sum absolute differences of values

Scheme:

{A1 A2 A3 ...} {B1 B2 B3 ...} => sum{abs(A1-B1),abs(A2-B2),abs(A3-B3),...}

cv::abs

softfloat abs(softfloat a)

Absolute value

Definition: softfloat.hpp:444

For all types except 64-bit types.

◆ v_reduce_sum()

template<typename _Tp , int n>

V_TypeTraits< _Tp >::sum_type cv::v_reduce_sum ( const v_reg< _Tp, n > & a )

inline

Element shift left among vector

For all types.

Element shift right among vector

For all types.

Sum packed values

Scheme:

{A1 A2 A3 ...} => sum{A1,A2,A3,...}

◆ v_reduce_sum4()

template<int n>

v_reg< float, n > cv::v_reduce_sum4	(	const v_reg< float, n > &	a,
		const v_reg< float, n > &	b,
		const v_reg< float, n > &	c,
		const v_reg< float, n > &	d
	)

inline

Sums all elements of each input vector, returns the vector of sums

Scheme:

result[0] = a[0] + a[1] + a[2] + a[3]
result[1] = b[0] + b[1] + b[2] + b[3]
result[2] = c[0] + c[1] + c[2] + c[3]
result[3] = d[0] + d[1] + d[2] + d[3]

◆ v_reverse()

template<typename _Tp , int n>

v_reg< _Tp, n > cv::v_reverse ( const v_reg< _Tp, n > & a )

inline

Vector reverse order

Reverse the order of the vector elements. Scheme:

REG {A1 ... An} ==> REG {An ... A1}

For all types.

◆ v_round() [1/3]

template<int n>

v_reg< int, n *2 > cv::v_round ( const v_reg< double, n > & a )

inline

これはオーバーロードされたメンバ関数です。利便性のために用意されています。元の関数との違いは引き数のみです。

◆ v_round() [2/3]

template<int n>

v_reg< int, n *2 > cv::v_round	(	const v_reg< double, n > &	a,
		const v_reg< double, n > &	b
	)

inline

これはオーバーロードされたメンバ関数です。利便性のために用意されています。元の関数との違いは引き数のみです。

◆ v_round() [3/3]

template<int n>

v_reg< int, n > cv::v_round ( const v_reg< float, n > & a )

inline

Round elements

Rounds each value. Input type is float vector ==> output type is int vector.

覚え書き: Only for floating point types.

◆ v_scan_forward()

template<typename _Tp , int n>

int cv::v_scan_forward ( const v_reg< _Tp, n > & a )

inline

Get first negative lane index

Returned value is the index of the first negative lane (undefined if the input has no negative lanes). Example:

v_int32x4 r; // set to {0, 0, -1, -1}

int idx = v_scan_forward(r); // idx = 2

◆ v_select()

template<typename _Tp , int n>

v_reg< _Tp, n > cv::v_select	(	const v_reg< _Tp, n > &	mask,
		const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

Per-element select (blend operation)

Return value will be built by combining values a and b using the following scheme: result[i] = mask[i] ? a[i] : b[i];

覚え書き

Mask element values are restricted to these values:

0: select element from b
0xff/0xffff/etc: select element from a (fully compatible with bitwise-based operator)

◆ v_signmask()

template<typename _Tp , int n>

int cv::v_signmask ( const v_reg< _Tp, n > & a )

inline

Get negative values mask

非推奨: v_signmask depends heavily on the lane count and therefore isn't universal enough

Returned value is a bit mask with bits set to 1 at the positions corresponding to the indexes of negative packed values. Example:

v_int32x4 r; // set to {-1, -1, 1, 1}

int mask = v_signmask(r); // mask = 3 <== 00000000 00000000 00000000 00000011

cv::v_signmask

int v_signmask(const v_reg< _Tp, n > &a)

Get negative values mask

Definition: intrin_cpp.hpp:1395

◆ v_sqr_magnitude()

template<typename _Tp , int n>

v_reg< _Tp, n > cv::v_sqr_magnitude	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

Square of the magnitude

Returns $a^2 + b^2$. For floating point types only.

◆ v_store()

template<typename _Tp , int n>

void cv::v_store	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a
	)

inline

Store data to memory

Store register contents to memory. Scheme:

REG {A B C D} ==> MEM {A B C D}

Pointer can be unaligned.

◆ v_store_aligned()

template<typename _Tp , int n>

void cv::v_store_aligned	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a
	)

inline

Store data to memory (aligned)

Store register contents to memory. Scheme:

REG {A B C D} ==> MEM {A B C D}

Pointer should be aligned to a 16-byte boundary.

◆ v_store_high()

template<typename _Tp , int n>

void cv::v_store_high	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a
	)

inline

Store data to memory (higher half)

Store higher half of register contents to memory. Scheme:

REG {A B C D} ==> MEM {C D}

◆ v_store_interleave() [1/3]

template<typename _Tp , int n>

void cv::v_store_interleave	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		const v_reg< _Tp, n > &	c,
		const v_reg< _Tp, n > &	d,
		hal::StoreMode	= `hal::STORE_UNALIGNED`
	)

inline

Interleave and store (4 channels)

Interleave and store data from 4 registers to memory. Scheme:

{A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...} ==> {A1 B1 C1 D1 A2 B2 C2 D2 ...}

For all types except 64-bit.

◆ v_store_interleave() [2/3]

template<typename _Tp , int n>

void cv::v_store_interleave	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		const v_reg< _Tp, n > &	c,
		hal::StoreMode	= `hal::STORE_UNALIGNED`
	)

inline

Interleave and store (3 channels)

Interleave and store data from 3 registers to memory. Scheme:

{A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...} ==> {A1 B1 C1 A2 B2 C2 ...}

For all types except 64-bit.

◆ v_store_interleave() [3/3]

template<typename _Tp , int n>

void cv::v_store_interleave	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		hal::StoreMode	= `hal::STORE_UNALIGNED`
	)

inline

Interleave and store (2 channels)

Interleave and store data from 2 registers to memory. Scheme:

{A1 A2 ...}, {B1 B2 ...} ==> {A1 B1 A2 B2 ...}

For all types except 64-bit.

◆ v_store_low()

template<typename _Tp , int n>

void cv::v_store_low	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a
	)

inline

Store data to memory (lower half)

Store lower half of register contents to memory. Scheme:

REG {A B C D} ==> MEM {A B}

◆ v_transpose4x4()

template<typename _Tp , int n>

void cv::v_transpose4x4	(	v_reg< _Tp, n > &	a0,
		const v_reg< _Tp, n > &	a1,
		const v_reg< _Tp, n > &	a2,
		const v_reg< _Tp, n > &	a3,
		v_reg< _Tp, n > &	b0,
		v_reg< _Tp, n > &	b1,
		v_reg< _Tp, n > &	b2,
		v_reg< _Tp, n > &	b3
	)

inline

Transpose 4x4 matrix

Scheme:

a0  {A1 A2 A3 A4}
a1  {B1 B2 B3 B4}
a2  {C1 C2 C3 C4}
a3  {D1 D2 D3 D4}
===============
b0  {A1 B1 C1 D1}
b1  {A2 B2 C2 D2}
b2  {A3 B3 C3 D3}
b3  {A4 B4 C4 D4}

◆ v_trunc() [1/2]

template<int n>

v_reg< int, n *2 > cv::v_trunc ( const v_reg< double, n > & a )

inline

これはオーバーロードされたメンバ関数です。利便性のために用意されています。元の関数との違いは引き数のみです。

◆ v_trunc() [2/2]

template<int n>

v_reg< int, n > cv::v_trunc ( const v_reg< float, n > & a )

inline

Truncate elements

Truncate each value. Input type is float vector ==> output type is int vector.

覚え書き: Only for floating point types.

◆ v_zip()

template<typename _Tp , int n>

void cv::v_zip	(	const v_reg< _Tp, n > &	a0,
		const v_reg< _Tp, n > &	a1,
		v_reg< _Tp, n > &	b0,
		v_reg< _Tp, n > &	b1
	)

inline

Interleave two vectors

Scheme:

  {A1 A2 A3 A4}
  {B1 B2 B3 B4}
---------------
  {A1 B1 A2 B2} and {A3 B3 A4 B4}

For all types except 64-bit.

モジュール

クラス

マクロ定義

型定義

列挙型

関数

Wide init with value

Wide init with zero

Wide load from memory

Wide load from memory (aligned)

Wide load lower half from memory

Wide load halves from memory

Wide LUT of elements

Wide LUT of element pairs

Wide LUT of element quads

Wide load with double expansion

Wide load with quad expansion

Pack boolean values

詳解

Types

Load and store operations

Value reordering

Arithmetic, bitwise and comparison operations

Reduce and mask

Other math

Conversions

Matrix operations

Usability

マクロ定義詳解

◆ CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES

◆ CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES

◆ CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES

◆ CV__HAL_INTRIN_IMPL_BIN_OP_

◆ CV__HAL_INTRIN_IMPL_BIT_OP

◆ CV__HAL_INTRIN_IMPL_BIT_OP_

◆ CV__HAL_INTRIN_IMPL_BITWISE_NOT_

関数詳解

◆ operator&()

◆ operator*()

◆ operator+()

◆ operator-()

◆ operator/()

◆ operator^()

◆ operator|()

◆ operator~()

◆ v_absdiff() [1/3]

◆ v_absdiff() [2/3]

◆ v_absdiff() [3/3]

◆ v_absdiffs()

◆ v_broadcast_element()

◆ v_ceil() [1/2]

◆ v_ceil() [2/2]

◆ v_check_all()

◆ v_check_any()

◆ v_combine_high()

◆ v_combine_low()

◆ v_cvt_f32() [1/3]

◆ v_cvt_f32() [2/3]

◆ v_cvt_f32() [3/3]

◆ v_cvt_f64() [1/3]

◆ v_cvt_f64() [2/3]

◆ v_cvt_f64() [3/3]

◆ v_cvt_f64_high() [1/2]

◆ v_cvt_f64_high() [2/2]

◆ v_dotprod() [1/2]

◆ v_dotprod() [2/2]

◆ v_dotprod_expand() [1/2]

◆ v_dotprod_expand() [2/2]

◆ v_dotprod_expand_fast() [1/2]

◆ v_dotprod_expand_fast() [2/2]

◆ v_dotprod_fast() [1/2]

◆ v_dotprod_fast() [2/2]

◆ v_expand()

◆ v_expand_high()

◆ v_expand_low()

◆ v_extract()

◆ v_extract_n()

◆ v_floor() [1/2]

◆ v_floor() [2/2]

◆ v_fma()