モジュール
	Private implementation helpers

クラス
struct	cv::v_reg< _Tp, n >

マクロ定義
#define	CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, ...)

#define	CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, ...)

#define	CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(macro_name, ...)

#define	CV__HAL_INTRIN_IMPL_BIN_OP_(_Tp, bin_op)

#define	CV__HAL_INTRIN_IMPL_BIN_OP(bin_op) CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(CV__HAL_INTRIN_IMPL_BIN_OP_, bin_op)

#define	CV__HAL_INTRIN_IMPL_BIT_OP_(_Tp, bit_op)

#define	CV__HAL_INTRIN_IMPL_BIT_OP(bit_op)

#define	CV__HAL_INTRIN_IMPL_BITWISE_NOT_(_Tp, dummy)

型定義
typedef v_reg< uchar, 16 >	cv::v_uint8x16
	16個の8ビット符号なし整数値

typedef v_reg< schar, 16 >	cv::v_int8x16
	16 個の 8 ビット符号付き整数値

typedef v_reg< ushort, 8 >	cv::v_uint16x8
	8個の16ビット符号なし整数値

typedef v_reg< short, 8 >	cv::v_int16x8
	8個の16ビット符号付き整数値

typedef v_reg< unsigned, 4 >	cv::v_uint32x4
	4つの32ビット符号なし整数値

typedef v_reg< int, 4 >	cv::v_int32x4
	4個の32ビット符号付き整数値

typedef v_reg< float, 4 >	cv::v_float32x4
	32ビット浮動小数点値（単精度）4個

typedef v_reg< double, 2 >	cv::v_float64x2
	2つの64ビット浮動小数点値（倍精度）

typedef v_reg< uint64, 2 >	cv::v_uint64x2
	2つの64ビット符号なし整数値

typedef v_reg< int64, 2 >	cv::v_int64x2
	2つの64ビット符号付き整数値

列挙型
enum	{ simd128_width = 16 , simdmax_width = simd128_width }

関数
void	CV__SIMD_NAMESPACE::vx_cleanup ()
	SIMD処理状態のクリーンアップコール

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n >	cv::operator+ (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	値の加算[【詳解】（英語］

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n > &	cv::operator+= (v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n >	cv::operator- (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	値の減算[【詳解】（英語］

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n > &	cv::operator-= (v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n >	cv::operator* (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	値の乗算[【詳解】（英語］

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n > &	cv::operator*= (v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n >	cv::operator/ (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	値の除算[【詳解】（英語］

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n > &	cv::operator/= (v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n >	cv::operator& (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	ビットごとのAND[【詳解】（英語］

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n > &	cv::operator&= (v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n >	cv::operator| (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	ビットごとのOR[【詳解】（英語］

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n > &	cv::operator|= (v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n >	cv::operator^ (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	ビットごとのXOR[【詳解】（英語］

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n > &	cv::operator^= (v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

template<typename _Tp , int n>
CV_INLINE v_reg< _Tp, n >	cv::operator~ (const v_reg< _Tp, n > &a)
	ビットごとのNOT[【詳解】（英語］

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::abs_type, n >	cv::v_popcount (const v_reg< _Tp, n > &a)
	ベクトルレーン内の1ビットをカウントし、対応する符号なしタイプとして結果を返します。[【詳解】（英語］

template<int n>
v_reg< float, n >	cv::v_not_nan (const v_reg< float, n > &a)
	NaNでない要素の判定[【詳解】（英語］

template<int n>
v_reg< double, n >	cv::v_not_nan (const v_reg< double, n > &a)

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::abs_type, n >	cv::v_absdiff (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	絶対差[【詳解】（英語］

template<int n>
v_reg< float, n >	cv::v_absdiff (const v_reg< float, n > &a, const v_reg< float, n > &b)

template<int n>
v_reg< double, n >	cv::v_absdiff (const v_reg< double, n > &a, const v_reg< double, n > &b)

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_absdiffs (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	飽和絶対差[【詳解】（英語］

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_invsqrt (const v_reg< _Tp, n > &a)
	反転平方根[【詳解】（英語］

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_magnitude (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	マグニチュード[【詳解】（英語］

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_sqr_magnitude (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	マグニチュードの2乗[【詳解】（英語］

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_fma (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c)
	乗算・加算[【詳解】（英語］

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_muladd (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c)
	v_fmaの同義語

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 >	cv::v_dotprod (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	要素のドットプロダクト[【詳解】（英語］

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 >	cv::v_dotprod (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &c)
	要素のドットプロダクト[【詳解】（英語］

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 >	cv::v_dotprod_fast (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	要素の高速ドット積[【詳解】（英語］

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 >	cv::v_dotprod_fast (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &c)
	要素の高速ドット積[【詳解】（英語］

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 >	cv::v_dotprod_expand (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	要素の点積と展開[【詳解】（英語］

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 >	cv::v_dotprod_expand (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > &c)
	要素のドットプロダクト[【詳解】（英語］

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 >	cv::v_dotprod_expand_fast (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	高速な要素のドット積と展開[【詳解】（英語］

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 >	cv::v_dotprod_expand_fast (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > &c)
	要素の高速ドット積[【詳解】（英語］

template<typename _Tp , int n>
void	cv::v_mul_expand (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &c, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &d)
	乗算と展開[【詳解】（英語］

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_mul_hi (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	乗算と高次部分の抽出[【詳解】（英語］

template<typename _Tp , int n>
V_TypeTraits< _Tp >::sum_type	cv::v_reduce_sum (const v_reg< _Tp, n > &a)
	パックされた値の総和[【詳解】（英語］

template<int n>
v_reg< float, n >	cv::v_reduce_sum4 (const v_reg< float, n > &a, const v_reg< float, n > &b, const v_reg< float, n > &c, const v_reg< float, n > &d)
	各入力ベクトルのすべての要素の和をとり、和のベクトルを返します。[【詳解】（英語］

template<typename _Tp , int n>
V_TypeTraits< typename V_TypeTraits< _Tp >::abs_type >::sum_type	cv::v_reduce_sad (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	値の差の絶対値の合計[【詳解】（英語］

template<typename _Tp , int n>
int	cv::v_signmask (const v_reg< _Tp, n > &a)
	負の値のマスク取得[【詳解】（英語］

template<typename _Tp , int n>
int	cv::v_scan_forward (const v_reg< _Tp, n > &a)
	最初の負のレーンのインデックスを取得[【詳解】（英語］

template<typename _Tp , int n>
bool	cv::v_check_all (const v_reg< _Tp, n > &a)
	パックされたすべての値がゼロより小さいかどうかをチェックします。[【詳解】（英語］

template<typename _Tp , int n>
bool	cv::v_check_any (const v_reg< _Tp, n > &a)
	パックされた値のいずれかがゼロより小さいかどうかをチェックします。[【詳解】（英語］

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_select (const v_reg< _Tp, n > &mask, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	エレメント単位の選択（ブレンド操作）[【詳解】（英語］

template<typename _Tp , int n>
void	cv::v_expand (const v_reg< _Tp, n > &a, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &b0, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &b1)
	値をより広いパックタイプに展開[【詳解】（英語］

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 >	cv::v_expand_low (const v_reg< _Tp, n > &a)
	低い値をより広いパックタイプに展開します。[【詳解】（英語］

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 >	cv::v_expand_high (const v_reg< _Tp, n > &a)
	より高い値をより広いパックタイプに展開[【詳解】（英語］

template<typename _Tp , int n>
void	cv::v_zip (const v_reg< _Tp, n > &a0, const v_reg< _Tp, n > &a1, v_reg< _Tp, n > &b0, v_reg< _Tp, n > &b1)
	2つのベクターをインターリーブする[【詳解】（英語］

template<typename _Tp >
v_reg< _Tp, simd128_width/sizeof(_Tp)>	cv::v_load (const _Tp *ptr)
	レジスタの内容をメモリから読み込む[【詳解】（英語］

template<typename _Tp >
v_reg< _Tp, simd128_width/sizeof(_Tp)>	cv::v_load_aligned (const _Tp *ptr)
	メモリからレジスタの内容を読み込む（アラインド）．[【詳解】（英語］

template<typename _Tp >
v_reg< _Tp, simd128_width/sizeof(_Tp)>	cv::v_load_low (const _Tp *ptr)
	下位部分に64ビットのデータをロードする（上位部分は未定義）。[【詳解】（英語］

template<typename _Tp >
v_reg< _Tp, simd128_width/sizeof(_Tp)>	cv::v_load_halves (const _Tp *loptr, const _Tp *hiptr)
	2つのメモリブロックからレジスタコンテンツをロード[【詳解】（英語］

template<typename _Tp >
v_reg< typename V_TypeTraits< _Tp >::w_type, simd128_width/sizeof(typename V_TypeTraits< _Tp >::w_type)>	cv::v_load_expand (const _Tp *ptr)
	メモリからレジスタの内容をダブルエキスパンダーでロードします。[【詳解】（英語］

template<typename _Tp >
v_reg< typename V_TypeTraits< _Tp >::q_type, simd128_width/sizeof(typename V_TypeTraits< _Tp >::q_type)>	cv::v_load_expand_q (const _Tp *ptr)
	クワッドエキスパンダーでメモリからレジスタの内容をロードします。[【詳解】（英語］

template<typename _Tp , int n>
void	cv::v_load_deinterleave (const _Tp *ptr, v_reg< _Tp, n > &a, v_reg< _Tp, n > &b)
	ロード＆デインターリーブ（2チャネル）[【詳解】（英語］

template<typename _Tp , int n>
void	cv::v_load_deinterleave (const _Tp *ptr, v_reg< _Tp, n > &a, v_reg< _Tp, n > &b, v_reg< _Tp, n > &c)
	ロードアンドデインターリーブ(3チャネル)[【詳解】（英語］

template<typename _Tp , int n>
void	cv::v_load_deinterleave (const _Tp *ptr, v_reg< _Tp, n > &a, v_reg< _Tp, n > &b, v_reg< _Tp, n > &c, v_reg< _Tp, n > &d)
	ロード＆デインターリーブ(4チャネル)[【詳解】（英語］

template<typename _Tp , int n>
void	cv::v_store_interleave (_Tp *ptr, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, hal::StoreMode=hal::STORE_UNALIGNED)
	インターリーブ＆ストア(2チャンネル)[【詳解】（英語］

template<typename _Tp , int n>
void	cv::v_store_interleave (_Tp *ptr, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c, hal::StoreMode=hal::STORE_UNALIGNED)
	インターリーブ＆ストア(3チャネル)[【詳解】（英語］

template<typename _Tp , int n>
void	cv::v_store_interleave (_Tp *ptr, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c, const v_reg< _Tp, n > &d, hal::StoreMode=hal::STORE_UNALIGNED)
	インターリーブ＆ストア(4チャンネル)[【詳解】（英語］

template<typename _Tp , int n>
void	cv::v_store (_Tp *ptr, const v_reg< _Tp, n > &a)
	データのメモリへの格納[【詳解】（英語］

template<typename _Tp , int n>
void	cv::v_store (_Tp *ptr, const v_reg< _Tp, n > &a, hal::StoreMode)

template<typename _Tp , int n>
void	cv::v_store_low (_Tp *ptr, const v_reg< _Tp, n > &a)
	メモリへの格納(下半分)[【詳解】（英語］

template<typename _Tp , int n>
void	cv::v_store_high (_Tp *ptr, const v_reg< _Tp, n > &a)
	データのメモリへの保存（上位半分）[【詳解】（英語］

template<typename _Tp , int n>
void	cv::v_store_aligned (_Tp *ptr, const v_reg< _Tp, n > &a)
	メモリへのデータの格納（アラインド）[【詳解】（英語］

template<typename _Tp , int n>
void	cv::v_store_aligned_nocache (_Tp *ptr, const v_reg< _Tp, n > &a)

template<typename _Tp , int n>
void	cv::v_store_aligned (_Tp *ptr, const v_reg< _Tp, n > &a, hal::StoreMode)

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_combine_low (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	2つのベクトルの最初の要素からベクトルを合成する[【詳解】（英語］

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_combine_high (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	2つのベクトルの最後の要素からベクトルを合成する[【詳解】（英語］

template<typename _Tp , int n>
void	cv::v_recombine (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, v_reg< _Tp, n > &low, v_reg< _Tp, n > &high)
	2つのベクトルの下位部分と上位部分から2つのベクトルを合成する[【詳解】（英語］

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_reverse (const v_reg< _Tp, n > &a)
	ベクトルの逆順[【詳解】（英語］

template<int s, typename _Tp , int n>
v_reg< _Tp, n >	cv::v_extract (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	ベクトル抽出[【詳解】（英語］

template<int s, typename _Tp , int n>
_Tp	cv::v_extract_n (const v_reg< _Tp, n > &v)
	ベクトル抽出[【詳解】（英語］

template<int i, typename _Tp , int n>
v_reg< _Tp, n >	cv::v_broadcast_element (const v_reg< _Tp, n > &a)
	ベクトルのi番目の要素をブロードキャストする[【詳解】（英語］

template<int n>
v_reg< int, n >	cv::v_round (const v_reg< float, n > &a)
	要素の丸め[【詳解】（英語］

template<int n>
v_reg< int, n *2 >	cv::v_round (const v_reg< double, n > &a, const v_reg< double, n > &b)

template<int n>
v_reg< int, n >	cv::v_floor (const v_reg< float, n > &a)
	要素の切り下げ（floor）[【詳解】（英語］

template<int n>
v_reg< int, n >	cv::v_ceil (const v_reg< float, n > &a)
	要素の切り上げ（ceil）[【詳解】（英語］

template<int n>
v_reg< int, n >	cv::v_trunc (const v_reg< float, n > &a)
	要素の切り捨て[【詳解】（英語］

template<int n>
v_reg< int, n *2 >	cv::v_round (const v_reg< double, n > &a)

template<int n>
v_reg< int, n *2 >	cv::v_floor (const v_reg< double, n > &a)

template<int n>
v_reg< int, n *2 >	cv::v_ceil (const v_reg< double, n > &a)

template<int n>
v_reg< int, n *2 >	cv::v_trunc (const v_reg< double, n > &a)

template<int n>
v_reg< float, n >	cv::v_cvt_f32 (const v_reg< int, n > &a)
	float に変換します．[【詳解】（英語］

template<int n>
v_reg< float, n *2 >	cv::v_cvt_f32 (const v_reg< double, n > &a)
	下半分を float に変換します．[【詳解】（英語］

template<int n>
v_reg< float, n *2 >	cv::v_cvt_f32 (const v_reg< double, n > &a, const v_reg< double, n > &b)
	float に変換します．[【詳解】（英語］

template<int n>
CV_INLINE v_reg< double, n/2 >	cv::v_cvt_f64 (const v_reg< int, n > &a)
	下半分を double に変換します．[【詳解】（英語］

template<int n>
CV_INLINE v_reg< double,(n/2)>	cv::v_cvt_f64_high (const v_reg< int, n > &a)
	ベクトルの上位部分を double に変換します．[【詳解】（英語］

template<int n>
CV_INLINE v_reg< double,(n/2)>	cv::v_cvt_f64 (const v_reg< float, n > &a)
	下半分を double に変換します．[【詳解】（英語］

template<int n>
CV_INLINE v_reg< double,(n/2)>	cv::v_cvt_f64_high (const v_reg< float, n > &a)
	ベクトルの上位部分を double に変換します．[【詳解】（英語］

template<int n>
CV_INLINE v_reg< double, n >	cv::v_cvt_f64 (const v_reg< int64, n > &a)
	double に変換します．[【詳解】（英語］

template<typename _Tp >
v_reg< _Tp, simd128_width/sizeof(_Tp)>	cv::v_lut (const _Tp *tab, const int *idx)

template<typename _Tp >
v_reg< _Tp, simd128_width/sizeof(_Tp)>	cv::v_lut_pairs (const _Tp *tab, const int *idx)

template<typename _Tp >
v_reg< _Tp, simd128_width/sizeof(_Tp)>	cv::v_lut_quads (const _Tp *tab, const int *idx)

template<int n>
v_reg< int, n >	cv::v_lut (const int *tab, const v_reg< int, n > &idx)

template<int n>
v_reg< unsigned, n >	cv::v_lut (const unsigned *tab, const v_reg< int, n > &idx)

template<int n>
v_reg< float, n >	cv::v_lut (const float *tab, const v_reg< int, n > &idx)

template<int n>
v_reg< double, n/2 >	cv::v_lut (const double *tab, const v_reg< int, n > &idx)

template<int n>
void	cv::v_lut_deinterleave (const float *tab, const v_reg< int, n > &idx, v_reg< float, n > &x, v_reg< float, n > &y)

template<int n>
void	cv::v_lut_deinterleave (const double *tab, const v_reg< int, n *2 > &idx, v_reg< double, n > &x, v_reg< double, n > &y)

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_interleave_pairs (const v_reg< _Tp, n > &vec)

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_interleave_quads (const v_reg< _Tp, n > &vec)

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_pack_triplets (const v_reg< _Tp, n > &vec)

template<typename _Tp , int n>
void	cv::v_transpose4x4 (v_reg< _Tp, n > &a0, const v_reg< _Tp, n > &a1, const v_reg< _Tp, n > &a2, const v_reg< _Tp, n > &a3, v_reg< _Tp, n > &b0, v_reg< _Tp, n > &b1, v_reg< _Tp, n > &b2, v_reg< _Tp, n > &b3)
	4x4の行列を転置する[【詳解】（英語］

template<int n>
v_reg< float, n >	cv::v_matmul (const v_reg< float, n > &v, const v_reg< float, n > &a, const v_reg< float, n > &b, const v_reg< float, n > &c, const v_reg< float, n > &d)
	行列の乗算[【詳解】（英語］

template<int n>
v_reg< float, n >	cv::v_matmuladd (const v_reg< float, n > &v, const v_reg< float, n > &a, const v_reg< float, n > &b, const v_reg< float, n > &c, const v_reg< float, n > &d)
	行列の乗算と加算[【詳解】（英語］

template<int n>
v_reg< double, n/2 >	cv::v_dotprod_expand (const v_reg< int, n > &a, const v_reg< int, n > &b)

template<int n>
v_reg< double, n/2 >	cv::v_dotprod_expand (const v_reg< int, n > &a, const v_reg< int, n > &b, const v_reg< double, n/2 > &c)

template<int n>
v_reg< double, n/2 >	cv::v_dotprod_expand_fast (const v_reg< int, n > &a, const v_reg< int, n > &b)

template<int n>
v_reg< double, n/2 >	cv::v_dotprod_expand_fast (const v_reg< int, n > &a, const v_reg< int, n > &b, const v_reg< double, n/2 > &c)

v_reg< float, simd128_width/sizeof(float)>	cv::v_load_expand (const float16_t *ptr)

template<int n>
void	cv::v_pack_store (float16_t *ptr, const v_reg< float, n > &v)

void	cv::v_cleanup ()

Wide init with value
Create maximum available capacity vector with elements set to a specific value
v_uint8	CV__SIMD_NAMESPACE::vx_setall_u8 (uchar v)

v_int8	CV__SIMD_NAMESPACE::vx_setall_s8 (schar v)

v_uint16	CV__SIMD_NAMESPACE::vx_setall_u16 (ushort v)

v_int16	CV__SIMD_NAMESPACE::vx_setall_s16 (short v)

v_int32	CV__SIMD_NAMESPACE::vx_setall_s32 (int v)

v_uint32	CV__SIMD_NAMESPACE::vx_setall_u32 (unsigned v)

v_float32	CV__SIMD_NAMESPACE::vx_setall_f32 (float v)

v_int64	CV__SIMD_NAMESPACE::vx_setall_s64 (int64 v)

v_uint64	CV__SIMD_NAMESPACE::vx_setall_u64 (uint64 v)

Wide init with zero
Create maximum available capacity vector with elements set to zero
v_uint8	CV__SIMD_NAMESPACE::vx_setzero_u8 ()

v_int8	CV__SIMD_NAMESPACE::vx_setzero_s8 ()

v_uint16	CV__SIMD_NAMESPACE::vx_setzero_u16 ()

v_int16	CV__SIMD_NAMESPACE::vx_setzero_s16 ()

v_int32	CV__SIMD_NAMESPACE::vx_setzero_s32 ()

v_uint32	CV__SIMD_NAMESPACE::vx_setzero_u32 ()

v_float32	CV__SIMD_NAMESPACE::vx_setzero_f32 ()

v_int64	CV__SIMD_NAMESPACE::vx_setzero_s64 ()

v_uint64	CV__SIMD_NAMESPACE::vx_setzero_u64 ()

Wide load from memory
Load maximum available capacity register contents from memory
v_uint8	CV__SIMD_NAMESPACE::vx_load (const uchar *ptr)

v_int8	CV__SIMD_NAMESPACE::vx_load (const schar *ptr)

v_uint16	CV__SIMD_NAMESPACE::vx_load (const ushort *ptr)

v_int16	CV__SIMD_NAMESPACE::vx_load (const short *ptr)

v_int32	CV__SIMD_NAMESPACE::vx_load (const int *ptr)

v_uint32	CV__SIMD_NAMESPACE::vx_load (const unsigned *ptr)

v_float32	CV__SIMD_NAMESPACE::vx_load (const float *ptr)

v_int64	CV__SIMD_NAMESPACE::vx_load (const int64 *ptr)

v_uint64	CV__SIMD_NAMESPACE::vx_load (const uint64 *ptr)

Wide load from memory(aligned)
Load maximum available capacity register contents from memory(aligned)
v_uint8	CV__SIMD_NAMESPACE::vx_load_aligned (const uchar *ptr)

v_int8	CV__SIMD_NAMESPACE::vx_load_aligned (const schar *ptr)

v_uint16	CV__SIMD_NAMESPACE::vx_load_aligned (const ushort *ptr)

v_int16	CV__SIMD_NAMESPACE::vx_load_aligned (const short *ptr)

v_int32	CV__SIMD_NAMESPACE::vx_load_aligned (const int *ptr)

v_uint32	CV__SIMD_NAMESPACE::vx_load_aligned (const unsigned *ptr)

v_float32	CV__SIMD_NAMESPACE::vx_load_aligned (const float *ptr)

v_int64	CV__SIMD_NAMESPACE::vx_load_aligned (const int64 *ptr)

v_uint64	CV__SIMD_NAMESPACE::vx_load_aligned (const uint64 *ptr)

Wide load lower half from memory
Load lower half of maximum available capacity register from memory
v_uint8	CV__SIMD_NAMESPACE::vx_load_low (const uchar *ptr)

v_int8	CV__SIMD_NAMESPACE::vx_load_low (const schar *ptr)

v_uint16	CV__SIMD_NAMESPACE::vx_load_low (const ushort *ptr)

v_int16	CV__SIMD_NAMESPACE::vx_load_low (const short *ptr)

v_int32	CV__SIMD_NAMESPACE::vx_load_low (const int *ptr)

v_uint32	CV__SIMD_NAMESPACE::vx_load_low (const unsigned *ptr)

v_float32	CV__SIMD_NAMESPACE::vx_load_low (const float *ptr)

v_int64	CV__SIMD_NAMESPACE::vx_load_low (const int64 *ptr)

v_uint64	CV__SIMD_NAMESPACE::vx_load_low (const uint64 *ptr)

Wide load halves from memory
Load maximum available capacity register contents from two memory blocks
v_uint8	CV__SIMD_NAMESPACE::vx_load_halves (const uchar *ptr0, const uchar *ptr1)

v_int8	CV__SIMD_NAMESPACE::vx_load_halves (const schar *ptr0, const schar *ptr1)

v_uint16	CV__SIMD_NAMESPACE::vx_load_halves (const ushort *ptr0, const ushort *ptr1)

v_int16	CV__SIMD_NAMESPACE::vx_load_halves (const short *ptr0, const short *ptr1)

v_int32	CV__SIMD_NAMESPACE::vx_load_halves (const int *ptr0, const int *ptr1)

v_uint32	CV__SIMD_NAMESPACE::vx_load_halves (const unsigned *ptr0, const unsigned *ptr1)

v_float32	CV__SIMD_NAMESPACE::vx_load_halves (const float *ptr0, const float *ptr1)

v_int64	CV__SIMD_NAMESPACE::vx_load_halves (const int64 *ptr0, const int64 *ptr1)

v_uint64	CV__SIMD_NAMESPACE::vx_load_halves (const uint64 *ptr0, const uint64 *ptr1)

Wide LUT of elements
Load maximum available capacity register contents with array elements by provided indexes
v_uint8	CV__SIMD_NAMESPACE::vx_lut (const uchar *ptr, const int *idx)

v_int8	CV__SIMD_NAMESPACE::vx_lut (const schar *ptr, const int *idx)

v_uint16	CV__SIMD_NAMESPACE::vx_lut (const ushort *ptr, const int *idx)

v_int16	CV__SIMD_NAMESPACE::vx_lut (const short *ptr, const int *idx)

v_int32	CV__SIMD_NAMESPACE::vx_lut (const int *ptr, const int *idx)

v_uint32	CV__SIMD_NAMESPACE::vx_lut (const unsigned *ptr, const int *idx)

v_float32	CV__SIMD_NAMESPACE::vx_lut (const float *ptr, const int *idx)

v_int64	CV__SIMD_NAMESPACE::vx_lut (const int64 *ptr, const int *idx)

v_uint64	CV__SIMD_NAMESPACE::vx_lut (const uint64 *ptr, const int *idx)

Wide LUT of element pairs
Load maximum available capacity register contents with array element pairs by provided indexes
v_uint8	CV__SIMD_NAMESPACE::vx_lut_pairs (const uchar *ptr, const int *idx)

v_int8	CV__SIMD_NAMESPACE::vx_lut_pairs (const schar *ptr, const int *idx)

v_uint16	CV__SIMD_NAMESPACE::vx_lut_pairs (const ushort *ptr, const int *idx)

v_int16	CV__SIMD_NAMESPACE::vx_lut_pairs (const short *ptr, const int *idx)

v_int32	CV__SIMD_NAMESPACE::vx_lut_pairs (const int *ptr, const int *idx)

v_uint32	CV__SIMD_NAMESPACE::vx_lut_pairs (const unsigned *ptr, const int *idx)

v_float32	CV__SIMD_NAMESPACE::vx_lut_pairs (const float *ptr, const int *idx)

v_int64	CV__SIMD_NAMESPACE::vx_lut_pairs (const int64 *ptr, const int *idx)

v_uint64	CV__SIMD_NAMESPACE::vx_lut_pairs (const uint64 *ptr, const int *idx)

Wide LUT of element quads
Load maximum available capacity register contents with array element quads by provided indexes
v_uint8	CV__SIMD_NAMESPACE::vx_lut_quads (const uchar *ptr, const int *idx)

v_int8	CV__SIMD_NAMESPACE::vx_lut_quads (const schar *ptr, const int *idx)

v_uint16	CV__SIMD_NAMESPACE::vx_lut_quads (const ushort *ptr, const int *idx)

v_int16	CV__SIMD_NAMESPACE::vx_lut_quads (const short *ptr, const int *idx)

v_int32	CV__SIMD_NAMESPACE::vx_lut_quads (const int *ptr, const int *idx)

v_uint32	CV__SIMD_NAMESPACE::vx_lut_quads (const unsigned *ptr, const int *idx)

v_float32	CV__SIMD_NAMESPACE::vx_lut_quads (const float *ptr, const int *idx)

Wide load with double expansion
Load maximum available capacity register contents from memory with double expand
v_uint16	CV__SIMD_NAMESPACE::vx_load_expand (const uchar *ptr)

v_int16	CV__SIMD_NAMESPACE::vx_load_expand (const schar *ptr)

v_uint32	CV__SIMD_NAMESPACE::vx_load_expand (const ushort *ptr)

v_int32	CV__SIMD_NAMESPACE::vx_load_expand (const short *ptr)

v_int64	CV__SIMD_NAMESPACE::vx_load_expand (const int *ptr)

v_uint64	CV__SIMD_NAMESPACE::vx_load_expand (const unsigned *ptr)

v_float32	CV__SIMD_NAMESPACE::vx_load_expand (const float16_t *ptr)

Wide load with quad expansion
Load maximum available capacity register contents from memory with quad expand
v_uint32	CV__SIMD_NAMESPACE::vx_load_expand_q (const uchar *ptr)

v_int32	CV__SIMD_NAMESPACE::vx_load_expand_q (const schar *ptr)

Pack boolean values
Pack boolean values from multiple vectors to one unsigned 8-bit integer vector. 覚え書き: Valid boolean values must be provided to guarantee the same result on all architectures.
template<int n>
v_reg< uchar, 2 *n >	cv::v_pack_b (const v_reg< ushort, n > &a, const v_reg< ushort, n > &b)
	16ビットのブーリアン値の場合[【詳解】（英語］

template<int n>
v_reg< uchar, 4 *n >	cv::v_pack_b (const v_reg< unsigned, n > &a, const v_reg< unsigned, n > &b, const v_reg< unsigned, n > &c, const v_reg< unsigned, n > &d)

template<int n>
v_reg< uchar, 8 *n >	cv::v_pack_b (const v_reg< uint64, n > &a, const v_reg< uint64, n > &b, const v_reg< uint64, n > &c, const v_reg< uint64, n > &d, const v_reg< uint64, n > &e, const v_reg< uint64, n > &f, const v_reg< uint64, n > &g, const v_reg< uint64, n > &h)

詳解

"Universal intrinsics" is a set of types and functions intended to simplify vectorization of code on different platforms. Currently a few different SIMD extensions on different architectures are supported. Support for 128 bit registers of various types is implemented for a wide range of architectures including x86(SSE/SSE2/SSE4.2), ARM(NEON), PowerPC(VSX), MIPS(MSA). 256 bit long registers are supported on x86(AVX2) and 512 bit long registers are supported on x86(AVX512). In case there is no SIMD extension available during compilation, a fallback C++ implementation of the intrinsics will be chosen and the code will work as expected, although it could be slower.

Types

There are several types representing packed-value vector registers; each type is implemented as a structure based on one SIMD register.

cv::v_uint8 and cv::v_int8: 8-bit integer values (unsigned/signed) - char
cv::v_uint16 and cv::v_int16: 16-bit integer values (unsigned/signed) - short
cv::v_uint32 and cv::v_int32: 32-bit integer values (unsigned/signed) - int
cv::v_uint64 and cv::v_int64: 64-bit integer values (unsigned/signed) - int64
cv::v_float32: 32-bit floating point values (signed) - float
cv::v_float64: 64-bit floating point values (signed) - double

The exact bit length (and value quantity) of the listed types is deduced at compile time and depends on the architecture SIMD capabilities chosen as available during compilation of the library. All the types contain an nlanes enumeration to check the exact value quantity of the type.

In case the exact bit length of the type is important it is possible to use specific fixed length register types.

There are several types representing 128-bit registers.

cv::v_uint8x16 and cv::v_int8x16: sixteen 8-bit integer values (unsigned/signed) - char
cv::v_uint16x8 and cv::v_int16x8: eight 16-bit integer values (unsigned/signed) - short
cv::v_uint32x4 and cv::v_int32x4: four 32-bit integer values (unsigned/signed) - int
cv::v_uint64x2 and cv::v_int64x2: two 64-bit integer values (unsigned/signed) - int64
cv::v_float32x4: four 32-bit floating point values (signed) - float
cv::v_float64x2: two 64-bit floating point values (signed) - double

There are several types representing 256-bit registers.

cv::v_uint8x32 and cv::v_int8x32: thirty two 8-bit integer values (unsigned/signed) - char
cv::v_uint16x16 and cv::v_int16x16: sixteen 16-bit integer values (unsigned/signed) - short
cv::v_uint32x8 and cv::v_int32x8: eight 32-bit integer values (unsigned/signed) - int
cv::v_uint64x4 and cv::v_int64x4: four 64-bit integer values (unsigned/signed) - int64
cv::v_float32x8: eight 32-bit floating point values (signed) - float
cv::v_float64x4: four 64-bit floating point values (signed) - double

覚え書き: 256 bit registers are at the moment implemented for the AVX2 SIMD extension only; if you want to use this type directly, don't forget to check the CV_SIMD256 preprocessor definition:

#if CV_SIMD256

//...

#endif

There are several types representing 512-bit registers.

cv::v_uint8x64 and cv::v_int8x64: sixty four 8-bit integer values (unsigned/signed) - char
cv::v_uint16x32 and cv::v_int16x32: thirty two 16-bit integer values (unsigned/signed) - short
cv::v_uint32x16 and cv::v_int32x16: sixteen 32-bit integer values (unsigned/signed) - int
cv::v_uint64x8 and cv::v_int64x8: eight 64-bit integer values (unsigned/signed) - int64
cv::v_float32x16: sixteen 32-bit floating point values (signed) - float
cv::v_float64x8: eight 64-bit floating point values (signed) - double

覚え書き

512 bit registers are at the moment implemented for the AVX512 SIMD extension only; if you want to use this type directly, don't forget to check the CV_SIMD512 preprocessor definition.

cv::v_float64x2 is not implemented in NEON variant, if you want to use this type, don't forget to check the CV_SIMD128_64F preprocessor definition.

Load and store operations

These operations allow setting the contents of a register explicitly or by loading it from a memory block, and saving the contents of a register to a memory block.

There are variable size register load operations that provide result of maximum available size depending on chosen platform capabilities.

Constructors: from memory,
Other create methods: vx_setall_s8, vx_setall_u8, ..., vx_setzero_u8, vx_setzero_s8, ...
Memory load operations: vx_load, vx_load_aligned, vx_load_low, vx_load_halves,
Memory operations with expansion of values: vx_load_expand, vx_load_expand_q

Also there are fixed size register load/store operations.

For 128 bit registers

Constructors: from memory, from two values, ...
Other create methods: v_setall_s8, v_setall_u8, ..., v_setzero_u8, v_setzero_s8, ...
Memory load operations: v_load, v_load_aligned, v_load_low, v_load_halves,
Memory operations with expansion of values: v_load_expand, v_load_expand_q

For 256 bit registers(check CV_SIMD256 preprocessor definition)

Constructors: from memory, from four values, ...
Other create methods: v256_setall_s8, v256_setall_u8, ..., v256_setzero_u8, v256_setzero_s8, ...
Memory load operations: v256_load, v256_load_aligned, v256_load_low, v256_load_halves,
Memory operations with expansion of values: v256_load_expand, v256_load_expand_q

For 512 bit registers(check CV_SIMD512 preprocessor definition)

Constructors: from memory, from eight values, ...
Other create methods: v512_setall_s8, v512_setall_u8, ..., v512_setzero_u8, v512_setzero_s8, ...
Memory load operations: v512_load, v512_load_aligned, v512_load_low, v512_load_halves,
Memory operations with expansion of values: v512_load_expand, v512_load_expand_q

Store to memory operations are similar across different platform capabilities: v_store, v_store_aligned, v_store_high, v_store_low

Value reordering

These operations allow reordering or recombining elements in one or multiple vectors.

Interleave, deinterleave (2, 3 and 4 channels): v_load_deinterleave, v_store_interleave
Expand: v_expand, v_expand_low, v_expand_high
Pack: v_pack, v_pack_u, v_pack_b, v_rshr_pack, v_rshr_pack_u, v_pack_store, v_pack_u_store, v_rshr_pack_store, v_rshr_pack_u_store
Recombine: v_zip, v_recombine, v_combine_low, v_combine_high
Reverse: v_reverse
Extract: v_extract

Arithmetic, bitwise and comparison operations

Element-wise binary and unary operations.

Arithmetics: +, -, *, /, v_mul_expand
Non-saturating arithmetics: v_add_wrap, v_sub_wrap
Bitwise shifts: <<, >>, v_shl, v_shr
Bitwise logic: &, |, ^, ~
Comparison: >, >=, <, <=, ==, !=
min/max: v_min, v_max

Reduce and mask

Most of these operations return only one value.

Reduce: v_reduce_min, v_reduce_max, v_reduce_sum, v_popcount
Mask: v_signmask, v_check_all, v_check_any, v_select

Other math

Some frequent operations: v_sqrt, v_invsqrt, v_magnitude, v_sqr_magnitude
Absolute values: v_abs, v_absdiff, v_absdiffs

Conversions

Different type conversions and casts:

Rounding: v_round, v_floor, v_ceil, v_trunc,
To float: v_cvt_f32, v_cvt_f64
Reinterpret: v_reinterpret_as_u8, v_reinterpret_as_s8, ...

Matrix operations

In these operations vectors represent matrix rows/columns: v_dotprod, v_dotprod_fast, v_dotprod_expand, v_dotprod_expand_fast, v_matmul, v_transpose4x4

Usability

Most operations are implemented only for some subset of the available types; the following matrices show the applicability of different operations to the types.

Regular integers:

Operations\Types	uint 8	int 8	uint 16	int 16	uint 32	int 32
load, store	x	x	x	x	x	x
interleave	x	x	x	x	x	x
expand	x	x	x	x	x	x
expand_low	x	x	x	x	x	x
expand_high	x	x	x	x	x	x
expand_q	x	x
add, sub	x	x	x	x	x	x
add_wrap, sub_wrap	x	x	x	x
mul_wrap	x	x	x	x
mul	x	x	x	x	x	x
mul_expand	x	x	x	x	x
compare	x	x	x	x	x	x
shift			x	x	x	x
dotprod				x		x
dotprod_fast				x		x
dotprod_expand	x	x	x	x		x
dotprod_expand_fast	x	x	x	x		x
logical	x	x	x	x	x	x
min, max	x	x	x	x	x	x
absdiff	x	x	x	x	x	x
absdiffs		x		x
reduce	x	x	x	x	x	x
mask	x	x	x	x	x	x
pack	x	x	x	x	x	x
pack_u	x		x
pack_b	x
unpack	x	x	x	x	x	x
extract	x	x	x	x	x	x
rotate (lanes)	x	x	x	x	x	x
cvt_flt32						x
cvt_flt64						x
transpose4x4					x	x
reverse	x	x	x	x	x	x
extract_n	x	x	x	x	x	x
broadcast_element					x	x

Big integers:

Operations\Types	uint 64	int 64
load, store	x	x
add, sub	x	x
shift	x	x
logical	x	x
reverse	x	x
extract	x	x
rotate (lanes)	x	x
cvt_flt64		x
extract_n	x	x

Floating point:

Operations\Types	float 32	float 64
load, store	x	x
interleave	x
add, sub	x	x
mul	x	x
div	x	x
compare	x	x
min, max	x	x
absdiff	x	x
reduce	x
mask	x	x
unpack	x	x
cvt_flt32		x
cvt_flt64	x
sqrt, abs	x	x
float math	x	x
transpose4x4	x
extract	x	x
rotate (lanes)	x	x
reverse	x	x
extract_n	x	x
broadcast_element	x

マクロ定義詳解

◆ CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES

#define CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES	(	macro_name,
		...
	)

値:

CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, __VA_ARGS__) \

CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, __VA_ARGS__) \

◆ CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES

#define CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES	(	macro_name,
		...
	)

値:

__CV_EXPAND(macro_name(float, __VA_ARGS__)) \

__CV_EXPAND(macro_name(double, __VA_ARGS__)) \

◆ CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES

#define CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES	(	macro_name,
		...
	)

値:

__CV_EXPAND(macro_name(uchar, __VA_ARGS__)) \
__CV_EXPAND(macro_name(schar, __VA_ARGS__)) \
__CV_EXPAND(macro_name(ushort, __VA_ARGS__)) \
__CV_EXPAND(macro_name(short, __VA_ARGS__)) \
__CV_EXPAND(macro_name(unsigned, __VA_ARGS__)) \
__CV_EXPAND(macro_name(int, __VA_ARGS__)) \
__CV_EXPAND(macro_name(uint64, __VA_ARGS__)) \
__CV_EXPAND(macro_name(int64, __VA_ARGS__)) \

◆ CV__HAL_INTRIN_IMPL_BIN_OP_

#define CV__HAL_INTRIN_IMPL_BIN_OP_	(	_Tp,
		bin_op
	)

値:

template<int
n> inline \
v_reg<_Tp, n>
operator
bin_op (const
v_reg<_Tp, n>& a,
const
v_reg<_Tp, n>& b) \
{ \

v_reg<_Tp, n> c; \

for(
int
i = 0; i < n; i++ ) \

c.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \

return c; \
} \
template<int n> inline \
v_reg<_Tp, n>&
operator
bin_op##= (v_reg<_Tp, n>& a,
const
v_reg<_Tp, n>& b) \
{ \

for(
int
i = 0; i < n; i++ ) \

a.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \

return a; \
}

◆ CV__HAL_INTRIN_IMPL_BIT_OP

#define CV__HAL_INTRIN_IMPL_BIT_OP ( bit_op )

値:

CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op) \

CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op) /* TODO: FIXIT remove this after masks refactoring */

◆ CV__HAL_INTRIN_IMPL_BIT_OP_

#define CV__HAL_INTRIN_IMPL_BIT_OP_	(	_Tp,
		bit_op
	)

値:

template<int
n> CV_INLINE \
v_reg<_Tp, n>
operator
bit_op (const
v_reg<_Tp, n>& a,
const
v_reg<_Tp, n>& b) \
{ \

v_reg<_Tp, n> c; \

typedef
typename
V_TypeTraits<_Tp>::int_type itype; \

for(
int
i = 0; i < n; i++ ) \

c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \

V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \

return c; \
} \
template<int n> CV_INLINE \
v_reg<_Tp, n>&
operator
bit_op##= (v_reg<_Tp, n>& a,
const
v_reg<_Tp, n>& b) \
{ \

typedef
typename
V_TypeTraits<_Tp>::int_type itype; \

for(
int
i = 0; i < n; i++ ) \

a.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \

V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \

return a; \
}

◆ CV__HAL_INTRIN_IMPL_BITWISE_NOT_

#define CV__HAL_INTRIN_IMPL_BITWISE_NOT_	(	_Tp,
		dummy
	)

値:

template<int
n> CV_INLINE \
v_reg<_Tp, n>
operator ~
(const
v_reg<_Tp, n>& a) \
{ \

v_reg<_Tp, n> c; \

for(
int
i = 0; i < n; i++ ) \

c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int(~V_TypeTraits<_Tp>::reinterpret_int(a.s[i])); \

return c; \
} \

関数詳解

◆ operator&()

template<typename _Tp , int n>

CV_INLINE v_reg< _Tp, n > cv::operator&	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

ビットごとのAND

整数型の場合のみ。

◆ operator*()

template<typename _Tp , int n>

CV_INLINE v_reg< _Tp, n > cv::operator*	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

値の乗算

16ビット、32ビットの整数型、浮動小数点型に対応。

◆ operator+()

template<typename _Tp , int n>

CV_INLINE v_reg< _Tp, n > cv::operator+	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

値の加算

すべてのタイプに対応。

◆ operator-()

template<typename _Tp , int n>

CV_INLINE v_reg< _Tp, n > cv::operator-	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

値の減算

すべてのタイプに対応。

◆ operator/()

template<typename _Tp , int n>

CV_INLINE v_reg< _Tp, n > cv::operator/	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

値の除算

浮動小数点タイプのみ。

◆ operator^()

template<typename _Tp , int n>

CV_INLINE v_reg< _Tp, n > cv::operator^	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

ビットごとのXOR

整数型の場合のみ。

◆ operator|()

template<typename _Tp , int n>

CV_INLINE v_reg< _Tp, n > cv::operator\|	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

ビットごとのOR

整数型の場合のみ。

◆ operator~()

template<typename _Tp , int n>

CV_INLINE v_reg< _Tp, n > cv::operator~ ( const v_reg< _Tp, n > & a )

ビットごとのNOT

整数型の場合のみ。

◆ v_absdiff() [1/3]

template<typename _Tp , int n>

v_reg< typename V_TypeTraits< _Tp >::abs_type, n > cv::v_absdiff	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

飽和なしの値の加算

8ビットおよび16ビットの整数値に対して

飽和なしの値の減算

8ビットおよび16ビットの整数値に対して

飽和なしの値の乗算

8ビットおよび16ビットの整数値に対して

差の絶対値

対応する符号なし型に変換された $ |a - b| $ を返します。例:

v_int32x4 a, b; // {1, 2, 3, 4} and {4, 3, 2, 1}

v_uint32x4 c = v_absdiff(a, b); // result is {3, 1, 1, 3}

cv::v_absdiff

v_reg< typename V_TypeTraits< _Tp >::abs_type, n > v_absdiff(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

Add values without saturation

Definition: intrin_cpp.hpp:956

cv::v_int32x4

Definition: intrin_rvv.hpp:390

cv::v_uint32x4

Definition: intrin_rvv.hpp:358

8、16、32ビットの整数型の場合

◆ v_absdiff() [2/3]

template<int n>

v_reg< double, n > cv::v_absdiff	(	const v_reg< double, n > &	a,
		const v_reg< double, n > &	b
	)

inline

これはオーバーロードされたメンバ関数です。利便性のために用意されています。元の関数との違いは引き数のみです。

64ビット浮動小数点値の場合

◆ v_absdiff() [3/3]

template<int n>

v_reg< float, n > cv::v_absdiff	(	const v_reg< float, n > &	a,
		const v_reg< float, n > &	b
	)

inline

これはオーバーロードされたメンバ関数です。利便性のために用意されています。元の関数との違いは引き数のみです。

32ビット浮動小数点値の場合

◆ v_absdiffs()

template<typename _Tp , int n>

v_reg< _Tp, n > cv::v_absdiffs	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

飽和絶対差

戻り値 $ saturate(|a - b|) $ . 8 ビット，16 ビットの符号付き整数型の場合

◆ v_broadcast_element()

template<int i, typename _Tp , int n>

v_reg< _Tp, n > cv::v_broadcast_element ( const v_reg< _Tp, n > & a )

inline

ベクトルの i 番目の要素をブロードキャストする

スキーム．

{ v[0] v[1] v[2] ... v[SZ] } => { v[i], v[i], v[i] ... v[i] }

制限: 0 <= i < nlanes。対応する型: 32ビット整数と浮動小数点数 (s32/u32/f32)

◆ v_ceil() [1/2]

template<int n>

v_reg< int, n *2 > cv::v_ceil ( const v_reg< double, n > & a )

inline

これはオーバーロードされたメンバ関数です。利便性のために用意されています。元の関数との違いは引き数のみです。

◆ v_ceil() [2/2]

template<int n>

v_reg< int, n > cv::v_ceil ( const v_reg< float, n > & a )

inline

要素の切り上げ

各値を切り上げます。入力型は float ベクトル ==> 出力型は int ベクトルです。

覚え書き: 浮動小数点型のみ対応。

◆ v_check_all()

template<typename _Tp , int n>

bool cv::v_check_all ( const v_reg< _Tp, n > & a )

inline

パックされたすべての値がゼロより小さいかどうかをチェックします。

符号なしの値は符号ありにキャストされます。uchar 254 => char -2.

◆ v_check_any()

template<typename _Tp , int n>

bool cv::v_check_any ( const v_reg< _Tp, n > & a )

inline

パックされた値のいずれかがゼロより小さいかどうかをチェックします。

符号なしの値は符号ありにキャストされます。uchar 254 => char -2.

◆ v_combine_high()

template<typename _Tp , int n>

v_reg< _Tp, n > cv::v_combine_high	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

2つのベクトルの最後の要素からベクトルを合成する

スキーム．

{A1 A2 A3 A4}

{B1 B2 B3 B4}
---------------

{A3 A4 B3 B4}

64ビットを除くすべての型で

◆ v_combine_low()

template<typename _Tp , int n>

v_reg< _Tp, n > cv::v_combine_low	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

2つのベクトルの最初の要素からベクトルを合成する

スキーム．

{A1 A2 A3 A4}

{B1 B2 B3 B4}
---------------

{A1 A2 B1 B2}

64ビットを除くすべての型で

◆ v_cvt_f32() [1/3]

template<int n>

v_reg< float, n *2 > cv::v_cvt_f32 ( const v_reg< double, n > & a )

inline

下半分を float に変換します．

サポートされる入力形式は， cv::v_float64 です．

◆ v_cvt_f32() [2/3]

template<int n>

v_reg< float, n *2 > cv::v_cvt_f32	(	const v_reg< double, n > &	a,
		const v_reg< double, n > &	b
	)

inline

float に変換します．

サポートされる入力形式は， cv::v_float64 です．

◆ v_cvt_f32() [3/3]

template<int n>

v_reg< float, n > cv::v_cvt_f32 ( const v_reg< int, n > & a )

inline

float に変換します．

サポートされる入力形式は， cv::v_int32 です．

◆ v_cvt_f64() [1/3]

template<int n>

CV_INLINE v_reg< double,(n/2)> cv::v_cvt_f64 ( const v_reg< float, n > & a )

下半分を double に変換します．

サポートされる入力形式は， cv::v_float32 です．

◆ v_cvt_f64() [2/3]

template<int n>

CV_INLINE v_reg< double, n/2 > cv::v_cvt_f64 ( const v_reg< int, n > & a )

下半分を double に変換します．

サポートされる入力形式は， cv::v_int32 です．

◆ v_cvt_f64() [3/3]

template<int n>

CV_INLINE v_reg< double, n > cv::v_cvt_f64 ( const v_reg< int64, n > & a )

double に変換します．

サポートされる入力形式は， cv::v_int64 です．

◆ v_cvt_f64_high() [1/2]

template<int n>

CV_INLINE v_reg< double,(n/2)> cv::v_cvt_f64_high ( const v_reg< float, n > & a )

ベクトルの上位部分を double に変換します．

サポートされる入力形式は， cv::v_float32 です．

◆ v_cvt_f64_high() [2/2]

template<int n>

CV_INLINE v_reg< double,(n/2)> cv::v_cvt_f64_high ( const v_reg< int, n > & a )

ベクトルの上位部分を double に変換します．

サポートされる入力形式は， cv::v_int32 です．

◆ v_dotprod() [1/2]

template<typename _Tp , int n>

v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > cv::v_dotprod	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

要素のドットプロダクト

2つのレジスタの値を乗算し，隣接する結果の組を合計します．

スキーム．

{A1 A2 ...}
// 16-bit
x {B1 B2 ...}
// 16-bit
-------------
{A1B1+A2B2 ...}
// 32-bit

◆ v_dotprod() [2/2]

template<typename _Tp , int n>

v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > cv::v_dotprod	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		const v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &	c
	)

inline

要素のドットプロダクト

cv::v_dotprod と同じですが，隣接するペアの和に3番目の要素を加えます．スキーム:

{A1 A2 ...}
// 16-bit
x {B1 B2 ...}
// 16-bit
-------------

{A1B1+A2B2+C1 ...}
// 32-bit

◆ v_dotprod_expand() [1/2]

template<typename _Tp , int n>

v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > cv::v_dotprod_expand	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

要素のドット積と展開

2つのレジスタの値を乗算し、隣接する結果ペアの和を拡張します。

スキーム．

{A1 A2 A3 A4 ...}
// 8-bit
x {B1 B2 B3 B4 ...}
// 8-bit
-------------

{A1B1+A2B2+A3B3+A4B4 ...}
// 32-bit

◆ v_dotprod_expand() [2/2]

template<typename _Tp , int n>

v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > cv::v_dotprod_expand	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		const v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > &	c
	)

inline

要素のドットプロダクト

cv::v_dotprod_expand と同じですが，隣接するペアの和に3番目の要素を加えます．スキーム:

{A1 A2 A3 A4 ...}
// 8-bit
x {B1 B2 B3 B4 ...}
// 8-bit
-------------

{A1B1+A2B2+A3B3+A4B4+C1 ...}
// 32-bit

◆ v_dotprod_expand_fast() [1/2]

template<typename _Tp , int n>

v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > cv::v_dotprod_expand_fast	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

高速な要素のドット積と展開

2つのレジスタの値を乗算し、隣接する結果ペアの和を拡張します。

cv::v_dotprod_expand と同じですが、プラットフォームによっては結果ペアの和の順序が異なる可能性があります。必要なのが全レーンにわたる総和だけであり、該当するプラットフォームでより良いパフォーマンスを得たい場合に、この組み込み関数を使用できます。

◆ v_dotprod_expand_fast() [2/2]

template<typename _Tp , int n>

v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > cv::v_dotprod_expand_fast	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		const v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > &	c
	)

inline

要素の高速ドット積

cv::v_dotprod_expand_fast と同じですが、隣接するペアの合計に3番目の要素を加えます。

◆ v_dotprod_fast() [1/2]

template<typename _Tp , int n>

v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > cv::v_dotprod_fast	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

要素の高速ドット積

cv::v_dotprod と同じですが、プラットフォームによっては結果ペアの和の順序が異なる可能性があります。必要なのが全レーンにわたる総和だけであり、該当するプラットフォームでより良いパフォーマンスを得たい場合に、この組み込み関数を使用できます。

◆ v_dotprod_fast() [2/2]

template<typename _Tp , int n>

v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > cv::v_dotprod_fast	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		const v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &	c
	)

inline

要素の高速ドット積

cv::v_dotprod_fast と同じですが、隣接するペアの合計に3番目の要素を加えます。

◆ v_expand()

template<typename _Tp , int n>

void cv::v_expand	(	const v_reg< _Tp, n > &	a,
		v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &	b0,
		v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &	b1
	)

inline

値をより広いパックタイプに展開

レジスタの内容を2倍のパックタイプで2つのレジスタにコピーします。スキーム。

int32x4 int64x2 int64x2

{A B C D} ==> {A B} , {C D}

◆ v_expand_high()

template<typename _Tp , int n>

v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > cv::v_expand_high ( const v_reg< _Tp, n > & a )

inline

より高い値をより広いパックタイプに展開

cv::v_expand_low と同じですが、ベクトルの上位半分を展開します。

スキーム．

int32x4 int64x2

{A B C D} ==> {C D}

◆ v_expand_low()

template<typename _Tp , int n>

v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > cv::v_expand_low ( const v_reg< _Tp, n > & a )

inline

低い値をより広いパックタイプに展開します。

cv::v_expand と同じですが、ベクトルの下位半分のみを展開して返します。

スキーム．

int32x4 int64x2

{A B C D} ==> {A B}

◆ v_extract()

template<int s, typename _Tp , int n>

v_reg< _Tp, n > cv::v_extract	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

ベクトル抽出

スキーム．

{A1 A2 A3 A4}

{B1 B2 B3 B4}
========================
shift = 1  {A2 A3 A4 B1}
shift = 2  {A3 A4 B1 B2}
shift = 3  {A4 B1 B2 B3}

制限があります。0 <= shift < nlanes

使用方法

v_int32x4 a, b, c;

c = v_extract<2>(a, b);

すべてのタイプに対応。

◆ v_extract_n()

template<int s, typename _Tp , int n>

_Tp cv::v_extract_n ( const v_reg< _Tp, n > & v )

inline

ベクトル抽出

スキーム: v の s 番目の要素を返します。制限: 0 <= s < nlanes

使用方法

v_int32x4
a;

int
r;
r = v_extract_n<2>(a);

すべてのタイプに対応。

◆ v_floor() [1/2]

template<int n>

v_reg< int, n *2 > cv::v_floor ( const v_reg< double, n > & a )

inline

これはオーバーロードされたメンバ関数です。利便性のために用意されています。元の関数との違いは引き数のみです。

◆ v_floor() [2/2]

template<int n>

v_reg< int, n > cv::v_floor ( const v_reg< float, n > & a )

inline

要素の切り下げ

各値を切り下げます（床関数）。入力形式は float ベクトル ==> 出力形式は int ベクトルです。

覚え書き: 浮動小数点型のみ対応。

◆ v_fma()

template<typename _Tp , int n>

v_reg< _Tp, n > cv::v_fma	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		const v_reg< _Tp, n > &	c
	)

inline

乗算・加算

戻り値 $ a*b + c $ 浮動小数点型と符号付き32ビットint型のみです。

◆ v_invsqrt()

template<typename _Tp , int n>

v_reg< _Tp, n > cv::v_invsqrt ( const v_reg< _Tp, n > & a )

inline

逆平方根

戻り値 $ 1/sqrt(a) $ 浮動小数点タイプのみ。

◆ v_load()

template<typename _Tp >

v_reg< _Tp, simd128_width/sizeof(_Tp)> cv::v_load ( const _Tp * ptr )

inline

レジスタの内容をメモリから読み込む

引数

ptr	データのあるメモリブロックへのポインタ

戻り値: レジスタ・オブジェクト

覚え書き: 戻り値の型は，渡されたポインタの型から決定されます（例: uchar ==> cv::v_uint8x16, int ==> cv::v_int32x4 など）。利用可能な最大のレジスタ長の結果を得るためには，vx_load バージョンを利用してください。アラインメントの要件: CV_STRONG_ALIGNMENT=1 の場合，渡されるポインタはアラインメントされていなければいけません（sizeof(lane type) で十分です）。実行時にポインタのアラインメントをチェックせずに，ポインタ型をキャストしてはいけません（例えば uchar* => int*）。

◆ v_load_aligned()

template<typename _Tp >

v_reg< _Tp, simd128_width/sizeof(_Tp)> cv::v_load_aligned ( const _Tp * ptr )

inline

メモリからレジスタの内容を読み込む（アラインド）．

cv::v_load に似ていますが、ソースメモリブロックはアラインされている必要があります（SIMD128の場合は16バイト境界、SIMD256の場合は32バイト境界など）。

覚え書き: vx_load_alignedバージョンを使用して、利用可能な最大のレジスタ長の結果を得る。

◆ v_load_deinterleave() [1/3]

template<typename _Tp , int n>

void cv::v_load_deinterleave	(	const _Tp *	ptr,
		v_reg< _Tp, n > &	a,
		v_reg< _Tp, n > &	b
	)

inline

ロード＆デインターリーブ（2チャネル）

メモリからデータをロードしてデインタリーブし、2つのレジスタに格納する。スキームです。

{A1 B1 A2 B2 ...} ==> {A1 A2 ...}, {B1 B2 ...}

64ビットを除くすべての型で

◆ v_load_deinterleave() [2/3]

template<typename _Tp , int n>

void cv::v_load_deinterleave	(	const _Tp *	ptr,
		v_reg< _Tp, n > &	a,
		v_reg< _Tp, n > &	b,
		v_reg< _Tp, n > &	c
	)

inline

ロードアンドデインターリーブ(3チャネル)

メモリからデータをデインターリーブして3つのレジスタに格納します。スキーム

{A1 B1 C1 A2 B2 C2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}

64ビットを除くすべての型で

◆ v_load_deinterleave() [3/3]

template<typename _Tp , int n>

void cv::v_load_deinterleave	(	const _Tp *	ptr,
		v_reg< _Tp, n > &	a,
		v_reg< _Tp, n > &	b,
		v_reg< _Tp, n > &	c,
		v_reg< _Tp, n > &	d
	)

inline

ロード＆デインターリーブ(4チャネル)

メモリからデータをデインターリーブして4つのレジスタに格納します。スキーム

{A1 B1 C1 D1 A2 B2 C2 D2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...}

64ビットを除くすべての型で

◆ v_load_expand()

template<typename _Tp >

v_reg< typename V_TypeTraits< _Tp >::w_type, simd128_width/sizeof(typename V_TypeTraits< _Tp >::w_type)> cv::v_load_expand ( const _Tp * ptr )

inline

メモリからレジスタの内容を2倍幅に展開してロードします。

cv::v_load と同じですが、結果のパック型の幅はメモリ上の型の2倍になります。

short buf[4] = {1, 2, 3, 4}; // type is int16

v_int32x4 r = v_load_expand(buf); // r = {1, 2, 3, 4} - type is int32

cv::v_load_expand

v_reg< typename V_TypeTraits< _Tp >::w_type, simd128_width/sizeof(typename V_TypeTraits< _Tp >::w_type)> v_load_expand(const _Tp *ptr)

Load register contents from memory with double expand

Definition: intrin_cpp.hpp:1875

cv::v_int32x4

v_reg< int, 4 > v_int32x4

Four 32-bit signed integer values

Definition: intrin_cpp.hpp:498

8、16、32ビットの整数型の場合

覚え書き: vx_load_expandバージョンを使用して、利用可能な最大のレジスタ長の結果を得る。

◆ v_load_expand_q()

template<typename _Tp >

v_reg< typename V_TypeTraits< _Tp >::q_type, simd128_width/sizeof(typename V_TypeTraits< _Tp >::q_type)> cv::v_load_expand_q ( const _Tp * ptr )

inline

メモリからレジスタの内容を4倍幅に展開してロードします。

cv::v_load_expand と同じですが、結果の型の幅はソースの4倍になります。

char buf[4] = {1, 2, 3, 4}; // type is int8

v_int32x4 r = v_load_expand_q(buf); // r = {1, 2, 3, 4} - type is int32

cv::v_load_expand_q

v_reg< typename V_TypeTraits< _Tp >::q_type, simd128_width/sizeof(typename V_TypeTraits< _Tp >::q_type)> v_load_expand_q(const _Tp *ptr)

Load register contents from memory with quad expand

Definition: intrin_cpp.hpp:1964

8 ビット整数のソースタイプの場合。

覚え書き: 利用可能な最大のレジスタ長の結果を得るには、vx_load_expand_q バージョンを使用してください。

◆ v_load_halves()

template<typename _Tp >

v_reg< _Tp, simd128_width/sizeof(_Tp)> cv::v_load_halves	(	const _Tp *	loptr,
		const _Tp *	hiptr
	)

inline

2つのメモリブロックからレジスタコンテンツをロード

引数

loptr	前半(0..n/2)のデータを含むメモリブロック
hiptr	後半(n/2..n)のデータを含むメモリブロック

int lo[2] = { 1, 2 }, hi[2] = { 3, 4 };

v_int32x4 r = v_load_halves(lo, hi);

cv::v_load_halves

v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load_halves(const _Tp *loptr, const _Tp *hiptr)

Load register contents from two memory blocks

Definition: intrin_cpp.hpp:1784

覚え書き: vx_load_halvesバージョンを使用して、利用可能な最大のレジスタ長の結果を得る。

◆ v_load_low()

template<typename _Tp >

v_reg< _Tp, simd128_width/sizeof(_Tp)> cv::v_load_low ( const _Tp * ptr )

inline

下位部分に64ビットのデータをロードする（上位部分は未定義）。

引数

ptr	前半(0..n/2)のデータを含むメモリブロック

int lo[2] = { 1, 2 };

v_int32x4 r = v_load_low(lo);

cv::v_load_low

v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load_low(const _Tp *ptr)

Load 64-bits of data to lower part (high part is undefined).

Definition: intrin_cpp.hpp:1702

覚え書き: vx_load_low バージョンを使用して、利用可能な最大のレジスタ長の結果を得る。

◆ v_magnitude()

template<typename _Tp , int n>

v_reg< _Tp, n > cv::v_magnitude	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

マグニチュード

戻り値 $ sqrt(a^2 + b^2) $ 浮動小数点タイプのみ。

◆ v_matmul()

template<int n>

v_reg< float, n > cv::v_matmul	(	const v_reg< float, n > &	v,
		const v_reg< float, n > &	a,
		const v_reg< float, n > &	b,
		const v_reg< float, n > &	c,
		const v_reg< float, n > &	d
	)

inline

行列の乗算

スキーム．

{A0 A1 A2 A3}   |V0|
{B0 B1 B2 B3}   |V1|
{C0 C1 C2 C3}   |V2|
{D0 D1 D2 D3} x |V3|
====================
{R0 R1 R2 R3}, where:
R0 = A0V0 + B0V1 + C0V2 + D0V3,
R1 = A1V0 + B1V1 + C1V2 + D1V3
...

◆ v_matmuladd()

template<int n>

v_reg< float, n > cv::v_matmuladd	(	const v_reg< float, n > &	v,
		const v_reg< float, n > &	a,
		const v_reg< float, n > &	b,
		const v_reg< float, n > &	c,
		const v_reg< float, n > &	d
	)

inline

行列の乗算と加算

スキーム．

{A0 A1 A2 A3}   |V0|   |D0|
{B0 B1 B2 B3}   |V1|   |D1|
{C0 C1 C2 C3} x |V2| + |D2|
====================   |D3|
{R0 R1 R2 R3}, where:
R0 = A0V0 + B0V1 + C0V2 + D0,
R1 = A1V0 + B1V1 + C1V2 + D1
...

◆ v_mul_expand()

template<typename _Tp , int n>

void cv::v_mul_expand	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &	c,
		v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &	d
	)

inline

乗算と展開

2つのレジスタの値を乗算し、結果をより広いパックタイプの2つのレジスタに格納します。スキーム。

{A B C D}
// 32-bit
x {E F G H}
// 32-bit
---------------
{AE BF}
// 64-bit

{CG DH}
// 64-bit

例

v_uint32x4
a, b;
// {1,2,3,4} and {2,2,2,2}

v_uint64x2
c, d;
// results

v_mul_expand(a, b, c, d);
// c, d = {2,4}, {6, 8}

16ビットと符号なし32ビットのソースタイプにのみ実装されています(v_int16x8,v_uint16x8,v_uint32x4).

◆ v_mul_hi()

template<typename _Tp , int n>

v_reg< _Tp, n > cv::v_mul_hi	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

乗算と高次部分の抽出

2つのレジスタの値を乗算し、その結果の上位部分を格納します。16ビットのソースタイプ(v_int16x8,v_uint16x8). 戻り値 $ a*b >> 16 $

◆ v_not_nan()

template<int n>

v_reg< float, n > cv::v_not_nan ( const v_reg< float, n > & a )

inline

小なり比較 (<)

64ビットの整数値を除くすべての型に対して

大なり比較 (>)

64ビットの整数値を除くすべての型に対して

小なりイコール比較 (<=)

64ビットの整数値を除くすべての型に対して

大なりイコール比較 (>=)

64ビットの整数値を除くすべての型に対して

等価比較

64ビットの整数値を除くすべての型に対して

等しくない比較

64ビットの整数値を除くすべての型に対して

◆ v_pack_b() [1/3]

template<int n>

v_reg< uchar, 8 *n > cv::v_pack_b	(	const v_reg< uint64, n > &	a,
		const v_reg< uint64, n > &	b,
		const v_reg< uint64, n > &	c,
		const v_reg< uint64, n > &	d,
		const v_reg< uint64, n > &	e,
		const v_reg< uint64, n > &	f,
		const v_reg< uint64, n > &	g,
		const v_reg< uint64, n > &	h
	)

inline

これはオーバーロードされたメンバ関数です。利便性のために用意されています。元の関数との違いは引き数のみです。 64ビットのブール値の場合

スキーム．

a  {0xFFFF.. 0}
b  {0 0xFFFF..}
c  {0xFFFF.. 0}
d  {0 0xFFFF..}

e  {0xFFFF.. 0}
f  {0xFFFF.. 0}
g  {0 0xFFFF..}
h  {0 0xFFFF..}
===============
{

0xFF 0 0 0xFF 0xFF 0 0 0xFF

0xFF 0 0xFF 0 0 0xFF 0 0xFF
}

◆ v_pack_b() [2/3]

template<int n>

v_reg< uchar, 4 *n > cv::v_pack_b	(	const v_reg< unsigned, n > &	a,
		const v_reg< unsigned, n > &	b,
		const v_reg< unsigned, n > &	c,
		const v_reg< unsigned, n > &	d
	)

inline

これはオーバーロードされたメンバ関数です。利便性のために用意されています。元の関数との違いは引き数のみです。 32ビットのブール値の場合

スキーム．

a  {0xFFFF.. 0 0 0xFFFF..}
b  {0 0xFFFF.. 0xFFFF.. 0}
c  {0xFFFF.. 0 0xFFFF.. 0}
d  {0 0xFFFF.. 0 0xFFFF..}
===============
{

0xFF 0 0 0xFF 0 0xFF 0xFF 0

0xFF 0 0xFF 0 0 0xFF 0 0xFF
}

◆ v_pack_b() [3/3]

template<int n>

v_reg< uchar, 2 *n > cv::v_pack_b	(	const v_reg< ushort, n > &	a,
		const v_reg< ushort, n > &	b
	)

inline

16ビットのブーリアン値の場合

スキーム．

a  {0xFFFF 0 0 0xFFFF 0 0xFFFF 0xFFFF 0}
b  {0xFFFF 0 0xFFFF 0 0 0xFFFF 0 0xFFFF}
===============
{

0xFF 0 0 0xFF 0 0xFF 0xFF 0

0xFF 0 0xFF 0 0 0xFF 0 0xFF
}

◆ v_popcount()

template<typename _Tp , int n>

v_reg< typename V_TypeTraits< _Tp >::abs_type, n > cv::v_popcount ( const v_reg< _Tp, n > & a )

inline

ベクトルレーン内の1ビットをカウントし、対応する符号なしタイプとして結果を返します。

スキーム．

{A1 A2 A3 ...} => {popcount(A1), popcount(A2), popcount(A3), ...}

すべての整数型の場合

◆ v_recombine()

template<typename _Tp , int n>

void cv::v_recombine	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		v_reg< _Tp, n > &	low,
		v_reg< _Tp, n > &	high
	)

inline

2つのベクトルの下位部分と上位部分から2つのベクトルを合成する

low = cv::v_combine_low(a, b);

high = cv::v_combine_high(a, b);

cv::v_combine_high

v_reg< _Tp, n > v_combine_high(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

Combine vector from last elements of two vectors

Definition: intrin_cpp.hpp:2307

cv::v_combine_low

v_reg< _Tp, n > v_combine_low(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

Combine vector from first elements of two vectors

Definition: intrin_cpp.hpp:2285

◆ v_reduce_sad()

template<typename _Tp , int n>

V_TypeTraits< typename V_TypeTraits< _Tp >::abs_type >::sum_type cv::v_reduce_sad	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

値の差の絶対値の合計

スキーム．

{A1 A2 A3 ...} {B1 B2 B3 ...} => sum{abs(A1-B1),abs(A2-B2),abs(A3-B3),...}

cv::abs

softfloat abs(softfloat a)

Absolute value

Definition: softfloat.hpp:444

64ビット型を除くすべての型が対象です。

◆ v_reduce_sum()

template<typename _Tp , int n>

V_TypeTraits< _Tp >::sum_type cv::v_reduce_sum ( const v_reg< _Tp, n > & a )

inline

ベクトルの要素を左にシフトする

すべての型に対して

ベクトル間での要素の右シフト

すべての型に対して

パックされた値の和

スキーム．

{A1 A2 A3 ...} => sum{A1,A2,A3,...}

◆ v_reduce_sum4()

template<int n>

v_reg< float, n > cv::v_reduce_sum4	(	const v_reg< float, n > &	a,
		const v_reg< float, n > &	b,
		const v_reg< float, n > &	c,
		const v_reg< float, n > &	d
	)

inline

各入力ベクトルのすべての要素の和をとり、和のベクトルを返します。

スキーム．

result[0] = a[0] + a[1] + a[2] + a[3]
result[1] = b[0] + b[1] + b[2] + b[3]
result[2] = c[0] + c[1] + c[2] + c[3]
result[3] = d[0] + d[1] + d[2] + d[3]

◆ v_reverse()

template<typename _Tp , int n>

v_reg< _Tp, n > cv::v_reverse ( const v_reg< _Tp, n > & a )

inline

ベクトルの逆順

ベクトルの要素の順序を逆にします。スキーム:

REG {A1 ... An} ==> REG {An ... A1}

すべてのタイプに対応。

◆ v_round() [1/3]

template<int n>

v_reg< int, n *2 > cv::v_round ( const v_reg< double, n > & a )

inline

これはオーバーロードされたメンバ関数です。利便性のために用意されています。元の関数との違いは引き数のみです。

◆ v_round() [2/3]

template<int n>

v_reg< int, n *2 > cv::v_round	(	const v_reg< double, n > &	a,
		const v_reg< double, n > &	b
	)

inline

これはオーバーロードされたメンバ関数です。利便性のために用意されています。元の関数との違いは引き数のみです。

◆ v_round() [3/3]

template<int n>

v_reg< int, n > cv::v_round ( const v_reg< float, n > & a )

inline

要素の丸め

各値を丸めます。入力形式は float ベクトル ==> 出力形式は int ベクトルです。

覚え書き: 浮動小数点型のみ対応。

◆ v_scan_forward()

template<typename _Tp , int n>

int cv::v_scan_forward ( const v_reg< _Tp, n > & a )

inline

最初の負のレーンのインデックスを取得

返される値は、最初の負のレーンのインデックスです（すべての正の値を入力した場合は未定義）例:

v_int32x4 r; // set to {0, 0, -1, -1}

int idx = v_scan_forward(r); // idx = 2

◆ v_select()

template<typename _Tp , int n>

v_reg< _Tp, n > cv::v_select	(	const v_reg< _Tp, n > &	mask,
		const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

エレメント単位の選択（ブレンド操作）

戻り値は、a と b の値を次のスキームで組み合わせて作られます: result[i] = mask[i] ? a[i] : b[i]

覚え書き

マスク要素の値は以下の値に限定されます:

0: b の要素を選択
0xff/0xffff/etc: a の要素を選択（ビット演算ベースの演算子と完全互換）

◆ v_signmask()

template<typename _Tp , int n>

int cv::v_signmask ( const v_reg< _Tp, n > & a )

inline

負の値のマスク取得

非推奨: v_signmask は，レーン数に大きく依存するため，十分な汎用性がありません．

負のパック値のインデックスに対応する場所のビットを1にしたビットマスクが返されます。例

v_int32x4 r; // set to {-1, -1, 1, 1}

int mask = v_signmask(r); // mask = 3 <== 00000000 00000000 00000000 00000011

cv::v_signmask

int v_signmask(const v_reg< _Tp, n > &a)

Get negative values mask

Definition: intrin_cpp.hpp:1395

◆ v_sqr_magnitude()

template<typename _Tp , int n>

v_reg< _Tp, n > cv::v_sqr_magnitude	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

マグニチュードの2乗

戻り値 $ a^2 + b^2 $ 浮動小数点タイプのみ。

◆ v_store()

template<typename _Tp , int n>

void cv::v_store	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a
	)

inline

データのメモリへの格納

レジスタの内容をメモリに格納します。スキーム:

REG {A B C D} ==> MEM {A B C D}

ポインタはアンアラインでもよい。

◆ v_store_aligned()

template<typename _Tp , int n>

void cv::v_store_aligned	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a
	)

inline

メモリへのデータの格納（アラインド）

レジスタの内容をメモリに格納します。スキーム:

REG {A B C D} ==> MEM {A B C D}

ポインタは16バイト境界でアラインされている必要があります。

◆ v_store_high()

template<typename _Tp , int n>

void cv::v_store_high	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a
	)

inline

データのメモリへの保存（上位半分）

レジスタの内容の上位半分をメモリに格納します。スキーム。

REG {A B C D} ==> MEM {C D}

◆ v_store_interleave() [1/3]

template<typename _Tp , int n>

void cv::v_store_interleave	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		const v_reg< _Tp, n > &	c,
		const v_reg< _Tp, n > &	d,
		hal::StoreMode	= `hal::STORE_UNALIGNED`
	)

inline

インターリーブ＆ストア(4チャンネル)

4つのレジスタのデータをインターリーブしてメモリに格納します。スキーム

{A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...} ==> {A1 B1 C1 D1 A2 B2 C2 D2 ...}

64ビットを除くすべての型で

◆ v_store_interleave() [2/3]

template<typename _Tp , int n>

void cv::v_store_interleave	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		const v_reg< _Tp, n > &	c,
		hal::StoreMode	= `hal::STORE_UNALIGNED`
	)

inline

インターリーブ＆ストア(3チャネル)

3つのレジスタのデータをインターリーブしてメモリに格納します。スキーム

{A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...} ==> {A1 B1 C1 A2 B2 C2 ...}

64ビットを除くすべての型で

◆ v_store_interleave() [3/3]

template<typename _Tp , int n>

void cv::v_store_interleave	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		hal::StoreMode	= `hal::STORE_UNALIGNED`
	)

inline

インターリーブ＆ストア(2チャンネル)

2つのレジスタのデータをインターリーブしてメモリに格納します。スキーム

{A1 A2 ...}, {B1 B2 ...} ==> {A1 B1 A2 B2 ...}

64ビットを除くすべての型で

◆ v_store_low()

template<typename _Tp , int n>

void cv::v_store_low	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a
	)

inline

メモリへの格納(下半分)

レジスタの内容の下半分をメモリに格納します。スキーム

REG {A B C D} ==> MEM {A B}

◆ v_transpose4x4()

template<typename _Tp , int n>

void cv::v_transpose4x4	(	v_reg< _Tp, n > &	a0,
		const v_reg< _Tp, n > &	a1,
		const v_reg< _Tp, n > &	a2,
		const v_reg< _Tp, n > &	a3,
		v_reg< _Tp, n > &	b0,
		v_reg< _Tp, n > &	b1,
		v_reg< _Tp, n > &	b2,
		v_reg< _Tp, n > &	b3
	)

inline

4x4の行列を転置する

スキーム．

a0  {A1 A2 A3 A4}
a1  {B1 B2 B3 B4}
a2  {C1 C2 C3 C4}
a3  {D1 D2 D3 D4}
===============
b0  {A1 B1 C1 D1}
b1  {A2 B2 C2 D2}
b2  {A3 B3 C3 D3}
b3  {A4 B4 C4 D4}

◆ v_trunc() [1/2]

template<int n>

v_reg< int, n *2 > cv::v_trunc ( const v_reg< double, n > & a )

inline

これはオーバーロードされたメンバ関数です。利便性のために用意されています。元の関数との違いは引き数のみです。

◆ v_trunc() [2/2]

template<int n>

v_reg< int, n > cv::v_trunc ( const v_reg< float, n > & a )

inline

要素の切り捨て

各値を切り捨てます。入力は float ベクトル ==> 出力は int ベクトルです。

覚え書き: 浮動小数点型のみ対応。

◆ v_zip()

template<typename _Tp , int n>

void cv::v_zip	(	const v_reg< _Tp, n > &	a0,
		const v_reg< _Tp, n > &	a1,
		v_reg< _Tp, n > &	b0,
		v_reg< _Tp, n > &	b1
	)

inline

2つのベクターをインターリーブする

スキーム．

{A1 A2 A3 A4}

{B1 B2 B3 B4}
---------------

{A1 B1 A2 B2} and {A3 B3 A4 B4}

64ビットを除くすべての型で

モジュール

クラス

マクロ定義

型定義

列挙型

関数

Wide init with value

Wide init with zero

Wide load from memory

Wide load from memory(aligned)

Wide load lower half from memory

Wide load halfs from memory

Wide LUT of elements

Wide LUT of element pairs

Wide LUT of element quads

Wide load with double expansion

Wide load with quad expansion

Pack boolean values

詳解

Types

Load and store operations

Value reordering

Arithmetic, bitwise and comparison operations

Reduce and mask

Other math

Conversions

Matrix operations

Usability

マクロ定義詳解

◆ CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES

◆ CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES

◆ CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES

◆ CV__HAL_INTRIN_IMPL_BIN_OP_

◆ CV__HAL_INTRIN_IMPL_BIT_OP

◆ CV__HAL_INTRIN_IMPL_BIT_OP_

◆ CV__HAL_INTRIN_IMPL_BITWISE_NOT_

関数詳解

◆ operator&()

◆ operator*()

◆ operator+()

◆ operator-()

◆ operator/()

◆ operator^()

◆ operator|()

◆ operator~()

◆ v_absdiff() [1/3]

◆ v_absdiff() [2/3]

◆ v_absdiff() [3/3]

◆ v_absdiffs()

◆ v_broadcast_element()

◆ v_ceil() [1/2]

◆ v_ceil() [2/2]

◆ v_check_all()

◆ v_check_any()

◆ v_combine_high()

◆ v_combine_low()

◆ v_cvt_f32() [1/3]

◆ v_cvt_f32() [2/3]

◆ v_cvt_f32() [3/3]

◆ v_cvt_f64() [1/3]

◆ v_cvt_f64() [2/3]

◆ v_cvt_f64() [3/3]

◆ v_cvt_f64_high() [1/2]

◆ v_cvt_f64_high() [2/2]

◆ v_dotprod() [1/2]

◆ v_dotprod() [2/2]

◆ v_dotprod_expand() [1/2]

◆ v_dotprod_expand() [2/2]

◆ v_dotprod_expand_fast() [1/2]

◆ v_dotprod_expand_fast() [2/2]

◆ v_dotprod_fast() [1/2]

◆ v_dotprod_fast() [2/2]

◆ v_expand()

◆ v_expand_high()

◆ v_expand_low()

◆ v_extract()

◆ v_extract_n()

◆ v_floor() [1/2]

◆ v_floor() [2/2]

◆ v_fma()