5
#ifndef OPENCV_HAL_INTRIN_SSE_EM_HPP
6
#define OPENCV_HAL_INTRIN_SSE_EM_HPP
13CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
// Generates `_v128_<fun>(a)`: a one-argument inline wrapper that forwards
// to the native intrinsic `_mm_<fun>(a)`.
//   fun - intrinsic name without its `_mm_` prefix
//   tp  - vector type of both the argument and the result
#define OPENCV_HAL_SSE_WRAP_1(fun, tp) \
    inline tp _v128_##fun(const tp& a) \
    { return _mm_##fun(a); }
// Generates `_v128_<fun>(a, b)`: a two-argument inline wrapper that forwards
// to the native intrinsic `_mm_<fun>(a, b)`.
//   fun - intrinsic name without its `_mm_` prefix
//   tp  - vector type of the arguments and the result
#define OPENCV_HAL_SSE_WRAP_2(fun, tp) \
    inline tp _v128_##fun(const tp& a, const tp& b) \
    { return _mm_##fun(a, b); }
// Generates `_v128_<fun>(a, b, c)`: a three-argument inline wrapper that
// forwards to the native intrinsic `_mm_<fun>(a, b, c)`.
//   fun - intrinsic name without its `_mm_` prefix
//   tp  - vector type of the arguments and the result
#define OPENCV_HAL_SSE_WRAP_3(fun, tp) \
    inline tp _v128_##fun(const tp& a, const tp& b, const tp& c) \
    { return _mm_##fun(a, b, c); }
31
inline
__m128i _v128_comgt_epu32(
const
__m128i& a,
const
__m128i& b)
33
const
__m128i delta = _mm_set1_epi32((
int)0x80000000);
34
return
_mm_cmpgt_epi32(_mm_xor_si128(a, delta), _mm_xor_si128(b, delta));
38OPENCV_HAL_SSE_WRAP_2(_v128_comgt_epu32, __m128i)
46
inline
__m128i _v128_blendv_epi8(
const
__m128i& a,
const
__m128i& b,
const
__m128i& mask)
47{
return
_mm_xor_si128(a, _mm_and_si128(_mm_xor_si128(b, a), mask)); }
51
inline
__m128i _v128_cvtepu8_epi16(
const
__m128i& a)
53
const
__m128i z = _mm_setzero_si128();
54
return
_mm_unpacklo_epi8(a, z);
56
inline
__m128i _v128_cvtepi8_epi16(
const
__m128i& a)
57{
return
_mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); }
59
inline
__m128i _v128_cvtepu8_epi32(
const
__m128i& a)
61
const
__m128i z = _mm_setzero_si128();
62
return
_mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z);
64
inline
__m128i _v128_cvtepi8_epi32(
const
__m128i& a)
66
__m128i r = _mm_unpacklo_epi8(a, a);
67
r = _mm_unpacklo_epi8(r, r);
68
return
_mm_srai_epi32(r, 24);
71
inline
__m128i _v128_cvtepu16_epi32(
const
__m128i& a)
73
const
__m128i z = _mm_setzero_si128();
74
return
_mm_unpacklo_epi16(a, z);
76
inline
__m128i _v128_cvtepi16_epi32(
const
__m128i& a)
77{
return
_mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); }
79
inline
__m128i _v128_cvtepu32_epi64(
const
__m128i& a)
81
const
__m128i z = _mm_setzero_si128();
82
return
_mm_unpacklo_epi32(a, z);
84
inline
__m128i _v128_cvtepi32_epi64(
const
__m128i& a)
85{
return
_mm_unpacklo_epi32(a, _mm_srai_epi32(a, 31)); }
88
inline
__m128i _v128_mullo_epi32(
const
__m128i& a,
const
__m128i& b)
90
__m128i c0 = _mm_mul_epu32(a, b);
91
__m128i c1 = _mm_mul_epu32(_mm_srli_epi64(a, 32), _mm_srli_epi64(b, 32));
92
__m128i d0 = _mm_unpacklo_epi32(c0, c1);
93
__m128i d1 = _mm_unpackhi_epi32(c0, c1);
94
return
_mm_unpacklo_epi64(d0, d1);
98
inline
__m128i _v128_min_epu32(
const
__m128i& a,
const
__m128i& b)
99{
return
_v128_blendv_epi8(a, b, _v128_comgt_epu32(a, b)); }
103OPENCV_HAL_SSE_WRAP_1(cvtepu8_epi16, __m128i)
104OPENCV_HAL_SSE_WRAP_1(cvtepi8_epi16, __m128i)
105OPENCV_HAL_SSE_WRAP_1(cvtepu8_epi32, __m128i)
106OPENCV_HAL_SSE_WRAP_1(cvtepi8_epi32, __m128i)
107OPENCV_HAL_SSE_WRAP_1(cvtepu16_epi32, __m128i)
108OPENCV_HAL_SSE_WRAP_1(cvtepi16_epi32, __m128i)
109OPENCV_HAL_SSE_WRAP_1(cvtepu32_epi64, __m128i)
110OPENCV_HAL_SSE_WRAP_1(cvtepi32_epi64, __m128i)
111OPENCV_HAL_SSE_WRAP_2(min_epu32, __m128i)
112OPENCV_HAL_SSE_WRAP_2(mullo_epi32, __m128i)
113OPENCV_HAL_SSE_WRAP_3(blendv_epi8, __m128i)
120
inline
__m128i _v128_cvtepu8_epi16_high(
const
__m128i& a)
122
const
__m128i z = _mm_setzero_si128();
123
return
_mm_unpackhi_epi8(a, z);
125
inline
__m128i _v128_cvtepi8_epi16_high(
const
__m128i& a)
126{
return
_mm_srai_epi16(_mm_unpackhi_epi8(a, a), 8); }
128
inline
__m128i _v128_cvtepu16_epi32_high(
const
__m128i& a)
130
const
__m128i z = _mm_setzero_si128();
131
return
_mm_unpackhi_epi16(a, z);
133
inline
__m128i _v128_cvtepi16_epi32_high(
const
__m128i& a)
134{
return
_mm_srai_epi32(_mm_unpackhi_epi16(a, a), 16); }
136
inline
__m128i _v128_cvtepu32_epi64_high(
const
__m128i& a)
138
const
__m128i z = _mm_setzero_si128();
139
return
_mm_unpackhi_epi32(a, z);
141
inline
__m128i _v128_cvtepi32_epi64_high(
const
__m128i& a)
142{
return
_mm_unpackhi_epi32(a, _mm_srai_epi32(a, 31)); }
145
inline
__m128i _v128_packs_epu32(
const
__m128i& a,
const
__m128i& b)
147
const
__m128i m = _mm_set1_epi32(65535);
148
__m128i am = _v128_min_epu32(a, m);
149
__m128i bm = _v128_min_epu32(b, m);
151
return
_mm_packus_epi32(am, bm);
153
const
__m128i d = _mm_set1_epi32(32768), nd = _mm_set1_epi16(-32768);
154
am = _mm_sub_epi32(am, d);
155
bm = _mm_sub_epi32(bm, d);
156
am = _mm_packs_epi32(am, bm);
157
return
_mm_sub_epi16(am, nd);
162
inline
int64 _v128_extract_epi64(
const
__m128i& a)
164
#if defined(CV__SIMD_HAVE_mm_extract_epi64) || (CV_SSE4_1 && (defined(__x86_64__)
|| defined(_M_X64)
))
165
#define CV__SIMD_NATIVE_mm_extract_epi64 1
166
return
_mm_extract_epi64(a, i);
168
CV_DECL_ALIGNED(16) int64 tmp[2];
169
_mm_store_si128((__m128i*)tmp, a);
174CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
"black box" representation of the file storage associated with a file on disk.
Definition:
aruco.hpp:75