OpenCV 4.5.3 (日本語機械翻訳)
intrin.hpp
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
16 // Copyright (C) 2015, Itseez Inc., all rights reserved.
17 // Third party copyrights are property of their respective owners.
18 //
19 // Redistribution and use in source and binary forms, with or without modification,
20 // are permitted provided that the following conditions are met:
21 //
22 // * Redistribution's of source code must retain the above copyright notice,
23 // this list of conditions and the following disclaimer.
24 //
25 // * Redistribution's in binary form must reproduce the above copyright notice,
26 // this list of conditions and the following disclaimer in the documentation
27 // and/or other materials provided with the distribution.
28 //
29 // * The name of the copyright holders may not be used to endorse or promote products
30 // derived from this software without specific prior written permission.
31 //
32 // This software is provided by the copyright holders and contributors "as is" and
33 // any express or implied warranties, including, but not limited to, the implied
34 // warranties of merchantability and fitness for a particular purpose are disclaimed.
35 // In no event shall the Intel Corporation or contributors be liable for any direct,
36 // indirect, incidental, special, exemplary, or consequential damages
37 // (including, but not limited to, procurement of substitute goods or services;
38 // loss of use, data, or profits; or business interruption) however caused
39 // and on any theory of liability, whether in contract, strict liability,
40 // or tort (including negligence or otherwise) arising in any way out of
41 // the use of this software, even if advised of the possibility of such damage.
42 //
43 //M*/
44
45 #ifndef OPENCV_HAL_INTRIN_HPP
46 #define OPENCV_HAL_INTRIN_HPP
47
48 #include <cmath>
49 #include <float.h>
50 #include <stdlib.h>
51 #include "opencv2/core/cvdef.h"
52
// Tiny scalar helper macros, passed as "operation" arguments to the generic
// intrinsic implementation macros (identity / first-argument selectors and
// elementwise binary ops).
#define OPENCV_HAL_ADD(a, b) ((a) + (b))
#define OPENCV_HAL_AND(a, b) ((a) & (b))
// Identity: expands to its argument unchanged.
#define OPENCV_HAL_NOP(a) (a)
// Selects the first of its two arguments.
#define OPENCV_HAL_1ST(a, b) (a)
57
namespace {
// Returns the number of trailing (least-significant) zero bits of `value`,
// e.g. trailingZeros32(8) == 3, trailingZeros32(1) == 0.
// Returns 32 for value == 0 (tzcnt convention) on the intrinsic-guarded
// branches; see the NOTE on the portable fallback below.
inline unsigned int trailingZeros32(unsigned int value) {
#if defined(_MSC_VER)
#if (_MSC_VER < 1700) || defined(_M_ARM) || defined(_M_ARM64)
    unsigned long index = 0;
    _BitScanForward(&index, value);
    return (unsigned int)index;
#elif defined(__clang__)
    // clang-cl doesn't export _tzcnt_u32 for non BMI systems
    return value ? __builtin_ctz(value) : 32;
#else
    return _tzcnt_u32(value);
#endif
#elif defined(__GNUC__) || defined(__GNUG__)
    // __builtin_ctz(0) is undefined behavior, so guard the zero case
    // explicitly, matching the clang-cl branch above.
    return value ? __builtin_ctz(value) : 32;
#elif defined(__ICC) || defined(__INTEL_COMPILER)
    return _bit_scan_forward(value);
#elif defined(__clang__)
    // NOTE(review): unreachable in practice (clang defines __GNUC__, so the
    // branch above is taken first) and `llvm.cttz.i32` is not a callable C++
    // identifier; kept for fidelity with upstream.
    return llvm.cttz.i32(value, true);
#else
    // Portable fallback: De Bruijn multiplication — isolate the lowest set
    // bit, multiply by the De Bruijn constant, and use the top 5 bits to
    // index a precomputed position table.
    // NOTE(review): this table maps value == 0 to 0, not 32, unlike the
    // intrinsic branches; callers should avoid passing 0 on such targets.
    static const int MultiplyDeBruijnBitPosition[32] = {
        0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
        31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
    return MultiplyDeBruijnBitPosition[((uint32_t)((value & -value) * 0x077CB531U)) >> 27];
#endif
}
}
85
86 // unlike HAL API, which is in cv::hal,
87 // we put intrinsics into cv namespace to make its
88 // access from within opencv code more accessible
89 namespace cv {
90
91 namespace hal {
92
// Hint describing how a wide-vector store should treat its destination
// pointer; passed to store intrinsics that accept a StoreMode argument.
93 enum StoreMode
94{
// No alignment assumption on the destination pointer.
95 STORE_UNALIGNED = 0,
// Destination pointer is aligned to the vector width.
96 STORE_ALIGNED = 1,
// Aligned store intended to bypass the cache (non-temporal); actual
// behavior is backend-specific — falls back to a plain aligned store
// where unsupported.
97 STORE_ALIGNED_NOCACHE = 2
98};
99
100}
101
102 // TODO FIXIT: Don't use "God" traits. Split on separate cases.
// Primary template is intentionally empty: only the explicit specializations
// generated by the CV_INTRIN_DEF_TYPE_TRAITS* macros below are usable, so
// instantiating V_TypeTraits with an unsupported lane type fails to compile.
103 template<typename _Tp> struct V_TypeTraits
104{
105};
106
// Generates the V_TypeTraits<type> specialization. Arguments:
//   type      - lane type being described
//   int_type_ - signed integer type of the same size
//   uint_type_- unsigned integer type of the same size
//   abs_type_ - result type of absolute-value operations
//   w_type_   - double-width ("widened") type
//   q_type_   - quad-width type
//   sum_type_ - accumulator type used for reductions
// reinterpret_int / reinterpret_from_int give a bit-exact reinterpretation
// to/from int_type via a union (type punning).
107 #define CV_INTRIN_DEF_TYPE_TRAITS(type, int_type_, uint_type_, abs_type_, w_type_, q_type_, sum_type_) \
108 template<> struct V_TypeTraits<type> \
109 { \
110 typedef type value_type; \
111 typedef int_type_ int_type; \
112 typedef abs_type_ abs_type; \
113 typedef uint_type_ uint_type; \
114 typedef w_type_ w_type; \
115 typedef q_type_ q_type; \
116 typedef sum_type_ sum_type; \
117 \
118 static inline int_type reinterpret_int(type x) \
119 { \
120 union { type l; int_type i; } v; \
121 v.l = x; \
122 return v.i; \
123 } \
124 \
125 static inline type reinterpret_from_int(int_type x) \
126 { \
127 union { type l; int_type i; } v; \
128 v.i = x; \
129 return v.l; \
130 } \
131 }
132
// Same generator as CV_INTRIN_DEF_TYPE_TRAITS above, but for lane types that
// have no quad-width counterpart: the produced specialization omits the
// q_type member entirely, so code requiring q_type fails to compile for
// these types instead of silently using a wrong one.
133 #define CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(type, int_type_, uint_type_, abs_type_, w_type_, sum_type_) \
134 template<> struct V_TypeTraits<type> \
135 { \
136 typedef type value_type; \
137 typedef int_type_ int_type; \
138 typedef abs_type_ abs_type; \
139 typedef uint_type_ uint_type; \
140 typedef w_type_ w_type; \
141 typedef sum_type_ sum_type; \
142 \
143 static inline int_type reinterpret_int(type x) \
144 { \
145 union { type l; int_type i; } v; \
146 v.l = x; \
147 return v.i; \
148 } \
149 \
150 static inline type reinterpret_from_int(int_type x) \
151 { \
152 union { type l; int_type i; } v; \
153 v.i = x; \
154 return v.l; \
155 } \
156 }
157
// Trait tables for every supported lane type. Argument columns:
//   (type, int_type, uint_type, abs_type, w_type [, q_type], sum_type)
// The _NO_Q_TYPE variants are used where no quad-width type exists;
// 64-bit and double lanes additionally have no wider type (w_type = void).
158CV_INTRIN_DEF_TYPE_TRAITS(uchar, schar, uchar, uchar, ushort, unsigned, unsigned);
159CV_INTRIN_DEF_TYPE_TRAITS(schar, schar, uchar, uchar, short, int, int);
160CV_INTRIN_DEF_TYPE_TRAITS(ushort, short, ushort, ushort, unsigned, uint64, unsigned);
161CV_INTRIN_DEF_TYPE_TRAITS(short, short, ushort, ushort, int, int64, int);
162CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(unsigned, int, unsigned, unsigned, uint64, unsigned);
163CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int, int, unsigned, unsigned, int64, int);
164CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(float, int, unsigned, float, double, float);
165CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(uint64, int64, uint64, uint64, void, uint64);
166CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int64, int64, uint64, uint64, void, int64);
167CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(double, int64, uint64, double, void, double);
168
169 #ifndef CV_DOXYGEN
170
171 #ifndef CV_CPU_OPTIMIZATION_HAL_NAMESPACE
172 #ifdef CV_FORCE_SIMD128_CPP
173 #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_EMULATOR_CPP
174 #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_EMULATOR_CPP {
175 #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
176 #elif defined(CV_CPU_DISPATCH_MODE)
177 #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE __CV_CAT(hal_, CV_CPU_DISPATCH_MODE)
178 #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) {
179 #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
180 #else
181 #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_baseline
182 #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_baseline {
183 #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
184 #endif
185 #endif // CV_CPU_OPTIMIZATION_HAL_NAMESPACE
186
187CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
188CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
189 using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
190 #endif
191}
192
193 #ifdef CV_DOXYGEN
194 # undef CV_AVX2
195 # undef CV_SSE2
196 # undef CV_NEON
197 # undef CV_VSX
198 # undef CV_FP16
199 # undef CV_MSA
200 # undef CV_RVV
201 #endif
202
203 #if (CV_SSE2 || CV_NEON || CV_VSX || CV_MSA || CV_WASM_SIMD || CV_RVV071 || CV_RVV) && !defined(CV_FORCE_SIMD128_CPP)
204 #define CV__SIMD_FORWARD 128
205 #include "opencv2/core/hal/intrin_forward.hpp"
206 #endif
207
208 #if CV_SSE2 && !defined(CV_FORCE_SIMD128_CPP)
209
210 #include "opencv2/core/hal/intrin_sse_em.hpp"
211 #include "opencv2/core/hal/intrin_sse.hpp"
212
213 #elif CV_NEON && !defined(CV_FORCE_SIMD128_CPP)
214
215 #include "opencv2/core/hal/intrin_neon.hpp"
216
217 #elif CV_RVV071 && !defined(CV_FORCE_SIMD128_CPP)
218 #define CV_SIMD128_CPP 0
219 #include "opencv2/core/hal/intrin_rvv071.hpp"
220
221 #elif CV_VSX && !defined(CV_FORCE_SIMD128_CPP)
222
223 #include "opencv2/core/hal/intrin_vsx.hpp"
224
225 #elif CV_MSA && !defined(CV_FORCE_SIMD128_CPP)
226
227 #include "opencv2/core/hal/intrin_msa.hpp"
228
229 #elif CV_WASM_SIMD && !defined(CV_FORCE_SIMD128_CPP)
230 #include "opencv2/core/hal/intrin_wasm.hpp"
231
232 #elif CV_RVV && !defined(CV_FORCE_SIMD128_CPP)
233 #include "opencv2/core/hal/intrin_rvv.hpp"
234
235 #else
236
237 #include "opencv2/core/hal/intrin_cpp.hpp"
238
239 #endif
240
241 // AVX2 can be used together with SSE2, so
242 // we define those two sets of intrinsics at once.
243 // Most of the intrinsics do not conflict (the proper overloaded variant is
244 // resolved by the argument types, e.g. v_float32x4 ~ SSE2, v_float32x8 ~ AVX2),
245 // but some of AVX2 intrinsics get v256_ prefix instead of v_, e.g. v256_load() vs v_load().
246 // Correspondingly, the wide intrinsics (which are mapped to the "widest"
247 // available instruction set) will get vx_ prefix
248 // (and will be mapped to v256_ counterparts) (e.g. vx_load() => v256_load())
249 #if CV_AVX2
250
251 #define CV__SIMD_FORWARD 256
252 #include "opencv2/core/hal/intrin_forward.hpp"
253 #include "opencv2/core/hal/intrin_avx.hpp"
254
255 #endif
256
257 // AVX512 can be used together with SSE2 and AVX2, so
258 // we define those sets of intrinsics at once.
259 // For some of AVX512 intrinsics get v512_ prefix instead of v_, e.g. v512_load() vs v_load().
260 // Wide intrinsics will be mapped to v512_ counterparts in this case(e.g. vx_load() => v512_load())
261 #if CV_AVX512_SKX
262
263 #define CV__SIMD_FORWARD 512
264 #include "opencv2/core/hal/intrin_forward.hpp"
265 #include "opencv2/core/hal/intrin_avx512.hpp"
266
267 #endif
268
270
271 namespace cv {
272
273 #ifndef CV_DOXYGEN
274CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
275 #endif
276
277 #ifndef CV_SIMD128
278 #define CV_SIMD128 0
279 #endif
280
281 #ifndef CV_SIMD128_CPP
282 #define CV_SIMD128_CPP 0
283 #endif
284
285 #ifndef CV_SIMD128_64F
286 #define CV_SIMD128_64F 0
287 #endif
288
289 #ifndef CV_SIMD256
290 #define CV_SIMD256 0
291 #endif
292
293 #ifndef CV_SIMD256_64F
294 #define CV_SIMD256_64F 0
295 #endif
296
297 #ifndef CV_SIMD512
298 #define CV_SIMD512 0
299 #endif
300
301 #ifndef CV_SIMD512_64F
302 #define CV_SIMD512_64F 0
303 #endif
304
305 #ifndef CV_SIMD128_FP16
306 #define CV_SIMD128_FP16 0
307 #endif
308
309 #ifndef CV_SIMD256_FP16
310 #define CV_SIMD256_FP16 0
311 #endif
312
313 #ifndef CV_SIMD512_FP16
314 #define CV_SIMD512_FP16 0
315 #endif
316
317 //==================================================================================================
318
// Maps a vector register type to its related register types (unsigned /
// widened / quad-width / same-width integer / rounding-target counterparts).
// Primary template intentionally empty; only the CV_DEF_REG_TRAITS
// specializations below are usable.
319 template<typename _Tp> struct V_RegTraits
320{
321};
322
// Generates one V_RegTraits<_reg> specialization. Arguments:
//   prefix     - intrinsic prefix for this register width (v / v256 / v512)
//   _reg       - register type being described; lane_type/suffix name it
//   _u_reg     - unsigned register of the same lane width
//   _w_reg     - register with double-width lanes (void if none)
//   _q_reg     - register with quad-width lanes (void if none)
//   _int_reg   - same-width signed-integer register
//   _round_reg - integer register produced by float->int rounding (void if n/a)
// Note: prefix, lane_type and suffix are accepted for documentation/symmetry
// but not used in the expansion below.
323 #define CV_DEF_REG_TRAITS(prefix, _reg, lane_type, suffix, _u_reg, _w_reg, _q_reg, _int_reg, _round_reg) \
324 template<> struct V_RegTraits<_reg> \
325 { \
326 typedef _reg reg; \
327 typedef _u_reg u_reg; \
328 typedef _w_reg w_reg; \
329 typedef _q_reg q_reg; \
330 typedef _int_reg int_reg; \
331 typedef _round_reg round_reg; \
332 }
333
// V_RegTraits tables, one section per register width. Column order matches
// CV_DEF_REG_TRAITS: (prefix, reg, lane_type, suffix, u_reg, w_reg, q_reg,
// int_reg, round_reg); "void" marks a relation that does not exist.
// --- 128-bit registers ---
334 #if CV_SIMD128 || CV_SIMD128_CPP
335 CV_DEF_REG_TRAITS(v, v_uint8x16, uchar, u8, v_uint8x16, v_uint16x8, v_uint32x4, v_int8x16, void);
336 CV_DEF_REG_TRAITS(v, v_int8x16, schar, s8, v_uint8x16, v_int16x8, v_int32x4, v_int8x16, void);
337 CV_DEF_REG_TRAITS(v, v_uint16x8, ushort, u16, v_uint16x8, v_uint32x4, v_uint64x2, v_int16x8, void);
338 CV_DEF_REG_TRAITS(v, v_int16x8, short, s16, v_uint16x8, v_int32x4, v_int64x2, v_int16x8, void);
339 CV_DEF_REG_TRAITS(v, v_uint32x4, unsigned, u32, v_uint32x4, v_uint64x2, void, v_int32x4, void);
340 CV_DEF_REG_TRAITS(v, v_int32x4, int, s32, v_uint32x4, v_int64x2, void, v_int32x4, void);
// float32 widens to float64 only when double-precision SIMD is available.
341 #if CV_SIMD128_64F || CV_SIMD128_CPP
342 CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, v_float64x2, void, v_int32x4, v_int32x4);
343 #else
344 CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, void, void, v_int32x4, v_int32x4);
345 #endif
346 CV_DEF_REG_TRAITS(v, v_uint64x2, uint64, u64, v_uint64x2, void, void, v_int64x2, void);
347 CV_DEF_REG_TRAITS(v, v_int64x2, int64, s64, v_uint64x2, void, void, v_int64x2, void);
348 #if CV_SIMD128_64F
349 CV_DEF_REG_TRAITS(v, v_float64x2, double, f64, v_float64x2, void, void, v_int64x2, v_int32x4);
350 #endif
351 #endif
352
// --- 256-bit registers (AVX2) ---
353 #if CV_SIMD256
354 CV_DEF_REG_TRAITS(v256, v_uint8x32, uchar, u8, v_uint8x32, v_uint16x16, v_uint32x8, v_int8x32, void);
355 CV_DEF_REG_TRAITS(v256, v_int8x32, schar, s8, v_uint8x32, v_int16x16, v_int32x8, v_int8x32, void);
356 CV_DEF_REG_TRAITS(v256, v_uint16x16, ushort, u16, v_uint16x16, v_uint32x8, v_uint64x4, v_int16x16, void);
357 CV_DEF_REG_TRAITS(v256, v_int16x16, short, s16, v_uint16x16, v_int32x8, v_int64x4, v_int16x16, void);
358 CV_DEF_REG_TRAITS(v256, v_uint32x8, unsigned, u32, v_uint32x8, v_uint64x4, void, v_int32x8, void);
359 CV_DEF_REG_TRAITS(v256, v_int32x8, int, s32, v_uint32x8, v_int64x4, void, v_int32x8, void);
360 CV_DEF_REG_TRAITS(v256, v_float32x8, float, f32, v_float32x8, v_float64x4, void, v_int32x8, v_int32x8);
361 CV_DEF_REG_TRAITS(v256, v_uint64x4, uint64, u64, v_uint64x4, void, void, v_int64x4, void);
362 CV_DEF_REG_TRAITS(v256, v_int64x4, int64, s64, v_uint64x4, void, void, v_int64x4, void);
363 CV_DEF_REG_TRAITS(v256, v_float64x4, double, f64, v_float64x4, void, void, v_int64x4, v_int32x8);
364 #endif
365
// --- 512-bit registers (AVX-512) ---
366 #if CV_SIMD512
367 CV_DEF_REG_TRAITS(v512, v_uint8x64, uchar, u8, v_uint8x64, v_uint16x32, v_uint32x16, v_int8x64, void);
368 CV_DEF_REG_TRAITS(v512, v_int8x64, schar, s8, v_uint8x64, v_int16x32, v_int32x16, v_int8x64, void);
369 CV_DEF_REG_TRAITS(v512, v_uint16x32, ushort, u16, v_uint16x32, v_uint32x16, v_uint64x8, v_int16x32, void);
370 CV_DEF_REG_TRAITS(v512, v_int16x32, short, s16, v_uint16x32, v_int32x16, v_int64x8, v_int16x32, void);
371 CV_DEF_REG_TRAITS(v512, v_uint32x16, unsigned, u32, v_uint32x16, v_uint64x8, void, v_int32x16, void);
372 CV_DEF_REG_TRAITS(v512, v_int32x16, int, s32, v_uint32x16, v_int64x8, void, v_int32x16, void);
373 CV_DEF_REG_TRAITS(v512, v_float32x16, float, f32, v_float32x16, v_float64x8, void, v_int32x16, v_int32x16);
374 CV_DEF_REG_TRAITS(v512, v_uint64x8, uint64, u64, v_uint64x8, void, void, v_int64x8, void);
375 CV_DEF_REG_TRAITS(v512, v_int64x8, int64, s64, v_uint64x8, void, void, v_int64x8, void);
376 CV_DEF_REG_TRAITS(v512, v_float64x8, double, f64, v_float64x8, void, void, v_int64x8, v_int32x16);
377 #endif
379
380 #if CV_SIMD512 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 512)
381 #define CV__SIMD_NAMESPACE simd512
382 namespace CV__SIMD_NAMESPACE {
383 #define CV_SIMD 1
384 #define CV_SIMD_64F CV_SIMD512_64F
385 #define CV_SIMD_FP16 CV_SIMD512_FP16
386 #define CV_SIMD_WIDTH 64
390 typedef v_uint8x64 v_uint8;
392 typedef v_int8x64 v_int8;
394 typedef v_uint16x32 v_uint16;
396 typedef v_int16x32 v_int16;
398 typedef v_uint32x16 v_uint32;
400 typedef v_int32x16 v_int32;
402 typedef v_uint64x8 v_uint64;
404 typedef v_int64x8 v_int64;
406 typedef v_float32x16 v_float32;
407 #if CV_SIMD512_64F
409 typedef v_float64x8 v_float64;
410 #endif
412
413 #define VXPREFIX(func) v512##func
414} // namespace
415 using namespace CV__SIMD_NAMESPACE;
416 #elif CV_SIMD256 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 256)
417 #define CV__SIMD_NAMESPACE simd256
418 namespace CV__SIMD_NAMESPACE {
419 #define CV_SIMD 1
420 #define CV_SIMD_64F CV_SIMD256_64F
421 #define CV_SIMD_FP16 CV_SIMD256_FP16
422 #define CV_SIMD_WIDTH 32
426 typedef v_uint8x32 v_uint8;
428 typedef v_int8x32 v_int8;
430 typedef v_uint16x16 v_uint16;
432 typedef v_int16x16 v_int16;
434 typedef v_uint32x8 v_uint32;
436 typedef v_int32x8 v_int32;
438 typedef v_uint64x4 v_uint64;
440 typedef v_int64x4 v_int64;
442 typedef v_float32x8 v_float32;
443 #if CV_SIMD256_64F
445 typedef v_float64x4 v_float64;
446 #endif
448
449 #define VXPREFIX(func) v256##func
450} // namespace
451 using namespace CV__SIMD_NAMESPACE;
452 #elif (CV_SIMD128 || CV_SIMD128_CPP) && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 128)
453 #if defined CV_SIMD128_CPP
454 #define CV__SIMD_NAMESPACE simd128_cpp
455 #else
456 #define CV__SIMD_NAMESPACE simd128
457 #endif
458 namespace CV__SIMD_NAMESPACE {
459 #define CV_SIMD CV_SIMD128
460 #define CV_SIMD_64F CV_SIMD128_64F
461 #define CV_SIMD_WIDTH 16
465 typedef v_uint8x16 v_uint8;
467 typedef v_int8x16 v_int8;
469 typedef v_uint16x8 v_uint16;
471 typedef v_int16x8 v_int16;
473 typedef v_uint32x4 v_uint32;
475 typedef v_int32x4 v_int32;
477 typedef v_uint64x2 v_uint64;
479 typedef v_int64x2 v_int64;
481 typedef v_float32x4 v_float32;
482 #if CV_SIMD128_64F
484 typedef v_float64x2 v_float64;
485 #endif
487
488 #define VXPREFIX(func) v##func
489} // namespace
490 using namespace CV__SIMD_NAMESPACE;
491 #endif
492
// Width-generic "vx_" wrappers. VXPREFIX was defined above to v, v256 or
// v512 depending on the widest enabled instruction set, so each vx_* call
// forwards to the corresponding fixed-width intrinsic (e.g. vx_load() =>
// v256_load() on AVX2). The Doxygen comments of the original header were
// lost in extraction; brief section comments are restored below.
493 namespace CV__SIMD_NAMESPACE {
// Create a wide vector with all lanes set to the given scalar value.
499 inline v_uint8 vx_setall_u8(uchar v) { return VXPREFIX(_setall_u8)(v); }
500 inline v_int8 vx_setall_s8(schar v) { return VXPREFIX(_setall_s8)(v); }
501 inline v_uint16 vx_setall_u16(ushort v) { return VXPREFIX(_setall_u16)(v); }
502 inline v_int16 vx_setall_s16(short v) { return VXPREFIX(_setall_s16)(v); }
503 inline v_int32 vx_setall_s32(int v) { return VXPREFIX(_setall_s32)(v); }
504 inline v_uint32 vx_setall_u32(unsigned v) { return VXPREFIX(_setall_u32)(v); }
505 inline v_float32 vx_setall_f32(float v) { return VXPREFIX(_setall_f32)(v); }
506 inline v_int64 vx_setall_s64(int64 v) { return VXPREFIX(_setall_s64)(v); }
507 inline v_uint64 vx_setall_u64(uint64 v) { return VXPREFIX(_setall_u64)(v); }
508 #if CV_SIMD_64F
509 inline v_float64 vx_setall_f64(double v) { return VXPREFIX(_setall_f64)(v); }
510 #endif
512
// Create a zero-initialized wide vector.
516 inline v_uint8 vx_setzero_u8() { return VXPREFIX(_setzero_u8)(); }
517 inline v_int8 vx_setzero_s8() { return VXPREFIX(_setzero_s8)(); }
518 inline v_uint16 vx_setzero_u16() { return VXPREFIX(_setzero_u16)(); }
519 inline v_int16 vx_setzero_s16() { return VXPREFIX(_setzero_s16)(); }
520 inline v_int32 vx_setzero_s32() { return VXPREFIX(_setzero_s32)(); }
521 inline v_uint32 vx_setzero_u32() { return VXPREFIX(_setzero_u32)(); }
522 inline v_float32 vx_setzero_f32() { return VXPREFIX(_setzero_f32)(); }
523 inline v_int64 vx_setzero_s64() { return VXPREFIX(_setzero_s64)(); }
524 inline v_uint64 vx_setzero_u64() { return VXPREFIX(_setzero_u64)(); }
525 #if CV_SIMD_64F
526 inline v_float64 vx_setzero_f64() { return VXPREFIX(_setzero_f64)(); }
527 #endif
529
// Load a full wide vector from memory (no alignment requirement).
533 inline v_uint8 vx_load(const uchar * ptr) { return VXPREFIX(_load)(ptr); }
534 inline v_int8 vx_load(const schar * ptr) { return VXPREFIX(_load)(ptr); }
535 inline v_uint16 vx_load(const ushort * ptr) { return VXPREFIX(_load)(ptr); }
536 inline v_int16 vx_load(const short * ptr) { return VXPREFIX(_load)(ptr); }
537 inline v_int32 vx_load(const int * ptr) { return VXPREFIX(_load)(ptr); }
538 inline v_uint32 vx_load(const unsigned * ptr) { return VXPREFIX(_load)(ptr); }
539 inline v_float32 vx_load(const float * ptr) { return VXPREFIX(_load)(ptr); }
540 inline v_int64 vx_load(const int64 * ptr) { return VXPREFIX(_load)(ptr); }
541 inline v_uint64 vx_load(const uint64 * ptr) { return VXPREFIX(_load)(ptr); }
542 #if CV_SIMD_64F
543 inline v_float64 vx_load(const double * ptr) { return VXPREFIX(_load)(ptr); }
544 #endif
546
// Load a full wide vector from memory aligned to the vector width.
550 inline v_uint8 vx_load_aligned(const uchar * ptr) { return VXPREFIX(_load_aligned)(ptr); }
551 inline v_int8 vx_load_aligned(const schar * ptr) { return VXPREFIX(_load_aligned)(ptr); }
552 inline v_uint16 vx_load_aligned(const ushort * ptr) { return VXPREFIX(_load_aligned)(ptr); }
553 inline v_int16 vx_load_aligned(const short * ptr) { return VXPREFIX(_load_aligned)(ptr); }
554 inline v_int32 vx_load_aligned(const int * ptr) { return VXPREFIX(_load_aligned)(ptr); }
555 inline v_uint32 vx_load_aligned(const unsigned * ptr) { return VXPREFIX(_load_aligned)(ptr); }
556 inline v_float32 vx_load_aligned(const float * ptr) { return VXPREFIX(_load_aligned)(ptr); }
557 inline v_int64 vx_load_aligned(const int64 * ptr) { return VXPREFIX(_load_aligned)(ptr); }
558 inline v_uint64 vx_load_aligned(const uint64 * ptr) { return VXPREFIX(_load_aligned)(ptr); }
559 #if CV_SIMD_64F
560 inline v_float64 vx_load_aligned(const double * ptr) { return VXPREFIX(_load_aligned)(ptr); }
561 #endif
563
// Load data into the lower half of the vector only (see v_load_low).
567 inline v_uint8 vx_load_low(const uchar * ptr) { return VXPREFIX(_load_low)(ptr); }
568 inline v_int8 vx_load_low(const schar * ptr) { return VXPREFIX(_load_low)(ptr); }
569 inline v_uint16 vx_load_low(const ushort * ptr) { return VXPREFIX(_load_low)(ptr); }
570 inline v_int16 vx_load_low(const short * ptr) { return VXPREFIX(_load_low)(ptr); }
571 inline v_int32 vx_load_low(const int * ptr) { return VXPREFIX(_load_low)(ptr); }
572 inline v_uint32 vx_load_low(const unsigned * ptr) { return VXPREFIX(_load_low)(ptr); }
573 inline v_float32 vx_load_low(const float * ptr) { return VXPREFIX(_load_low)(ptr); }
574 inline v_int64 vx_load_low(const int64 * ptr) { return VXPREFIX(_load_low)(ptr); }
575 inline v_uint64 vx_load_low(const uint64 * ptr) { return VXPREFIX(_load_low)(ptr); }
576 #if CV_SIMD_64F
577 inline v_float64 vx_load_low(const double * ptr) { return VXPREFIX(_load_low)(ptr); }
578 #endif
580
// Load the lower and upper halves of the vector from two separate addresses.
584 inline v_uint8 vx_load_halves(const uchar * ptr0, const uchar * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
585 inline v_int8 vx_load_halves(const schar * ptr0, const schar * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
586 inline v_uint16 vx_load_halves(const ushort * ptr0, const ushort * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
587 inline v_int16 vx_load_halves(const short * ptr0, const short * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
588 inline v_int32 vx_load_halves(const int * ptr0, const int * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
589 inline v_uint32 vx_load_halves(const unsigned * ptr0, const unsigned * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
590 inline v_float32 vx_load_halves(const float * ptr0, const float * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
591 inline v_int64 vx_load_halves(const int64 * ptr0, const int64 * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
592 inline v_uint64 vx_load_halves(const uint64 * ptr0, const uint64 * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
593 #if CV_SIMD_64F
594 inline v_float64 vx_load_halves(const double * ptr0, const double * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
595 #endif
597
// Gather (table lookup): lane i of the result is ptr[idx[i]].
601 inline v_uint8 vx_lut(const uchar * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
602 inline v_int8 vx_lut(const schar * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
603 inline v_uint16 vx_lut(const ushort * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
604 inline v_int16 vx_lut(const short* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
605 inline v_int32 vx_lut(const int* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
606 inline v_uint32 vx_lut(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
607 inline v_float32 vx_lut(const float* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
608 inline v_int64 vx_lut(const int64 * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
609 inline v_uint64 vx_lut(const uint64 * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
610 #if CV_SIMD_64F
611 inline v_float64 vx_lut(const double* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
612 #endif
614
// Gather pairs of consecutive elements: one index loads ptr[idx[i]] and
// ptr[idx[i]+1] (see v_lut_pairs).
618 inline v_uint8 vx_lut_pairs(const uchar * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
619 inline v_int8 vx_lut_pairs(const schar * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
620 inline v_uint16 vx_lut_pairs(const ushort * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
621 inline v_int16 vx_lut_pairs(const short* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
622 inline v_int32 vx_lut_pairs(const int* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
623 inline v_uint32 vx_lut_pairs(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
624 inline v_float32 vx_lut_pairs(const float* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
625 inline v_int64 vx_lut_pairs(const int64 * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
626 inline v_uint64 vx_lut_pairs(const uint64 * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
627 #if CV_SIMD_64F
628 inline v_float64 vx_lut_pairs(const double* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
629 #endif
631
// Gather quads of consecutive elements (see v_lut_quads); only defined for
// lane types narrow enough to hold four elements per index.
635 inline v_uint8 vx_lut_quads(const uchar* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
636 inline v_int8 vx_lut_quads(const schar* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
637 inline v_uint16 vx_lut_quads(const ushort* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
638 inline v_int16 vx_lut_quads(const short* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
639 inline v_int32 vx_lut_quads(const int* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
640 inline v_uint32 vx_lut_quads(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
641 inline v_float32 vx_lut_quads(const float* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
643
// Load and widen: each loaded element is expanded to the double-width lane
// type (e.g. uchar -> ushort, float16_t -> float).
647 inline v_uint16 vx_load_expand(const uchar * ptr) { return VXPREFIX(_load_expand)(ptr); }
648 inline v_int16 vx_load_expand(const schar * ptr) { return VXPREFIX(_load_expand)(ptr); }
649 inline v_uint32 vx_load_expand(const ushort * ptr) { return VXPREFIX(_load_expand)(ptr); }
650 inline v_int32 vx_load_expand(const short* ptr) { return VXPREFIX(_load_expand)(ptr); }
651 inline v_int64 vx_load_expand(const int* ptr) { return VXPREFIX(_load_expand)(ptr); }
652 inline v_uint64 vx_load_expand(const unsigned* ptr) { return VXPREFIX(_load_expand)(ptr); }
653 inline v_float32 vx_load_expand(const float16_t * ptr) { return VXPREFIX(_load_expand)(ptr); }
655
// Load and widen to the quad-width lane type (uchar -> unsigned, schar -> int).
659 inline v_uint32 vx_load_expand_q(const uchar * ptr) { return VXPREFIX(_load_expand_q)(ptr); }
660 inline v_int32 vx_load_expand_q(const schar * ptr) { return VXPREFIX(_load_expand_q)(ptr); }
662
// SIMD processing state cleanup; delegates to the active backend's
// v*_cleanup (backend-specific, may be a no-op).
664 inline void vx_cleanup() { VXPREFIX(_cleanup)(); }
665
666
668
// These forward to the width-generic v_store/v_store_aligned overloads; kept
// so older code using the vx_ names keeps compiling.
669 // backward compatibility
670 template<typename _Tp, typename _Tvec> static inline
671 void vx_store(_Tp* dst, const _Tvec& v) { return v_store(dst, v); }
672 // backward compatibility
673 template<typename _Tp, typename _Tvec> static inline
674 void vx_store_aligned(_Tp* dst, const _Tvec& v) { return v_store_aligned(dst, v); }
675
677
678
// VXPREFIX is an implementation detail of this header; do not leak it.
680 #undef VXPREFIX
681} // namespace
682
684 #ifndef CV_SIMD_64F
685 #define CV_SIMD_64F 0
686 #endif
687
688 #ifndef CV_SIMD_FP16
689 #define CV_SIMD_FP16 0
690 #endif
691
692 #ifndef CV_SIMD
693 #define CV_SIMD 0
694 #endif
695
696 #include "simd_utils.impl.hpp"
697
698 #ifndef CV_DOXYGEN
699CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
700 #endif
701
702} // cv::
703
705
706 #endif
v_reg< schar, 16 > v_int8x16
Sixteen 8-bit signed integer values
Definition: intrin_cpp.hpp:490
v_reg< uchar, 16 > v_uint8x16
Sixteen 8-bit unsigned integer values
Definition: intrin_cpp.hpp:488
v_reg< int64, 2 > v_int64x2
Two 64-bit signed integer values
Definition: intrin_cpp.hpp:506
void v_store(_Tp *ptr, const v_reg< _Tp, n > &a)
Store data to memory
Definition: intrin_cpp.hpp:2193
v_reg< ushort, 8 > v_uint16x8
Eight 16-bit unsigned integer values
Definition: intrin_cpp.hpp:492
v_reg< int, 4 > v_int32x4
Four 32-bit signed integer values
Definition: intrin_cpp.hpp:498
v_reg< unsigned, 4 > v_uint32x4
Four 32-bit unsigned integer values
Definition: intrin_cpp.hpp:496
void vx_cleanup()
SIMD processing state cleanup call
Definition: intrin.hpp:664
v_reg< uint64, 2 > v_uint64x2
Two 64-bit unsigned integer values
Definition: intrin_cpp.hpp:504
v_reg< float, 4 > v_float32x4
Four 32-bit floating point values (single precision)
Definition: intrin_cpp.hpp:500
void v_store_aligned(_Tp *ptr, const v_reg< _Tp, n > &a)
Store data to memory (aligned)
Definition: intrin_cpp.hpp:2254
v_reg< short, 8 > v_int16x8
Eight 16-bit signed integer values
Definition: intrin_cpp.hpp:494
v_reg< double, 2 > v_float64x2
Two 64-bit floating point values (double precision)
Definition: intrin_cpp.hpp:502
cv
"black box" representation of the file storage associated with a file on disk.
Definition: aruco.hpp:75
Definition: intrin.hpp:104