#ifndef OPENCV_HAL_INTRIN_RVV_HPP
#define OPENCV_HAL_INTRIN_RVV_HPP

CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN

#define CV_SIMD128_64F 1
// Emulated fixed-size wrapper types for the fractional-LMUL vectors
// (vuint8mf2_t, vint8mf2_t, vuint16mf2_t, vint16mf2_t, vuint32mf2_t, ... and the
// mf4 variants used below): each stores its lanes in a small val[] array and the
// pointer constructors copy element by element, e.g.
for (int i = 0; i < 8; ++i)
for (int i = 0; i < 8; ++i)
for (int i = 0; i < 4; ++i)
for (int i = 0; i < 4; ++i)

unsigned val[2] = {0};

for (int i = 0; i < 4; ++i)
for (int i = 0; i < 4; ++i)
#define OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(_Tpvec, _Tp, suffix, width, n) \
inline _Tpvec vle##width##_v_##suffix##mf2(const _Tp* ptr) \
{ \
    return _Tpvec(ptr); \
} \
inline void vse##width##_v_##suffix##mf2(_Tp* ptr, _Tpvec v) \
{ \
    for (int i = 0; i < n; ++i) \
    { \
        ptr[i] = v.val[i]; \
    } \
}
OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vuint8mf2_t, uint8_t, u8, 8, 8)
OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vint8mf2_t, int8_t, i8, 8, 8)
OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vuint16mf2_t, uint16_t, u16, 16, 4)
OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vint16mf2_t, int16_t, i16, 16, 4)
OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vuint32mf2_t, uint32_t, u32, 32, 2)
OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vint32mf2_t, int32_t, i32, 32, 2)
OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vfloat32mf2_t, float32_t, f32, 32, 2)
OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vuint64mf2_t, uint64_t, u64, 64, 1)
OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vint64mf2_t, int64_t, i64, 64, 1)
OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vfloat64mf2_t, float64_t, f64, 64, 1)
#define OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(_Tpwvec, _Tpvec, _wTp, wcvt, suffix, width, n) \
inline _Tpwvec wcvt (_Tpvec v) \
{ \
    _wTp tmp[n]; \
    for (int i = 0; i < n; ++i) \
    { \
        tmp[i] = (_wTp)v.val[i]; \
    } \
    vsetvlmax_e##width##m1(); \
    return vle##width##_v_##suffix##m1(tmp); \
}
OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vuint16m1_t, vuint8mf2_t, ushort, vwcvtu_x_x_v_u16m1, u16, 16, 8)
OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vint16m1_t, vint8mf2_t, short, vwcvt_x_x_v_i16m1, i16, 16, 8)
OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vuint32m1_t, vuint16mf2_t, unsigned, vwcvtu_x_x_v_u32m1, u32, 32, 4)
OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vint32m1_t, vint16mf2_t, int, vwcvt_x_x_v_i32m1, i32, 32, 4)
OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vuint64m1_t, vuint32mf2_t, uint64, vwcvtu_x_x_v_u64m1, u64, 64, 2)
OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vint64m1_t, vint32mf2_t, int64, vwcvt_x_x_v_i64m1, i64, 64, 2)
inline vuint8mf4_t vle8_v_u8mf4 (const uint8_t *base)
{
    return vuint8mf4_t(base);
}
inline vint8mf4_t vle8_v_i8mf4 (const int8_t *base)
{
    return vint8mf4_t(base);
}

inline vuint16mf2_t vwcvtu_x_x_v_u16mf2 (vuint8mf4_t src)
{
    ushort tmp[4];
    for (int i = 0; i < 4; ++i)
    {
        tmp[i] = (ushort)src.val[i];
    }
    return vle16_v_u16mf2(tmp);
}
inline vint16mf2_t vwcvt_x_x_v_i16mf2 (vint8mf4_t src)
{
    short tmp[4];
    for (int i = 0; i < 4; ++i)
    {
        tmp[i] = (short)src.val[i];
    }
    return vle16_v_i16mf2(tmp);
}
struct v_uint8x16
{
    typedef uchar lane_type;
    enum { nlanes = 16 };

    v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7,
               uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15)
    {
        uchar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15};
        for (int i = 0; i < nlanes; ++i)
            val[i] = v[i];
    }
    operator vuint8m1_t() const
    {
        return vle8_v_u8m1(val);
    }

    uchar val[16];
};
struct v_int8x16
{
    typedef schar lane_type;
    enum { nlanes = 16 };

    v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7,
              schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15)
    {
        schar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15};
        for (int i = 0; i < nlanes; ++i)
            val[i] = v[i];
    }
    operator vint8m1_t() const
    {
        return vle8_v_i8m1(val);
    }

    schar val[16];
};
struct v_uint16x8
{
    typedef ushort lane_type;
    enum { nlanes = 8 };

    explicit v_uint16x8(vuint16m1_t v)
    {
        vse16_v_u16m1(val, v);
    }
    v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7)
    {
        ushort v[] = {v0, v1, v2, v3, v4, v5, v6, v7};
        for (int i = 0; i < nlanes; ++i)
            val[i] = v[i];
    }
    operator vuint16m1_t() const
    {
        return vle16_v_u16m1(val);
    }

    ushort val[8];
};
struct v_int16x8
{
    typedef short lane_type;
    enum { nlanes = 8 };

    explicit v_int16x8(vint16m1_t v)
    {
        vse16_v_i16m1(val, v);
    }
    v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7)
    {
        short v[] = {v0, v1, v2, v3, v4, v5, v6, v7};
        for (int i = 0; i < nlanes; ++i)
            val[i] = v[i];
    }
    operator vint16m1_t() const
    {
        return vle16_v_i16m1(val);
    }

    short val[8];
};
struct v_uint32x4
{
    typedef unsigned lane_type;
    enum { nlanes = 4 };

    explicit v_uint32x4(vuint32m1_t v)
    {
        vse32_v_u32m1(val, v);
    }
    v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
    {
        unsigned v[] = {v0, v1, v2, v3};
        for (int i = 0; i < nlanes; ++i)
            val[i] = v[i];
    }
    operator vuint32m1_t() const
    {
        return vle32_v_u32m1(val);
    }
    unsigned get0() const
    {
        return val[0];
    }

    unsigned val[4];
};
struct v_int32x4
{
    typedef int lane_type;
    enum { nlanes = 4 };

    explicit v_int32x4(vint32m1_t v)
    {
        vse32_v_i32m1(val, v);
    }
    v_int32x4(int v0, int v1, int v2, int v3)
    {
        int v[] = {v0, v1, v2, v3};
        for (int i = 0; i < nlanes; ++i)
            val[i] = v[i];
    }
    operator vint32m1_t() const
    {
        return vle32_v_i32m1(val);
    }

    int val[4];
};
struct v_float32x4
{
    typedef float lane_type;
    enum { nlanes = 4 };

    explicit v_float32x4(vfloat32m1_t v)
    {
        vse32_v_f32m1(val, v);
    }
    v_float32x4(float v0, float v1, float v2, float v3)
    {
        float v[] = {v0, v1, v2, v3};
        for (int i = 0; i < nlanes; ++i)
            val[i] = v[i];
    }
    operator vfloat32m1_t() const
    {
        return vle32_v_f32m1(val);
    }

    float val[4];
};
struct v_uint64x2
{
    typedef uint64 lane_type;
    enum { nlanes = 2 };

    explicit v_uint64x2(vuint64m1_t v)
    {
        vse64_v_u64m1(val, v);
    }
    v_uint64x2(uint64 v0, uint64 v1)
    {
        uint64 v[] = {v0, v1};
        for (int i = 0; i < nlanes; ++i)
            val[i] = v[i];
    }
    operator vuint64m1_t() const
    {
        return vle64_v_u64m1(val);
    }

    uint64 val[2];
};
struct v_int64x2
{
    typedef int64 lane_type;
    enum { nlanes = 2 };

    explicit v_int64x2(vint64m1_t v)
    {
        vse64_v_i64m1(val, v);
    }
    v_int64x2(int64 v0, int64 v1)
    {
        int64 v[] = {v0, v1};
        for (int i = 0; i < nlanes; ++i)
            val[i] = v[i];
    }
    operator vint64m1_t() const
    {
        return vle64_v_i64m1(val);
    }

    int64 val[2];
};
struct v_float64x2
{
    typedef double lane_type;
    enum { nlanes = 2 };

    explicit v_float64x2(vfloat64m1_t v)
    {
        vse64_v_f64m1(val, v);
    }
    v_float64x2(double v0, double v1)
    {
        double v[] = {v0, v1};
        for (int i = 0; i < nlanes; ++i)
            val[i] = v[i];
    }
    operator vfloat64m1_t() const
    {
        return vle64_v_f64m1(val);
    }

    double val[2];
};
#define OPENCV_HAL_IMPL_RVV_INIT_INTEGER(_Tpvec, _Tp, width, suffix1, suffix2) \
inline v_##_Tpvec v_setzero_##suffix1() \
{ \
    vsetvlmax_e##width##m1(); \
    return v_##_Tpvec(vzero_##suffix2##m1()); \
} \
inline v_##_Tpvec v_setall_##suffix1(_Tp v) \
{ \
    vsetvlmax_e##width##m1(); \
    return v_##_Tpvec(vmv_v_x_##suffix2##m1(v)); \
}
OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint8x16, uchar, 8, u8, u8)
OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int8x16, schar, 8, s8, i8)
OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint16x8, ushort, 16, u16, u16)
OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int16x8, short, 16, s16, i16)
OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint32x4, unsigned, 32, u32, u32)
OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int32x4, int, 32, s32, i32)
OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint64x2, uint64, 64, u64, u64)
OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int64x2, int64, 64, s64, i64)
#define OPENCV_HAL_IMPL_RVV_INIT_FP(_Tpv, _Tp, width, suffix) \
inline v_##_Tpv v_setzero_##suffix() \
{ \
    vsetvlmax_e##width##m1(); \
    return v_##_Tpv(vzero_##suffix##m1()); \
} \
inline v_##_Tpv v_setall_##suffix(_Tp v) \
{ \
    vsetvlmax_e##width##m1(); \
    return v_##_Tpv(vfmv_v_f_##suffix##m1(v)); \
}
OPENCV_HAL_IMPL_RVV_INIT_FP(float32x4, float, 32, f32)
OPENCV_HAL_IMPL_RVV_INIT_FP(float64x2, double, 64, f64)
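
// Usage sketch (illustrative only; assumes the v_setzero_*/v_setall_* initializers
// above behave like the other universal-intrinsics backends):
//
//     v_uint8x16  zeros  = v_setzero_u8();      // every lane = 0
//     v_int32x4   fours  = v_setall_s32(4);     // every lane = 4
//     v_float32x4 halves = v_setall_f32(0.5f);  // every lane = 0.5f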
#define OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(_Tpvec, suffix) \
inline v_##_Tpvec v_reinterpret_as_##suffix(const v_##_Tpvec& v) { return v; }
OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(uint8x16, u8)
OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(int8x16, s8)
OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(uint16x8, u16)
OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(int16x8, s16)
OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(uint32x4, u32)
OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(int32x4, s32)
OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(float32x4, f32)
OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(uint64x2, u64)
OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(int64x2, s64)
OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(float64x2, f64)
#define OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(_Tpvec1, _Tpvec2, _nTpvec1, _nTpvec2, suffix1, suffix2, nsuffix1, nsuffix2, width1, width2) \
inline v_##_Tpvec1 v_reinterpret_as_##suffix1(const v_##_Tpvec2& v) \
{ \
    vsetvlmax_e##width2##m1(); \
    return v_##_Tpvec1((_nTpvec1)vle##width2##_v_##nsuffix2##m1(v.val)); \
} \
inline v_##_Tpvec2 v_reinterpret_as_##suffix2(const v_##_Tpvec1& v) \
{ \
    vsetvlmax_e##width1##m1(); \
    return v_##_Tpvec2((_nTpvec2)vle##width1##_v_##nsuffix1##m1(v.val)); \
}
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, int8x16, vuint8m1_t, vint8m1_t, u8, s8, u8, i8, 8, 8)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, int16x8, vuint16m1_t, vint16m1_t, u16, s16, u16, i16, 16, 16)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, int32x4, vuint32m1_t, vint32m1_t, u32, s32, u32, i32, 32, 32)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, float32x4, vuint32m1_t, vfloat32m1_t, u32, f32, u32, f32, 32, 32)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int32x4, float32x4, vint32m1_t, vfloat32m1_t, s32, f32, i32, f32, 32, 32)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, int64x2, vuint64m1_t, vint64m1_t, u64, s64, u64, i64, 64, 64)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, uint16x8, vuint8m1_t, vuint16m1_t, u8, u16, u8, u16, 8, 16)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, uint32x4, vuint8m1_t, vuint32m1_t, u8, u32, u8, u32, 8, 32)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, uint64x2, vuint8m1_t, vuint64m1_t, u8, u64, u8, u64, 8, 64)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, uint32x4, vuint16m1_t, vuint32m1_t, u16, u32, u16, u32, 16, 32)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, uint64x2, vuint16m1_t, vuint64m1_t, u16, u64, u16, u64, 16, 64)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, uint64x2, vuint32m1_t, vuint64m1_t, u32, u64, u32, u64, 32, 64)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, int16x8, vint8m1_t, vint16m1_t, s8, s16, i8, i16, 8, 16)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, int32x4, vint8m1_t, vint32m1_t, s8, s32, i8, i32, 8, 32)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, int64x2, vint8m1_t, vint64m1_t, s8, s64, i8, i64, 8, 64)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int16x8, int32x4, vint16m1_t, vint32m1_t, s16, s32, i16, i32, 16, 32)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int16x8, int64x2, vint16m1_t, vint64m1_t, s16, s64, i16, i64, 16, 64)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int32x4, int64x2, vint32m1_t, vint64m1_t, s32, s64, i32, i64, 32, 64)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, int16x8, vuint8m1_t, vint16m1_t, u8, s16, u8, i16, 8, 16)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, int32x4, vuint8m1_t, vint32m1_t, u8, s32, u8, i32, 8, 32)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, int64x2, vuint8m1_t, vint64m1_t, u8, s64, u8, i64, 8, 64)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, int8x16, vuint16m1_t, vint8m1_t, u16, s8, u16, i8, 16, 8)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, int32x4, vuint16m1_t, vint32m1_t, u16, s32, u16, i32, 16, 32)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, int64x2, vuint16m1_t, vint64m1_t, u16, s64, u16, i64, 16, 64)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, int8x16, vuint32m1_t, vint8m1_t, u32, s8, u32, i8, 32, 8)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, int16x8, vuint32m1_t, vint16m1_t, u32, s16, u32, i16, 32, 16)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, int64x2, vuint32m1_t, vint64m1_t, u32, s64, u32, i64, 32, 64)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, int8x16, vuint64m1_t, vint8m1_t, u64, s8, u64, i8, 64, 8)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, int16x8, vuint64m1_t, vint16m1_t, u64, s16, u64, i16, 64, 16)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, int32x4, vuint64m1_t, vint32m1_t, u64, s32, u64, i32, 64, 32)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, float32x4, vuint8m1_t, vfloat32m1_t, u8, f32, u8, f32, 8, 32)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, float32x4, vuint16m1_t, vfloat32m1_t, u16, f32, u16, f32, 16, 32)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, float32x4, vuint64m1_t, vfloat32m1_t, u64, f32, u64, f32, 64, 32)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, float32x4, vint8m1_t, vfloat32m1_t, s8, f32, i8, f32, 8, 32)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int16x8, float32x4, vint16m1_t, vfloat32m1_t, s16, f32, i16, f32, 16, 32)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int64x2, float32x4, vint64m1_t, vfloat32m1_t, s64, f32, i64, f32, 64, 32)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, float64x2, vuint64m1_t, vfloat64m1_t, u64, f64, u64, f64, 64, 64)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int64x2, float64x2, vint64m1_t, vfloat64m1_t, s64, f64, i64, f64, 64, 64)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, float64x2, vuint8m1_t, vfloat64m1_t, u8, f64, u8, f64, 8, 64)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, float64x2, vuint16m1_t, vfloat64m1_t, u16, f64, u16, f64, 16, 64)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, float64x2, vuint32m1_t, vfloat64m1_t, u32, f64, u32, f64, 32, 64)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, float64x2, vint8m1_t, vfloat64m1_t, s8, f64, i8, f64, 8, 64)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int16x8, float64x2, vint16m1_t, vfloat64m1_t, s16, f64, i16, f64, 16, 64)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int32x4, float64x2, vint32m1_t, vfloat64m1_t, s32, f64, i32, f64, 32, 64)
OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(float32x4, float64x2, vfloat32m1_t, vfloat64m1_t, f32, f64, f32, f64, 32, 64)
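
// Usage sketch (illustrative only): a reinterpret is a bit-level view change, not a
// value conversion, so the 128-bit pattern is preserved while lane type and count change.
//
//     v_uint32x4  u = v_setall_u32(0x3f800000);  // bit pattern of 1.0f
//     v_float32x4 f = v_reinterpret_as_f32(u);   // four lanes of 1.0f
//     v_uint8x16  b = v_reinterpret_as_u8(u);    // the same 16 bytes viewed as u8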
#define OPENCV_HAL_IMPL_RVV_EXTRACT(_Tpvec, _Tp, suffix, width, vmv) \
template <int s> \
inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \
{ \
    vsetvlmax_e##width##m1(); \
    return _Tpvec(vslideup_vx_##suffix##m1(vslidedown_vx_##suffix##m1(vzero_##suffix##m1(), a, s), b, _Tpvec::nlanes - s)); \
} \
template<int i> inline _Tp v_extract_n(_Tpvec v) \
{ \
    vsetvlmax_e##width##m1(); \
    return _Tp(vmv(vslidedown_vx_##suffix##m1(vzero_##suffix##m1(), v, i))); \
}
OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint8x16, uchar, u8, 8, vmv_x_s_u8m1_u8)
OPENCV_HAL_IMPL_RVV_EXTRACT(v_int8x16, schar, i8, 8, vmv_x_s_i8m1_i8)
OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint16x8, ushort, u16, 16, vmv_x_s_u16m1_u16)
OPENCV_HAL_IMPL_RVV_EXTRACT(v_int16x8, short, i16, 16, vmv_x_s_i16m1_i16)
OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint32x4, uint, u32, 32, vmv_x_s_u32m1_u32)
OPENCV_HAL_IMPL_RVV_EXTRACT(v_int32x4, int, i32, 32, vmv_x_s_i32m1_i32)
OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint64x2, uint64, u64, 64, vmv_x_s_u64m1_u64)
OPENCV_HAL_IMPL_RVV_EXTRACT(v_int64x2, int64, i64, 64, vmv_x_s_i64m1_i64)
OPENCV_HAL_IMPL_RVV_EXTRACT(v_float32x4, float, f32, 32, vfmv_f_s_f32m1_f32)
OPENCV_HAL_IMPL_RVV_EXTRACT(v_float64x2, double, f64, 64, vfmv_f_s_f64m1_f64)
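
// Usage sketch (illustrative only): v_extract_n<i> reads one lane as a scalar, while
// v_extract<s> concatenates two vectors and keeps a full vector starting at lane s.
//
//     v_int32x4 v = v_setall_s32(7);
//     int lane2 = v_extract_n<2>(v);   // == 7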
#define OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(_Tpvec, _nTpvec, _Tp, hvl, width, suffix) \
inline _Tpvec v_load(const _Tp* ptr) \
{ \
    return _Tpvec((_nTpvec)vle8_v_u8m1((uchar*)ptr)); \
} \
inline _Tpvec v_load_aligned(const _Tp* ptr) \
{ \
    vsetvlmax_e##width##m1(); \
    return _Tpvec(vle##width##_v_##suffix##m1(ptr)); \
} \
inline _Tpvec v_load_low(const _Tp* ptr) \
{ \
    vsetvl_e##width##m1(hvl); \
    _Tpvec res = _Tpvec(vle##width##_v_##suffix##m1(ptr)); \
    vsetvlmax_e##width##m1(); \
    return res; \
} \
inline void v_store(_Tp* ptr, const _Tpvec& a) \
{ \
    vse8_v_u8m1((uchar*)ptr, vle8_v_u8m1((uchar*)a.val)); \
} \
inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
{ \
    vsetvlmax_e##width##m1(); \
    vse##width##_v_##suffix##m1(ptr, a); \
} \
inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \
{ \
    vsetvlmax_e##width##m1(); \
    vse##width##_v_##suffix##m1(ptr, a); \
} \
inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode /*mode*/) \
{ \
    vsetvlmax_e##width##m1(); \
    vse##width##_v_##suffix##m1(ptr, a); \
} \
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
{ \
    _Tp CV_DECL_ALIGNED(32) tmp_ptr[_Tpvec::nlanes] = {0}; \
    vsetvlmax_e##width##m1(); \
    vse##width##_v_##suffix##m1(tmp_ptr, a); \
    for(int i = 0; i < _Tpvec::nlanes/2; ++i) \
    { \
        ptr[i] = tmp_ptr[i]; \
    } \
} \
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
{ \
    _Tp CV_DECL_ALIGNED(32) tmp_ptr[_Tpvec::nlanes] = {0}; \
    vsetvlmax_e##width##m1(); \
    vse##width##_v_##suffix##m1(tmp_ptr, a); \
    for(int i = 0; i < _Tpvec::nlanes/2; ++i) \
    { \
        ptr[i] = tmp_ptr[i+_Tpvec::nlanes/2]; \
    } \
}
OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint8x16, vuint8m1_t, uchar, 8, 8, u8)
OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int8x16, vint8m1_t, schar, 8, 8, i8)
OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint16x8, vuint16m1_t, ushort, 4, 16, u16)
OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int16x8, vint16m1_t, short, 4, 16, i16)
OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint32x4, vuint32m1_t, unsigned, 2, 32, u32)
OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int32x4, vint32m1_t, int, 2, 32, i32)
OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint64x2, vuint64m1_t, uint64, 1, 64, u64)
OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int64x2, vint64m1_t, int64, 1, 64, i64)
OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_float32x4, vfloat32m1_t, float, 2, 32, f32)
OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_float64x2, vfloat64m1_t, double, 1, 64, f64)
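
// Usage sketch (illustrative only; the function and variable names are hypothetical):
// a typical vectorized loop built on v_load/v_store with a scalar tail.
//
//     inline void scale(const float* src, float* dst, int n, float k)
//     {
//         v_float32x4 vk = v_setall_f32(k);
//         int i = 0;
//         for (; i <= n - v_float32x4::nlanes; i += v_float32x4::nlanes)
//             v_store(dst + i, v_load(src + i) * vk);
//         for (; i < n; i++)
//             dst[i] = src[i] * k;   // scalar tail
//     }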
inline v_int8x16 v_load_halves(const schar* ptr0, const schar* ptr1)
{
    schar CV_DECL_ALIGNED(32) elems[16] =
    {
        ptr0[0], ptr0[1], ptr0[2], ptr0[3], ptr0[4], ptr0[5], ptr0[6], ptr0[7],
        ptr1[0], ptr1[1], ptr1[2], ptr1[3], ptr1[4], ptr1[5], ptr1[6], ptr1[7]
    };
    return v_int8x16(vle8_v_i8m1(elems));
}

inline v_int16x8 v_load_halves(const short* ptr0, const short* ptr1)
{
    short CV_DECL_ALIGNED(32) elems[8] =
    {
        ptr0[0], ptr0[1], ptr0[2], ptr0[3], ptr1[0], ptr1[1], ptr1[2], ptr1[3]
    };
    return v_int16x8(vle16_v_i16m1(elems));
}

inline v_int32x4 v_load_halves(const int* ptr0, const int* ptr1)
{
    int CV_DECL_ALIGNED(32) elems[4] =
    {
        ptr0[0], ptr0[1], ptr1[0], ptr1[1]
    };
    return v_int32x4(vle32_v_i32m1(elems));
}

inline v_float32x4 v_load_halves(const float* ptr0, const float* ptr1)
{
    float CV_DECL_ALIGNED(32) elems[4] =
    {
        ptr0[0], ptr0[1], ptr1[0], ptr1[1]
    };
    return v_float32x4(vle32_v_f32m1(elems));
}

inline v_int64x2 v_load_halves(const int64* ptr0, const int64* ptr1)
{
    int64 CV_DECL_ALIGNED(32) elems[2] =
    {
        ptr0[0], ptr1[0]
    };
    return v_int64x2(vle64_v_i64m1(elems));
}

inline v_float64x2 v_load_halves(const double* ptr0, const double* ptr1)
{
    double CV_DECL_ALIGNED(32) elems[2] =
    {
        ptr0[0], ptr1[0]
    };
    return v_float64x2(vle64_v_f64m1(elems));
}
inline v_int8x16 v_lut(const schar* tab, const int* idx)
{
    schar CV_DECL_ALIGNED(32) elems[16] =

inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx)
{
    schar CV_DECL_ALIGNED(32) elems[16] =

inline v_int8x16 v_lut_quads(const schar* tab, const int* idx)
{
    schar CV_DECL_ALIGNED(32) elems[16] =

inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((schar*)tab, idx)); }
inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((schar*)tab, idx)); }
inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((schar*)tab, idx)); }
inline v_int16x8 v_lut(const short* tab, const int* idx)
{
    short CV_DECL_ALIGNED(32) elems[8] =

inline v_int16x8 v_lut_pairs(const short* tab, const int* idx)
{
    short CV_DECL_ALIGNED(32) elems[8] =

inline v_int16x8 v_lut_quads(const short* tab, const int* idx)
{
    short CV_DECL_ALIGNED(32) elems[8] =

inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((short*)tab, idx)); }
inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((short*)tab, idx)); }
inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((short*)tab, idx)); }
inline v_int32x4 v_lut(const int* tab, const int* idx)
{
    int CV_DECL_ALIGNED(32) elems[4] =

inline v_int32x4 v_lut_pairs(const int* tab, const int* idx)
{
    int CV_DECL_ALIGNED(32) elems[4] =

inline v_int32x4 v_lut_quads(const int* tab, const int* idx)
{
    return v_int32x4(vle32_v_i32m1(tab + idx[0]));
}

inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((int*)tab, idx)); }
inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((int*)tab, idx)); }
inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((int*)tab, idx)); }
inline v_int64x2 v_lut(const int64_t* tab, const int* idx)
{
    int64_t CV_DECL_ALIGNED(32) elems[2] =

inline v_int64x2 v_lut_pairs(const int64* tab, const int* idx)
{
    return v_int64x2(vle64_v_i64m1(tab + idx[0]));
}

inline v_uint64x2 v_lut(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); }
inline v_uint64x2 v_lut_pairs(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); }
inline v_float32x4 v_lut(const float* tab, const int* idx)
{
    float CV_DECL_ALIGNED(32) elems[4] =

inline v_float32x4 v_lut_pairs(const float* tab, const int* idx)
{
    float CV_DECL_ALIGNED(32) elems[4] =

inline v_float32x4 v_lut_quads(const float* tab, const int* idx)

inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec)
{
    int CV_DECL_ALIGNED(32) elems[4] =
    {
        tab[v_extract_n<0>(idxvec)],
        tab[v_extract_n<1>(idxvec)],
        tab[v_extract_n<2>(idxvec)],
        tab[v_extract_n<3>(idxvec)]
    };
    return v_int32x4(vle32_v_i32m1(elems));
}

inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec)
{
    unsigned CV_DECL_ALIGNED(32) elems[4] =
    {
        tab[v_extract_n<0>(idxvec)],
        tab[v_extract_n<1>(idxvec)],
        tab[v_extract_n<2>(idxvec)],
        tab[v_extract_n<3>(idxvec)]
    };
    return v_uint32x4(vle32_v_u32m1(elems));
}

inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec)
{
    float CV_DECL_ALIGNED(32) elems[4] =
    {
        tab[v_extract_n<0>(idxvec)],
        tab[v_extract_n<1>(idxvec)],
        tab[v_extract_n<2>(idxvec)],
        tab[v_extract_n<3>(idxvec)]
    };
    return v_float32x4(vle32_v_f32m1(elems));
}

inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y)
{
    int CV_DECL_ALIGNED(32) idx[4];
    v_store_aligned(idx, idxvec);
    x = v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]);
    y = v_float32x4(tab[idx[0]+1], tab[idx[1]+1], tab[idx[2]+1], tab[idx[3]+1]);
}
inline v_float64x2 v_lut(const double* tab, const int* idx)
{
    double CV_DECL_ALIGNED(32) elems[2] =

inline v_float64x2 v_lut_pairs(const double* tab, const int* idx)

inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec)
{
    double CV_DECL_ALIGNED(32) elems[2] =
    {
        tab[v_extract_n<0>(idxvec)],
        tab[v_extract_n<1>(idxvec)]
    };
    return v_float64x2(vle64_v_f64m1(elems));
}

inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y)
{
    int CV_DECL_ALIGNED(32) idx[4] = {0};
ushort CV_DECL_ALIGNED(32) ptr[16] = {0};
return v_uint8x16(vnsrl_wx_u8m1(vle16_v_u16m2(ptr), 0));

unsigned CV_DECL_ALIGNED(32) ptr[16] = {0};
return v_uint8x16(vnsrl_wx_u8m1(vnsrl_wx_u16m2(vle32_v_u32m4(ptr), 0), 0));

uint64 CV_DECL_ALIGNED(32) ptr[16] = {0};
return v_uint8x16(vnsrl_wx_u8m1(vnsrl_wx_u16m2(vnsrl_wx_u32m4(vle64_v_u64m8(ptr), 0), 0), 0));
#define OPENCV_HAL_IMPL_RVV_BIN_OP(bin_op, _Tpvec, intrin, width) \
inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
{ \
    vsetvlmax_e##width##m1(); \
    return _Tpvec(intrin(a, b)); \
} \
inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \
{ \
    vsetvlmax_e##width##m1(); \
    a = _Tpvec(intrin(a, b)); \
    return a; \
}
OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_uint8x16, vsaddu_vv_u8m1, 8)
OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_uint8x16, vssubu_vv_u8m1, 8)
OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_uint8x16, vdivu_vv_u8m1, 8)
OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_int8x16, vsadd_vv_i8m1, 8)
OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_int8x16, vssub_vv_i8m1, 8)
OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_int8x16, vdiv_vv_i8m1, 8)
OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_uint16x8, vsaddu_vv_u16m1, 16)
OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_uint16x8, vssubu_vv_u16m1, 16)
OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_uint16x8, vdivu_vv_u16m1, 16)
OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_int16x8, vsadd_vv_i16m1, 16)
OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_int16x8, vssub_vv_i16m1, 16)
OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_int16x8, vdiv_vv_i16m1, 16)
OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_uint32x4, vadd_vv_u32m1, 32)
OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_uint32x4, vsub_vv_u32m1, 32)
OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_uint32x4, vmul_vv_u32m1, 32)
OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_uint32x4, vdivu_vv_u32m1, 32)
OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_int32x4, vadd_vv_i32m1, 32)
OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_int32x4, vsub_vv_i32m1, 32)
OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_int32x4, vmul_vv_i32m1, 32)
OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_int32x4, vdiv_vv_i32m1, 32)
OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_float32x4, vfadd_vv_f32m1, 32)
OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_float32x4, vfsub_vv_f32m1, 32)
OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_float32x4, vfmul_vv_f32m1, 32)
OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_float32x4, vfdiv_vv_f32m1, 32)
OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_uint64x2, vadd_vv_u64m1, 64)
OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_uint64x2, vsub_vv_u64m1, 64)
OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_uint64x2, vmul_vv_u64m1, 64)
OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_uint64x2, vdivu_vv_u64m1, 64)
OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_int64x2, vadd_vv_i64m1, 64)
OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_int64x2, vsub_vv_i64m1, 64)
OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_int64x2, vmul_vv_i64m1, 64)
OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_int64x2, vdiv_vv_i64m1, 64)
OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_float64x2, vfadd_vv_f64m1, 64)
OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_float64x2, vfsub_vv_f64m1, 64)
OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_float64x2, vfmul_vv_f64m1, 64)
OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_float64x2, vfdiv_vv_f64m1, 64)
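
// Note on semantics (as implemented above): 8- and 16-bit operator+/operator- map to
// the saturating vsadd/vssub intrinsics, while 32- and 64-bit lanes wrap; explicitly
// wrapping 8/16-bit variants are provided separately as v_add_wrap/v_sub_wrap below.
// Illustrative example:
//
//     v_uint8x16 a = v_setall_u8(250), b = v_setall_u8(10);
//     v_uint8x16 s = a + b;   // saturates to 255 in every lane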
#define OPENCV_HAL_IMPL_RVV_LOGIC_OP(_Tpvec, suffix, width) \
OPENCV_HAL_IMPL_RVV_BIN_OP(&, _Tpvec, vand_vv_##suffix##m1, width) \
OPENCV_HAL_IMPL_RVV_BIN_OP(|, _Tpvec, vor_vv_##suffix##m1, width) \
OPENCV_HAL_IMPL_RVV_BIN_OP(^, _Tpvec, vxor_vv_##suffix##m1, width) \
inline _Tpvec operator ~ (const _Tpvec& a) \
{ \
    vsetvlmax_e##width##m1(); \
    return _Tpvec(vnot_v_##suffix##m1(a)); \
}
OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint8x16, u8, 8)
OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int8x16, i8, 8)
OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint16x8, u16, 16)
OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int16x8, i16, 16)
OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint32x4, u32, 32)
OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int32x4, i32, 32)
OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint64x2, u64, 64)
OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int64x2, i64, 64)
#define OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(bin_op, intrin) \
inline v_float32x4 operator bin_op (const v_float32x4& a, const v_float32x4& b) \
{ \
    vsetvlmax_e32m1(); \
    return v_float32x4(vreinterpret_v_i32m1_f32m1(intrin(vreinterpret_v_f32m1_i32m1(a), vreinterpret_v_f32m1_i32m1(b)))); \
} \
inline v_float32x4& operator bin_op##= (v_float32x4& a, const v_float32x4& b) \
{ \
    vsetvlmax_e32m1(); \
    a = v_float32x4(vreinterpret_v_i32m1_f32m1(intrin(vreinterpret_v_f32m1_i32m1(a), vreinterpret_v_f32m1_i32m1(b)))); \
    return a; \
}

OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(&, vand_vv_i32m1)
OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(|, vor_vv_i32m1)
OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(^, vxor_vv_i32m1)

inline v_float32x4 operator ~ (const v_float32x4& a)
{
    return v_float32x4(vreinterpret_v_i32m1_f32m1(vnot_v_i32m1(vreinterpret_v_f32m1_i32m1(a))));
}

#define OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(bin_op, intrin) \
inline v_float64x2 operator bin_op (const v_float64x2& a, const v_float64x2& b) \
{ \
    vsetvlmax_e64m1(); \
    return v_float64x2(vreinterpret_v_i64m1_f64m1(intrin(vreinterpret_v_f64m1_i64m1(a), vreinterpret_v_f64m1_i64m1(b)))); \
} \
inline v_float64x2& operator bin_op##= (v_float64x2& a, const v_float64x2& b) \
{ \
    vsetvlmax_e64m1(); \
    a = v_float64x2(vreinterpret_v_i64m1_f64m1(intrin(vreinterpret_v_f64m1_i64m1(a), vreinterpret_v_f64m1_i64m1(b)))); \
    return a; \
}

OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(&, vand_vv_i64m1)
OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(|, vor_vv_i64m1)
OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(^, vxor_vv_i64m1)

inline v_float64x2 operator ~ (const v_float64x2& a)
{
    return v_float64x2(vreinterpret_v_i64m1_f64m1(vnot_v_i64m1(vreinterpret_v_f64m1_i64m1(a))));
}
#define OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(_Tpvec, suffix, width) \
inline _Tpvec operator << (const _Tpvec& a, int n) \
{ \
    vsetvlmax_e##width##m1(); \
    return _Tpvec(vsll_vx_##suffix##m1(a, uint8_t(n))); \
} \
inline _Tpvec operator >> (const _Tpvec& a, int n) \
{ \
    vsetvlmax_e##width##m1(); \
    return _Tpvec(vsrl_vx_##suffix##m1(a, uint8_t(n))); \
} \
template<int n> inline _Tpvec v_shl(const _Tpvec& a) \
{ \
    vsetvlmax_e##width##m1(); \
    return _Tpvec(vsll_vx_##suffix##m1(a, uint8_t(n))); \
} \
template<int n> inline _Tpvec v_shr(const _Tpvec& a) \
{ \
    vsetvlmax_e##width##m1(); \
    return _Tpvec(vsrl_vx_##suffix##m1(a, uint8_t(n))); \
}

#define OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(_Tpvec, suffix, width) \
inline _Tpvec operator << (const _Tpvec& a, int n) \
{ \
    vsetvlmax_e##width##m1(); \
    return _Tpvec(vsll_vx_##suffix##m1(a, uint8_t(n))); \
} \
inline _Tpvec operator >> (const _Tpvec& a, int n) \
{ \
    vsetvlmax_e##width##m1(); \
    return _Tpvec(vsra_vx_##suffix##m1(a, uint8_t(n))); \
} \
template<int n> inline _Tpvec v_shl(const _Tpvec& a) \
{ \
    vsetvlmax_e##width##m1(); \
    return _Tpvec(vsll_vx_##suffix##m1(a, uint8_t(n))); \
} \
template<int n> inline _Tpvec v_shr(const _Tpvec& a) \
{ \
    vsetvlmax_e##width##m1(); \
    return _Tpvec(vsra_vx_##suffix##m1(a, uint8_t(n))); \
}
OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint8x16, u8, 8)
OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint16x8, u16, 16)
OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint32x4, u32, 32)
OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint64x2, u64, 64)
OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int8x16, i8, 8)
OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int16x8, i16, 16)
OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int32x4, i32, 32)
OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int64x2, i64, 64)
#define OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, op, intrin, suffix, width) \
inline _Tpvec operator op (const _Tpvec& a, const _Tpvec& b) \
{ \
    vsetvlmax_e##width##m1(); \
    return _Tpvec(vmerge_vxm_##suffix##m1(intrin(a, b), vzero_##suffix##m1(), 1)); \
}

#define OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, op, intrin, suffix, width) \
inline _Tpvec operator op (const _Tpvec& a, const _Tpvec& b) \
{ \
    vsetvlmax_e##width##m1(); \
    return _Tpvec(vfmerge_vfm_##suffix##m1(intrin(a, b), vzero_##suffix##m1(), 1)); \
}
#define OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(_Tpvec, suffix, width) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, ==, vmseq_vv_##suffix##m1_b##width, suffix, width) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, !=, vmsne_vv_##suffix##m1_b##width, suffix, width) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, <, vmsltu_vv_##suffix##m1_b##width, suffix, width) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, >, vmsgtu_vv_##suffix##m1_b##width, suffix, width) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, <=, vmsleu_vv_##suffix##m1_b##width, suffix, width) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, >=, vmsgeu_vv_##suffix##m1_b##width, suffix, width)

#define OPENCV_HAL_IMPL_RVV_SIGNED_CMP(_Tpvec, suffix, width) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, ==, vmseq_vv_##suffix##m1_b##width, suffix, width) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, !=, vmsne_vv_##suffix##m1_b##width, suffix, width) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, <, vmslt_vv_##suffix##m1_b##width, suffix, width) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, >, vmsgt_vv_##suffix##m1_b##width, suffix, width) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, <=, vmsle_vv_##suffix##m1_b##width, suffix, width) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, >=, vmsge_vv_##suffix##m1_b##width, suffix, width)

#define OPENCV_HAL_IMPL_RVV_FLOAT_CMP(_Tpvec, suffix, width) \
OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, ==, vmfeq_vv_##suffix##m1_b##width, suffix, width) \
OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, !=, vmfne_vv_##suffix##m1_b##width, suffix, width) \
OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, <, vmflt_vv_##suffix##m1_b##width, suffix, width) \
OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, >, vmfgt_vv_##suffix##m1_b##width, suffix, width) \
OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, <=, vmfle_vv_##suffix##m1_b##width, suffix, width) \
OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, >=, vmfge_vv_##suffix##m1_b##width, suffix, width)
OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint8x16, u8, 8)
OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint16x8, u16, 16)
OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint32x4, u32, 32)
OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint64x2, u64, 64)
OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int8x16, i8, 8)
OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int16x8, i16, 16)
OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int32x4, i32, 32)
OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int64x2, i64, 64)
OPENCV_HAL_IMPL_RVV_FLOAT_CMP(v_float32x4, f32, 32)
OPENCV_HAL_IMPL_RVV_FLOAT_CMP(v_float64x2, f64, 64)
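
// Usage sketch (illustrative only): comparison operators produce a per-lane mask
// vector that v_select (defined further below) consumes to pick between two inputs.
//
//     v_float32x4 a = v_setall_f32(2.0f), b = v_setall_f32(3.0f);
//     v_float32x4 lo = v_select(a < b, a, b);   // per-lane minimum, here all 2.0f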
#define OPENCV_HAL_IMPL_RVV_BIN_FUNC(_Tpvec, func, intrin, width) \
inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \
{ \
    vsetvlmax_e##width##m1(); \
    return _Tpvec(intrin(a, b)); \
}
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_min, vminu_vv_u8m1, 8)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_max, vmaxu_vv_u8m1, 8)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_min, vmin_vv_i8m1, 8)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_max, vmax_vv_i8m1, 8)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_min, vminu_vv_u16m1, 16)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_max, vmaxu_vv_u16m1, 16)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_min, vmin_vv_i16m1, 16)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_max, vmax_vv_i16m1, 16)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint32x4, v_min, vminu_vv_u32m1, 32)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint32x4, v_max, vmaxu_vv_u32m1, 32)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int32x4, v_min, vmin_vv_i32m1, 32)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int32x4, v_max, vmax_vv_i32m1, 32)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float32x4, v_min, vfmin_vv_f32m1, 32)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float32x4, v_max, vfmax_vv_f32m1, 32)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint64x2, v_min, vminu_vv_u64m1, 64)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint64x2, v_max, vmaxu_vv_u64m1, 64)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int64x2, v_min, vmin_vv_i64m1, 64)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int64x2, v_max, vmax_vv_i64m1, 64)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float64x2, v_min, vfmin_vv_f64m1, 64)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float64x2, v_max, vfmax_vv_f64m1, 64)

OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_add_wrap, vadd_vv_u8m1, 8)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_add_wrap, vadd_vv_i8m1, 8)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_add_wrap, vadd_vv_u16m1, 16)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_add_wrap, vadd_vv_i16m1, 16)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_sub_wrap, vsub_vv_u8m1, 8)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_sub_wrap, vsub_vv_i8m1, 8)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_sub_wrap, vsub_vv_u16m1, 16)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_sub_wrap, vsub_vv_i16m1, 16)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_mul_wrap, vmul_vv_u8m1, 8)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_mul_wrap, vmul_vv_i8m1, 8)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_mul_wrap, vmul_vv_u16m1, 16)
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_mul_wrap, vmul_vv_i16m1, 16)
#define OPENCV_HAL_IMPL_RVV_REDUCE_SUM(_Tpvec, _wTpvec, _nwTpvec, scalartype, suffix, wsuffix, wwidth, red) \
inline scalartype v_reduce_sum(const _Tpvec& a) \
{ \
    vsetvlmax_e##wwidth##m1(); \
    _nwTpvec zero = vzero_##wsuffix##m1(); \
    _nwTpvec res = vzero_##wsuffix##m1(); \
    res = v##red##_vs_##suffix##m1_##wsuffix##m1(res, a, zero); \
    return (scalartype)(_wTpvec(res).get0()); \
}
OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint8x16, v_uint16x8, vuint16m1_t, unsigned, u8, u16, 16, wredsumu)
OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int8x16, v_int16x8, vint16m1_t, int, i8, i16, 16, wredsum)
OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint16x8, v_uint32x4, vuint32m1_t, unsigned, u16, u32, 32, wredsumu)
OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int16x8, v_int32x4, vint32m1_t, int, i16, i32, 32, wredsum)
OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint32x4, v_uint64x2, vuint64m1_t, unsigned, u32, u64, 64, wredsumu)
OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int32x4, v_int64x2, vint64m1_t, int, i32, i64, 64, wredsum)
OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint64x2, v_uint64x2, vuint64m1_t, uint64, u64, u64, 64, redsum)
OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int64x2, v_int64x2, vint64m1_t, int64, i64, i64, 64, redsum)
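
// Usage sketch (illustrative only): v_reduce_sum accumulates into a wider scalar, so
// summing a u8 vector cannot overflow the 8-bit lanes.
//
//     v_uint8x16 v = v_setall_u8(200);
//     unsigned total = v_reduce_sum(v);   // 16 * 200 == 3200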
#define OPENCV_HAL_IMPL_RVV_REDUCE(_Tpvec, func, scalartype, suffix, width, red) \
inline scalartype v_reduce_##func(const _Tpvec& a) \
{ \
    vsetvlmax_e##width##m1(); \
    _Tpvec res = _Tpvec(v##red##_vs_##suffix##m1_##suffix##m1(a, a, a)); \
    return scalartype(res.get0()); \
}
OPENCV_HAL_IMPL_RVV_REDUCE(v_uint8x16, min, uchar, u8, 8, redminu)
OPENCV_HAL_IMPL_RVV_REDUCE(v_int8x16, min, schar, i8, 8, redmin)
OPENCV_HAL_IMPL_RVV_REDUCE(v_uint16x8, min, ushort, u16, 16, redminu)
OPENCV_HAL_IMPL_RVV_REDUCE(v_int16x8, min, short, i16, 16, redmin)
OPENCV_HAL_IMPL_RVV_REDUCE(v_uint32x4, min, unsigned, u32, 32, redminu)
OPENCV_HAL_IMPL_RVV_REDUCE(v_int32x4, min, int, i32, 32, redmin)
OPENCV_HAL_IMPL_RVV_REDUCE(v_float32x4, min, float, f32, 32, fredmin)
OPENCV_HAL_IMPL_RVV_REDUCE(v_uint8x16, max, uchar, u8, 8, redmaxu)
OPENCV_HAL_IMPL_RVV_REDUCE(v_int8x16, max, schar, i8, 8, redmax)
OPENCV_HAL_IMPL_RVV_REDUCE(v_uint16x8, max, ushort, u16, 16, redmaxu)
OPENCV_HAL_IMPL_RVV_REDUCE(v_int16x8, max, short, i16, 16, redmax)
OPENCV_HAL_IMPL_RVV_REDUCE(v_uint32x4, max, unsigned, u32, 32, redmaxu)
OPENCV_HAL_IMPL_RVV_REDUCE(v_int32x4, max, int, i32, 32, redmax)
OPENCV_HAL_IMPL_RVV_REDUCE(v_float32x4, max, float, f32, 32, fredmax)
float CV_DECL_ALIGNED(32) elems[4] =

return one / v_sqrt(x);

return one / v_sqrt(x);

v_float32x4 x(vfmacc_vv_f32m1(vfmul_vv_f32m1(a, a), b, b));

return v_float32x4(vfmacc_vv_f32m1(vfmul_vv_f32m1(a, a), b, b));

v_float64x2 x(vfmacc_vv_f64m1(vfmul_vv_f64m1(a, a), b, b));

return v_float64x2(vfmacc_vv_f64m1(vfmul_vv_f64m1(a, a), b, b));

return v_int32x4(vmacc_vv_i32m1(c, a, b));

return v_fma(a, b, c);

return v_fma(a, b, c);

return v_fma(a, b, c);
#define OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(_Tpvec, suffix, shift, width) \
inline bool v_check_all(const _Tpvec& a) \
{ \
    vsetvlmax_e##width##m1(); \
    v_uint64x2 v = v_uint64x2((vuint64m1_t)vsrl_vx_##suffix##m1(vnot_v_##suffix##m1(a), shift)); \
    return (v.val[0] | v.val[1]) == 0; \
} \
inline bool v_check_any(const _Tpvec& a) \
{ \
    vsetvlmax_e##width##m1(); \
    v_uint64x2 v = v_uint64x2((vuint64m1_t)vsrl_vx_##suffix##m1(a, shift)); \
    return (v.val[0] | v.val[1]) != 0; \
}
OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint8x16, u8, 7, 8)
OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint16x8, u16, 15, 16)
OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint32x4, u32, 31, 32)
OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint64x2, u64, 63, 64)
#define OPENCV_HAL_IMPL_RVV_ABSDIFF(_Tpvec, abs) \
inline _Tpvec v_##abs(const _Tpvec& a, const _Tpvec& b) \
{ \
    return v_max(a, b) - v_min(a, b); \
}

OPENCV_HAL_IMPL_RVV_ABSDIFF(v_int8x16, absdiffs)
OPENCV_HAL_IMPL_RVV_ABSDIFF(v_int16x8, absdiffs)

#define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, _nwTpvec, sub, rshr, width) \
inline _rTpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \
{ \
    vsetvlmax_e##width##m1(); \
    return _rTpvec(rshr((_nwTpvec)sub(v_max(a, b), v_min(a, b)), 0)); \
}

OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int8x16, v_uint8x16, vuint16m2_t, vwsub_vv_i16m2, vnclipu_wx_u8m1, 8)
OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int16x8, v_uint16x8, vuint32m2_t, vwsub_vv_i32m2, vnclipu_wx_u16m1, 16)
OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int32x4, v_uint32x4, vuint64m2_t, vwsub_vv_i64m2, vnclipu_wx_u32m1, 32)

#define OPENCV_HAL_IMPL_RVV_ABS(_Tprvec, _Tpvec, suffix) \
inline _Tprvec v_abs(const _Tpvec& a) \
{ \
    return v_absdiff(a, v_setzero_##suffix()); \
}

#define OPENCV_HAL_IMPL_RVV_REDUCE_SAD(_Tpvec, scalartype) \
inline scalartype v_reduce_sad(const _Tpvec& a, const _Tpvec& b) \
{ \
    return v_reduce_sum(v_absdiff(a, b)); \
}

OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_uint8x16, unsigned)
OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_int8x16, unsigned)
OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_uint16x8, unsigned)
OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_int16x8, unsigned)
OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_uint32x4, unsigned)
OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_int32x4, unsigned)
#define OPENCV_HAL_IMPL_RVV_SELECT(_Tpvec, merge, ne, width) \
inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
{ \
    vsetvlmax_e##width##m1(); \
    return _Tpvec(merge(ne(mask, 0), b, a)); \
}

OPENCV_HAL_IMPL_RVV_SELECT(v_uint8x16, vmerge_vvm_u8m1, vmsne_vx_u8m1_b8, 8)
OPENCV_HAL_IMPL_RVV_SELECT(v_int8x16, vmerge_vvm_i8m1, vmsne_vx_i8m1_b8, 8)
OPENCV_HAL_IMPL_RVV_SELECT(v_uint16x8, vmerge_vvm_u16m1, vmsne_vx_u16m1_b16, 16)
OPENCV_HAL_IMPL_RVV_SELECT(v_int16x8, vmerge_vvm_i16m1, vmsne_vx_i16m1_b16, 16)
OPENCV_HAL_IMPL_RVV_SELECT(v_uint32x4, vmerge_vvm_u32m1, vmsne_vx_u32m1_b32, 32)
OPENCV_HAL_IMPL_RVV_SELECT(v_int32x4, vmerge_vvm_i32m1, vmsne_vx_i32m1_b32, 32)
OPENCV_HAL_IMPL_RVV_SELECT(v_float32x4, vmerge_vvm_f32m1, vmfne_vf_f32m1_b32, 32)
OPENCV_HAL_IMPL_RVV_SELECT(v_float64x2, vmerge_vvm_f64m1, vmfne_vf_f64m1_b64, 64)
#define OPENCV_HAL_IMPL_RVV_ROTATE_OP(_Tpvec, suffix, width) \
template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a) \
{ \
    vsetvlmax_e##width##m1(); \
    return _Tpvec(vslidedown_vx_##suffix##m1(vzero_##suffix##m1(), a, n)); \
} \
template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a) \
{ \
    vsetvlmax_e##width##m1(); \
    return _Tpvec(vslideup_vx_##suffix##m1(vzero_##suffix##m1(), a, n)); \
} \
template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a) \
{ return a; } \
template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \
{ \
    vsetvlmax_e##width##m1(); \
    return _Tpvec(vslideup_vx_##suffix##m1(vslidedown_vx_##suffix##m1(vzero_##suffix##m1(), a, n), b, _Tpvec::nlanes - n)); \
} \
template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \
{ \
    vsetvlmax_e##width##m1(); \
    return _Tpvec(vslideup_vx_##suffix##m1(vslidedown_vx_##suffix##m1(vzero_##suffix##m1(), b, _Tpvec::nlanes - n), a, n)); \
} \
template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a, const _Tpvec& b) \
{ CV_UNUSED(b); return a; }
OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_uint8x16, u8, 8)
OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_int8x16, i8, 8)
OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_uint16x8, u16, 16)
OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_int16x8, i16, 16)
OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_uint32x4, u32, 32)
OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_int32x4, i32, 32)
OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_float32x4, f32, 32)
OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_uint64x2, u64, 64)
OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_int64x2, i64, 64)
OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_float64x2, f64, 64)
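
// Usage sketch (illustrative only): the rotate operations shift whole lanes, filling
// vacated positions with zeros (one-vector form) or with lanes of a second vector.
//
//     v_int32x4 a = v_int32x4(1, 2, 3, 4);
//     v_int32x4 r = v_rotate_right<1>(a);   // lanes become {2, 3, 4, 0}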
double arr[4] = {a.val[0], a.val[1], 0, 0};
vfloat64m2_t tmp = vle64_v_f64m2(arr);

double arr[4] = {a.val[0], a.val[1], b.val[0], b.val[1]};
vfloat64m2_t tmp = vle64_v_f64m2(arr);

double CV_DECL_ALIGNED(32) ptr[4] = {0};
vse64_v_f64m2(ptr, vfwcvt_f_x_v_f64m2(a));
double CV_DECL_ALIGNED(32) elems[2] =

double CV_DECL_ALIGNED(32) ptr[4] = {0};
vse64_v_f64m2(ptr, vfwcvt_f_x_v_f64m2(a));
double CV_DECL_ALIGNED(32) elems[2] =

double CV_DECL_ALIGNED(32) ptr[4] = {0};
vse64_v_f64m2(ptr, vfwcvt_f_f_v_f64m2(a));
double CV_DECL_ALIGNED(32) elems[2] =

double CV_DECL_ALIGNED(32) ptr[4] = {0};
vse64_v_f64m2(ptr, vfwcvt_f_f_v_f64m2(a));
double CV_DECL_ALIGNED(32) elems[2] =
#define OPENCV_HAL_IMPL_RVV_BROADCAST(_Tpvec, suffix) \
template<int i> inline _Tpvec v_broadcast_element(_Tpvec v) \
{ \
    return v_setall_##suffix(v_extract_n<i>(v)); \
}

OPENCV_HAL_IMPL_RVV_BROADCAST(v_int8x16, s8)
OPENCV_HAL_IMPL_RVV_BROADCAST(v_uint16x8, u16)
OPENCV_HAL_IMPL_RVV_BROADCAST(v_int16x8, s16)
OPENCV_HAL_IMPL_RVV_BROADCAST(v_uint32x4, u32)
OPENCV_HAL_IMPL_RVV_BROADCAST(v_int32x4, s32)
OPENCV_HAL_IMPL_RVV_BROADCAST(v_uint64x2, u64)
OPENCV_HAL_IMPL_RVV_BROADCAST(v_int64x2, s64)
#define OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(_Tpvec, _Tp, suffix) \
inline void v_transpose4x4(const v_##_Tpvec& a0, const v_##_Tpvec& a1, \
                           const v_##_Tpvec& a2, const v_##_Tpvec& a3, \
                           v_##_Tpvec& b0, v_##_Tpvec& b1, \
                           v_##_Tpvec& b2, v_##_Tpvec& b3) \
{ \
    _Tp CV_DECL_ALIGNED(32) elems0[4] = \
    { \
        v_extract_n<0>(a0), \
        v_extract_n<0>(a1), \
        v_extract_n<0>(a2), \
        v_extract_n<0>(a3) \
    }; \
    b0 = v_load(elems0); \
    _Tp CV_DECL_ALIGNED(32) elems1[4] = \
    { \
        v_extract_n<1>(a0), \
        v_extract_n<1>(a1), \
        v_extract_n<1>(a2), \
        v_extract_n<1>(a3) \
    }; \
    b1 = v_load(elems1); \
    _Tp CV_DECL_ALIGNED(32) elems2[4] = \
    { \
        v_extract_n<2>(a0), \
        v_extract_n<2>(a1), \
        v_extract_n<2>(a2), \
        v_extract_n<2>(a3) \
    }; \
    b2 = v_load(elems2); \
    _Tp CV_DECL_ALIGNED(32) elems3[4] = \
    { \
        v_extract_n<3>(a0), \
        v_extract_n<3>(a1), \
        v_extract_n<3>(a2), \
        v_extract_n<3>(a3) \
    }; \
    b3 = v_load(elems3); \
}
OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(uint32x4, unsigned, u32)
OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(int32x4, int, i32)
OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(float32x4, float, f32)
#define OPENCV_HAL_IMPL_RVV_REVERSE(_Tpvec, _Tp, width, suffix) \
inline _Tpvec v_reverse(const _Tpvec& a) \
{ \
    _Tp CV_DECL_ALIGNED(32) ptr[_Tpvec::nlanes] = {0}; \
    _Tp CV_DECL_ALIGNED(32) ptra[_Tpvec::nlanes] = {0}; \
    v_store(ptra, a); \
    for (int i = 0; i < _Tpvec::nlanes; i++) \
    { \
        ptr[i] = ptra[_Tpvec::nlanes-i-1]; \
    } \
    return v_load(ptr); \
}
OPENCV_HAL_IMPL_RVV_REVERSE(v_uint8x16, uchar, 8, u8)
OPENCV_HAL_IMPL_RVV_REVERSE(v_int8x16, schar, 8, i8)
OPENCV_HAL_IMPL_RVV_REVERSE(v_uint16x8, ushort, 16, u16)
OPENCV_HAL_IMPL_RVV_REVERSE(v_int16x8, short, 16, i16)
OPENCV_HAL_IMPL_RVV_REVERSE(v_uint32x4, unsigned, 32, u32)
OPENCV_HAL_IMPL_RVV_REVERSE(v_int32x4, int, 32, i32)
OPENCV_HAL_IMPL_RVV_REVERSE(v_float32x4, float, 32, f32)
OPENCV_HAL_IMPL_RVV_REVERSE(v_uint64x2, uint64, 64, u64)
OPENCV_HAL_IMPL_RVV_REVERSE(v_int64x2, int64, 64, i64)
OPENCV_HAL_IMPL_RVV_REVERSE(v_float64x2, double, 64, f64)
#define OPENCV_HAL_IMPL_RVV_EXPAND(_Tpwvec, _Tp, _Tpvec, width, suffix, wcvt) \
inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
{ \
    _Tp CV_DECL_ALIGNED(32) lptr[_Tpvec::nlanes/2] = {0}; \
    _Tp CV_DECL_ALIGNED(32) hptr[_Tpvec::nlanes/2] = {0}; \
    v_store_low(lptr, a); \
    v_store_high(hptr, a); \
    b0 = _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(lptr))); \
    b1 = _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(hptr))); \
} \
inline _Tpwvec v_expand_low(const _Tpvec& a) \
{ \
    _Tp CV_DECL_ALIGNED(32) lptr[_Tpvec::nlanes/2] = {0}; \
    v_store_low(lptr, a); \
    return _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(lptr))); \
} \
inline _Tpwvec v_expand_high(const _Tpvec& a) \
{ \
    _Tp CV_DECL_ALIGNED(32) hptr[_Tpvec::nlanes/2] = {0}; \
    v_store_high(hptr, a); \
    return _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(hptr))); \
} \
inline _Tpwvec v_load_expand(const _Tp* ptr) \
{ \
    return _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(ptr))); \
}

inline v_uint32x4 v_load_expand_q(const uchar* ptr)
{
    return v_uint32x4(vwcvtu_x_x_v_u32m1(vwcvtu_x_x_v_u16mf2(vle8_v_u8mf4(ptr))));
}

inline v_int32x4 v_load_expand_q(const schar* ptr)
{
    return v_int32x4(vwcvt_x_x_v_i32m1(vwcvt_x_x_v_i16mf2(vle8_v_i8mf4(ptr))));
}
#define OPENCV_HAL_IMPL_RVV_PACK(_Tpvec, _Tp, _wTpvec, _wTp, width, suffix, rshr, shr) \
inline _Tpvec v_pack(const _wTpvec& a, const _wTpvec& b) \
{ \
    _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \
    v_store(arr, a); \
    v_store(arr + _wTpvec::nlanes, b); \
    vsetvlmax_e##width##m2(); \
    return _Tpvec(shr(vle##width##_v_##suffix##m2(arr), 0)); \
} \
inline void v_pack_store(_Tp* ptr, const _wTpvec& a) \
{ \
    _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \
    v_store(arr, a); \
    v_store(arr + _wTpvec::nlanes, _wTpvec(vzero_##suffix##m1())); \
    vsetvlmax_e##width##m2(); \
    v_store(ptr, _Tpvec(shr(vle##width##_v_##suffix##m2(arr), 0))); \
} \
template<int n> inline \
_Tpvec v_rshr_pack(const _wTpvec& a, const _wTpvec& b) \
{ \
    _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \
    v_store(arr, a); \
    v_store(arr + _wTpvec::nlanes, b); \
    vsetvlmax_e##width##m2(); \
    return _Tpvec(rshr(vle##width##_v_##suffix##m2(arr), n)); \
} \
template<int n> inline \
void v_rshr_pack_store(_Tp* ptr, const _wTpvec& a) \
{ \
    _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \
    v_store(arr, a); \
    v_store(arr + _wTpvec::nlanes, _wTpvec(vzero_##suffix##m1())); \
    vsetvlmax_e##width##m2(); \
    v_store(ptr, _Tpvec(rshr(vle##width##_v_##suffix##m2(arr), n))); \
}
OPENCV_HAL_IMPL_RVV_PACK(v_uint8x16, uchar, v_uint16x8, ushort, 16, u16, vnclipu_wx_u8m1, vnclipu_wx_u8m1)
OPENCV_HAL_IMPL_RVV_PACK(v_int8x16, schar, v_int16x8, short, 16, i16, vnclip_wx_i8m1, vnclip_wx_i8m1)
OPENCV_HAL_IMPL_RVV_PACK(v_uint16x8, ushort, v_uint32x4, unsigned, 32, u32, vnclipu_wx_u16m1, vnclipu_wx_u16m1)
OPENCV_HAL_IMPL_RVV_PACK(v_int16x8, short, v_int32x4, int, 32, i32, vnclip_wx_i16m1, vnclip_wx_i16m1)
OPENCV_HAL_IMPL_RVV_PACK(v_uint32x4, unsigned, v_uint64x2, uint64, 64, u64, vnclipu_wx_u32m1, vnsrl_wx_u32m1)
OPENCV_HAL_IMPL_RVV_PACK(v_int32x4, int, v_int64x2, int64, 64, i64, vnclip_wx_i32m1, vnsra_wx_i32m1)
#define OPENCV_HAL_IMPL_RVV_PACK_U(_Tpvec, _Tp, _wTpvec, _wTp, width, suffix, rshr, cast) \
inline _Tpvec v_pack_u(const _wTpvec& a, const _wTpvec& b) \
{ \
    _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \
    v_store(arr, a); \
    v_store(arr + _wTpvec::nlanes, b); \
    vsetvlmax_e##width##m2(); \
    return _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr), 0)), 0)); \
} \
inline void v_pack_u_store(_Tp* ptr, const _wTpvec& a) \
{ \
    _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \
    v_store(arr, a); \
    v_store(arr + _wTpvec::nlanes, _wTpvec(vzero_##suffix##m1())); \
    vsetvlmax_e##width##m2(); \
    v_store(ptr, _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr), 0)), 0))); \
} \
template<int n> inline \
_Tpvec v_rshr_pack_u(const _wTpvec& a, const _wTpvec& b) \
{ \
    _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \
    v_store(arr, a); \
    v_store(arr + _wTpvec::nlanes, b); \
    vsetvlmax_e##width##m2(); \
    return _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr), 0)), n)); \
} \
template<int n> inline \
void v_rshr_pack_u_store(_Tp* ptr, const _wTpvec& a) \
{ \
    _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \
    v_store(arr, a); \
    v_store(arr + _wTpvec::nlanes, _wTpvec(vzero_##suffix##m1())); \
    vsetvlmax_e##width##m2(); \
    v_store(ptr, _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr), 0)), n))); \
}
OPENCV_HAL_IMPL_RVV_PACK_U(v_uint8x16, uchar, v_int16x8, short, 16, i16, vnclipu_wx_u8m1, vreinterpret_v_i16m2_u16m2)
OPENCV_HAL_IMPL_RVV_PACK_U(v_uint16x8, ushort, v_int32x4, int, 32, i32, vnclipu_wx_u16m1, vreinterpret_v_i32m2_u32m2)
#define OPENCV_HAL_IMPL_RVV_UNPACKS(_Tpvec, _Tp, width, suffix) \
inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \
{ \
    _Tp CV_DECL_ALIGNED(32) ptra0[v_##_Tpvec::nlanes] = {0}; \
    _Tp CV_DECL_ALIGNED(32) ptra1[v_##_Tpvec::nlanes] = {0}; \
    _Tp CV_DECL_ALIGNED(32) ptrb0[v_##_Tpvec::nlanes] = {0}; \
    _Tp CV_DECL_ALIGNED(32) ptrb1[v_##_Tpvec::nlanes] = {0}; \
    v_store(ptra0, a0); \
    v_store(ptra1, a1); \
    int i; \
    for( i = 0; i < v_##_Tpvec::nlanes/2; i++ ) \
    { \
        ptrb0[i*2] = ptra0[i]; \
        ptrb0[i*2+1] = ptra1[i]; \
    } \
    for( ; i < v_##_Tpvec::nlanes; i++ ) \
    { \
        ptrb1[i*2-v_##_Tpvec::nlanes] = ptra0[i]; \
        ptrb1[i*2-v_##_Tpvec::nlanes+1] = ptra1[i]; \
    } \
    b0 = v_load(ptrb0); \
    b1 = v_load(ptrb1); \
} \
inline v_##_Tpvec v_combine_low(const v_##_Tpvec& a, const v_##_Tpvec& b) \
{ \
    _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes/2] = {0}; \
    _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes/2] = {0}; \
    v_store_low(ptra, a); \
    v_store_low(ptrb, b); \
    return v_load_halves(ptra, ptrb); \
} \
inline v_##_Tpvec v_combine_high(const v_##_Tpvec& a, const v_##_Tpvec& b) \
{ \
    _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes/2] = {0}; \
    _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes/2] = {0}; \
    v_store_high(ptra, a); \
    v_store_high(ptrb, b); \
    return v_load_halves(ptra, ptrb); \
} \
inline void v_recombine(const v_##_Tpvec& a, const v_##_Tpvec& b, v_##_Tpvec& c, v_##_Tpvec& d) \
{ \
    c = v_combine_low(a, b); \
    d = v_combine_high(a, b); \
}
OPENCV_HAL_IMPL_RVV_UNPACKS(uint8x16, uchar, 8, u8)
OPENCV_HAL_IMPL_RVV_UNPACKS(int8x16, schar, 8, i8)
OPENCV_HAL_IMPL_RVV_UNPACKS(uint16x8, ushort, 16, u16)
OPENCV_HAL_IMPL_RVV_UNPACKS(int16x8, short, 16, i16)
OPENCV_HAL_IMPL_RVV_UNPACKS(uint32x4, unsigned, 32, u32)
OPENCV_HAL_IMPL_RVV_UNPACKS(int32x4, int, 32, i32)
OPENCV_HAL_IMPL_RVV_UNPACKS(float32x4, float, 32, f32)
OPENCV_HAL_IMPL_RVV_UNPACKS(float64x2, double, 64, f64)
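
// Usage sketch (illustrative only): v_zip interleaves two vectors lane by lane, while
// v_combine_low/v_combine_high splice their lower or upper halves.
//
//     v_uint16x8 a = v_setall_u16(1), b = v_setall_u16(2);
//     v_uint16x8 lo = v_setzero_u16(), hi = v_setzero_u16();
//     v_zip(a, b, lo, hi);   // lo = {1,2,1,2,1,2,1,2}, hi likewise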
#define OPENCV_HAL_IMPL_RVV_INTERLEAVED(_Tpvec, _Tp, suffix, width) \
inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b) \
{ \
    _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \
    _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \
    int i, i2; \
    for( i = i2 = 0; i < v_##_Tpvec::nlanes; i++, i2 += 2 ) \
    { \
        ptra[i] = ptr[i2]; \
        ptrb[i] = ptr[i2+1]; \
    } \
    a = v_load(ptra); \
    b = v_load(ptrb); \
} \
inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, v_##_Tpvec& c) \
{ \
    _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \
    _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \
    _Tp CV_DECL_ALIGNED(32) ptrc[v_##_Tpvec::nlanes] = {0}; \
    int i, i3; \
    for( i = i3 = 0; i < v_##_Tpvec::nlanes; i++, i3 += 3 ) \
    { \
        ptra[i] = ptr[i3]; \
        ptrb[i] = ptr[i3+1]; \
        ptrc[i] = ptr[i3+2]; \
    } \
    a = v_load(ptra); \
    b = v_load(ptrb); \
    c = v_load(ptrc); \
} \
inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, \
                                v_##_Tpvec& c, v_##_Tpvec& d) \
{ \
    _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \
    _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \
    _Tp CV_DECL_ALIGNED(32) ptrc[v_##_Tpvec::nlanes] = {0}; \
    _Tp CV_DECL_ALIGNED(32) ptrd[v_##_Tpvec::nlanes] = {0}; \
    int i, i4; \
    for( i = i4 = 0; i < v_##_Tpvec::nlanes; i++, i4 += 4 ) \
    { \
        ptra[i] = ptr[i4]; \
        ptrb[i] = ptr[i4+1]; \
        ptrc[i] = ptr[i4+2]; \
        ptrd[i] = ptr[i4+3]; \
    } \
    a = v_load(ptra); \
    b = v_load(ptrb); \
    c = v_load(ptrc); \
    d = v_load(ptrd); \
} \
inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
                                hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
{ \
    _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \
    _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \
    v_store(ptra, a); \
    v_store(ptrb, b); \
    int i, i2; \
    for( i = i2 = 0; i < v_##_Tpvec::nlanes; i++, i2 += 2 ) \
    { \
        ptr[i2] = ptra[i]; \
        ptr[i2+1] = ptrb[i]; \
    } \
} \
inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
                                const v_##_Tpvec& c, hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
{ \
    _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \
    _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \
    _Tp CV_DECL_ALIGNED(32) ptrc[v_##_Tpvec::nlanes] = {0}; \
    v_store(ptra, a); \
    v_store(ptrb, b); \
    v_store(ptrc, c); \
    int i, i3; \
    for( i = i3 = 0; i < v_##_Tpvec::nlanes; i++, i3 += 3 ) \
    { \
        ptr[i3] = ptra[i]; \
        ptr[i3+1] = ptrb[i]; \
        ptr[i3+2] = ptrc[i]; \
    } \
} \
inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
                                const v_##_Tpvec& c, const v_##_Tpvec& d, \
                                hal::StoreMode /*mode*/=hal::STORE_UNALIGNED ) \
{ \
    _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \
    _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \
    _Tp CV_DECL_ALIGNED(32) ptrc[v_##_Tpvec::nlanes] = {0}; \
    _Tp CV_DECL_ALIGNED(32) ptrd[v_##_Tpvec::nlanes] = {0}; \
    v_store(ptra, a); \
    v_store(ptrb, b); \
    v_store(ptrc, c); \
    v_store(ptrd, d); \
    int i, i4; \
    for( i = i4 = 0; i < v_##_Tpvec::nlanes; i++, i4 += 4 ) \
    { \
        ptr[i4] = ptra[i]; \
        ptr[i4+1] = ptrb[i]; \
        ptr[i4+2] = ptrc[i]; \
        ptr[i4+3] = ptrd[i]; \
    } \
} \
inline v_##_Tpvec v_interleave_pairs(const v_##_Tpvec& vec) \
{ \
    _Tp CV_DECL_ALIGNED(32) ptr[v_##_Tpvec::nlanes] = {0}; \
    _Tp CV_DECL_ALIGNED(32) ptrvec[v_##_Tpvec::nlanes] = {0}; \
    v_store(ptrvec, vec); \
    for (int i = 0; i < v_##_Tpvec::nlanes/4; i++) \
    { \
        ptr[4*i  ] = ptrvec[4*i  ]; \
        ptr[4*i+1] = ptrvec[4*i+2]; \
        ptr[4*i+2] = ptrvec[4*i+1]; \
        ptr[4*i+3] = ptrvec[4*i+3]; \
    } \
    return v_load(ptr); \
} \
inline v_##_Tpvec v_interleave_quads(const v_##_Tpvec& vec) \
{ \
    _Tp CV_DECL_ALIGNED(32) ptr[v_##_Tpvec::nlanes] = {0}; \
    _Tp CV_DECL_ALIGNED(32) ptrvec[v_##_Tpvec::nlanes] = {0}; \
    v_store(ptrvec, vec); \
    for (int i = 0; i < v_##_Tpvec::nlanes/8; i++) \
    { \
        ptr[8*i  ] = ptrvec[4*i  ]; \
        ptr[8*i+1] = ptrvec[4*i+4]; \
        ptr[8*i+2] = ptrvec[4*i+1]; \
        ptr[8*i+3] = ptrvec[4*i+5]; \
        ptr[8*i+4] = ptrvec[4*i+2]; \
        ptr[8*i+5] = ptrvec[4*i+6]; \
        ptr[8*i+6] = ptrvec[4*i+3]; \
        ptr[8*i+7] = ptrvec[4*i+7]; \
    } \
    return v_load(ptr); \
}
OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint8x16, uchar, u8, 8)
OPENCV_HAL_IMPL_RVV_INTERLEAVED(int8x16, schar, i8, 8)
OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint16x8, ushort, u16, 16)
OPENCV_HAL_IMPL_RVV_INTERLEAVED(int16x8, short, i16, 16)
OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint32x4, unsigned, u32, 32)
OPENCV_HAL_IMPL_RVV_INTERLEAVED(int32x4, int, i32, 32)
OPENCV_HAL_IMPL_RVV_INTERLEAVED(float32x4, float, f32, 32)
OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint64x2, uint64, u64, 64)
OPENCV_HAL_IMPL_RVV_INTERLEAVED(int64x2, int64, i64, 64)
OPENCV_HAL_IMPL_RVV_INTERLEAVED(float64x2, double, f64, 64)
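
// Illustrative sketch (not part of the original header): typical use of the
// interleave helpers defined above to split packed 3-channel (e.g. BGR) bytes
// into planes and back. The function name and data are hypothetical examples.
inline void example_deinterleave_bgr_sketch()
{
    uchar bgr[16*3];
    for (int i = 0; i < 16*3; i++) bgr[i] = (uchar)i;   // example packed pixels
    v_uint8x16 b, g, r;
    v_load_deinterleave(bgr, b, g, r);   // b = {0,3,6,...}, g = {1,4,7,...}, r = {2,5,8,...}
    uchar out[16*3];
    v_store_interleave(out, b, g, r);    // out reproduces the packed layout of bgr
}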
static const unsigned char popCountTable[] =
{
    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};
#define OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(_rTpvec, _Tpvec, _rTp, _Tp, suffix) \
inline _rTpvec v_popcount(const _Tpvec& a) \
{ \
    uchar CV_DECL_ALIGNED(32) ptra[16] = {0}; \
    v_store(ptra, v_reinterpret_as_u8(a)); \
    _rTp CV_DECL_ALIGNED(32) ptr[_Tpvec::nlanes] = {0}; \
    v_store(ptr, v_setzero_##suffix()); \
    for (int i = 0; i < _Tpvec::nlanes*(int)sizeof(_Tp); i++) \
        ptr[i/sizeof(_Tp)] += popCountTable[ptra[i]]; \
    return v_load(ptr); \
}
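
// Illustrative sketch (not part of the original header): the table above maps a
// byte to its number of set bits, so v_popcount is one lookup per byte, summed
// per lane. The helper name below is hypothetical.
inline int example_popcount_byte_sketch(uchar x)
{
    return popCountTable[x];   // e.g. popCountTable[0xF0] == 4
}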
#define OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(_Tpvec, _Tp, suffix, width, shift) \
inline int v_signmask(const _Tpvec& a) \
{ \
    int mask = 0; \
    vsetvlmax_e##width##m1(); \
    _Tpvec tmp = _Tpvec(vsrl_vx_##suffix##m1(a, shift)); \
    for( int i = 0; i < _Tpvec::nlanes; i++ ) \
        mask |= (int)(tmp.val[i]) << i; \
    return mask; \
}

OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_uint8x16, uchar, u8, 8, 7)
OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_uint16x8, ushort, u16, 16, 15)
OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_uint32x4, unsigned, u32, 32, 31)
OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_uint64x2, uint64, u64, 64, 63)
inline int v_signmask(const v_int8x16& a)
{ return v_signmask(v_reinterpret_as_u8(a)); }
inline int v_signmask(const v_int16x8& a)
{ return v_signmask(v_reinterpret_as_u16(a)); }
inline int v_signmask(const v_int32x4& a)
{ return v_signmask(v_reinterpret_as_u32(a)); }
inline int v_signmask(const v_float32x4& a)
{ return v_signmask(v_reinterpret_as_u32(a)); }
inline int v_signmask(const v_int64x2& a)
{ return v_signmask(v_reinterpret_as_u64(a)); }
inline int v_signmask(const v_float64x2& a)
{ return v_signmask(v_reinterpret_as_u64(a)); }
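
// Illustrative sketch (not part of the original header): v_signmask packs the
// sign bit of every lane into one integer, lane i becoming bit i of the result.
inline void example_signmask_sketch()
{
    v_int32x4 v(-1, 2, -3, 4);
    int m = v_signmask(v);   // lanes 0 and 2 are negative, so m == 5
    (void)m;
}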
#define OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(_Tpvec, _Tp, suffix) \
inline int v_scan_forward(const _Tpvec& a) \
{ \
    _Tp CV_DECL_ALIGNED(32) ptr[_Tpvec::nlanes] = {0}; \
    v_store(ptr, v_reinterpret_as_##suffix(a)); \
    for (int i = 0; i < _Tpvec::nlanes; i++) \
        if(int(ptr[i]) < 0) \
            return i; \
    return 0; \
}

OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_uint8x16, uchar, u8)
OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_int8x16, schar, s8)
OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_uint16x8, ushort, u16)
OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_int16x8, short, s16)
OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_uint32x4, unsigned, u32)
OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_int32x4, int, s32)
OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_float32x4, float, f32)
OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_uint64x2, uint64, u64)
OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_int64x2, int64, s64)
OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_float64x2, double, f64)
#define OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(_Tpvec, _Tp) \
inline _Tpvec v_pack_triplets(const _Tpvec& vec) \
{ \
    _Tp CV_DECL_ALIGNED(32) ptr[_Tpvec::nlanes] = {0}; \
    _Tp CV_DECL_ALIGNED(32) ptrvec[_Tpvec::nlanes] = {0}; \
    v_store(ptrvec, vec); \
    for (int i = 0; i < _Tpvec::nlanes/4; i++) \
    { \
        ptr[3*i  ] = ptrvec[4*i  ]; \
        ptr[3*i+1] = ptrvec[4*i+1]; \
        ptr[3*i+2] = ptrvec[4*i+2]; \
    } \
    return v_load(ptr); \
}

OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_uint8x16, uchar)
OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_int8x16, schar)
OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_uint16x8, ushort)
OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_int16x8, short)
OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_uint32x4, unsigned)
OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_int32x4, int)
OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_float32x4, float)
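
// Illustrative sketch (not part of the original header): v_pack_triplets drops
// every fourth element, packing (x,y,z,w) quadruples into contiguous triplets.
inline void example_pack_triplets_sketch()
{
    v_int32x4 v(10, 11, 12, 13);
    v_int32x4 packed = v_pack_triplets(v);   // lanes 0..2 become {10, 11, 12}
    (void)packed;
}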
#if CV_FP16
inline v_float32x4 v_load_expand(const float16_t* ptr)
{
    return v_float32x4(vfwcvt_f_f_v_f32m1(vle16_v_f16mf2(ptr)));
}

inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
{
    vse16_v_f16mf2(ptr, vfncvt_f_f_w_f16mf2(v));
}
#else
inline v_float32x4 v_load_expand(const float16_t* ptr)
{
    const int N = 4;
    float buf[N];
    for( int i = 0; i < N; i++ ) buf[i] = (float)ptr[i];
    return v_load(buf);
}

inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
{
    const int N = 4;
    float buf[N];
    v_store(buf, v);
    for( int i = 0; i < N; i++ ) ptr[i] = float16_t(buf[i]);
}
#endif
inline v_int32x4 v_trunc(const v_float32x4& a)
{
    vsetvlmax_e32m1();
    return v_int32x4(vfcvt_rtz_x_f_v_i32m1(a));
}

inline v_int32x4 v_round(const v_float64x2& a)
{
    double arr[4] = {a.val[0], a.val[1], 0, 0};
    vsetvlmax_e64m2();
    vfloat64m2_t tmp = vle64_v_f64m2(arr);
    return v_int32x4(vfncvt_x_f_w_i32m1(tmp));
}

inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b)
{
    double arr[4] = {a.val[0], a.val[1], b.val[0], b.val[1]};
    vsetvlmax_e64m2();
    vfloat64m2_t tmp = vle64_v_f64m2(arr);
    return v_int32x4(vfncvt_x_f_w_i32m1(tmp));
}

inline v_int32x4 v_floor(const v_float64x2& a)
{
    double arr[4] = {a.val[0]-0.5f, a.val[1]-0.5f, 0, 0};
    vsetvlmax_e64m2();
    vfloat64m2_t tmp = vle64_v_f64m2(arr);
    return v_int32x4(vfncvt_x_f_w_i32m1(tmp));
}

inline v_int32x4 v_ceil(const v_float64x2& a)
{
    double arr[4] = {a.val[0]+0.5f, a.val[1]+0.5f, 0, 0};
    vsetvlmax_e64m2();
    vfloat64m2_t tmp = vle64_v_f64m2(arr);
    return v_int32x4(vfncvt_x_f_w_i32m1(tmp));
}

inline v_int32x4 v_trunc(const v_float64x2& a)
{
    double arr[4] = {a.val[0], a.val[1], 0, 0};
    vsetvlmax_e64m2();
    vfloat64m2_t tmp = vle64_v_f64m2(arr);
    return v_int32x4(vfncvt_rtz_x_f_w_i32m1(tmp));
}
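
// Illustrative sketch (not part of the original header): the conversions above
// differ only in rounding direction; v_round rounds to nearest, v_trunc toward
// zero, and the double-precision floor/ceil overloads shift by 0.5 before rounding.
inline void example_rounding_sketch()
{
    v_float32x4 v(1.6f, -1.6f, 2.3f, -2.3f);
    v_int32x4 r = v_round(v);   // {2, -2, 2, -2}
    v_int32x4 t = v_trunc(v);   // {1, -1, 2, -2}
    (void)r; (void)t;
}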
    int CV_DECL_ALIGNED(32) ptr[8] = {0};
    vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b));

    int CV_DECL_ALIGNED(32) ptr[8] = {0};
    vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b));

    int64 CV_DECL_ALIGNED(32) ptr[4] = {0};
    vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b));

    int64 CV_DECL_ALIGNED(32) ptr[4] = {0};
    vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b));

    unsigned CV_DECL_ALIGNED(32) ptr[16] = {0};
    vse32_v_u32m4(ptr, vqmaccu_vv_u32m4(vzero_u32m4(), a, b));
    return t1 + t2 + t3 + t4;

    unsigned CV_DECL_ALIGNED(32) ptr[16] = {0};
    vse32_v_u32m4(ptr, vqmaccu_vv_u32m4(vzero_u32m4(), a, b));
    return t1 + t2 + t3 + t4 + c;

    int CV_DECL_ALIGNED(32) ptr[16] = {0};
    vse32_v_i32m4(ptr, vqmacc_vv_i32m4(vzero_i32m4(), a, b));
    return t1 + t2 + t3 + t4;

    int CV_DECL_ALIGNED(32) ptr[16] = {0};
    vse32_v_i32m4(ptr, vqmacc_vv_i32m4(vzero_i32m4(), a, b));
    return t1 + t2 + t3 + t4 + c;

    uint64 CV_DECL_ALIGNED(32) ptr[8] = {0};
    vse64_v_u64m4(ptr, vqmaccu_vv_u64m4(vzero_u64m4(), a, b));
    return t1 + t2 + t3 + t4;

    uint64 CV_DECL_ALIGNED(32) ptr[8] = {0};
    vse64_v_u64m4(ptr, vqmaccu_vv_u64m4(vzero_u64m4(), a, b));
    return t1 + t2 + t3 + t4 + c;

    int64 CV_DECL_ALIGNED(32) ptr[8] = {0};
    vse64_v_i64m4(ptr, vqmacc_vv_i64m4(vzero_i64m4(), a, b));
    return t1 + t2 + t3 + t4;

    int64 CV_DECL_ALIGNED(32) ptr[8] = {0};
    vse64_v_i64m4(ptr, vqmacc_vv_i64m4(vzero_i64m4(), a, b));
    return t1 + t2 + t3 + t4 + c;

    int CV_DECL_ALIGNED(32) ptr[8] = {0};
    vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b));

    int CV_DECL_ALIGNED(32) ptr[8] = {0};
    vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b));

    int64 CV_DECL_ALIGNED(32) ptr[4] = {0};
    vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b));

    int64 CV_DECL_ALIGNED(32) ptr[4] = {0};
    vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b));

    unsigned CV_DECL_ALIGNED(32) ptr[16] = {0};
    vse32_v_u32m4(ptr, vqmaccu_vv_u32m4(vzero_u32m4(), a, b));
    return t1 + t2 + t3 + t4;

    unsigned CV_DECL_ALIGNED(32) ptr[16] = {0};
    vse32_v_u32m4(ptr, vqmaccu_vv_u32m4(vzero_u32m4(), a, b));
    return t1 + t2 + t3 + t4 + c;

    int CV_DECL_ALIGNED(32) ptr[16] = {0};
    vse32_v_i32m4(ptr, vqmacc_vv_i32m4(vzero_i32m4(), a, b));
    return t1 + t2 + t3 + t4;

    int CV_DECL_ALIGNED(32) ptr[16] = {0};
    vse32_v_i32m4(ptr, vqmacc_vv_i32m4(vzero_i32m4(), a, b));
    return t1 + t2 + t3 + t4 + c;

    uint64 CV_DECL_ALIGNED(32) ptr[8] = {0};
    vse64_v_u64m4(ptr, vqmaccu_vv_u64m4(vzero_u64m4(), a, b));
    return t1 + t2 + t3 + t4;

    uint64 CV_DECL_ALIGNED(32) ptr[8] = {0};
    vse64_v_u64m4(ptr, vqmaccu_vv_u64m4(vzero_u64m4(), a, b));
    return t1 + t2 + t3 + t4 + c;

    int64 CV_DECL_ALIGNED(32) ptr[8] = {0};
    vse64_v_i64m4(ptr, vqmacc_vv_i64m4(vzero_i64m4(), a, b));
    return t1 + t2 + t3 + t4;

    int64 CV_DECL_ALIGNED(32) ptr[8] = {0};
    vse64_v_i64m4(ptr, vqmacc_vv_i64m4(vzero_i64m4(), a, b));
    return t1 + t2 + t3 + t4 + c;
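
// Illustrative sketch (not part of the original header): v_dotprod_expand
// multiplies corresponding 8-bit lanes, widens the products to 32 bits, and
// sums each group of four adjacent products into one 32-bit lane.
inline void example_dotprod_expand_sketch()
{
    v_uint8x16 a = v_setall_u8(2);
    v_uint8x16 b = v_setall_u8(3);
    v_uint32x4 s = v_dotprod_expand(a, b);   // every lane == 2*3*4 == 24
    (void)s;
}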
inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
                            const v_float32x4& m1, const v_float32x4& m2,
                            const v_float32x4& m3)
{
    vsetvlmax_e32m1();
    vfloat32m1_t res = vfmul_vf_f32m1(m0, v_extract_n<0>(v));
    res = vfmacc_vf_f32m1(res, v_extract_n<1>(v), m1);
    res = vfmacc_vf_f32m1(res, v_extract_n<2>(v), m2);
    res = vfmacc_vf_f32m1(res, v_extract_n<3>(v), m3);
    return v_float32x4(res);
}

inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0,
                               const v_float32x4& m1, const v_float32x4& m2,
                               const v_float32x4& a)
{
    vsetvlmax_e32m1();
    vfloat32m1_t res = vfmul_vf_f32m1(m0, v_extract_n<0>(v));
    res = vfmacc_vf_f32m1(res, v_extract_n<1>(v), m1);
    res = vfmacc_vf_f32m1(res, v_extract_n<2>(v), m2);
    return v_float32x4(res) + a;
}
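
// Illustrative sketch (not part of the original header): v_matmul treats m0..m3
// as the columns of a 4x4 matrix, returning v.x*m0 + v.y*m1 + v.z*m2 + v.w*m3.
inline v_float32x4 example_transform_point_sketch(const v_float32x4& p,
                                                  const v_float32x4& c0, const v_float32x4& c1,
                                                  const v_float32x4& c2, const v_float32x4& c3)
{
    return v_matmul(p, c0, c1, c2, c3);
}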
#define OPENCV_HAL_IMPL_RVV_MUL_EXPAND(_Tpvec, _Tpwvec, _Tpw, suffix, wmul, width) \
inline void v_mul_expand(const _Tpvec& a, const _Tpvec& b, _Tpwvec& c, _Tpwvec& d) \
{ \
    _Tpw CV_DECL_ALIGNED(32) ptr[_Tpwvec::nlanes*2] = {0}; \
    vsetvlmax_e##width##m2(); \
    vse##width##_v_##suffix##m2(ptr, wmul(a, b)); \
    vsetvlmax_e##width##m1(); \
    c = _Tpwvec(vle##width##_v_##suffix##m1(ptr)); \
    d = _Tpwvec(vle##width##_v_##suffix##m1(ptr+_Tpwvec::nlanes)); \
}
inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b)
{
    return v_int16x8(vnsra_wx_i16m1(vwmul_vv_i32m2(a, b), 16));
}

inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b)
{
    return v_uint16x8(vnsrl_wx_u16m1(vwmulu_vv_u32m2(a, b), 16));
}
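
// Illustrative sketch (not part of the original header, and assuming the
// u16/u32 instantiation of the mul-expand macro above): v_mul_expand keeps the
// full 32-bit products of 16-bit inputs, while v_mul_hi keeps only their high halves.
inline void example_mul_expand_sketch()
{
    v_uint16x8 a = v_setall_u16(1000);
    v_uint16x8 b = v_setall_u16(2000);
    v_uint32x4 c, d;
    v_mul_expand(a, b, c, d);          // c: products of lanes 0..3, d: lanes 4..7 (== 2000000)
    v_uint16x8 h = v_mul_hi(a, b);     // every lane == 2000000 >> 16 == 30
    (void)c; (void)d; (void)h;
}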
#define OPENCV_HAL_IMPL_RVV_MUL_SAT(_Tpvec, _wTpvec) \
inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \
{ \
    _wTpvec c, d; \
    v_mul_expand(a, b, c, d); \
    return v_pack(c, d); \
} \
inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \
{ \
    a = a * b; \
    return a; \
}

inline void v_cleanup() {}

CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END