42
#ifndef OPENCV_HAL_NEON_UTILS_HPP
43
#define OPENCV_HAL_NEON_UTILS_HPP
45
#include "opencv2/core/cvdef.h"
52
/**
 * Rounds two packed floats to signed 32-bit integers.
 *
 * Each lane gets 0.5f with the lane's own sign added to it and is then
 * converted with vcvt_s32_f32 (a toward-zero conversion), which yields
 * round-to-nearest with halves going away from zero.
 *
 * @param v two single-precision lanes to round
 * @return the rounded lanes as int32
 */
inline int32x2_t cv_vrnd_s32_f32(float32x2_t v)
{
    // The sign-bit mask is built from an unsigned literal so that no signed
    // shift into the sign bit is needed. Plain locals are used instead of
    // function-local statics: statics with a non-constant initializer need a
    // guarded one-time initialization that is checked on every call.
    const int32x2_t v_sign = vreinterpret_s32_u32(vdup_n_u32(0x80000000u));
    const int32x2_t v_05 = vreinterpret_s32_f32(vdup_n_f32(0.5f));

    // Bit pattern of 0.5f carrying the sign bit of the corresponding lane of v.
    const int32x2_t v_addition = vorr_s32(v_05, vand_s32(v_sign, vreinterpret_s32_f32(v)));

    return vcvt_s32_f32(vadd_f32(v, vreinterpret_f32_s32(v_addition)));
}
61
/**
 * Rounds four packed floats to signed 32-bit integers.
 *
 * Each lane gets 0.5f with the lane's own sign added to it and is then
 * converted with vcvtq_s32_f32 (a toward-zero conversion), which yields
 * round-to-nearest with halves going away from zero.
 *
 * @param v four single-precision lanes to round
 * @return the rounded lanes as int32
 */
inline int32x4_t cv_vrndq_s32_f32(float32x4_t v)
{
    // The sign-bit mask is built from an unsigned literal so that no signed
    // shift into the sign bit is needed. Plain locals are used instead of
    // function-local statics: statics with a non-constant initializer need a
    // guarded one-time initialization that is checked on every call.
    const int32x4_t v_sign = vreinterpretq_s32_u32(vdupq_n_u32(0x80000000u));
    const int32x4_t v_05 = vreinterpretq_s32_f32(vdupq_n_f32(0.5f));

    // Bit pattern of 0.5f carrying the sign bit of the corresponding lane of v.
    const int32x4_t v_addition = vorrq_s32(v_05, vandq_s32(v_sign, vreinterpretq_s32_f32(v)));

    return vcvtq_s32_f32(vaddq_f32(v, vreinterpretq_f32_s32(v_addition)));
}
70
/**
 * Rounds two packed floats to unsigned 32-bit integers.
 *
 * Adds 0.5f to every lane and converts the sum with vcvt_u32_f32.
 * NOTE(review): no sign handling is done here, so this presumably expects
 * non-negative input - confirm against callers.
 *
 * @param v two single-precision lanes to round
 * @return the converted lanes as uint32
 */
inline uint32x2_t cv_vrnd_u32_f32(float32x2_t v)
{
    // A plain local avoids the guarded one-time initialization that a
    // function-local static with a non-constant initializer would require.
    const float32x2_t v_05 = vdup_n_f32(0.5f);

    return vcvt_u32_f32(vadd_f32(v, v_05));
}
76
/**
 * Rounds four packed floats to unsigned 32-bit integers.
 *
 * Adds 0.5f to every lane and converts the sum with vcvtq_u32_f32.
 * NOTE(review): no sign handling is done here, so this presumably expects
 * non-negative input - confirm against callers.
 *
 * @param v four single-precision lanes to round
 * @return the converted lanes as uint32
 */
inline uint32x4_t cv_vrndq_u32_f32(float32x4_t v)
{
    // A plain local avoids the guarded one-time initialization that a
    // function-local static with a non-constant initializer would require.
    const float32x4_t v_05 = vdupq_n_f32(0.5f);

    return vcvtq_u32_f32(vaddq_f32(v, v_05));
}
82
/**
 * Computes a refined per-lane reciprocal (1 / val) of four packed floats.
 *
 * Starts from the vrecpeq_f32 estimate and applies two refinement steps,
 * each multiplying the current value by the vrecpsq_f32 correction factor.
 *
 * @param val four single-precision lanes
 * @return the refined reciprocal of every lane
 */
inline float32x4_t cv_vrecpq_f32(float32x4_t val)
{
    float32x4_t reciprocal = vrecpeq_f32(val);

    // Two refinement iterations on top of the initial estimate.
    reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal);
    reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal);

    return reciprocal;
}
90
/**
 * Computes a refined per-lane reciprocal (1 / val) of two packed floats.
 *
 * Starts from the vrecpe_f32 estimate and applies two refinement steps,
 * each multiplying the current value by the vrecps_f32 correction factor.
 *
 * @param val two single-precision lanes
 * @return the refined reciprocal of every lane
 */
inline float32x2_t cv_vrecp_f32(float32x2_t val)
{
    float32x2_t reciprocal = vrecpe_f32(val);

    // Two refinement iterations on top of the initial estimate.
    reciprocal = vmul_f32(vrecps_f32(val, reciprocal), reciprocal);
    reciprocal = vmul_f32(vrecps_f32(val, reciprocal), reciprocal);

    return reciprocal;
}
98
/**
 * Computes a refined per-lane reciprocal square root (1 / sqrt(val)) of
 * four packed floats.
 *
 * Starts from the vrsqrteq_f32 estimate and applies two refinement steps,
 * each multiplying the current value by the vrsqrtsq_f32 correction factor
 * evaluated on (e * e, val).
 *
 * @param val four single-precision lanes
 * @return the refined reciprocal square root of every lane
 */
inline float32x4_t cv_vrsqrtq_f32(float32x4_t val)
{
    float32x4_t e = vrsqrteq_f32(val);

    // Two refinement iterations on top of the initial estimate.
    e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(e, e), val), e);
    e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(e, e), val), e);

    return e;
}
106
/**
 * Computes a refined per-lane reciprocal square root (1 / sqrt(val)) of
 * two packed floats.
 *
 * Starts from the vrsqrte_f32 estimate and applies two refinement steps,
 * each multiplying the current value by the vrsqrts_f32 correction factor
 * evaluated on (e * e, val).
 *
 * @param val two single-precision lanes
 * @return the refined reciprocal square root of every lane
 */
inline float32x2_t cv_vrsqrt_f32(float32x2_t val)
{
    float32x2_t e = vrsqrte_f32(val);

    // Two refinement iterations on top of the initial estimate.
    e = vmul_f32(vrsqrts_f32(vmul_f32(e, e), val), e);
    e = vmul_f32(vrsqrts_f32(vmul_f32(e, e), val), e);

    return e;
}
114
/**
 * Computes a per-lane square root of four packed floats.
 *
 * Evaluated as the reciprocal of the reciprocal square root:
 * cv_vrecpq_f32(cv_vrsqrtq_f32(val)).
 *
 * @param val four single-precision lanes
 * @return the square root of every lane, as produced by the two helpers
 */
inline float32x4_t cv_vsqrtq_f32(float32x4_t val)
{
    const float32x4_t inv_sqrt = cv_vrsqrtq_f32(val);
    return cv_vrecpq_f32(inv_sqrt);
}
119
/**
 * Computes a per-lane square root of two packed floats.
 *
 * Evaluated as the reciprocal of the reciprocal square root:
 * cv_vrecp_f32(cv_vrsqrt_f32(val)).
 *
 * @param val two single-precision lanes
 * @return the square root of every lane, as produced by the two helpers
 */
inline float32x2_t cv_vsqrt_f32(float32x2_t val)
{
    const float32x2_t inv_sqrt = cv_vrsqrt_f32(val);
    return cv_vrecp_f32(inv_sqrt);
}