43
#ifndef OPENCV_CUDA_VEC_DISTANCE_HPP
44
#define OPENCV_CUDA_VEC_DISTANCE_HPP
48
#include "detail/vec_distance_detail.hpp"
56
namespace
cv
{
namespace
cuda {
namespace
device
58
template
<
typename
T>
struct
L1Dist
60
typedef
int
value_type;
61
typedef
int
result_type;
63
__device__ __forceinline__ L1Dist() : mySum(0) {}
65
__device__ __forceinline__
void
reduceIter(
int
val1,
int
val2)
67
mySum = __sad(val1, val2, mySum);
70
template
<
int
THREAD_DIM> __device__ __forceinline__
void
reduceAll(
int* smem,
int
tid)
72
reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
75
__device__ __forceinline__
operator
int()
const
82
template
<>
struct
L1Dist<float>
84
typedef
float
value_type;
85
typedef
float
result_type;
87
__device__ __forceinline__ L1Dist() : mySum(0.0f) {}
89
__device__ __forceinline__
void
reduceIter(
float
val1,
float
val2)
91
mySum += ::fabs(val1 - val2);
94
template
<
int
THREAD_DIM> __device__ __forceinline__
void
reduceAll(
float* smem,
int
tid)
96
reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
99
__device__ __forceinline__
operator
float()
const
109
typedef
float
value_type;
110
typedef
float
result_type;
112
__device__ __forceinline__ L2Dist() : mySum(0.0f) {}
114
__device__ __forceinline__
void
reduceIter(
float
val1,
float
val2)
116
float
reg = val1 - val2;
120
template
<
int
THREAD_DIM> __device__ __forceinline__
void
reduceAll(
float* smem,
int
tid)
122
reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
125
__device__ __forceinline__
operator
float()
const
135
typedef
int
value_type;
136
typedef
int
result_type;
138
__device__ __forceinline__ HammingDist() : mySum(0) {}
140
__device__ __forceinline__
void
reduceIter(
int
val1,
int
val2)
142
mySum += __popc(val1 ^ val2);
145
template
<
int
THREAD_DIM> __device__ __forceinline__
void
reduceAll(
int* smem,
int
tid)
147
reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
150
__device__ __forceinline__
operator
int()
const
159
template
<
int
THREAD_DIM,
typename
Dist,
typename
T1,
typename
T2>
160
__device__
void
calcVecDiffGlobal(
const
T1* vec1,
const
T2* vec2,
int
len, Dist& dist,
typename
Dist::result_type* smem,
int
tid)
162
for
(
int
i = tid; i < len; i += THREAD_DIM)
165
ForceGlob<T1>::Load(vec1, i, val1);
168
ForceGlob<T2>::Load(vec2, i, val2);
170
dist.reduceIter(val1, val2);
173
dist.reduceAll<THREAD_DIM>(smem, tid);
177
template
<
int
THREAD_DIM,
int
MAX_LEN,
bool
LEN_EQ_MAX_LEN,
typename
Dist,
typename
T1,
typename
T2>
178
__device__ __forceinline__
void
calcVecDiffCached(
const
T1* vecCached,
const
T2* vecGlob,
int
len, Dist& dist,
typename
Dist::result_type* smem,
int
tid)
180
vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>::calc(vecCached, vecGlob, len, dist, tid);
182
dist.reduceAll<THREAD_DIM>(smem, tid);
186
template
<
int
THREAD_DIM,
typename
T1>
struct
VecDiffGlobal
188
explicit
__device__ __forceinline__ VecDiffGlobal(
const
T1* vec1_,
int
= 0,
void* = 0,
int
= 0,
int
= 0)
193
template
<
typename
T2,
typename
Dist>
194
__device__ __forceinline__
void
calc(
const
T2* vec2,
int
len, Dist& dist,
typename
Dist::result_type* smem,
int
tid)
const
196
calcVecDiffGlobal<THREAD_DIM>(vec1, vec2, len, dist, smem, tid);
203
template
<
int
THREAD_DIM,
int
MAX_LEN,
bool
LEN_EQ_MAX_LEN,
typename
U>
struct
VecDiffCachedRegister
205
template
<
typename
T1> __device__ __forceinline__ VecDiffCachedRegister(
const
T1* vec1,
int
len, U* smem,
int
glob_tid,
int
tid)
208
smem[glob_tid] = vec1[glob_tid];
211
U* vec1ValsPtr = vec1Vals;
214
for
(
int
i = tid; i < MAX_LEN; i += THREAD_DIM)
215
*vec1ValsPtr++ = smem[i];
220
template
<
typename
T2,
typename
Dist>
221
__device__ __forceinline__
void
calc(
const
T2* vec2,
int
len, Dist& dist,
typename
Dist::result_type* smem,
int
tid)
const
223
calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>(vec1Vals, vec2, len, dist, smem, tid);
226
U vec1Vals[MAX_LEN / THREAD_DIM];
"black box" representation of the file storage associated with a file on disk.
Definition:
aruco.hpp:75