43
#ifndef OPENCV_CUDA_VEC_DISTANCE_HPP
 
44
#define OPENCV_CUDA_VEC_DISTANCE_HPP
 
48
#include "detail/vec_distance_detail.hpp"
 
56
namespace
cv
{
namespace
cuda {
namespace
device
 
58
// L1 (sum of absolute differences) accumulator for integer-valued elements.
//
// Usage pattern visible in this file: every thread folds its element pairs
// into a private running sum with reduceIter(), reduceAll() then combines the
// per-thread sums, and the conversion operator exposes the result.
//
// NOTE(review): T is not referenced by any member shown here; value_type and
// result_type are int regardless of T.
template <typename T>
struct L1Dist
{
    typedef int value_type;
    typedef int result_type;

    // Starts with an empty (zero) running sum.
    __device__ __forceinline__ L1Dist() : mySum(0) {}

    // Folds one element pair into the running sum.
    // __sad(x, y, z) evaluates |x - y| + z, so the accumulation and the
    // absolute difference are done by a single intrinsic.
    __device__ __forceinline__ void reduceIter(int val1, int val2)
    {
        mySum = __sad(val1, val2, mySum);
    }

    // Combines the running sums of the cooperating threads.
    //   THREAD_DIM - number of cooperating threads (compile-time).
    //   smem       - scratch buffer handed to reduce<>().
    //   tid        - index of the calling thread among the THREAD_DIM threads.
    // NOTE(review): reduce<>() and plus<> are defined outside this view;
    // presumably the combined sum is written back into mySum - confirm.
    template <int THREAD_DIM>
    __device__ __forceinline__ void reduceAll(int* smem, int tid)
    {
        reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
    }

    // Yields the accumulated distance.
    // NOTE(review): the operator body and the member declaration below were
    // missing from the reviewed text; they are restored from the constructor
    // initialiser (mySum(0)) and the int conversion type.
    __device__ __forceinline__ operator int() const
    {
        return mySum;
    }

    int mySum;
};
 
82
// L1 (sum of absolute differences) accumulator specialised for float
// elements: same interface as the primary template, with a float running sum.
template <>
struct L1Dist<float>
{
    typedef float value_type;
    typedef float result_type;

    // Starts with an empty (zero) running sum.
    __device__ __forceinline__ L1Dist() : mySum(0.0f) {}

    // Adds |val1 - val2| to the running sum.
    __device__ __forceinline__ void reduceIter(float val1, float val2)
    {
        mySum += ::fabs(val1 - val2);
    }

    // Combines the running sums of the cooperating threads.
    //   THREAD_DIM - number of cooperating threads (compile-time).
    //   smem       - scratch buffer handed to reduce<>().
    //   tid        - index of the calling thread among the THREAD_DIM threads.
    // NOTE(review): reduce<>() and plus<> are defined outside this view;
    // presumably the combined sum is written back into mySum - confirm.
    template <int THREAD_DIM>
    __device__ __forceinline__ void reduceAll(float* smem, int tid)
    {
        reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
    }

    // Yields the accumulated distance.
    // NOTE(review): the operator body and the member declaration below were
    // missing from the reviewed text; they are restored from the constructor
    // initialiser (mySum(0.0f)) and the float conversion type.
    __device__ __forceinline__ operator float() const
    {
        return mySum;
    }

    float mySum;
};
 
109
// L2 (Euclidean) distance accumulator for float elements.
//
// NOTE(review): the struct header was not present in the reviewed text; the
// name is taken from the constructor. Confirm it is a plain (non-template)
// struct.
struct L2Dist
{
    typedef float value_type;
    typedef float result_type;

    // Starts with an empty (zero) running sum.
    __device__ __forceinline__ L2Dist() : mySum(0.0f) {}

    // Folds one element pair into the running sum.
    // NOTE(review): only the computation of `reg` was visible; accumulating
    // its square is the reconstruction implied by the L2 name - confirm.
    __device__ __forceinline__ void reduceIter(float val1, float val2)
    {
        float reg = val1 - val2;
        mySum += reg * reg;
    }

    // Combines the running sums of the cooperating threads.
    //   THREAD_DIM - number of cooperating threads (compile-time).
    //   smem       - scratch buffer handed to reduce<>().
    //   tid        - index of the calling thread among the THREAD_DIM threads.
    // NOTE(review): reduce<>() and plus<> are defined outside this view;
    // presumably the combined sum is written back into mySum - confirm.
    template <int THREAD_DIM>
    __device__ __forceinline__ void reduceAll(float* smem, int tid)
    {
        reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
    }

    // Yields the distance.
    // NOTE(review): the operator body was missing from the reviewed text.
    // Returning the square root of the summed squares matches the Euclidean
    // norm; confirm callers do not expect the squared distance.
    __device__ __forceinline__ operator float() const
    {
        return sqrtf(mySum);
    }

    float mySum;
};
 
135
// Hamming distance accumulator: counts differing bits between 32-bit words.
//
// NOTE(review): the struct header was not present in the reviewed text; the
// name is taken from the constructor. Confirm it is a plain (non-template)
// struct.
struct HammingDist
{
    typedef int value_type;
    typedef int result_type;

    // Starts with an empty (zero) running count.
    __device__ __forceinline__ HammingDist() : mySum(0) {}

    // Adds the number of bit positions in which val1 and val2 differ:
    // XOR marks the differing bits, __popc counts the set bits.
    __device__ __forceinline__ void reduceIter(int val1, int val2)
    {
        mySum += __popc(val1 ^ val2);
    }

    // Combines the running counts of the cooperating threads.
    //   THREAD_DIM - number of cooperating threads (compile-time).
    //   smem       - scratch buffer handed to reduce<>().
    //   tid        - index of the calling thread among the THREAD_DIM threads.
    // NOTE(review): reduce<>() and plus<> are defined outside this view;
    // presumably the combined count is written back into mySum - confirm.
    template <int THREAD_DIM>
    __device__ __forceinline__ void reduceAll(int* smem, int tid)
    {
        reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
    }

    // Yields the accumulated bit count.
    // NOTE(review): the operator body and the member declaration below were
    // missing from the reviewed text; they are restored from the constructor
    // initialiser (mySum(0)) and the int conversion type.
    __device__ __forceinline__ operator int() const
    {
        return mySum;
    }

    int mySum;
};
 
159
// Accumulates the distance between two vectors that are both read through
// ForceGlob<>::Load, then combines the per-thread partial results.
//
//   THREAD_DIM - number of threads cooperating on one vector pair; thread
//                `tid` handles elements tid, tid + THREAD_DIM, ...
//   vec1, vec2 - input vectors, each with at least `len` elements.
//   len        - number of elements to compare.
//   dist       - per-thread accumulator (reduceIter / reduceAll interface).
//   smem       - scratch buffer forwarded to dist.reduceAll().
//   tid        - index of the calling thread, expected in [0, THREAD_DIM).
//
// NOTE(review): ForceGlob<> is defined outside this view; it is assumed to
// load element i of the pointer into its third argument - confirm.
template <int THREAD_DIM, typename Dist, typename T1, typename T2>
__device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist,
                                  typename Dist::result_type* smem, int tid)
{
    for (int i = tid; i < len; i += THREAD_DIM)
    {
        // The loaded values need locals of their own; the reviewed text used
        // val1/val2 without declaring them.
        T1 val1;
        ForceGlob<T1>::Load(vec1, i, val1);

        T2 val2;
        ForceGlob<T2>::Load(vec2, i, val2);

        dist.reduceIter(val1, val2);
    }

    // `dist` has a dependent type, so the member template must be named with
    // the `template` disambiguator to be parsed as a template call.
    dist.template reduceAll<THREAD_DIM>(smem, tid);
}
 
177
// Accumulates the distance between a cached vector and a second vector, then
// combines the per-thread partial results.
//
//   THREAD_DIM     - number of threads cooperating on one vector pair.
//   MAX_LEN        - compile-time upper bound forwarded to the calculator.
//   LEN_EQ_MAX_LEN - compile-time flag forwarded to the calculator.
//   vecCached      - the calling thread's cached portion of the first vector.
//   vecGlob        - second vector.
//   len            - number of elements to compare.
//   dist           - per-thread accumulator (reduceIter / reduceAll interface).
//   smem           - scratch buffer forwarded to dist.reduceAll().
//   tid            - index of the calling thread.
//
// NOTE(review): VecDiffCachedCalculator lives in vec_distance_detail, outside
// this view; the exact meaning of MAX_LEN / LEN_EQ_MAX_LEN is defined there.
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T1, typename T2>
__device__ __forceinline__ void calcVecDiffCached(const T1* vecCached, const T2* vecGlob, int len, Dist& dist,
                                                  typename Dist::result_type* smem, int tid)
{
    typedef vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN> Calculator;
    Calculator::calc(vecCached, vecGlob, len, dist, tid);

    // `dist` has a dependent type, so the member template must be named with
    // the `template` disambiguator to be parsed as a template call.
    dist.template reduceAll<THREAD_DIM>(smem, tid);
}
 
186
// Distance policy that keeps only a pointer to the first vector and reads
// both vectors through calcVecDiffGlobal on every calc() call.
template <int THREAD_DIM, typename T1>
struct VecDiffGlobal
{
    // Remembers the first vector. The four unnamed, defaulted parameters are
    // ignored; they let this constructor be called with the same argument
    // list as VecDiffCachedRegister's (vec1, len, smem, glob_tid, tid).
    //
    // NOTE(review): the constructor body and the member declaration were
    // missing from the reviewed text; storing vec1_ is required because
    // calc() reads `vec1`.
    explicit __device__ __forceinline__ VecDiffGlobal(const T1* vec1_, int = 0, void* = 0, int = 0, int = 0)
        : vec1(vec1_)
    {
    }

    // Accumulates the distance between the stored vector and vec2 into dist.
    //   vec2 - second vector, at least `len` elements.
    //   len  - number of elements to compare.
    //   dist - per-thread accumulator (reduceIter / reduceAll interface).
    //   smem - scratch buffer forwarded to the reduction.
    //   tid  - index of the calling thread among the THREAD_DIM threads.
    template <typename T2, typename Dist>
    __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist,
                                         typename Dist::result_type* smem, int tid) const
    {
        calcVecDiffGlobal<THREAD_DIM>(vec1, vec2, len, dist, smem, tid);
    }

    const T1* vec1;
};
 
203
// Distance policy that caches the calling thread's share of the first vector
// in the per-thread array vec1Vals and compares it against a second vector
// through calcVecDiffCached.
//
// Preconditions:
//   * MAX_LEN must be a multiple of THREAD_DIM: vec1Vals holds
//     MAX_LEN / THREAD_DIM elements, and the copy loop in the constructor
//     writes one element per stride of THREAD_DIM below MAX_LEN.
//   * The constructor contains __syncthreads(), so every thread of the block
//     must construct the object (no divergent construction).
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename U>
struct VecDiffCachedRegister
{
    // Stages vec1 in smem, then copies this thread's strided slice of it
    // into vec1Vals.
    //   vec1     - first vector, `len` elements.
    //   len      - number of valid elements in vec1.
    //   smem     - staging buffer indexed by glob_tid and by [0, MAX_LEN).
    //              NOTE(review): assumed to be block-shared memory written
    //              cooperatively by all threads - confirm.
    //   glob_tid - index of the element this thread stages.
    //   tid      - index of this thread among the THREAD_DIM threads.
    template <typename T1>
    __device__ __forceinline__ VecDiffCachedRegister(const T1* vec1, int len, U* smem, int glob_tid, int tid)
    {
        // Only stage elements that exist; threads past the end of the vector
        // must not read vec1 out of bounds. Slots at or beyond len are left
        // as they were.
        if (glob_tid < len)
            smem[glob_tid] = vec1[glob_tid];

        // The loop below reads slots written by other threads, so all stores
        // have to be visible first.
        __syncthreads();

        U* vec1ValsPtr = vec1Vals;

        #pragma unroll
        for (int i = tid; i < MAX_LEN; i += THREAD_DIM)
            *vec1ValsPtr++ = smem[i];

        // Keep smem intact until every thread has finished copying, so the
        // caller can reuse the buffer right after construction.
        __syncthreads();
    }

    // Accumulates the distance between the cached vector and vec2 into dist.
    //   vec2 - second vector.
    //   len  - number of elements to compare.
    //   dist - per-thread accumulator (reduceIter / reduceAll interface).
    //   smem - scratch buffer forwarded to the reduction.
    //   tid  - index of the calling thread among the THREAD_DIM threads.
    template <typename T2, typename Dist>
    __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist,
                                         typename Dist::result_type* smem, int tid) const
    {
        calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>(vec1Vals, vec2, len, dist, smem, tid);
    }

    U vec1Vals[MAX_LEN / THREAD_DIM];
};
 
"black box" representation of the file storage associated with a file on disk.
Definition:
aruco.hpp:75