cpp/ja/vec__distance_8hpp_source.html

/*M///////////////////////////////////////////////////////////////////////////////////////


//


//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.


//


//  By downloading, copying, installing or using the software you agree to this license.


//  If you do not agree to this license, do not download, install,


//  copy or use the software.


//


//


//                           License Agreement


//                For Open Source Computer Vision Library


//


// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.


// Copyright (C) 2009, Willow Garage Inc., all rights reserved.


// Third party copyrights are property of their respective owners.


//


// Redistribution and use in source and binary forms, with or without modification,


// are permitted provided that the following conditions are met:


//


//   * Redistribution's of source code must retain the above copyright notice,


//     this list of conditions and the following disclaimer.


//


//   * Redistribution's in binary form must reproduce the above copyright notice,


//     this list of conditions and the following disclaimer in the documentation


//     and/or other materials provided with the distribution.


//


//   * The name of the copyright holders may not be used to endorse or promote products


//     derived from this software without specific prior written permission.


//


// This software is provided by the copyright holders and contributors "as is" and


// any express or implied warranties, including, but not limited to, the implied


// warranties of merchantability and fitness for a particular purpose are disclaimed.


// In no event shall the Intel Corporation or contributors be liable for any direct,


// indirect, incidental, special, exemplary, or consequential damages


// (including, but not limited to, procurement of substitute goods or services;


// loss of use, data, or profits; or business interruption) however caused


// and on any theory of liability, whether in contract, strict liability,


// or tort (including negligence or otherwise) arising in any way out of


// the use of this software, even if advised of the possibility of such damage.


//


//M*/


#ifndef OPENCV_CUDA_VEC_DISTANCE_HPP


#define OPENCV_CUDA_VEC_DISTANCE_HPP


#include "reduce.hpp"


#include "functional.hpp"


#include "detail/vec_distance_detail.hpp"


namespace

cv
{
namespace
cuda {
namespace
device


{


// Per-thread accumulator for the L1 (sum of absolute differences) distance
// over integer inputs. The template parameter T is not referenced in the
// body; this primary template always accumulates in int.
template <typename T>
struct L1Dist
{
    typedef int value_type;
    typedef int result_type;

    // Starts with an empty (zero) partial sum.
    __device__ __forceinline__ L1Dist()
        : mySum(0)
    {
    }

    // Folds one pair of elements into the partial sum.
    // __sad(a, b, c) is the CUDA integer intrinsic yielding |a - b| + c.
    __device__ __forceinline__ void reduceIter(int val1, int val2)
    {
        mySum = __sad(val1, val2, mySum);
    }

    // Combines the partial sums with the project's reduce<THREAD_DIM>() helper
    // using addition; smem is the scratch buffer handed to that helper and
    // tid the calling thread's index.
    // NOTE(review): presumably a block-wide reduction leaving the total in
    // mySum - confirm against reduce.hpp.
    template <int THREAD_DIM>
    __device__ __forceinline__ void reduceAll(int* smem, int tid)
    {
        reduce<THREAD_DIM>(smem, mySum, tid, plus<result_type>());
    }

    // Reads back the accumulated distance.
    __device__ __forceinline__ operator int() const
    {
        return mySum;
    }

    int mySum;
};


// Specialization of L1Dist for float inputs: accumulates the sum of
// absolute differences in single precision.
template <>
struct L1Dist<float>
{
    typedef float value_type;
    typedef float result_type;

    // Starts with an empty (zero) partial sum.
    __device__ __forceinline__ L1Dist()
        : mySum(0.0f)
    {
    }

    // Folds |val1 - val2| into the partial sum.
    // fabsf is used explicitly so the computation stays in single precision
    // regardless of which fabs overloads are visible at this point (the
    // numeric result is the same as before).
    __device__ __forceinline__ void reduceIter(float val1, float val2)
    {
        mySum += ::fabsf(val1 - val2);
    }

    // Combines the partial sums with the project's reduce<THREAD_DIM>() helper
    // using addition; smem is the scratch buffer handed to that helper and
    // tid the calling thread's index.
    // NOTE(review): presumably a block-wide reduction leaving the total in
    // mySum - confirm against reduce.hpp.
    template <int THREAD_DIM>
    __device__ __forceinline__ void reduceAll(float* smem, int tid)
    {
        reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
    }

    // Reads back the accumulated distance.
    __device__ __forceinline__ operator float() const
    {
        return mySum;
    }

    float mySum;
};


// Per-thread accumulator for the L2 (Euclidean) distance over float inputs.
// The running value is the sum of squared differences; the square root is
// applied only when the object is converted to float.
struct L2Dist
{
    typedef float value_type;
    typedef float result_type;

    // Starts with an empty (zero) sum of squares.
    __device__ __forceinline__ L2Dist()
        : mySum(0.0f)
    {
    }

    // Folds (val1 - val2)^2 into the running sum of squares.
    __device__ __forceinline__ void reduceIter(float val1, float val2)
    {
        const float diff = val1 - val2;
        mySum += diff * diff;
    }

    // Combines the partial sums of squares with the project's
    // reduce<THREAD_DIM>() helper using addition; smem is the scratch buffer
    // handed to that helper and tid the calling thread's index.
    // NOTE(review): presumably a block-wide reduction leaving the total in
    // mySum - confirm against reduce.hpp.
    template <int THREAD_DIM>
    __device__ __forceinline__ void reduceAll(float* smem, int tid)
    {
        reduce<THREAD_DIM>(smem, mySum, tid, plus<result_type>());
    }

    // Returns the square root of the accumulated sum of squares.
    __device__ __forceinline__ operator float() const
    {
        return sqrtf(mySum);
    }

    float mySum;
};


// Per-thread accumulator for the Hamming distance: counts the bit positions
// in which the two int inputs differ.
struct HammingDist
{
    typedef int value_type;
    typedef int result_type;

    // Starts with an empty (zero) bit count.
    __device__ __forceinline__ HammingDist()
        : mySum(0)
    {
    }

    // XOR leaves a 1 in every bit position where the inputs differ;
    // __popc (CUDA intrinsic) counts those set bits.
    __device__ __forceinline__ void reduceIter(int val1, int val2)
    {
        const int differingBits = val1 ^ val2;
        mySum += __popc(differingBits);
    }

    // Combines the partial counts with the project's reduce<THREAD_DIM>()
    // helper using addition; smem is the scratch buffer handed to that helper
    // and tid the calling thread's index.
    // NOTE(review): presumably a block-wide reduction leaving the total in
    // mySum - confirm against reduce.hpp.
    template <int THREAD_DIM>
    __device__ __forceinline__ void reduceAll(int* smem, int tid)
    {
        reduce<THREAD_DIM>(smem, mySum, tid, plus<result_type>());
    }

    // Reads back the accumulated bit count.
    __device__ __forceinline__ operator int() const
    {
        return mySum;
    }

    int mySum;
};


// Computes the distance between two vectors that are both read through
// ForceGlob<>::Load (global memory, per the original comment).
//
//   THREAD_DIM - step between the elements handled by one thread
//   vec1, vec2 - input vectors, each indexed from 0 to len - 1
//   len        - number of elements to compare
//   dist       - distance accumulator; receives reduceIter() for every
//                element pair visited by this thread, then reduceAll()
//   smem       - scratch buffer forwarded to dist.reduceAll()
//   tid        - index of the calling thread; also its first element index
template <int THREAD_DIM, typename Dist, typename T1, typename T2>
__device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist,
                                  typename Dist::result_type* smem, int tid)
{
    // Each thread starts at element tid and advances by THREAD_DIM.
    for (int i = tid; i < len; i += THREAD_DIM)
    {
        T1 val1;
        ForceGlob<T1>::Load(vec1, i, val1);

        T2 val2;
        ForceGlob<T2>::Load(vec2, i, val2);

        dist.reduceIter(val1, val2);
    }

    // Dist is a dependent type, so the member template call needs the
    // "template" disambiguator; without it a conforming compiler parses
    // '<' as a less-than operator.
    dist.template reduceAll<THREAD_DIM>(smem, tid);
}


// Computes the distance between two vectors where the first one has already
// been cached (in registers or shared memory, per the original comment) and
// the second one is read from global memory.
//
//   THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN - forwarded unchanged to
//                vec_distance_detail::VecDiffCachedCalculator
//   vecCached  - cached copy of the first vector
//   vecGlob    - second vector
//   len        - number of elements to compare
//   dist       - distance accumulator updated by the calculator and then
//                finalized with reduceAll()
//   smem       - scratch buffer forwarded to dist.reduceAll()
//   tid        - index of the calling thread
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T1, typename T2>
__device__ __forceinline__ void calcVecDiffCached(const T1* vecCached, const T2* vecGlob, int len, Dist& dist,
                                                  typename Dist::result_type* smem, int tid)
{
    typedef vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN> Calculator;

    // Per-thread accumulation is delegated to the detail helper.
    Calculator::calc(vecCached, vecGlob, len, dist, tid);

    // Dist is a dependent type, so the member template call needs the
    // "template" disambiguator; without it a conforming compiler parses
    // '<' as a less-than operator.
    dist.template reduceAll<THREAD_DIM>(smem, tid);
}


// Distance calculator that keeps only a pointer to the first vector and
// reads both vectors through calcVecDiffGlobal() on every calc() call.
template <int THREAD_DIM, typename T1>
struct VecDiffGlobal
{
    // Stores the pointer to the first vector. The four trailing unnamed
    // parameters are ignored.
    // NOTE(review): they appear to mirror the (len, smem, glob_tid, tid)
    // arguments of VecDiffCachedRegister's constructor so both types can be
    // constructed with the same argument list - confirm against callers.
    explicit __device__ __forceinline__ VecDiffGlobal(const T1* vec1_, int = 0, void* = 0, int = 0, int = 0)
        : vec1(vec1_)
    {
    }

    // Accumulates into dist the distance between the stored vector and vec2
    // over len elements; smem and tid are passed straight through to
    // calcVecDiffGlobal().
    template <typename T2, typename Dist>
    __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist,
                                         typename Dist::result_type* smem, int tid) const
    {
        calcVecDiffGlobal<THREAD_DIM>(vec1, vec2, len, dist, smem, tid);
    }

    const T1* vec1;
};


// Distance calculator that caches the first vector in a per-thread array
// (register memory, per the original comment) at construction time; the
// second vector is read from global memory in calc().
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename U>
struct VecDiffCachedRegister
{
    // Copies vec1 into this thread's vec1Vals cache in two steps.
    //
    //   vec1     - first vector, read at index glob_tid
    //   len      - number of valid elements in vec1
    //   smem     - staging buffer written at glob_tid and read back at
    //              tid, tid + THREAD_DIM, ... (presumably block shared
    //              memory - confirm against callers)
    //   glob_tid - index used for the staging write
    //   tid      - first index this thread reads back from smem
    template <typename T1>
    __device__ __forceinline__ VecDiffCachedRegister(const T1* vec1, int len, U* smem, int glob_tid, int tid)
    {
        // Step 1: stage one element per thread, guarded so threads past the
        // end of the vector write nothing.
        if (glob_tid < len)
        {
            smem[glob_tid] = vec1[glob_tid];
        }

        // All staging writes must land before any thread reads them back.
        __syncthreads();

        // Step 2: pull this thread's elements out of the staging buffer.
        // The loop bound is MAX_LEN, not len, so entries at or beyond len are
        // read without having been written above.
        // NOTE(review): vec1Vals has MAX_LEN / THREAD_DIM slots, which appears
        // to assume MAX_LEN is a multiple of THREAD_DIM - confirm.
        U* cacheSlot = vec1Vals;

        #pragma unroll
        for (int idx = tid; idx < MAX_LEN; idx += THREAD_DIM)
        {
            *cacheSlot = smem[idx];
            ++cacheSlot;
        }

        // Barrier after the read-back, before the constructor returns.
        __syncthreads();
    }

    // Accumulates into dist the distance between the cached vector and vec2
    // over len elements; smem and tid are passed straight through to
    // calcVecDiffCached().
    template <typename T2, typename Dist>
    __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist,
                                         typename Dist::result_type* smem, int tid) const
    {
        calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>(vec1Vals, vec2, len, dist, smem, tid);
    }

    U vec1Vals[MAX_LEN / THREAD_DIM];
};


}}}
// namespace cv { namespace cuda { namespace device


#endif

// OPENCV_CUDA_VEC_DISTANCE_HPP


functional.hpp


cv

"black box" representation of the file storage associated with a file on disk.


Definition:
aruco.hpp:75


reduce.hpp