43
#ifndef OPENCV_CUDA_DEVICE_WARP_HPP
 
44
#define OPENCV_CUDA_DEVICE_WARP_HPP
 
52
namespace
cv
{
namespace
cuda {
namespace
device
 
59
WARP_SIZE     = 1 << LOG_WARP_SIZE,
 
64
static
__device__ __forceinline__
unsigned
int
laneId()
 
67
asm(
"mov.u32 %0, %%laneid;"
:
"=r"(ret) );
 
71
template<
typename
It,
typename
T>
 
72
static
__device__ __forceinline__
void
fill(It beg, It end,
const
T& value)
 
74
for(It t = beg + laneId(); t < end; t += STRIDE)
 
78
template<
typename
InIt,
typename
OutIt>
 
79
static
__device__ __forceinline__ OutIt copy(InIt beg, InIt end, OutIt out)
 
81
for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
 
86
template<
typename
InIt,
typename
OutIt,
class
UnOp>
 
87
static
__device__ __forceinline__ OutIt
transform(InIt beg, InIt end, OutIt out, UnOp op)
 
89
for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
 
94
template<
typename
InIt1,
typename
InIt2,
typename
OutIt,
class
BinOp>
 
95
static
__device__ __forceinline__ OutIt
transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
 
97
unsigned
int
lane = laneId();
 
99
InIt1 t1 = beg1 + lane;
 
100
InIt2 t2 = beg2 + lane;
 
101
for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, out += STRIDE)
 
106
template
<
class
T,
class
BinOp>
 
107
static
__device__ __forceinline__ T
reduce(
volatile
T *ptr, BinOp op)
 
109
const
unsigned
int
lane = laneId();
 
113
T partial = ptr[lane];
 
115
ptr[lane] = partial = op(partial, ptr[lane + 16]);
 
116
ptr[lane] = partial = op(partial, ptr[lane + 8]);
 
117
ptr[lane] = partial = op(partial, ptr[lane + 4]);
 
118
ptr[lane] = partial = op(partial, ptr[lane + 2]);
 
119
ptr[lane] = partial = op(partial, ptr[lane + 1]);
 
125
template<
typename
OutIt,
typename
T>
 
126
static
__device__ __forceinline__
void
yota(OutIt beg, OutIt end, T value)
 
128
unsigned
int
lane = laneId();
 
131
for(OutIt t = beg + lane; t < end; t += STRIDE, value += STRIDE)
 
CV_EXPORTS_W void reduce(InputArray src, OutputArray dst, int dim, int rtype, int dtype=-1)
Reduces a matrix to a vector.
 
CV_EXPORTS_W void transform(InputArray src, OutputArray dst, InputArray m)
Performs the matrix transformation of every array element.
 
"black box" representation of the file storage associated with a file on disk.
Definition:
aruco.hpp:75