43
#ifndef OPENCV_CUDA_DEVICE_WARP_HPP
44
#define OPENCV_CUDA_DEVICE_WARP_HPP
52
namespace
cv
{
namespace
cuda {
namespace
device
59
WARP_SIZE = 1 << LOG_WARP_SIZE,
64
// laneId(): device-only, force-inlined helper declared to return unsigned int.
// The inline PTX below copies the %laneid special register into `ret`
// through a 32-bit register output operand ("=r").
// NOTE(review): the declaration of `ret`, the return statement and the
// enclosing braces are not present in this excerpt — confirm against the
// complete header before relying on this description.
static
__device__ __forceinline__
unsigned
int
laneId()
67
asm(
"mov.u32 %0, %%laneid;"
:
"=r"(ret) );
71
// fill(beg, end, value): device-only helper taking an iterator range and a
// value by const reference; declared to return void.
// The visible loop starts each calling thread at beg + laneId() and advances
// by STRIDE while t < end, so different lanes visit different positions.
// NOTE(review): STRIDE is defined outside this excerpt (presumably equal to
// WARP_SIZE — confirm). The loop body and the function braces are missing
// here; presumably the body stores `value` through `t` — verify.
template<
typename
It,
typename
T>
72
static
__device__ __forceinline__
void
fill(It beg, It end,
const
T& value)
74
for(It t = beg + laneId(); t < end; t += STRIDE)
78
// copy(beg, end, out): device-only helper over the input range [beg, end)
// with an output iterator; declared to return OutIt.
// The visible loop starts the input iterator at beg + laneId() and advances
// both the input iterator and `out` by STRIDE on every iteration.
// NOTE(review): `out` is advanced by STRIDE but, unlike `t`, is never offset
// by laneId() in the visible lines. If the (missing) loop body writes through
// `out` directly, every lane would target the same output slot — verify
// against the full source and the callers.
// NOTE(review): loop body, return statement and braces are not present in
// this excerpt; STRIDE is defined elsewhere.
template<
typename
InIt,
typename
OutIt>
79
static
__device__ __forceinline__ OutIt copy(InIt beg, InIt end, OutIt out)
81
for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
86
// transform(beg, end, out, op): unary overload; device-only helper declared
// to return OutIt. `op` is passed by value.
// The visible loop starts the input iterator at beg + laneId() and advances
// both the input iterator and `out` by STRIDE per iteration.
// NOTE(review): `out` is not offset by laneId() in the visible lines while
// `t` is; whether the missing loop body compensates cannot be determined
// from this excerpt — verify.
// NOTE(review): the loop body (presumably applies `op` to the element at `t`),
// the return statement and the braces are missing here — confirm.
template<
typename
InIt,
typename
OutIt,
class
UnOp>
87
static
__device__ __forceinline__ OutIt
transform(InIt beg, InIt end, OutIt out, UnOp op)
89
for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
94
// transform(beg1, end1, beg2, out, op): binary overload; device-only helper
// declared to return OutIt. Only the first input range has an explicit end;
// the loop condition tests t1 < end1 alone.
// NOTE(review): the loop body (presumably combines the elements at t1 and t2
// with `op`), the return statement and the braces are missing from this
// excerpt — confirm against the full header.
template<
typename
InIt1,
typename
InIt2,
typename
OutIt,
class
BinOp>
95
static
__device__ __forceinline__ OutIt
transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
97
// Query the lane index once and reuse it for both input iterators.
unsigned
int
lane = laneId();
99
InIt1 t1 = beg1 + lane;
100
InIt2 t2 = beg2 + lane;
101
// All three iterators step by STRIDE together.
// NOTE(review): `out` is not offset by `lane` in the visible lines, unlike
// t1 and t2 — verify this is intended.
for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, out += STRIDE)
106
// reduce(ptr, op): device-only helper that combines values held in the
// buffer `ptr` (accessed through a volatile pointer) using the binary
// functor `op`; declared to return T.
// Each visible step reads the element `offset` positions above this lane's
// slot (offsets 16, 8, 4, 2, 1), combines it with the running `partial`,
// and writes the result back to ptr[lane].
// NOTE(review): the listing's line markers jump from 109 to 113, so
// statements are missing between reading the lane index and loading
// `partial`. The first step reads ptr[lane + 16]; presumably a guard
// restricting which lanes execute the steps lives on those missing lines —
// confirm, along with the required buffer size.
// NOTE(review): each step depends on values other lanes stored in the
// previous step, and no barrier is visible between steps — only the
// `volatile` qualifier. On architectures with independent thread scheduling
// (Volta and later) implicit warp synchrony is not guaranteed; check whether
// __syncwarp() or shuffle-based reduction is needed.
// NOTE(review): the return statement and braces are not present here.
template
<
class
T,
class
BinOp>
107
static
__device__ __forceinline__ T
reduce(
volatile
T *ptr, BinOp op)
109
const
unsigned
int
lane = laneId();
113
// Start from this lane's own slot.
T partial = ptr[lane];
115
ptr[lane] = partial = op(partial, ptr[lane + 16]);
116
ptr[lane] = partial = op(partial, ptr[lane + 8]);
117
ptr[lane] = partial = op(partial, ptr[lane + 4]);
118
ptr[lane] = partial = op(partial, ptr[lane + 2]);
119
ptr[lane] = partial = op(partial, ptr[lane + 1]);
125
// yota(beg, end, value): device-only helper over the output range
// [beg, end) with a starting `value` passed by value; declared to return void.
// The visible loop starts each calling thread at beg + lane and advances the
// iterator and `value` by STRIDE together on every iteration.
// NOTE(review): the listing's line markers jump from 128 to 131, so a
// statement between reading the lane index and the loop is missing
// (presumably an initial per-lane adjustment of `value` — confirm).
// The loop body and the braces are also absent from this excerpt.
template<
typename
OutIt,
typename
T>
126
static
__device__ __forceinline__
void
yota(OutIt beg, OutIt end, T value)
128
unsigned
int
lane = laneId();
131
for(OutIt t = beg + lane; t < end; t += STRIDE, value += STRIDE)
CV_EXPORTS_W void reduce(InputArray src, OutputArray dst, int dim, int rtype, int dtype=-1)
Reduces a matrix to a vector.
CV_EXPORTS_W void transform(InputArray src, OutputArray dst, InputArray m)
Performs the matrix transformation of every array element.
"black box" representation of the file storage associated with a file on disk.
Definition:
aruco.hpp:75