43
#ifndef OPENCV_CUDA_WARP_SHUFFLE_HPP
44
#define OPENCV_CUDA_WARP_SHUFFLE_HPP
52
namespace
cv
{
namespace
cuda {
namespace
device
// Compatibility shim for CUDA 9 and newer: the mask-less warp shuffle
// intrinsics are mapped onto their *_sync replacements so the wrappers in
// this header can keep calling __shfl / __shfl_up / __shfl_down.
//
// The participation mask is hard-wired to 0xFFFFFFFF (all 32 lanes), so on
// these toolkits every lane of the warp is expected to execute the shuffle.
#if __CUDACC_VER_MAJOR__ >= 9
#  define __shfl(x, y, z) __shfl_sync(0xFFFFFFFFU, x, y, z)
#  define __shfl_up(x, y, z) __shfl_up_sync(0xFFFFFFFFU, x, y, z)
#  define __shfl_down(x, y, z) __shfl_down_sync(0xFFFFFFFFU, x, y, z)
#endif
/**
 * Generic warp shuffle: forwards to the __shfl intrinsic.
 *
 * @param val      Value contributed by the calling lane.
 * @param srcLane  Lane index passed to the intrinsic as the source lane.
 * @param width    Segment width passed to the intrinsic; defaults to warpSize.
 * @return         Result of __shfl(val, srcLane, width) on compute
 *                 capability 3.0+; a value-initialized T otherwise.
 *
 * NOTE(review): T presumably has to be a type the intrinsic is overloaded
 * for (e.g. int, float) - confirm against the callers.
 */
template <typename T>
__device__ __forceinline__ T shfl(T val, int srcLane, int width = warpSize)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
    return __shfl(val, srcLane, width);
#else
    // Guard is false (host pass or pre-sm_30 target): keep the function
    // well-formed by returning a value-initialized result.
    return T();
#endif
}
/**
 * Warp shuffle overload for unsigned int.
 *
 * The value is cast to int for the intrinsic call and the result is cast
 * back to unsigned int on return.
 *
 * @param val      Value contributed by the calling lane.
 * @param srcLane  Lane index passed to the intrinsic as the source lane.
 * @param width    Segment width passed to the intrinsic; defaults to warpSize.
 * @return         Shuffled value on compute capability 3.0+; 0 otherwise.
 */
__device__ __forceinline__ unsigned int shfl(unsigned int val, int srcLane, int width = warpSize)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
    return (unsigned int) __shfl((int) val, srcLane, width);
#else
    // Guard is false (host pass or pre-sm_30 target): nothing to shuffle.
    return 0;
#endif
}
/**
 * Warp shuffle overload for double.
 *
 * The double is split into its low and high 32-bit words, each word is
 * shuffled separately with the int intrinsic, and the two received words
 * are reassembled into a double.
 *
 * @param val      Value contributed by the calling lane.
 * @param srcLane  Lane index passed to the intrinsic as the source lane.
 * @param width    Segment width passed to the intrinsic; defaults to warpSize.
 * @return         Reassembled double on compute capability 3.0+; 0.0 otherwise.
 */
__device__ __forceinline__ double shfl(double val, int srcLane, int width = warpSize)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
    int lo = __double2loint(val);
    int hi = __double2hiint(val);

    // Both halves use the same srcLane/width, so they come from the same lane.
    lo = __shfl(lo, srcLane, width);
    hi = __shfl(hi, srcLane, width);

    return __hiloint2double(hi, lo);
#else
    // Guard is false (host pass or pre-sm_30 target): nothing to shuffle.
    return 0.0;
#endif
}
/**
 * Generic warp shuffle-down: forwards to the __shfl_down intrinsic.
 *
 * @param val    Value contributed by the calling lane.
 * @param delta  Lane offset passed to the intrinsic.
 * @param width  Segment width passed to the intrinsic; defaults to warpSize.
 * @return       Result of __shfl_down(val, delta, width) on compute
 *               capability 3.0+; a value-initialized T otherwise.
 *
 * NOTE(review): T presumably has to be a type the intrinsic is overloaded
 * for (e.g. int, float) - confirm against the callers.
 */
template <typename T>
__device__ __forceinline__ T shfl_down(T val, unsigned int delta, int width = warpSize)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
    return __shfl_down(val, delta, width);
#else
    // Guard is false (host pass or pre-sm_30 target): keep the function
    // well-formed by returning a value-initialized result.
    return T();
#endif
}
/**
 * Warp shuffle-down overload for unsigned int.
 *
 * The value is cast to int for the intrinsic call and the result is cast
 * back to unsigned int on return.
 *
 * @param val    Value contributed by the calling lane.
 * @param delta  Lane offset passed to the intrinsic.
 * @param width  Segment width passed to the intrinsic; defaults to warpSize.
 * @return       Shuffled value on compute capability 3.0+; 0 otherwise.
 */
__device__ __forceinline__ unsigned int shfl_down(unsigned int val, unsigned int delta, int width = warpSize)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
    return (unsigned int) __shfl_down((int) val, delta, width);
#else
    // Guard is false (host pass or pre-sm_30 target): nothing to shuffle.
    return 0;
#endif
}
/**
 * Warp shuffle-down overload for double.
 *
 * The double is split into its low and high 32-bit words, each word is
 * shuffled separately with the int intrinsic, and the two received words
 * are reassembled into a double.
 *
 * @param val    Value contributed by the calling lane.
 * @param delta  Lane offset passed to the intrinsic.
 * @param width  Segment width passed to the intrinsic; defaults to warpSize.
 * @return       Reassembled double on compute capability 3.0+; 0.0 otherwise.
 */
__device__ __forceinline__ double shfl_down(double val, unsigned int delta, int width = warpSize)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
    int lo = __double2loint(val);
    int hi = __double2hiint(val);

    // Both halves use the same delta/width, so they come from the same lane.
    lo = __shfl_down(lo, delta, width);
    hi = __shfl_down(hi, delta, width);

    return __hiloint2double(hi, lo);
#else
    // Guard is false (host pass or pre-sm_30 target): nothing to shuffle.
    return 0.0;
#endif
}
/**
 * Generic warp shuffle-up: forwards to the __shfl_up intrinsic.
 *
 * @param val    Value contributed by the calling lane.
 * @param delta  Lane offset passed to the intrinsic.
 * @param width  Segment width passed to the intrinsic; defaults to warpSize.
 * @return       Result of __shfl_up(val, delta, width) on compute
 *               capability 3.0+; a value-initialized T otherwise.
 *
 * NOTE(review): T presumably has to be a type the intrinsic is overloaded
 * for (e.g. int, float) - confirm against the callers.
 */
template <typename T>
__device__ __forceinline__ T shfl_up(T val, unsigned int delta, int width = warpSize)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
    return __shfl_up(val, delta, width);
#else
    // Guard is false (host pass or pre-sm_30 target): keep the function
    // well-formed by returning a value-initialized result.
    return T();
#endif
}
/**
 * Warp shuffle-up overload for unsigned int.
 *
 * The value is cast to int for the intrinsic call and the result is cast
 * back to unsigned int on return.
 *
 * @param val    Value contributed by the calling lane.
 * @param delta  Lane offset passed to the intrinsic.
 * @param width  Segment width passed to the intrinsic; defaults to warpSize.
 * @return       Shuffled value on compute capability 3.0+; 0 otherwise.
 */
__device__ __forceinline__ unsigned int shfl_up(unsigned int val, unsigned int delta, int width = warpSize)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
    return (unsigned int) __shfl_up((int) val, delta, width);
#else
    // Guard is false (host pass or pre-sm_30 target): nothing to shuffle.
    return 0;
#endif
}
/**
 * Warp shuffle-up overload for double.
 *
 * The double is split into its low and high 32-bit words, each word is
 * shuffled separately with the int intrinsic, and the two received words
 * are reassembled into a double.
 *
 * @param val    Value contributed by the calling lane.
 * @param delta  Lane offset passed to the intrinsic.
 * @param width  Segment width passed to the intrinsic; defaults to warpSize.
 * @return       Reassembled double on compute capability 3.0+; 0.0 otherwise.
 */
__device__ __forceinline__ double shfl_up(double val, unsigned int delta, int width = warpSize)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
    int lo = __double2loint(val);
    int hi = __double2hiint(val);

    // Both halves use the same delta/width, so they come from the same lane.
    lo = __shfl_up(lo, delta, width);
    hi = __shfl_up(hi, delta, width);

    return __hiloint2double(hi, lo);
#else
    // Guard is false (host pass or pre-sm_30 target): nothing to shuffle.
    return 0.0;
#endif
}
"black box" representation of the file storage associated with a file on disk.
Definition:
aruco.hpp:75