43
#ifndef OPENCV_CUDA_DATAMOV_UTILS_HPP
 
44
#define OPENCV_CUDA_DATAMOV_UTILS_HPP
 
54
namespace
cv
{
namespace
cuda {
namespace
device
 
56
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 200
 
/**
 * Element loader selected by the surrounding __CUDA_ARCH__ >= 200 check.
 *
 * @tparam T element type read through the pointer.
 */
template <typename T>
struct ForceGlob
{
    /**
     * Reads ptr[offset] into val with an ordinary indexed access.
     *
     * @param ptr    base pointer of the data to read from.
     * @param offset element index applied to ptr.
     * @param val    receives the element that was read.
     */
    __device__ __forceinline__ static void Load(const T* ptr, int offset, T& val)
    {
        val = ptr[offset];
    }
};
 
// Inline-asm operand constraint used for pointer operands: "l" when _WIN64 or
// __LP64__ is defined, "r" otherwise.
#if defined(_WIN64) || defined(__LP64__)
    #define OPENCV_CUDA_ASM_PTR "l"
#else
    #define OPENCV_CUDA_ASM_PTR "r"
#endif
 
// Primary template is declared only; the element types that are supported get
// explicit specializations generated by the OPENCV_CUDA_DEFINE_FORCE_GLOB*
// macros.
template <class T> struct ForceGlob;
 
// Generates the ForceGlob<base_type> specialization whose Load() issues an
// explicit PTX `ld.global.<ptx_type>` from the address ptr + offset and binds
// the result directly to val.
//   base_type - C++ element type being specialized.
//   ptx_type  - PTX type suffix appended to `ld.global.`.
//   reg_mod   - inline-asm constraint letter for the destination operand.
#define OPENCV_CUDA_DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
    template <> struct ForceGlob<base_type> \
    { \
        __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
        { \
            asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \
        } \
    };
 
// Variant of the specialization generator for element types that cannot be
// bound to an asm operand directly: the PTX `ld.global.<ptx_type>` result is
// received in a 32-bit "r" register and then narrowed to base_type.
//   base_type - C++ element type being specialized.
//   ptx_type  - PTX type suffix appended to `ld.global.`.
// The load targets a local temporary rather than a uint-typed alias of val,
// so no bytes outside of val are written.
#define OPENCV_CUDA_DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
    template <> struct ForceGlob<base_type> \
    { \
        __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
        { \
            unsigned int loaded; \
            asm("ld.global."#ptx_type" %0, [%1];" : "=r"(loaded) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \
            val = static_cast<base_type>(loaded); \
        } \
    };
 
// 8-bit PTX types (u8/s8/b8) go through the _B variant, which receives the
// loaded value in an "r"-constrained register.
OPENCV_CUDA_DEFINE_FORCE_GLOB_B(uchar,  u8)
OPENCV_CUDA_DEFINE_FORCE_GLOB_B(schar,  s8)
OPENCV_CUDA_DEFINE_FORCE_GLOB_B(char,   b8)

// Remaining types pass their own register constraint letter.
OPENCV_CUDA_DEFINE_FORCE_GLOB  (ushort, u16, h)
OPENCV_CUDA_DEFINE_FORCE_GLOB  (short,  s16, h)
OPENCV_CUDA_DEFINE_FORCE_GLOB  (uint,   u32, r)
OPENCV_CUDA_DEFINE_FORCE_GLOB  (int,    s32, r)
OPENCV_CUDA_DEFINE_FORCE_GLOB  (float,  f32, f)
OPENCV_CUDA_DEFINE_FORCE_GLOB  (double, f64, d)
 
// The generator macros and the pointer-constraint macro are only needed while
// the specializations are being emitted; remove them afterwards.
#undef OPENCV_CUDA_DEFINE_FORCE_GLOB
#undef OPENCV_CUDA_DEFINE_FORCE_GLOB_B
#undef OPENCV_CUDA_ASM_PTR
 
"black box" representation of the file storage associated with a file on disk.
Definition:
aruco.hpp:75