43
#ifndef OPENCV_CUDA_DATAMOV_UTILS_HPP
44
#define OPENCV_CUDA_DATAMOV_UTILS_HPP
54
namespace
cv
{
namespace
cuda {
namespace
device
56
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 200
59
template
<
typename
T>
struct
ForceGlob
61
__device__ __forceinline__
static
void
Load(
const
T* ptr,
int
offset, T& val) { val = ptr[offset]; }
66
// Inline-asm operand constraint used for pointer operands.
// "l" is selected when _WIN64 or __LP64__ is defined, "r" otherwise
// (in NVIDIA inline PTX, "l" names a 64-bit and "r" a 32-bit integer register).
// Exactly one definition must be active, hence the #else/#endif pair.
#if defined(_WIN64) || defined(__LP64__)
    #define OPENCV_CUDA_ASM_PTR "l"
#else
    #define OPENCV_CUDA_ASM_PTR "r"
#endif
// Primary template: declared only, never defined at this point. Usable
// ForceGlob<T> types come from explicit specializations, so asking for an
// element type without one is a compile-time error (incomplete type).
template <typename T> struct ForceGlob;
// Emits the explicit specialization ForceGlob<base_type>. Its Load() reads one
// element with an inline PTX "ld.global.<ptx_type>" instruction.
//   base_type - C++ element type being specialized
//   ptx_type  - PTX type suffix; stringized and pasted after "ld.global."
//   reg_mod   - inline-asm constraint letter for the output operand; stringized
//               and appended to "=" (e.g. r -> "=r")
// The address operand is (ptr + offset), passed with the OPENCV_CUDA_ASM_PTR
// constraint, so that macro must be defined wherever this one is expanded.
// Every line except the last ends in a backslash to keep the whole
// specialization inside the macro body.
#define OPENCV_CUDA_DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
    template <> struct ForceGlob<base_type> \
    { \
        __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
        { \
            asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \
        } \
    };
// Emits the explicit specialization ForceGlob<base_type> for element types
// that have no register constraint of their own and are therefore loaded
// through a 32-bit "r" register (this file expands it with u8/s8/b8).
//   base_type - C++ element type being specialized
//   ptx_type  - PTX type suffix; stringized and pasted after "ld.global."
// The loaded value lands in a 32-bit temporary and is then narrowed into val.
// Binding the "=r" output directly to *reinterpret_cast<uint*>(&val) would
// store 32 bits through a type-punned pointer into a narrower object, so the
// temporary is used instead.
// The address operand is (ptr + offset), passed with the OPENCV_CUDA_ASM_PTR
// constraint, so that macro must be defined wherever this one is expanded.
#define OPENCV_CUDA_DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
    template <> struct ForceGlob<base_type> \
    { \
        __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
        { \
            unsigned int loaded; \
            asm("ld.global."#ptx_type" %0, [%1];" : "=r"(loaded) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \
            val = static_cast<base_type>(loaded); \
        } \
    };
// ForceGlob specializations, one per supported element type.
// Columns: element type, PTX type suffix, output register constraint.
// The *_B form takes no constraint argument; it always uses "=r".
OPENCV_CUDA_DEFINE_FORCE_GLOB_B(uchar,  u8)
OPENCV_CUDA_DEFINE_FORCE_GLOB_B(schar,  s8)
OPENCV_CUDA_DEFINE_FORCE_GLOB_B(char,   b8)
OPENCV_CUDA_DEFINE_FORCE_GLOB  (ushort, u16, h)
OPENCV_CUDA_DEFINE_FORCE_GLOB  (short,  s16, h)
OPENCV_CUDA_DEFINE_FORCE_GLOB  (uint,   u32, r)
OPENCV_CUDA_DEFINE_FORCE_GLOB  (int,    s32, r)
OPENCV_CUDA_DEFINE_FORCE_GLOB  (float,  f32, f)
OPENCV_CUDA_DEFINE_FORCE_GLOB  (double, f64, d)
// The helper macros exist only to generate the ForceGlob specializations;
// drop them so they do not leak into files that include this header.
#undef OPENCV_CUDA_DEFINE_FORCE_GLOB
#undef OPENCV_CUDA_DEFINE_FORCE_GLOB_B
#undef OPENCV_CUDA_ASM_PTR
// "Black box" representation of the file storage associated with a file on disk.
// Definition: aruco.hpp:75