#ifndef OPENCV_CUDA_EMULATION_HPP_
#define OPENCV_CUDA_EMULATION_HPP_

#include "common.hpp"
#include "warp_reduce.hpp"

namespace cv { namespace cuda { namespace device
{
    struct Emulation
    {
        static __device__ __forceinline__ int syncthreadsOr(int pred)
        {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)
            // __syncthreads_or() requires compute capability 2.0;
            // this branch is only a compilation stub for older architectures
            (void) pred;
            return 0;
#else
            return __syncthreads_or(pred);
#endif
        }
        template <int CTA_SIZE>
        static __forceinline__ __device__ int Ballot(int predicate)
        {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
            return __ballot(predicate);
#else
            __shared__ volatile int cta_buffer[CTA_SIZE];

            int tid = threadIdx.x;
            cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0;
            return warp_reduce(cta_buffer);
#endif
        }
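        // Note on the pre-Fermi Ballot emulation above: each thread publishes a single
        // distinct bit (1 << lane) into shared memory, and warp_reduce() (declared in
        // warp_reduce.hpp) combines the per-lane values within the warp; because every
        // lane contributes a different bit, the combined value reproduces the __ballot()
        // bit mask.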
        struct smem
        {
            enum { TAG_MASK = (1U << ((sizeof(unsigned int) << 3) - 5U)) - 1U };
            template <typename T>
            static __device__ __forceinline__ T atomicInc(T* address, T val)
            {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
                T count;
                unsigned int tag = threadIdx.x << ((sizeof(unsigned int) << 3) - 5U);
                do
                {
                    count = *address & TAG_MASK;
                    count = tag | (count + 1);
                    *address = count;
                } while (*address != count);

                return (count & TAG_MASK) - 1;
#else
                return ::atomicInc(address, val);
#endif
            }
            template <typename T>
            static __device__ __forceinline__ T atomicAdd(T* address, T val)
            {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
                T count;
                unsigned int tag = threadIdx.x << ((sizeof(unsigned int) << 3) - 5U);
                do
                {
                    count = *address & TAG_MASK;
                    count = tag | (count + val);
                    *address = count;
                } while (*address != count);

                return (count & TAG_MASK) - val;
#else
                return ::atomicAdd(address, val);
#endif
            }
            template <typename T>
            static __device__ __forceinline__ T atomicMin(T* address, T val)
            {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
                T count = ::min(*address, val);
                do
                {
                    *address = count;
                } while (*address > count);

                return count;
#else
                return ::atomicMin(address, val);
#endif
            }
        }; // struct smem
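        // The pre-1.2 shared-memory emulation above relies on a software tag: the top
        // five bits of each word hold a tag derived from the writing thread's
        // threadIdx.x, and TAG_MASK selects the remaining 27 value bits. A thread keeps
        // rewriting its tagged value until it observes its own write surviving, which
        // serializes conflicting writers within a warp.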
        struct glob
        {
            static __device__ __forceinline__ int atomicAdd(int* address, int val)
            {
                return ::atomicAdd(address, val);
            }

            static __device__ __forceinline__ unsigned int atomicAdd(unsigned int* address, unsigned int val)
            {
                return ::atomicAdd(address, val);
            }
            static __device__ __forceinline__ float atomicAdd(float* address, float val)
            {
#if __CUDA_ARCH__ >= 200
                return ::atomicAdd(address, val);
#else
                int* address_as_i = (int*) address;
                int old = *address_as_i, assumed;
                do
                {
                    assumed = old;
                    old = ::atomicCAS(address_as_i, assumed,
                        __float_as_int(val + __int_as_float(assumed)));
                } while (assumed != old);
                return __int_as_float(old);
#endif
            }
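            // The compare-and-swap loop above is the standard software emulation of a
            // floating-point atomic: reinterpret the word as an integer, compute the new
            // value from the snapshot, and retry with atomicCAS until no other thread has
            // modified the word in between. Hardware float atomicAdd is only available
            // from compute capability 2.0, hence the guard.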
            static __device__ __forceinline__ double atomicAdd(double* address, double val)
            {
#if __CUDA_ARCH__ >= 130
                unsigned long long int* address_as_ull = (unsigned long long int*) address;
                unsigned long long int old = *address_as_ull, assumed;
                do
                {
                    assumed = old;
                    old = ::atomicCAS(address_as_ull, assumed,
                        __double_as_longlong(val + __longlong_as_double(assumed)));
                } while (assumed != old);
                return __longlong_as_double(old);
#else
                // double precision and 64-bit atomics require compute capability 1.3;
                // compilation stub only
                (void) address;
                (void) val;
                return 0.0;
#endif
            }
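            // CUDA provides no hardware atomicAdd for double until compute capability 6.0,
            // so the double overload above always goes through an atomicCAS loop on the
            // 64-bit bit pattern of the value.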
            static __device__ __forceinline__ int atomicMin(int* address, int val)
            {
                return ::atomicMin(address, val);
            }
            static __device__ __forceinline__ float atomicMin(float* address, float val)
            {
#if __CUDA_ARCH__ >= 120
                int* address_as_i = (int*) address;
                int old = *address_as_i, assumed;
                do
                {
                    assumed = old;
                    old = ::atomicCAS(address_as_i, assumed,
                        __float_as_int(::fminf(val, __int_as_float(assumed))));
                } while (assumed != old);
                return __int_as_float(old);
#else
                // compilation stub for architectures below the guard
                (void) address;
                (void) val;
                return 0.0f;
#endif
            }
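            // There is no native float or double atomicMin/atomicMax in CUDA, so the
            // floating-point overloads in this struct are always implemented with an
            // atomicCAS retry loop combined with fminf/fmaxf (fmin/fmax for double).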
            static __device__ __forceinline__ double atomicMin(double* address, double val)
            {
#if __CUDA_ARCH__ >= 130
                unsigned long long int* address_as_ull = (unsigned long long int*) address;
                unsigned long long int old = *address_as_ull, assumed;
                do
                {
                    assumed = old;
                    old = ::atomicCAS(address_as_ull, assumed,
                        __double_as_longlong(::fmin(val, __longlong_as_double(assumed))));
                } while (assumed != old);
                return __longlong_as_double(old);
#else
                // compilation stub for architectures below the guard
                (void) address;
                (void) val;
                return 0.0;
#endif
            }
            static __device__ __forceinline__ int atomicMax(int* address, int val)
            {
                return ::atomicMax(address, val);
            }
            static __device__ __forceinline__ float atomicMax(float* address, float val)
            {
#if __CUDA_ARCH__ >= 120
                int* address_as_i = (int*) address;
                int old = *address_as_i, assumed;
                do
                {
                    assumed = old;
                    old = ::atomicCAS(address_as_i, assumed,
                        __float_as_int(::fmaxf(val, __int_as_float(assumed))));
                } while (assumed != old);
                return __int_as_float(old);
#else
                // compilation stub for architectures below the guard
                (void) address;
                (void) val;
                return 0.0f;
#endif
            }
            static __device__ __forceinline__ double atomicMax(double* address, double val)
            {
#if __CUDA_ARCH__ >= 130
                unsigned long long int* address_as_ull = (unsigned long long int*) address;
                unsigned long long int old = *address_as_ull, assumed;
                do
                {
                    assumed = old;
                    old = ::atomicCAS(address_as_ull, assumed,
                        __double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
                } while (assumed != old);
                return __longlong_as_double(old);
#else
                // compilation stub for architectures below the guard
                (void) address;
                (void) val;
                return 0.0;
#endif
            }
        }; // struct glob
    }; // struct Emulation
}}} // namespace cv { namespace cuda { namespace device

#endif // OPENCV_CUDA_EMULATION_HPP_
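/*
   Usage sketch (illustrative only, not part of this header). Assuming the
   Emulation::Ballot and Emulation::glob members reconstructed above, a kernel
   could use them as follows; countPositive and its parameters are hypothetical
   names chosen for the example:

       #include <opencv2/core/cuda/emulation.hpp>

       __global__ void countPositive(const float* data, int n, unsigned int* counter)
       {
           int i = blockIdx.x * blockDim.x + threadIdx.x;

           // per-warp ballot of the predicate (a CTA of 256 threads is assumed)
           int mask = cv::cuda::device::Emulation::Ballot<256>(i < n && data[i] > 0.f);

           // one thread per warp adds the warp's popcount to the global counter
           if ((threadIdx.x & 31) == 0)
               cv::cuda::device::Emulation::glob::atomicAdd(counter, (unsigned int)__popc(mask));
       }
*/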