OpenCV 4.5.3(日本語機械翻訳)
emulation.hpp
[詳解]
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
22 //
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
26 //
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42
43 #ifndef OPENCV_CUDA_EMULATION_HPP_
44 #define OPENCV_CUDA_EMULATION_HPP_
45
46 #include "common.hpp"
47 #include "warp_reduce.hpp"
48
54
55 namespace cv { namespace cuda { namespace device
56{
57 struct Emulation
58 {
59
60 static __device__ __forceinline__ int syncthreadsOr(int pred)
61 {
62 #if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)
63 // just campilation stab
64 return 0;
65 #else
66 return __syncthreads_or(pred);
67 #endif
68 }
69
70 template<int CTA_SIZE>
71 static __forceinline__ __device__ int Ballot(int predicate)
72 {
73 #if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
74 return __ballot(predicate);
75 #else
76 __shared__ volatile int cta_buffer[CTA_SIZE];
77
78 int tid = threadIdx.x;
79 cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0;
80 return warp_reduce(cta_buffer);
81 #endif
82 }
83
84 struct smem
85 {
86 enum { TAG_MASK = (1U << ( (sizeof(unsigned int) << 3) - 5U)) - 1U };
87
88 template<typename T>
89 static __device__ __forceinline__ T atomicInc(T* address, T val)
90 {
91 #if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
92 T count;
93 unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
94 do
95 {
96 count = *address & TAG_MASK;
97 count = tag | (count + 1);
98 *address = count;
99 } while (*address != count);
100
101 return (count & TAG_MASK) - 1;
102 #else
103 return ::atomicInc(address, val);
104 #endif
105 }
106
107 template<typename T>
108 static __device__ __forceinline__ T atomicAdd(T* address, T val)
109 {
110 #if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
111 T count;
112 unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
113 do
114 {
115 count = *address & TAG_MASK;
116 count = tag | (count + val);
117 *address = count;
118 } while (*address != count);
119
120 return (count & TAG_MASK) - val;
121 #else
122 return ::atomicAdd(address, val);
123 #endif
124 }
125
126 template<typename T>
127 static __device__ __forceinline__ T atomicMin(T* address, T val)
128 {
129 #if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
130 T count = ::min(*address, val);
131 do
132 {
133 *address = count;
134 } while (*address > count);
135
136 return count;
137 #else
138 return ::atomicMin(address, val);
139 #endif
140 }
141 }; // struct cmem
142
143 struct glob
144 {
145 static __device__ __forceinline__ int atomicAdd(int* address, int val)
146 {
147 return ::atomicAdd(address, val);
148 }
149 static __device__ __forceinline__ unsigned int atomicAdd(unsigned int* address, unsigned int val)
150 {
151 return ::atomicAdd(address, val);
152 }
153 static __device__ __forceinline__ float atomicAdd(float* address, float val)
154 {
155 #if __CUDA_ARCH__ >= 200
156 return ::atomicAdd(address, val);
157 #else
158 int* address_as_i = (int*) address;
159 int old = *address_as_i, assumed;
160 do {
161 assumed = old;
162 old = ::atomicCAS(address_as_i, assumed,
163 __float_as_int(val + __int_as_float(assumed)));
164 } while (assumed != old);
165 return __int_as_float(old);
166 #endif
167 }
168 static __device__ __forceinline__ double atomicAdd(double* address, double val)
169 {
170 #if __CUDA_ARCH__ >= 130
171 unsigned long long int* address_as_ull = (unsigned long long int*) address;
172 unsigned long long int old = *address_as_ull, assumed;
173 do {
174 assumed = old;
175 old = ::atomicCAS(address_as_ull, assumed,
176 __double_as_longlong(val + __longlong_as_double(assumed)));
177 } while (assumed != old);
178 return __longlong_as_double(old);
179 #else
180 CV_UNUSED(address);
181 CV_UNUSED(val);
182 return 0.0;
183 #endif
184 }
185
186 static __device__ __forceinline__ int atomicMin(int* address, int val)
187 {
188 return ::atomicMin(address, val);
189 }
190 static __device__ __forceinline__ float atomicMin(float* address, float val)
191 {
192 #if __CUDA_ARCH__ >= 120
193 int* address_as_i = (int*) address;
194 int old = *address_as_i, assumed;
195 do {
196 assumed = old;
197 old = ::atomicCAS(address_as_i, assumed,
198 __float_as_int(::fminf(val, __int_as_float(assumed))));
199 } while (assumed != old);
200 return __int_as_float(old);
201 #else
202 CV_UNUSED(address);
203 CV_UNUSED(val);
204 return 0.0f;
205 #endif
206 }
207 static __device__ __forceinline__ double atomicMin(double* address, double val)
208 {
209 #if __CUDA_ARCH__ >= 130
210 unsigned long long int* address_as_ull = (unsigned long long int*) address;
211 unsigned long long int old = *address_as_ull, assumed;
212 do {
213 assumed = old;
214 old = ::atomicCAS(address_as_ull, assumed,
215 __double_as_longlong(::fmin(val, __longlong_as_double(assumed))));
216 } while (assumed != old);
217 return __longlong_as_double(old);
218 #else
219 CV_UNUSED(address);
220 CV_UNUSED(val);
221 return 0.0;
222 #endif
223 }
224
225 static __device__ __forceinline__ int atomicMax(int* address, int val)
226 {
227 return ::atomicMax(address, val);
228 }
229 static __device__ __forceinline__ float atomicMax(float* address, float val)
230 {
231 #if __CUDA_ARCH__ >= 120
232 int* address_as_i = (int*) address;
233 int old = *address_as_i, assumed;
234 do {
235 assumed = old;
236 old = ::atomicCAS(address_as_i, assumed,
237 __float_as_int(::fmaxf(val, __int_as_float(assumed))));
238 } while (assumed != old);
239 return __int_as_float(old);
240 #else
241 CV_UNUSED(address);
242 CV_UNUSED(val);
243 return 0.0f;
244 #endif
245 }
246 static __device__ __forceinline__ double atomicMax(double* address, double val)
247 {
248 #if __CUDA_ARCH__ >= 130
249 unsigned long long int* address_as_ull = (unsigned long long int*) address;
250 unsigned long long int old = *address_as_ull, assumed;
251 do {
252 assumed = old;
253 old = ::atomicCAS(address_as_ull, assumed,
254 __double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
255 } while (assumed != old);
256 return __longlong_as_double(old);
257 #else
258 CV_UNUSED(address);
259 CV_UNUSED(val);
260 return 0.0;
261 #endif
262 }
263 };
264 }; //struct Emulation
265}}} // namespace cv { namespace cuda { namespace cudev
266
268
269 #endif /* OPENCV_CUDA_EMULATION_HPP_ */
CV_EXPORTS_W void min(InputArray src1, InputArray src2, OutputArray dst)
Calculates per-element minimum of two arrays or an array and a scalar.
cv
"black box" representation of the file storage associated with a file on disk.
Definition: aruco.hpp:75