OpenCV 4.5.3(日本語機械翻訳)
warp_shuffle.hpp
[詳解]
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
22 //
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
26 //
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42
43 #ifndef OPENCV_CUDA_WARP_SHUFFLE_HPP
44 #define OPENCV_CUDA_WARP_SHUFFLE_HPP
45
51
52 namespace cv { namespace cuda { namespace device
53{
// When compiling with CUDA 9 or newer, redirect the legacy shuffle names used
// by the wrappers below to the *_sync intrinsics, passing a full 32-lane
// participation mask (0xFFFFFFFF). Older toolkits keep using the legacy
// intrinsics directly. The macros are removed again at the end of this header.
// NOTE(review): the full mask presumes every lane of the warp reaches the
// call -- confirm at call sites that use these wrappers in divergent code.
#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9)
#  define __shfl(var, lane, w)       __shfl_sync(0xFFFFFFFFU, var, lane, w)
#  define __shfl_up(var, delta, w)   __shfl_up_sync(0xFFFFFFFFU, var, delta, w)
#  define __shfl_down(var, delta, w) __shfl_down_sync(0xFFFFFFFFU, var, delta, w)
#endif // CUDA toolkit >= 9
/**
 * Generic warp shuffle: forwards to the __shfl intrinsic so that the calling
 * lane receives `val` as held by lane `srcLane`.
 *
 * @param val     value contributed by the calling lane
 * @param srcLane lane index to read from
 * @param width   logical sub-warp width handed to the intrinsic (defaults to warpSize)
 * @return the intrinsic's result when compiled for __CUDA_ARCH__ >= 300;
 *         otherwise (host pass or older architecture) a value-initialized T.
 */
template <typename T>
__device__ __forceinline__ T shfl(T val, int srcLane, int width = warpSize)
{
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ < 300)
    // Shuffle is unavailable for this compilation pass.
    return T();
#else
    return __shfl(val, srcLane, width);
#endif
}
/**
 * Warp shuffle for `unsigned int`: the value is converted to `int`, passed
 * through __shfl, and the result is converted back to `unsigned int`.
 *
 * @param val     value contributed by the calling lane
 * @param srcLane lane index to read from
 * @param width   logical sub-warp width handed to the intrinsic (defaults to warpSize)
 * @return the shuffled value when compiled for __CUDA_ARCH__ >= 300, else 0.
 */
__device__ __forceinline__ unsigned int shfl(unsigned int val, int srcLane, int width = warpSize)
{
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ < 300)
    return 0;
#else
    const int asSigned = static_cast<int>(val);
    return static_cast<unsigned int>(__shfl(asSigned, srcLane, width));
#endif
}
/**
 * Warp shuffle for `double`: the value is split into its low and high 32-bit
 * words, each word is shuffled with __shfl, and the two results are joined
 * back into a double.
 *
 * @param val     value contributed by the calling lane
 * @param srcLane lane index to read from
 * @param width   logical sub-warp width handed to the intrinsic (defaults to warpSize)
 * @return the reassembled double when compiled for __CUDA_ARCH__ >= 300, else 0.0.
 */
__device__ __forceinline__ double shfl(double val, int srcLane, int width = warpSize)
{
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ < 300)
    return 0.0;
#else
    // Low word is exchanged first, then the high word.
    const int lowWord  = __shfl(__double2loint(val), srcLane, width);
    const int highWord = __shfl(__double2hiint(val), srcLane, width);

    return __hiloint2double(highWord, lowWord);
#endif
}
90
/**
 * Generic downward warp shuffle: forwards to the __shfl_down intrinsic with
 * the given lane offset.
 *
 * @param val   value contributed by the calling lane
 * @param delta lane offset handed to the intrinsic
 * @param width logical sub-warp width handed to the intrinsic (defaults to warpSize)
 * @return the intrinsic's result when compiled for __CUDA_ARCH__ >= 300;
 *         otherwise (host pass or older architecture) a value-initialized T.
 */
template <typename T>
__device__ __forceinline__ T shfl_down(T val, unsigned int delta, int width = warpSize)
{
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ < 300)
    // Shuffle is unavailable for this compilation pass.
    return T();
#else
    return __shfl_down(val, delta, width);
#endif
}
/**
 * Downward warp shuffle for `unsigned int`: the value is converted to `int`,
 * passed through __shfl_down, and the result is converted back.
 *
 * @param val   value contributed by the calling lane
 * @param delta lane offset handed to the intrinsic
 * @param width logical sub-warp width handed to the intrinsic (defaults to warpSize)
 * @return the shuffled value when compiled for __CUDA_ARCH__ >= 300, else 0.
 */
__device__ __forceinline__ unsigned int shfl_down(unsigned int val, unsigned int delta, int width = warpSize)
{
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ < 300)
    return 0;
#else
    const int asSigned = static_cast<int>(val);
    return static_cast<unsigned int>(__shfl_down(asSigned, delta, width));
#endif
}
/**
 * Downward warp shuffle for `double`: the value is split into its low and
 * high 32-bit words, each word goes through __shfl_down, and the two results
 * are joined back into a double.
 *
 * @param val   value contributed by the calling lane
 * @param delta lane offset handed to the intrinsic
 * @param width logical sub-warp width handed to the intrinsic (defaults to warpSize)
 * @return the reassembled double when compiled for __CUDA_ARCH__ >= 300, else 0.0.
 */
__device__ __forceinline__ double shfl_down(double val, unsigned int delta, int width = warpSize)
{
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ < 300)
    return 0.0;
#else
    // Low word is exchanged first, then the high word.
    const int lowWord  = __shfl_down(__double2loint(val), delta, width);
    const int highWord = __shfl_down(__double2hiint(val), delta, width);

    return __hiloint2double(highWord, lowWord);
#endif
}
122
/**
 * Generic upward warp shuffle: forwards to the __shfl_up intrinsic with the
 * given lane offset.
 *
 * @param val   value contributed by the calling lane
 * @param delta lane offset handed to the intrinsic
 * @param width logical sub-warp width handed to the intrinsic (defaults to warpSize)
 * @return the intrinsic's result when compiled for __CUDA_ARCH__ >= 300;
 *         otherwise (host pass or older architecture) a value-initialized T.
 */
template <typename T>
__device__ __forceinline__ T shfl_up(T val, unsigned int delta, int width = warpSize)
{
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ < 300)
    // Shuffle is unavailable for this compilation pass.
    return T();
#else
    return __shfl_up(val, delta, width);
#endif
}
/**
 * Upward warp shuffle for `unsigned int`: the value is converted to `int`,
 * passed through __shfl_up, and the result is converted back.
 *
 * @param val   value contributed by the calling lane
 * @param delta lane offset handed to the intrinsic
 * @param width logical sub-warp width handed to the intrinsic (defaults to warpSize)
 * @return the shuffled value when compiled for __CUDA_ARCH__ >= 300, else 0.
 */
__device__ __forceinline__ unsigned int shfl_up(unsigned int val, unsigned int delta, int width = warpSize)
{
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ < 300)
    return 0;
#else
    const int asSigned = static_cast<int>(val);
    return static_cast<unsigned int>(__shfl_up(asSigned, delta, width));
#endif
}
/**
 * Upward warp shuffle for `double`: the value is split into its low and high
 * 32-bit words, each word goes through __shfl_up, and the two results are
 * joined back into a double.
 *
 * @param val   value contributed by the calling lane
 * @param delta lane offset handed to the intrinsic
 * @param width logical sub-warp width handed to the intrinsic (defaults to warpSize)
 * @return the reassembled double when compiled for __CUDA_ARCH__ >= 300, else 0.0.
 */
__device__ __forceinline__ double shfl_up(double val, unsigned int delta, int width = warpSize)
{
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ < 300)
    return 0.0;
#else
    // Low word is exchanged first, then the high word.
    const int lowWord  = __shfl_up(__double2loint(val), delta, width);
    const int highWord = __shfl_up(__double2hiint(val), delta, width);

    return __hiloint2double(highWord, lowWord);
#endif
}
154}}}
155
// Drop the shuffle compatibility macros so they do not leak out of this
// header. #undef on a name that is not a macro is a no-op, so this is safe
// even when the macros were never defined.
#undef __shfl
#undef __shfl_up
#undef __shfl_down
159
161
162 #endif // OPENCV_CUDA_WARP_SHUFFLE_HPP
cv
"black box" representation of the file storage associated with a file on disk.
Definition: aruco.hpp:75