44
#ifndef OPENCV_CORE_CUDA_HPP
45
#define OPENCV_CORE_CUDA_HPP
48
# error cuda.hpp header must be compiled as C++
51
#include "opencv2/core.hpp"
65
namespace
cv
{
namespace
cuda {
114
virtual
bool
allocate(
GpuMat* mat,
int
rows,
int
cols,
size_t
elemSize) = 0;
115
virtual
void
free(
GpuMat* mat) = 0;
137
GpuMat(
int
rows,
int
cols,
int
type,
void* data,
size_t
step = Mat::AUTO_STEP);
138
GpuMat(
Size
size,
int
type,
void* data,
size_t
step = Mat::AUTO_STEP);
154
CV_WRAP
void
create(
int
rows,
int
cols,
int
type);
155
CV_WRAP
void
create(
Size
size,
int
type);
168
CV_WRAP
void
upload(InputArray arr);
178
CV_WRAP
void
upload(InputArray arr,
Stream& stream);
231
CV_WRAP
void
convertTo(
OutputArray
dst,
int
rtype,
double
alpha,
double
beta = 0.0)
const;
237
CV_WRAP
void
convertTo(
OutputArray
dst,
int
rtype,
double
alpha,
double
beta,
Stream& stream)
const;
239
CV_WRAP
void
assignTo(
GpuMat& m,
int
type = -1)
const;
243
const
uchar* ptr(
int
y = 0)
const;
246
template<
typename
_Tp> _Tp*
ptr(
int
y = 0);
247
template<
typename
_Tp>
const
_Tp* ptr(
int
y = 0)
const;
249
template
<
typename
_Tp>
operator
PtrStepSz<_Tp>()
const;
250
template
<
typename
_Tp>
operator
PtrStep<_Tp>()
const;
259
CV_WRAP
GpuMat
rowRange(
int
startrow,
int
endrow)
const;
263
CV_WRAP
GpuMat
colRange(
int
startcol,
int
endcol)
const;
272
CV_WRAP
GpuMat
reshape(
int
cn,
int
rows = 0)
const;
275
CV_WRAP
void
locateROI(
Size& wholeSize,
Point& ofs)
const;
278
CV_WRAP
GpuMat& adjustROI(
int
dtop,
int
dbottom,
int
dleft,
int
dright);
282
CV_WRAP
bool
isContinuous()
const;
285
CV_WRAP
size_t
elemSize()
const;
288
CV_WRAP
size_t
elemSize1()
const;
291
CV_WRAP
int
type()
const;
294
CV_WRAP
int
depth()
const;
297
CV_WRAP
int
channels()
const;
300
CV_WRAP
size_t
step1()
const;
306
CV_WRAP
bool
empty()
const;
309
CV_WRAP
void* cudaPtr()
const;
312
CV_WRAP
void
updateContinuityFlag();
337
const
uchar* dataend;
345
explicit
GpuData(
size_t
_size);
361
//! Sequence of ints; this is the type of the `size` (shape) field and of the
//! `size` arguments taken by the n-dimensional array constructors.
using SizeArray = std::vector<int>;
362
//! Sequence of size_t values; this is the type of the `step` field and of the
//! optional `step` arguments (which default to an empty StepArray).
using StepArray = std::vector<size_t>;
363
//! Sequence of ints; used as the type of the `idx` argument when selecting a
//! 2D plane of an n-dimensional array.
using IndexArray = std::vector<int>;
390
GpuMatND(SizeArray size,
int
type,
void* data, StepArray step = StepArray());
455
#if defined(__GNUC__) && __GNUC__ < 5
466
void
upload(InputArray src);
467
void
upload(InputArray src,
Stream& stream);
504
void
setFields(SizeArray size,
int
type, StepArray step = StepArray());
530
std::shared_ptr<GpuData> data_;
695
CV_WRAP
GpuMat
getBuffer(
int
rows,
int
cols,
int
type);
698
//! Size-based convenience overload: forwards to getBuffer(rows, cols, type),
//! passing size.height as the row count and size.width as the column count.
CV_WRAP GpuMat getBuffer(Size size, int type)
{
    return getBuffer(size.height, size.width, type);
}
709CV_EXPORTS_W
void
setBufferPoolConfig(
int
deviceId,
size_t
stackSize,
int
stackCount);
733
//! Allocation kinds selectable through HostMem::AllocType.
//! Each enumerator has a distinct single-bit value.
enum AllocType
{
    PAGE_LOCKED    = 1,
    SHARED         = 2,
    WRITE_COMBINED = 4
};
735
static
MatAllocator* getAllocator(HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
737
CV_WRAP
explicit
HostMem(HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
741
CV_WRAP
HostMem(
int
rows,
int
cols,
int
type, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
742
CV_WRAP
HostMem(
Size
size,
int
type, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
745
CV_WRAP
explicit
HostMem(InputArray arr, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
758
CV_WRAP
void
create(
int
rows,
int
cols,
int
type);
759
void
create(
Size
size,
int
type);
763
CV_WRAP
HostMem
reshape(
int
cn,
int
rows = 0)
const;
769
CV_WRAP
Mat
createMatHeader()
const;
781
CV_WRAP
bool
isContinuous()
const;
782
CV_WRAP
size_t
elemSize()
const;
783
CV_WRAP
size_t
elemSize1()
const;
784
CV_WRAP
int
type()
const;
785
CV_WRAP
int
depth()
const;
786
CV_WRAP
int
channels()
const;
787
CV_WRAP
size_t
step1()
const;
788
CV_WRAP
Size
size()
const;
789
CV_WRAP
bool
empty()
const;
800
const
uchar* dataend;
802
AllocType alloc_type;
850
//! Pointer to a const, argument-less, void-returning member function of Stream.
//! This is the result type of Stream's `operator bool_type() const`.
//! NOTE(review): looks like the classic safe-bool arrangement - confirm against
//! the operator's definition.
typedef void (Stream::*bool_type)() const;
851
//! Intentionally empty const member function whose signature matches bool_type.
//! NOTE(review): presumably the member whose address `operator bool_type()`
//! yields for a "true" result - confirm in the inline implementation header.
void this_type_does_not_support_comparisons() const {}
854
//! Function-pointer type for host callbacks passed to enqueueHostCallback().
//! The callee receives an int status code and an opaque user-data pointer.
typedef void (*StreamCallback)(int status, void* userData);
876
CV_WRAP
bool
queryIfComplete()
const;
880
CV_WRAP
void
waitForCompletion();
884
CV_WRAP
void
waitEvent(
const
Event& event);
900
operator
bool_type()
const;
903
CV_WRAP
void* cudaPtr()
const;
913
friend
class
DefaultDeviceInitializer;
922
BLOCKING_SYNC = 0x01,
923
DISABLE_TIMING = 0x02,
933
CV_WRAP
bool
queryIfComplete()
const;
936
CV_WRAP
void
waitForCompletion();
939
CV_WRAP
static
float
elapsedTime(
const
Event& start,
const
Event& end);
990
FEATURE_SET_COMPUTE_10 = 10,
991
FEATURE_SET_COMPUTE_11 = 11,
992
FEATURE_SET_COMPUTE_12 = 12,
993
FEATURE_SET_COMPUTE_13 = 13,
994
FEATURE_SET_COMPUTE_20 = 20,
995
FEATURE_SET_COMPUTE_21 = 21,
996
FEATURE_SET_COMPUTE_30 = 30,
997
FEATURE_SET_COMPUTE_32 = 32,
998
FEATURE_SET_COMPUTE_35 = 35,
999
FEATURE_SET_COMPUTE_50 = 50,
1001
GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
1002
SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
1003
NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13,
1004
WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30,
1005
DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
1032
CV_WRAP
static
bool
has(
int
major,
int
minor);
1033
CV_WRAP
static
bool
hasPtx(
int
major,
int
minor);
1034
CV_WRAP
static
bool
hasBin(
int
major,
int
minor);
1036
CV_WRAP
static
bool
hasEqualOrLessPtx(
int
major,
int
minor);
1037
CV_WRAP
static
bool
hasEqualOrGreater(
int
major,
int
minor);
1038
CV_WRAP
static
bool
hasEqualOrGreaterPtx(
int
major,
int
minor);
1039
CV_WRAP
static
bool
hasEqualOrGreaterBin(
int
major,
int
minor);
1061
CV_WRAP
int
deviceID()
const;
1067
CV_WRAP
size_t
totalGlobalMem()
const;
1070
CV_WRAP
size_t
sharedMemPerBlock()
const;
1073
CV_WRAP
int
regsPerBlock()
const;
1076
CV_WRAP
int
warpSize()
const;
1079
CV_WRAP
size_t
memPitch()
const;
1082
CV_WRAP
int
maxThreadsPerBlock()
const;
1091
CV_WRAP
int
clockRate()
const;
1094
CV_WRAP
size_t
totalConstMem()
const;
1097
CV_WRAP
int
majorVersion()
const;
1100
CV_WRAP
int
minorVersion()
const;
1103
CV_WRAP
size_t
textureAlignment()
const;
1106
CV_WRAP
size_t
texturePitchAlignment()
const;
1109
CV_WRAP
int
multiProcessorCount()
const;
1112
CV_WRAP
bool
kernelExecTimeoutEnabled()
const;
1115
CV_WRAP
bool
integrated()
const;
1118
CV_WRAP
bool
canMapHostMemory()
const;
1125
ComputeModeExclusiveProcess
1132
CV_WRAP
int
maxTexture1D()
const;
1135
CV_WRAP
int
maxTexture1DMipmap()
const;
1138
CV_WRAP
int
maxTexture1DLinear()
const;
1144
CV_WRAP
Vec2i
maxTexture2DMipmap()
const;
1147
CV_WRAP
Vec3i
maxTexture2DLinear()
const;
1150
CV_WRAP
Vec2i
maxTexture2DGather()
const;
1156
CV_WRAP
int
maxTextureCubemap()
const;
1159
CV_WRAP
Vec2i
maxTexture1DLayered()
const;
1162
CV_WRAP
Vec3i
maxTexture2DLayered()
const;
1165
CV_WRAP
Vec2i
maxTextureCubemapLayered()
const;
1168
CV_WRAP
int
maxSurface1D()
const;
1177
CV_WRAP
Vec2i
maxSurface1DLayered()
const;
1180
CV_WRAP
Vec3i
maxSurface2DLayered()
const;
1183
CV_WRAP
int
maxSurfaceCubemap()
const;
1186
CV_WRAP
Vec2i
maxSurfaceCubemapLayered()
const;
1189
CV_WRAP
size_t
surfaceAlignment()
const;
1192
CV_WRAP
bool
concurrentKernels()
const;
1195
CV_WRAP
bool
ECCEnabled()
const;
1198
CV_WRAP
int
pciBusID()
const;
1201
CV_WRAP
int
pciDeviceID()
const;
1204
CV_WRAP
int
pciDomainID()
const;
1207
CV_WRAP
bool
tccDriver()
const;
1210
CV_WRAP
int
asyncEngineCount()
const;
1213
CV_WRAP
bool
unifiedAddressing()
const;
1216
CV_WRAP
int
memoryClockRate()
const;
1219
CV_WRAP
int
memoryBusWidth()
const;
1222
CV_WRAP
int
l2CacheSize()
const;
1225
CV_WRAP
int
maxThreadsPerMultiProcessor()
const;
1228
CV_WRAP
void
queryMemory(
size_t& totalMemory,
size_t& freeMemory)
const;
1229
CV_WRAP
size_t
freeMemory()
const;
1230
CV_WRAP
size_t
totalMemory()
const;
1245
CV_WRAP
bool
isCompatible()
const;
1251CV_EXPORTS_W
void
printCudaDeviceInfo(
int
device);
1252CV_EXPORTS_W
void
printShortCudaDeviceInfo(
int
device);
1268
#include "opencv2/core/cuda.inl.hpp"
This type is very similar to InputArray except that it is used for input/output and output function parameters.
Definition:
mat.hpp:295
Custom array allocator
Definition:
mat.hpp:470
n-dimensional dense array class
Definition:
mat.hpp:802
Template class specifying a continuous subsequence (slice) of a sequence.
Definition:
core/types.hpp:590
Template class for 2D rectangles
Definition:
core/types.hpp:421
Template class for specifying the size of an image or rectangle.
Definition:
core/types.hpp:316
Template class for short numerical vectors, a partial case of Matx
Definition:
matx.hpp:342
BufferPool for use with CUDA streams
Definition:
core/cuda.hpp:688
BufferPool(Stream &stream)
Gets the BufferPool for the given stream.
Class providing functionality for querying the specified GPU properties.
Definition:
core/cuda.hpp:1045
bool supports(FeatureSet feature_set) const
Provides information on CUDA feature support.
const char * name() const
ASCII string identifying device
ComputeMode
Definition:
core/cuda.hpp:1121
@ ComputeModeProhibited
Definition:
core/cuda.hpp:1124
@ ComputeModeDefault
Definition:
core/cuda.hpp:1122
@ ComputeModeExclusive
Definition:
core/cuda.hpp:1123
Definition:
core/cuda.hpp:917
CreateFlags
Definition:
core/cuda.hpp:920
Definition:
core/cuda.hpp:109
Base storage class for GPU memory with reference counting.
Definition:
core/cuda.hpp:106
Allocator * allocator
allocator
Definition:
core/cuda.hpp:340
void release()
decreases the reference counter and deallocates the data when the reference counter reaches 0
uchar * data
pointer to the data
Definition:
core/cuda.hpp:329
~GpuMat()
destructor - calls release()
static CV_WRAP GpuMat::Allocator * defaultAllocator()
default allocator
int rows
the number of rows and columns
Definition:
core/cuda.hpp:323
uchar * ptr(int y=0)
returns pointer to y-th row
GpuMat(int rows, int cols, int type, void *data, size_t step=Mat::AUTO_STEP)
constructor for GpuMat headers pointing to user-allocated data
_Tp * ptr(int y=0)
template version of the above method
uchar * datastart
helper fields used in locateROI and adjustROI
Definition:
core/cuda.hpp:336
int flags
Definition:
core/cuda.hpp:320
int * refcount
Definition:
core/cuda.hpp:333
Definition:
core/cuda.hpp:359
GpuMatND()
default constructor
GpuMatND clone(Stream &stream) const
int dims
matrix dimensionality
Definition:
core/cuda.hpp:516
bool isSubmatrix() const
returns true if the matrix is a sub-matrix of another matrix
bool external() const
returns true if not empty and points to external(user-allocated) gpu memory
size_t total() const
returns the total number of array elements
GpuMatND clone() const
Creates a full copy of the array and the underlying data. The method creates a full copy of the array...
size_t elemSize1() const
returns the size of element channel in bytes
StepArray step
Definition:
core/cuda.hpp:524
bool isContinuous() const
int flags
Definition:
core/cuda.hpp:513
GpuMat createGpuMatHeader(IndexArray idx, Range rowRange, Range colRange) const
Creates a GpuMat header for a 2D plane part of an n-dim matrix.
size_t elemSize() const
returns element size in bytes
GpuMat createGpuMatHeader() const
GpuMat operator()(IndexArray idx, Range rowRange, Range colRange) const
Extracts a 2D plane part of an n-dim matrix. It differs from createGpuMatHeader(IndexArray,...
size_t totalMemSize() const
returns the size of underlying memory in bytes
GpuMatND(SizeArray size, int type)
int type() const
returns element type
SizeArray size
shape of this array
Definition:
core/cuda.hpp:519
GpuMatND operator()(const std::vector< Range > &ranges) const
Extracts a sub-matrix. The operator makes a new header for the specified sub-array of *this....
void create(SizeArray size, int type)
Allocates GPU memory. Suppose there is some GPU memory already allocated. In that case,...
GpuMatND(SizeArray size, int type, void *data, StepArray step=StepArray())
uchar * getDevicePtr() const
returns pointer to the first byte of the GPU memory
bool empty() const
returns true if data is null
Class with reference counting wrapping special memory type allocation functions from CUDA.
Definition:
core/cuda.hpp:731
GpuMat createGpuMatHeader() const
Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting f...
void release()
decrements the reference counter and releases the memory if needed.
This class encapsulates a queue of asynchronous calls.
Definition:
core/cuda.hpp:849
static CV_WRAP Stream & Null()
return Stream object for default CUDA stream
void enqueueHostCallback(StreamCallback callback, void *userData)
Adds a callback to be called on the host after all currently enqueued items in the stream have completed.
Class providing a set of static methods to check what NVIDIA* card architecture the CUDA module was b...
Definition:
core/cuda.hpp:1018
static bool builtWith(FeatureSet feature_set)
The following method checks whether the module was built with the support of the given feature:
void CV_EXPORTS_W copyTo(InputArray src, OutputArray dst, InputArray mask)
This is an overloaded member function, provided for convenience (python) Copies the matrix to another...
CV_EXPORTS_W void convertFp16(InputArray src, OutputArray dst)
Converts an array to half precision floating number.
CV_EXPORTS void swap(Mat &a, Mat &b)
Swaps two matrices
CV_EXPORTS_W void setDevice(int device)
Sets a device and initializes it for the current thread.
FeatureSet
Enumeration providing CUDA computing features.
Definition:
core/cuda.hpp:989
CV_EXPORTS_W int getDevice()
Returns the current device index set by cuda::setDevice or initialized by default.
CV_EXPORTS_W void resetDevice()
Explicitly destroys and cleans up all resources associated with the current device in the current process.
CV_EXPORTS_W int getCudaEnabledDeviceCount()
Returns the number of installed CUDA-enabled devices.
CV_EXPORTS bool deviceSupports(FeatureSet feature_set)
checks whether current device supports the given feature
CV_EXPORTS_W void setBufferPoolUsage(bool on)
BufferPool management (must be called before Stream creation)
CV_EXPORTS_W void unregisterPageLocked(Mat &m)
Unmaps the memory of matrix and makes it pageable again.
CV_EXPORTS_W void registerPageLocked(Mat &m)
Page-locks the memory of matrix and maps it for the device(s).
CV_EXPORTS_W void createContinuous(int rows, int cols, int type, OutputArray arr)
Creates a continuous matrix.
CV_EXPORTS_W void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr)
Ensures that the size of a matrix is big enough and the matrix has a proper type.
"black box" representation of the file storage associated with a file on disk.
Definition:
aruco.hpp:75
Definition:
cvstd_wrapper.hpp:74
Class that enables getting cudaEvent_t from cuda::Event
Definition:
cuda_stream_accessor.hpp:76
Definition:
core/cuda.hpp:344
Class that enables getting cudaStream_t from cuda::Stream
Definition:
cuda_stream_accessor.hpp:68