44#ifndef OPENCV_CORE_CUDA_HPP
45#define OPENCV_CORE_CUDA_HPP
48# error cuda.hpp header must be compiled as C++
51#include "opencv2/core.hpp"
65namespace cv {
namespace cuda {
114 virtual bool allocate(
GpuMat* mat,
int rows,
int cols,
size_t elemSize) = 0;
115 virtual void free(
GpuMat* mat) = 0;
137 GpuMat(
int rows,
int cols,
int type,
void* data,
size_t step = Mat::AUTO_STEP);
138 GpuMat(
Size size,
int type,
void* data,
size_t step = Mat::AUTO_STEP);
154 CV_WRAP
void create(
int rows,
int cols,
int type);
155 CV_WRAP
void create(
Size size,
int type);
168 CV_WRAP
void upload(InputArray arr);
178 CV_WRAP
void upload(InputArray arr,
Stream& stream);
231 CV_WRAP
void convertTo(
OutputArray dst,
int rtype,
double alpha,
double beta = 0.0)
const;
237 CV_WRAP
void convertTo(
OutputArray dst,
int rtype,
double alpha,
double beta,
Stream& stream)
const;
239 CV_WRAP
void assignTo(
GpuMat& m,
int type = -1)
const;
243 const uchar* ptr(
int y = 0)
const;
246 template<
typename _Tp> _Tp*
ptr(
int y = 0);
247 template<
typename _Tp>
const _Tp* ptr(
int y = 0)
const;
249 template <
typename _Tp>
operator PtrStepSz<_Tp>()
const;
250 template <
typename _Tp>
operator PtrStep<_Tp>()
const;
259 CV_WRAP
GpuMat rowRange(
int startrow,
int endrow)
const;
263 CV_WRAP
GpuMat colRange(
int startcol,
int endcol)
const;
272 CV_WRAP
GpuMat reshape(
int cn,
int rows = 0)
const;
275 CV_WRAP
void locateROI(
Size& wholeSize,
Point& ofs)
const;
278 CV_WRAP
GpuMat& adjustROI(
int dtop,
int dbottom,
int dleft,
int dright);
282 CV_WRAP
bool isContinuous()
const;
285 CV_WRAP
size_t elemSize()
const;
288 CV_WRAP
size_t elemSize1()
const;
291 CV_WRAP
int type()
const;
294 CV_WRAP
int depth()
const;
297 CV_WRAP
int channels()
const;
300 CV_WRAP
size_t step1()
const;
306 CV_WRAP
bool empty()
const;
309 CV_WRAP
void* cudaPtr()
const;
312 CV_WRAP
void updateContinuityFlag();
337 const uchar* dataend;
345 explicit GpuData(
size_t _size);
361 using SizeArray = std::vector<int>;
362 using StepArray = std::vector<size_t>;
363 using IndexArray = std::vector<int>;
390 GpuMatND(SizeArray size,
int type,
void* data, StepArray step = StepArray());
455#if defined(__GNUC__) && __GNUC__ < 5
466 void upload(InputArray src);
467 void upload(InputArray src,
Stream& stream);
504 void setFields(SizeArray size,
int type, StepArray step = StepArray());
530 std::shared_ptr<GpuData> data_;
695 CV_WRAP
GpuMat getBuffer(
int rows,
int cols,
int type);
698 CV_WRAP
GpuMat getBuffer(Size size, int type)
{
    // Convenience overload: unpack the Size and delegate to the
    // (rows, cols, type) overload, passing height as rows and width as cols.
    const int rows = size.height;
    const int cols = size.width;
    return getBuffer(rows, cols, type);
}
709CV_EXPORTS_W
void setBufferPoolConfig(
int deviceId,
size_t stackSize,
int stackCount);
733 enum AllocType { PAGE_LOCKED = 1, SHARED = 2, WRITE_COMBINED = 4 };
735 static MatAllocator* getAllocator(HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
737 CV_WRAP
explicit HostMem(HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
741 CV_WRAP
HostMem(
int rows,
int cols,
int type, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
742 CV_WRAP
HostMem(
Size size,
int type, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
745 CV_WRAP
explicit HostMem(InputArray arr, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
758 CV_WRAP
void create(
int rows,
int cols,
int type);
759 void create(
Size size,
int type);
763 CV_WRAP
HostMem reshape(
int cn,
int rows = 0)
const;
769 CV_WRAP
Mat createMatHeader()
const;
781 CV_WRAP
bool isContinuous()
const;
782 CV_WRAP
size_t elemSize()
const;
783 CV_WRAP
size_t elemSize1()
const;
784 CV_WRAP
int type()
const;
785 CV_WRAP
int depth()
const;
786 CV_WRAP
int channels()
const;
787 CV_WRAP
size_t step1()
const;
788 CV_WRAP
Size size()
const;
789 CV_WRAP
bool empty()
const;
800 const uchar* dataend;
802 AllocType alloc_type;
850 typedef void (
Stream::*bool_type)()
const;
// Intentionally empty const member function; it has the same shape
// (void, no arguments, const) as the bool_type member-pointer typedef above.
// NOTE(review): presumably this is the non-null value handed out by
// operator bool_type() (safe-bool idiom) - confirm against the implementation.
851 void this_type_does_not_support_comparisons()
const {}
854 typedef void (*StreamCallback)(
int status,
void* userData);
876 CV_WRAP
bool queryIfComplete()
const;
880 CV_WRAP
void waitForCompletion();
884 CV_WRAP
void waitEvent(
const Event& event);
900 operator bool_type()
const;
903 CV_WRAP
void* cudaPtr()
const;
913 friend class DefaultDeviceInitializer;
922 BLOCKING_SYNC = 0x01,
923 DISABLE_TIMING = 0x02,
933 CV_WRAP
bool queryIfComplete()
const;
936 CV_WRAP
void waitForCompletion();
939 CV_WRAP
static float elapsedTime(
const Event& start,
const Event& end);
990 FEATURE_SET_COMPUTE_10 = 10,
991 FEATURE_SET_COMPUTE_11 = 11,
992 FEATURE_SET_COMPUTE_12 = 12,
993 FEATURE_SET_COMPUTE_13 = 13,
994 FEATURE_SET_COMPUTE_20 = 20,
995 FEATURE_SET_COMPUTE_21 = 21,
996 FEATURE_SET_COMPUTE_30 = 30,
997 FEATURE_SET_COMPUTE_32 = 32,
998 FEATURE_SET_COMPUTE_35 = 35,
999 FEATURE_SET_COMPUTE_50 = 50,
1001 GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
1002 SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
1003 NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13,
1004 WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30,
1005 DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
1032 CV_WRAP
static bool has(
int major,
int minor);
1033 CV_WRAP
static bool hasPtx(
int major,
int minor);
1034 CV_WRAP
static bool hasBin(
int major,
int minor);
1036 CV_WRAP
static bool hasEqualOrLessPtx(
int major,
int minor);
1037 CV_WRAP
static bool hasEqualOrGreater(
int major,
int minor);
1038 CV_WRAP
static bool hasEqualOrGreaterPtx(
int major,
int minor);
1039 CV_WRAP
static bool hasEqualOrGreaterBin(
int major,
int minor);
1061 CV_WRAP
int deviceID()
const;
1067 CV_WRAP
size_t totalGlobalMem()
const;
1070 CV_WRAP
size_t sharedMemPerBlock()
const;
1073 CV_WRAP
int regsPerBlock()
const;
1076 CV_WRAP
int warpSize()
const;
1079 CV_WRAP
size_t memPitch()
const;
1082 CV_WRAP
int maxThreadsPerBlock()
const;
1091 CV_WRAP
int clockRate()
const;
1094 CV_WRAP
size_t totalConstMem()
const;
1097 CV_WRAP
int majorVersion()
const;
1100 CV_WRAP
int minorVersion()
const;
1103 CV_WRAP
size_t textureAlignment()
const;
1106 CV_WRAP
size_t texturePitchAlignment()
const;
1109 CV_WRAP
int multiProcessorCount()
const;
1112 CV_WRAP
bool kernelExecTimeoutEnabled()
const;
1115 CV_WRAP
bool integrated()
const;
1118 CV_WRAP
bool canMapHostMemory()
const;
1125 ComputeModeExclusiveProcess
1132 CV_WRAP
int maxTexture1D()
const;
1135 CV_WRAP
int maxTexture1DMipmap()
const;
1138 CV_WRAP
int maxTexture1DLinear()
const;
1144 CV_WRAP
Vec2i maxTexture2DMipmap()
const;
1147 CV_WRAP
Vec3i maxTexture2DLinear()
const;
1150 CV_WRAP
Vec2i maxTexture2DGather()
const;
1156 CV_WRAP
int maxTextureCubemap()
const;
1159 CV_WRAP
Vec2i maxTexture1DLayered()
const;
1162 CV_WRAP
Vec3i maxTexture2DLayered()
const;
1165 CV_WRAP
Vec2i maxTextureCubemapLayered()
const;
1168 CV_WRAP
int maxSurface1D()
const;
1177 CV_WRAP
Vec2i maxSurface1DLayered()
const;
1180 CV_WRAP
Vec3i maxSurface2DLayered()
const;
1183 CV_WRAP
int maxSurfaceCubemap()
const;
1186 CV_WRAP
Vec2i maxSurfaceCubemapLayered()
const;
1189 CV_WRAP
size_t surfaceAlignment()
const;
1192 CV_WRAP
bool concurrentKernels()
const;
1195 CV_WRAP
bool ECCEnabled()
const;
1198 CV_WRAP
int pciBusID()
const;
1201 CV_WRAP
int pciDeviceID()
const;
1204 CV_WRAP
int pciDomainID()
const;
1207 CV_WRAP
bool tccDriver()
const;
1210 CV_WRAP
int asyncEngineCount()
const;
1213 CV_WRAP
bool unifiedAddressing()
const;
1216 CV_WRAP
int memoryClockRate()
const;
1219 CV_WRAP
int memoryBusWidth()
const;
1222 CV_WRAP
int l2CacheSize()
const;
1225 CV_WRAP
int maxThreadsPerMultiProcessor()
const;
1228 CV_WRAP
void queryMemory(
size_t& totalMemory,
size_t& freeMemory)
const;
1229 CV_WRAP
size_t freeMemory()
const;
1230 CV_WRAP
size_t totalMemory()
const;
1245 CV_WRAP
bool isCompatible()
const;
1251CV_EXPORTS_W
void printCudaDeviceInfo(
int device);
1252CV_EXPORTS_W
void printShortCudaDeviceInfo(
int device);
1268#include "opencv2/core/cuda.inl.hpp"
This type is very similar to InputArray except that it is used for input/output and output function p...
Definition: mat.hpp:295
Custom array allocator
Definition: mat.hpp:470
n-dimensional dense array class
Definition: mat.hpp:802
Template class specifying a continuous subsequence (slice) of a sequence.
Definition: core/types.hpp:590
Template class for 2D rectangles
Definition: core/types.hpp:421
Template class for specifying the size of an image or rectangle.
Definition: core/types.hpp:316
Template class for short numerical vectors, a partial case of Matx
Definition: matx.hpp:342
BufferPool for use with CUDA streams
Definition: core/cuda.hpp:688
BufferPool(Stream &stream)
Gets the BufferPool for the given stream.
Class providing functionality for querying the specified GPU properties.
Definition: core/cuda.hpp:1045
bool supports(FeatureSet feature_set) const
Provides information on CUDA feature support.
const char * name() const
ASCII string identifying device
ComputeMode
Definition: core/cuda.hpp:1121
@ ComputeModeProhibited
Definition: core/cuda.hpp:1124
@ ComputeModeDefault
Definition: core/cuda.hpp:1122
@ ComputeModeExclusive
Definition: core/cuda.hpp:1123
Definition: core/cuda.hpp:917
CreateFlags
Definition: core/cuda.hpp:920
Definition: core/cuda.hpp:109
Base storage class for GPU memory with reference counting.
Definition: core/cuda.hpp:106
Allocator * allocator
allocator
Definition: core/cuda.hpp:340
void release()
decreases the reference counter and deallocates the data when the reference counter reaches 0
uchar * data
pointer to the data
Definition: core/cuda.hpp:329
~GpuMat()
destructor - calls release()
static CV_WRAP GpuMat::Allocator * defaultAllocator()
default allocator
int rows
the number of rows and columns
Definition: core/cuda.hpp:323
uchar * ptr(int y=0)
returns pointer to y-th row
GpuMat(int rows, int cols, int type, void *data, size_t step=Mat::AUTO_STEP)
constructor for GpuMat headers pointing to user-allocated data
_Tp * ptr(int y=0)
template version of the above method
uchar * datastart
helper fields used in locateROI and adjustROI
Definition: core/cuda.hpp:336
int flags
Definition: core/cuda.hpp:320
int * refcount
Definition: core/cuda.hpp:333
Definition: core/cuda.hpp:359
GpuMatND()
default constructor
GpuMatND clone(Stream &stream) const
int dims
matrix dimensionality
Definition: core/cuda.hpp:516
bool isSubmatrix() const
returns true if the matrix is a sub-matrix of another matrix
bool external() const
returns true if not empty and points to external (user-allocated) GPU memory
size_t total() const
returns the total number of array elements
GpuMatND clone() const
Creates a full copy of the array and the underlying data. The method creates a full copy of the array...
size_t elemSize1() const
returns the size of element channel in bytes
StepArray step
Definition: core/cuda.hpp:524
bool isContinuous() const
int flags
Definition: core/cuda.hpp:513
GpuMat createGpuMatHeader(IndexArray idx, Range rowRange, Range colRange) const
Creates a GpuMat header for a 2D plane part of an n-dim matrix.
size_t elemSize() const
returns element size in bytes
GpuMat createGpuMatHeader() const
GpuMat operator()(IndexArray idx, Range rowRange, Range colRange) const
Extracts a 2D plane part of an n-dim matrix. It differs from createGpuMatHeader(IndexArray,...
size_t totalMemSize() const
returns the size of underlying memory in bytes
GpuMatND(SizeArray size, int type)
int type() const
returns element type
SizeArray size
shape of this array
Definition: core/cuda.hpp:519
GpuMatND operator()(const std::vector< Range > &ranges) const
Extracts a sub-matrix. The operator makes a new header for the specified sub-array of *this....
void create(SizeArray size, int type)
Allocates GPU memory. Suppose there is some GPU memory already allocated. In that case,...
GpuMatND(SizeArray size, int type, void *data, StepArray step=StepArray())
uchar * getDevicePtr() const
returns pointer to the first byte of the GPU memory
bool empty() const
returns true if data is null
Class with reference counting wrapping special memory type allocation functions from CUDA.
Definition: core/cuda.hpp:731
GpuMat createGpuMatHeader() const
Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting f...
void release()
decrements the reference counter and releases the memory if needed.
This class encapsulates a queue of asynchronous calls.
Definition: core/cuda.hpp:849
static CV_WRAP Stream & Null()
return Stream object for default CUDA stream
void enqueueHostCallback(StreamCallback callback, void *userData)
Adds a callback to be called on the host after all currently enqueued items in the stream have comple...
Class providing a set of static methods to check what NVIDIA* card architecture the CUDA module was b...
Definition: core/cuda.hpp:1018
static bool builtWith(FeatureSet feature_set)
The following method checks whether the module was built with the support of the given feature:
void CV_EXPORTS_W copyTo(InputArray src, OutputArray dst, InputArray mask)
This is an overloaded member function, provided for convenience (python). Copies the matrix to another...
CV_EXPORTS_W void convertFp16(InputArray src, OutputArray dst)
Converts an array to half-precision floating-point numbers.
CV_EXPORTS void swap(Mat &a, Mat &b)
Swaps two matrices
CV_EXPORTS_W void setDevice(int device)
Sets a device and initializes it for the current thread.
FeatureSet
Enumeration providing CUDA computing features.
Definition: core/cuda.hpp:989
CV_EXPORTS_W int getDevice()
Returns the current device index set by cuda::setDevice or initialized by default.
CV_EXPORTS_W void resetDevice()
Explicitly destroys and cleans up all resources associated with the current device in the current pro...
CV_EXPORTS_W int getCudaEnabledDeviceCount()
Returns the number of installed CUDA-enabled devices.
CV_EXPORTS bool deviceSupports(FeatureSet feature_set)
checks whether the current device supports the given feature
CV_EXPORTS_W void setBufferPoolUsage(bool on)
BufferPool management (must be called before Stream creation)
CV_EXPORTS_W void unregisterPageLocked(Mat &m)
Unmaps the memory of matrix and makes it pageable again.
CV_EXPORTS_W void registerPageLocked(Mat &m)
Page-locks the memory of matrix and maps it for the device(s).
CV_EXPORTS_W void createContinuous(int rows, int cols, int type, OutputArray arr)
Creates a continuous matrix.
CV_EXPORTS_W void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr)
Ensures that the size of a matrix is big enough and the matrix has a proper type.
"black box" representation of the file storage associated with a file on disk.
Definition: aruco.hpp:75
Definition: cvstd_wrapper.hpp:74
Class that enables getting cudaEvent_t from cuda::Event
Definition: cuda_stream_accessor.hpp:76
Definition: core/cuda.hpp:344
Class that enables getting cudaStream_t from cuda::Stream
Definition: cuda_stream_accessor.hpp:68