43
#ifndef OPENCV_CUDA_TRANSFORM_DETAIL_HPP
44
#define OPENCV_CUDA_TRANSFORM_DETAIL_HPP
46
#include "../common.hpp"
47
#include "../vec_traits.hpp"
48
#include "../functional.hpp"
52
namespace
cv
{
namespace
cuda {
namespace
device
54
namespace
transform_detail
58
//! Vectorized I/O traits for unary transforms.
//! Maps the source/destination element types to the vector types used for a
//! single coalesced read/write of `shift` consecutive elements per thread.
template <typename T, typename D, int shift> struct UnaryReadWriteTraits
{
    typedef typename TypeVec<T, shift>::vec_type read_type;   // one vectorized load from the source row
    typedef typename TypeVec<D, shift>::vec_type write_type;  // one vectorized store to the destination row
};
//! Vectorized I/O traits for binary transforms.
//! Same idea as UnaryReadWriteTraits, but with two source element types.
template <typename T1, typename T2, typename D, int shift> struct BinaryReadWriteTraits
{
    typedef typename TypeVec<T1, shift>::vec_type read_type1;  // vectorized load from the first source row
    typedef typename TypeVec<T2, shift>::vec_type read_type2;  // vectorized load from the second source row
    typedef typename TypeVec<D, shift>::vec_type write_type;   // vectorized store to the destination row
};
//! Applies an operator element-wise to the components of a loaded vector,
//! honoring the per-pixel mask. Primary template; specialized below for each
//! supported vector width (1, 2, 3, 4 and 8 elements per thread).
template <int shift> struct OpUnroller;
//! Width-1 specialization: one element per thread.
template <> struct OpUnroller<1>
{
    // Unary form: dst.x = op(src.x) where the mask allows it.
    template <typename T, typename D, typename UnOp, typename Mask>
    static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
    {
        if (mask(y, x_shifted))
            dst.x = op(src.x);
    }

    // Binary form: dst.x = op(src1.x, src2.x) where the mask allows it.
    template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
    static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
    {
        if (mask(y, x_shifted))
            dst.x = op(src1.x, src2.x);
    }
};
//! Width-2 specialization: two elements per thread, mask checked per element.
template <> struct OpUnroller<2>
{
    // Unary form over the .x/.y components of a 2-element vector.
    template <typename T, typename D, typename UnOp, typename Mask>
    static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
    {
        if (mask(y, x_shifted))
            dst.x = op(src.x);
        if (mask(y, x_shifted + 1))
            dst.y = op(src.y);
    }

    // Binary form over the .x/.y components of two 2-element vectors.
    template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
    static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
    {
        if (mask(y, x_shifted))
            dst.x = op(src1.x, src2.x);
        if (mask(y, x_shifted + 1))
            dst.y = op(src1.y, src2.y);
    }
};
//! Width-3 specialization: three elements per thread, mask checked per element.
template <> struct OpUnroller<3>
{
    // Unary form over the .x/.y/.z components of a 3-element vector.
    template <typename T, typename D, typename UnOp, typename Mask>
    static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
    {
        if (mask(y, x_shifted))
            dst.x = op(src.x);
        if (mask(y, x_shifted + 1))
            dst.y = op(src.y);
        if (mask(y, x_shifted + 2))
            dst.z = op(src.z);
    }

    // Binary form over the .x/.y/.z components of two 3-element vectors.
    template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
    static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
    {
        if (mask(y, x_shifted))
            dst.x = op(src1.x, src2.x);
        if (mask(y, x_shifted + 1))
            dst.y = op(src1.y, src2.y);
        if (mask(y, x_shifted + 2))
            dst.z = op(src1.z, src2.z);
    }
};
//! Width-4 specialization: four elements per thread, mask checked per element.
template <> struct OpUnroller<4>
{
    // Unary form over the .x/.y/.z/.w components of a 4-element vector.
    template <typename T, typename D, typename UnOp, typename Mask>
    static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
    {
        if (mask(y, x_shifted))
            dst.x = op(src.x);
        if (mask(y, x_shifted + 1))
            dst.y = op(src.y);
        if (mask(y, x_shifted + 2))
            dst.z = op(src.z);
        if (mask(y, x_shifted + 3))
            dst.w = op(src.w);
    }

    // Binary form over the .x/.y/.z/.w components of two 4-element vectors.
    template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
    static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
    {
        if (mask(y, x_shifted))
            dst.x = op(src1.x, src2.x);
        if (mask(y, x_shifted + 1))
            dst.y = op(src1.y, src2.y);
        if (mask(y, x_shifted + 2))
            dst.z = op(src1.z, src2.z);
        if (mask(y, x_shifted + 3))
            dst.w = op(src1.w, src2.w);
    }
};
//! Width-8 specialization: eight elements per thread; the vector type exposes
//! fields a0..a7, each guarded by its own mask test.
template <> struct OpUnroller<8>
{
    // Unary form over the a0..a7 fields of an 8-element aggregate.
    template <typename T, typename D, typename UnOp, typename Mask>
    static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
    {
        if (mask(y, x_shifted))
            dst.a0 = op(src.a0);
        if (mask(y, x_shifted + 1))
            dst.a1 = op(src.a1);
        if (mask(y, x_shifted + 2))
            dst.a2 = op(src.a2);
        if (mask(y, x_shifted + 3))
            dst.a3 = op(src.a3);
        if (mask(y, x_shifted + 4))
            dst.a4 = op(src.a4);
        if (mask(y, x_shifted + 5))
            dst.a5 = op(src.a5);
        if (mask(y, x_shifted + 6))
            dst.a6 = op(src.a6);
        if (mask(y, x_shifted + 7))
            dst.a7 = op(src.a7);
    }

    // Binary form over the a0..a7 fields of two 8-element aggregates.
    template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
    static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
    {
        if (mask(y, x_shifted))
            dst.a0 = op(src1.a0, src2.a0);
        if (mask(y, x_shifted + 1))
            dst.a1 = op(src1.a1, src2.a1);
        if (mask(y, x_shifted + 2))
            dst.a2 = op(src1.a2, src2.a2);
        if (mask(y, x_shifted + 3))
            dst.a3 = op(src1.a3, src2.a3);
        if (mask(y, x_shifted + 4))
            dst.a4 = op(src1.a4, src2.a4);
        if (mask(y, x_shifted + 5))
            dst.a5 = op(src1.a5, src2.a5);
        if (mask(y, x_shifted + 6))
            dst.a6 = op(src1.a6, src2.a6);
        if (mask(y, x_shifted + 7))
            dst.a7 = op(src1.a7, src2.a7);
    }
};
//! Vectorized unary transform kernel.
//! Each thread handles ft::smart_shift consecutive elements of one row: when
//! the whole vector lies inside the row it is moved with a single coalesced
//! load/store; the row tail falls back to scalar element accesses.
//! Precondition (enforced by TransformDispatcher<true>): src_/dst_ data
//! pointers and steps are aligned to ft::smart_shift * sizeof(element).
template <typename T, typename D, typename UnOp, typename Mask>
static __global__ void transformSmart(const PtrStepSz<T> src_, PtrStep<D> dst_, const Mask mask, const UnOp op)
{
    typedef TransformFunctorTraits<UnOp> ft;
    typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::read_type read_type;
    typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::write_type write_type;

    const int x = threadIdx.x + blockIdx.x * blockDim.x;
    const int y = threadIdx.y + blockIdx.y * blockDim.y;
    const int x_shifted = x * ft::smart_shift;  // index of this thread's first element

    if (y < src_.rows)
    {
        const T* src = src_.ptr(y);
        D* dst = dst_.ptr(y);

        if (x_shifted + ft::smart_shift - 1 < src_.cols)
        {
            // Fast path: the full vector fits in the row — one coalesced
            // read/write, masking handled per element by OpUnroller.
            const read_type src_n_el = ((const read_type*)src)[x];
            OpUnroller<ft::smart_shift>::unroll(src_n_el, ((write_type*)dst)[x], mask, op, x_shifted, y);
        }
        else
        {
            // Row tail: process the remaining elements one at a time.
            for (int real_x = x_shifted; real_x < src_.cols; ++real_x)
            {
                if (mask(y, real_x))
                    dst[real_x] = op(src[real_x]);
            }
        }
    }
}
//! Scalar unary transform kernel: one element per thread with full bounds and
//! mask checks. Used when alignment rules out the vectorized kernel.
template <typename T, typename D, typename UnOp, typename Mask>
__global__ static void transformSimple(const PtrStepSz<T> src, PtrStep<D> dst, const Mask mask, const UnOp op)
{
    const int x = blockDim.x * blockIdx.x + threadIdx.x;
    const int y = blockDim.y * blockIdx.y + threadIdx.y;

    if (x < src.cols && y < src.rows && mask(y, x))
    {
        dst.ptr(y)[x] = op(src.ptr(y)[x]);
    }
}
//! Vectorized binary transform kernel.
//! Same strategy as the unary transformSmart, with two source rows: a single
//! coalesced load per source (and one store) when the whole vector fits in
//! the row, scalar fallback for the row tail.
//! Precondition (enforced by TransformDispatcher<true>): all data pointers
//! and steps are aligned to ft::smart_shift * sizeof(element).
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __global__ void transformSmart(const PtrStepSz<T1> src1_, const PtrStep<T2> src2_, PtrStep<D> dst_,
    const Mask mask, const BinOp op)
{
    typedef TransformFunctorTraits<BinOp> ft;
    typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type1 read_type1;
    typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type2 read_type2;
    typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::write_type write_type;

    const int x = threadIdx.x + blockIdx.x * blockDim.x;
    const int y = threadIdx.y + blockIdx.y * blockDim.y;
    const int x_shifted = x * ft::smart_shift;  // index of this thread's first element

    if (y < src1_.rows)
    {
        const T1* src1 = src1_.ptr(y);
        const T2* src2 = src2_.ptr(y);
        D* dst = dst_.ptr(y);

        if (x_shifted + ft::smart_shift - 1 < src1_.cols)
        {
            // Fast path: one coalesced read per source and one coalesced write.
            const read_type1 src1_n_el = ((const read_type1*)src1)[x];
            const read_type2 src2_n_el = ((const read_type2*)src2)[x];

            OpUnroller<ft::smart_shift>::unroll(src1_n_el, src2_n_el, ((write_type*)dst)[x], mask, op, x_shifted, y);
        }
        else
        {
            // Row tail: process the remaining elements one at a time.
            for (int real_x = x_shifted; real_x < src1_.cols; ++real_x)
            {
                if (mask(y, real_x))
                    dst[real_x] = op(src1[real_x], src2[real_x]);
            }
        }
    }
}
//! Scalar binary transform kernel: one element per thread with full bounds
//! and mask checks. Used when alignment rules out the vectorized kernel.
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __global__ void transformSimple(const PtrStepSz<T1> src1, const PtrStep<T2> src2, PtrStep<D> dst,
    const Mask mask, const BinOp op)
{
    const int x = blockDim.x * blockIdx.x + threadIdx.x;
    const int y = blockDim.y * blockIdx.y + threadIdx.y;

    if (x < src1.cols && y < src1.rows && mask(y, x))
    {
        const T1 src1_data = src1.ptr(y)[x];
        const T2 src2_data = src2.ptr(y)[x];
        dst.ptr(y)[x] = op(src1_data, src2_data);
    }
}
//! Host-side launcher selecting between the simple (scalar) kernels and the
//! smart (vectorized) kernels. Specialized below on UseSmart.
template <bool UseSmart> struct TransformDispatcher;
//! Simple dispatcher: always launches the scalar one-element-per-thread
//! kernels. No alignment requirements on the inputs.
template<> struct TransformDispatcher<false>
{
    // Launch the unary scalar kernel on `stream` (0 = default stream, which
    // additionally blocks until completion).
    template <typename T, typename D, typename UnOp, typename Mask>
    static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
    {
        typedef TransformFunctorTraits<UnOp> ft;

        const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
        const dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y), 1);

        transformSimple<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
        cudaSafeCall( cudaGetLastError() );

        // Only synchronize on the default stream; user streams stay async.
        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }

    // Launch the binary scalar kernel on `stream`.
    template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
    static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
    {
        typedef TransformFunctorTraits<BinOp> ft;

        const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
        const dim3 grid(divUp(src1.cols, threads.x), divUp(src1.rows, threads.y), 1);

        transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
        cudaSafeCall( cudaGetLastError() );

        // Only synchronize on the default stream; user streams stay async.
        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }
};
//! Smart dispatcher: launches the vectorized kernels when every buffer's data
//! pointer and row step are aligned to the vector width; otherwise falls back
//! to the simple scalar dispatcher.
template<> struct TransformDispatcher<true>
{
    // Launch the unary vectorized kernel, or fall back on misalignment.
    template <typename T, typename D, typename UnOp, typename Mask>
    static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
    {
        typedef TransformFunctorTraits<UnOp> ft;

        // A smart_shift of 1 means the functor gains nothing from
        // vectorization — such functors must use TransformDispatcher<false>.
        CV_StaticAssert(ft::smart_shift != 1, "");

        if (!isAligned(src.data, ft::smart_shift * sizeof(T)) || !isAligned(src.step, ft::smart_shift * sizeof(T)) ||
            !isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
        {
            // Vectorized loads would be misaligned: use the scalar path.
            TransformDispatcher<false>::call(src, dst, op, mask, stream);
            return;
        }

        const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
        // Each thread covers smart_shift columns, so shrink the grid width.
        const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1);

        transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
        cudaSafeCall( cudaGetLastError() );

        // Only synchronize on the default stream; user streams stay async.
        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }

    // Launch the binary vectorized kernel, or fall back on misalignment.
    template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
    static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
    {
        typedef TransformFunctorTraits<BinOp> ft;

        CV_StaticAssert(ft::smart_shift != 1, "");

        if (!isAligned(src1.data, ft::smart_shift * sizeof(T1)) || !isAligned(src1.step, ft::smart_shift * sizeof(T1)) ||
            !isAligned(src2.data, ft::smart_shift * sizeof(T2)) || !isAligned(src2.step, ft::smart_shift * sizeof(T2)) ||
            !isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
        {
            // Vectorized loads would be misaligned: use the scalar path.
            TransformDispatcher<false>::call(src1, src2, dst, op, mask, stream);
            return;
        }

        const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
        // Each thread covers smart_shift columns, so shrink the grid width.
        const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1);

        transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
        cudaSafeCall( cudaGetLastError() );

        // Only synchronize on the default stream; user streams stay async.
        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }
};
static int divUp(int a, unsigned int b)
Integer division with result round up.
Definition:
utility.hpp:482
static bool isAligned(const T &data)
Alignment check of passed values
Definition:
utility.hpp:517
"black box" representation of the file storage associated with a file on disk.
Definition:
aruco.hpp:75