21 #ifndef JDFTX_CORE_GPUKERNELUTILS_H 22 #define JDFTX_CORE_GPUKERNELUTILS_H 25 #include <cuda_runtime.h> 26 #include <driver_types.h> 27 #include <vector_types.h> 40 {
int iDevice; cudaGetDevice(&iDevice);
41 cudaGetDeviceProperties(&prop, iDevice);
42 cudaFuncGetAttributes(&attr, gpuKernel);
48 #define kernelIndex(dir) (blockIdx.dir * blockDim.dir + threadIdx.dir) 51 #define kernelIndex1D() ((blockIdx.y*gridDim.x+blockIdx.x) * blockDim.x + threadIdx.x) 62 nPerBlock(
attr.maxThreadsPerBlock,1,1),
63 nBlocks(ceildiv(N, int(nPerBlock.x)),1,1)
65 while(
int(nBlocks.x) >
prop.maxGridSize[0])
66 { nBlocks.x = ceildiv(
int(nBlocks.x),2);
82 zBlockMax = ceildiv(S[0], std::min(
attr.maxThreadsPerBlock,
prop.maxThreadsDim[2]));
83 nPerBlock.z = ceildiv(S[0], zBlockMax);
85 int maxBlockXY =
attr.maxThreadsPerBlock/nPerBlock.z;
86 nBlocks.x = ceildiv(S[2], std::min(maxBlockXY,
prop.maxThreadsDim[0]));
87 nPerBlock.x = ceildiv(S[2],
int(nBlocks.x));
89 int maxBlockY =
attr.maxThreadsPerBlock/(nPerBlock.z*nPerBlock.x);
90 nBlocks.y = ceildiv(S[1], std::min(maxBlockY,
prop.maxThreadsDim[1]));
91 nPerBlock.y = ceildiv(S[1],
int(nBlocks.y));
107 #endif // JDFTX_CORE_GPUKERNELUTILS_H 3D launch configuration for symmetry-reduced G-space loops (z dimension folded for real data sets) ...
Definition: GpuKernelUtils.h:96
GpuLaunchConfig3D(GpuKernel *gpuKernel, vector3< int > S)
Set up blocks and grid for a 3D operation over S[0] x S[1] x S[2] data points.
Definition: GpuKernelUtils.h:79
cudaDeviceProp prop
properties of the currently running device
Definition: GpuKernelUtils.h:36
dim3 nBlocks
dimension of grid (note nBlocks could be 3D for really large kernels)
Definition: GpuKernelUtils.h:57
dim3 nPerBlock
dimension of block
Definition: GpuKernelUtils.h:56
dim3 nPerBlock
dimension of block
Definition: GpuKernelUtils.h:74
1D launch configuration
Definition: GpuKernelUtils.h:55
dim3 nBlocks
dimension of grid (note nBlocks could be 3D for really large kernels)
Definition: GpuKernelUtils.h:75
int zBlockMax
Grids are 2D, so need to loop over last dim.
Definition: GpuKernelUtils.h:76
3D launch configuration
Definition: GpuKernelUtils.h:73
void gpuErrorCheck()
Check for gpu errors and print a useful message (implemented in GpuUtils.cpp)
Base-class for launch configuration for gpu kernels.
Definition: GpuKernelUtils.h:34
GpuLaunchConfigHalf3D(GpuKernel *gpuKernel, vector3< int > S)
Just use the above after reducing the z-dimension to half.
Definition: GpuKernelUtils.h:98
GpuLaunchConfig(GpuKernel *gpuKernel)
Initialize the device and function properties.
Definition: GpuKernelUtils.h:39
cudaFuncAttributes attr
attributes of the function
Definition: GpuKernelUtils.h:35
GpuLaunchConfig1D(GpuKernel *gpuKernel, int N)
Set up blocks and grid for a 1D operation over N data points.
Definition: GpuKernelUtils.h:60