21 #ifndef JDFTX_CORE_GPUKERNELUTILS_H    22 #define JDFTX_CORE_GPUKERNELUTILS_H    25 #include <cuda_runtime.h>    26 #include <driver_types.h>    27 #include <vector_types.h>    40         {       
int iDevice; cudaGetDevice(&iDevice);
    41                 cudaGetDeviceProperties(&prop, iDevice);
    42                 cudaFuncGetAttributes(&attr, gpuKernel);
    48 #define kernelIndex(dir) (blockIdx.dir * blockDim.dir + threadIdx.dir)    51 #define kernelIndex1D() ((blockIdx.y*gridDim.x+blockIdx.x) * blockDim.x + threadIdx.x)    62         nPerBlock(
attr.maxThreadsPerBlock,1,1),
    63         nBlocks(ceildiv(N, int(nPerBlock.x)),1,1)
    65                 while(
int(nBlocks.x) > 
prop.maxGridSize[0])
    66                 {       nBlocks.x = ceildiv(
int(nBlocks.x),2);
    82                 zBlockMax = ceildiv(S[0], std::min(
attr.maxThreadsPerBlock, 
prop.maxThreadsDim[2]));
    83                 nPerBlock.z = ceildiv(S[0], zBlockMax);
    85                 int maxBlockXY = 
attr.maxThreadsPerBlock/nPerBlock.z;
    86                 nBlocks.x = ceildiv(S[2], std::min(maxBlockXY,
prop.maxThreadsDim[0]));
    87                 nPerBlock.x = ceildiv(S[2], 
int(nBlocks.x));
    89                 int maxBlockY = 
attr.maxThreadsPerBlock/(nPerBlock.z*nPerBlock.x);
    90                 nBlocks.y = ceildiv(S[1], std::min(maxBlockY,
prop.maxThreadsDim[1]));
    91                 nPerBlock.y = ceildiv(S[1], 
int(nBlocks.y));
   107 #endif // JDFTX_CORE_GPUKERNELUTILS_H 3D launch configuration for symmetry-reduced G-space loops (z dimension folded for real data sets) ...
Definition: GpuKernelUtils.h:96
 
GpuLaunchConfig3D(GpuKernel *gpuKernel, vector3< int > S)
Set up blocks and grid for a 3D operation over a data grid with dimensions S. 
Definition: GpuKernelUtils.h:79
 
cudaDeviceProp prop
properties of the currently running device 
Definition: GpuKernelUtils.h:36
 
dim3 nBlocks
dimension of grid (note nBlocks could be 3D for really large kernels) 
Definition: GpuKernelUtils.h:57
 
dim3 nPerBlock
dimension of block 
Definition: GpuKernelUtils.h:56
 
dim3 nPerBlock
dimension of block 
Definition: GpuKernelUtils.h:74
 
1D launch configuration 
Definition: GpuKernelUtils.h:55
 
dim3 nBlocks
dimension of grid (note nBlocks could be 3D for really large kernels) 
Definition: GpuKernelUtils.h:75
 
int zBlockMax
Grids are 2D, so need to loop over last dim. 
Definition: GpuKernelUtils.h:76
 
3D launch configuration 
Definition: GpuKernelUtils.h:73
 
void gpuErrorCheck()
Check for gpu errors and print a useful message (implemented in GpuUtils.cpp) 
 
Base-class for launch configuration for gpu kernels. 
Definition: GpuKernelUtils.h:34
 
GpuLaunchConfigHalf3D(GpuKernel *gpuKernel, vector3< int > S)
Just use the above after reducing the z-dimension to half. 
Definition: GpuKernelUtils.h:98
 
GpuLaunchConfig(GpuKernel *gpuKernel)
Initialize the device and function properties. 
Definition: GpuKernelUtils.h:39
 
cudaFuncAttributes attr
attributes of the function 
Definition: GpuKernelUtils.h:35
 
GpuLaunchConfig1D(GpuKernel *gpuKernel, int N)
Set up blocks and grid for a 1D operation over N data points. 
Definition: GpuKernelUtils.h:60