JDFTx  1.2.0
GpuKernelUtils.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------
2 Copyright 2011 Ravishankar Sundararaman
3 
4 This file is part of JDFTx.
5 
6 JDFTx is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10 
11 JDFTx is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with JDFTx. If not, see <http://www.gnu.org/licenses/>.
18 -------------------------------------------------------------------*/
19 
20 
21 #ifndef JDFTX_CORE_GPUKERNELUTILS_H
22 #define JDFTX_CORE_GPUKERNELUTILS_H
23 
24 #include <algorithm>
25 #include <cuda_runtime.h>
26 #include <driver_types.h>
27 #include <vector_types.h>
28 #include <core/vector3.h>
29 
32 
//! Base-class for launch configuration for gpu kernels.
//! Caches the attributes of the kernel and the properties of the current device,
//! which the derived launch configurations use to size blocks and grids.
//! NOTE(review): the struct header line (listing line 34) is not present in this listing.
35 { cudaFuncAttributes attr; //!< attributes of the function
36  cudaDeviceProp prop; //!< properties of the currently running device
37 
 //! Initialize the device and function properties
 //! @param gpuKernel Pointer to the kernel whose attributes are queried
 //! NOTE(review): the return codes of the three CUDA runtime calls below are not checked here;
 //! on failure attr / prop would be left unset - confirm that callers invoke gpuErrorCheck().
39  template<typename GpuKernel> GpuLaunchConfig(GpuKernel* gpuKernel)
40  { int iDevice; cudaGetDevice(&iDevice); //index of the device currently in use by this host thread
41  cudaGetDeviceProperties(&prop, iDevice); //fills prop for that device
42  cudaFuncGetAttributes(&attr, gpuKernel); //fills attr (e.g. maxThreadsPerBlock) for this kernel
43  }
44 };
45 
46 
47 //Get the logical index of the kernel thread along one axis (dir is x, y, or z):
//block offset along that axis (blockIdx.dir * blockDim.dir) plus the thread's offset within its block.
//Only meaningful inside device code, where blockIdx, blockDim and threadIdx are defined.
48 #define kernelIndex(dir) (blockIdx.dir * blockDim.dir + threadIdx.dir)
49 
50 //Get the logical 1D index, even if the grid is 2D (required for very large 1D kernels)
//Blocks are linearized row-wise as blockIdx.y*gridDim.x+blockIdx.x, then scaled by the block size in x.
//GpuLaunchConfig1D may launch more threads than data points, so kernels using this index
//must still bounds-check it against their element count.
//NOTE(review): the arithmetic is done in the (unsigned int) type of the CUDA built-in index variables - confirm the range is sufficient for the largest launches.
51 #define kernelIndex1D() ((blockIdx.y*gridDim.x+blockIdx.x) * blockDim.x + threadIdx.x)
52 
53 
//! 1D launch configuration
//! NOTE(review): the struct header line (listing line 55) is not present in this listing;
//! the constructor's initializer list shows that it derives from GpuLaunchConfig.
56 { dim3 nPerBlock; //!< dimension of block
57  dim3 nBlocks; //!< dimension of grid (becomes 2D if the 1D grid would exceed the device limit in x)
58 
 //! Set up blocks and grid for a 1D operation over N data points.
 //! @param gpuKernel Pointer to the kernel to be launched (used to query its thread limit)
 //! @param N Number of data points to cover
60  template<typename GpuKernel> GpuLaunchConfig1D(GpuKernel* gpuKernel, int N)
61  : GpuLaunchConfig(gpuKernel),
62  nPerBlock(attr.maxThreadsPerBlock,1,1), //use the largest block that this kernel supports
63  nBlocks(ceildiv(N, int(nPerBlock.x)),1,1) //relies on nPerBlock being declared (hence initialized) before nBlocks; ceildiv presumably rounds up - defined elsewhere
64  { //If the grid is too big, make it 2D:
 //Each pass halves the x extent (via ceildiv) and doubles the y extent until x fits within prop.maxGridSize[0].
 //NOTE(review): nBlocks.y is not checked against prop.maxGridSize[1].
65  while(int(nBlocks.x) > prop.maxGridSize[0])
66  { nBlocks.x = ceildiv(int(nBlocks.x),2);
67  nBlocks.y *= 2;
68  }
69  }
70 };
71 
//! 3D launch configuration
//! NOTE(review): the struct header line (listing line 73) is not present in this listing;
//! the constructor's initializer list shows that it derives from GpuLaunchConfig.
74 { dim3 nPerBlock; //!< dimension of block
75  dim3 nBlocks; //!< dimension of grid (only x and y are assigned by the constructor)
76  int zBlockMax; //!< Grids are 2D, so need to loop over last dim.
77 
 //! Set up blocks and grid for a 3D operation over S[0] x S[1] x S[2] data points.
 //! Dimension mapping used below: S[0] -> z, S[1] -> y, S[2] -> x.
 //! @param gpuKernel Pointer to the kernel to be launched (used to query its thread limit)
 //! @param S Number of data points in each of the three dimensions
 //! NOTE(review): if S[0] is 0, zBlockMax presumably evaluates to 0 and the next ceildiv would
 //! divide by zero - confirm that callers never pass an empty dimension.
79  template<typename GpuKernel> GpuLaunchConfig3D(GpuKernel* gpuKernel, vector3<int> S)
80  : GpuLaunchConfig(gpuKernel)
81  { // Try to minimize zBlockMax and maximize block size within constraint:
 // (the z block extent is limited by both the kernel's thread limit and the device's z-dimension limit)
82  zBlockMax = ceildiv(S[0], std::min(attr.maxThreadsPerBlock, prop.maxThreadsDim[2]));
83  nPerBlock.z = ceildiv(S[0], zBlockMax);
84  // For the chosen z configuration, maximize x block size within constraint
85  int maxBlockXY = attr.maxThreadsPerBlock/nPerBlock.z; //threads per block left over for x and y combined
86  nBlocks.x = ceildiv(S[2], std::min(maxBlockXY,prop.maxThreadsDim[0]));
87  nPerBlock.x = ceildiv(S[2], int(nBlocks.x)); //spread S[2] evenly over the chosen number of x blocks
88  // For the chosen x and z configuration, maximize y block size within constraint
89  int maxBlockY = attr.maxThreadsPerBlock/(nPerBlock.z*nPerBlock.x); //threads per block left over for y
90  nBlocks.y = ceildiv(S[1], std::min(maxBlockY,prop.maxThreadsDim[1]));
91  nPerBlock.y = ceildiv(S[1], int(nBlocks.y)); //spread S[1] evenly over the chosen number of y blocks
92  }
93 };
94 
//! 3D launch configuration for symmetry-reduced G-space loops (z dimension folded for real data sets)
//! NOTE(review): the struct header line (listing line 96) is not present in this listing;
//! the constructor's initializer list shows that it derives from GpuLaunchConfig3D.
97 {
 //! Just use the 3D configuration after reducing the last dimension from S[2] to S[2]/2+1.
 //! @param gpuKernel Pointer to the kernel to be launched
 //! @param S Full (unreduced) number of data points in each dimension
98  template<typename GpuKernel> GpuLaunchConfigHalf3D(GpuKernel* gpuKernel, vector3<int> S)
99  : GpuLaunchConfig3D(gpuKernel, vector3<int>(S[0], S[1], S[2]/2+1))
100  {
101  }
102 };
103 
//! Check for gpu errors and print a useful message (implemented in GpuUtils.cpp)
105 void gpuErrorCheck();
106 
107 #endif // JDFTX_CORE_GPUKERNELUTILS_H
3D launch configuration for symmetry-reduced G-space loops (z dimension folded for real data sets) ...
Definition: GpuKernelUtils.h:96
GpuLaunchConfig3D(GpuKernel *gpuKernel, vector3< int > S)
Set up blocks and grid for a 3D operation over S[0] x S[1] x S[2] data points.
Definition: GpuKernelUtils.h:79
cudaDeviceProp prop
properties of the currently running device
Definition: GpuKernelUtils.h:36
dim3 nBlocks
dimension of grid (note nBlocks could be 2D for really large kernels)
Definition: GpuKernelUtils.h:57
dim3 nPerBlock
dimension of block
Definition: GpuKernelUtils.h:56
dim3 nPerBlock
dimension of block
Definition: GpuKernelUtils.h:74
1D launch configuration
Definition: GpuKernelUtils.h:55
dim3 nBlocks
dimension of grid (only x and y are set; the last dimension is looped over, see zBlockMax)
Definition: GpuKernelUtils.h:75
int zBlockMax
Grids are 2D, so need to loop over last dim.
Definition: GpuKernelUtils.h:76
3D launch configuration
Definition: GpuKernelUtils.h:73
void gpuErrorCheck()
Check for gpu errors and print a useful message (implemented in GpuUtils.cpp)
Base-class for launch configuration for gpu kernels.
Definition: GpuKernelUtils.h:34
GpuLaunchConfigHalf3D(GpuKernel *gpuKernel, vector3< int > S)
Just use the above after reducing the z-dimension to half.
Definition: GpuKernelUtils.h:98
GpuLaunchConfig(GpuKernel *gpuKernel)
Initialize the device and function properties.
Definition: GpuKernelUtils.h:39
cudaFuncAttributes attr
attributes of the function
Definition: GpuKernelUtils.h:35
GpuLaunchConfig1D(GpuKernel *gpuKernel, int N)
Set up blocks and grid for a 1D operation over N data points.
Definition: GpuKernelUtils.h:60