Files
file	Blip.h

file	EnergyComponents.h
	Represent components of the (free) energy.

file	GpuKernelUtils.h
	Common utility functions/macros for the gpu kernels and launchers in the .cu files.

file	GpuUtil.h

file	LoopMacros.h

file	MPIUtil.h
	Helper classes for MPI parallelization.

file	SphericalHarmonics.h

file	Spline.h
	Spline interpolation routines.

file	string.h
	STL strings and streams with case insensitive comparison.

file	Thread.h
	Utilities for threading (wrappers around std::thread)

file	Units.h
	Commonly used measurement units in terms of atomic units.

file	Util.h
	Miscellaneous utilities.

file	symbols.h

file	Euler.h
	Various Euler angle related utilities.

Classes
class	BlipConverter

class	BlipResampler

class	EnergyComponents

struct	GpuLaunchConfig
	Base-class for launch configuration for gpu kernels. More...

struct	GpuLaunchConfig1D
	1D launch configuration More...

struct	GpuLaunchConfig3D
	3D launch configuration More...

struct	GpuLaunchConfigHalf3D
	3D launch configuration for symmetry-reduced G-space loops (z dimension folded for real data sets) More...

class	ManagedMemory< T >
	Base class for managed memory of a specified data type. More...

class	MPIUtil
	MPI wrapper class. More...

class	TaskDivision
	Helper for optimally dividing a specified number of (equal) tasks over MPI. More...

struct	YlmProdTerm
	Term in real spherical harmonic expansion of a product of two real spherical harmonics. More...

struct	ichar_traits
	Case insensitive character trait. More...

struct	ifstream

struct	ofstream

struct	istringstream

struct	ostringstream

struct	InitParams
	Parameters used for common initialization functions. More...

class	StopWatch

class	EnumStringMap< Enum >
	A template to ease option parsing (maps enums <--> strings) More...

Macros
#define	kernelIndex(dir) (blockIdx.dir * blockDim.dir + threadIdx.dir)

#define	kernelIndex1D() ((blockIdx.ygridDim.x+blockIdx.x) blockDim.x + threadIdx.x)

#define	THREAD_rLoop(code)
	One thread of a loop over real space (see applyFunc_r_sub for example) More...

#define	COMPUTE_rIndices
	The GPU equivalent of THREAD_rLoop. More...

#define	THREAD_fullGspaceLoop(code)
	One thread of a loop over G-space (see L_sub in PHLO.cpp for example) More...

#define	COMPUTE_fullGindices

#define	THREAD_halfGspaceLoop(code)
	One thread of a loop over symmetry-reduced G-space (see applyFuncGsq_sub in Operators.h for example) More...

#define	COMPUTE_halfGindices

#define	IS_NYQUIST ( (!(2iG[0]-S[0])) \| (!(2iG[1]-S[1])) \| (!(2*iG[2]-S[2])) )

#define	SwitchTemplate_lm(l, m, fTemplate, argList)
	Switch a function templated over l,m for all supported l,m with parenthesis enclosed argument list argList.

#define	DECLARE_YlmPrime(l, m) template<> __hostanddev__ vector3<> YlmPrime<l*(l+1)+m>(const vector3<>& qHat) { return YlmPrime<l,m>(qHat); }

#define	TIME(title, fp, code)
	Elapsed time in seconds (from start of program) More...

#define	assert(expr) (void)((expr) ? 0 : assertStackTraceExit(#expr, __func__, __FILE__, __LINE__))
	A custom assertion with stack trace (NOTE: enabled in release modes as well)

#define	logPrintf(...) fprintf(globalLog, __VA_ARGS__)
	printf() for log files

#define	logFlush() fflush(globalLog)
	fflush() for log files

#define	die(...)
	Quit with an error message (formatted using printf()). Must be called from all processes. More...

#define	die_alone(...)
	Version of die that should only be used when it is impossible to guarantee synchronized calls from all processes. More...

#define	PRIdPTR "zd"

Typedefs
typedef std::basic_string< char, ichar_traits >	string
	Case-insensitive string.

Enumerations
enum class	AtomicSymbol : int { H = 1 , He = 2 , Li = 3 , Be = 4 , B = 5 , C = 6 , N = 7 , O = 8 , F = 9 , Ne = 10 , Na = 11 , Mg = 12 , Al = 13 , Si = 14 , P = 15 , S = 16 , Cl = 17 , Ar = 18 , K = 19 , Ca = 20 , Sc = 21 , Ti = 22 , V = 23 , Cr = 24 , Mn = 25 , Fe = 26 , Co = 27 , Ni = 28 , Cu = 29 , Zn = 30 , Ga = 31 , Ge = 32 , As = 33 , Se = 34 , Br = 35 , Kr = 36 , Rb = 37 , Sr = 38 , Y = 39 , Zr = 40 , Nb = 41 , Mo = 42 , Tc = 43 , Ru = 44 , Rh = 45 , Pd = 46 , Ag = 47 , Cd = 48 , In = 49 , Sn = 50 , Sb = 51 , Te = 52 , I = 53 , Xe = 54 , Cs = 55 , Ba = 56 , La = 57 , Ce = 58 , Pr = 59 , Nd = 60 , Pm = 61 , Sm = 62 , Eu = 63 , Gd = 64 , Tb = 65 , Dy = 66 , Ho = 67 , Er = 68 , Tm = 69 , Yb = 70 , Lu = 71 , Hf = 72 , Ta = 73 , W = 74 , Re = 75 , Os = 76 , Ir = 77 , Pt = 78 , Au = 79 , Hg = 80 , Tl = 81 , Pb = 82 , Bi = 83 , Po = 84 , At = 85 , Rn = 86 , Fr = 87 , Ra = 88 , Ac = 89 , Th = 90 , Pa = 91 , U = 92 , Np = 93 , Pu = 94 , Am = 95 , Cm = 96 , Bk = 97 , Cf = 98 , Es = 99 , Fm = 100 , Md = 101 , No = 102 , Lr = 103 , Rf = 104 , Db = 105 , Sg = 106 , Bh = 107 , Hs = 108 , Mt = 109 , Ds = 110 , Rg = 111 , Cn = 112 , Uut = 113 , Uuq = 114 , Uup = 115 , Uuh = 116 , Uus = 117 , Uuo = 118 }
	Atomic symbol to atomic number.

Functions
double	Tblip (const complexScalarField &phi, double tMax=0, int i0max=0, int i1max=0, int i2max=0)
	Compute the kinetic energy for a blip orbital phi (and set max local KE and location in unit cell)

double	Vblip (const complexScalarField &phi, const ScalarField &V)
	Compute the local potential energy for blip orbital phi in blip potential V.

void	gpuErrorCheck ()
	Check for gpu errors and print a useful message (implemented in GpuUtils.cpp) More...

bool	gpuInit (FILE fpLog=stdout, const class MPIUtil mpiHostGpu=0, double *nGPUs=0)

bool	isGpuMine ()

bool	isGpuEnabled ()
	A utility function to eliminate ugly #ifdef's when not required.

double	bessel_jl (int l, double x)
	Spherical bessel function.

template<int lm>
__hostanddev__ double	Ylm (const vector3<> &qhat)
	Index by combined lm := l*(l+1)+m index (useful when static-looping over all l,m)

template<int l, int m>
__hostanddev__ double	Ylm (const vector3<> &qhat)
	Index by l and m separately.

template<int l, int m>
void	set_Ylm (const vector3<> qHat, double &result)
	Use above macro to provide a non-templated version of the function.

double	Ylm (int l, int m, const vector3<> &qHat)

std::vector< YlmProdTerm >	expandYlmProd (int lm1, int lm2)

std::vector< YlmProdTerm >	expandYlmProd (int l1, int m1, int l2, int m2)
	Wrapper function expandYlmProd with individual indices.

template<int l, int m>
__hostanddev__ vector3	YlmPrime (const vector3<> &qHat)
	Derivative of spherical Harmonic with repect to iDir'th component of qHat with separate l,m indices. More...

template<int lm>
__hostanddev__ vector3	YlmPrime (const vector3<> &qHat)
	Derivative of spherical Harmonic with repect to iDir'th component of qHat with combined lm index. More...

	DECLARE_YlmPrime (0, 0) DECLARE_YlmPrime(1

	DECLARE_YlmPrime (1, 0) DECLARE_YlmPrime(1

	DECLARE_YlmPrime (2,-2) DECLARE_YlmPrime(2

	DECLARE_YlmPrime (2, 0) DECLARE_YlmPrime(2

	DECLARE_YlmPrime (2, 2) DECLARE_YlmPrime(3

	DECLARE_YlmPrime (3,-2) DECLARE_YlmPrime(3

	DECLARE_YlmPrime (3, 0) DECLARE_YlmPrime(3

	DECLARE_YlmPrime (3, 2) DECLARE_YlmPrime(3

	DECLARE_YlmPrime (4,-4) DECLARE_YlmPrime(4

	DECLARE_YlmPrime (4, 0) DECLARE_YlmPrime(4

	DECLARE_YlmPrime (4, 2) DECLARE_YlmPrime(4

	DECLARE_YlmPrime (4, 4) DECLARE_YlmPrime(5

	DECLARE_YlmPrime (5,-4) DECLARE_YlmPrime(5

	DECLARE_YlmPrime (5, 0) DECLARE_YlmPrime(5

	DECLARE_YlmPrime (5, 2) DECLARE_YlmPrime(5

	DECLARE_YlmPrime (5, 4) DECLARE_YlmPrime(5

	DECLARE_YlmPrime (6,-6) DECLARE_YlmPrime(6

	DECLARE_YlmPrime (6, 0) DECLARE_YlmPrime(6

	DECLARE_YlmPrime (6, 2) DECLARE_YlmPrime(6

	DECLARE_YlmPrime (6, 4) DECLARE_YlmPrime(6

	DECLARE_YlmPrime (6, 6) template< int l
	Non-templated version of YlmPrime (for debugging)

int m void	set_YlmPrime (const vector3<> qHat, vector3<> &result)

vector3	YlmPrime (int l, int m, const vector3<> &qHat)

void	trim (string &s)
	Remove leading and trailing spaces from a string.

istream &	operator>> (istream &is, string &str)

ostream &	operator<< (ostream &os, const string &str)

istream &	getline (istream &is, string &str, char delim='\n')

bool	shouldThreadOperators ()

void	suspendOperatorThreading ()
	call from multi-threaded top-level code to disable threading within operators called from a parallel section

void	resumeOperatorThreading ()
	call after a parallel section in top-level code to resume threading within subsequent operator calls

template<typename Callable , typename ... Args>
void	threadLaunch (int nThreads, Callable *func, size_t nJobs, Args... args)
	A simple utility for running muliple threads. More...

template<typename Callable , typename ... Args>
void	threadLaunch (Callable *func, size_t nJobs, Args... args)

template<typename Callable , typename ... Args>
void	threadedLoop (Callable *func, size_t nIter, Args... args)
	A parallelized loop. More...

template<typename Callable , typename ... Args>
double	threadedAccumulate (Callable *func, size_t nIter, Args... args)
	A parallelized loop with an accumulated return value. More...

void	printVersionBanner (const InitParams *ip=0)
	Print package name, version, revision etc. to log.

void	initSystem (int argc, char *argv, const InitParams ip=0)
	Init MPI (if not already done), print banner, set up threads (play nice with job schedulers), GPU and signal handlers.

void	initSystemCmdline (int argc, char **argv, InitParams &ip)
	initSystem along with commandline options

void	finalizeSystem (bool successful=true)
	Clean-up corresponding to initSystem(), final messages (depending on successful) and clean-up MPI.

double	clock_us ()

double	clock_sec ()
	Elapsed time in microseconds (from start of program)

void	printStack (bool detailedStackScript=false)
	Print a minimal stack trace and optionally write a script that, when run, will print a more detailed stacktrace.

void	stackTraceExit (int code)
	Exit on error with stack trace.

int	assertStackTraceExit (const char expr, const char function, const char *file, long line)

void	logSuspend ()
	temporarily disable all log output (until logResume())

void	logResume ()
	re-enable logging after a logSuspend() call

off_t	fileSize (const char *filename)
	Get the size of a file.

void	convertToLE (void *ptr, size_t size, size_t nmemb)
	Convert data from operating endianness to little-endian.

void	convertFromLE (void *ptr, size_t size, size_t nmemb)
	Convert data from little-endian to operating endianness.

size_t	freadLE (void ptr, size_t size, size_t nmemb, FILE fp)
	Read from a little-endian binary file, regardless of operating endianness.

size_t	fwriteLE (const void ptr, size_t size, size_t nmemb, FILE fp)
	Write to a little-endian binary file, regardless of operating endianness.

uint16_t	positiveRemainder (int16_t x, uint16_t y)
	For any x and y>0, compute z = x % y such that 0 <= z < y.

bool	fftSuitable (int N)
	Check if an integer is suitable for Fast Fourier Transforms (small prime factors only)

double	atomicMass (AtomicSymbol)
	retrieve atomic mass for each element

vector3	polarUnitVector (double alpha, double beta)
	Get the position of the new Z-axis, given alpha and beta (does not depend on gamma)

void	getEulerAxis (const vector3<> &newZ, vector3<> &euler)
	Calculates euler[0]=alpha and euler[1]=beta given newZ, the direction of the Z-axis in the rotated frame.

matrix3	matrixFromEuler (const vector3<> &euler)
	Calculate rotation matrices from euler angles.

vector3	eulerFromMatrix (const matrix3<> &mat)
	Calculate euler angles from rotation matrices.

double	wigner_d (const int j, const int m1, const int m2, const double beta)
	Wigner d-function d^j_{m1 m2}(beta) for ZYZ Euler angles.

Variables
cudaDeviceProp	cudaDevProps
	cached properties of currently running device (defined in GpuUtil.cpp)

cublasHandle_t	cublasHandle
	global handle to cublas (defined in GpuUtil.cpp)

cublasHandle_t	cublasHandle
	global handle to cublas (defined in GpuUtil.cpp)

int	nProcsAvailable
	number of available processors (initialized to number of online processors, can be overriden)

const double	eV = 1/27.21138505
	eV in Hartrees

const double	Ryd = 0.5
	Rydberg in Hartrees.

const double	Joule = 1/4.35974434e-18
	Joule in Hartrees.

const double	KJoule = 1000*Joule
	KJoule in Hartrees.

const double	Kcal = KJoule * 4.184
	Kcal in Hartrees.

const double	Kelvin = 1.3806488e-23*Joule
	Kelvin in Hartrees.

const double	invcm = 1./219474.6313705
	Inverse cm in Hartrees.

const double	Angstrom = 1/0.5291772
	Angstrom in bohrs.

const double	meter = 1e10*Angstrom
	meter in bohrs

const double	liter = 1e-3*pow(meter,3)
	liter in cubic bohrs

const double	amu = 1822.88839
	atomic mass unit in electron masses

const double	kg = 1./9.10938291e-31
	kilogram in electron masses

const double	mol = 6.0221367e23
	mole in number (i.e. Avogadro number)

const double	Newton = Joule/meter
	Newton in Hartree/bohr.

const double	Pascal = Newton/(meter*meter)
	Pascal in Hartree/bohr^3.

const double	KPascal = 1000*Pascal
	KPa in Hartree/bohr^3.

const double	Bar = 100*KPascal
	bar in Hartree/bohr^3

const double	mmHg = 133.322387415*Pascal
	mm Hg in Hartree/bohr^3

const double	sec = sqrt((kg*meter)/Newton)
	second in inverse Hartrees

const double	invSec = 1./sec
	inverse second in Hartrees

const double	fs = sec*1.0e-15
	femtosecond in inverse Hartrees

const double	Coul = Joule/eV
	Coulomb in electrons.

const double	Volt = Joule/Coul
	Volt in Hartrees.

const double	Ampere = Coul/sec
	Ampere in electrons/inverse Hartree.

const double	Ohm = Volt/Ampere
	Ohm in inverse conductance quanta.

const double	Tesla = Voltsec/(metermeter)
	Tesla in atomic units.

const double	bohrMagneton = 0.5

const double	gElectron = 2.0023193043617
	electron gyromagnetic ratio

string	inputBasename
	Basename of input file or "stdin" that can be used as a default run-name.

bool	killFlag
	Flag set by signal handlers - all compute loops should quit cleanly when this is set.

MPIUtil *	mpiWorld
	MPI across all processes.

MPIUtil *	mpiGroup
	MPI within current group of processes.

MPIUtil *	mpiGroupHead
	MPI across equal ranks in each group.

bool	mpiDebugLog
	If true, all processes output to seperate debug log files, otherwise only head process outputs (set before calling initSystem())

size_t	mempoolSize
	If non-zero, size of memory pool managed internally by JDFTx.

FILE *	globalLog

FILE *	nullLog
	pointer to /dev/null

EnumStringMap< AtomicSymbol >	atomicSymbolMap
	Relate chemical symbols and atomic numbers.

Detailed Description

Macro Definition Documentation

◆ COMPUTE_fullGindices

#define COMPUTE_fullGindices

Value:

        vector3<int> iG( \
                zBlock * blockDim.z + threadIdx.z, \
                kernelIndex(y), \
                kernelIndex(x) ); \
        if(iG[0]>=S[0] || iG[1]>=S[1] || iG[2]>=S[2]) return; \
        size_t i = iG[2] + S[2]*size_t(iG[1] + S[1]*iG[0]); \
        for(int j=0; j<3; j++) if(2*iG[j]>S[j]) iG[j]-=S[j];

The GPU equivalent of THREAD_fullGspaceLoop NOTE: x and z are swapped in kernel indices since x is contiguous on GPU

◆ COMPUTE_halfGindices

#define COMPUTE_halfGindices

Value:

        int size2 = S[2]/2+1; \
        vector3<int> iG( \
                threadIdx.z + zBlock * blockDim.z, \
                kernelIndex(y), \
                kernelIndex(x) ); \
        if(iG[0]>=S[0] || iG[1]>=S[1] || iG[2]>=size2) return; \
        size_t i = iG[2] + size2*size_t(iG[1] + S[1]*iG[0]); \
        for(int j=0; j<3; j++) if(2*iG[j]>S[j]) iG[j]-=S[j];

The GPU equivalent of THREAD_halfGspaceLoop NOTE: x and z are swapped in kernel indices since x is contiguous on GPU

◆ COMPUTE_rIndices

#define COMPUTE_rIndices

Value:

        vector3<int> iv( \
                threadIdx.z + zBlock * blockDim.z, \
                kernelIndex(y), \
                kernelIndex(x) ); \
        if(iv[0]>=S[0] || iv[1]>=S[1] || iv[2]>=S[2]) return; \
        size_t i = iv[2] + S[2]*size_t(iv[1] + S[1]*iv[0]);

The GPU equivalent of THREAD_rLoop.

◆ die

#define die ( ... )

Value:

        {       fprintf(globalLog, __VA_ARGS__); \
                if(mpiWorld->isHead() && globalLog != stdout) \
                        fprintf(stderr, __VA_ARGS__); \
                finalizeSystem(false); \
                exit(1); \
        }

Quit with an error message (formatted using printf()). Must be called from all processes.

◆ die_alone

#define die_alone ( ... )

Value:

        {       fprintf(globalLog, __VA_ARGS__); \
                fflush(globalLog); \
                if(mpiWorld->isHead() && globalLog != stdout) \
                        fprintf(stderr, __VA_ARGS__); \
                mpiWorld->exit(1); \
        }

Version of die that should only be used when it is impossible to guarantee synchronized calls from all processes.

◆ IS_NYQUIST

#define IS_NYQUIST ( (!(2*iG[0]-S[0])) | (!(2*iG[1]-S[1])) | (!(2*iG[2]-S[2])) )

Determine if this is a nyquist component NOTE: no component is nyquist for odd sizes in this convention

◆ THREAD_fullGspaceLoop

#define THREAD_fullGspaceLoop ( code )

Value:

        size_t i=iStart; \
        vector3<int> iG( i / (S[2]*S[1]), (i/S[2]) % S[1], i % S[2] ); \
        for(int j=0; j<3; j++) if(2*iG[j]>S[j]) iG[j]-=S[j]; \
        while(true) \
        {       \
                code \
                \
                i++; if(i==iStop) break; \
                iG[2]++; \
                if(2*iG[2]>S[2]) iG[2]-=S[2]; \
                if(iG[2]==0) \
                {       iG[1]++; \
                        if(2*iG[1]>S[1]) iG[1]-=S[1]; \
                        if(iG[1]==0) \
                        {       iG[0]++; \
                                if(2*iG[0]>S[0]) iG[0]-=S[0]; \
                        } \
                } \
        }

One thread of a loop over G-space (see L_sub in PHLO.cpp for example)

◆ THREAD_halfGspaceLoop

#define THREAD_halfGspaceLoop ( code )

Value:

        int size2 = S[2]/2+1; \
        size_t i=iStart; \
        vector3<int> iG( i / (size2*S[1]), (i/size2) % S[1], i % size2 ); \
        for(int j=0; j<3; j++) if(2*iG[j]>S[j]) iG[j]-=S[j]; \
        while(i<iStop) \
        {       \
                code \
                \
                i++; if(i==iStop) break; \
                iG[2]++; \
                if(iG[2]==size2) \
                {       iG[2]=0; \
                        iG[1]++; \
                        if(2*iG[1]>S[1]) iG[1]-=S[1]; \
                        if(iG[1]==0) \
                        {       iG[0]++; \
                                if(2*iG[0]>S[0]) iG[0]-=S[0]; \
                        } \
                } \
        }

One thread of a loop over symmetry-reduced G-space (see applyFuncGsq_sub in Operators.h for example)

◆ THREAD_rLoop

#define THREAD_rLoop ( code )

Value:

        size_t i=iStart; \
        vector3<int> iv( \
                i / (S[2]*S[1]), \
                (i/S[2]) % S[1], \
                i % S[2] ); \
        while(i<iStop) \
        {       \
                code \
                \
                i++; if(i==iStop) break; \
                iv[2]++; \
                if(iv[2]==S[2]) \
                {       iv[2]=0; \
                        iv[1]++; \
                        if(iv[1]==S[1]) \
                        {       iv[1] = 0; \
                                iv[0]++; \
                        } \
                } \
        }

One thread of a loop over real space (see applyFunc_r_sub for example)

◆ TIME

#define TIME	(	title,
		fp,
		code
	)

Value:

{       double runTime = clock_us(); \
        { code } \
        runTime = clock_us() - runTime; \
        fprintf(fp, "%s took %.2le s.\n", title, runTime*1e-6); \
}

Elapsed time in seconds (from start of program)

Time a code section and print the timing (with title).
Code must be self-contained as a scope, as it will be surrounded by { }, and should not define "runTime"

Parameters

title	Name to use when printing the timing info for the code block
fp	Stream to output the timing info to
code	The code block to time (must be a self-contained scope)

Function Documentation

◆ assertStackTraceExit()

int assertStackTraceExit	(	const char *	expr,
		const char *	function,
		const char *	file,
		long	line
	)

stack trace on failed assertions

◆ expandYlmProd()

std::vector<YlmProdTerm> expandYlmProd	(	int	lm1,
		int	lm2
	)

inline

Real spherical harmonic expansion of product of two real spherical harmonics (effectively returns list of non-zero Clebsch-Gordon coefficients in the modified real Ylm basis)

◆ gpuErrorCheck()

void gpuErrorCheck ( )

Check for gpu errors and print a useful message (implemented in GpuUtils.cpp)

Check for gpu errors, and if any, abort with a useful messsage.

◆ gpuInit()

bool gpuInit	(	FILE *	fpLog = `stdout`,
		const class MPIUtil *	mpiHostGpu = `0`,
		double *	nGPUs = `0`
	)

Must be called before any GPU use (preferably from main(), see isGpuMine) If mpiHostGpu (group of GPU processes on the same node) is specified, divide compatible GPUs amongst processes on same node, else select one with max memory nGPUs returns the number of physical GPUs used (fraction if a GPU is shared with other processes) Returns false on failure to find a suitable GPU

◆ isGpuMine()

bool isGpuMine ( )

Only the thread that called gpuInit() is allowed to use GPU resources This function will return true only on the one thread that rules the gpu.

◆ shouldThreadOperators()

bool shouldThreadOperators ( )

Operators should run multithreaded if this returns true, and should run in a single thread if this returns false. Note that all the thread launching functions in this file automatically prevent nested threading, so operator codes using those functions need not explicitly check this.

This only affects CPU threading, GPU operators should only be called from a single thread anyway.

◆ threadedAccumulate()

template<typename Callable , typename ... Args>

double threadedAccumulate	(	Callable *	func,
		size_t	nIter,
		Args...	args
	)

A parallelized loop with an accumulated return value.

Same as threadedLoop, but func() returns double, which is summed over and returned

Returns: Accumulated return value of all calls to func()

◆ threadedLoop()

template<typename Callable , typename ... Args>

void threadedLoop	(	Callable *	func,
		size_t	nIter,
		Args...	args
	)

A parallelized loop.

Given a callable object func and an argument list args, this routine calls func(i, args) for each i in [0:nIter-1], and accumulates the return values.

Note that the calls are made from multiple threads, so func must be thread safe. (Hint: pass mutexes as a part of args if synchronization is required).

As many threads as online processors are launched and the nIter iterations are evenly split between all the threads. Threaded loops will become single threaded if suspendOperatorThreading().

Parameters

func	The function / object with operator() to be looped over
nIter	The number of loop 'iterations'
args	Arguments to pass to func

◆ threadLaunch() [1/2]

template<typename Callable , typename ... Args>

void threadLaunch	(	Callable *	func,
		size_t	nJobs,
		Args...	args
	)

Same as threadLaunch(int nThreads, Callable* func, size_t nJobs, Args... args) with nThreads = number of online processors.

◆ threadLaunch() [2/2]

template<typename Callable , typename ... Args>

void threadLaunch	(	int	nThreads,
		Callable *	func,
		size_t	nJobs,
		Args...	args
	)

A simple utility for running muliple threads.

Given a callable object func and an argument list args, this routine calls nThreads threads of func invoked as func(iMin, iMax, args). The nJobs jobs are evenly split between all the threads; each instance of func should handle job index i satisfying iMin <= i < iMax.

If nJobs <= 0, the behaviour changes: the function is invoked as func(iThread, nThreads, args) instead, where 0 <= iThread < nThreads. This mode allows for more flexible threading than the evenly split job management indicated above. This could be used as a convenient interface for launching threads for any parallel routine requiring as many threads as processors.

Parameters

nThreads	Number of threads to launch (if <=0, as many as processors on system)
func	The function / object with operator() to invoke in a multithreaded fashion
nJobs	The number of jobs to be split between the various func threads
args	Arguments to pass to func

◆ YlmPrime() [1/2]

template<int l, int m>

__hostanddev__ vector3 YlmPrime ( const vector3<> & qHat )

Derivative of spherical Harmonic with repect to iDir'th component of qHat with separate l,m indices.

Derivative of spherical Harmonic with repect to iDir'th component of qHat with combined lm index.

◆ YlmPrime() [2/2]

template<int lm>

__hostanddev__ vector3 YlmPrime ( const vector3<> & qHat )

Derivative of spherical Harmonic with repect to iDir'th component of qHat with combined lm index.

Files

Classes

Macros

Typedefs

Enumerations

Functions

Variables

Detailed Description

Macro Definition Documentation

◆ COMPUTE_fullGindices

◆ COMPUTE_halfGindices

◆ COMPUTE_rIndices

◆ die

◆ die_alone

◆ IS_NYQUIST

◆ THREAD_fullGspaceLoop

◆ THREAD_halfGspaceLoop

◆ THREAD_rLoop

◆ TIME

Function Documentation

◆ assertStackTraceExit()

◆ expandYlmProd()

◆ gpuErrorCheck()

◆ gpuInit()

◆ isGpuMine()

◆ shouldThreadOperators()

◆ threadedAccumulate()

◆ threadedLoop()

◆ threadLaunch() [1/2]

◆ threadLaunch() [2/2]

◆ YlmPrime() [1/2]

◆ YlmPrime() [2/2]