Commonly used BLAS-like routines. More...

#include <gsl/gsl_cblas.h>
#include <cstdlib>
#include <cstdio>
#include <cfloat>
#include <core/scalar.h>
#include <core/Thread.h>
#include <cublas_v2.h>
#include <cuda_runtime.h>

Macros
#define	callPref(functionName) functionName##_gpu
	Select between functionName and functionName_gpu for the CPU and GPU executables respectively.

#define	DECLARE_eblas_sum(sumFunc, sumStridedFunc)

Functions
template<typename Ty , typename Tx >
void	eblas_mul (const int N, const Tx *X, const int incX, Ty *Y, const int incY)
	Templated elementwise multiply Y *= X for arrays X, Y. More...

void	eblas_dmul (const int N, const double *X, const int incX, double *Y, const int incY)
	Specialization of eblas_mul() for double[] *= double[].

void	eblas_zmul (const int N, const complex *X, const int incX, complex *Y, const int incY)
	Specialization of eblas_mul() for complex[] *= complex[].

void	eblas_zmuld (const int N, const double *X, const int incX, complex *Y, const int incY)
	Specialization of eblas_mul() for complex[] *= double[].

void	eblas_dmul_gpu (const int N, const double *X, const int incX, double *Y, const int incY)
	Equivalent of eblas_dmul() for GPU data pointers.

void	eblas_zmul_gpu (const int N, const complex *X, const int incX, complex *Y, const int incY)
	Equivalent of eblas_zmul() for GPU data pointers.

void	eblas_zmuld_gpu (const int N, const double *X, const int incX, complex *Y, const int incY)
	Equivalent of eblas_zmuld() for GPU data pointers.

template<typename Ty , typename Tx >
void	eblas_div (const int N, const Tx *X, const int incX, Ty *Y, const int incY)
	Templated elementwise divide Y /= X for arrays X, Y. More...

void	eblas_ddiv (const int N, const double *X, const int incX, double *Y, const int incY)
	Specialization of eblas_div() for double[] /= double[].

void	eblas_zdiv (const int N, const complex *X, const int incX, complex *Y, const int incY)
	Specialization of eblas_div() for complex[] /= complex[].

void	eblas_zdivd (const int N, const double *X, const int incX, complex *Y, const int incY)
	Specialization of eblas_div() for complex[] /= double[].

void	eblas_ddiv_gpu (const int N, const double *X, const int incX, double *Y, const int incY)
	Equivalent of eblas_ddiv() for GPU data pointers.

void	eblas_zdiv_gpu (const int N, const complex *X, const int incX, complex *Y, const int incY)
	Equivalent of eblas_zdiv() for GPU data pointers.

void	eblas_zdivd_gpu (const int N, const double *X, const int incX, complex *Y, const int incY)
	Equivalent of eblas_zdivd() for GPU data pointers.

void	eblas_sumStrided (const int N, const int stride, const double *X, double *result)
	Return sum over array of doubles with specified stride.

void	eblas_sumStrided_gpu (const int N, const int stride, const double *X, double *result)
	Equivalent of eblas_sumStrided() for X on GPU; result is on CPU.

template<typename T >
T	eblas_sum (const int N, const T *X)
	Return sum over array of generic type T composed only of a certain number of double members.

template<typename T >
T	eblas_sum_gpu (const int N, const T *X)
	Equivalent of eblas_sum() for GPU data pointers.

void	eblas_lincomb (const int N, const complex &sX, const complex *X, const int incX, const complex &sY, const complex *Y, const int incY, complex *Z, const int incZ)
	Elementwise linear combination Z = sX * X + sY * Y. More...

void	eblas_lincomb_gpu (const int N, const complex &sX, const complex *X, const int incX, const complex &sY, const complex *Y, const int incY, complex *Z, const int incZ)
	Equivalent of eblas_lincomb() for GPU data pointers.

void	eblas_zgemm (CBLAS_TRANSPOSE TransA, CBLAS_TRANSPOSE TransB, int M, int N, int K, const complex &alpha, const complex *A, const int lda, const complex *B, const int ldb, const complex &beta, complex *C, const int ldc)
	Threaded complex matrix multiply (threaded wrapper around zgemm). All the parameters have the same meaning as in cblas_zgemm, except that the element order is always column-major (FORTRAN order!).

void	eblas_zgemm_gpu (CBLAS_TRANSPOSE TransA, CBLAS_TRANSPOSE TransB, int M, int N, int K, const complex &alpha, const complex *A, const int lda, const complex *B, const int ldb, const complex &beta, complex *C, const int ldc)
	Wrap cublasZgemm to provide the same interface as eblas_zgemm().

void	eblas_scatter_zdaxpy (const int Nindex, double a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false)
	Scatter y(index) += a * x. More...

void	eblas_scatter_zaxpy (const int Nindex, complex a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false)
	Equivalent of eblas_scatter_zdaxpy() with a complex scale factor.

void	eblas_scatter_daxpy (const int Nindex, double a, const int *index, const double *x, double *y, const double *w=0)
	Equivalent of eblas_scatter_zdaxpy() for real data arrays.

void	eblas_gather_zdaxpy (const int Nindex, double a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false)
	Gather y += a * x(index) More...

void	eblas_gather_zaxpy (const int Nindex, complex a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false)
	Equivalent of eblas_gather_zdaxpy() with a complex scale factor.

void	eblas_gather_daxpy (const int Nindex, double a, const int *index, const double *x, double *y, const double *w=0)
	Equivalent of eblas_gather_zdaxpy() for real data arrays.

void	eblas_scatter_zdaxpy_gpu (const int Nindex, double a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false)
	Equivalent of eblas_scatter_zdaxpy() for GPU data pointers.

void	eblas_scatter_zaxpy_gpu (const int Nindex, complex a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false)
	Equivalent of eblas_scatter_zaxpy() for GPU data pointers.

void	eblas_scatter_daxpy_gpu (const int Nindex, double a, const int *index, const double *x, double *y, const double *w=0)
	Equivalent of eblas_scatter_daxpy() for GPU data pointers.

void	eblas_gather_zdaxpy_gpu (const int Nindex, double a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false)
	Equivalent of eblas_gather_zdaxpy() for GPU data pointers.

void	eblas_gather_zaxpy_gpu (const int Nindex, complex a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false)
	Equivalent of eblas_gather_zaxpy() for GPU data pointers.

void	eblas_gather_daxpy_gpu (const int Nindex, double a, const int *index, const double *x, double *y, const double *w=0)
	Equivalent of eblas_gather_daxpy() for GPU data pointers.

void	eblas_accumNorm (int N, const double &a, const complex *x, double *y)
	Accumulate elementwise norm of a complex array x into y i.e. y += a x conj(x) More...

void	eblas_accumProd (int N, const double &a, const complex *xU, const complex *xC, double *yRe, double *yIm)
	Accumulate elementwise product of two complex arrays xU and xC into real and imaginary parts yRe and yIm i.e. (yRe + i yIm) += a xU conj(xC) More...

void	eblas_accumProdComplex (int N, const double &a, const complex *xU, const complex *xC, complex *y)
	Accumulate elementwise product of two complex arrays xU and xC into y. More...

void	eblas_accumNorm_gpu (int N, const double &a, const complex *x, double *y)
	Equivalent of eblas_accumNorm() for GPU data pointers.

void	eblas_accumProd_gpu (int N, const double &a, const complex *xU, const complex *xC, double *yRe, double *yIm)
	Equivalent of eblas_accumProd() for GPU data pointers.

void	eblas_accumProdComplex_gpu (int N, const double &a, const complex *xU, const complex *xC, complex *y)
	Equivalent of eblas_accumProdComplex() for GPU data pointers.

void	eblas_symmetrize (int N, int n, const int *symmIndex, double *x)
	Symmetrize an array x, using N n-fold equivalence classes in symmIndex. More...

void	eblas_symmetrize (int N, int n, const int *symmIndex, complex *x)
	Equivalent of eblas_symmetrize() for complex data pointers.

void	eblas_symmetrize_gpu (int N, int n, const int *symmIndex, double *x)
	Equivalent of eblas_symmetrize() for real GPU data pointers.

void	eblas_symmetrize_gpu (int N, int n, const int *symmIndex, complex *x)
	Equivalent of eblas_symmetrize() for complex GPU data pointers.

void	eblas_symmetrize (int N, int n, const int *symmIndex, const int *symmMult, const complex *phase, complex *x)
	Symmetrize a complex array x with phase factors, using N n-fold equivalence classes in symmIndex (useful for space group symmetrization in reciprocal space) More...

void	eblas_symmetrize_gpu (int N, int n, const int *symmIndex, const int *symmMult, const complex *phase, complex *x)
	Equivalent of eblas_symmetrize() for complex GPU data pointers.

void	eblas_symmetrize (int N, int n, const int *symmIndex, const int *symmMult, const complex *phase, const matrix3<> *rotSpin, std::vector< complex * > x)
	Symmetrize a quadruplet of complex arrays with phase factors, using N n-fold equivalence classes in symmIndex (useful for space group symmetrization of spin density matrices in reciprocal space) More...

void	eblas_symmetrize_gpu (int N, int n, const int *symmIndex, const int *symmMult, const complex *phase, const matrix3<> *rotSpin, std::vector< complex * > x)
	Equivalent of eblas_symmetrize() for complex GPU data pointers.

template<typename T >
void	eblas_copy (T *dest, const T *src, int N)
	Copy a data array. More...

template<typename T >
void	eblas_zero (int N, T *x)
	Zero a data array. More...

void	eblas_dscal (int N, double a, double *x, int incx)
	Scale a real array: threaded wrapper to the cblas_dscal BLAS1 function.

void	eblas_zdscal (int N, double a, complex *x, int incx)
	Scale a complex array by a real scale factor: threaded wrapper to the cblas_zdscal BLAS1 function.

void	eblas_zscal (int N, const complex &a, complex *x, int incx)
	Scale a complex array by a complex scale factor: threaded wrapper to the cblas_zscal BLAS1 function.

void	eblas_daxpy (int N, double a, const double *x, int incx, double *y, int incy)
	Scaled-accumulate on real arrays: threaded wrapper to the cblas_daxpy BLAS1 function.

void	eblas_zaxpy (int N, const complex &a, const complex *x, int incx, complex *y, int incy)
	Scaled-accumulate on complex arrays: threaded wrapper to the cblas_zaxpy BLAS1 function.

complex	eblas_zdotc (int N, const complex *x, int incx, const complex *y, int incy)
	Dot product of complex arrays: threaded wrapper to the cblas_zdotc BLAS1 function.

double	eblas_ddot (int N, const double *x, int incx, const double *y, int incy)
	Dot product of real arrays: threaded wrapper to the cblas_ddot BLAS1 function.

double	eblas_dznrm2 (int N, const complex *x, int incx)
	2-norm of a complex array: threaded wrapper to the cblas_dznrm2 BLAS1 function.

double	eblas_dnrm2 (int N, const double *x, int incx)
	2-norm of a real array: threaded wrapper to the cblas_dnrm2 BLAS1 function.

template<typename T >
void	eblas_copy_gpu (T *dest, const T *src, int N)
	Equivalent of eblas_copy() for GPU data pointers.

template<typename T >
void	eblas_zero_gpu (int N, T *x)
	Equivalent of eblas_zero() for GPU data pointers.

void	eblas_dscal_gpu (int N, double a, double *x, int incx)
	Equivalent of eblas_dscal() for GPU data pointers.

void	eblas_zdscal_gpu (int N, double a, complex *x, int incx)
	Equivalent of eblas_zdscal() for GPU data pointers.

void	eblas_zscal_gpu (int N, const complex &a, complex *x, int incx)
	Equivalent of eblas_zscal() for GPU data pointers.

void	eblas_daxpy_gpu (int N, double a, const double *x, int incx, double *y, int incy)
	Equivalent of eblas_daxpy() for GPU data pointers.

void	eblas_zaxpy_gpu (int N, const complex &a, const complex *x, int incx, complex *y, int incy)
	Equivalent of eblas_zaxpy() for GPU data pointers.

complex	eblas_zdotc_gpu (int N, const complex *x, int incx, const complex *y, int incy)
	Equivalent of eblas_zdotc() for GPU data pointers.

double	eblas_ddot_gpu (int N, const double *x, int incx, const double *y, int incy)
	Equivalent of eblas_ddot() for GPU data pointers.

double	eblas_dznrm2_gpu (int N, const complex *x, int incx)
	Equivalent of eblas_dznrm2() for GPU data pointers.

double	eblas_dnrm2_gpu (int N, const double *x, int incx)
	Equivalent of eblas_dnrm2() for GPU data pointers.

void	eblas_capMinMax (const int N, double *x, double &xMin, double &xMax, double capLo=-DBL_MAX, double capHi=+DBL_MAX)
	Find the minimum and maximum of a data array and optionally cap it from above and/or below. More...

void	eblas_capMinMax_gpu (const int N, double *x, double &xMin, double &xMax, double capLo=-DBL_MAX, double capHi=+DBL_MAX)
	Equivalent of eblas_capMinMax() for GPU data pointers.

	DECLARE_eblas_sum (eblas_sum, eblas_sumStrided) DECLARE_eblas_sum (eblas_sum_gpu, eblas_sumStrided_gpu)

Detailed Description

Commonly used BLAS-like routines.

Macro Definition Documentation

◆ DECLARE_eblas_sum

#define DECLARE_eblas_sum	(	sumFunc,
		sumStridedFunc
	)

Value:

        template<typename T> T sumFunc(const int N, const T* X) \
        {       int Ndoubles = sizeof(T)/sizeof(double); \
                assert(Ndoubles * sizeof(double) == sizeof(T)); \
                T result; sumStridedFunc(N, Ndoubles, (const double*)X, (double*)&result); \
                return result; \
        }

Macros

Functions

Detailed Description

Macro Definition Documentation

◆ DECLARE_eblas_sum