JDFTx  1.7.0
BlasExtra.h File Reference

Commonly used BLAS-like routines. More...

#include <gsl/gsl_cblas.h>
#include <cstdlib>
#include <cstdio>
#include <cfloat>
#include <core/scalar.h>
#include <core/Thread.h>
#include <cublas_v2.h>
#include <cuda_runtime.h>

Macros

#define callPref(functionName)   functionName##_gpu
 Select between functionName and functionName_gpu for the CPU and GPU executables respectively.
 
#define DECLARE_eblas_sum(sumFunc, sumStridedFunc)
 

Functions

template<typename Ty , typename Tx >
void eblas_mul (const int N, const Tx *X, const int incX, Ty *Y, const int incY)
 Templated elementwise multiply Y *= X for arrays X, Y. More...
 
void eblas_dmul (const int N, const double *X, const int incX, double *Y, const int incY)
 Specialization of eblas_mul() for double[] *= double[].
 
void eblas_zmul (const int N, const complex *X, const int incX, complex *Y, const int incY)
 Specialization of eblas_mul() for complex[] *= complex[].
 
void eblas_zmuld (const int N, const double *X, const int incX, complex *Y, const int incY)
 Specialization of eblas_mul() for complex[] *= double[].
 
void eblas_dmul_gpu (const int N, const double *X, const int incX, double *Y, const int incY)
 Equivalent of eblas_dmul() for GPU data pointers.
 
void eblas_zmul_gpu (const int N, const complex *X, const int incX, complex *Y, const int incY)
 Equivalent of eblas_zmul() for GPU data pointers.
 
void eblas_zmuld_gpu (const int N, const double *X, const int incX, complex *Y, const int incY)
 Equivalent of eblas_zmuld() for GPU data pointers.
 
template<typename Ty , typename Tx >
void eblas_div (const int N, const Tx *X, const int incX, Ty *Y, const int incY)
 Templated elementwise divide Y /= X for arrays X, Y. More...
 
void eblas_ddiv (const int N, const double *X, const int incX, double *Y, const int incY)
 Specialization of eblas_div() for double[] /= double[].
 
void eblas_zdiv (const int N, const complex *X, const int incX, complex *Y, const int incY)
 Specialization of eblas_div() for complex[] /= complex[].
 
void eblas_zdivd (const int N, const double *X, const int incX, complex *Y, const int incY)
 Specialization of eblas_div() for complex[] /= double[].
 
void eblas_ddiv_gpu (const int N, const double *X, const int incX, double *Y, const int incY)
 Equivalent of eblas_ddiv() for GPU data pointers.
 
void eblas_zdiv_gpu (const int N, const complex *X, const int incX, complex *Y, const int incY)
 Equivalent of eblas_zdiv() for GPU data pointers.
 
void eblas_zdivd_gpu (const int N, const double *X, const int incX, complex *Y, const int incY)
 Equivalent of eblas_zdivd() for GPU data pointers.
 
void eblas_sumStrided (const int N, const int stride, const double *X, double *result)
 Return sum over array of doubles with specified stride.
 
void eblas_sumStrided_gpu (const int N, const int stride, const double *X, double *result)
 Equivalent of eblas_sumStrided() for X on GPU; result is on CPU.
 
template<typename T >
T eblas_sum (const int N, const T *X)
 Return the sum over an array of a generic type T that consists solely of a whole number of double members.
 
template<typename T >
T eblas_sum_gpu (const int N, const T *X)
 Equivalent of eblas_sum() for GPU data pointers.
 
void eblas_lincomb (const int N, const complex &sX, const complex *X, const int incX, const complex &sY, const complex *Y, const int incY, complex *Z, const int incZ)
 Elementwise linear combination Z = sX * X + sY * Y. More...
 
void eblas_lincomb_gpu (const int N, const complex &sX, const complex *X, const int incX, const complex &sY, const complex *Y, const int incY, complex *Z, const int incZ)
 Equivalent of eblas_lincomb() for GPU data pointers.
 
void eblas_zgemm (CBLAS_TRANSPOSE TransA, CBLAS_TRANSPOSE TransB, int M, int N, int K, const complex &alpha, const complex *A, const int lda, const complex *B, const int ldb, const complex &beta, complex *C, const int ldc)
 Threaded complex matrix multiply (threaded wrapper around zgemm). All the parameters have the same meaning as in cblas_zgemm, except that the element order is always Column Major (FORTRAN order!).
 
void eblas_zgemm_gpu (CBLAS_TRANSPOSE TransA, CBLAS_TRANSPOSE TransB, int M, int N, int K, const complex &alpha, const complex *A, const int lda, const complex *B, const int ldb, const complex &beta, complex *C, const int ldc)
 Wrap cublasZgemm to provide the same interface as eblas_zgemm().
 
void eblas_scatter_zdaxpy (const int Nindex, double a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false)
 Scatter y(index) += a * x. More...
 
void eblas_scatter_zaxpy (const int Nindex, complex a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false)
 Equivalent of eblas_scatter_zdaxpy() with a complex scale factor.
 
void eblas_scatter_daxpy (const int Nindex, double a, const int *index, const double *x, double *y, const double *w=0)
 Equivalent of eblas_scatter_zdaxpy() for real data arrays.
 
void eblas_gather_zdaxpy (const int Nindex, double a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false)
 Gather y += a * x(index) More...
 
void eblas_gather_zaxpy (const int Nindex, complex a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false)
 Equivalent of eblas_gather_zdaxpy() with a complex scale factor.
 
void eblas_gather_daxpy (const int Nindex, double a, const int *index, const double *x, double *y, const double *w=0)
 Equivalent of eblas_gather_zdaxpy() for real data arrays.
 
void eblas_scatter_zdaxpy_gpu (const int Nindex, double a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false)
 Equivalent of eblas_scatter_zdaxpy() for GPU data pointers.
 
void eblas_scatter_zaxpy_gpu (const int Nindex, complex a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false)
 Equivalent of eblas_scatter_zaxpy() for GPU data pointers.
 
void eblas_scatter_daxpy_gpu (const int Nindex, double a, const int *index, const double *x, double *y, const double *w=0)
 Equivalent of eblas_scatter_daxpy() for GPU data pointers.
 
void eblas_gather_zdaxpy_gpu (const int Nindex, double a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false)
 Equivalent of eblas_gather_zdaxpy() for GPU data pointers.
 
void eblas_gather_zaxpy_gpu (const int Nindex, complex a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false)
 Equivalent of eblas_gather_zaxpy() for GPU data pointers.
 
void eblas_gather_daxpy_gpu (const int Nindex, double a, const int *index, const double *x, double *y, const double *w=0)
 Equivalent of eblas_gather_daxpy() for GPU data pointers.
 
void eblas_accumNorm (int N, const double &a, const complex *x, double *y)
 Accumulate elementwise norm of a complex array x into y i.e. y += a x conj(x) More...
 
void eblas_accumProd (int N, const double &a, const complex *xU, const complex *xC, double *yRe, double *yIm)
 Accumulate elementwise product of two complex arrays xU and xC into real and imaginary parts yRe and yIm i.e. (yRe + i yIm) += a xU conj(xC) More...
 
void eblas_accumProdComplex (int N, const double &a, const complex *xU, const complex *xC, complex *y)
 Accumulate elementwise product of two complex arrays xU and xC into y. More...
 
void eblas_accumNorm_gpu (int N, const double &a, const complex *x, double *y)
 Equivalent of eblas_accumNorm() for GPU data pointers.
 
void eblas_accumProd_gpu (int N, const double &a, const complex *xU, const complex *xC, double *yRe, double *yIm)
 Equivalent of eblas_accumProd() for GPU data pointers.
 
void eblas_accumProdComplex_gpu (int N, const double &a, const complex *xU, const complex *xC, complex *y)
 Equivalent of eblas_accumProdComplex() for GPU data pointers.
 
void eblas_symmetrize (int N, int n, const int *symmIndex, double *x)
 Symmetrize an array x, using N n-fold equivalence classes in symmIndex. More...
 
void eblas_symmetrize (int N, int n, const int *symmIndex, complex *x)
 Equivalent of eblas_symmetrize() for complex data pointers.
 
void eblas_symmetrize_gpu (int N, int n, const int *symmIndex, double *x)
 Equivalent of eblas_symmetrize() for real GPU data pointers.
 
void eblas_symmetrize_gpu (int N, int n, const int *symmIndex, complex *x)
 Equivalent of eblas_symmetrize() for complex GPU data pointers.
 
void eblas_symmetrize (int N, int n, const int *symmIndex, const int *symmMult, const complex *phase, complex *x)
 Symmetrize a complex array x with phase factors, using N n-fold equivalence classes in symmIndex (useful for space group symmetrization in reciprocal space) More...
 
void eblas_symmetrize_gpu (int N, int n, const int *symmIndex, const int *symmMult, const complex *phase, complex *x)
 Equivalent of eblas_symmetrize() for complex GPU data pointers.
 
void eblas_symmetrize (int N, int n, const int *symmIndex, const int *symmMult, const complex *phase, const matrix3<> *rotSpin, std::vector< complex * > x)
 Symmetrize a quadruplet of complex arrays with phase factors, using N n-fold equivalence classes in symmIndex (useful for space group symmetrization of spin density matrices in reciprocal space) More...
 
void eblas_symmetrize_gpu (int N, int n, const int *symmIndex, const int *symmMult, const complex *phase, const matrix3<> *rotSpin, std::vector< complex * > x)
 Equivalent of eblas_symmetrize() for complex GPU data pointers.
 
template<typename T >
void eblas_copy (T *dest, const T *src, int N)
 Copy a data array. More...
 
template<typename T >
void eblas_zero (int N, T *x)
 Zero a data array. More...
 
void eblas_dscal (int N, double a, double *x, int incx)
 Scale a real array: threaded wrapper to the cblas_dscal BLAS1 function.
 
void eblas_zdscal (int N, double a, complex *x, int incx)
 Scale a complex array by a real scale factor: threaded wrapper to the cblas_zdscal BLAS1 function.
 
void eblas_zscal (int N, const complex &a, complex *x, int incx)
 Scale a complex array by a complex scale factor: threaded wrapper to the cblas_zscal BLAS1 function.
 
void eblas_daxpy (int N, double a, const double *x, int incx, double *y, int incy)
 Scaled-accumulate on real arrays: threaded wrapper to the cblas_daxpy BLAS1 function.
 
void eblas_zaxpy (int N, const complex &a, const complex *x, int incx, complex *y, int incy)
 Scaled-accumulate on complex arrays: threaded wrapper to the cblas_zaxpy BLAS1 function.
 
complex eblas_zdotc (int N, const complex *x, int incx, const complex *y, int incy)
 Dot product of complex arrays: threaded wrapper to the cblas_zdotc BLAS1 function.
 
double eblas_ddot (int N, const double *x, int incx, const double *y, int ncy)
 Dot product of real arrays: threaded wrapper to the cblas_ddot BLAS1 function.
 
double eblas_dznrm2 (int N, const complex *x, int incx)
 2-norm of a complex array: threaded wrapper to the cblas_dznrm2 BLAS1 function.
 
double eblas_dnrm2 (int N, const double *x, int incx)
 2-norm of a real array: threaded wrapper to the cblas_dnrm2 BLAS1 function.
 
template<typename T >
void eblas_copy_gpu (T *dest, const T *src, int N)
 Equivalent of eblas_copy() for GPU data pointers.
 
template<typename T >
void eblas_zero_gpu (int N, T *x)
 Equivalent of eblas_zero() for GPU data pointers.
 
void eblas_dscal_gpu (int N, double a, double *x, int incx)
 Equivalent of eblas_dscal() for GPU data pointers.
 
void eblas_zdscal_gpu (int N, double a, complex *x, int incx)
 Equivalent of eblas_zdscal() for GPU data pointers.
 
void eblas_zscal_gpu (int N, const complex &a, complex *x, int incx)
 Equivalent of eblas_zscal() for GPU data pointers.
 
void eblas_daxpy_gpu (int N, double a, const double *x, int incx, double *y, int incy)
 Equivalent of eblas_daxpy() for GPU data pointers.
 
void eblas_zaxpy_gpu (int N, const complex &a, const complex *x, int incx, complex *y, int incy)
 Equivalent of eblas_zaxpy() for GPU data pointers.
 
complex eblas_zdotc_gpu (int N, const complex *x, int incx, const complex *y, int incy)
 Equivalent of eblas_zdotc() for GPU data pointers.
 
double eblas_ddot_gpu (int N, const double *x, int incx, const double *y, int incy)
 Equivalent of eblas_ddot() for GPU data pointers.
 
double eblas_dznrm2_gpu (int N, const complex *x, int incx)
 Equivalent of eblas_dznrm2() for GPU data pointers.
 
double eblas_dnrm2_gpu (int N, const double *x, int incx)
 Equivalent of eblas_dnrm2() for GPU data pointers.
 
void eblas_capMinMax (const int N, double *x, double &xMin, double &xMax, double capLo=-DBL_MAX, double capHi=+DBL_MAX)
 Find the minimum and maximum of a data array and optionally cap it from above and/or below. More...
 
void eblas_capMinMax_gpu (const int N, double *x, double &xMin, double &xMax, double capLo=-DBL_MAX, double capHi=+DBL_MAX)
 Equivalent of eblas_capMinMax() for GPU data pointers.
 
 DECLARE_eblas_sum (eblas_sum, eblas_sumStrided) DECLARE_eblas_sum (eblas_sum_gpu, eblas_sumStrided_gpu)
 

Detailed Description

Commonly used BLAS-like routines.

Macro Definition Documentation

◆ DECLARE_eblas_sum

#define DECLARE_eblas_sum (   sumFunc,
  sumStridedFunc 
)
Value:
template<typename T> T sumFunc(const int N, const T* X) /* generated wrapper: sums N elements of type T by reinterpreting them as doubles */ \
{ int Ndoubles = sizeof(T)/sizeof(double); /* number of doubles that make up one T */ \
assert(Ndoubles * sizeof(double) == sizeof(T)); /* T's size must be an exact multiple of sizeof(double) */ \
T result; sumStridedFunc(N, Ndoubles, (const double*)X, (double*)&result); /* X and &result are passed reinterpreted as double arrays, with Ndoubles as the stride; presumably every component of result is filled by sumStridedFunc - confirm */ \
return result; \
}