JDFTx  1.3.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
BlasExtra.h File Reference

Commonly used BLAS-like routines. More...

#include <gsl/gsl_cblas.h>
#include <cstdlib>
#include <cstdio>
#include <cfloat>
#include <core/scalar.h>
#include <core/Thread.h>
#include <cublas.h>
#include <cuda_runtime.h>

Macros

#define eblas_dscal_gpu   cublasDscal
 Equivalent of eblas_dscal() for GPU data pointers.
 
#define eblas_daxpy_gpu   cublasDaxpy
 Equivalent of eblas_daxpy() for GPU data pointers.
 
#define eblas_ddot_gpu   cublasDdot
 Equivalent of eblas_ddot() for GPU data pointers.
 
#define eblas_dnrm2_gpu   cublasDnrm2
 Equivalent of eblas_dnrm2() for GPU data pointers.
 
#define callPref(functionName)   functionName##_gpu
 Select between functionName and functionName_gpu for the CPU and GPU executables respectively.
 

Functions

template<typename Ty , typename Tx >
void eblas_mul (const int N, const Tx *X, const int incX, Ty *Y, const int incY)
 Templated elementwise multiply Y *= X for arrays X, Y. More...
 
void eblas_dmul (const int N, const double *X, const int incX, double *Y, const int incY)
 Specialization of eblas_mul() for double[] *= double[].
 
void eblas_zmul (const int N, const complex *X, const int incX, complex *Y, const int incY)
 Specialization of eblas_mul() for complex[] *= complex[].
 
void eblas_zmuld (const int N, const double *X, const int incX, complex *Y, const int incY)
 Specialization of eblas_mul() for complex[] *= double[].
 
void eblas_dmul_gpu (const int N, const double *X, const int incX, double *Y, const int incY)
 Equivalent of eblas_dmul() for GPU data pointers.
 
void eblas_zmul_gpu (const int N, const complex *X, const int incX, complex *Y, const int incY)
 Equivalent of eblas_zmul() for GPU data pointers.
 
void eblas_zmuld_gpu (const int N, const double *X, const int incX, complex *Y, const int incY)
 Equivalent of eblas_zmuld() for GPU data pointers.
 
template<typename Ty , typename Tx >
void eblas_div (const int N, const Tx *X, const int incX, Ty *Y, const int incY)
 Templated elementwise divide Y /= X for arrays X, Y. More...
 
void eblas_ddiv (const int N, const double *X, const int incX, double *Y, const int incY)
 Specialization of eblas_div() for double[] /= double[].
 
void eblas_zdiv (const int N, const complex *X, const int incX, complex *Y, const int incY)
 Specialization of eblas_div() for complex[] /= complex[].
 
void eblas_zdivd (const int N, const double *X, const int incX, complex *Y, const int incY)
 Specialization of eblas_div() for complex[] /= double[].
 
void eblas_ddiv_gpu (const int N, const double *X, const int incX, double *Y, const int incY)
 Equivalent of eblas_ddiv() for GPU data pointers.
 
void eblas_zdiv_gpu (const int N, const complex *X, const int incX, complex *Y, const int incY)
 Equivalent of eblas_zdiv() for GPU data pointers.
 
void eblas_zdivd_gpu (const int N, const double *X, const int incX, complex *Y, const int incY)
 Equivalent of eblas_zdivd() for GPU data pointers.
 
void eblas_lincomb (const int N, const complex &sX, const complex *X, const int incX, const complex &sY, const complex *Y, const int incY, complex *Z, const int incZ)
 Elementwise linear combination Z = sX * X + sY * Y. More...
 
void eblas_lincomb_gpu (const int N, const complex &sX, const complex *X, const int incX, const complex &sY, const complex *Y, const int incY, complex *Z, const int incZ)
 Equivalent of eblas_lincomb() for GPU data pointers.
 
void eblas_zgemm (CBLAS_TRANSPOSE TransA, CBLAS_TRANSPOSE TransB, int M, int N, int K, const complex &alpha, const complex *A, const int lda, const complex *B, const int ldb, const complex &beta, complex *C, const int ldc)
 Threaded complex matrix multiply (threaded wrapper around zgemm) All the parameters have the same meaning as in cblas_zgemm, except element order is always Column Major (FORTRAN order!)
 
void eblas_zgemm_gpu (CBLAS_TRANSPOSE TransA, CBLAS_TRANSPOSE TransB, int M, int N, int K, const complex &alpha, const complex *A, const int lda, const complex *B, const int ldb, const complex &beta, complex *C, const int ldc)
 Wrap cublasZgemm to provide the same interface as eblas_zgemm()
 
void eblas_scatter_zdaxpy (const int Nindex, double a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false)
 Scatter y(index) += a * x. More...
 
void eblas_scatter_zaxpy (const int Nindex, complex a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false)
 Equivalent of eblas_scatter_zdaxpy() with a complex scale factor.
 
void eblas_scatter_daxpy (const int Nindex, double a, const int *index, const double *x, double *y, const double *w=0)
 Equivalent of eblas_scatter_zdaxpy() for real data arrays.
 
void eblas_gather_zdaxpy (const int Nindex, double a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false)
 Gather y += a * x(index) More...
 
void eblas_gather_zaxpy (const int Nindex, complex a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false)
 Equivalent of eblas_gather_zdaxpy() with a complex scale factor.
 
void eblas_gather_daxpy (const int Nindex, double a, const int *index, const double *x, double *y, const double *w=0)
 Equivalent of eblas_scatter_zdaxpy() for real data arrays.
 
void eblas_scatter_zdaxpy_gpu (const int Nindex, double a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false)
 Equivalent of eblas_scatter_zdaxpy() for GPU data pointers.
 
void eblas_scatter_zaxpy_gpu (const int Nindex, complex a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false)
 Equivalent of eblas_scatter_zaxpy() for GPU data pointers.
 
void eblas_scatter_daxpy_gpu (const int Nindex, double a, const int *index, const double *x, double *y, const double *w=0)
 Equivalent of eblas_scatter_daxpy() for GPU data pointers.
 
void eblas_gather_zdaxpy_gpu (const int Nindex, double a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false)
 Equivalent of eblas_gather_zdaxpy() for GPU data pointers.
 
void eblas_gather_zaxpy_gpu (const int Nindex, complex a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false)
 Equivalent of eblas_gather_zaxpy() for GPU data pointers.
 
void eblas_gather_daxpy_gpu (const int Nindex, double a, const int *index, const double *x, double *y, const double *w=0)
 Equivalent of eblas_gather_daxpy() for GPU data pointers.
 
void eblas_accumNorm (int N, const double &a, const complex *x, double *y)
 Accumulate elementwise norm of a complex array x into y i.e. y += a x conj(x) More...
 
void eblas_accumProd (int N, const double &a, const complex *xU, const complex *xC, double *yRe, double *yIm)
 Accumulate elementwise product of two complex arrays xU and xC into real and imaginary parts yRe and yIm i.e. (yRe + i yIm) += a xU conj(xC) More...
 
void eblas_accumNorm_gpu (int N, const double &a, const complex *x, double *y)
 Equivalent of eblas_accumNorm() for GPU data pointers.
 
void eblas_accumProd_gpu (int N, const double &a, const complex *xU, const complex *xC, double *yRe, double *yIm)
 Equivalent of eblas_accumProd() for GPU data pointers.
 
void eblas_symmetrize (int N, int n, const int *symmIndex, double *x)
 Symmetrize an array x, using N n-fold equivalence classes in symmIndex. More...
 
void eblas_symmetrize (int N, int n, const int *symmIndex, complex *x)
 Equivalent of eblas_symmetrize() for complex data pointers.
 
void eblas_symmetrize_gpu (int N, int n, const int *symmIndex, double *x)
 Equivalent of eblas_symmetrize() for real GPU data pointers.
 
void eblas_symmetrize_gpu (int N, int n, const int *symmIndex, complex *x)
 Equivalent of eblas_symmetrize() for complex GPU data pointers.
 
void eblas_symmetrize (int N, int n, const int *symmIndex, const int *symmMult, const complex *phase, complex *x)
 Symmetrize a complex array x with phase factors, using N n-fold equivalence classes in symmIndex (useful for space group symmetrization in reciprocal space) More...
 
void eblas_symmetrize_gpu (int N, int n, const int *symmIndex, const int *symmMult, const complex *phase, complex *x)
 Equivalent of eblas_symmetrize() for complex GPU data pointers.
 
template<typename T >
void eblas_copy (T *dest, const T *src, int N)
 Copy a data array. More...
 
void eblas_zero (int N, double *x)
 Zero a data array. More...
 
void eblas_zero (int N, complex *x)
 Equivalent of eblas_zero() for complex data arrays.
 
void eblas_dscal (int N, double a, double *x, int incx)
 Scale a real array: threaded wrapper to the cblas_dscal BLAS1 function.
 
void eblas_zdscal (int N, double a, complex *x, int incx)
 Scale a complex array by a real scale factor: threaded wrapper to the cblas_zdscal BLAS1 function.
 
void eblas_zscal (int N, const complex &a, complex *x, int incx)
 Scale a complex array by a complex scale factor: threaded wrapper to the cblas_zscal BLAS1 function.
 
void eblas_daxpy (int N, double a, const double *x, int incx, double *y, int incy)
 Scaled-accumulate on real arrays: threaded wrapper to the cblas_daxpy BLAS1 function.
 
void eblas_zaxpy (int N, const complex &a, const complex *x, int incx, complex *y, int incy)
 Scaled-accumulate on complex arrays: threaded wrapper to the cblas_zaxpy BLAS1 function.
 
complex eblas_zdotc (int N, const complex *x, int incx, const complex *y, int incy)
 Dot product of complex arrays: threaded wrapper to the cblas_zdotc BLAS1 function.
 
double eblas_ddot (int N, const double *x, int incx, const double *y, int ncy)
 Dot product of real arrays: threaded wrapper to the cblas_ddot BLAS1 function.
 
double eblas_dznrm2 (int N, const complex *x, int incx)
 2-norm of a complex array: threaded wrapper to the cblas_dznrm2 BLAS1 function
 
double eblas_dnrm2 (int N, const double *x, int incx)
 2-norm of a real array: threaded wrapper to the cblas_dnrm2 BLAS1 function
 
template<typename T >
void eblas_copy_gpu (T *dest, const T *src, int N)
 Equivalent of eblas_copy() for GPU data pointers.
 
void eblas_zero_gpu (int N, double *x)
 Equivalent of eblas_zero() for GPU data pointers.
 
void eblas_zero_gpu (int N, complex *x)
 Equivalent of eblas_zero() for GPU data pointers.
 
void eblas_zdscal_gpu (int N, double a, complex *x, int incx)
 Equivalent of eblas_zdscal() for GPU data pointers.
 
void eblas_zscal_gpu (int N, const complex &a, complex *x, int incx)
 Equivalent of eblas_zscal for GPU data pointers.
 
void eblas_zaxpy_gpu (int N, const complex &a, const complex *x, int incx, complex *y, int incy)
 Equivalent of eblas_zaxpy() for GPU data pointers.
 
complex eblas_zdotc_gpu (int N, const complex *x, int incx, const complex *y, int incy)
 Equivalent of eblas_zdotc() for GPU data pointers.
 
double eblas_dznrm2_gpu (int N, const complex *x, int incx)
 Equivalent of eblas_dznrm2() for GPU data pointers.
 
void eblas_capMinMax (const int N, double *x, double &xMin, double &xMax, double capLo=-DBL_MAX, double capHi=+DBL_MAX)
 Find the minimum and maximum of a data array and optionally cap it from above and/or below. More...
 
void eblas_capMinMax_gpu (const int N, double *x, double &xMin, double &xMax, double capLo=-DBL_MAX, double capHi=+DBL_MAX)
 Equivalent of eblas_capMinMax() for GPU data pointers.
 

Detailed Description

Commonly used BLAS-like routines.

Function Documentation

void eblas_accumNorm ( int  N,
const double &  a,
const complex x,
double *  y 
)

Accumulate elementwise norm of a complex array x into y i.e. y += a x conj(x)

Parameters
NLength of array
ascale factor
xInput complex data array
yOuput real data array
void eblas_accumProd ( int  N,
const double &  a,
const complex xU,
const complex xC,
double *  yRe,
double *  yIm 
)

Accumulate elementwise product of two complex arrays xU and xC into real and imaginary parts yRe and yIm i.e. (yRe + i yIm) += a xU conj(xC)

Parameters
NLength of array
ascale factor
xUUnconjugated input complex data array
xCConjugated input complex data array
yReOuput real-part data array
yImOuput imaginary-part data array
void eblas_capMinMax ( const int  N,
double *  x,
double &  xMin,
double &  xMax,
double  capLo = -DBL_MAX,
double  capHi = +DBL_MAX 
)

Find the minimum and maximum of a data array and optionally cap it from above and/or below.

Parameters
NLength of data array
xInput data array, that might be modified if capLo or capHi are finite
xMinOn output, the minimum of the input data array
xMaxOn output, the maximum of the input data array
capLoIf finite, cap the data array on output from below at this value (no capping for the default value)
capHiIf finite, cap the data array on output from above at this value (no capping for the default value)
template<typename T >
void eblas_copy ( T *  dest,
const T *  src,
int  N 
)

Copy a data array.

Template Parameters
TData type of the input and output arrays
Parameters
destDestination data pointer
srcSource data pointer
NNumber of elements of the data type T to copy
template<typename Ty , typename Tx >
void eblas_div ( const int  N,
const Tx *  X,
const int  incX,
Ty *  Y,
const int  incY 
)

Templated elementwise divide Y /= X for arrays X, Y.

Template Parameters
Typrimiitive data-type for array Y
Txprimiitive data-type for array X
Parameters
Nnumber of elements in X and Y
Xpointer to the first element of array X
incXstride along the X array
Ypointer to the first element of array Y
incYstride along the Y array (must be non-zero)
void eblas_gather_zdaxpy ( const int  Nindex,
double  a,
const int *  index,
const complex x,
complex y,
bool  conjx = false,
const complex w = 0,
bool  conjw = false 
)

Gather y += a * x(index)

Parameters
NindexLength of index array
aScale factor (real)
index0-based index array (with length at least Nindex)
xInput array that is sampled with the index array (with length at least max(index)+1)
yOutput array that is sampled consecutively (with length at least Nindex)
conjxIf true, use conj(x) instead of x
wOptional array that is sampled consecutively and multiplies x elementwise
conjwIf true, use conj(w) instead of w
void eblas_lincomb ( const int  N,
const complex sX,
const complex X,
const int  incX,
const complex sY,
const complex Y,
const int  incY,
complex Z,
const int  incZ 
)

Elementwise linear combination Z = sX * X + sY * Y.

Parameters
NNumber of elements in X, Y and X
sXScale factor for input X
XData pointer for input X
incXPointer increment for input X
sYScale factor for input Y
YData pointer for input Y
incYPointer increment for input Y
ZData pointer for input Z
incZPointer increment for input Z
template<typename Ty , typename Tx >
void eblas_mul ( const int  N,
const Tx *  X,
const int  incX,
Ty *  Y,
const int  incY 
)

Templated elementwise multiply Y *= X for arrays X, Y.

Template Parameters
Typrimiitive data-type for array Y
Txprimiitive data-type for array X
Parameters
Nnumber of elements in X and Y
Xpointer to the first element of array X
incXstride along the X array
Ypointer to the first element of array Y
incYstride along the Y array (must be non-zero)
void eblas_scatter_zdaxpy ( const int  Nindex,
double  a,
const int *  index,
const complex x,
complex y,
bool  conjx = false,
const complex w = 0,
bool  conjw = false 
)

Scatter y(index) += a * x.

Parameters
NindexLength of index array
aScale factor (real)
index0-based index array (with length at least Nindex)
xInput array that is sampled consecutively (with length at least Nindex)
yOutput array that is sampled with the index array (with length at least max(index)+1)
conjxIf true, use conj(x) instead of x
wOptional array that is sampled consecutively and multiplies x elementwise
conjwIf true, use conj(w) instead of w
void eblas_symmetrize ( int  N,
int  n,
const int *  symmIndex,
double *  x 
)

Symmetrize an array x, using N n-fold equivalence classes in symmIndex.

Parameters
NLength of array x
nLength of symmetry equivalence classes
symmIndexEvery consecutive set of n indices in this array forms an equivalence class
xData array to be symmetrized in place
void eblas_symmetrize ( int  N,
int  n,
const int *  symmIndex,
const int *  symmMult,
const complex phase,
complex x 
)

Symmetrize a complex array x with phase factors, using N n-fold equivalence classes in symmIndex (useful for space group symmetrization in reciprocal space)

Parameters
NLength of array x
nLength of symmetry equivalence classes
symmIndexEvery consecutive set of n indices in this array forms an equivalence class
symmMultMultiplicity per equivalence class (number of repetitions of each element in orbit)
phasePhase factors corresponding to each entry in symmIndex
xData array to be symmetrized in place
void eblas_zero ( int  N,
double *  x 
)

Zero a data array.

Parameters
NNumber of elements to zero
xData pointer