Commonly used BLAS-like routines. More...
#include <gsl/gsl_cblas.h>
#include <cstdlib>
#include <cstdio>
#include <cfloat>
#include <core/scalar.h>
#include <core/Thread.h>
#include <cublas.h>
#include <cuda_runtime.h>
Macros | |
#define | eblas_dscal_gpu cublasDscal |
Equivalent of eblas_dscal() for GPU data pointers. | |
#define | eblas_daxpy_gpu cublasDaxpy |
Equivalent of eblas_daxpy() for GPU data pointers. | |
#define | eblas_ddot_gpu cublasDdot |
Equivalent of eblas_ddot() for GPU data pointers. | |
#define | eblas_dnrm2_gpu cublasDnrm2 |
Equivalent of eblas_dnrm2() for GPU data pointers. | |
#define | callPref(functionName) functionName##_gpu |
Select between functionName and functionName_gpu for the CPU and GPU executables respectively. | |
Functions | |
template<typename Ty , typename Tx > | |
void | eblas_mul (const int N, const Tx *X, const int incX, Ty *Y, const int incY) |
Templated elementwise multiply Y *= X for arrays X, Y. More... | |
void | eblas_dmul (const int N, const double *X, const int incX, double *Y, const int incY) |
Specialization of eblas_mul() for double[] *= double[]. | |
void | eblas_zmul (const int N, const complex *X, const int incX, complex *Y, const int incY) |
Specialization of eblas_mul() for complex[] *= complex[]. | |
void | eblas_zmuld (const int N, const double *X, const int incX, complex *Y, const int incY) |
Specialization of eblas_mul() for complex[] *= double[]. | |
void | eblas_dmul_gpu (const int N, const double *X, const int incX, double *Y, const int incY) |
Equivalent of eblas_dmul() for GPU data pointers. | |
void | eblas_zmul_gpu (const int N, const complex *X, const int incX, complex *Y, const int incY) |
Equivalent of eblas_zmul() for GPU data pointers. | |
void | eblas_zmuld_gpu (const int N, const double *X, const int incX, complex *Y, const int incY) |
Equivalent of eblas_zmuld() for GPU data pointers. | |
template<typename Ty , typename Tx > | |
void | eblas_div (const int N, const Tx *X, const int incX, Ty *Y, const int incY) |
Templated elementwise divide Y /= X for arrays X, Y. More... | |
void | eblas_ddiv (const int N, const double *X, const int incX, double *Y, const int incY) |
Specialization of eblas_div() for double[] /= double[]. | |
void | eblas_zdiv (const int N, const complex *X, const int incX, complex *Y, const int incY) |
Specialization of eblas_div() for complex[] /= complex[]. | |
void | eblas_zdivd (const int N, const double *X, const int incX, complex *Y, const int incY) |
Specialization of eblas_div() for complex[] /= double[]. | |
void | eblas_ddiv_gpu (const int N, const double *X, const int incX, double *Y, const int incY) |
Equivalent of eblas_ddiv() for GPU data pointers. | |
void | eblas_zdiv_gpu (const int N, const complex *X, const int incX, complex *Y, const int incY) |
Equivalent of eblas_zdiv() for GPU data pointers. | |
void | eblas_zdivd_gpu (const int N, const double *X, const int incX, complex *Y, const int incY) |
Equivalent of eblas_zdivd() for GPU data pointers. | |
void | eblas_lincomb (const int N, const complex &sX, const complex *X, const int incX, const complex &sY, const complex *Y, const int incY, complex *Z, const int incZ) |
Elementwise linear combination Z = sX * X + sY * Y. More... | |
void | eblas_lincomb_gpu (const int N, const complex &sX, const complex *X, const int incX, const complex &sY, const complex *Y, const int incY, complex *Z, const int incZ) |
Equivalent of eblas_lincomb() for GPU data pointers. | |
void | eblas_zgemm (CBLAS_TRANSPOSE TransA, CBLAS_TRANSPOSE TransB, int M, int N, int K, const complex &alpha, const complex *A, const int lda, const complex *B, const int ldb, const complex &beta, complex *C, const int ldc) |
Threaded complex matrix multiply (threaded wrapper around zgemm) All the parameters have the same meaning as in cblas_zgemm, except element order is always Column Major (FORTRAN order!) | |
void | eblas_zgemm_gpu (CBLAS_TRANSPOSE TransA, CBLAS_TRANSPOSE TransB, int M, int N, int K, const complex &alpha, const complex *A, const int lda, const complex *B, const int ldb, const complex &beta, complex *C, const int ldc) |
Wrap cublasZgemm to provide the same interface as eblas_zgemm() | |
void | eblas_scatter_zdaxpy (const int Nindex, double a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false) |
Scatter y(index) += a * x. More... | |
void | eblas_scatter_zaxpy (const int Nindex, complex a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false) |
Equivalent of eblas_scatter_zdaxpy() with a complex scale factor. | |
void | eblas_scatter_daxpy (const int Nindex, double a, const int *index, const double *x, double *y, const double *w=0) |
Equivalent of eblas_scatter_zdaxpy() for real data arrays. | |
void | eblas_gather_zdaxpy (const int Nindex, double a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false) |
Gather y += a * x(index) More... | |
void | eblas_gather_zaxpy (const int Nindex, complex a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false) |
Equivalent of eblas_gather_zdaxpy() with a complex scale factor. | |
void | eblas_gather_daxpy (const int Nindex, double a, const int *index, const double *x, double *y, const double *w=0) |
Equivalent of eblas_scatter_zdaxpy() for real data arrays. | |
void | eblas_scatter_zdaxpy_gpu (const int Nindex, double a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false) |
Equivalent of eblas_scatter_zdaxpy() for GPU data pointers. | |
void | eblas_scatter_zaxpy_gpu (const int Nindex, complex a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false) |
Equivalent of eblas_scatter_zaxpy() for GPU data pointers. | |
void | eblas_scatter_daxpy_gpu (const int Nindex, double a, const int *index, const double *x, double *y, const double *w=0) |
Equivalent of eblas_scatter_daxpy() for GPU data pointers. | |
void | eblas_gather_zdaxpy_gpu (const int Nindex, double a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false) |
Equivalent of eblas_gather_zdaxpy() for GPU data pointers. | |
void | eblas_gather_zaxpy_gpu (const int Nindex, complex a, const int *index, const complex *x, complex *y, bool conjx=false, const complex *w=0, bool conjw=false) |
Equivalent of eblas_gather_zaxpy() for GPU data pointers. | |
void | eblas_gather_daxpy_gpu (const int Nindex, double a, const int *index, const double *x, double *y, const double *w=0) |
Equivalent of eblas_gather_daxpy() for GPU data pointers. | |
void | eblas_accumNorm (int N, const double &a, const complex *x, double *y) |
Accumulate elementwise norm of a complex array x into y i.e. y += a x conj(x) More... | |
void | eblas_accumProd (int N, const double &a, const complex *xU, const complex *xC, double *yRe, double *yIm) |
Accumulate elementwise product of two complex arrays xU and xC into real and imaginary parts yRe and yIm i.e. (yRe + i yIm) += a xU conj(xC) More... | |
void | eblas_accumNorm_gpu (int N, const double &a, const complex *x, double *y) |
Equivalent of eblas_accumNorm() for GPU data pointers. | |
void | eblas_accumProd_gpu (int N, const double &a, const complex *xU, const complex *xC, double *yRe, double *yIm) |
Equivalent of eblas_accumProd() for GPU data pointers. | |
void | eblas_symmetrize (int N, int n, const int *symmIndex, double *x) |
Symmetrize an array x, using N n-fold equivalence classes in symmIndex. More... | |
void | eblas_symmetrize (int N, int n, const int *symmIndex, complex *x) |
Equivalent of eblas_symmetrize() for complex data pointers. | |
void | eblas_symmetrize_gpu (int N, int n, const int *symmIndex, double *x) |
Equivalent of eblas_symmetrize() for real GPU data pointers. | |
void | eblas_symmetrize_gpu (int N, int n, const int *symmIndex, complex *x) |
Equivalent of eblas_symmetrize() for complex GPU data pointers. | |
void | eblas_symmetrize (int N, int n, const int *symmIndex, const int *symmMult, const complex *phase, complex *x) |
Symmetrize a complex array x with phase factors, using N n-fold equivalence classes in symmIndex (useful for space group symmetrization in reciprocal space) More... | |
void | eblas_symmetrize_gpu (int N, int n, const int *symmIndex, const int *symmMult, const complex *phase, complex *x) |
Equivalent of eblas_symmetrize() for complex GPU data pointers. | |
template<typename T > | |
void | eblas_copy (T *dest, const T *src, int N) |
Copy a data array. More... | |
template<typename T > | |
void | eblas_zero (int N, T *x) |
Zero a data array. More... | |
void | eblas_dscal (int N, double a, double *x, int incx) |
Scale a real array: threaded wrapper to the cblas_dscal BLAS1 function. | |
void | eblas_zdscal (int N, double a, complex *x, int incx) |
Scale a complex array by a real scale factor: threaded wrapper to the cblas_zdscal BLAS1 function. | |
void | eblas_zscal (int N, const complex &a, complex *x, int incx) |
Scale a complex array by a complex scale factor: threaded wrapper to the cblas_zscal BLAS1 function. | |
void | eblas_daxpy (int N, double a, const double *x, int incx, double *y, int incy) |
Scaled-accumulate on real arrays: threaded wrapper to the cblas_daxpy BLAS1 function. | |
void | eblas_zaxpy (int N, const complex &a, const complex *x, int incx, complex *y, int incy) |
Scaled-accumulate on complex arrays: threaded wrapper to the cblas_zaxpy BLAS1 function. | |
complex | eblas_zdotc (int N, const complex *x, int incx, const complex *y, int incy) |
Dot product of complex arrays: threaded wrapper to the cblas_zdotc BLAS1 function. | |
double | eblas_ddot (int N, const double *x, int incx, const double *y, int ncy) |
Dot product of real arrays: threaded wrapper to the cblas_ddot BLAS1 function. | |
double | eblas_dznrm2 (int N, const complex *x, int incx) |
2-norm of a complex array: threaded wrapper to the cblas_dznrm2 BLAS1 function | |
double | eblas_dnrm2 (int N, const double *x, int incx) |
2-norm of a real array: threaded wrapper to the cblas_dnrm2 BLAS1 function | |
template<typename T > | |
void | eblas_copy_gpu (T *dest, const T *src, int N) |
Equivalent of eblas_copy() for GPU data pointers. | |
template<typename T > | |
void | eblas_zero_gpu (int N, T *x) |
Equivalent of eblas_zero() for GPU data pointers. | |
void | eblas_zdscal_gpu (int N, double a, complex *x, int incx) |
Equivalent of eblas_zdscal() for GPU data pointers. | |
void | eblas_zscal_gpu (int N, const complex &a, complex *x, int incx) |
Equivalent of eblas_zscal for GPU data pointers. | |
void | eblas_zaxpy_gpu (int N, const complex &a, const complex *x, int incx, complex *y, int incy) |
Equivalent of eblas_zaxpy() for GPU data pointers. | |
complex | eblas_zdotc_gpu (int N, const complex *x, int incx, const complex *y, int incy) |
Equivalent of eblas_zdotc() for GPU data pointers. | |
double | eblas_dznrm2_gpu (int N, const complex *x, int incx) |
Equivalent of eblas_dznrm2() for GPU data pointers. | |
void | eblas_capMinMax (const int N, double *x, double &xMin, double &xMax, double capLo=-DBL_MAX, double capHi=+DBL_MAX) |
Find the minimum and maximum of a data array and optionally cap it from above and/or below. More... | |
void | eblas_capMinMax_gpu (const int N, double *x, double &xMin, double &xMax, double capLo=-DBL_MAX, double capHi=+DBL_MAX) |
Equivalent of eblas_capMinMax() for GPU data pointers. | |
Commonly used BLAS-like routines.