20 #ifndef JDFTX_CORE_BLASEXTRA_H 21 #define JDFTX_CORE_BLASEXTRA_H 27 #include <gsl/gsl_cblas.h> 31 #include <core/scalar.h> 36 #include <cuda_runtime.h> 48 template<
typename Ty,
typename Tx>
void eblas_mul(
const int N,
const Tx* X,
const int incX, Ty* Y,
const int incY);
51 inline void eblas_dmul(
const int N,
const double* X,
const int incX,
double* Y,
const int incY) {
eblas_mul(N,X,incX,Y,incY); }
59 void eblas_dmul_gpu(
const int N,
const double* X,
const int incX,
double* Y,
const int incY);
75 template<
typename Ty,
typename Tx>
void eblas_div(
const int N,
const Tx* X,
const int incX, Ty* Y,
const int incY);
78 inline void eblas_ddiv(
const int N,
const double* X,
const int incX,
double* Y,
const int incY) {
eblas_div(N,X,incX,Y,incY); }
86 void eblas_ddiv_gpu(
const int N,
const double* X,
const int incX,
double* Y,
const int incY);
118 void eblas_zgemm(CBLAS_TRANSPOSE TransA, CBLAS_TRANSPOSE TransB,
int M,
int N,
int K,
122 void eblas_zgemm_gpu(CBLAS_TRANSPOSE TransA, CBLAS_TRANSPOSE TransB,
int M,
int N,
int K,
140 void eblas_scatter_daxpy(
const int Nindex,
double a,
const int* index,
const double* x,
double* y);
153 void eblas_gather_daxpy(
const int Nindex,
double a,
const int* index,
const double* x,
double* y);
212 template<
typename T>
void eblas_copy(T* dest,
const T* src,
int N) { memcpy(dest, src, N*
sizeof(T)); }
220 void eblas_dscal(
int N,
double a,
double* x,
int incx);
226 void eblas_daxpy(
int N,
double a,
const double* x,
int incx,
double* y,
int incy);
232 double eblas_ddot(
int N,
const double* x,
int incx,
const double* y,
int ncy);
236 double eblas_dnrm2(
int N,
const double* x,
int incx);
240 template<
typename T>
void eblas_copy_gpu(T* dest,
const T* src,
int N) { cudaMemcpy(dest, src, N*
sizeof(T), cudaMemcpyDeviceToDevice); }
247 #define eblas_dscal_gpu cublasDscal 253 #define eblas_daxpy_gpu cublasDaxpy 259 #define eblas_ddot_gpu cublasDdot 263 #define eblas_dnrm2_gpu cublasDnrm2 269 #define callPref(functionName) functionName##_gpu //gpu versions available, so use them preferentially 271 #define callPref(functionName) functionName //only cpu version available 282 void eblas_capMinMax(
const int N,
double* x,
double& xMin,
double& xMax,
double capLo=-DBL_MAX,
double capHi=+DBL_MAX);
284 void eblas_capMinMax_gpu(
const int N,
double* x,
double& xMin,
double& xMax,
double capLo=-DBL_MAX,
double capHi=+DBL_MAX);
296 template<
typename Ty,
typename Tx>
297 void eblas_mul_sub(
size_t iMin,
size_t iMax,
const Tx* X,
const int incX, Ty* Y,
const int incY)
298 {
for(
size_t i=iMin; i<iMax; i++) Y[incY*i] *= X[incX*i];
301 template<
typename Ty,
typename Tx>
302 void eblas_mul(
const int N,
const Tx* X,
const int incX, Ty* Y,
const int incY)
303 {
if(incY==0)
die(
"incY cannot be = 0")
305 eblas_mul_sub<Ty,Tx>, N, X, incX, Y, incY);
309 template<typename Ty, typename Tx>
310 void eblas_div_sub(
size_t iMin,
size_t iMax, const Tx* X, const
int incX, Ty* Y, const
int incY)
311 {
for(
size_t i=iMin; i<iMax; i++) Y[incY*i] /= X[incX*i];
313 template<
typename Ty,
typename Tx>
314 void eblas_div(
const int N,
const Tx* X,
const int incX, Ty* Y,
const int incY)
315 {
if(incY==0)
die(
"incY cannot be = 0")
317 eblas_div_sub<Ty,Tx>, N, X, incX, Y, incY);
321 #endif // JDFTX_CORE_BLASEXTRA_H
void threadLaunch(int nThreads, Callable *func, size_t nJobs, Args...args)
A simple utility for running muliple threads.
Utilities for threading (wrappers around std::thread)
#define die(...)
Quit with an error message (formatted using printf()). Must be called from all processes.
Definition: Util.h:114
Complex number (need to define our own because we need operators for gpu code as well) ...
Definition: scalar.h:49