JDFTx  1.2.0
operators_internal.h
1 /*-------------------------------------------------------------------
2 Copyright 2011 Ravishankar Sundararaman
3 
4 This file is part of JDFTx.
5 
6 JDFTx is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10 
11 JDFTx is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with JDFTx. If not, see <http://www.gnu.org/licenses/>.
18 -------------------------------------------------------------------*/
19 
20 #ifndef JDFTX_ELECTRONIC_OPERATORS_INTERNAL_H
21 #define JDFTX_ELECTRONIC_OPERATORS_INTERNAL_H
22 
23 #include <core/matrix3.h>
24 #include <electronic/RadialFunction.h>
25 #include <electronic/SphericalHarmonics.h>
26 #include <vector>
27 
28 //Copied from Util.h (which cannot be included here due to lack of CUDA C++11 support)
29 int assertStackTraceExit(const char* expr, const char* function, const char* file, long line);
30 #ifndef assert
31 #define assert(expr) (void)((expr) ? 0 : assertStackTraceExit(#expr, __func__, __FILE__, __LINE__))
32 #endif
33 
36 template<typename T, int N>
37 struct array
38 { T arr[N];
39  array(const std::vector<T>& vec) { assert(N==int(vec.size())); for(int s=0; s<N; s++) arr[s]=vec[s]; }
40  __hostanddev__ array(T t=0) { for(int s=0; s<N; s++) arr[s]=t; }
41  __hostanddev__ T& operator[](int i) { return arr[i]; }
42  __hostanddev__ const T& operator[](int i) const { return arr[i]; }
43 };
44 
45 
46 __hostanddev__ void D_calc(int i, const vector3<int>& iG, const complex* in, complex* out,
47  const vector3<>& Ge)
48 { out[i] = in[i] * complex(0, dot(iG,Ge));
49 }
50 __hostanddev__ void DD_calc(int i, const vector3<int>& iG, const complex* in, complex* out,
51  const vector3<>& Ge1, const vector3<>& Ge2)
52 { out[i] = in[i] * (-dot(iG,Ge1) * dot(iG,Ge2));
53 }
54 
56 #define SwitchTemplate_l(l,fTemplate,argList) \
57  switch(l) \
58  { case 0: fTemplate<0> argList; break; \
59  case 1: fTemplate<1> argList; break; \
60  case 2: fTemplate<2> argList; break; \
61  case 3: fTemplate<3> argList; break; \
62  case 4: fTemplate<4> argList; break; \
63  case 5: fTemplate<5> argList; break; \
64  case 6: fTemplate<6> argList; break; \
65  }
66 
67 template<int l, int lpm> struct lGradient_staticLoop
68 { static __hostanddev__ void set(int i, const vector3<>& g, const complex& phasedIn, const array<complex*,2*l+1>& out)
69  { out[lpm][i] = phasedIn * Ylm<l,lpm-l>(g);
70  lGradient_staticLoop<l,lpm-1>::set(i, g, phasedIn, out);
71  }
72 };
73 template<int l> struct lGradient_staticLoop<l,-1> { static __hostanddev__ void set(int i, const vector3<>& g, const complex& phasedIn, const array<complex*,2*l+1>& out) {} }; //end recursion
74 
75 template<int l> __hostanddev__ void lGradient_calc(int i, const vector3<int>& iG, bool isNyq, const complex* in, const array<complex*,2*l+1>& out, const matrix3<>& G)
76 { const complex phase = cis(l*0.5*M_PI); // iota^l (computable at compile time)
77  lGradient_staticLoop<l,l+l>::set(i, iG*G, isNyq ? 0. : phase * in[i], out);
78 }
79 
80 template<int l, int lpm> struct lDivergence_staticLoop
81 { static __hostanddev__ complex get(int i, const vector3<>& g, const array<const complex*,2*l+1>& in)
82  { return in[lpm][i] * Ylm<l,lpm-l>(g) + lDivergence_staticLoop<l,lpm-1>::get(i, g, in);
83  }
84 };
85 template<int l> struct lDivergence_staticLoop<l,-1> { static __hostanddev__ complex get(int i, const vector3<>& g, const array<const complex*,2*l+1>& in) { return complex(); } }; //end recursion
86 
87 template<int l> __hostanddev__ void lDivergence_calc(int i, const vector3<int>& iG, bool isNyq, const array<const complex*,2*l+1>& in, complex* out, const matrix3<>& G)
88 { const complex phase = cis(l*0.5*M_PI); // iota^l (computable at compile time)
89  out[i] = (isNyq ? 0. : phase) * lDivergence_staticLoop<l,l+l>::get(i, iG*G, in);
90 }
91 
92 
93 __hostanddev__ complex blochPhase_calc(const vector3<int>& iv, const vector3<>& invS, const vector3<>& k)
94 { return cis(2*M_PI*dot(k, vector3<>(iv[0]*invS[0], iv[1]*invS[1], iv[2]*invS[2])));
95 }
96 
97 template<typename scalar> __hostanddev__
98 void pointGroupScatter_calc(int i, const vector3<int>& iv, const vector3<int>& S,
99  const scalar* in, scalar* out, const matrix3<int>& mMesh)
100 { vector3<int> ivRot = mMesh * iv; //rotated index vector
101  //Project back into range:
102  for(int j=0; j<3; j++)
103  { ivRot[j] = ivRot[j] % S[j];
104  if(ivRot[j]<0) ivRot[j] += S[j];
105  }
106  int iRot = ivRot[2]+S[2]*(ivRot[1]+S[1]*ivRot[0]); //rotated index
107  out[iRot] = in[i];
108 }
109 
110 __hostanddev__ complex radialFunction_calc(const vector3<int>& iG, const matrix3<>& GGT, const RadialFunctionG& f, const vector3<>& r0)
111 { return f(sqrt(GGT.metric_length_squared(iG))) * cis(-2*M_PI*dot(iG,r0));
112 }
113 
114 __hostanddev__ void reducedL_calc(int j, int nbasis, int ncols, const complex* Y, complex* LY,
115  const matrix3<> GGT, const vector3<int>* iGarr, const vector3<> k, double detR)
116 { for (int i=0; i < ncols; i++)
117  LY[nbasis*i+j] = (-detR * GGT.metric_length_squared(iGarr[j]+k)) * Y[nbasis*i+j];
118 }
119 __hostanddev__ void reducedLinv_calc(int j, int nbasis, int ncols, const complex* Y, complex* LinvY,
120  const matrix3<> GGT, const vector3<int>* iGarr, const vector3<> k, double detR)
121 { for (int i=0; i < ncols; i++)
122  { double G2 = GGT.metric_length_squared(iGarr[j]+k);
123  LinvY[nbasis*i+j] = (G2 ? -1./(detR*G2) : 0.) * Y[nbasis*i+j];
124  }
125 }
126 
127 __hostanddev__ void precond_inv_kinetic_calc(int j, int nbasis, int ncols, complex* Ydata,
128  double KErollover, const matrix3<> GGT, const vector3<int>* iGarr, const vector3<> k, double invdetR)
129 {
130  double x = 0.5*GGT.metric_length_squared(iGarr[j]+k)/KErollover;
131  double precondFactor = 1.+x*(1.+x*(1.+x*(1.+x*(1.+x*(1.+x*(1.+x*(1.+x)))))));
132  precondFactor = precondFactor*invdetR/(1.+x*precondFactor);
133  for(int i=0; i < ncols; i++)
134  Ydata[nbasis*i+j] *= precondFactor;
135 }
136 
137 __hostanddev__ void precond_inv_kinetic_band_calc(int j, int nbasis, int ncols, complex* Ydata, const double* KEref,
138  const matrix3<>& GGT, const vector3<int>* iGarr, const vector3<>& k)
139 {
140  double KE = 0.5*GGT.metric_length_squared(iGarr[j]+k);
141  for(int i=0; i<ncols; i++)
142  { double x = KE/KEref[i];
143  Ydata[nbasis*i+j] *= (27.+x*(18.+x*(12.+x*8.))) / (27.+x*(18.+x*(12.+x*(8.+x*16))));
144  }
145 }
146 
147 __hostanddev__
148 void translate_calc(int j, int nbasis, int ncols, complex* Y, const vector3<int>* iGarr, const vector3<>& k, const vector3<>& dr)
149 { complex tFactor = cis(-2*M_PI*dot(iGarr[j]+k,dr));
150  for(int i=0; i<ncols; i++)
151  Y[nbasis*i+j] *= tFactor;
152 }
153 
154 __hostanddev__
155 void translateColumns_calc(int j, int nbasis, int ncols, complex* Y, const vector3<int>* iGarr, const vector3<>& k, const vector3<>* dr)
156 { for(int i=0; i<ncols; i++)
157  Y[nbasis*i+j] *= cis(-2*M_PI*dot(iGarr[j]+k,dr[i]));
158 }
159 
160 __hostanddev__ void reducedD_calc(int j, int nbasis, int ncols, const complex* Ydata, complex* DYdata,
161  const vector3<int>* iGarr, double kdotGe, const vector3<> Ge)
162 {
163  complex Di(0, kdotGe+dot(iGarr[j],Ge)); // i (k+G).e where e is the cartesian direction of interest
164  for(int i=0; i < ncols; i++)
165  DYdata[nbasis*i+j] = Di*Ydata[nbasis*i+j];
166 }
167 
168 __hostanddev__ void reducedDD_calc(int j, int nbasis, int ncols, const complex* Ydata, complex* DYdata,
169  const vector3<int>* iGarr, double kdotGe1, double kdotGe2, const vector3<> Ge1, const vector3<> Ge2)
170 {
171  complex Di(0, kdotGe1+dot(iGarr[j],Ge1)); // i (k+G).ei
172  complex Dj(0, kdotGe2+dot(iGarr[j],Ge2)); // i (k+G).ej
173  for(int i=0; i < ncols; i++)
174  DYdata[nbasis*i+j] = Di*Dj*Ydata[nbasis*i+j];
175 }
176 
177 #endif // JDFTX_ELECTRONIC_OPERATORS_INTERNAL_H
ScalarField sqrt(const ScalarField &)
Elementwise square root (preserve input)
complex dot(const Tptr &X, const Tptr &Y)
Definition: Operators.h:196
Definition: operators_internal.h:67
int assertStackTraceExit(const char *expr, const char *function, const char *file, long line)
Definition: operators_internal.h:37
G-space radial function stored on a uniform grid (of |G|)
Definition: RadialFunction.h:28
Definition: operators_internal.h:80
Complex number (need to define our own because we need operators for gpu code as well) ...
Definition: scalar.h:49
#define assert(expr)
A custom assertion with stack trace (NOTE: enabled in release modes as well)
Definition: Util.h:100