JDFTx  1.2.1
SpeciesInfo_internal.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------
2 Copyright 2012 Ravishankar Sundararaman
3 
4 This file is part of JDFTx.
5 
6 JDFTx is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10 
11 JDFTx is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with JDFTx. If not, see <http://www.gnu.org/licenses/>.
18 -------------------------------------------------------------------*/
19 
20 #ifndef JDFTX_ELECTRONIC_SPECIESINFO_INTERNAL_H
21 #define JDFTX_ELECTRONIC_SPECIESINFO_INTERNAL_H
22 
24 
25 #include <core/matrix3.h>
26 #include <electronic/RadialFunction.h>
27 #include <electronic/SphericalHarmonics.h>
28 #include <stdint.h>
29 
31 template<int l, int m> __hostanddev__
32 void Vnl_calc(int n, int atomStride, int nAtoms, const vector3<>& k, const vector3<int>* iGarr,
33  const matrix3<>& G, const vector3<>* pos, const RadialFunctionG& VnlRadial, complex* Vnl)
34 {
35  vector3<> kpG = k + iGarr[n]; //k+G in reciprocal lattice coordinates:
36  vector3<> qvec = kpG * G; //k+G in cartesian coordinates
37  double q = qvec.length();
38  vector3<> qhat = qvec * (q ? 1.0/q : 0.0); //the unit vector along qvec (set qhat to 0 for q=0 (doesn't matter))
39  double prefac = Ylm<l,m>(qhat) * VnlRadial(q); //prefactor to structure factor
40  //Loop over columns (multiple atoms at same l,m):
41  for(int atom=0; atom<nAtoms; atom++)
42  Vnl[atom*atomStride+n] = prefac * cis((-2*M_PI)*dot(pos[atom],kpG));
43 }
44 void Vnl(int nbasis, int atomStride, int nAtoms, int l, int m, const vector3<> k, const vector3<int>* iGarr,
45  const matrix3<> G, const vector3<>* pos, const RadialFunctionG& VnlRadial, complex* Vnl);
46 #ifdef GPU_ENABLED
47 void Vnl_gpu(int nbasis, int atomStride, int nAtoms, int l, int m, const vector3<> k, const vector3<int>* iGarr,
48  const matrix3<> G, const vector3<>* pos, const RadialFunctionG& VnlRadial, complex* Vnl);
49 #endif
50 
51 
56 template<int lm> struct StaticLoopYlmTag{};
57 template<int Nlm, typename Functor, int lmInv> struct StaticLoopYlm
58 { __hostanddev__ static void exec(Functor* f)
59  { (*f)(StaticLoopYlmTag<Nlm-lmInv>());
61  }
62 };
63 template<int Nlm, typename Functor> struct StaticLoopYlm< Nlm, Functor, 0>
64 { __hostanddev__ static void exec(Functor* f) { return; }
65 };
66 template<int Nlm, typename Functor> __hostanddev__ void staticLoopYlm(Functor* f)
68 }
69 
70 #define SwitchTemplate_Nlm(Nlm, func, args) \
71  switch(Nlm) \
72  { case 1: func<1> args; break; \
73  case 4: func<4> args; break; \
74  case 9: func<9> args; break; \
75  case 16: func<16> args; break; \
76  case 25: func<25> args; break; \
77  case 49: func<49> args; break; \
78  default: fprintf(stderr, "Invalid Nlm in SwitchTemplate_Nlm"); exit(1); \
79  }
80 
81 
86 { vector3<> qhat; double q;
87  int nCoeff; double dGinv; const double* nRadial;
88  complex n;
89 
90  __hostanddev__ nAugmentFunctor(const vector3<>& qvec, int nCoeff, double dGinv, const double* nRadial)
91  : nCoeff(nCoeff), dGinv(dGinv), nRadial(nRadial)
92  { q = qvec.length();
93  qhat = qvec * (q ? 1.0/q : 0.0); //the unit vector along qvec (set qhat to 0 for q=0 (doesn't matter))
94  }
95 
96  template<int lm> __hostanddev__ void operator()(const StaticLoopYlmTag<lm>&)
97  { //Compute phase (-i)^l:
98  complex mIota(0,-1), phase(1,0);
99  for(int l=0; l*(l+2) < lm; l++) phase *= mIota;
100  //Accumulate result:
101  double Gindex = q * dGinv;
102  if(Gindex < nCoeff-5)
103  n += phase * Ylm<lm>(qhat) * QuinticSpline::value(nRadial+lm*nCoeff, Gindex);
104  }
105 };
106 template<int Nlm> __hostanddev__
107 void nAugment_calc(int i, const vector3<int>& iG, const matrix3<>& G,
108  int nCoeff, double dGinv, const double* nRadial, const vector3<>& atpos, complex* n)
109 {
110  nAugmentFunctor functor(iG*G, nCoeff, dGinv, nRadial);
111  staticLoopYlm<Nlm>(&functor);
112  n[i] += functor.n * cis((-2*M_PI)*dot(atpos,iG));
113 }
114 void nAugment(int Nlm,
115  const vector3<int> S, const matrix3<>& G, int iGstart, int iGstop,
116  int nCoeff, double dGinv, const double* nRadial, const vector3<>& atpos, complex* n);
117 #ifdef GPU_ENABLED
118 void nAugment_gpu(int Nlm,
119  const vector3<int> S, const matrix3<>& G, int iGstart, int iGstop,
120  int nCoeff, double dGinv, const double* nRadial, const vector3<>& atpos, complex* n);
121 #endif
122 
123 //Function for initializing the index arrays used by nAugmentGrad
125 __hostanddev__ uint64_t setNagIndex_calc(const vector3<int>& iG, const vector3<int>& S, const matrix3<>& G, double dGinv)
126 { uint64_t Gindex = uint64_t((iG*G).length() * dGinv);
127  vector3<int> iv = iG; for(int k=0; k<3; k++) if(iv[k]<0) iv[k] += S[k];
128  return (Gindex << 48) //Putting Gindex in the higher word allows sorting by it first, and then by grid point index
129  + (uint64_t(iv[0]) << 32) + (uint64_t(iv[1]) << 16) + uint64_t(iv[2]);
130 }
131 __hostanddev__ void setNagIndexPtr_calc(int i, int iMax, int nCoeff, const uint64_t* nagIndex, size_t* nagIndexPtr)
132 { int Gindex = int(nagIndex[i] >> 48);
133  int GindexNext = (i+1<iMax) ? int(nagIndex[i+1] >> 48) : nCoeff;
134  if(i==0) for(int j=0; j<=Gindex; j++) nagIndexPtr[j] = 0;
135  for(int j=Gindex; j<GindexNext; j++)
136  nagIndexPtr[j+1] = i+1;
137 }
138 void setNagIndex(const vector3<int>& S, const matrix3<>& G, int iGstart, int iGstop, int nCoeff, double dGinv, uint64_t*& nagIndex, size_t*& nagIndexPtr);
139 #ifdef GPU_ENABLED
140 void setNagIndex_gpu(const vector3<int>& S, const matrix3<>& G, int iGstart, int iGstop, int nCoeff, double dGinv, uint64_t*& nagIndex, size_t*& nagIndexPtr);
141 #endif
142 
143 //Gradient propragation corresponding to nAugment:
144 //(The MPI division happens implicitly here, because nagIndex is limited to each process's share (see above))
146 { vector3<> qhat; double q;
147  int nCoeff; double dGinv; const double* nRadial;
148  complex E_n, nE_n;
149  double* E_nRadial;
150  int dotPrefac; //prefactor in dot-product (1 or 2 for each reciprocal space point, because of real symmetry)
151 
152  __hostanddev__ nAugmentGradFunctor(const vector3<>& qvec, int nCoeff, double dGinv, const double* nRadial, const complex& E_n, double* E_nRadial, int dotPrefac)
153  : nCoeff(nCoeff), dGinv(dGinv), nRadial(nRadial), E_n(E_n), E_nRadial(E_nRadial), dotPrefac(dotPrefac)
154  { q = qvec.length();
155  qhat = qvec * (q ? 1.0/q : 0.0); //the unit vector along qvec (set qhat to 0 for q=0 (doesn't matter))
156  }
157 
158  template<int lm> __hostanddev__ void operator()(const StaticLoopYlmTag<lm>&)
159  { //Compute phase (-i)^l:
160  complex mIota(0,-1), phase(1,0);
161  for(int l=0; l*(l+2) < lm; l++) phase *= mIota;
162  //Accumulate result:
163  double Gindex = q * dGinv;
164  if(Gindex < nCoeff-5)
165  { complex term = phase * Ylm<lm>(qhat) * E_n;
166  QuinticSpline::valueGrad(dotPrefac * term.real(), E_nRadial+lm*nCoeff, Gindex);
167  if(nRadial) nE_n += term * QuinticSpline::value(nRadial+lm*nCoeff, Gindex); //needed again only when computing forces
168  }
169  }
170 };
171 template<int Nlm> __hostanddev__
172 void nAugmentGrad_calc(uint64_t key, const vector3<int>& S, const matrix3<>& G,
173  int nCoeff, double dGinv, const double* nRadial, const vector3<>& atpos,
174  const complex* ccE_n, double* E_nRadial, vector3<complex*> E_atpos, bool dummyGpuThread=false)
175 {
176  //Obtain 3D index iG and array offset i for this point (similar to COMPUTE_halfGindices)
177  vector3<int> iG;
178  iG[2] = int(0xFFFF & key); key >>= 16;
179  iG[1] = int(0xFFFF & key); key >>= 16;
180  iG[0] = int(0xFFFF & key);
181  size_t i = iG[2] + (S[2]/2+1)*size_t(iG[1] + S[1]*iG[0]);
182  for(int j=0; j<3; j++) if(2*iG[j]>S[j]) iG[j]-=S[j];
183  int dotPrefac = (iG[2]==0||2*iG[2]==S[2]) ? 1 : 2;
184 
185  nAugmentGradFunctor functor(iG*G, nCoeff, dGinv, nRadial, dummyGpuThread ? complex() : ccE_n[i].conj() * cis((-2*M_PI)*dot(atpos,iG)), E_nRadial, dotPrefac);
186  staticLoopYlm<Nlm>(&functor);
187  if(nRadial && !dummyGpuThread) accumVector((functor.nE_n * complex(0,-2*M_PI)) * iG, E_atpos, i);
188 }
189 void nAugmentGrad(int Nlm, const vector3<int> S, const matrix3<>& G,
190  int nCoeff, double dGinv, const double* nRadial, const vector3<>& atpos,
191  const complex* ccE_n, double* E_nRadial, vector3<complex*> E_atpos,
192  const uint64_t* nagIndex, const size_t* nagIndexPtr);
193 #ifdef GPU_ENABLED
194 void nAugmentGrad_gpu(int Nlm, const vector3<int> S, const matrix3<>& G,
195  int nCoeff, double dGinv, const double* nRadial, const vector3<>& atpos,
196  const complex* ccE_n, double* E_nRadial, vector3<complex*> E_atpos,
197  const uint64_t* nagIndex, const size_t* nagIndexPtr);
198 #endif
199 
200 
202 __hostanddev__ complex getSG_calc(const vector3<int>& iG, const int& nAtoms, const vector3<>* atpos)
203 { complex SG = complex(0,0);
204  for(int atom=0; atom<nAtoms; atom++)
205  SG += cis(-2*M_PI*dot(iG,atpos[atom]));
206  return SG;
207 }
209 void getSG(const vector3<int> S, int nAtoms, const vector3<>* atpos, double invVol, complex* SG);
210 #ifdef GPU_ENABLED
211 void getSG_gpu(const vector3<int> S, int nAtoms, const vector3<>* atpos, double invVol, complex* SG);
212 #endif
213 
215 __hostanddev__ void updateLocal_calc(int i, const vector3<int>& iG, const matrix3<>& GGT,
216  complex *Vlocps, complex *rhoIon, complex *nChargeball, complex* nCore, complex* tauCore,
217  int nAtoms, const vector3<>* atpos, double invVol, const RadialFunctionG& VlocRadial,
218  double Z, const RadialFunctionG& nCoreRadial, const RadialFunctionG& tauCoreRadial,
219  double Zchargeball, double wChargeball)
220 {
221  double Gsq = GGT.metric_length_squared(iG);
222 
223  //Compute structure factor (scaled by 1/detR):
224  complex SGinvVol = getSG_calc(iG, nAtoms, atpos) * invVol;
225 
226  //Short-ranged part of Local potential (long-ranged part added on later in IonInfo.cpp):
227  Vlocps[i] += SGinvVol * VlocRadial(sqrt(Gsq));
228 
229  //Nuclear charge (optionally widened to a gaussian later in IonInfo.cpp):
230  rhoIon[i] += SGinvVol * (-Z);
231 
232  //Chargeball:
233  if(nChargeball)
234  nChargeball[i] += SGinvVol * Zchargeball * exp(-0.5*Gsq*pow(wChargeball,2));
235 
236  //Partial core:
237  if(nCore) nCore[i] += SGinvVol * nCoreRadial(sqrt(Gsq));
238  if(tauCore) tauCore[i] += SGinvVol * tauCoreRadial(sqrt(Gsq));
239 }
240 void updateLocal(const vector3<int> S, const matrix3<> GGT,
241  complex *Vlocps, complex *rhoIon, complex *n_chargeball, complex* n_core, complex* tauCore,
242  int nAtoms, const vector3<>* atpos, double invVol, const RadialFunctionG& VlocRadial,
243  double Z, const RadialFunctionG& nCoreRadial, const RadialFunctionG& tauCoreRadial,
244  double Zchargeball, double wChargeball);
245 #ifdef GPU_ENABLED
246 void updateLocal_gpu(const vector3<int> S, const matrix3<> GGT,
247  complex *Vlocps, complex *rhoIon, complex *n_chargeball, complex* n_core, complex* tauCore,
248  int nAtoms, const vector3<>* atpos, double invVol, const RadialFunctionG& VlocRadial,
249  double Z, const RadialFunctionG& nCoreRadial, const RadialFunctionG& tauCoreRadial,
250  double Zchargeball, double wChargeball);
251 #endif
252 
253 
255 __hostanddev__ void gradLocalToSG_calc(int i, const vector3<int> iG, const matrix3<> GGT,
256  const complex* ccgrad_Vlocps, const complex* ccgrad_rhoIon, const complex* ccgrad_nChargeball,
257  const complex* ccgrad_nCore, const complex* ccgrad_tauCore, complex* ccgrad_SG,
258  const RadialFunctionG& VlocRadial, double Z,
259  const RadialFunctionG& nCoreRadial, const RadialFunctionG& tauCoreRadial,
260  double Zchargeball, double wChargeball)
261 {
262  double Gsq = GGT.metric_length_squared(iG);
263  complex ccgrad_SGinvVol(0,0); //result for this G value (gradient w.r.t structure factor/volume)
264 
265  //Local potential (short ranged part in the radial function - Z/r):
266  ccgrad_SGinvVol += ccgrad_Vlocps[i] * VlocRadial(sqrt(Gsq));
267 
268  //Nuclear charge
269  if(ccgrad_rhoIon)
270  ccgrad_SGinvVol += ccgrad_rhoIon[i] * (-Z);
271 
272  //Chargeball:
273  if(ccgrad_nChargeball)
274  ccgrad_SGinvVol += ccgrad_nChargeball[i] * Zchargeball * exp(-0.5*Gsq*pow(wChargeball,2));
275 
276  //Partial core:
277  if(ccgrad_nCore) ccgrad_SGinvVol += ccgrad_nCore[i] * nCoreRadial(sqrt(Gsq));
278  if(ccgrad_tauCore) ccgrad_SGinvVol += ccgrad_tauCore[i] * tauCoreRadial(sqrt(Gsq));
279 
280  //Store result:
281  ccgrad_SG[i] = ccgrad_SGinvVol;
282 }
283 void gradLocalToSG(const vector3<int> S, const matrix3<> GGT,
284  const complex* ccgrad_Vlocps, const complex* ccgrad_rhoIon, const complex* ccgrad_nChargeball,
285  const complex* ccgrad_nCore, const complex* ccgrad_tauCore, complex* ccgrad_SG,
286  const RadialFunctionG& VlocRadial, double Z,
287  const RadialFunctionG& nCoreRadial, const RadialFunctionG& tauCoreRadial,
288  double Zchargeball, double wChargeball);
289 #ifdef GPU_ENABLED
290 void gradLocalToSG_gpu(const vector3<int> S, const matrix3<> GGT,
291  const complex* ccgrad_Vlocps, const complex* ccgrad_rhoIon, const complex* ccgrad_nChargeball,
292  const complex* ccgrad_nCore, const complex* ccgrad_tauCore, complex* ccgrad_SG,
293  const RadialFunctionG& VlocRadial, double Z,
294  const RadialFunctionG& nCoreRadial, const RadialFunctionG& tauCoreRadial,
295  double Zchargeball, double wChargeball);
296 #endif
297 
298 
301 __hostanddev__ void gradSGtoAtpos_calc(int i, const vector3<int> iG, const vector3<> atpos,
302  const complex* ccgrad_SG, vector3<complex*> grad_atpos)
303 {
304  complex term = complex(0,-2*M_PI) * cis(-2*M_PI*dot(iG,atpos)) * ccgrad_SG[i].conj();
305  storeVector(iG * term, grad_atpos, i);
306 }
307 void gradSGtoAtpos(const vector3<int> S, const vector3<> atpos,
308  const complex* ccgrad_SG, vector3<complex*> grad_atpos);
309 #ifdef GPU_ENABLED
310 void gradSGtoAtpos_gpu(const vector3<int> S, const vector3<> atpos,
311  const complex* ccgrad_SG, vector3<complex*> grad_atpos);
312 #endif
313 
314 
315 #endif // JDFTX_ELECTRONIC_SPECIESINFO_INTERNAL_H
__hostanddev__ uint64_t setNagIndex_calc(const vector3< int > &iG, const vector3< int > &S, const matrix3<> &G, double dGinv)
(In MPI mode, only a subset of G-vectors are indexed on each process (to correspond to nAUgment)) ...
Definition: SpeciesInfo_internal.h:125
ScalarField pow(const ScalarField &, double alpha)
Elementwise power (preserve input)
ScalarField sqrt(const ScalarField &)
Elementwise square root (preserve input)
Tptr conj(Tptr &&in)
Generic elementwise conjugate for complex data:
Definition: Operators.h:122
void getSG(const vector3< int > S, int nAtoms, const vector3<> *atpos, double invVol, complex *SG)
Get structure factor in a ScalarFieldTilde&#39;s data/dataGpu (with 1/vol normalization factor) ...
complex dot(const Tptr &X, const Tptr &Y)
Definition: Operators.h:196
Definition: SpeciesInfo_internal.h:145
Definition: SpeciesInfo_internal.h:57
__hostanddev__ void gradSGtoAtpos_calc(int i, const vector3< int > iG, const vector3<> atpos, const complex *ccgrad_SG, vector3< complex * > grad_atpos)
Definition: SpeciesInfo_internal.h:301
__hostanddev__ double value(const double *coeff, double x)
Compute value of quintic spline. Warning: x is not range-checked.
__hostanddev__ void storeVector(const vector3< scalar > &v, vector3< scalar * > &vArr, int i)
Store vector to a vector field.
Definition: vector3.h:171
__hostanddev__ void accumVector(const vector3< scalar > &v, vector3< scalar * > &vArr, int i)
Accumulate vector onto a vector field.
Definition: vector3.h:175
Definition: SpeciesInfo_internal.h:56
Definition: SpeciesInfo_internal.h:85
__hostanddev__ complex getSG_calc(const vector3< int > &iG, const int &nAtoms, const vector3<> *atpos)
Get structure factor for a specific iG, given a list of atoms.
Definition: SpeciesInfo_internal.h:202
G-space radial function stored on a uniform grid (of |G|)
Definition: RadialFunction.h:28
ScalarField exp(const ScalarField &)
Elementwise exponential (preserve input)
__hostanddev__ void gradLocalToSG_calc(int i, const vector3< int > iG, const matrix3<> GGT, const complex *ccgrad_Vlocps, const complex *ccgrad_rhoIon, const complex *ccgrad_nChargeball, const complex *ccgrad_nCore, const complex *ccgrad_tauCore, complex *ccgrad_SG, const RadialFunctionG &VlocRadial, double Z, const RadialFunctionG &nCoreRadial, const RadialFunctionG &tauCoreRadial, double Zchargeball, double wChargeball)
Propagate (complex conjugates of) gradients w.r.t Vlocps, rhoIon etc to complex conjugate gradient w...
Definition: SpeciesInfo_internal.h:255
__hostanddev__ void Vnl_calc(int n, int atomStride, int nAtoms, const vector3<> &k, const vector3< int > *iGarr, const matrix3<> &G, const vector3<> *pos, const RadialFunctionG &VnlRadial, complex *Vnl)
Compute Vnl and optionally its gradients for a subset of the basis space, and for multiple atomic pos...
Definition: SpeciesInfo_internal.h:32
__hostanddev__ void updateLocal_calc(int i, const vector3< int > &iG, const matrix3<> &GGT, complex *Vlocps, complex *rhoIon, complex *nChargeball, complex *nCore, complex *tauCore, int nAtoms, const vector3<> *atpos, double invVol, const RadialFunctionG &VlocRadial, double Z, const RadialFunctionG &nCoreRadial, const RadialFunctionG &tauCoreRadial, double Zchargeball, double wChargeball)
Calculate local pseudopotential, ionic density and chargeball due to one species at a given G-vector...
Definition: SpeciesInfo_internal.h:215
__hostanddev__ void valueGrad(double E_value, double *E_coeff, double x)
Gradient propagation corresponding to value()
Complex number (need to define our own because we need operators for gpu code as well) ...
Definition: scalar.h:49