JDFTx  1.2.0
Minimize.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------
2 Copyright 2011 Ravishankar Sundararaman, Kendra Letchworth Weaver
3 
4 This file is part of JDFTx.
5 
6 JDFTx is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10 
11 JDFTx is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with JDFTx. If not, see <http://www.gnu.org/licenses/>.
18 -------------------------------------------------------------------*/
19 
20 #ifndef JDFTX_CORE_MINIMIZE_H
21 #define JDFTX_CORE_MINIMIZE_H
22 
23 #include <cmath>
24 #include <cfloat>
25 #include <algorithm>
26 #include <core/Util.h>
27 #include <core/MinimizeParams.h>
28 
31 
46 template<typename Vector> struct Minimizable
47 {
49  virtual void step(const Vector& dir, double alpha)=0;
50 
52  virtual double compute(Vector* grad, Vector* Kgrad)=0;
53 
56  virtual bool report(int iter) { return false; }
57 
59  virtual void constrain(Vector&) {}
60 
62  virtual double sync(double x) const { return x; }
63 
65  virtual double safeStepSize(const Vector& dir) const { return DBL_MAX; }
66 
68  double minimize(const MinimizeParams& params);
69 
72  void fdTest(const MinimizeParams& params);
73 
74 private:
75  typedef bool (*Linmin)(Minimizable<Vector>&, const MinimizeParams&, const Vector&, double, double&, double&, Vector&, Vector&);
76  Linmin getLinmin(const MinimizeParams& params) const;
77  double lBFGS(const MinimizeParams& params); //limited memory BFGS implementation (differs sufficiently from CG to be justify a separate implementation)
78 };
79 
85 template<typename Vector> struct LinearSolvable
86 {
87  Vector state;
88 
90  virtual Vector hessian(const Vector&) const=0;
91 
93  virtual Vector precondition(const Vector& v) const { return clone(v); }
94 
96  virtual double sync(double x) const { return x; }
97 
100  int solve(const Vector& rhs, const MinimizeParams& params);
101 };
102 
104 //---------------------- Implementation ----------------------------
106 
107 #include <core/Minimize_linmin.h>
108 #include <core/Minimize_lBFGS.h>
109 
110 template<typename Vector> double Minimizable<Vector>::minimize(const MinimizeParams& p)
111 { if(p.fdTest) fdTest(p); // finite difference test
112  if(p.dirUpdateScheme == MinimizeParams::LBFGS) return lBFGS(p);
113 
114  Vector g, gPrev, Kg; //current, previous and preconditioned gradients
115  double E = sync(compute(&g, &Kg)); //get initial energy and gradient
116  EdiffCheck ediffCheck(p.nEnergyDiff, p.energyDiffThreshold); //list of past energies
117 
118  Vector d = clone(Kg); //step direction (will be reset in first iteration)
119  constrain(d); //restrict search direction to allowed subspace
120  bool forceGradDirection = true; //whether current direction is along the gradient
121  MinimizeParams::DirectionUpdateScheme currentDirUpdateScheme = p.dirUpdateScheme; //initially use the specified scheme, may switch to SD on trouble
122  bool gPrevUsed;
123  switch(currentDirUpdateScheme)
126  gPrevUsed = false;
127  break;
128  default:
129  gPrevUsed = true;
130  }
131 
132  double alphaT = p.alphaTstart; //test step size
133  double alpha = alphaT; //actual step size
134  double beta = 0.0; //CG prev search direction mix factor
135  double gKNorm = 0.0, gKNormPrev = 0.0; //current and previous norms of the preconditioned gradient
136 
137  //Select the linmin method:
138  Linmin linmin = getLinmin(p);
139 
140  //Iterate until convergence, max iteration count or kill signal
141  int iter=0;
142  for(iter=0; !killFlag; iter++)
143  {
144  if(report(iter)) //optional reporting/processing
145  { E = sync(compute(&g, &Kg)); //update energy and gradient if state was modified
146  fprintf(p.fpLog, "%s\tState modified externally: resetting search direction.\n", p.linePrefix);
147  fflush(p.fpLog);
148  forceGradDirection = true; //reset search direction
149  }
150 
151  gKNorm = sync(dot(g,Kg));
152  fprintf(p.fpLog, "%sIter: %3d %s: ", p.linePrefix, iter, p.energyLabel);
153  fprintf(p.fpLog, p.energyFormat, E);
154  fprintf(p.fpLog, " |grad|_K: %10.3le alpha: %10.3le", sqrt(gKNorm/p.nDim), alpha);
155 
156  //Print prev step stats and set CG direction parameter if necessary
157  beta = 0.0;
158  if(!forceGradDirection)
159  { double dotgd = sync(dot(g,d));
160  double dotgPrevKg = gPrevUsed ? sync(dot(gPrev, Kg)) : 0.;
161 
162  fprintf(p.fpLog, " linmin: %10.3le", dotgd/sqrt(sync(dot(g,g))*sync(dot(d,d))));
163  if(gPrevUsed)
164  fprintf(p.fpLog, " cgtest: %10.3le", dotgPrevKg/sqrt(gKNorm*gKNormPrev));
165  fprintf(p.fpLog, " t[s]: %9.2lf", clock_sec());
166 
167  //Update beta:
168  switch(currentDirUpdateScheme)
169  { case MinimizeParams::FletcherReeves: beta = gKNorm/gKNormPrev; break;
170  case MinimizeParams::PolakRibiere: beta = (gKNorm-dotgPrevKg)/gKNormPrev; break;
171  case MinimizeParams::HestenesStiefel: beta = (gKNorm-dotgPrevKg)/(dotgd-sync(dot(d,gPrev))); break;
172  case MinimizeParams::SteepestDescent: beta = 0.0; break;
173  case MinimizeParams::LBFGS: break; //Should never encounter since LBFGS handled separately; just to eliminate compiler warnings
174  }
175  if(beta<0.0)
176  { fprintf(p.fpLog, "\n%sEncountered beta<0, resetting CG.", p.linePrefix);
177  beta=0.0;
178  }
179  }
180  forceGradDirection = false;
181  fprintf(p.fpLog, "\n"); fflush(p.fpLog);
182  if(sqrt(gKNorm/p.nDim) < p.knormThreshold)
183  { fprintf(p.fpLog, "%sConverged (|grad|_K<%le).\n", p.linePrefix, p.knormThreshold);
184  fflush(p.fpLog); return E;
185  }
186  if(ediffCheck.checkConvergence(E))
187  { fprintf(p.fpLog, "%sConverged (|Delta %s|<%le for %d iters).\n",
189  fflush(p.fpLog); return E;
190  }
191  if(!std::isfinite(gKNorm))
192  { fprintf(p.fpLog, "%s|grad|_K=%le. Stopping ...\n", p.linePrefix, gKNorm);
193  fflush(p.fpLog); return E;
194  }
195  if(!std::isfinite(E))
196  { fprintf(p.fpLog, "%sE=%le. Stopping ...\n", p.linePrefix, E);
197  fflush(p.fpLog); return E;
198  }
199  if(iter>=p.nIterations) break;
200  if(gPrevUsed) gPrev = g;
201  gKNormPrev = gKNorm;
202 
203  //Update search direction
204  d *= beta; axpy(-1.0, Kg, d); // d = beta*d - Kg
205  constrain(d); //restrict search direction to allowed subspace
206 
207  //Line minimization
208  alphaT = std::min(alphaT, safeStepSize(d));
209  if(linmin(*this, p, d, alphaT, alpha, E, g, Kg))
210  { //linmin succeeded:
211  if(p.updateTestStepSize)
212  { alphaT = alpha;
213  if(alphaT<p.alphaTmin) //bad step size
214  alphaT = p.alphaTstart; //make sure next test step size is not too bad
215  }
216  }
217  else
218  { //linmin failed:
219  fprintf(p.fpLog, "%s\tUndoing step.\n", p.linePrefix);
220  step(d, -alpha);
221  E = sync(compute(&g, &Kg));
222  if(beta)
223  { //Failed, but not along the gradient direction:
224  fprintf(p.fpLog, "%s\tStep failed: resetting search direction.\n", p.linePrefix);
225  fflush(p.fpLog);
226  forceGradDirection = true; //reset search direction
227  }
228  else
229  { //Failed along the gradient direction
230  fprintf(p.fpLog, "%s\tStep failed along negative gradient direction.\n", p.linePrefix);
231  fprintf(p.fpLog, "%sProbably at roundoff error limit. (Stopping)\n", p.linePrefix);
232  fflush(p.fpLog);
233  return E;
234  }
235  }
236  }
237  fprintf(p.fpLog, "%sNone of the convergence criteria satisfied after %d iterations.\n", p.linePrefix, iter);
238  return E;
239 }
240 
241 
242 template<typename Vector> void Minimizable<Vector>::fdTest(const MinimizeParams& p)
243 {
244  const double deltaMin = 1e-9;
245  const double deltaMax = 1e+1;
246  const double deltaScale = 1e+1;
247  string fdPrefixString = p.linePrefix + string("fdTest: ");
248  const char* fdPrefix = fdPrefixString.c_str();
249  fprintf(p.fpLog, "%s--------------------------------------\n", fdPrefix);
250  Vector g, Kg;
251  double E0 = sync(compute(&g, &Kg));
252 
253  Vector dx;
254  { // Set the direction to be a random vector of the same norm
255  // as the preconditioned gradient times the initial test step size
256  dx = clone(Kg);
257  randomize(dx);
258  constrain(dx);
259  dx *= p.alphaTstart * sqrt(sync(dot(Kg,Kg))/sync(dot(dx,dx)));
260  }
261  double dE_ddelta = sync(dot(dx, g)); //directional derivative at delta=0
262 
263  double deltaPrev=0;
264  for(double delta=deltaMin; delta<=deltaMax; delta*=deltaScale)
265  { double dE = dE_ddelta*delta;
266  step(dx, delta-deltaPrev); deltaPrev=delta;
267  double deltaE = sync(compute(0,0)) - E0;
268  fprintf(p.fpLog, "%s delta=%le:\n", fdPrefix, delta);
269  fprintf(p.fpLog, "%s d%s Ratio: %19.16lf\n", fdPrefix, p.energyLabel, deltaE/dE);
270  fprintf(p.fpLog, "%s d%s Error: %19.16lf\n", fdPrefix, p.energyLabel, sqrt(p.nDim)*1.1e-16/fabs(dE));
271  }
272  fprintf(p.fpLog, "%s--------------------------------------\n", fdPrefix);
273  step(dx, -deltaPrev); //restore state to original value
274 }
275 
276 
277 template<typename Vector> int LinearSolvable<Vector>::solve(const Vector& rhs, const MinimizeParams& p)
278 { //Initialize:
279  Vector r = clone(rhs); axpy(-1.0, hessian(state), r); //residual r = rhs - A.state;
280  Vector z = precondition(r), d = r; //the preconditioned residual and search direction
281  double beta=0.0, rdotzPrev=0.0, rdotz = sync(dot(r, z));
282 
283  //Check initial residual
284  double rzNorm = sqrt(fabs(rdotz)/p.nDim);
285  fprintf(p.fpLog, "%sInitial: sqrt(|r.z|): %12.6le\n", p.linePrefix, rzNorm); fflush(p.fpLog);
286  if(rzNorm<p.knormThreshold) { fprintf(p.fpLog, "%sConverged sqrt(r.z)<%le\n", p.linePrefix, p.knormThreshold); fflush(p.fpLog); return 0; }
287 
288  //Main loop:
289  int iter;
290  for(iter=0; iter<p.nIterations && !killFlag; iter++)
291  { //Update search direction:
292  if(rdotzPrev)
293  { beta = rdotz/rdotzPrev;
294  d *= beta; axpy(1.0, z, d); // d = z + beta*d
295  }
296  else d = clone(z); //fresh search direction (along gradient)
297  //Step:
298  Vector w = hessian(d);
299  double alpha = rdotz/sync(dot(w,d));
300  axpy(alpha, d, state);
301  axpy(-alpha, w, r);
302  z = precondition(r);
303  rdotzPrev = rdotz;
304  rdotz = sync(dot(r, z));
305  //Print info:
306  double rzNorm = sqrt(fabs(rdotz)/p.nDim);
307  fprintf(p.fpLog, "%sIter: %3d sqrt(|r.z|): %12.6le alpha: %12.6le beta: %13.6le t[s]: %9.2lf\n",
308  p.linePrefix, iter, rzNorm, alpha, beta, clock_sec()); fflush(p.fpLog);
309  //Check convergence:
310  if(rzNorm<p.knormThreshold) { fprintf(p.fpLog, "%sConverged sqrt(r.z)<%le\n", p.linePrefix, p.knormThreshold); fflush(p.fpLog); return iter; }
311  }
312  fprintf(p.fpLog, "%sGradient did not converge within threshold in %d iterations\n", p.linePrefix, iter); fflush(p.fpLog);
313  return iter;
314 }
315 
317 #endif // JDFTX_CORE_MINIMIZE_H
double alphaTmin
minimum value of the test-step size (algorithm gives up when difficulties cause alphaT to fall below this value)
Definition: MinimizeParams.h:60
virtual void constrain(Vector &)
Constrain search directions to the space of free directions for minimize.
Definition: Minimize.h:59
Limited memory version of the BFGS algorithm.
Definition: MinimizeParams.h:36
Definition: Minimize_linmin.h:27
ScalarField sqrt(const ScalarField &)
Elementwise square root (preserve input)
Definition: Minimize.h:85
void randomize(TptrCollection &x)
Initialize to normal random numbers:
Definition: ScalarFieldArray.h:154
int nIterations
Maximum number of iterations (default 100)
Definition: MinimizeParams.h:48
complex dot(const Tptr &X, const Tptr &Y)
Definition: Operators.h:196
Hestenes-Stiefel (preconditioned) conjugate gradients.
Definition: MinimizeParams.h:35
virtual bool report(int iter)
Definition: Minimize.h:56
Parameters to control the minimization algorithm.
Definition: MinimizeParams.h:29
double energyDiffThreshold
stop when energy change is below this for nEnergyDiff successive iterations (default: 0) ...
Definition: MinimizeParams.h:56
virtual double compute(Vector *grad, Vector *Kgrad)=0
Returns the objective function at the current state and store the gradient in grad and preconditioned...
virtual double sync(double x) const
Override to synchronize scalars over MPI processes (if the same minimization is happening in sync over several processes)
Definition: Minimize.h:62
double alphaTstart
initial value for the test-step size (default: 1.0)
Definition: MinimizeParams.h:59
int nEnergyDiff
number of successive iterations for energyDiffThreshold check (default: 2)
Definition: MinimizeParams.h:57
Steepest Descent (always along negative (preconditioned) gradient)
Definition: MinimizeParams.h:37
double knormThreshold
stop when norm of residual against preconditioner falls below this (default: 0)
Definition: MinimizeParams.h:55
void fdTest(const MinimizeParams &params)
Tptr clone(const Tptr &X)
Clone (NOTE: operator= is by reference for the ScalarField classes)
Definition: Operators.h:111
virtual Vector precondition(const Vector &v) const
Override to enable preconditioning: return the preconditioned vector, given a vector.
Definition: Minimize.h:93
bool updateTestStepSize
set alphaT=alpha after every iteration if true (default: true)
Definition: MinimizeParams.h:61
double clock_sec()
Elapsed time in seconds (from start of program)
bool killFlag
Flag set by signal handlers - all compute loops should quit cleanly when this is set.
virtual double sync(double x) const
Override to synchronize scalars over MPI processes (if the same minimization is happening in sync over several processes)
Definition: Minimize.h:96
virtual double safeStepSize(const Vector &dir) const
Override to return maximum safe step size along a given direction. Steps can be arbitrarily large by ...
Definition: Minimize.h:65
Miscellaneous utilities.
double minimize(const MinimizeParams &params)
Minimize this objective function with algorithm controlled by params and return the minimized value...
Polak-Ribiere (preconditioned) conjugate gradients (default)
Definition: MinimizeParams.h:33
const char * linePrefix
prefix for each output line of minimizer, useful for nested minimizations (default "CG\t") ...
Definition: MinimizeParams.h:52
const char * energyLabel
Label for the minimized quantity (default "E")
Definition: MinimizeParams.h:53
Fletcher-Reeves (preconditioned) conjugate gradients.
Definition: MinimizeParams.h:34
int nDim
Dimension of optimization space; used only for knormThreshold (default 1)
Definition: MinimizeParams.h:49
int solve(const Vector &rhs, const MinimizeParams &params)
const char * energyFormat
printf format for the minimized quantity (default "%22.15le")
Definition: MinimizeParams.h:54
FILE * fpLog
Stream to log iterations to.
Definition: MinimizeParams.h:51
Definition: Minimize.h:46
DirectionUpdateScheme
Search direction update scheme.
Definition: MinimizeParams.h:32
virtual void step(const Vector &dir, double alpha)=0
Move the state in parameter space along direction dir with scale alpha.
void axpy(double alpha, const Tptr &X, Tptr &Y)
Generic axpy for complex data types (Note: null pointers are treated as zero)
Definition: Operators.h:158
std::basic_string< char, ichar_traits > string
Case-insensitive string.
Definition: string.h:42
bool fdTest
whether to perform a finite difference test before each minimization (default false) ...
Definition: MinimizeParams.h:70
Vector state
the location of the minimum, obtained by solving hessian * state == rhs
Definition: Minimize.h:87