Minimize.h
/*-------------------------------------------------------------------
Copyright 2011 Ravishankar Sundararaman, Kendra Letchworth Weaver

This file is part of JDFTx.

JDFTx is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

JDFTx is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with JDFTx. If not, see <http://www.gnu.org/licenses/>.
-------------------------------------------------------------------*/

#ifndef JDFTX_CORE_MINIMIZE_H
#define JDFTX_CORE_MINIMIZE_H

#include <cmath>
#include <algorithm>
#include <core/Util.h>
#include <core/MinimizeParams.h>

//! Interface for an objective function that can be minimized by the nonlinear
//! conjugate-gradients or limited-memory BFGS algorithms implemented below
template<typename Vector> struct Minimizable
{
	//! Move the state in parameter space along direction dir with scale alpha
	virtual void step(const Vector& dir, double alpha)=0;

	//! Returns the objective function at the current state and store the gradient in grad, if non-null
	virtual double compute(Vector* grad)=0;

	//! Override to enable preconditioning: return the preconditioned gradient, given the gradient
	virtual Vector precondition(const Vector& grad) { return clone(grad); }

	//! Override for optional reporting/processing every iteration; return true if the state was modified
	virtual bool report(int iter) { return false; }

	//! Override to apply constraints, if any, to a direction vector (no-op by default)
	virtual void constrain(Vector&) {}

	//! Override to synchronize scalars over MPI processes (if the same minimization is happening in sync over several processes)
	virtual double sync(double x) const { return x; }

	//! Minimize this objective function with the algorithm controlled by params and return the minimized value
	double minimize(const MinimizeParams& params);

	//! Finite-difference test: check the gradient returned by compute() against actual changes in the objective
	void fdTest(const MinimizeParams& params);

private:
	typedef bool (*Linmin)(Minimizable<Vector>&, const MinimizeParams&, const Vector&, double, double&, double&, Vector&);
	Linmin getLinmin(const MinimizeParams& params) const;
	double lBFGS(const MinimizeParams& params); //limited-memory BFGS implementation (differs sufficiently from CG to justify a separate implementation)
};
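
As a usage illustration (not part of Minimize.h), the following hypothetical sketch implements Minimizable for the quadratic objective f(x) = 0.5 |x - x0|^2. The Vec wrapper and Quadratic class are invented names for this example; Vec supplies the free functions this header itself uses on Vector (clone, dot, axpy, randomize and operator*=). The line-minimization routines in Minimize_linmin.h may require further operations, so treat this as a minimal sketch of the Vector concept rather than a complete specification.

//Hypothetical example: minimize f(x) = 0.5 |x - x0|^2
#include <vector>
#include <cstdlib>

struct Vec { std::vector<double> v; }; //trivial Vector type for illustration

Vec clone(const Vec& x) { return x; } //deep copy
double dot(const Vec& a, const Vec& b)
{	double s = 0.;
	for(size_t i=0; i<a.v.size(); i++) s += a.v[i]*b.v[i];
	return s;
}
void axpy(double alpha, const Vec& x, Vec& y) //y += alpha * x
{	for(size_t i=0; i<x.v.size(); i++) y.v[i] += alpha*x.v[i];
}
void randomize(Vec& x) { for(double& xi: x.v) xi = rand()*2./RAND_MAX - 1.; }
Vec& operator*=(Vec& x, double s) { for(double& xi: x.v) xi *= s; return x; }

struct Quadratic : public Minimizable<Vec>
{	Vec x, x0; //current state and location of the minimum
	void step(const Vec& dir, double alpha) { axpy(alpha, dir, x); }
	double compute(Vec* grad)
	{	Vec r = clone(x); axpy(-1., x0, r); //r = x - x0
		if(grad) *grad = r; //gradient of 0.5 |x - x0|^2
		return 0.5*dot(r, r);
	}
};

//Usage sketch:
//	Quadratic q; //...initialize q.x and q.x0 to the same size...
//	MinimizeParams mp; mp.nDim = int(q.x.v.size());
//	double Emin = q.minimize(mp); //optimized state left in q.x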

//! Interface for a symmetric linear system solvable by (preconditioned) conjugate gradients
template<typename Vector> struct LinearSolvable
{
	Vector state; //!< the location of the minimum, obtained by solving hessian * state == rhs

	//! Return the result of applying the hessian (the symmetric linear operator) to the given vector
	virtual Vector hessian(const Vector&) const=0;

	//! Override to enable preconditioning: return the preconditioned vector, given a vector
	virtual Vector precondition(const Vector& v) const { return clone(v); }

	//! Override to synchronize scalars over MPI processes (if the same solve is happening in sync over several processes)
	virtual double sync(double x) const { return x; }

	//! Solve the linear system hessian * state == rhs and return the number of iterations taken
	int solve(const Vector& rhs, const MinimizeParams& params);
};
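
In the same spirit, here is a hypothetical LinearSolvable for the diagonal system diag(a) · x = b, reusing the Vec type from the sketch above with a Jacobi preconditioner (DiagSystem is an invented name):

struct DiagSystem : public LinearSolvable<Vec>
{	Vec a; //diagonal of the symmetric positive-definite matrix
	Vec hessian(const Vec& x) const
	{	Vec Ax = clone(x);
		for(size_t i=0; i<Ax.v.size(); i++) Ax.v[i] *= a.v[i];
		return Ax;
	}
	Vec precondition(const Vec& r) const
	{	Vec Kr = clone(r);
		for(size_t i=0; i<Kr.v.size(); i++) Kr.v[i] /= a.v[i]; //Jacobi: K = diag(a)^-1
		return Kr;
	}
};

//Usage sketch:
//	DiagSystem sys; //...initialize sys.a, an initial sys.state, and the right-hand side b...
//	MinimizeParams mp; mp.nDim = int(sys.a.v.size()); mp.knormThreshold = 1e-10;
//	int nIter = sys.solve(b, mp); //solution left in sys.state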

//---------------------- Implementation ----------------------------

#include <core/Minimize_linmin.h>
#include <core/Minimize_lBFGS.h>

template<typename Vector> double Minimizable<Vector>::minimize(const MinimizeParams& p)
{	if(p.fdTest) fdTest(p); // finite difference test
	if(p.dirUpdateScheme == MinimizeParams::LBFGS) return lBFGS(p);

	Vector g, gPrev; //current and previous gradient
	double E = sync(compute(&g)); //get initial energy and gradient
	EdiffCheck ediffCheck(p.nEnergyDiff, p.energyDiffThreshold); //list of past energies

	Vector d = clone(g); //step direction (will be reset in first iteration)
	bool forceGradDirection = true; //whether current direction is along the gradient
	MinimizeParams::DirectionUpdateScheme currentDirUpdateScheme = p.dirUpdateScheme; //initially use the specified scheme, may switch to SD on trouble

	double alphaT = p.alphaTstart; //test step size
	double alpha = alphaT; //actual step size
	double beta = 0.0; //CG prev search direction mix factor
	double gKNorm = 0.0, gKNormPrev = 0.0; //current and previous norms of the preconditioned gradient

	//Select the linmin method:
	Linmin linmin = getLinmin(p);

	//Iterate until convergence, max iteration count or kill signal
	int iter=0;
	for(iter=0; !killFlag; iter++)
	{
		if(report(iter)) //optional reporting/processing
		{	E = sync(compute(&g)); //update energy and gradient if state was modified
			fprintf(p.fpLog, "%s\tState modified externally: resetting search direction.\n", p.linePrefix);
			fflush(p.fpLog);
			forceGradDirection = true; //reset search direction
		}

		Vector Kg = precondition(g);
		gKNorm = sync(dot(g,Kg));
		fprintf(p.fpLog, "%sIter: %3d %s: ", p.linePrefix, iter, p.energyLabel);
		fprintf(p.fpLog, p.energyFormat, E);
		fprintf(p.fpLog, " |grad|_K: %10.3le alpha: %10.3le", sqrt(gKNorm/p.nDim), alpha);

		//Print prev step stats and set CG direction parameter if necessary
		beta = 0.0;
		if(!forceGradDirection)
		{	double dotgd = sync(dot(g,d));
			double dotgPrevKg = sync(dot(gPrev, Kg));

			double linmin = dotgd/sqrt(sync(dot(g,g))*sync(dot(d,d)));
			double cgtest = dotgPrevKg/sqrt(gKNorm*gKNormPrev);
			fprintf(p.fpLog, " linmin: %10.3le", linmin);
			fprintf(p.fpLog, " cgtest: %10.3le", cgtest);

			//Update beta:
			switch(currentDirUpdateScheme)
			{	case MinimizeParams::FletcherReeves: beta = gKNorm/gKNormPrev; break;
				case MinimizeParams::PolakRibiere: beta = (gKNorm-dotgPrevKg)/gKNormPrev; break;
				case MinimizeParams::HestenesStiefel: beta = (gKNorm-dotgPrevKg)/(dotgd-sync(dot(d,gPrev))); break;
				case MinimizeParams::SteepestDescent: beta = 0.0; break;
				case MinimizeParams::LBFGS: break; //Should never encounter since LBFGS handled separately; just to eliminate compiler warnings
			}
			if(beta<0.0)
			{	fprintf(p.fpLog, "\n%sEncountered beta<0, resetting CG.", p.linePrefix);
				beta = 0.0;
			}
		}
		forceGradDirection = false;
		fprintf(p.fpLog, "\n"); fflush(p.fpLog);
		if(sqrt(gKNorm/p.nDim) < p.knormThreshold)
		{	fprintf(p.fpLog, "%sConverged (|grad|_K<%le).\n", p.linePrefix, p.knormThreshold);
			fflush(p.fpLog); return E;
		}
		if(ediffCheck.checkConvergence(E))
		{	fprintf(p.fpLog, "%sConverged (|Delta %s|<%le for %d iters).\n",
				p.linePrefix, p.energyLabel, p.energyDiffThreshold, p.nEnergyDiff);
			fflush(p.fpLog); return E;
		}
		if(!std::isfinite(gKNorm))
		{	fprintf(p.fpLog, "%s|grad|_K=%le. Stopping ...\n", p.linePrefix, gKNorm);
			fflush(p.fpLog); return E;
		}
		if(!std::isfinite(E))
		{	fprintf(p.fpLog, "%sE=%le. Stopping ...\n", p.linePrefix, E);
			fflush(p.fpLog); return E;
		}
		if(iter>=p.nIterations) break;
		gPrev = g;
		gKNormPrev = gKNorm;

		//Update search direction
		d *= beta; axpy(-1.0, Kg, d); // d = beta*d - Kg

		//Line minimization
		if(linmin(*this, p, d, alphaT, alpha, E, g))
		{	//linmin succeeded:
			if(p.updateTestStepSize)
			{	alphaT = alpha;
				if(alphaT<p.alphaTmin) //bad step size
					alphaT = p.alphaTstart; //make sure next test step size is not too bad
			}
		}
		else
		{	//linmin failed:
			fprintf(p.fpLog, "%s\tUndoing step.\n", p.linePrefix);
			step(d, -alpha);
			E = sync(compute(&g));
			if(beta)
			{	//Failed, but not along the gradient direction:
				fprintf(p.fpLog, "%s\tStep failed: resetting search direction.\n", p.linePrefix);
				fflush(p.fpLog);
				forceGradDirection = true; //reset search direction
			}
			else
			{	//Failed along the gradient direction
				fprintf(p.fpLog, "%s\tStep failed along negative gradient direction.\n", p.linePrefix);
				fprintf(p.fpLog, "%sProbably at roundoff error limit. (Stopping)\n", p.linePrefix);
				fflush(p.fpLog);
				return E;
			}
		}
	}
	fprintf(p.fpLog, "%sNone of the convergence criteria satisfied after %d iterations.\n", p.linePrefix, iter);
	return E;
}
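
For reference, the beta values computed in the switch above are the standard preconditioned nonlinear conjugate-gradient coefficients. Writing K for the preconditioner, g and g' for the current and previous gradients, and d for the previous search direction:

\[
\beta_{\mathrm{FR}} = \frac{g \cdot Kg}{g' \cdot Kg'}, \qquad
\beta_{\mathrm{PR}} = \frac{(g - g') \cdot Kg}{g' \cdot Kg'}, \qquad
\beta_{\mathrm{HS}} = \frac{(g - g') \cdot Kg}{(g - g') \cdot d},
\]

after which the search direction is updated as d &larr; &beta;d &minus; Kg, with &beta; reset to zero for steepest descent or whenever it turns negative.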


template<typename Vector> void Minimizable<Vector>::fdTest(const MinimizeParams& p)
{
	const double deltaMin = 1e-9;
	const double deltaMax = 1e+1;
	const double deltaScale = 1e+1;
	string fdPrefixString = p.linePrefix + string("fdTest: ");
	const char* fdPrefix = fdPrefixString.c_str();
	fprintf(p.fpLog, "%s--------------------------------------\n", fdPrefix);
	Vector g;
	double E0 = sync(compute(&g));

	Vector dx;
	{	// Set the direction to be a random vector of the same norm
		// as the preconditioned gradient times the initial test step size
		Vector Kg = precondition(g);
		dx = clone(Kg);
		randomize(dx);
		constrain(dx);
		dx *= p.alphaTstart * sqrt(sync(dot(Kg,Kg))/sync(dot(dx,dx)));
	}
	double dE_ddelta = sync(dot(dx, g)); //directional derivative at delta=0

	double deltaPrev = 0;
	for(double delta=deltaMin; delta<=deltaMax; delta*=deltaScale)
	{	double dE = dE_ddelta*delta;
		step(dx, delta-deltaPrev); deltaPrev = delta;
		double deltaE = sync(compute(0)) - E0;
		fprintf(p.fpLog, "%s delta=%le:\n", fdPrefix, delta);
		fprintf(p.fpLog, "%s d%s Ratio: %19.16lf\n", fdPrefix, p.energyLabel, deltaE/dE);
		fprintf(p.fpLog, "%s d%s Error: %19.16lf\n", fdPrefix, p.energyLabel, sqrt(p.nDim)*1.1e-16/fabs(dE));
	}
	fprintf(p.fpLog, "%s--------------------------------------\n", fdPrefix);
	step(dx, -deltaPrev); //restore state to original value
}
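
The Ratio column printed by fdTest compares the actual change in the objective against its first-order prediction dE = &delta; (dE/d&delta;); for a consistent compute() it should approach 1 over an intermediate range of &delta;:

\[
\frac{\Delta E}{\delta\,(dE/d\delta)} = 1 + O(\delta) + O\!\left(\frac{\sqrt{N}\,\epsilon}{|\delta\,(dE/d\delta)|}\right),
\]

where the O(&delta;) term is the curvature contribution and the last term is the double-precision roundoff estimate printed in the Error column (N = p.nDim, &epsilon; &asymp; 1.1e-16). Roundoff dominates at small &delta; and curvature at large &delta;, so a correct gradient shows ratios near 1 in between.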


template<typename Vector> int LinearSolvable<Vector>::solve(const Vector& rhs, const MinimizeParams& p)
{	//Initialize:
	Vector r = clone(rhs); axpy(-1.0, hessian(state), r); //residual r = rhs - A.state;
	Vector z = precondition(r), d = r; //the preconditioned residual and search direction
	double beta=0.0, rdotzPrev=0.0, rdotz = sync(dot(r, z));

	//Check initial residual
	double rzNorm = sqrt(fabs(rdotz)/p.nDim);
	fprintf(p.fpLog, "%sInitial: sqrt(|r.z|): %12.6le\n", p.linePrefix, rzNorm); fflush(p.fpLog);
	if(rzNorm<p.knormThreshold) { fprintf(p.fpLog, "%sConverged sqrt(r.z)<%le\n", p.linePrefix, p.knormThreshold); fflush(p.fpLog); return 0; }

	//Main loop:
	int iter;
	for(iter=0; iter<p.nIterations && !killFlag; iter++)
	{	//Update search direction:
		if(rdotzPrev)
		{	beta = rdotz/rdotzPrev;
			d *= beta; axpy(1.0, z, d); // d = z + beta*d
		}
		else d = clone(z); //fresh search direction (along gradient)
		//Step:
		Vector w = hessian(d);
		double alpha = rdotz/sync(dot(w,d));
		axpy(alpha, d, state);
		axpy(-alpha, w, r);
		z = precondition(r);
		rdotzPrev = rdotz;
		rdotz = sync(dot(r, z));
		//Print info:
		double rzNorm = sqrt(fabs(rdotz)/p.nDim);
		fprintf(p.fpLog, "%sIter: %3d sqrt(|r.z|): %12.6le alpha: %12.6le beta: %13.6le\n",
			p.linePrefix, iter, rzNorm, alpha, beta); fflush(p.fpLog);
		//Check convergence:
		if(rzNorm<p.knormThreshold) { fprintf(p.fpLog, "%sConverged sqrt(r.z)<%le\n", p.linePrefix, p.knormThreshold); fflush(p.fpLog); return iter; }
	}
	fprintf(p.fpLog, "%sGradient did not converge within threshold in %d iterations\n", p.linePrefix, iter); fflush(p.fpLog);
	return iter;
}
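
For reference, the loop above implements the standard preconditioned conjugate-gradient recurrence for the system A &middot; state = rhs, with A the hessian operator and K the preconditioner:

\[
\alpha = \frac{r \cdot z}{d \cdot A d}, \qquad
x \leftarrow x + \alpha d, \qquad
r \leftarrow r - \alpha A d, \qquad
z = K r, \qquad
\beta = \frac{(r \cdot z)_{\mathrm{new}}}{(r \cdot z)_{\mathrm{old}}}, \qquad
d \leftarrow z + \beta d,
\]

with convergence judged by sqrt(|r&middot;z|/nDim) falling below knormThreshold.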

#endif // JDFTX_CORE_MINIMIZE_H