JDFTx  1.1.0
Minimize.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------
2 Copyright 2011 Ravishankar Sundararaman, Kendra Letchworth Weaver
3 
4 This file is part of JDFTx.
5 
6 JDFTx is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10 
11 JDFTx is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with JDFTx. If not, see <http://www.gnu.org/licenses/>.
18 -------------------------------------------------------------------*/
19 
20 #ifndef JDFTX_CORE_MINIMIZE_H
21 #define JDFTX_CORE_MINIMIZE_H
22 
23 #include <cmath>
24 #include <algorithm>
25 #include <core/Util.h>
26 #include <core/MinimizeParams.h>
27 
30 
//! Interface for an object that can be minimized over a parameter space of type Vector.
//! Implementations must provide step() and compute(); the remaining virtuals are optional hooks.
template<typename Vector> struct Minimizable
{
 //! Move the state in parameter space along direction dir with scale factor alpha
 virtual void step(const Vector& dir, double alpha)=0;

 //! Return the objective function at the current state and store its gradient in *grad, if non-null
 virtual double compute(Vector* grad)=0;

 //! Override to enable preconditioning: return the preconditioned gradient (default: identity, via clone)
 virtual Vector precondition(const Vector& grad) { return clone(grad); }

 //! Override for per-iteration reporting/processing; return true if the state was modified
 //! (minimize() then recomputes the energy/gradient and resets the search direction)
 virtual bool report(int iter) { return false; }

 //! Override to project a vector onto any constraint manifold (default: no constraints)
 virtual void constrain(Vector&) {}

 //! Override to synchronize scalars over MPI processes when the same minimization runs in sync across processes
 virtual double sync(double x) const { return x; }

 //! Minimize this objective function with the algorithm controlled by params; return the minimized value
 double minimize(const MinimizeParams& params);

 //! Check consistency of the value and gradient returned by compute()
 //! using finite-difference steps along a random direction
 void fdTest(const MinimizeParams& params);

private:
 typedef bool (*Linmin)(Minimizable<Vector>&, const MinimizeParams&, const Vector&, double, double&, double&, Vector&);
 Linmin getLinmin(const MinimizeParams& params) const;
 double lBFGS(const MinimizeParams& params); //limited-memory BFGS implementation (differs sufficiently from CG to justify a separate implementation)
};
80 
//! Interface for symmetric linear systems solved by (preconditioned) conjugate gradients:
//! solve() finds state such that hessian(state) == rhs.
template<typename Vector> struct LinearSolvable
{
 Vector state; //!< solution vector: initial guess on entry to solve(), solution on return

 //! Return the result of applying the linear operator (assumed symmetric) to the given vector
 virtual Vector hessian(const Vector&) const=0;

 //! Override to enable preconditioning: return the preconditioned vector (default: identity, via clone)
 virtual Vector precondition(const Vector& v) const { return clone(v); }

 //! Override to synchronize scalars over MPI processes when the same solve runs in sync across processes
 virtual double sync(double x) const { return x; }

 //! Solve hessian * state == rhs to the tolerance specified in params; return the iteration count used
 int solve(const Vector& rhs, const MinimizeParams& params);
};
103 
105 //---------------------- Implementation ----------------------------
107 
108 #include <core/Minimize_linmin.h>
109 #include <core/Minimize_lBFGS.h>
110 
111 template<typename Vector> double Minimizable<Vector>::minimize(const MinimizeParams& p)
112 { if(p.fdTest) fdTest(p); // finite difference test
113  if(p.dirUpdateScheme == MinimizeParams::LBFGS) return lBFGS(p);
114 
115  Vector g, gPrev; //current and previous gradient
116  double E = sync(compute(&g)); //get initial energy and gradient
117  EdiffCheck ediffCheck(p.nEnergyDiff, p.energyDiffThreshold); //list of past energies
118 
119  Vector d = clone(g); //step direction (will be reset in first iteration)
120  bool forceGradDirection = true; //whether current direction is along the gradient
121  MinimizeParams::DirectionUpdateScheme currentDirUpdateScheme = p.dirUpdateScheme; //initially use the specified scheme, may switch to SD on trouble
122 
123  double alphaT = p.alphaTstart; //test step size
124  double alpha = alphaT; //actual step size
125  double beta = 0.0; //CG prev search direction mix factor
126  double gKNorm = 0.0, gKNormPrev = 0.0; //current and previous norms of the preconditioned gradient
127 
128  //Select the linmin method:
129  Linmin linmin = getLinmin(p);
130 
131  //Iterate until convergence, max iteration count or kill signal
132  int iter=0;
133  for(iter=0; !killFlag; iter++)
134  {
135  if(report(iter)) //optional reporting/processing
136  { E = sync(compute(&g)); //update energy and gradient if state was modified
137  fprintf(p.fpLog, "%s\tState modified externally: resetting search direction.\n", p.linePrefix);
138  fflush(p.fpLog);
139  forceGradDirection = true; //reset search direction
140  }
141 
142  Vector Kg = precondition(g);
143  gKNorm = sync(dot(g,Kg));
144  fprintf(p.fpLog, "%sIter: %3d %s: ", p.linePrefix, iter, p.energyLabel);
145  fprintf(p.fpLog, p.energyFormat, E);
146  fprintf(p.fpLog, " |grad|_K: %10.3le alpha: %10.3le", sqrt(gKNorm/p.nDim), alpha);
147 
148  //Print prev step stats and set CG direction parameter if necessary
149  beta = 0.0;
150  if(!forceGradDirection)
151  { double dotgd = sync(dot(g,d));
152  double dotgPrevKg = sync(dot(gPrev, Kg));
153 
154  double linmin = dotgd/sqrt(sync(dot(g,g))*sync(dot(d,d)));
155  double cgtest = dotgPrevKg/sqrt(gKNorm*gKNormPrev);
156  fprintf(p.fpLog, " linmin: %10.3le", linmin);
157  fprintf(p.fpLog, " cgtest: %10.3le", cgtest);
158  fprintf(p.fpLog, " t[s]: %9.2lf", clock_sec());
159 
160  //Update beta:
161  switch(currentDirUpdateScheme)
162  { case MinimizeParams::FletcherReeves: beta = gKNorm/gKNormPrev; break;
163  case MinimizeParams::PolakRibiere: beta = (gKNorm-dotgPrevKg)/gKNormPrev; break;
164  case MinimizeParams::HestenesStiefel: beta = (gKNorm-dotgPrevKg)/(dotgd-sync(dot(d,gPrev))); break;
165  case MinimizeParams::SteepestDescent: beta = 0.0; break;
166  case MinimizeParams::LBFGS: break; //Should never encounter since LBFGS handled separately; just to eliminate compiler warnings
167  }
168  if(beta<0.0)
169  { fprintf(p.fpLog, "\n%sEncountered beta<0, resetting CG.", p.linePrefix);
170  beta=0.0;
171  }
172  }
173  forceGradDirection = false;
174  fprintf(p.fpLog, "\n"); fflush(p.fpLog);
175  if(sqrt(gKNorm/p.nDim) < p.knormThreshold)
176  { fprintf(p.fpLog, "%sConverged (|grad|_K<%le).\n", p.linePrefix, p.knormThreshold);
177  fflush(p.fpLog); return E;
178  }
179  if(ediffCheck.checkConvergence(E))
180  { fprintf(p.fpLog, "%sConverged (|Delta %s|<%le for %d iters).\n",
182  fflush(p.fpLog); return E;
183  }
184  if(!std::isfinite(gKNorm))
185  { fprintf(p.fpLog, "%s|grad|_K=%le. Stopping ...\n", p.linePrefix, gKNorm);
186  fflush(p.fpLog); return E;
187  }
188  if(!std::isfinite(E))
189  { fprintf(p.fpLog, "%sE=%le. Stopping ...\n", p.linePrefix, E);
190  fflush(p.fpLog); return E;
191  }
192  if(iter>=p.nIterations) break;
193  gPrev = g;
194  gKNormPrev = gKNorm;
195 
196  //Update search direction
197  d *= beta; axpy(-1.0, Kg, d); // d = beta*d - Kg
198 
199  //Line minimization
200  if(linmin(*this, p, d, alphaT, alpha, E, g))
201  { //linmin succeeded:
202  if(p.updateTestStepSize)
203  { alphaT = alpha;
204  if(alphaT<p.alphaTmin) //bad step size
205  alphaT = p.alphaTstart; //make sure next test step size is not too bad
206  }
207  }
208  else
209  { //linmin failed:
210  fprintf(p.fpLog, "%s\tUndoing step.\n", p.linePrefix);
211  step(d, -alpha);
212  E = sync(compute(&g));
213  if(beta)
214  { //Failed, but not along the gradient direction:
215  fprintf(p.fpLog, "%s\tStep failed: resetting search direction.\n", p.linePrefix);
216  fflush(p.fpLog);
217  forceGradDirection = true; //reset search direction
218  }
219  else
220  { //Failed along the gradient direction
221  fprintf(p.fpLog, "%s\tStep failed along negative gradient direction.\n", p.linePrefix);
222  fprintf(p.fpLog, "%sProbably at roundoff error limit. (Stopping)\n", p.linePrefix);
223  fflush(p.fpLog);
224  return E;
225  }
226  }
227  }
228  fprintf(p.fpLog, "%sNone of the convergence criteria satisfied after %d iterations.\n", p.linePrefix, iter);
229  return E;
230 }
231 
232 
233 template<typename Vector> void Minimizable<Vector>::fdTest(const MinimizeParams& p)
234 {
235  const double deltaMin = 1e-9;
236  const double deltaMax = 1e+1;
237  const double deltaScale = 1e+1;
238  string fdPrefixString = p.linePrefix + string("fdTest: ");
239  const char* fdPrefix = fdPrefixString.c_str();
240  fprintf(p.fpLog, "%s--------------------------------------\n", fdPrefix);
241  Vector g;
242  double E0 = sync(compute(&g));
243 
244  Vector dx;
245  { // Set the direction to be a random vector of the same norm
246  // as the preconditioned gradient times the initial test step size
247  Vector Kg = precondition(g);
248  dx = clone(Kg);
249  randomize(dx);
250  constrain(dx);
251  dx *= p.alphaTstart * sqrt(sync(dot(Kg,Kg))/sync(dot(dx,dx)));
252  }
253  double dE_ddelta = sync(dot(dx, g)); //directional derivative at delta=0
254 
255  double deltaPrev=0;
256  for(double delta=deltaMin; delta<=deltaMax; delta*=deltaScale)
257  { double dE = dE_ddelta*delta;
258  step(dx, delta-deltaPrev); deltaPrev=delta;
259  double deltaE = sync(compute(0)) - E0;
260  fprintf(p.fpLog, "%s delta=%le:\n", fdPrefix, delta);
261  fprintf(p.fpLog, "%s d%s Ratio: %19.16lf\n", fdPrefix, p.energyLabel, deltaE/dE);
262  fprintf(p.fpLog, "%s d%s Error: %19.16lf\n", fdPrefix, p.energyLabel, sqrt(p.nDim)*1.1e-16/fabs(dE));
263  }
264  fprintf(p.fpLog, "%s--------------------------------------\n", fdPrefix);
265  step(dx, -deltaPrev); //restore state to original value
266 }
267 
268 
//! Preconditioned conjugate-gradients solver for hessian * state == rhs.
//! Uses the current contents of state as the initial guess and iterates until
//! sqrt(|r.z|/nDim) falls below p.knormThreshold, p.nIterations is exhausted,
//! or killFlag is raised. Returns the number of iterations performed.
template<typename Vector> int LinearSolvable<Vector>::solve(const Vector& rhs, const MinimizeParams& p)
{ //Initialize:
 Vector r = clone(rhs); axpy(-1.0, hessian(state), r); //residual r = rhs - A.state;
 Vector z = precondition(r), d = r; //the preconditioned residual and search direction
 double beta=0.0, rdotzPrev=0.0, rdotz = sync(dot(r, z)); //rdotz: preconditioned residual norm-squared analogue used for beta/alpha

 //Check initial residual
 double rzNorm = sqrt(fabs(rdotz)/p.nDim);
 fprintf(p.fpLog, "%sInitial: sqrt(|r.z|): %12.6le\n", p.linePrefix, rzNorm); fflush(p.fpLog);
 if(rzNorm<p.knormThreshold) { fprintf(p.fpLog, "%sConverged sqrt(r.z)<%le\n", p.linePrefix, p.knormThreshold); fflush(p.fpLog); return 0; }

 //Main loop:
 int iter;
 for(iter=0; iter<p.nIterations && !killFlag; iter++)
 { //Update search direction:
  if(rdotzPrev)
  { beta = rdotz/rdotzPrev; //standard CG direction-mix factor from successive r.z values
   d *= beta; axpy(1.0, z, d); // d = z + beta*d
  }
  else d = clone(z); //fresh search direction (along gradient)
  //Step:
  Vector w = hessian(d);
  double alpha = rdotz/sync(dot(w,d)); //exact minimizer along d for a quadratic objective
  axpy(alpha, d, state); //state += alpha*d
  axpy(-alpha, w, r); //r -= alpha*A.d (incremental residual update)
  z = precondition(r);
  rdotzPrev = rdotz;
  rdotz = sync(dot(r, z));
  //Print info:
  double rzNorm = sqrt(fabs(rdotz)/p.nDim);
  fprintf(p.fpLog, "%sIter: %3d sqrt(|r.z|): %12.6le alpha: %12.6le beta: %13.6le t[s]: %9.2lf\n",
   p.linePrefix, iter, rzNorm, alpha, beta, clock_sec()); fflush(p.fpLog);
  //Check convergence:
  if(rzNorm<p.knormThreshold) { fprintf(p.fpLog, "%sConverged sqrt(r.z)<%le\n", p.linePrefix, p.knormThreshold); fflush(p.fpLog); return iter; }
 }
 fprintf(p.fpLog, "%sGradient did not converge within threshold in %d iterations\n", p.linePrefix, iter); fflush(p.fpLog);
 return iter;
}
307 
309 #endif // JDFTX_CORE_MINIMIZE_H
double alphaTmin
minimum value of the test-step size (algorithm gives up when difficulties cause alphaT to fall below ...
Definition: MinimizeParams.h:60
virtual void constrain(Vector &)
Definition: Minimize.h:63
Limited memory version of the BFGS algorithm.
Definition: MinimizeParams.h:36
Definition: Minimize_linmin.h:27
ScalarField sqrt(const ScalarField &)
Elementwise square root (preserve input)
Definition: Minimize.h:86
void randomize(TptrCollection &x)
Initialize to normal random numbers:
Definition: ScalarFieldArray.h:157
int nIterations
Maximum number of iterations (default 100)
Definition: MinimizeParams.h:48
complex dot(const Tptr &X, const Tptr &Y)
Definition: Operators.h:196
Hestenes-Stiefel (preconditioned) conjugate gradients.
Definition: MinimizeParams.h:35
virtual bool report(int iter)
Definition: Minimize.h:59
Parameters to control the minimization algorithm.
Definition: MinimizeParams.h:29
double energyDiffThreshold
stop when energy change is below this for nEnergyDiff successive iterations (default: 0) ...
Definition: MinimizeParams.h:56
virtual double sync(double x) const
Override to synchronize scalars over MPI processes (if the same minimization is happening in sync ove...
Definition: Minimize.h:66
double alphaTstart
initial value for the test-step size (default: 1.0)
Definition: MinimizeParams.h:59
int nEnergyDiff
number of successive iterations for energyDiffThreshold check (default: 2)
Definition: MinimizeParams.h:57
Steepest Descent (always along negative (preconditioned) gradient)
Definition: MinimizeParams.h:37
virtual Vector precondition(const Vector &grad)
Definition: Minimize.h:55
double knormThreshold
stop when norm of residual against preconditioner falls below this (default: 0)
Definition: MinimizeParams.h:55
void fdTest(const MinimizeParams &params)
Tptr clone(const Tptr &X)
Clone (NOTE: operator= is by reference for the ScalarField classes)
Definition: Operators.h:111
virtual Vector precondition(const Vector &v) const
Override to enable preconditioning: return the preconditioned vector, given a vector.
Definition: Minimize.h:94
bool updateTestStepSize
set alphaT=alpha after every iteration if true (default: true)
Definition: MinimizeParams.h:61
double clock_sec()
Elapsed time in seconds (from start of program)
bool killFlag
Flag set by signal handlers - all compute loops should quit cleanly when this is set.
virtual double sync(double x) const
Override to synchronize scalars over MPI processes (if the same minimization is happening in sync ove...
Definition: Minimize.h:97
Miscellaneous utilities.
double minimize(const MinimizeParams &params)
Minimize this objective function with algorithm controlled by params and return the minimized value...
Polak-Ribiere (preconditioned) conjugate gradients (default)
Definition: MinimizeParams.h:33
const char * linePrefix
prefix for each output line of minimizer, useful for nested minimizations (default "CG\t") ...
Definition: MinimizeParams.h:52
const char * energyLabel
Label for the minimized quantity (default "E")
Definition: MinimizeParams.h:53
Fletcher-Reeves (preconditioned) conjugate gradients.
Definition: MinimizeParams.h:34
int nDim
Dimension of optimization space; used only for knormThreshold (default 1)
Definition: MinimizeParams.h:49
int solve(const Vector &rhs, const MinimizeParams &params)
const char * energyFormat
printf format for the minimized quantity (default "%22.15le")
Definition: MinimizeParams.h:54
FILE * fpLog
Stream to log iterations to.
Definition: MinimizeParams.h:51
Definition: Minimize.h:45
DirectionUpdateScheme
Search direction update scheme.
Definition: MinimizeParams.h:32
virtual void step(const Vector &dir, double alpha)=0
Move the state in parameter space along direction dir with scale alpha.
void axpy(double alpha, const Tptr &X, Tptr &Y)
Generic axpy for complex data types (Note: null pointers are treated as zero)
Definition: Operators.h:158
std::basic_string< char, ichar_traits > string
Case-insensitive string.
Definition: string.h:42
virtual double compute(Vector *grad)=0
Returns the objective function at the current state and store the gradient in grad, if non-null.
bool fdTest
whether to perform a finite difference test before each minimization (default false) ...
Definition: MinimizeParams.h:70
Vector state
the location of the minimum, obtained by solving hessian * state == rhs
Definition: Minimize.h:88