JDFTx  1.1.1
Minimize_lBFGS.h
1 /*-------------------------------------------------------------------
2 Copyright 2014 Ravishankar Sundararaman
3 
4 This file is part of JDFTx.
5 
6 JDFTx is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10 
11 JDFTx is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with JDFTx. If not, see <http://www.gnu.org/licenses/>.
18 -------------------------------------------------------------------*/
19 
20 #ifndef JDFTX_CORE_MINIMIZE_LBFGS_H
21 #define JDFTX_CORE_MINIMIZE_LBFGS_H
22 
23 #include <core/Minimize_linmin.h>
24 #include <core/Minimize.h> //this include is present only to aid IDE's autocompletion (does nothing since this file is always used via Minimize.h)
25 #include <memory>
26 #include <list>
27 #include <stack>
28 
29 //Following Nocedal and Liu, Math Prog 45, 503 (1989)
30 template<typename Vector> double Minimizable<Vector>::lBFGS(const MinimizeParams& p)
31 {
32  Vector g; //gradient
33  double E = sync(compute(&g)); //get initial energy and gradient
34  Vector Kg = precondition(g); //preconditioned gradient
35 
36  EdiffCheck ediffCheck(p.nEnergyDiff, p.energyDiffThreshold); //list of past energies
37 
38  double alpha = 0.; //step size (note BFGS always tries alpha=alphaTstart first, which is recommended to be 1)
39  double linminTest = 0.;
40 
41  //History of variable and residual changes:
42  struct History
43  { Vector s; //change in variable (= alpha d)
44  Vector Ky; //change in preconditioned residual (= Kg - KgPrev)
45  double rho; //= 1/dot(s,y)
46  };
47  std::list< std::shared_ptr<History> > history;
48  double gamma = 0.; //scaling: set to dot(s,y)/dot(y,Ky) each iteration
49 
50  //Select the linmin method:
51  Linmin linmin = getLinmin(p);
52 
53  //Iterate until convergence, max iteration count or kill signal
54  int iter=0;
55  for(iter=0; !killFlag; iter++)
56  {
57  if(report(iter)) //optional reporting/processing
58  { E = sync(compute(&g)); //update energy and gradient if state was modified
59  Kg = precondition(g);
60  fprintf(p.fpLog, "%s\tState modified externally: resetting history.\n", p.linePrefix);
61  fflush(p.fpLog);
62  history.clear();
63  }
64 
65  double gKnorm = sync(dot(g,Kg));
66  fprintf(p.fpLog, "%sIter: %3d %s: ", p.linePrefix, iter, p.energyLabel);
67  fprintf(p.fpLog, p.energyFormat, E);
68  fprintf(p.fpLog, " |grad|_K: %10.3le", sqrt(gKnorm/p.nDim));
69  if(alpha) fprintf(p.fpLog, " alpha: %10.3le", alpha);
70  if(linminTest) fprintf(p.fpLog, " linmin: %10.3le", linminTest);
71  fprintf(p.fpLog, " t[s]: %9.2lf", clock_sec());
72 
73  //Check stopping conditions:
74  fprintf(p.fpLog, "\n"); fflush(p.fpLog);
75  if(sqrt(gKnorm/p.nDim) < p.knormThreshold)
76  { fprintf(p.fpLog, "%sConverged (|grad|_K<%le).\n", p.linePrefix, p.knormThreshold);
77  fflush(p.fpLog); return E;
78  }
79  if(ediffCheck.checkConvergence(E))
80  { fprintf(p.fpLog, "%sConverged (|Delta %s|<%le for %d iters).\n",
82  fflush(p.fpLog); return E;
83  }
84  if(!std::isfinite(gKnorm))
85  { fprintf(p.fpLog, "%s|grad|_K=%le. Stopping ...\n", p.linePrefix, gKnorm);
86  fflush(p.fpLog); return E;
87  }
88  if(!std::isfinite(E))
89  { fprintf(p.fpLog, "%sE=%le. Stopping ...\n", p.linePrefix, E);
90  fflush(p.fpLog); return E;
91  }
92  if(iter>=p.nIterations) break;
93 
94  //Prepare container that will be committed to history below: (and avoid copies)
95  auto h = std::make_shared<History>();
96  Vector& d = h->s;
97 
98  //Compute search direction:
99  d = clone(Kg);
100  std::stack<double> a; //alpha in the reference renamed to 'a' here to not clash with step size
101  for(auto h=history.rbegin(); h!=history.rend(); h++)
102  { a.push( (*h)->rho * sync(dot((*h)->s, d)) );
103  axpy(-a.top(), (*h)->Ky, d);
104  }
105  if(gamma) d *= gamma; //scaling (available after first iteration)
106  for(auto h=history.begin(); h!=history.end(); h++)
107  { double b = (*h)->rho * sync(dot((*h)->Ky, d));
108  axpy(a.top()-b, (*h)->s, d);
109  a.pop();
110  }
111  d *= -1;
112  if((int)history.size() == p.history) history.pop_front(); //if full, make room for the upcoming history entry (do this here to free memory earlier)
113 
114  //Line minimization
115  Vector y = clone(g); //store previous gradient before linmin changes it (this will later be converted to y = g-gPrev)
116  if(!linmin(*this, p, d, p.alphaTstart, alpha, E, g))
117  { //linmin failed:
118  fprintf(p.fpLog, "%s\tUndoing step.\n", p.linePrefix);
119  step(d, -alpha);
120  E = sync(compute(&g));
121  Kg = precondition(g);
122  if(history.size())
123  { //Failed, but not along the gradient direction:
124  fprintf(p.fpLog, "%s\tStep failed: resetting history.\n", p.linePrefix);
125  fflush(p.fpLog);
126  history.clear();
127  gamma = 0.;
128  linminTest = 0.;
129  continue;
130  }
131  else
132  { //Failed along the gradient direction
133  fprintf(p.fpLog, "%s\tStep failed along negative gradient direction.\n", p.linePrefix);
134  fprintf(p.fpLog, "%sProbably at roundoff error limit. (Stopping)\n", p.linePrefix);
135  fflush(p.fpLog);
136  return E;
137  }
138  }
139 
140  //Update history:
141  linminTest = sync(dot(g,d))/sqrt(sync(dot(g,g))*sync(dot(d,d)));
142  d *= alpha; //d -> alpha * d, which is change of state, and it resides in h->s
143  h->Ky = Kg; Kg = precondition(g); //store previous preconditioned gradient, and compute new one
144  h->Ky *= -1; axpy(1., Kg, h->Ky); //Ky = K(g-gPrev)
145  y *= -1; axpy(1., g, y); //y = g-gPrev
146  double ydots = sync(dot(y, h->s));
147  h->rho = 1./ydots;
148  gamma = ydots / sync(dot(y, h->Ky));
149  history.push_back(h);
150  }
151  fprintf(p.fpLog, "%sNone of the convergence criteria satisfied after %d iterations.\n", p.linePrefix, iter);
152  return E;
153 }
154 
155 #endif //JDFTX_CORE_MINIMIZE_LBFGS_H
Definition: Minimize_linmin.h:27
ScalarField sqrt(const ScalarField &)
Elementwise square root (preserve input)
int nIterations
Maximum number of iterations (default 100)
Definition: MinimizeParams.h:48
complex dot(const Tptr &X, const Tptr &Y)
Definition: Operators.h:196
int history
Number of past variables and residuals to store (BFGS only)
Definition: MinimizeParams.h:50
Parameters to control the minimization algorithm.
Definition: MinimizeParams.h:29
double energyDiffThreshold
stop when energy change is below this for nEnergyDiff successive iterations (default: 0) ...
Definition: MinimizeParams.h:56
double alphaTstart
initial value for the test-step size (default: 1.0)
Definition: MinimizeParams.h:59
int nEnergyDiff
number of successive iterations for energyDiffThreshold check (default: 2)
Definition: MinimizeParams.h:57
double knormThreshold
stop when norm of residual against preconditioner falls below this (default: 0)
Definition: MinimizeParams.h:55
Tptr clone(const Tptr &X)
Clone (NOTE: operator= is by reference for the ScalarField classes)
Definition: Operators.h:111
double clock_sec()
Elapsed time in microseconds (from start of program)
bool killFlag
Flag set by signal handlers - all compute loops should quit cleanly when this is set.
const char * linePrefix
prefix for each output line of minimizer, useful for nested minimizations (default "CG\t") ...
Definition: MinimizeParams.h:52
Nonlinear minimization templates.
const char * energyLabel
Label for the minimized quantity (default "E")
Definition: MinimizeParams.h:53
int nDim
Dimension of optimization space; used only for knormThreshold (default 1)
Definition: MinimizeParams.h:49
const char * energyFormat
printf format for the minimized quantity (default "%22.15le")
Definition: MinimizeParams.h:54
FILE * fpLog
Stream to log iterations to.
Definition: MinimizeParams.h:51
Definition: Minimize.h:45
void axpy(double alpha, const Tptr &X, Tptr &Y)
Generic axpy for complex data types (Note: null pointers are treated as zero)
Definition: Operators.h:158