JDFTx  1.2.0
Minimize_lBFGS.h
1 /*-------------------------------------------------------------------
2 Copyright 2014 Ravishankar Sundararaman
3 
4 This file is part of JDFTx.
5 
6 JDFTx is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10 
11 JDFTx is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with JDFTx. If not, see <http://www.gnu.org/licenses/>.
18 -------------------------------------------------------------------*/
19 
20 #ifndef JDFTX_CORE_MINIMIZE_LBFGS_H
21 #define JDFTX_CORE_MINIMIZE_LBFGS_H
22 
23 #include <core/Minimize_linmin.h>
24 #include <core/Minimize.h> //this include is present only to aid IDE's autocompletion (does nothing since this file is always used via Minimize.h)
25 #include <memory>
26 #include <list>
27 #include <stack>
28 
29 //Following Nocedal and Liu, Math Prog 45, 503 (1989)
30 template<typename Vector> double Minimizable<Vector>::lBFGS(const MinimizeParams& p)
31 {
32  Vector g, Kg; //gradient and preconditioned gradient
33  double E = sync(compute(&g, &Kg)); //get initial energy and gradient
34 
35  EdiffCheck ediffCheck(p.nEnergyDiff, p.energyDiffThreshold); //list of past energies
36 
37  double alpha = 0.; //step size (note BFGS always tries alpha=alphaTstart first, which is recommended to be 1)
38  double linminTest = 0.;
39 
40  //History of variable and residual changes:
41  struct History
42  { Vector s; //change in variable (= alpha d)
43  Vector Ky; //change in preconditioned residual (= Kg - KgPrev)
44  double rho; //= 1/dot(s,y)
45  };
46  std::list< std::shared_ptr<History> > history;
47  double gamma = 0.; //scaling: set to dot(s,y)/dot(y,Ky) each iteration
48 
49  //Select the linmin method:
50  Linmin linmin = getLinmin(p);
51 
52  //Iterate until convergence, max iteration count or kill signal
53  int iter=0;
54  for(iter=0; !killFlag; iter++)
55  {
56  if(report(iter)) //optional reporting/processing
57  { E = sync(compute(&g, &Kg)); //update energy and gradient if state was modified
58  fprintf(p.fpLog, "%s\tState modified externally: resetting history.\n", p.linePrefix);
59  fflush(p.fpLog);
60  history.clear();
61  }
62 
63  double gKnorm = sync(dot(g,Kg));
64  fprintf(p.fpLog, "%sIter: %3d %s: ", p.linePrefix, iter, p.energyLabel);
65  fprintf(p.fpLog, p.energyFormat, E);
66  fprintf(p.fpLog, " |grad|_K: %10.3le", sqrt(gKnorm/p.nDim));
67  if(alpha) fprintf(p.fpLog, " alpha: %10.3le", alpha);
68  if(linminTest) fprintf(p.fpLog, " linmin: %10.3le", linminTest);
69  fprintf(p.fpLog, " t[s]: %9.2lf", clock_sec());
70 
71  //Check stopping conditions:
72  fprintf(p.fpLog, "\n"); fflush(p.fpLog);
73  if(sqrt(gKnorm/p.nDim) < p.knormThreshold)
74  { fprintf(p.fpLog, "%sConverged (|grad|_K<%le).\n", p.linePrefix, p.knormThreshold);
75  fflush(p.fpLog); return E;
76  }
77  if(ediffCheck.checkConvergence(E))
78  { fprintf(p.fpLog, "%sConverged (|Delta %s|<%le for %d iters).\n",
80  fflush(p.fpLog); return E;
81  }
82  if(!std::isfinite(gKnorm))
83  { fprintf(p.fpLog, "%s|grad|_K=%le. Stopping ...\n", p.linePrefix, gKnorm);
84  fflush(p.fpLog); return E;
85  }
86  if(!std::isfinite(E))
87  { fprintf(p.fpLog, "%sE=%le. Stopping ...\n", p.linePrefix, E);
88  fflush(p.fpLog); return E;
89  }
90  if(iter>=p.nIterations) break;
91 
92  //Prepare container that will be committed to history below: (and avoid copies)
93  auto h = std::make_shared<History>();
94  Vector& d = h->s;
95 
96  //Compute search direction:
97  d = clone(Kg);
98  std::stack<double> a; //alpha in the reference renamed to 'a' here to not clash with step size
99  for(auto h=history.rbegin(); h!=history.rend(); h++)
100  { a.push( (*h)->rho * sync(dot((*h)->s, d)) );
101  axpy(-a.top(), (*h)->Ky, d);
102  }
103  if(gamma) d *= gamma; //scaling (available after first iteration)
104  for(auto h=history.begin(); h!=history.end(); h++)
105  { double b = (*h)->rho * sync(dot((*h)->Ky, d));
106  axpy(a.top()-b, (*h)->s, d);
107  a.pop();
108  }
109  d *= -1;
110  if((int)history.size() == p.history) history.pop_front(); //if full, make room for the upcoming history entry (do this here to free memory earlier)
111  constrain(d); //restrict search direction to allowed subspace
112 
113  //Line minimization
114  Vector y = clone(g); h->Ky = clone(Kg); //store previous gradients before linmin changes it (these will later be converted to y = g-gPrev)
115  double alphaT = std::min(p.alphaTstart, safeStepSize(d));
116  if(!linmin(*this, p, d, alphaT, alpha, E, g, Kg))
117  { //linmin failed:
118  fprintf(p.fpLog, "%s\tUndoing step.\n", p.linePrefix);
119  step(d, -alpha);
120  E = sync(compute(&g, &Kg));
121  if(history.size())
122  { //Failed, but not along the gradient direction:
123  fprintf(p.fpLog, "%s\tStep failed: resetting history.\n", p.linePrefix);
124  fflush(p.fpLog);
125  history.clear();
126  gamma = 0.;
127  linminTest = 0.;
128  continue;
129  }
130  else
131  { //Failed along the gradient direction
132  fprintf(p.fpLog, "%s\tStep failed along negative gradient direction.\n", p.linePrefix);
133  fprintf(p.fpLog, "%sProbably at roundoff error limit. (Stopping)\n", p.linePrefix);
134  fflush(p.fpLog);
135  return E;
136  }
137  }
138 
139  //Update history:
140  linminTest = sync(dot(g,d))/sqrt(sync(dot(g,g))*sync(dot(d,d)));
141  d *= alpha; //d -> alpha * d, which is change of state, and it resides in h->s
142  h->Ky *= -1; axpy(1., Kg, h->Ky); //Ky = K(g-gPrev)
143  y *= -1; axpy(1., g, y); //y = g-gPrev
144  double ydots = sync(dot(y, h->s));
145  h->rho = 1./ydots;
146  gamma = ydots / sync(dot(y, h->Ky));
147  history.push_back(h);
148  }
149  fprintf(p.fpLog, "%sNone of the convergence criteria satisfied after %d iterations.\n", p.linePrefix, iter);
150  return E;
151 }
152 
153 #endif //JDFTX_CORE_MINIMIZE_LBFGS_H
Definition: Minimize_linmin.h:27
ScalarField sqrt(const ScalarField &)
Elementwise square root (preserve input)
int nIterations
Maximum number of iterations (default 100)
Definition: MinimizeParams.h:48
complex dot(const Tptr &X, const Tptr &Y)
Definition: Operators.h:196
int history
Number of past variables and residuals to store (BFGS only)
Definition: MinimizeParams.h:50
Parameters to control the minimization algorithm.
Definition: MinimizeParams.h:29
double energyDiffThreshold
stop when energy change is below this for nEnergyDiff successive iterations (default: 0) ...
Definition: MinimizeParams.h:56
double alphaTstart
initial value for the test-step size (default: 1.0)
Definition: MinimizeParams.h:59
int nEnergyDiff
number of successive iterations for energyDiffThreshold check (default: 2)
Definition: MinimizeParams.h:57
double knormThreshold
stop when norm of residual against preconditioner falls below this (default: 0)
Definition: MinimizeParams.h:55
Tptr clone(const Tptr &X)
Clone (NOTE: operator= is by reference for the ScalarField classes)
Definition: Operators.h:111
double clock_sec()
Elapsed time in microseconds (from start of program)
bool killFlag
Flag set by signal handlers - all compute loops should quit cleanly when this is set.
const char * linePrefix
prefix for each output line of minimizer, useful for nested minimizations (default "CG\t") ...
Definition: MinimizeParams.h:52
Nonlinear minimization templates.
const char * energyLabel
Label for the minimized quantity (default "E")
Definition: MinimizeParams.h:53
int nDim
Dimension of optimization space; used only for knormThreshold (default 1)
Definition: MinimizeParams.h:49
const char * energyFormat
printf format for the minimized quantity (default "%22.15le")
Definition: MinimizeParams.h:54
FILE * fpLog
Stream to log iterations to.
Definition: MinimizeParams.h:51
Definition: Minimize.h:46
void axpy(double alpha, const Tptr &X, Tptr &Y)
Generic axpy for complex data types (Note: null pointers are treated as zero)
Definition: Operators.h:158