JDFTx  1.2.1
Minimize_linmin.h
1 /*-------------------------------------------------------------------
2 Copyright 2014 Ravishankar Sundararaman
3 
4 This file is part of JDFTx.
5 
6 JDFTx is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10 
11 JDFTx is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with JDFTx. If not, see <http://www.gnu.org/licenses/>.
18 -------------------------------------------------------------------*/
19 
20 #ifndef JDFTX_CORE_MINIMIZE_LINMIN_H
21 #define JDFTX_CORE_MINIMIZE_LINMIN_H
22 
23 #include <core/Minimize.h> //this include is present only to aid IDE's autocompletion (does nothing since this file is always used via Minimize.h)
24 #include <deque>
25 
26 //Energy difference convergence check
27 class EdiffCheck : std::deque<double>
28 { unsigned nDiff;
29  double threshold;
30 public:
31  EdiffCheck(unsigned nDiff, double threshold) : nDiff(nDiff), threshold(fabs(threshold))
32  {
33  }
34  bool checkConvergence(double E)
35  { if(!size()) { push_back(E); return false; } //first element
36  if(E >= back()) { clear(); push_back(E); return false; } //energy increased, reset converge list
37  //Have atleast one energy difference in list:
38  push_back(E);
39  if(size()==nDiff+2) pop_front(); //discard old unneeded elements
40  if(size()==nDiff+1)
41  { for(unsigned i=0; i<nDiff; i++)
42  if(at(i+1) < at(i)-threshold)
43  return false;
44  return true;
45  }
46  else return false;
47  }
48 };
49 
50 namespace MinimizePrivate
51 {
52  //Each of these linmin methods advance the parameters in obj along direction d
53  //updating the energy E, gradient g, and the step-size alpha
54  //The return value specifies if the step succeeded at reducing E
55  //If the step fails, alpha MUST contain the total progress along dir
56  //made by this step, so that minimize may reset it back to the original value
57 
58  //Equation-of-motion / Relaxation method stepping
59  //NOTE: Criterion for success of this method is different from the others
60  // It only ensures that the energy is not NaN/Inf.
61  template<typename Vector>
62  bool linminRelax(Minimizable<Vector>& obj, const MinimizeParams& p,
63  const Vector& d, double alphaT, double& alpha, double& E, Vector& g, Vector& Kg)
64  {
65  alpha = alphaT; //constant step-size equal to the starting value
66  obj.step(d, alpha);
67  E = obj.sync(obj.compute(&g, &Kg));
68  if(!std::isfinite(E))
69  { fprintf(p.fpLog, "%s\tRelax step failed with %s = %le\n.", p.linePrefix, p.energyLabel, E); fflush(p.fpLog);
70  return false;
71  }
72  else return true;
73  }
74 
75 
76  //Quadratic line minimization
77  template<typename Vector>
78  bool linminQuad(Minimizable<Vector>& obj, const MinimizeParams& p,
79  const Vector& d, double alphaT, double& alpha, double& E, Vector& g, Vector& Kg)
80  {
81  double alphaPrev = 0.0; //the progress made so far along d
82  double Eorig = E;
83  double gdotd = obj.sync(dot(g,d)); //directional derivative at starting point
84  if(gdotd >= 0.0)
85  { fprintf(p.fpLog, "%s\tBad step direction: g.d > 0.\n", p.linePrefix); fflush(p.fpLog);
86  alpha = alphaPrev;
87  return false;
88  }
89 
90  //Test step and step size prediction:
91  double ET = 0.0; //test step energy
92  for(int s=0; s<p.nAlphaAdjustMax; s++)
93  { if(alphaT < p.alphaTmin)
94  { fprintf(p.fpLog, "%s\talphaT below threshold %le. Quitting step.\n", p.linePrefix, p.alphaTmin); fflush(p.fpLog);
95  alpha = alphaPrev;
96  return false;
97  }
98  //Try the test step:
99  obj.step(d, alphaT-alphaPrev); alphaPrev = alphaT;
100  ET = obj.sync(obj.compute(0,0));
101  //Check if step crossed domain of validity of parameter space:
102  if(!std::isfinite(ET))
103  { alphaT *= p.alphaTreduceFactor;
104  fprintf(p.fpLog, "%s\tTest step failed with %s = %le, reducing alphaT to %le.\n",
105  p.linePrefix, p.energyLabel, ET, alphaT); fflush(p.fpLog);
106  continue;
107  }
108  //Predict step size:
109  alpha = 0.5*pow(alphaT,2)*gdotd/(alphaT*gdotd + E - ET);
110  //Check reasonableness of predicted step size:
111  if(alpha<0)
112  { //Curvature has the wrong sign
113  //That implies ET < E, so accept step for now, and try descending further next time
114  alphaT *= p.alphaTincreaseFactor;
115  fprintf(p.fpLog, "%s\tWrong curvature in test step, increasing alphaT to %le.\n", p.linePrefix, alphaT); fflush(p.fpLog);
116  E = obj.sync(obj.compute(&g, &Kg));
117  return true;
118  }
119  if(alpha/alphaT > p.alphaTincreaseFactor)
120  { alphaT *= p.alphaTincreaseFactor;
121  fprintf(p.fpLog, "%s\tPredicted alpha/alphaT>%lf, increasing alphaT to %le.\n",
122  p.linePrefix, p.alphaTincreaseFactor, alphaT); fflush(p.fpLog);
123  continue;
124  }
125  if(alphaT/alpha < p.alphaTreduceFactor)
126  { alphaT *= p.alphaTreduceFactor;
127  fprintf(p.fpLog, "%s\tPredicted alpha/alphaT<%lf, reducing alphaT to %le.\n",
128  p.linePrefix, p.alphaTreduceFactor, alphaT); fflush(p.fpLog);
129  continue;
130  }
131  //Successful test step:
132  break;
133  }
134  if(!std::isfinite(E))
135  { fprintf(p.fpLog, "%s\tTest step failed %d times. Quitting step.\n", p.linePrefix, p.nAlphaAdjustMax); fflush(p.fpLog);
136  alpha = alphaPrev;
137  return false;
138  }
139 
140  //Actual step:
141  for(int s=0; s<p.nAlphaAdjustMax; s++)
142  { //Try the step:
143  obj.step(d, alpha-alphaPrev); alphaPrev=alpha;
144  E = obj.sync(obj.compute(&g, &Kg));
145  if(!std::isfinite(E))
146  { alpha *= p.alphaTreduceFactor;
147  fprintf(p.fpLog, "%s\tStep failed with %s = %le, reducing alpha to %le.\n",
148  p.linePrefix, p.energyLabel, E, alpha); fflush(p.fpLog);
149  continue;
150  }
151  if(E > Eorig)
152  { alpha *= p.alphaTreduceFactor;
153  fprintf(p.fpLog, "%s\tStep increased %s by %le, reducing alpha to %le.\n",
154  p.linePrefix, p.energyLabel, E-Eorig, alpha); fflush(p.fpLog);
155  continue;
156  }
157  //Step successful:
158  break;
159  }
160  if(!std::isfinite(E) || E>Eorig)
161  { fprintf(p.fpLog, "%s\tStep failed to reduce %s after %d attempts. Quitting step.\n",
162  p.linePrefix, p.energyLabel, p.nAlphaAdjustMax); fflush(p.fpLog);
163  return false;
164  }
165  return true;
166  }
167 
168 
169  //Cubic line minimization, designed to handle fluids which can be highly non-quadratic
170  template<typename Vector>
171  bool linminCubicWolfe(Minimizable<Vector>& obj, const MinimizeParams& p,
172  const Vector& d, double alphaT, double& alpha, double& E, Vector& g, Vector& Kg)
173  {
174  double Eprev = E;
175  double gdotdPrev = obj.sync(dot(g,d)); //directional derivative at starting point
176  if(gdotdPrev >= 0.0)
177  { fprintf(p.fpLog, "%s\tBad step direction: g.d > 0.\n", p.linePrefix); fflush(p.fpLog);
178  alpha = 0;
179  return false;
180  }
181  double E0 = Eprev, gdotd0 =gdotdPrev; //Always use initial energy and gradient for Wolfe test (even if part of line search has been committed)
182 
183  alpha = alphaT; //this is the initial tentative step
184  double alphaPrev = 0; //alpha of the other point in the cubic interval
185  double alphaState = 0.; //alpha that correspodns to state of the objective function
186  for(int s=0;; s++)
187  { //Move by alpha:
188  obj.step(d, alpha-alphaState); alphaState=alpha;
189  E = obj.sync(obj.compute(&g, &Kg));
190  double gdotd = obj.sync(dot(g,d));
191  if(s > p.nAlphaAdjustMax) break; //step limit
192  //Check for domain error:
193  if(!std::isfinite(E))
194  { alpha = alphaPrev + p.alphaTreduceFactor*(alpha-alphaPrev);
195  fprintf(p.fpLog, "%s\tStep failed with %s = %le, reducing alpha to %le.\n",
196  p.linePrefix, p.energyLabel, E, alpha); fflush(p.fpLog);
197  continue;
198  }
199  //Have a valid cubic spline interval [alphaPrev,alpha]
200  //with values Eprev, E and derivatives gdotdPrev, gdotd
201  //Change coordinates to make that the unit interval in t:
202  double Ep0 = gdotdPrev*(alpha-alphaPrev);
203  double Ep1 = gdotd*(alpha-alphaPrev);
204  double deltaE = E-Eprev;
205  //dE/dt is a quadratic At^2-2Bt+C, with coefficients:
206  double A = 3*(Ep0 + Ep1 - 2*deltaE);
207  double B = 2*Ep0 + Ep1 - 3*deltaE;
208  double C = Ep0;
209  double tMin = NAN; //location of minimum (NAN if none)
210  if(B*B-A*C >= 0) //E'(t) has at least one root
211  { double disc = sqrt(B*B-A*C);
212  double tOpt = B>0 ? (B+disc)/A : C/(B-disc); //only root of E'(t) for which E''(t) = 2*(A*t-B) >= 0
213  double Eopt = Eprev + tOpt*(C + tOpt*(-B + tOpt*A/3)); //interpolated E(tOpt)
214  if(std::isfinite(tOpt) && Eopt<=E && Eopt<=Eprev) //well-defined local minimum lower than current endpoints
215  tMin = tOpt;
216  }
217  if(std::isfinite(tMin))
218  { tMin = std::min(tMin, p.alphaTincreaseFactor); //cap on forward-in-t step
219  tMin = std::max(tMin, 1.-p.alphaTincreaseFactor); //cap on backward-in-t step
220  }
221  else
222  { //no local minimum lower than current endpoints within interval
223  //therefore at least one of the endpoints must have function value decreasing in the outwards direction
224  if(Ep1<=0) tMin = p.alphaTincreaseFactor; //E(t) decreases above t=1, take max forward-in-t step
225  else tMin = 1.-p.alphaTincreaseFactor; //E(t) decreases below t=0, take max backward-in-t step
226  }
227  double alphaNew = alphaPrev + tMin*(alpha-alphaPrev);
228  //Check if we're done (Wolfe conditions):
229  if(E <= E0 + p.wolfeEnergy*alpha*gdotd0 && gdotd >= p.wolfeGradient*gdotd0)
230  { if(p.updateTestStepSize) alphaT = alphaNew; //save computed optimum step size for next time
231  return true;
232  }
233  fprintf(p.fpLog, "%s\tWolfe criterion not satisfied: alpha: %lg (E-E0)/|gdotd0|: %lg gdotd/gdotd0: %lg (taking cubic step)\n",
234  p.linePrefix, alpha, (E-E0)/fabs(gdotd0), gdotd/gdotd0); fflush(p.fpLog);
235  //Prepare for next step:
236  if(E<Eprev) { alphaPrev = alpha; Eprev=E; gdotdPrev=gdotd; } //pick the lower energy points amongst E and Eprev as the next 'prev' point
237  alpha = alphaNew; //set the alpha chosen above as the other endpoint for the next cubic
238  }
239  //Check if current state is invalid or worse than starting point:
240  if(!std::isfinite(E) || E>E0) return false; //minimize will roll back to the last known good state
241  else return true;
242  }
243 }
244 
245 template<typename Vector> typename Minimizable<Vector>::Linmin Minimizable<Vector>::getLinmin(const MinimizeParams& p) const
246 { using namespace MinimizePrivate;
247  switch(p.linminMethod)
249  { switch(p.dirUpdateScheme)
250  { case MinimizeParams::SteepestDescent: return linminRelax<Vector>;
251  case MinimizeParams::LBFGS: return linminCubicWolfe<Vector>;
252  default: return linminQuad<Vector>; //Default for all nonlinear CG methods
253  }
254  }
255  case MinimizeParams::Relax: return linminRelax<Vector>;
256  case MinimizeParams::Quad: return linminQuad<Vector>;
257  case MinimizeParams::CubicWolfe: return linminCubicWolfe<Vector>;
258  }
259  return 0;
260 }
261 
262 #endif //JDFTX_CORE_MINIMIZE_LINMIN_H
ScalarField pow(const ScalarField &, double alpha)
Elementwise power (preserve input)
double alphaTmin
minimum value of the test-step size (algorithm gives up when difficulties cause alphaT to fall below ...
Definition: MinimizeParams.h:60
Limited memory version of the BFGS algorithm.
Definition: MinimizeParams.h:36
Definition: Minimize_linmin.h:27
ScalarField sqrt(const ScalarField &)
Elementwise square root (preserve input)
complex dot(const Tptr &X, const Tptr &Y)
Definition: Operators.h:196
Definition: Minimize_linmin.h:50
Parameters to control the minimization algorithm.
Definition: MinimizeParams.h:29
virtual double compute(Vector *grad, Vector *Kgrad)=0
Returns the objective function at the current state and store the gradient in grad and preconditioned...
Cubic line search terminated by Wolfe conditions, possibly without a test step.
Definition: MinimizeParams.h:45
double alphaTreduceFactor
Factor to reduce alphaT on difficulties (default 0.1)
Definition: MinimizeParams.h:63
move by a constant multiple (=alphaTstart) of the search direction (not recommended for CG) ...
Definition: MinimizeParams.h:43
virtual double sync(double x) const
Override to synchronize scalars over MPI processes (if the same minimization is happening in sync ove...
Definition: Minimize.h:62
Steepest Descent (always along negative (preconditioned) gradient)
Definition: MinimizeParams.h:37
default method recommended by the direction update scheme
Definition: MinimizeParams.h:42
double wolfeGradient
Wolfe criterion dimensionless threshold for gradient.
Definition: MinimizeParams.h:68
bool updateTestStepSize
set alphaT=alpha after every iteration if true (default: true)
Definition: MinimizeParams.h:61
double alphaTincreaseFactor
Max ratio of alpha to alphaT, increase alphaT by this factor otherwise (default 3.0)
Definition: MinimizeParams.h:64
const char * linePrefix
prefix for each output line of minimizer, useful for nested minimizations (default "CG\t") ...
Definition: MinimizeParams.h:52
Nonlinear minimization templates.
const char * energyLabel
Label for the minimized quantity (default "E")
Definition: MinimizeParams.h:53
int nAlphaAdjustMax
maximum number of times to alpha adjust attempts (default 3)
Definition: MinimizeParams.h:65
FILE * fpLog
Stream to log iterations to.
Definition: MinimizeParams.h:51
Definition: Minimize.h:46
virtual void step(const Vector &dir, double alpha)=0
Move the state in parameter space along direction dir with scale alpha.
use the energy at a test step location to find the minimum along the line (default) ...
Definition: MinimizeParams.h:44