JDFTx  1.2.0
Minimize.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------
2 Copyright 2011 Ravishankar Sundararaman, Kendra Letchworth Weaver
3 
4 This file is part of JDFTx.
5 
6 JDFTx is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10 
11 JDFTx is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with JDFTx. If not, see <http://www.gnu.org/licenses/>.
18 -------------------------------------------------------------------*/
19 
20 #ifndef JDFTX_CORE_MINIMIZE_H
21 #define JDFTX_CORE_MINIMIZE_H
22 
23 #include <cmath>
24 #include <cfloat>
25 #include <algorithm>
26 #include <core/Util.h>
27 #include <core/MinimizeParams.h>
28 
31 
46 template<typename Vector> struct Minimizable
47 {
49  virtual void step(const Vector& dir, double alpha)=0;
50 
52  virtual double compute(Vector* grad, Vector* Kgrad)=0;
53 
56  virtual bool report(int iter) { return false; }
57 
59  virtual void constrain(Vector&) {}
60 
62  virtual double sync(double x) const { return x; }
63 
65  virtual double safeStepSize(const Vector& dir) const { return DBL_MAX; }
66 
68  double minimize(const MinimizeParams& params);
69 
72  void fdTest(const MinimizeParams& params);
73 
74 private:
75  typedef bool (*Linmin)(Minimizable<Vector>&, const MinimizeParams&, const Vector&, double, double&, double&, Vector&, Vector&);
76  Linmin getLinmin(const MinimizeParams& params) const;
77  double lBFGS(const MinimizeParams& params); //limited memory BFGS implementation (differs sufficiently from CG to be justify a separate implementation)
78 };
79 
85 template<typename Vector> struct LinearSolvable
86 {
87  Vector state;
88 
90  virtual Vector hessian(const Vector&) const=0;
91 
93  virtual Vector precondition(const Vector& v) const { return clone(v); }
94 
96  virtual double sync(double x) const { return x; }
97 
100  int solve(const Vector& rhs, const MinimizeParams& params);
101 };
102 
104 //---------------------- Implementation ----------------------------
106 
107 #include <core/Minimize_linmin.h>
108 #include <core/Minimize_lBFGS.h>
109 
110 template<typename Vector> double Minimizable<Vector>::minimize(const MinimizeParams& p)
111 { if(p.fdTest) fdTest(p); // finite difference test
112  if(p.dirUpdateScheme == MinimizeParams::LBFGS) return lBFGS(p);
113 
114  Vector g, gPrev, Kg; //current, previous and preconditioned gradients
115  double E = sync(compute(&g, &Kg)); //get initial energy and gradient
116  EdiffCheck ediffCheck(p.nEnergyDiff, p.energyDiffThreshold); //list of past energies
117 
118  Vector d = clone(Kg); //step direction (will be reset in first iteration)
119  constrain(d); //restrict search direction to allowed subspace
120  bool forceGradDirection = true; //whether current direction is along the gradient
121  MinimizeParams::DirectionUpdateScheme currentDirUpdateScheme = p.dirUpdateScheme; //initially use the specified scheme, may switch to SD on trouble
122  bool gPrevUsed;
123  switch(currentDirUpdateScheme)
126  gPrevUsed = false;
127  break;
128  default:
129  gPrevUsed = true;
130  }
131 
132  double alphaT = p.alphaTstart; //test step size
133  double alpha = alphaT; //actual step size
134  double beta = 0.0; //CG prev search direction mix factor
135  double gKNorm = 0.0, gKNormPrev = 0.0; //current and previous norms of the preconditioned gradient
136 
137  //Select the linmin method:
138  Linmin linmin = getLinmin(p);
139 
140  //Iterate until convergence, max iteration count or kill signal
141  int iter=0;
142  for(iter=0; !killFlag; iter++)
143  {
144  if(report(iter)) //optional reporting/processing
145  { E = sync(compute(&g, &Kg)); //update energy and gradient if state was modified
146  fprintf(p.fpLog, "%s\tState modified externally: resetting search direction.\n", p.linePrefix);
147  fflush(p.fpLog);
148  forceGradDirection = true; //reset search direction
149  }
150 
151  gKNorm = sync(dot(g,Kg));
152  fprintf(p.fpLog, "%sIter: %3d %s: ", p.linePrefix, iter, p.energyLabel);
153  fprintf(p.fpLog, p.energyFormat, E);
154  fprintf(p.fpLog, " |grad|_K: %10.3le alpha: %10.3le", sqrt(gKNorm/p.nDim), alpha);
155 
156  //Print prev step stats and set CG direction parameter if necessary
157  beta = 0.0;
158  if(!forceGradDirection)
159  { double dotgd = sync(dot(g,d));
160  double dotgPrevKg = gPrevUsed ? sync(dot(gPrev, Kg)) : 0.;
161 
162  fprintf(p.fpLog, " linmin: %10.3le", dotgd/sqrt(sync(dot(g,g))*sync(dot(d,d))));
163  if(gPrevUsed)
164  fprintf(p.fpLog, " cgtest: %10.3le", dotgPrevKg/sqrt(gKNorm*gKNormPrev));
165  fprintf(p.fpLog, " t[s]: %9.2lf", clock_sec());
166 
167  //Update beta:
168  switch(currentDirUpdateScheme)
169  { case MinimizeParams::FletcherReeves: beta = gKNorm/gKNormPrev; break;
170  case MinimizeParams::PolakRibiere: beta = (gKNorm-dotgPrevKg)/gKNormPrev; break;
171  case MinimizeParams::HestenesStiefel: beta = (gKNorm-dotgPrevKg)/(dotgd-sync(dot(d,gPrev))); break;
172  case MinimizeParams::SteepestDescent: beta = 0.0; break;
173  case MinimizeParams::LBFGS: break; //Should never encounter since LBFGS handled separately; just to eliminate compiler warnings
174  }
175  if(beta<0.0)
176  { fprintf(p.fpLog, "\n%sEncountered beta<0, resetting CG.", p.linePrefix);
177  beta=0.0;
178  }
179  }
180  forceGradDirection = false;
181  fprintf(p.fpLog, "\n"); fflush(p.fpLog);
182  if(sqrt(gKNorm/p.nDim) < p.knormThreshold)
183  { fprintf(p.fpLog, "%sConverged (|grad|_K<%le).\n", p.linePrefix, p.knormThreshold);
184  fflush(p.fpLog); return E;
185  }
186  if(ediffCheck.checkConvergence(E))
187  { fprintf(p.fpLog, "%sConverged (|Delta %s|<%le for %d iters).\n",
189  fflush(p.fpLog); return E;
190  }
191  if(!std::isfinite(gKNorm))
192  { fprintf(p.fpLog, "%s|grad|_K=%le. Stopping ...\n", p.linePrefix, gKNorm);
193  fflush(p.fpLog); return E;
194  }
195  if(!std::isfinite(E))
196  { fprintf(p.fpLog, "%sE=%le. Stopping ...\n", p.linePrefix, E);
197  fflush(p.fpLog); return E;
198  }
199  if(iter>=p.nIterations) break;
200  if(gPrevUsed) gPrev = g;
201  gKNormPrev = gKNorm;
202 
203  //Update search direction
204  d *= beta; axpy(-1.0, Kg, d); // d = beta*d - Kg
205  constrain(d); //restrict search direction to allowed subspace
206 
207  //Line minimization
208  alphaT = std::min(alphaT, safeStepSize(d));
209  if(linmin(*this, p, d, alphaT, alpha, E, g, Kg))
210  { //linmin succeeded:
211  if(p.updateTestStepSize)
212  { alphaT = alpha;
213  if(alphaT<p.alphaTmin) //bad step size
214  alphaT = p.alphaTstart; //make sure next test step size is not too bad
215  }
216  }
217  else
218  { //linmin failed:
219  fprintf(p.fpLog, "%s\tUndoing step.\n", p.linePrefix);
220  step(d, -alpha);
221  E = sync(compute(&g, &Kg));
222  if(beta)
223  { //Failed, but not along the gradient direction:
224  fprintf(p.fpLog, "%s\tStep failed: resetting search direction.\n", p.linePrefix);
225  fflush(p.fpLog);
226  forceGradDirection = true; //reset search direction
227  }
228  else
229  { //Failed along the gradient direction
230  fprintf(p.fpLog, "%s\tStep failed along negative gradient direction.\n", p.linePrefix);
231  fprintf(p.fpLog, "%sProbably at roundoff error limit. (Stopping)\n", p.linePrefix);
232  fflush(p.fpLog);
233  return E;
234  }
235  }
236  }
237  fprintf(p.fpLog, "%sNone of the convergence criteria satisfied after %d iterations.\n", p.linePrefix, iter);
238  return E;
239 }
240 
241 
242 template<typename Vector> void Minimizable<Vector>::fdTest(const MinimizeParams& p)
243 {
244  const double deltaMin = 1e-9;
245  const double deltaMax = 1e+1;
246  const double deltaScale = 1e+1;
247  string fdPrefixString = p.linePrefix + string("fdTest: ");
248  const char* fdPrefix = fdPrefixString.c_str();
249  fprintf(p.fpLog, "%s--------------------------------------\n", fdPrefix);
250  Vector g, Kg;
251  double E0 = sync(compute(&g, &Kg));
252 
253  Vector dx;
254  { // Set the direction to be a random vector of the same norm
255  // as the preconditioned gradient times the initial test step size
256  dx = clone(Kg);
257  randomize(dx);
258  constrain(dx);
259  dx *= p.alphaTstart * sqrt(sync(dot(Kg,Kg))/sync(dot(dx,dx)));
260  }
261  double dE_ddelta = sync(dot(dx, g)); //directional derivative at delta=0
262 
263  double deltaPrev=0;
264  for(double delta=deltaMin; delta<=deltaMax; delta*=deltaScale)
265  { double dE = dE_ddelta*delta;
266  step(dx, delta-deltaPrev); deltaPrev=delta;
267  double deltaE = sync(compute(0,0)) - E0;
268  fprintf(p.fpLog, "%s delta=%le:\n", fdPrefix, delta);
269  fprintf(p.fpLog, "%s d%s Ratio: %19.16lf\n", fdPrefix, p.energyLabel, deltaE/dE);
270  fprintf(p.fpLog, "%s d%s Error: %19.16lf\n", fdPrefix, p.energyLabel, sqrt(p.nDim)*1.1e-16/fabs(dE));
271  }
272  fprintf(p.fpLog, "%s--------------------------------------\n", fdPrefix);
273  step(dx, -deltaPrev); //restore state to original value
274 }
275 
276 
277 template<typename Vector> int LinearSolvable<Vector>::solve(const Vector& rhs, const MinimizeParams& p)
278 { //Initialize:
279  Vector r = clone(rhs); axpy(-1.0, hessian(state), r); //residual r = rhs - A.state;
280  Vector z = precondition(r), d = r; //the preconditioned residual and search direction
281  double beta=0.0, rdotzPrev=0.0, rdotz = sync(dot(r, z));
282 
283  //Check initial residual
284  double rzNorm = sqrt(fabs(rdotz)/p.nDim);
285  fprintf(p.fpLog, "%sInitial: sqrt(|r.z|): %12.6le\n", p.linePrefix, rzNorm); fflush(p.fpLog);
286  if(rzNorm<p.knormThreshold) { fprintf(p.fpLog, "%sConverged sqrt(r.z)<%le\n", p.linePrefix, p.knormThreshold); fflush(p.fpLog); return 0; }
287 
288  //Main loop:
289  int iter;
290  for(iter=0; iter<p.nIterations && !killFlag; iter++)
291  { //Update search direction:
292  if(rdotzPrev)
293  { beta = rdotz/rdotzPrev;
294  d *= beta; axpy(1.0, z, d); // d = z + beta*d
295  }
296  else d = clone(z); //fresh search direction (along gradient)
297  //Step:
298  Vector w = hessian(d);
299  double alpha = rdotz/sync(dot(w,d));
300  axpy(alpha, d, state);
301  axpy(-alpha, w, r);
302  z = precondition(r);
303  rdotzPrev = rdotz;
304  rdotz = sync(dot(r, z));
305  //Print info:
306  double rzNorm = sqrt(fabs(rdotz)/p.nDim);
307  fprintf(p.fpLog, "%sIter: %3d sqrt(|r.z|): %12.6le alpha: %12.6le beta: %13.6le t[s]: %9.2lf\n",
308  p.linePrefix, iter, rzNorm, alpha, beta, clock_sec()); fflush(p.fpLog);
309  //Check convergence:
310  if(rzNorm<p.knormThreshold) { fprintf(p.fpLog, "%sConverged sqrt(r.z)<%le\n", p.linePrefix, p.knormThreshold); fflush(p.fpLog); return iter; }
311  }
312  fprintf(p.fpLog, "%sGradient did not converge within threshold in %d iterations\n", p.linePrefix, iter); fflush(p.fpLog);
313  return iter;
314 }
315 
317 #endif // JDFTX_CORE_MINIMIZE_H
double alphaTmin
minimum value of the test-step size (algorithm gives up when difficulties cause alphaT to fall below this value)
Definition: MinimizeParams.h:60
virtual void constrain(Vector &)
Constrain search directions to the space of free directions for minimize.
Definition: Minimize.h:59
Limited memory version of the BFGS algorithm.
Definition: MinimizeParams.h:36
Definition: Minimize_linmin.h:27
ScalarField sqrt(const ScalarField &)
Elementwise square root (preserve input)
Definition: Minimize.h:85
void randomize(TptrCollection &x)
Initialize to normal random numbers:
Definition: ScalarFieldArray.h:154
int nIterations
Maximum number of iterations (default 100)
Definition: MinimizeParams.h:48
complex dot(const Tptr &X, const Tptr &Y)
Definition: Operators.h:196
Hestenes-Stiefel (preconditioned) conjugate gradients.
Definition: MinimizeParams.h:35
virtual bool report(int iter)
Definition: Minimize.h:56
Parameters to control the minimization algorithm.
Definition: MinimizeParams.h:29
double energyDiffThreshold
stop when energy change is below this for nEnergyDiff successive iterations (default: 0) ...
Definition: MinimizeParams.h:56
virtual double compute(Vector *grad, Vector *Kgrad)=0
Returns the objective function at the current state and store the gradient in grad and preconditioned...
virtual double sync(double x) const
Override to synchronize scalars over MPI processes (if the same minimization is happening in sync over several processes)
Definition: Minimize.h:62
double alphaTstart
initial value for the test-step size (default: 1.0)
Definition: MinimizeParams.h:59
int nEnergyDiff
number of successive iterations for energyDiffThreshold check (default: 2)
Definition: MinimizeParams.h:57
Steepest Descent (always along negative (preconditioned) gradient)
Definition: MinimizeParams.h:37
double knormThreshold
stop when norm of residual against preconditioner falls below this (default: 0)
Definition: MinimizeParams.h:55
void fdTest(const MinimizeParams &params)
Tptr clone(const Tptr &X)
Clone (NOTE: operator= is by reference for the ScalarField classes)
Definition: Operators.h:111
virtual Vector precondition(const Vector &v) const
Override to enable preconditioning: return the preconditioned vector, given a vector.
Definition: Minimize.h:93
bool updateTestStepSize
set alphaT=alpha after every iteration if true (default: true)
Definition: MinimizeParams.h:61
double clock_sec()
Elapsed time in seconds (from start of program)
bool killFlag
Flag set by signal handlers - all compute loops should quit cleanly when this is set.
virtual double sync(double x) const
Override to synchronize scalars over MPI processes (if the same minimization is happening in sync over several processes)
Definition: Minimize.h:96
virtual double safeStepSize(const Vector &dir) const
Override to return maximum safe step size along a given direction. Steps can be arbitrarily large by ...
Definition: Minimize.h:65
Miscellaneous utilities.
double minimize(const MinimizeParams &params)
Minimize this objective function with algorithm controlled by params and return the minimized value...
Polak-Ribiere (preconditioned) conjugate gradients (default)
Definition: MinimizeParams.h:33
const char * linePrefix
prefix for each output line of minimizer, useful for nested minimizations (default "CG\t") ...
Definition: MinimizeParams.h:52
const char * energyLabel
Label for the minimized quantity (default "E")
Definition: MinimizeParams.h:53
Fletcher-Reeves (preconditioned) conjugate gradients.
Definition: MinimizeParams.h:34
int nDim
Dimension of optimization space; used only for knormThreshold (default 1)
Definition: MinimizeParams.h:49
int solve(const Vector &rhs, const MinimizeParams &params)
const char * energyFormat
printf format for the minimized quantity (default "%22.15le")
Definition: MinimizeParams.h:54
FILE * fpLog
Stream to log iterations to.
Definition: MinimizeParams.h:51
Definition: Minimize.h:46
DirectionUpdateScheme
Search direction update scheme.
Definition: MinimizeParams.h:32
virtual void step(const Vector &dir, double alpha)=0
Move the state in parameter space along direction dir with scale alpha.
void axpy(double alpha, const Tptr &X, Tptr &Y)
Generic axpy for complex data types (Note: null pointers are treated as zero)
Definition: Operators.h:158
std::basic_string< char, ichar_traits > string
Case-insensitive string.
Definition: string.h:42
bool fdTest
whether to perform a finite difference test before each minimization (default false) ...
Definition: MinimizeParams.h:70
Vector state
the location of the minimum, obtained by solving hessian * state == rhs
Definition: Minimize.h:87