20 #ifndef JDFTX_CORE_SPLINE_H 21 #define JDFTX_CORE_SPLINE_H 28 #include <core/scalar.h> 37 std::vector<double>
getCoeff(
const std::vector<double>& samples,
bool oddExtension=
false);
43 __hostanddev__
double value(
const double* coeff,
double x);
49 __hostanddev__
double deriv(
const double* coeff,
double x);
54 __hostanddev__
void valueGrad(
double E_value,
double* E_coeff,
double x);
67 __hostanddev__
void getBernsteinCoeffs(
const double* coeff,
double x,
double& tR,
double& tL,
double (&b)[6])
73 for(
int i=0; i<6; i++) c[i] = coeff[j+i];
75 b[0] = (1./66) * (c[0] + 26*c[1] + 66*c[2] + 26*c[3] + c[4]);
76 b[1] = (1./33) * (8*c[1] + 33*c[2] + 18*c[3] + c[4]);
77 b[2] = (2./33) * (2*c[1] + 15*c[2] + 12*c[3] + c[4]);
78 b[3] = (2./33) * (c[1] + 12*c[2] + 15*c[3] + 2*c[4]);
79 b[4] = (1./33) * (c[1] + 18*c[2] + 33*c[3] + 8*c[4]);
80 b[5] = (1./66) * (c[1] + 26*c[2] + 66*c[3] + 26*c[4] + c[5]);
84 __hostanddev__
double value(
const double* coeff,
double x)
85 {
double tR, tL, b[6]; getBernsteinCoeffs(coeff, x, tR, tL, b);
88 for(
int i=0; i<5; i++) c[i] = tL*b[i] + tR*b[i+1];
89 for(
int i=0; i<4; i++) d[i] = tL*c[i] + tR*c[i+1];
90 for(
int i=0; i<3; i++) c[i] = tL*d[i] + tR*d[i+1];
91 for(
int i=0; i<2; i++) d[i] = tL*c[i] + tR*c[i+1];
92 return tL*d[0] + tR*d[1];
96 __hostanddev__
double deriv(
const double* coeff,
double x)
97 {
double tR, tL, b[6]; getBernsteinCoeffs(coeff, x, tR, tL, b);
100 for(
int i=0; i<5; i++) c[i] = b[i+1] - b[i];
101 for(
int i=0; i<4; i++) d[i] = tL*c[i] + tR*c[i+1];
102 for(
int i=0; i<3; i++) c[i] = tL*d[i] + tR*d[i+1];
103 for(
int i=0; i<2; i++) d[i] = tL*c[i] + tR*c[i+1];
104 return 5.*(tL*d[0] + tR*d[1]);
110 extern __shared__
double shared_E_coeff[];
112 __hostanddev__
void valueGrad(
double E_value,
double* E_coeff,
double x)
119 c[0]=0.;
for(
int i=0; i<2; i++) { c[i] += tL*b[i]; c[i+1] = tR*b[i]; }
120 b[0]=0.;
for(
int i=0; i<3; i++) { b[i] += tL*c[i]; b[i+1] = tR*c[i]; }
121 c[0]=0.;
for(
int i=0; i<4; i++) { c[i] += tL*b[i]; c[i+1] = tR*b[i]; }
122 b[0]=0.;
for(
int i=0; i<5; i++) { b[i] += tL*c[i]; b[i+1] = tR*c[i]; }
124 c[0] = (1./66) * (b[0]);
125 c[1] = (1./66) * (26*b[0] + 16*b[1] + 8*b[2] + 4*b[3] + 2*b[4] + b[5]);
126 c[2] = (1./33) * (33*b[0] + 33*b[1] + 30*b[2] + 24*b[3] + 18*b[4] + 13*b[5]);
127 c[3] = (1./33) * (13*b[0] + 18*b[1] + 24*b[2] + 30*b[3] + 33*b[4] + 33*b[5]);
128 c[4] = (1./66) * (b[0] + 2*b[1] + 4*b[2] + 8*b[3] + 16*b[4] + 26*b[5]);
129 c[5] = (1./66) * (b[5]);
131 #ifndef __CUDA_ARCH__ 132 for(
int i=0; i<6; i++) E_coeff[j+i] += E_value * c[i];
134 int iThread = threadIdx.x;
135 for(
int i=0; i<6; i++) shared_E_coeff[iThread*6+i] = E_value * c[i];
137 int extent = blockDim.x/2;
138 int stride = (blockDim.x+1)/2;
142 for(
int i=0; i<6; i++) shared_E_coeff[iThread*6+i] += shared_E_coeff[(stride+iThread)*6+i];
144 stride = (stride+1)/2;
149 for(
int i=0; i<6; i++) E_coeff[j+i] += shared_E_coeff[i];
155 #endif // JDFTX_CORE_SPLINE_H __hostanddev__ double value(const double *coeff, double x)
Compute value of quintic spline. Warning: x is not range-checked.
std::vector< double > getCoeff(const std::vector< double > &samples, bool oddExtension=false)
Generate quintic spline coefficients for a set of uniformly-spaced samples.
C4-continuous interpolation using quintic splines.
Definition: Spline.h:31
__hostanddev__ void valueGrad(double E_value, double *E_coeff, double x)
Gradient propagation corresponding to value()
__hostanddev__ double deriv(const double *coeff, double x)
Compute derivative (w.r.t x) of quintic spline. Warning: x is not range-checked.