📄 optimization.java
字号:
// Check converge on x
boolean finish = false;
double test=0.0;
for(int h=0; h<l; h++){
deltaX[h] = x[h]-oldX[h];
double tmp=Math.abs(deltaX[h])/
Math.max(Math.abs(x[h]), 1.0);
if(tmp > test) test = tmp;
}
if(test < m_Zero){
if (m_Debug)
System.err.println("\nDeltaX converge: "+test);
finish = true;
}
// Check zero gradient
grad = evaluateGradient(x);
test=0.0;
double denom=0.0, dxSq=0.0, dgSq=0.0, newlyBounded=0.0;
for(int g=0; g<l; g++){
if(!isFixed[g]){
deltaGrad[g] = grad[g] - oldGrad[g];
// Calculate the denominators
denom += deltaX[g]*deltaGrad[g];
dxSq += deltaX[g]*deltaX[g];
dgSq += deltaGrad[g]*deltaGrad[g];
}
else // Only newly bounded variables will be non-zero
newlyBounded += deltaX[g]*(grad[g]-oldGrad[g]);
// Note: CANNOT use projected gradient for testing
// convergence because of newly bounded variables
double tmp = Math.abs(grad[g])*
Math.max(Math.abs(direct[g]),1.0)/
Math.max(Math.abs(m_f),1.0);
if(tmp > test) test = tmp;
}
if(test < m_Zero){
if (m_Debug)
System.err.println("Gradient converge: "+test);
finish = true;
}
// dg'*dx could be < 0 using inexact lnsrch
if(m_Debug)
System.err.println("dg'*dx="+(denom+newlyBounded));
// dg'*dx = 0
if(Math.abs(denom+newlyBounded) < m_Zero)
finish = true;
int size = wsBdsIndx.size();
boolean isUpdate = true; // Whether to update BFGS formula
// Converge: check whether release any current constraints
if(finish){
if (m_Debug)
System.err.println("Test any release possible ...");
if(toFree != null)
oldToFree = (FastVector)toFree.copy();
toFree = new FastVector();
for(int m=size-1; m>=0; m--){
int index=((Integer)wsBdsIndx.elementAt(m)).intValue();
double[] hessian = evaluateHessian(x, index);
double deltaL=0.0;
if(hessian != null){
for(int mm=0; mm<hessian.length; mm++)
if(!isFixed[mm]) // Free variable
deltaL += hessian[mm]*direct[mm];
}
// First and second order Lagrangian multiplier estimate
// If user didn't provide Hessian, use first-order only
double L1, L2;
if(x[index] >= constraints[1][index]) // Upper bound
L1 = -grad[index];
else if(x[index] <= constraints[0][index])// Lower bound
L1 = grad[index];
else
throw new Exception("x["+index+"] not fixed on the"+
" bounds where it should have been!");
// L2 = L1 + deltaL
L2 = L1 + deltaL;
if (m_Debug)
System.err.println("Variable "+index+
": Lagrangian="+L1+"|"+L2);
//Check validity of Lagrangian multiplier estimate
boolean isConverge =
(2.0*Math.abs(deltaL)) < Math.min(Math.abs(L1),
Math.abs(L2));
if((L1*L2>0.0) && isConverge){ //Same sign and converge: valid
if(L2 < 0.0){// Negative Lagrangian: feasible
toFree.addElement(new Integer(index));
wsBdsIndx.removeElementAt(m);
finish=false; // Not optimal, cannot finish
}
}
// Although hardly happen, better check it
// If the first-order Lagrangian multiplier estimate is wrong,
// avoid zigzagging
if((hessian==null) && equal(toFree, oldToFree))
finish = true;
}
if(finish){// Min. found
if (m_Debug)
System.err.println("Minimum found.");
m_f = objectiveFunction(x);
if(Double.isNaN(m_f))
throw new Exception("Objective function value is NaN!");
return x;
}
// Free some variables
for(int mmm=0; mmm<toFree.size(); mmm++){
int freeIndx=((Integer)toFree.elementAt(mmm)).intValue();
isFixed[freeIndx] = false; // Free this variable
if(x[freeIndx] <= constraints[0][freeIndx]){// Lower bound
nwsBounds[0][freeIndx] = constraints[0][freeIndx];
if (m_Debug)
System.err.println("Free variable "+freeIndx+
" from bound "+
nwsBounds[0][freeIndx]);
}
else{ // Upper bound
nwsBounds[1][freeIndx] = constraints[1][freeIndx];
if (m_Debug)
System.err.println("Free variable "+freeIndx+
" from bound "+
nwsBounds[1][freeIndx]);
}
lowerTriangleMatrix.setElement(freeIndx, freeIndx, 1.0);
diagonalOfCholesky[freeIndx] = 1.0;
isUpdate = false;
}
}
if(denom<Math.max(m_Zero*Math.sqrt(dxSq)*Math.sqrt(dgSq), m_Zero)){
if (m_Debug)
System.err.println("dg'*dx negative!");
isUpdate = false; // Do not update
}
// If Hessian will be positive definite, update it
if(isUpdate){
// modify once: dg*dg'/(dg'*dx)
double coeff = 1.0/denom; // 1/(dg'*dx)
updateCholeskyFactor(lowerTriangleMatrix,diagonalOfCholesky,deltaGrad,coeff,isFixed);
// modify twice: g*g'/(g'*p)
coeff = 1.0/m_Slope; // 1/(g'*p)
updateCholeskyFactor(lowerTriangleMatrix,diagonalOfCholesky,oldGrad,coeff,isFixed);
}
}
// Find new direction. Initilized the variables. Do not create new variable
// in each iteration. TWang . Mar 23. 2005.
// Matrix LD = new Matrix(l,l); // L*D
// double[] b = new double[l];
tempLD.initialize();
for(int inner_i = 0; inner_i<l; inner_i++){
tempDiagonal[inner_i] = 0;
}
//>> END. TWang
for(int k=0; k<l; k++){
if(!isFixed[k]) tempDiagonal[k] = -grad[k];
else tempDiagonal[k] = 0.0;
for(int j=k; j<l; j++){ // Lower triangle
if(!isFixed[j] && !isFixed[k])
tempLD.setElement(j, k, lowerTriangleMatrix.getElement(j,k)*diagonalOfCholesky[k]);
}
}
// Solve (LD)*y = -g, where y=L'*direct
double[] LDIR = solveTriangle(tempLD, tempDiagonal, true, isFixed);
//tempLD = null; // DO not mark it as NULL; since we will reuse this structure. TWang. Mar 23, 2005.
for(int m=0; m<LDIR.length; m++){
if(Double.isNaN(LDIR[m]))
throw new Exception("L*direct["+m+"] is NaN!"
+"|-g="+tempDiagonal[m]+"|"+isFixed[m]
+"|diag="+diagonalOfCholesky[m]);
}
// Solve L'*direct = y
direct = solveTriangle(lowerTriangleMatrix, LDIR, false, isFixed);
for(int m=0; m<direct.length; m++){
if(Double.isNaN(direct[m]))
throw new Exception("direct is NaN!");
}
System.gc();
}
if(m_Debug)
System.err.println("Cannot find minimum"+
" -- too many interations!");
m_X = x;
return null;
}
/**
* Solve the linear equation of TX=B where T is a triangle matrix
* It can be solved using back/forward substitution, with O(N^2)
* complexity
* @param t the matrix T
* @param b the vector B
* @param isLower whether T is a lower or higher triangle matrix
* @param isZero which row(s) of T are not used when solving the equation.
* If it's null or all 'false', then every row is used.
* @return the solution of X
*/
public static double[] solveTriangle(Matrix t, double[] b,
boolean isLower, boolean[] isZero){
int n = b.length;
double[] result = new double[n];
if(isZero == null)
isZero = new boolean[n];
if(isLower){ // lower triangle, forward-substitution
int j = 0;
while((j<n)&&isZero[j]){result[j]=0.0; j++;} // go to the first row
if(j<n){
result[j] = b[j]/t.getElement(j,j);
for(; j<n; j++){
if(!isZero[j]){
double numerator=b[j];
for(int k=0; k<j; k++)
numerator -= t.getElement(j,k)*result[k];
result[j] = numerator/t.getElement(j,j);
}
else
result[j] = 0.0;
}
}
}
else{ // Upper triangle, back-substitution
int j=n-1;
while((j>=0)&&isZero[j]){result[j]=0.0; j--;} // go to the last row
if(j>=0){
result[j] = b[j]/t.getElement(j,j);
for(; j>=0; j--){
if(!isZero[j]){
double numerator=b[j];
for(int k=j+1; k<n; k++)
numerator -= t.getElement(k,j)*result[k];
result[j] = numerator/t.getElement(j,j);
}
else
result[j] = 0.0;
}
}
}
return result;
}
/**
* One rank update of the Cholesky factorization of B matrix in BFGS updates,
* i.e. B = LDL', and B_{new} = LDL' + coeff*(vv') where L is a unit lower triangle
* matrix and D is a diagonal matrix, and v is a vector.<br>
* When coeff > 0, we use C1 algorithm, and otherwise we use C2 algorithm described
* in ``Methods for Modifying Matrix Factorizations''
*
* @param L the unit triangle matrix L
* @param D the diagonal matrix D
* @param v the update vector v
* @param coeff the coeffcient of update
* @param isFixed which variables are not to be updated
*/
protected void updateCholeskyFactor(Matrix L, double[] D,
double[] v, double coeff,
boolean[] isFixed)
throws Exception{
double t, p, b;
int n = v.length;
double[] vp = new double[n];
for (int i=0; i<v.length; i++)
if(!isFixed[i])
vp[i]=v[i];
else
vp[i]=0.0;
if(coeff>0.0){
t = coeff;
for(int j=0; j<n; j++){
if(isFixed[j]) continue;
p = vp[j];
double d=D[j], dbarj=d+t*p*p;
D[j] = dbarj;
b = p*t/dbarj;
t *= d/dbarj;
for(int r=j+1; r<n; r++){
if(!isFixed[r]){
double l=L.getElement(r, j);
vp[r] -= p*l;
L.setElement(r, j, l+b*vp[r]);
}
else
L.setElement(r, j, 0.0);
}
}
}
else{
double[] P = solveTriangle(L, v, true, isFixed);
t = 0.0;
for(int i=0; i<n; i++)
if(!isFixed[i])
t += P[i]*P[i]/D[i];
double sqrt=1.0+coeff*t;
sqrt = (sqrt<0.0)? 0.0 : Math.sqrt(sqrt);
double alpha=coeff, sigma=coeff/(1.0+sqrt), rho, theta;
for(int j=0; j<n; j++){
if(isFixed[j]) continue;
double d=D[j];
p = P[j]*P[j]/d;
theta = 1.0+sigma*p;
t -= p;
if(t<0.0) t=0.0; // Rounding error
double plus = sigma*sigma*p*t;
if((j<n-1) && (plus <= m_Zero))
plus=m_Zero; // Avoid rounding error
rho = theta*theta + plus;
D[j] = rho*d;
if(Double.isNaN(D[j])){
throw new Exception("d["+j+"] NaN! P="+P[j]+",d="+d+
",t="+t+",p="+p+",sigma="+sigma+
",sclar="+coeff);
}
b=alpha*P[j]/(rho*d);
alpha /= rho;
rho = Math.sqrt(rho);
double sigmaOld = sigma;
sigma *= (1.0+rho)/(rho*(theta+rho));
if((j<n-1) &&
(Double.isNaN(sigma) || Double.isInfinite(sigma)))
throw new Exception("sigma NaN/Inf! rho="+rho+
",theta="+theta+",P["+j+"]="+
P[j]+",p="+p+",d="+d+",t="+t+
",oldsigma="+sigmaOld);
for(int r=j+1; r<n; r++){
if(!isFixed[r]){
double l=L.getElement(r, j);
vp[r] -= P[j]*l;
L.setElement(r, j, l+b*vp[r]);
}
else
L.setElement(r, j, 0.0);
}
}
}
}
/**
* Check whether the two integer vectors equal to each other
* Two integer vectors are equal if all the elements are the
* same, regardless of the order of the elements
*
* @param a one integer vector
* @param b another integer vector
* @return whether they are equal
*/
private boolean equal(FastVector a, FastVector b){
if((a==null) || (b==null) || (a.size()!=b.size()))
return false;
int size=a.size();
// Store into int arrays
int[] ia=new int[size], ib=new int[size];
for(int i=0;i<size;i++){
ia[i] = ((Integer)a.elementAt(i)).intValue();
ib[i] = ((Integer)b.elementAt(i)).intValue();
}
// Only values matter, order does not matter
int[] sorta=Utils.sort(ia), sortb=Utils.sort(ib);
for(int j=0; j<size;j++)
if(ia[sorta[j]] != ib[sortb[j]])
return false;
return true;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -