📄 optimization.java
字号:
test=0.0; double denom=0.0, dxSq=0.0, dgSq=0.0, newlyBounded=0.0; for(int g=0; g<l; g++){ if(!isFixed[g]){ deltaGrad[g] = grad[g] - oldGrad[g]; // Calculate the denominators denom += deltaX[g]*deltaGrad[g]; dxSq += deltaX[g]*deltaX[g]; dgSq += deltaGrad[g]*deltaGrad[g]; } else // Only newly bounded variables will be non-zero newlyBounded += deltaX[g]*(grad[g]-oldGrad[g]); // Note: CANNOT use projected gradient for testing // convergence because of newly bounded variables double tmp = Math.abs(grad[g])* Math.max(Math.abs(direct[g]),1.0)/ Math.max(Math.abs(m_f),1.0); if(tmp > test) test = tmp; } if(test < m_Zero){ if (m_Debug) System.err.println("Gradient converge: "+test); finish = true; } // dg'*dx could be < 0 using inexact lnsrch if(m_Debug) System.err.println("dg'*dx="+(denom+newlyBounded)); // dg'*dx = 0 if(Math.abs(denom+newlyBounded) < m_Zero) finish = true; int size = wsBdsIndx.size(); boolean isUpdate = true; // Whether to update BFGS formula // Converge: check whether release any current constraints if(finish){ if (m_Debug) System.err.println("Test any release possible ..."); if(toFree != null) oldToFree = (FastVector)toFree.copy(); toFree = new FastVector(); for(int m=size-1; m>=0; m--){ int index=((Integer)wsBdsIndx.elementAt(m)).intValue(); double[] hessian = evaluateHessian(x, index); double deltaL=0.0; if(hessian != null){ for(int mm=0; mm<hessian.length; mm++) if(!isFixed[mm]) // Free variable deltaL += hessian[mm]*direct[mm]; } // First and second order Lagrangian multiplier estimate // If user didn't provide Hessian, use first-order only double L1, L2; if(x[index] >= constraints[1][index]) // Upper bound L1 = -grad[index]; else if(x[index] <= constraints[0][index])// Lower bound L1 = grad[index]; else throw new Exception("x["+index+"] not fixed on the"+ " bounds where it should have been!"); // L2 = L1 + deltaL L2 = L1 + deltaL; if (m_Debug) System.err.println("Variable "+index+ ": Lagrangian="+L1+"|"+L2); //Check validity of Lagrangian multiplier estimate boolean isConverge = (2.0*Math.abs(deltaL)) < Math.min(Math.abs(L1), Math.abs(L2)); if((L1*L2>0.0) && isConverge){ //Same sign and converge: valid if(L2 < 0.0){// Negative Lagrangian: feasible toFree.addElement(new Integer(index)); wsBdsIndx.removeElementAt(m); finish=false; // Not optimal, cannot finish } } // Although hardly happen, better check it // If the first-order Lagrangian multiplier estimate is wrong, // avoid zigzagging if((hessian==null) && equal(toFree, oldToFree)) finish = true; } if(finish){// Min. found if (m_Debug) System.err.println("Minimum found."); m_f = objectiveFunction(x); return x; } // Free some variables for(int mmm=0; mmm<toFree.size(); mmm++){ int freeIndx=((Integer)toFree.elementAt(mmm)).intValue(); isFixed[freeIndx] = false; // Free this variable if(x[freeIndx] <= constraints[0][freeIndx]){// Lower bound nwsBounds[0][freeIndx] = constraints[0][freeIndx]; if (m_Debug) System.err.println("Free variable "+freeIndx+ " from bound "+ nwsBounds[0][freeIndx]); } else{ // Upper bound nwsBounds[1][freeIndx] = constraints[1][freeIndx]; if (m_Debug) System.err.println("Free variable "+freeIndx+ " from bound "+ nwsBounds[1][freeIndx]); } L.setElement(freeIndx, freeIndx, 1.0); D.setElement(freeIndx, freeIndx, 1.0); isUpdate = false; } } if(denom<Math.max(m_Zero*Math.sqrt(dxSq)*Math.sqrt(dgSq), m_Zero)){ if (m_Debug) System.err.println("dg'*dx negative!"); isUpdate = false; // Do not update } // If Hessian will be positive definite, update it if(isUpdate){ double[] v= new double[l]; Matrix[] result; // modify once: dg*dg'/(dg'*dx) double coeff = 1.0/denom; // 1/(dg'*dx) result = updateCholeskyFactor(L,D,deltaGrad,coeff,isFixed); // modify twice: g*g'/(g'*p) coeff = 1.0/m_Slope; // 1/(g'*p) result=updateCholeskyFactor (result[0],result[1],oldGrad,coeff,isFixed); L = result[0]; D = result[1]; } } // Find new direction Matrix LD = new Matrix(l,l); // L*D double[] b = new double[l]; for(int k=0; k<l; k++){ if(!isFixed[k]) b[k] = -grad[k]; else b[k] = 0.0; for(int j=k; j<l; j++){ // Lower triangle if(!isFixed[j] && !isFixed[k]) LD.setElement(j, k, L.getElement(j,k)*D.getElement(k,k)); } } // Solve (LD)*y = -g, where y=L'*direct double[] LDIR = solveTriangle(LD, b, true, isFixed); for(int m=0; m<LDIR.length; m++){ if(Double.isNaN(LDIR[m])) throw new Exception("L*direct["+m+"] is NaN!" +"|-g="+b[m]+"|"+isFixed[m] +"|diag="+D.getElement(m,m)); } // Solve L'*direct = y direct = solveTriangle(L, LDIR, false, isFixed); for(int m=0; m<direct.length; m++){ if(Double.isNaN(direct[m])) throw new Exception("direct is NaN!"); } System.gc(); } if(m_Debug) System.err.println("Cannot find minimum"+ " -- too many interations!"); m_X = x; return null; } /** * Solve the linear equation of TX=B where T is a triangle matrix * It can be solved using back/forward substitution, with O(N^2) * complexity * @param t the matrix T * @param b the vector B * @param isLower whether T is a lower or higher triangle matrix * @param isZero which row(s) of T are not used when solving the equation. * If it's null or all 'false', then every row is used. * @return the solution of X */ public static double[] solveTriangle(Matrix t, double[] b, boolean isLower, boolean[] isZero){ int n = b.length; double[] result = new double[n]; if(isZero == null) isZero = new boolean[n]; if(isLower){ // lower triangle, forward-substitution int j = 0; while((j<n)&&isZero[j]){result[j]=0.0; j++;} // go to the first row if(j<n){ result[j] = b[j]/t.getElement(j,j); for(; j<n; j++){ if(!isZero[j]){ double numerator=b[j]; for(int k=0; k<j; k++) numerator -= t.getElement(j,k)*result[k]; result[j] = numerator/t.getElement(j,j); } else result[j] = 0.0; } } } else{ // Upper triangle, back-substitution int j=n-1; while((j>=0)&&isZero[j]){result[j]=0.0; j--;} // go to the last row if(j>=0){ result[j] = b[j]/t.getElement(j,j); for(; j>=0; j--){ if(!isZero[j]){ double numerator=b[j]; for(int k=j+1; k<n; k++) numerator -= t.getElement(k,j)*result[k]; result[j] = numerator/t.getElement(j,j); } else result[j] = 0.0; } } } return result; } /** * One rank update of the Cholesky factorization of B matrix in BFGS updates, * i.e. B = LDL', and B_{new} = LDL' + coeff*(vv') where L is a unit lower triangle * matrix and D is a diagonal matrix, and v is a vector.<br> * When coeff > 0, we use C1 algorithm, and otherwise we use C2 algorithm described * in ``Methods for Modifying Matrix Factorizations'' * * @param L the unit triangle matrix L * @param D the diagonal matrix D * @param v the update vector v * @param coeff the coeffcient of update * @param isFixed which variables are not to be updated * @return the updated L and D */ protected Matrix[] updateCholeskyFactor(Matrix L, Matrix D, double[] v, double coeff, boolean[] isFixed) throws Exception{ double t, p, b; int n = v.length; double[] vp = new double[n]; Matrix dBar = new Matrix(n, n), lBar = new Matrix(n, n); for (int i=0; i<v.length; i++) if(!isFixed[i]) vp[i]=v[i]; else vp[i]=0.0; if(coeff>0.0){ t = coeff; for(int j=0; j<n; j++){ if(isFixed[j]) continue; lBar.setElement(j, j, 1.0); // Unit triangle p = vp[j]; double d=D.getElement(j,j), dbarj=d+t*p*p; dBar.setElement(j, j, dbarj); b = p*t/dbarj; t *= d/dbarj; for(int r=j+1; r<n; r++){ if(!isFixed[r]){ double l=L.getElement(r, j); vp[r] -= p*l; lBar.setElement(r, j, l+b*vp[r]); } else lBar.setElement(r, j, 0.0); } } } else{ double[] P = solveTriangle(L, v, true, isFixed); t = 0.0; for(int i=0; i<n; i++) if(!isFixed[i]) t += P[i]*P[i]/D.getElement(i,i); double sqrt=1.0+coeff*t; sqrt = (sqrt<0.0)? 0.0 : Math.sqrt(sqrt); double alpha=coeff, sigma=coeff/(1.0+sqrt), rho, theta; for(int j=0; j<n; j++){ if(isFixed[j]) continue; lBar.setElement(j, j, 1.0); // Unit triangle double d=D.getElement(j,j); p = P[j]*P[j]/d; theta = 1.0+sigma*p; t -= p; if(t<0.0) t=0.0; // Rounding error double plus = sigma*sigma*p*t; if((j<n-1) && (plus <= m_Zero)) plus=m_Zero; // Avoid rounding error rho = theta*theta + plus; dBar.setElement(j, j, rho*d); if(Double.isNaN(dBar.getElement(j,j))){ throw new Exception("d["+j+"] NaN! P="+P[j]+",d="+d+ ",t="+t+",p="+p+",sigma="+sigma+ ",sclar="+coeff); } b=alpha*P[j]/(rho*d); alpha /= rho; rho = Math.sqrt(rho); double sigmaOld = sigma; sigma *= (1.0+rho)/(rho*(theta+rho)); if((j<n-1) && (Double.isNaN(sigma) || Double.isInfinite(sigma))) throw new Exception("sigma NaN/Inf! rho="+rho+ ",theta="+theta+",P["+j+"]="+ P[j]+",p="+p+",d="+d+",t="+t+ ",oldsigma="+sigmaOld); for(int r=j+1; r<n; r++){ if(!isFixed[r]){ double l=L.getElement(r, j); vp[r] -= P[j]*l; lBar.setElement(r, j, l+b*vp[r]); } else lBar.setElement(r, j, 0.0); } } } Matrix[] rt = new Matrix[2]; rt[0] = lBar; rt[1] = dBar; return rt; } /** * Check whether the two integer vectors equal to each other * Two integer vectors are equal if all the elements are the * same, regardless of the order of the elements * * @param a one integer vector * @param b another integer vector * @return whether they are equal */ private boolean equal(FastVector a, FastVector b){ if((a==null) || (b==null) || (a.size()!=b.size())) return false; int size=a.size(); // Store into int arrays int[] ia=new int[size], ib=new int[size]; for(int i=0;i<size;i++){ ia[i] = ((Integer)a.elementAt(i)).intValue(); ib[i] = ((Integer)b.elementAt(i)).intValue(); } // Only values matter, order does not matter int[] sorta=Utils.sort(ia), sortb=Utils.sort(ib); for(int j=0; j<size;j++) if(ia[sorta[j]] != ib[sortb[j]]) return false; return true; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -