📄 optimization.java
字号:
if(test < m_Zero){ if (m_Debug) System.err.println("Gradient converge: "+test); finish = true; } // dg'*dx could be < 0 using inexact lnsrch if(m_Debug) System.err.println("dg'*dx="+(denom+newlyBounded)); // dg'*dx = 0 if(Math.abs(denom+newlyBounded) < m_Zero) finish = true; int size = wsBdsIndx.size(); boolean isUpdate = true; // Whether to update BFGS formula // Converge: check whether release any current constraints if(finish){ if (m_Debug) System.err.println("Test any release possible ..."); if(toFree != null) oldToFree = (DynamicIntArray)toFree.copy(); toFree = new DynamicIntArray(wsBdsIndx.size()); for(int m=size-1; m>=0; m--){ int index=wsBdsIndx.elementAt(m); double[] hessian = evaluateHessian(x, index); double deltaL=0.0; if(hessian != null){ for(int mm=0; mm<hessian.length; mm++) if(!isFixed[mm]) // Free variable deltaL += hessian[mm]*direct[mm]; } // First and second order Lagrangian multiplier estimate // If user didn't provide Hessian, use first-order only double L1, L2; if(x[index] >= constraints[1][index]) // Upper bound L1 = -grad[index]; else if(x[index] <= constraints[0][index])// Lower bound L1 = grad[index]; else throw new Exception("x["+index+"] not fixed on the"+ " bounds where it should have been!"); // L2 = L1 + deltaL L2 = L1 + deltaL; if (m_Debug) System.err.println("Variable "+index+ ": Lagrangian="+L1+"|"+L2); //Check validity of Lagrangian multiplier estimate boolean isConverge = (2.0*Math.abs(deltaL)) < Math.min(Math.abs(L1), Math.abs(L2)); if((L1*L2>0.0) && isConverge){ //Same sign and converge: valid if(L2 < 0.0){// Negative Lagrangian: feasible toFree.addElement(index); wsBdsIndx.removeElementAt(m); finish=false; // Not optimal, cannot finish } } // Although hardly happen, better check it // If the first-order Lagrangian multiplier estimate is wrong, // avoid zigzagging if((hessian==null) && (toFree != null) && toFree.equal(oldToFree)) finish = true; } if(finish){// Min. found if (m_Debug) System.err.println("Minimum found."); m_f = objectiveFunction(x); if(Double.isNaN(m_f)) throw new Exception("Objective function value is NaN!"); return x; } // Free some variables for(int mmm=0; mmm<toFree.size(); mmm++){ int freeIndx=toFree.elementAt(mmm); isFixed[freeIndx] = false; // Free this variable if(x[freeIndx] <= constraints[0][freeIndx]){// Lower bound nwsBounds[0][freeIndx] = constraints[0][freeIndx]; if (m_Debug) System.err.println("Free variable "+freeIndx+ " from bound "+ nwsBounds[0][freeIndx]); } else{ // Upper bound nwsBounds[1][freeIndx] = constraints[1][freeIndx]; if (m_Debug) System.err.println("Free variable "+freeIndx+ " from bound "+ nwsBounds[1][freeIndx]); } L.setElement(freeIndx, freeIndx, 1.0); D[freeIndx] = 1.0; isUpdate = false; } } if(denom<Math.max(m_Zero*Math.sqrt(dxSq)*Math.sqrt(dgSq), m_Zero)){ if (m_Debug) System.err.println("dg'*dx negative!"); isUpdate = false; // Do not update } // If Hessian will be positive definite, update it if(isUpdate){ // modify once: dg*dg'/(dg'*dx) double coeff = 1.0/denom; // 1/(dg'*dx) updateCholeskyFactor(L,D,deltaGrad,coeff,isFixed); // modify twice: g*g'/(g'*p) coeff = 1.0/m_Slope; // 1/(g'*p) updateCholeskyFactor(L,D,oldGrad,coeff,isFixed); } } // Find new direction Matrix LD = new Matrix(l,l); // L*D double[] b = new double[l]; for(int k=0; k<l; k++){ if(!isFixed[k]) b[k] = -grad[k]; else b[k] = 0.0; for(int j=k; j<l; j++){ // Lower triangle if(!isFixed[j] && !isFixed[k]) LD.setElement(j, k, L.getElement(j,k)*D[k]); } } // Solve (LD)*y = -g, where y=L'*direct double[] LDIR = solveTriangle(LD, b, true, isFixed); LD = null; for(int m=0; m<LDIR.length; m++){ if(Double.isNaN(LDIR[m])) throw new Exception("L*direct["+m+"] is NaN!" +"|-g="+b[m]+"|"+isFixed[m] +"|diag="+D[m]); } // Solve L'*direct = y direct = solveTriangle(L, LDIR, false, isFixed); for(int m=0; m<direct.length; m++){ if(Double.isNaN(direct[m])) throw new Exception("direct is NaN!"); } //System.gc(); } if(m_Debug) System.err.println("Cannot find minimum"+ " -- too many interations!"); m_X = x; return null; } /** * Solve the linear equation of TX=B where T is a triangle matrix * It can be solved using back/forward substitution, with O(N^2) * complexity * @param t the matrix T * @param b the vector B * @param isLower whether T is a lower or higher triangle matrix * @param isZero which row(s) of T are not used when solving the equation. * If it's null or all 'false', then every row is used. * @return the solution of X */ public static double[] solveTriangle(Matrix t, double[] b, boolean isLower, boolean[] isZero){ int n = b.length; double[] result = new double[n]; if(isZero == null) isZero = new boolean[n]; if(isLower){ // lower triangle, forward-substitution int j = 0; while((j<n)&&isZero[j]){result[j]=0.0; j++;} // go to the first row if(j<n){ result[j] = b[j]/t.getElement(j,j); for(; j<n; j++){ if(!isZero[j]){ double numerator=b[j]; for(int k=0; k<j; k++) numerator -= t.getElement(j,k)*result[k]; result[j] = numerator/t.getElement(j,j); } else result[j] = 0.0; } } } else{ // Upper triangle, back-substitution int j=n-1; while((j>=0)&&isZero[j]){result[j]=0.0; j--;} // go to the last row if(j>=0){ result[j] = b[j]/t.getElement(j,j); for(; j>=0; j--){ if(!isZero[j]){ double numerator=b[j]; for(int k=j+1; k<n; k++) numerator -= t.getElement(k,j)*result[k]; result[j] = numerator/t.getElement(j,j); } else result[j] = 0.0; } } } return result; } /** * One rank update of the Cholesky factorization of B matrix in BFGS updates, * i.e. B = LDL', and B_{new} = LDL' + coeff*(vv') where L is a unit lower triangle * matrix and D is a diagonal matrix, and v is a vector.<br/> * When coeff > 0, we use C1 algorithm, and otherwise we use C2 algorithm described * in ``Methods for Modifying Matrix Factorizations'' * * @param L the unit triangle matrix L * @param D the diagonal matrix D * @param v the update vector v * @param coeff the coeffcient of update * @param isFixed which variables are not to be updated */ protected void updateCholeskyFactor(Matrix L, double[] D, double[] v, double coeff, boolean[] isFixed) throws Exception{ double t, p, b; int n = v.length; double[] vp = new double[n]; for (int i=0; i<v.length; i++) if(!isFixed[i]) vp[i]=v[i]; else vp[i]=0.0; if(coeff>0.0){ t = coeff; for(int j=0; j<n; j++){ if(isFixed[j]) continue; p = vp[j]; double d=D[j], dbarj=d+t*p*p; D[j] = dbarj; b = p*t/dbarj; t *= d/dbarj; for(int r=j+1; r<n; r++){ if(!isFixed[r]){ double l=L.getElement(r, j); vp[r] -= p*l; L.setElement(r, j, l+b*vp[r]); } else L.setElement(r, j, 0.0); } } } else{ double[] P = solveTriangle(L, v, true, isFixed); t = 0.0; for(int i=0; i<n; i++) if(!isFixed[i]) t += P[i]*P[i]/D[i]; double sqrt=1.0+coeff*t; sqrt = (sqrt<0.0)? 0.0 : Math.sqrt(sqrt); double alpha=coeff, sigma=coeff/(1.0+sqrt), rho, theta; for(int j=0; j<n; j++){ if(isFixed[j]) continue; double d=D[j]; p = P[j]*P[j]/d; theta = 1.0+sigma*p; t -= p; if(t<0.0) t=0.0; // Rounding error double plus = sigma*sigma*p*t; if((j<n-1) && (plus <= m_Zero)) plus=m_Zero; // Avoid rounding error rho = theta*theta + plus; D[j] = rho*d; if(Double.isNaN(D[j])){ throw new Exception("d["+j+"] NaN! P="+P[j]+",d="+d+ ",t="+t+",p="+p+",sigma="+sigma+ ",sclar="+coeff); } b=alpha*P[j]/(rho*d); alpha /= rho; rho = Math.sqrt(rho); double sigmaOld = sigma; sigma *= (1.0+rho)/(rho*(theta+rho)); if((j<n-1) && (Double.isNaN(sigma) || Double.isInfinite(sigma))) throw new Exception("sigma NaN/Inf! rho="+rho+ ",theta="+theta+",P["+j+"]="+ P[j]+",p="+p+",d="+d+",t="+t+ ",oldsigma="+sigmaOld); for(int r=j+1; r<n; r++){ if(!isFixed[r]){ double l=L.getElement(r, j); vp[r] -= P[j]*l; L.setElement(r, j, l+b*vp[r]); } else L.setElement(r, j, 0.0); } } } } /** * Implements a simple dynamic array for ints. */ private class DynamicIntArray { /** The int array. */ private int[] m_Objects; /** The current size; */ private int m_Size = 0; /** The capacity increment */ private int m_CapacityIncrement = 1; /** The capacity multiplier. */ private int m_CapacityMultiplier = 2; /** * Constructs a vector with the given capacity. * * @param capacity the vector's initial capacity */ public DynamicIntArray(int capacity) { m_Objects = new int[capacity]; } /** * Adds an element to this vector. Increases its * capacity if its not large enough. * * @param element the element to add */ public final void addElement(int element) { if (m_Size == m_Objects.length) { int[] newObjects; newObjects = new int[m_CapacityMultiplier * (m_Objects.length + m_CapacityIncrement)]; System.arraycopy(m_Objects, 0, newObjects, 0, m_Size); m_Objects = newObjects; } m_Objects[m_Size] = element; m_Size++; } /** * Produces a copy of this vector. * * @return the new vector */ public final Object copy() { DynamicIntArray copy = new DynamicIntArray(m_Objects.length); copy.m_Size = m_Size; copy.m_CapacityIncrement = m_CapacityIncrement; copy.m_CapacityMultiplier = m_CapacityMultiplier; System.arraycopy(m_Objects, 0, copy.m_Objects, 0, m_Size); return copy; } /** * Returns the element at the given position. * * @param index the element's index * @return the element with the given index */ public final int elementAt(int index) { return m_Objects[index]; } /** * Check whether the two integer vectors equal to each other * Two integer vectors are equal if all the elements are the * same, regardless of the order of the elements * * @param b another integer vector * @return whether they are equal */ private boolean equal(DynamicIntArray b){ if((b==null) || (size()!=b.size())) return false; int size=size(); // Only values matter, order does not matter int[] sorta=Utils.sort(m_Objects), sortb=Utils.sort(b.m_Objects); for(int j=0; j<size;j++) if(m_Objects[sorta[j]] != b.m_Objects[sortb[j]]) return false; return true; } /** * Deletes an element from this vector. * * @param index the index of the element to be deleted */ public final void removeElementAt(int index) { System.arraycopy(m_Objects, index + 1, m_Objects, index, m_Size - index - 1); m_Size--; } /** * Removes all components from this vector and sets its * size to zero. */ public final void removeAllElements() { m_Objects = new int[m_Objects.length]; m_Size = 0; } /** * Returns the vector's current size. * * @return the vector's current size */ public final int size() { return m_Size; } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -