📄 optimization.java

📁 wekaUT是 university texas austin 开发的基于weka的半指导学习(semi supervised learning)的分类器
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
		test=0.0;		double denom=0.0, dxSq=0.0, dgSq=0.0, newlyBounded=0.0; 		for(int g=0; g<l; g++){		    if(!isFixed[g]){ 		   			deltaGrad[g] = grad[g] - oldGrad[g];		  			// Calculate the denominators			    			denom += deltaX[g]*deltaGrad[g];			dxSq += deltaX[g]*deltaX[g];			dgSq += deltaGrad[g]*deltaGrad[g];		    }		    else // Only newly bounded variables will be non-zero			newlyBounded +=  deltaX[g]*(grad[g]-oldGrad[g]);		    		    // Note: CANNOT use projected gradient for testing 		    // convergence because of newly bounded variables		    double tmp = Math.abs(grad[g])*			Math.max(Math.abs(direct[g]),1.0)/			Math.max(Math.abs(m_f),1.0);		    if(tmp > test) test = tmp;			}				if(test < m_Zero){		    if (m_Debug)			System.err.println("Gradient converge: "+test);		    finish = true;		}	    				// dg'*dx could be < 0 using inexact lnsrch		if(m_Debug)		    System.err.println("dg'*dx="+(denom+newlyBounded));			// dg'*dx = 0		if(Math.abs(denom+newlyBounded) < m_Zero)		    finish = true;				int size = wsBdsIndx.size();		boolean isUpdate = true;  // Whether to update BFGS formula	    		// Converge: check whether release any current constraints		if(finish){		    if (m_Debug)			System.err.println("Test any release possible ...");		    			    if(toFree != null)			oldToFree = (FastVector)toFree.copy();		    toFree = new FastVector();		    		    for(int m=size-1; m>=0; m--){			int index=((Integer)wsBdsIndx.elementAt(m)).intValue();			double[] hessian = evaluateHessian(x, index);						double deltaL=0.0;			if(hessian != null){			    for(int mm=0; mm<hessian.length; mm++)				if(!isFixed[mm]) // Free variable				    deltaL += hessian[mm]*direct[mm];			}						// First and second order Lagrangian multiplier estimate			// If user didn't provide Hessian, use first-order only			double L1, L2;			if(x[index] >= constraints[1][index]) // Upper bound			    L1 = -grad[index];			else if(x[index] <= constraints[0][index])// Lower bound			    L1 = grad[index];			else			    throw new Exception("x["+index+"] not fixed on the"+						" bounds where it should have been!");						// L2 = L1 + deltaL			L2 = L1 + deltaL;						if (m_Debug)			    System.err.println("Variable "+index+					       ": Lagrangian="+L1+"|"+L2);						//Check validity of Lagrangian multiplier estimate			boolean isConverge = 			    (2.0*Math.abs(deltaL)) < Math.min(Math.abs(L1),							      Math.abs(L2));  			if((L1*L2>0.0) && isConverge){ //Same sign and converge: valid			    if(L2 < 0.0){// Negative Lagrangian: feasible				toFree.addElement(new Integer(index));				wsBdsIndx.removeElementAt(m);				finish=false; // Not optimal, cannot finish			    }			}						// Although hardly happen, better check it			// If the first-order Lagrangian multiplier estimate is wrong,			// avoid zigzagging			if((hessian==null) && equal(toFree, oldToFree)) 			    finish = true;           		    }		    		    if(finish){// Min. found			if (m_Debug)			    System.err.println("Minimum found.");			m_f = objectiveFunction(x);			return x;		    }		    		    // Free some variables		    for(int mmm=0; mmm<toFree.size(); mmm++){			int freeIndx=((Integer)toFree.elementAt(mmm)).intValue();			isFixed[freeIndx] = false; // Free this variable			if(x[freeIndx] <= constraints[0][freeIndx]){// Lower bound			    nwsBounds[0][freeIndx] = constraints[0][freeIndx];			    if (m_Debug)				System.err.println("Free variable "+freeIndx+						   " from bound "+ 						   nwsBounds[0][freeIndx]);			}			else{ // Upper bound			    nwsBounds[1][freeIndx] = constraints[1][freeIndx];			    if (m_Debug)				System.err.println("Free variable "+freeIndx+						   " from bound "+ 						   nwsBounds[1][freeIndx]);			}						L.setElement(freeIndx, freeIndx, 1.0);			D.setElement(freeIndx, freeIndx, 1.0);			isUpdate = false;					    }					}				if(denom<Math.max(m_Zero*Math.sqrt(dxSq)*Math.sqrt(dgSq), m_Zero)){		    if (m_Debug) 			System.err.println("dg'*dx negative!");		    isUpdate = false; // Do not update		    		}				// If Hessian will be positive definite, update it		if(isUpdate){		    double[] v= new double[l];  		    Matrix[] result;	    		    // modify once: dg*dg'/(dg'*dx)			    double coeff = 1.0/denom; // 1/(dg'*dx)			    result = updateCholeskyFactor(L,D,deltaGrad,coeff,isFixed);		    		    // modify twice: g*g'/(g'*p)			    coeff = 1.0/m_Slope; // 1/(g'*p)		    result=updateCholeskyFactor			(result[0],result[1],oldGrad,coeff,isFixed);  		    		    L = result[0];		    D = result[1];		}	    }	    	    // Find new direction 	    Matrix LD = new Matrix(l,l); // L*D	    double[] b = new double[l];	    	    for(int k=0; k<l; k++){		if(!isFixed[k])  b[k] = -grad[k];		else             b[k] = 0.0;				for(int j=k; j<l; j++){ // Lower triangle			    if(!isFixed[j] && !isFixed[k])			LD.setElement(j, k, L.getElement(j,k)*D.getElement(k,k));		}			    }	    		    	    // Solve (LD)*y = -g, where y=L'*direct	    double[] LDIR = solveTriangle(LD, b, true, isFixed);	    	    for(int m=0; m<LDIR.length; m++){		if(Double.isNaN(LDIR[m]))		    throw new Exception("L*direct["+m+"] is NaN!"					+"|-g="+b[m]+"|"+isFixed[m]					+"|diag="+D.getElement(m,m));	    }	    	    // Solve L'*direct = y	    direct = solveTriangle(L, LDIR, false, isFixed);	    for(int m=0; m<direct.length; m++){		if(Double.isNaN(direct[m]))		    throw new Exception("direct is NaN!");	    }	    	    System.gc();	}		if(m_Debug)	    System.err.println("Cannot find minimum"+			       " -- too many interations!");	m_X = x;	return null;    }        /**      * Solve the linear equation of TX=B where T is a triangle matrix     * It can be solved using back/forward substitution, with O(N^2)      * complexity     * @param t the matrix T     * @param b the vector B     * @param isLower whether T is a lower or higher triangle matrix     * @param isZero which row(s) of T are not used when solving the equation.      *               If it's null or all 'false', then every row is used.     * @return the solution of X     */         public static double[] solveTriangle(Matrix t, double[] b, 					 boolean isLower, boolean[] isZero){	int n = b.length; 	double[] result = new double[n];	if(isZero == null)	    isZero = new boolean[n];		if(isLower){ // lower triangle, forward-substitution	    int j = 0;	    while((j<n)&&isZero[j]){result[j]=0.0; j++;} // go to the first row	    	    if(j<n){		result[j] = b[j]/t.getElement(j,j);				for(; j<n; j++){		    if(!isZero[j]){			double numerator=b[j];			for(int k=0; k<j; k++)			    numerator -= t.getElement(j,k)*result[k];			result[j] = numerator/t.getElement(j,j);		    }		    else 			result[j] = 0.0;		}	    }	}	else{ // Upper triangle, back-substitution	    int j=n-1;	    while((j>=0)&&isZero[j]){result[j]=0.0; j--;} // go to the last row	    	    if(j>=0){		result[j] = b[j]/t.getElement(j,j);				for(; j>=0; j--){		    if(!isZero[j]){			double numerator=b[j];			for(int k=j+1; k<n; k++)			    numerator -= t.getElement(k,j)*result[k];			result[j] = numerator/t.getElement(j,j);		    }		    else 			result[j] = 0.0;		}	    }	}		return result;    }    /**     * One rank update of the Cholesky factorization of B matrix in BFGS updates,     * i.e. B = LDL', and B_{new} = LDL' + coeff*(vv') where L is a unit lower triangle     * matrix and D is a diagonal matrix, and v is a vector.<br>     * When coeff > 0, we use C1 algorithm, and otherwise we use C2 algorithm described     * in ``Methods for Modifying Matrix Factorizations''      *     * @param L the unit triangle matrix L     * @param D the diagonal matrix D     * @param v the update vector v     * @param coeff the coeffcient of update     * @param isFixed which variables are not to be updated     * @return the updated L and D     */        protected Matrix[] updateCholeskyFactor(Matrix L, Matrix D, 					    double[] v, double coeff,					    boolean[] isFixed)	throws Exception{	double t, p, b;	int n = v.length;	double[] vp =  new double[n];	Matrix dBar = new Matrix(n, n), lBar = new Matrix(n, n);		for (int i=0; i<v.length; i++)	    if(!isFixed[i])		vp[i]=v[i];	    else		vp[i]=0.0;		if(coeff>0.0){	    t = coeff;	    	    for(int j=0; j<n; j++){				if(isFixed[j]) continue;				lBar.setElement(j, j, 1.0); // Unit triangle				p = vp[j];		double d=D.getElement(j,j), dbarj=d+t*p*p;		dBar.setElement(j, j, dbarj);				b = p*t/dbarj;		t *= d/dbarj;		for(int r=j+1; r<n; r++){		    if(!isFixed[r]){			double l=L.getElement(r, j);			vp[r] -= p*l;			lBar.setElement(r, j, l+b*vp[r]);		    }		    else		    	lBar.setElement(r, j, 0.0);		}	    }	}	else{	    double[] P = solveTriangle(L, v, true, isFixed);	    	    t = 0.0;	    for(int i=0; i<n; i++)		if(!isFixed[i])		    t += P[i]*P[i]/D.getElement(i,i);	    		    	    double sqrt=1.0+coeff*t;	    sqrt = (sqrt<0.0)? 0.0 : Math.sqrt(sqrt);	    	    double alpha=coeff, sigma=coeff/(1.0+sqrt), rho, theta;	    	    for(int j=0; j<n; j++){		if(isFixed[j]) continue;				lBar.setElement(j, j, 1.0); // Unit triangle				double d=D.getElement(j,j);		p = P[j]*P[j]/d;		theta = 1.0+sigma*p;		t -= p; 		if(t<0.0) t=0.0; // Rounding error		double plus = sigma*sigma*p*t;		if((j<n-1) && (plus <= m_Zero)) 		    plus=m_Zero; // Avoid rounding error		rho = theta*theta + plus;				dBar.setElement(j, j, rho*d);				if(Double.isNaN(dBar.getElement(j,j))){		    throw new Exception("d["+j+"] NaN! P="+P[j]+",d="+d+					",t="+t+",p="+p+",sigma="+sigma+					",sclar="+coeff);		}				b=alpha*P[j]/(rho*d);		alpha /= rho;		rho = Math.sqrt(rho);		double sigmaOld = sigma;		sigma *= (1.0+rho)/(rho*(theta+rho));	 		if((j<n-1) && 		   (Double.isNaN(sigma) || Double.isInfinite(sigma)))		    throw new Exception("sigma NaN/Inf! rho="+rho+				       ",theta="+theta+",P["+j+"]="+				       P[j]+",p="+p+",d="+d+",t="+t+				       ",oldsigma="+sigmaOld);				for(int r=j+1; r<n; r++){		    if(!isFixed[r]){			double l=L.getElement(r, j);			vp[r] -= P[j]*l;			lBar.setElement(r, j, l+b*vp[r]);		    }		    else		    	lBar.setElement(r, j, 0.0);		}	    }	}		Matrix[] rt = new Matrix[2];	rt[0] = lBar; rt[1] = dBar;	return rt;    }    /**     * Check whether the two integer vectors equal to each other     * Two integer vectors are equal if all the elements are the      * same, regardless of the order of the elements     *     * @param a one integer vector     * @param b another integer vector     * @return whether they are equal     */     private boolean equal(FastVector a, FastVector b){	if((a==null) || (b==null) || (a.size()!=b.size()))	    return false;		int size=a.size();	// Store into int arrays	int[] ia=new int[size], ib=new int[size];	for(int i=0;i<size;i++){	    ia[i] = ((Integer)a.elementAt(i)).intValue();	    ib[i] = ((Integer)b.elementAt(i)).intValue();	}	// Only values matter, order does not matter	int[] sorta=Utils.sort(ia), sortb=Utils.sort(ib);	for(int j=0; j<size;j++)	    if(ia[sorta[j]] != ib[sortb[j]])		return false;		return true;    }}
上一页 1 23
💿 文件大小 12323 K
👤 上传用户 ilovexzhu
📂 所属分类人工智能/神经网络
🏷️ 相关标签

#university #supervised #learning #wekaUT
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -