📄 minnd.java
字号:
} /** * Pre-process the given exemplar according to the other exemplars * in the given exemplars. It also updates noise data statistics. * * @param data the whole exemplars * @param pos the position of given exemplar in data * @return the processed exemplar * @throws Exception if the returned exemplar is wrong */ public Instance preprocess(Instances data, int pos) throws Exception{ Instance before = data.instance(pos); if((int)before.classValue() == 0){ m_NoiseM[pos] = null; m_NoiseV[pos] = null; return before; } Instances after_relationInsts =before.attribute(1).relation().stringFreeStructure(); Instances noises_relationInsts =before.attribute(1).relation().stringFreeStructure(); Instances newData = m_Attributes; Instance after = new Instance(before.numAttributes()); Instance noises = new Instance(before.numAttributes()); after.setDataset(newData); noises.setDataset(newData); for(int g=0; g < before.relationalValue(1).numInstances(); g++){ Instance datum = before.relationalValue(1).instance(g); double[] dists = new double[data.numInstances()]; for(int i=0; i < data.numInstances(); i++){ if(i != pos) dists[i] = distance(datum, m_Mean[i], m_Variance[i], i); else dists[i] = Double.POSITIVE_INFINITY; } int[] pred = new int[m_NumClasses]; for(int n=0; n < pred.length; n++) pred[n] = 0; for(int o=0; o<m_Select; o++){ int index = Utils.minIndex(dists); pred[(int)m_Class[index]]++; dists[index] = Double.POSITIVE_INFINITY; } int clas = Utils.maxIndex(pred); if((int)before.classValue() != clas) noises_relationInsts.add(datum); else after_relationInsts.add(datum); } int relationValue; relationValue = noises.attribute(1).addRelation( noises_relationInsts); noises.setValue(0,before.value(0)); noises.setValue(1, relationValue); noises.setValue(2, before.classValue()); relationValue = after.attribute(1).addRelation( after_relationInsts); after.setValue(0,before.value(0)); after.setValue(1, relationValue); after.setValue(2, before.classValue()); if(Utils.gr(noises.relationalValue(1).sumOfWeights(), 0)){ for (int i=0; i<m_Dimension; i++) { m_NoiseM[pos][i] = noises.relationalValue(1).meanOrMode(i); m_NoiseV[pos][i] = noises.relationalValue(1).variance(i); if(Utils.eq(m_NoiseV[pos][i],0.0)) m_NoiseV[pos][i] = m_ZERO; } /* for(int y=0; y < m_NoiseV[pos].length; y++){ if(Utils.eq(m_NoiseV[pos][y],0.0)) m_NoiseV[pos][y] = m_ZERO; } */ } else{ m_NoiseM[pos] = null; m_NoiseV[pos] = null; } return after; } /** * Calculates the distance between two instances * * @param first the first instance * @param second the second instance * @return the distance between the two given instances */ private double distance(Instance first, double[] mean, double[] var, int pos) { double diff, distance = 0; for(int i = 0; i < m_Dimension; i++) { // If attribute is numeric if(first.attribute(i).isNumeric()){ if (!first.isMissing(i)){ diff = first.value(i) - mean[i]; if(Utils.gr(var[i], m_ZERO)) distance += m_Change[pos][i] * var[i] * diff * diff; else distance += m_Change[pos][i] * diff * diff; } else{ if(Utils.gr(var[i], m_ZERO)) distance += m_Change[pos][i] * var[i]; else distance += m_Change[pos][i] * 1.0; } } } return distance; } /** * Updates the minimum and maximum values for all the attributes * based on a new exemplar. * * @param ex the new exemplar */ private void updateMinMax(Instance ex) { Instances insts = ex.relationalValue(1); for (int j = 0;j < m_Dimension; j++) { if (insts.attribute(j).isNumeric()){ for(int k=0; k < insts.numInstances(); k++){ Instance ins = insts.instance(k); if(!ins.isMissing(j)){ if (Double.isNaN(m_MinArray[j])) { m_MinArray[j] = ins.value(j); m_MaxArray[j] = ins.value(j); } else { if (ins.value(j) < m_MinArray[j]) m_MinArray[j] = ins.value(j); else if (ins.value(j) > m_MaxArray[j]) m_MaxArray[j] = ins.value(j); } } } } } } /** * Scale the given exemplar so that the returned exemplar * has the value of 0 to 1 for each dimension * * @param before the given exemplar * @return the resultant exemplar after scaling * @throws Exception if given exampler cannot be scaled properly */ private Instance scale(Instance before) throws Exception{ Instances afterInsts = before.relationalValue(1).stringFreeStructure(); Instance after = new Instance(before.numAttributes()); after.setDataset(m_Attributes); for(int i=0; i < before.relationalValue(1).numInstances(); i++){ Instance datum = before.relationalValue(1).instance(i); Instance inst = (Instance)datum.copy(); for(int j=0; j < m_Dimension; j++){ if(before.relationalValue(1).attribute(j).isNumeric()) inst.setValue(j, (datum.value(j) - m_MinArray[j])/(m_MaxArray[j] - m_MinArray[j])); } afterInsts.add(inst); } int attValue = after.attribute(1).addRelation(afterInsts); after.setValue(0, before.value( 0)); after.setValue(1, attValue); after.setValue(2, before.value( 2)); return after; } /** * Use gradient descent to distort the MU parameter for * the exemplar. The exemplar can be in the specified row in the * given matrix, which has numExemplar rows and numDimension columns; * or not in the matrix. * * @param row the given row index * @param mean */ public void findWeights(int row, double[][] mean){ double[] neww = new double[m_Dimension]; double[] oldw = new double[m_Dimension]; System.arraycopy(m_Change[row], 0, neww, 0, m_Dimension); //for(int z=0; z<m_Dimension; z++) //System.out.println("mu("+row+"): "+origin[z]+" | "+newmu[z]); double newresult = target(neww, mean, row, m_Class); double result = Double.POSITIVE_INFINITY; double rate= 0.05; if(m_Rate != -1) rate = m_Rate; //System.out.println("???Start searching ...");search: while(Utils.gr((result-newresult), m_STOP)){ // Full step oldw = neww; neww= new double[m_Dimension]; double[] delta = delta(oldw, mean, row, m_Class); for(int i=0; i < m_Dimension; i++) if(Utils.gr(m_Variance[row][i], 0.0)) neww[i] = oldw[i] + rate * delta[i]; result = newresult; newresult = target(neww, mean, row, m_Class); //System.out.println("???old: "+result+"|new: "+newresult); while(Utils.gr(newresult, result)){ // Search back //System.out.println("search back"); if(m_Rate == -1){ rate *= m_Decay; // Decay for(int i=0; i < m_Dimension; i++) if(Utils.gr(m_Variance[row][i], 0.0)) neww[i] = oldw[i] + rate * delta[i]; newresult = target(neww, mean, row, m_Class); } else{ for(int i=0; i < m_Dimension; i++) neww[i] = oldw[i]; break search; } } } //System.out.println("???Stop"); m_Change[row] = neww; } /** * Delta of x in one step of gradient descent: * delta(Wij) = 1/2 * sum[k=1..N, k!=i](sqrt(P)*(Yi-Yk)/D - 1) * (MUij - * MUkj)^2 where D = sqrt(sum[j=1..P]Kkj(MUij - MUkj)^2) * N is number of exemplars and P is number of dimensions * * @param x the weights of the exemplar in question * @param rowpos row index of x in X * @param Y the observed class label * @return the delta for all dimensions */ private double[] delta(double[] x, double[][] X, int rowpos, double[] Y){ double y = Y[rowpos]; double[] delta=new double[m_Dimension]; for(int h=0; h < m_Dimension; h++) delta[h] = 0.0; for(int i=0; i < X.length; i++){ if((i != rowpos) && (X[i] != null)){ double var = (y==Y[i]) ? 0.0 : Math.sqrt((double)m_Dimension - 1); double distance=0; for(int j=0; j < m_Dimension; j++) if(Utils.gr(m_Variance[rowpos][j], 0.0)) distance += x[j]*(X[rowpos][j]-X[i][j]) * (X[rowpos][j]-X[i][j]); distance = Math.sqrt(distance); if(distance != 0) for(int k=0; k < m_Dimension; k++) if(m_Variance[rowpos][k] > 0.0) delta[k] += (var/distance - 1.0) * 0.5 * (X[rowpos][k]-X[i][k]) * (X[rowpos][k]-X[i][k]); } } //System.out.println("???delta: "+delta); return delta; } /** * Compute the target function to minimize in gradient descent * The formula is:<br/> * 1/2*sum[i=1..p](f(X, Xi)-var(Y, Yi))^2 <p/> * where p is the number of exemplars and Y is the class label. * In the case of X=MU, f() is the Euclidean distance between two * exemplars together with the related weights and var() is * sqrt(numDimension)*(Y-Yi) where Y-Yi is either 0 (when Y==Yi) * or 1 (Y!=Yi) * * @param x the weights of the exemplar in question * @param rowpos row index of x in X * @param Y the observed class label * @return the result of the target function */ public double target(double[] x, double[][] X, int rowpos, double[] Y){ double y = Y[rowpos], result=0; for(int i=0; i < X.length; i++){ if((i != rowpos) && (X[i] != null)){ double var = (y==Y[i]) ? 0.0 : Math.sqrt((double)m_Dimension - 1); double f=0; for(int j=0; j < m_Dimension; j++) if(Utils.gr(m_Variance[rowpos][j], 0.0)){ f += x[j]*(X[rowpos][j]-X[i][j]) * (X[rowpos][j]-X[i][j]); //System.out.println("i:"+i+" j: "+j+" row: "+rowpos); } f = Math.sqrt(f); //System.out.println("???distance between "+rowpos+" and "+i+": "+f+"|y:"+y+" vs "+Y[i]); if(Double.isInfinite(f)) System.exit(1); result += 0.5 * (f - var) * (f - var); } } //System.out.println("???target: "+result); return result; } /** * Use Kullback Leibler distance to find the nearest neighbours of * the given exemplar. * It also uses K-Nearest Neighbour algorithm to classify the * test exemplar * * @param ex the given test exemplar * @return the classification * @throws Exception if the exemplar could not be classified * successfully */ public double classifyInstance(Instance ex)throws Exception{ ex = scale(ex); double[] var = new double [m_Dimension]; for (int i=0; i<m_Dimension; i++) var[i]= ex.relationalValue(1).variance(i); // The Kullback distance to all exemplars double[] kullback = new double[m_Class.length];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -