📄 tldsimple.java

📁 该代码是一个分类器的实现，其中使用了部分 Weka 的源代码。可以将项目导入 Eclipse 后运行。
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
        // Negative exemplars: next run         while(Double.isNaN(m_MeanN[none][x]))          none = whichEx.nextInt(nnum);        m = m_MeanN[none][x];        w = (m-nThisParam[1])*(m-nThisParam[1]);        nThisParam[0] = w;  // w        nThisParam[1] = m;  // m	 		      }		  	    	        }    m_LkRatio = new double[m_Dimension];    if(m_UseEmpiricalCutOff){	      // Find the empirical cut-off      double[] pLogOdds=new double[pnum], nLogOdds=new double[nnum];        for(int p=0; p<pnum; p++)        pLogOdds[p] =           likelihoodRatio(m_SumP[p], m_MeanP[p]);      for(int q=0; q<nnum; q++)        nLogOdds[q] =           likelihoodRatio(m_SumN[q], m_MeanN[q]);      // Update m_Cutoff      findCutOff(pLogOdds, nLogOdds);    }    else      m_Cutoff = -Math.log((double)pnum/(double)nnum);    /*        for(int x=0, y=0; x<m_Dimension; x++, y++){       if((x==exs.classIndex()) || (x==exs.idIndex()))       y++;       w=m_ParamsP[2*x]; m=m_ParamsP[2*x+1];       System.err.println("\n\n???Positive: ( "+exs.attribute(y)+       "):  w="+w+", m="+m+", sgmSq="+m_SgmSqP[x]);       w=m_ParamsN[2*x]; m=m_ParamsN[2*x+1];       System.err.println("???Negative: ("+exs.attribute(y)+       "):  w="+w+", m="+m+", sgmSq="+m_SgmSqN[x]+       "\nAvg. 
log-likelihood ratio in training data="       +(m_LkRatio[x]/(pnum+nnum)));       }	       */    if (getDebug())      System.err.println("\n\n???Cut-off="+m_Cutoff);  }          /**   *   * @param ex the given test exemplar   * @return the classification    * @throws Exception if the exemplar could not be classified   * successfully   */  public double classifyInstance(Instance ex)throws Exception{    //Instance ex = new Exemplar(e);    Instances exi = ex.relationalValue(1);    double[] n = new double[m_Dimension];    double [] xBar = new double[m_Dimension];    for (int i=0; i<exi.numAttributes() ; i++)      xBar[i] = exi.meanOrMode(i);    for (int w=0, t=0; w < m_Dimension; w++, t++){      // if((t==m_ClassIndex) || (t==m_IdIndex))      //t++;	      for(int u=0;u<exi.numInstances();u++)        if(!exi.instance(u).isMissing(t))          n[w] += exi.instance(u).weight();    }    double logOdds = likelihoodRatio(n, xBar);    return (logOdds > m_Cutoff) ? 1 : 0 ;  }    /**   * Computes the distribution for a given exemplar   *   * @param ex the exemplar for which distribution is computed   * @return the distribution   * @throws Exception if the distribution can't be computed successfully   */  public double[] distributionForInstance(Instance ex) throws Exception {        double[] distribution = new double[2];    Instances exi = ex.relationalValue(1);    double[] n = new double[m_Dimension];    double[] xBar = new double[m_Dimension];    for (int i = 0; i < exi.numAttributes() ; i++)      xBar[i] = exi.meanOrMode(i);        for (int w = 0, t = 0; w < m_Dimension; w++, t++){      for (int u = 0; u < exi.numInstances(); u++)	if (!exi.instance(u).isMissing(t))	  n[w] += exi.instance(u).weight();    }        double logOdds = likelihoodRatio(n, xBar);        // returned logOdds value has been divided by m_Dimension to avoid     // Math.exp(logOdds) getting too large or too small,     // that may result in two fixed distribution value (1 or 0).    
distribution[0] = 1 / (1 + Math.exp(logOdds)); // Prob. for class 0 (negative)    distribution[1] = 1 - distribution[0];        return distribution;  }	  /**   * Compute the log-likelihood ratio   */  private double likelihoodRatio(double[] n, double[] xBar){	    double LLP = 0.0, LLN = 0.0;    for (int x=0; x<m_Dimension; x++){      if(Double.isNaN(xBar[x])) continue; // All missing values      //if(Double.isNaN(xBar[x]) || (m_ParamsP[2*x] <= ZERO)       //  || (m_ParamsN[2*x]<=ZERO))       //	continue; // All missing values      //Log-likelihood for positive       double w=m_ParamsP[2*x], m=m_ParamsP[2*x+1];      double llp = Math.log(w*n[x]+m_SgmSqP[x])        + n[x]*(m-xBar[x])*(m-xBar[x])/(w*n[x]+m_SgmSqP[x]);      LLP -= llp;      //Log-likelihood for negative       w=m_ParamsN[2*x]; m=m_ParamsN[2*x+1];       double lln = Math.log(w*n[x]+m_SgmSqN[x])        + n[x]*(m-xBar[x])*(m-xBar[x])/(w*n[x]+m_SgmSqN[x]);      LLN -= lln;      m_LkRatio[x] += llp - lln;    }    return LLP - LLN / m_Dimension;  }  private void findCutOff(double[] pos, double[] neg){    int[] pOrder = Utils.sort(pos),      nOrder = Utils.sort(neg);    /*       System.err.println("\n\n???Positive: ");       for(int t=0; t<pOrder.length; t++)       System.err.print(t+":"+Utils.doubleToString(pos[pOrder[t]],0,2)+" ");       System.err.println("\n\n???Negative: ");       for(int t=0; t<nOrder.length; t++)       System.err.print(t+":"+Utils.doubleToString(neg[nOrder[t]],0,2)+" ");       */    int pNum = pos.length, nNum = neg.length, count, p=0, n=0;	    double fstAccu=0.0, sndAccu=(double)pNum, split;     double maxAccu = 0, minDistTo0 = Double.MAX_VALUE;    // Skip continuous negatives	    for(;(n<nNum)&&(pos[pOrder[0]]>=neg[nOrder[n]]); n++, fstAccu++);    if(n>=nNum){ // totally seperate      m_Cutoff = (neg[nOrder[nNum-1]]+pos[pOrder[0]])/2.0;	      //m_Cutoff = neg[nOrder[nNum-1]];      return;      }	    count=n;    while((p<pNum)&&(n<nNum)){      // Compare the next in the two lists      
if(pos[pOrder[p]]>=neg[nOrder[n]]){ // Neg has less log-odds        fstAccu += 1.0;            split=neg[nOrder[n]];        n++;	       }      else{        sndAccu -= 1.0;        split=pos[pOrder[p]];        p++;      }	    	        count++;      /*         double entropy=0.0, cover=(double)count;         if(fstAccu>0.0)         entropy -= fstAccu*Math.log(fstAccu/cover);         if(sndAccu>0.0)         entropy -= sndAccu*Math.log(sndAccu/(total-cover));         if(entropy < minEntropy){         minEntropy = entropy;      //find the next smallest      //double next = neg[nOrder[n]];      //if(pos[pOrder[p]]<neg[nOrder[n]])      //    next = pos[pOrder[p]];	      //m_Cutoff = (split+next)/2.0;      m_Cutoff = split;         }         */      if ((fstAccu+sndAccu > maxAccu) ||           ((fstAccu+sndAccu == maxAccu) && (Math.abs(split)<minDistTo0))){        maxAccu = fstAccu+sndAccu;        m_Cutoff = split;        minDistTo0 = Math.abs(split);     }	        }		  }  /**   * Returns an enumeration describing the available options   *   * @return an enumeration of all the available options   */  public Enumeration listOptions() {    Vector result = new Vector();        result.addElement(new Option(          "\tSet whether or not use empirical\n"          + "\tlog-odds cut-off instead of 0",          "C", 0, "-C"));        result.addElement(new Option(          "\tSet the number of multiple runs \n"          + "\tneeded for searching the MLE.",          "R", 1, "-R <numOfRuns>"));        Enumeration enu = super.listOptions();    while (enu.hasMoreElements()) {      result.addElement(enu.nextElement());    }    return result.elements();  }  /**   * Parses a given list of options. 
<p/>   *    <!-- options-start -->   * Valid options are: <p/>   *    * <pre> -C   *  Set whether or not use empirical   *  log-odds cut-off instead of 0</pre>   *    * <pre> -R &lt;numOfRuns&gt;   *  Set the number of multiple runs    *  needed for searching the MLE.</pre>   *    * <pre> -S &lt;num&gt;   *  Random number seed.   *  (default 1)</pre>   *    * <pre> -D   *  If set, classifier is run in debug mode and   *  may output additional info to the console</pre>   *    <!-- options-end -->   *   * @param options the list of options as an array of strings   * @throws Exception if an option is not supported   */  public void setOptions(String[] options) throws Exception{    setDebug(Utils.getFlag('D', options));    setUsingCutOff(Utils.getFlag('C', options));    String runString = Utils.getOption('R', options);    if (runString.length() != 0)       setNumRuns(Integer.parseInt(runString));    else       setNumRuns(1);    super.setOptions(options);  }  /**   * Gets the current settings of the Classifier.   *   * @return an array of strings suitable for passing to setOptions   */  public String[] getOptions() {    Vector        result;    String[]      options;    int           i;        result  = new Vector();    options = super.getOptions();    for (i = 0; i < options.length; i++)      result.add(options[i]);    if (getDebug())      result.add("-D");        if (getUsingCutOff())      result.add("-C");    result.add("-R");    result.add("" + getNumRuns());    return (String[]) result.toArray(new String[result.size()]);  }  /**   * Returns the tip text for this property   *   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String numRunsTipText() {    return "The number of runs to perform.";  }    /**   * Sets the number of runs to perform.   
*   * @param numRuns   the number of runs to perform   */  public void setNumRuns(int numRuns) {    m_Run = numRuns;  }  /**   * Returns the number of runs to perform.   *   * @return          the number of runs to perform   */  public int getNumRuns() {    return m_Run;  }  /**   * Returns the tip text for this property   *   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String usingCutOffTipText() {    return "Whether to use an empirical cutoff.";  }  /**   * Sets whether to use an empirical cutoff.   *   * @param cutOff      whether to use an empirical cutoff   */  public void setUsingCutOff (boolean cutOff) {    m_UseEmpiricalCutOff =cutOff;  }  /**   * Returns whether an empirical cutoff is used   *   * @return            true if an empirical cutoff is used   */  public boolean getUsingCutOff() {    return m_UseEmpiricalCutOff ;  }  /**   * Gets a string describing the classifier.   *   * @return a string describing the classifer built.   */  public String toString(){    StringBuffer text = new StringBuffer("\n\nTLDSimple:\n");    double sgm, w, m;    for (int x=0, y=0; x<m_Dimension; x++, y++){      // if((x==m_ClassIndex) || (x==m_IdIndex))      //y++;      sgm = m_SgmSqP[x];      w=m_ParamsP[2*x];       m=m_ParamsP[2*x+1];      text.append("\n"+m_Attribute.attribute(y).name()+"\nPositive: "+          "sigma^2="+sgm+", w="+w+", m="+m+"\n");      sgm = m_SgmSqN[x];      w=m_ParamsN[2*x];       m=m_ParamsN[2*x+1];      text.append("Negative: "+          "sigma^2="+sgm+", w="+w+", m="+m+"\n");    }    return text.toString();  }       /**   * Main method for testing.   
*   * @param args the options for the classifier   */  public static void main(String[] args) {	    runClassifier(new TLDSimple(), args);  }}class TLDSimple_Optm extends Optimization{  private double[] num;  private double sSq;  private double[] xBar;  public void setNum(double[] n) {num = n;}  public void setSgmSq(double s){    sSq = s;  }  public void setXBar(double[] x){xBar = x;}  /**   * Implement this procedure to evaluate objective   * function to be minimized   */  protected double objectiveFunction(double[] x){    int numExs = num.length;    double NLL=0; // Negative Log-Likelihood    double w=x[0], m=x[1];     for(int j=0; j < numExs; j++){      if(Double.isNaN(xBar[j])) continue; // All missing values      double bag=0;       bag += Math.log(w*num[j]+sSq);      if(Double.isNaN(bag) && m_Debug){        System.out.println("???????????1: "+w+" "+m            +"|x-: "+xBar[j] +             "|n: "+num[j] + "|S^2: "+sSq);        //System.exit(1);      }      bag += num[j]*(m-xBar[j])*(m-xBar[j])/(w*num[j]+sSq);	    	          if(Double.isNaN(bag) && m_Debug){        System.out.println("???????????2: "+w+" "+m            +"|x-: "+xBar[j] +             "|n: "+num[j] + "|S^2: "+sSq);        //System.exit(1);      }	    	             //if(bag<0) bag=0;      NLL += bag;    }    //System.out.println("???????????NLL:"+NLL);    return NLL;  }  /**   * Subclass should implement this procedure to evaluate gradient   * of the objective function   */  protected double[] evaluateGradient(double[] x){    double[] g = new double[x.length];    int numExs = num.length;    double w=x[0],m=x[1];	    double dw=0.0, dm=0.0;    for(int j=0; j < numExs; j++){      if(Double.isNaN(xBar[j])) continue; // All missing values	          dw += num[j]/(w*num[j]+sSq)         - num[j]*num[j]*(m-xBar[j])*(m-xBar[j])/((w*num[j]+sSq)*(w*num[j]+sSq));      dm += 2.0*num[j]*(m-xBar[j])/(w*num[j]+sSq);    }    g[0] = dw;    g[1] = dm;    return g;  }  /**   * Subclass should implement this 
procedure to evaluate second-order   * gradient of the objective function   */  protected double[] evaluateHessian(double[] x, int index){    double[] h = new double[x.length];    // # of exemplars, # of dimensions    // which dimension and which variable for 'index'    int numExs = num.length;    double w,m;    // Take the 2nd-order derivative    switch(index){	      case 0: // w           w=x[0];m=x[1];        for(int j=0; j < numExs; j++){          if(Double.isNaN(xBar[j])) continue; //All missing values          h[0] += 2.0*Math.pow(num[j],3)*(m-xBar[j])*(m-xBar[j])/Math.pow(w*num[j]+sSq,3)            - num[j]*num[j]/((w*num[j]+sSq)*(w*num[j]+sSq));          h[1] -= 2.0*(m-xBar[j])*num[j]*num[j]/((num[j]*w+sSq)*(num[j]*w+sSq));		        }        break;      case 1: // m        w=x[0];m=x[1];        for(int j=0; j < numExs; j++){          if(Double.isNaN(xBar[j])) continue; //All missing values          h[0] -= 2.0*(m-xBar[j])*num[j]*num[j]/((num[j]*w+sSq)*(num[j]*w+sSq));          h[1] += 2.0*num[j]/(w*num[j]+sSq);				        }    }    return h;  }}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -