⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 discretizefilter.java

📁 :<<数据挖掘--实用机器学习技术及java实现>>一书的配套源程序
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
    return "Make resulting attributes binary.";  }  /**   * Gets whether binary attributes should be made for discretized ones.   *   * @return true if attributes will be binarized   */  public boolean getMakeBinary() {    return m_MakeBinary;  }  /**    * Sets whether binary attributes should be made for discretized ones.   *   * @param makeBinary if binary attributes are to be made   */  public void setMakeBinary(boolean makeBinary) {    m_MakeBinary = makeBinary;  }  /**   * Returns the tip text for this property   *   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String useMDLTipText() {    return "Use class-based discretization. If set to false, does"      + " not require a class attribute, and uses a fixed number"      + " of bins (according to bins setting).";  }  /**   * Gets whether MDL will be used as the discretisation method.   *   * @return true if so, false if fixed bins should be used.   */  public boolean getUseMDL() {    return m_UseMDL;  }  /**    * Sets whether MDL will be used as the discretisation method.   *   * @param useMDL true if MDL should be used, false if fixed bins should   * be used.   */  public void setUseMDL(boolean useMDL) {    m_UseMDL = useMDL;  }  /**   * Returns the tip text for this property   *   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String useKononenkoTipText() {    return "Use Kononenko's MDL criterion. If set to false"      + " uses the Fayyad & Irani criterion.";  }  /**   * Gets whether Kononenko's MDL criterion is to be used.   *   * @return true if Kononenko's criterion will be used.   */  public boolean getUseKononenko() {    return m_UseKononenko;  }  /**    * Sets whether Kononenko's MDL criterion is to be used.   *   * @param useKon true if Kononenko's one is to be used   */  public void setUseKononenko(boolean useKon) {    m_UseMDL = true;    m_UseKononenko = useKon;  }  /**   * Returns the tip text for this property   *   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String useBetterEncodingTipText() {    return "Uses a different split point encoding. Who says it's better?"      + " (Eibe fix this).";  }  /**   * Gets whether better encoding is to be used for MDL.   *   * @return true if the better MDL encoding will be used   */  public boolean getUseBetterEncoding() {    return m_UseBetterEncoding;  }  /**    * Sets whether better encoding is to be used for MDL.   *   * @param useBetterEncoding true if better encoding to be used.   */  public void setUseBetterEncoding(boolean useBetterEncoding) {    m_UseMDL = true;    m_UseBetterEncoding = useBetterEncoding;  }  /**   * Returns the tip text for this property   *   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String binsTipText() {    return "Number of bins for class-blind discretisation. This"      + " setting is ignored if MDL-based discretisation is used.";  }  /**   * Gets the number of bins numeric attributes will be divided into   *   * @return the number of bins.   */  public int getBins() {    return m_NumBins;  }  /**   * Sets the number of bins to divide each selected numeric attribute into   *   * @param numBins the number of bins   */  public void setBins(int numBins) {    m_UseMDL = false;    m_NumBins = numBins;  }  /**   * Returns the tip text for this property   *   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String invertSelectionTipText() {    return "Set attribute selection mode. If false, only selected"      + " (numeric) attributes in the range will be discretized; if"      + " true, only non-selected attributes will be discretized.";  }  /**   * Gets whether the supplied columns are to be removed or kept   *   * @return true if the supplied columns will be kept   */  public boolean getInvertSelection() {    return m_DiscretizeCols.getInvert();  }  /**   * Sets whether selected columns should be removed or kept. If true the    * selected columns are kept and unselected columns are deleted. If false   * selected columns are deleted and unselected columns are kept.   *   * @param invert the new invert setting   */  public void setInvertSelection(boolean invert) {    m_DiscretizeCols.setInvert(invert);  }  /**   * Returns the tip text for this property   *   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String attributeIndicesTipText() {    return "Specify range of attributes to act on."      + " This is a comma separated list of attribute indices, with"      + " \"first\" and \"last\" valid values. Specify an inclusive"      + " range with \"-\". E.g: \"first-3,5,6-10,last\".";  }  /**   * Gets the current range selection   *   * @return a string containing a comma separated list of ranges   */  public String getAttributeIndices() {    return m_DiscretizeCols.getRanges();  }  /**   * Sets which attributes are to be Discretized (only numeric   * attributes among the selection will be Discretized).   *   * @param rangeList a string representing the list of attributes. Since   * the string will typically come from a user, attributes are indexed from   * 1. <br>   * eg: first-3,5,6-last   * @exception IllegalArgumentException if an invalid range list is supplied    */  public void setAttributeIndices(String rangeList) {    m_DiscretizeCols.setRanges(rangeList);  }  /**   * Sets which attributes are to be Discretized (only numeric   * attributes among the selection will be Discretized).   *   * @param attributes an array containing indexes of attributes to Discretize.   * Since the array will typically come from a program, attributes are indexed   * from 0.   * @exception IllegalArgumentException if an invalid set of ranges   * is supplied    */  public void setAttributeIndicesArray(int [] attributes) {    setAttributeIndices(Range.indicesToRangeList(attributes));  }  /**   * Gets the cut points for an attribute   *   * @param the index (from 0) of the attribute to get the cut points of   * @return an array containing the cutpoints (or null if the   * attribute requested isn't being Discretized   */  public double [] getCutPoints(int attributeIndex) {    if (m_CutPoints == null) {      return null;    }    return m_CutPoints[attributeIndex];  }  /** Generate the cutpoints for each attribute */  protected void calculateCutPoints() {    Instances copy = null;    m_CutPoints = new double [getInputFormat().numAttributes()] [];    for(int i = getInputFormat().numAttributes() - 1; i >= 0; i--) {      if ((m_DiscretizeCols.isInRange(i)) && 	  (getInputFormat().attribute(i).isNumeric())) {	if (m_UseMDL) {	  // Use copy to preserve order	  if (copy == null) {	    copy = new Instances(getInputFormat());	  }	  calculateCutPointsByMDL(i, copy);	} else {	  if (m_FindNumBins) {	    findNumBins(i);	  } else {	    calculateCutPointsByBinning(i);	  }	}      }    }  }  /**   * Set cutpoints for a single attribute using MDL.   *   * @param index the index of the attribute to set cutpoints for   */  protected void calculateCutPointsByMDL(int index,					 Instances data) {    // Sort instances    data.sort(data.attribute(index));    // Find first instances that's missing    int firstMissing = data.numInstances();    for (int i = 0; i < data.numInstances(); i++) {      if (data.instance(i).isMissing(index)) {        firstMissing = i;        break;      }    }    m_CutPoints[index] = cutPointsForSubset(data, index, 0, firstMissing);  }  /** Test using Kononenko's MDL criterion. */  private boolean KononenkosMDL(double[] priorCounts,				double[][] bestCounts,				double numInstances,				int numCutPoints) {    double distPrior, instPrior, distAfter = 0, sum, instAfter = 0;    double before, after;    int numClassesTotal;    // Number of classes occuring in the set    numClassesTotal = 0;    for (int i = 0; i < priorCounts.length; i++) {      if (priorCounts[i] > 0) {	numClassesTotal++;      }    }    // Encode distribution prior to split    distPrior = SpecialFunctions.log2Binomial(numInstances 					      + numClassesTotal - 1,					      numClassesTotal - 1);    // Encode instances prior to split.    instPrior = SpecialFunctions.log2Multinomial(numInstances,						 priorCounts);    before = instPrior + distPrior;    // Encode distributions and instances after split.    for (int i = 0; i < bestCounts.length; i++) {      sum = Utils.sum(bestCounts[i]);      distAfter += SpecialFunctions.log2Binomial(sum + numClassesTotal - 1,						 numClassesTotal - 1);      instAfter += SpecialFunctions.log2Multinomial(sum,						    bestCounts[i]);    }    // Coding cost after split    after = Utils.log2(numCutPoints) + distAfter + instAfter;    // Check if split is to be accepted    return (Utils.gr(before, after));  }  /** Test using Fayyad and Irani's MDL criterion. */  private boolean FayyadAndIranisMDL(double[] priorCounts,				     double[][] bestCounts,				     double numInstances,				     int numCutPoints) {    double priorEntropy, entropy, gain;     double entropyLeft, entropyRight, delta;    int numClassesTotal, numClassesRight, numClassesLeft;    // Compute entropy before split.    priorEntropy = ContingencyTables.entropy(priorCounts);    // Compute entropy after split.    entropy = ContingencyTables.entropyConditionedOnRows(bestCounts);    // Compute information gain.    gain = priorEntropy - entropy;    // Number of classes occuring in the set    numClassesTotal = 0;    for (int i = 0; i < priorCounts.length; i++) {      if (priorCounts[i] > 0) {	numClassesTotal++;      }    }    // Number of classes occuring in the left subset    numClassesLeft = 0;    for (int i = 0; i < bestCounts[0].length; i++) {      if (bestCounts[0][i] > 0) {	numClassesLeft++;      }    }    // Number of classes occuring in the right subset    numClassesRight = 0;    for (int i = 0; i < bestCounts[1].length; i++) {      if (bestCounts[1][i] > 0) {	numClassesRight++;      }    }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -