
📄 apriori.java

📁 A collection of data mining algorithms written in Java, including clustering, classification, preprocessing, and more
💻 JAVA
📖 Page 1 of 4
   * these all association rules with a minimum confidence.
   *
   * @param instances the instances to be used for generating the associations
   * @throws Exception if rules can't be built successfully
   */
  public void buildAssociations(Instances instances) throws Exception {

    double[] confidences, supports;
    int[] indices;
    FastVector[] sortedRuleSet;
    int necSupport = 0;

    if (m_removeMissingCols) {
      instances = removeMissingColumns(instances);
    }
    if (m_car && m_metricType != CONFIDENCE)
      throw new Exception("For CAR-Mining metric type has to be confidence!");

    if (m_classIndex == -1)
      instances.setClassIndex(instances.numAttributes() - 1);
    else if (m_classIndex < instances.numAttributes() && m_classIndex >= 0)
      instances.setClassIndex(m_classIndex);
    else
      throw new Exception("Invalid class index.");

    // can associator handle the data?
    getCapabilities().testWithFail(instances);

    m_cycles = 0;
    if (m_car) {
      // m_instances does not contain the class attribute
      m_instances = LabeledItemSet.divide(instances, false);

      // m_onlyClass contains only the class attribute
      m_onlyClass = LabeledItemSet.divide(instances, true);
    }
    else
      m_instances = instances;

    if (m_car && m_numRules == Integer.MAX_VALUE) {
      // Set desired minimum support
      m_minSupport = m_lowerBoundMinSupport;
    }
    else {
      // Decrease minimum support until desired number of rules found.
      m_minSupport = m_upperBoundMinSupport - m_delta;
      m_minSupport = (m_minSupport < m_lowerBoundMinSupport)
        ? m_lowerBoundMinSupport
        : m_minSupport;
    }

    do {

      // Reserve space for variables
      m_Ls = new FastVector();
      m_hashtables = new FastVector();
      m_allTheRules = new FastVector[6];
      m_allTheRules[0] = new FastVector();
      m_allTheRules[1] = new FastVector();
      m_allTheRules[2] = new FastVector();
      if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
        m_allTheRules[3] = new FastVector();
        m_allTheRules[4] = new FastVector();
        m_allTheRules[5] = new FastVector();
      }
      sortedRuleSet = new FastVector[6];
      sortedRuleSet[0] = new FastVector();
      sortedRuleSet[1] = new FastVector();
      sortedRuleSet[2] = new FastVector();
      if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
        sortedRuleSet[3] = new FastVector();
        sortedRuleSet[4] = new FastVector();
        sortedRuleSet[5] = new FastVector();
      }
      if (!m_car) {
        // Find large itemsets and rules
        findLargeItemSets();
        if (m_significanceLevel != -1 || m_metricType != CONFIDENCE)
          findRulesBruteForce();
        else
          findRulesQuickly();
      }
      else {
        findLargeCarItemSets();
        findCarRulesQuickly();
      }

      // Sort rules according to their support
      /*supports = new double[m_allTheRules[2].size()];
      for (int i = 0; i < m_allTheRules[2].size(); i++)
        supports[i] = (double)((AprioriItemSet)m_allTheRules[1].elementAt(i)).support();
      indices = Utils.stableSort(supports);
      for (int i = 0; i < m_allTheRules[2].size(); i++) {
        sortedRuleSet[0].addElement(m_allTheRules[0].elementAt(indices[i]));
        sortedRuleSet[1].addElement(m_allTheRules[1].elementAt(indices[i]));
        sortedRuleSet[2].addElement(m_allTheRules[2].elementAt(indices[i]));
        if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
          sortedRuleSet[3].addElement(m_allTheRules[3].elementAt(indices[i]));
          sortedRuleSet[4].addElement(m_allTheRules[4].elementAt(indices[i]));
          sortedRuleSet[5].addElement(m_allTheRules[5].elementAt(indices[i]));
        }
      }*/

      int j = m_allTheRules[2].size() - 1;
      supports = new double[m_allTheRules[2].size()];
      for (int i = 0; i < (j + 1); i++)
        supports[j - i] = ((double)((ItemSet)m_allTheRules[1].elementAt(j - i)).support()) * (-1);
      indices = Utils.stableSort(supports);
      for (int i = 0; i < (j + 1); i++) {
        sortedRuleSet[0].addElement(m_allTheRules[0].elementAt(indices[j - i]));
        sortedRuleSet[1].addElement(m_allTheRules[1].elementAt(indices[j - i]));
        sortedRuleSet[2].addElement(m_allTheRules[2].elementAt(indices[j - i]));
        if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
          sortedRuleSet[3].addElement(m_allTheRules[3].elementAt(indices[j - i]));
          sortedRuleSet[4].addElement(m_allTheRules[4].elementAt(indices[j - i]));
          sortedRuleSet[5].addElement(m_allTheRules[5].elementAt(indices[j - i]));
        }
      }

      // Sort rules according to their confidence
      m_allTheRules[0].removeAllElements();
      m_allTheRules[1].removeAllElements();
      m_allTheRules[2].removeAllElements();
      if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
        m_allTheRules[3].removeAllElements();
        m_allTheRules[4].removeAllElements();
        m_allTheRules[5].removeAllElements();
      }
      confidences = new double[sortedRuleSet[2].size()];
      int sortType = 2 + m_metricType;

      for (int i = 0; i < sortedRuleSet[2].size(); i++)
        confidences[i] =
          ((Double)sortedRuleSet[sortType].elementAt(i)).doubleValue();
      indices = Utils.stableSort(confidences);
      for (int i = sortedRuleSet[0].size() - 1;
           (i >= (sortedRuleSet[0].size() - m_numRules)) && (i >= 0); i--) {
        m_allTheRules[0].addElement(sortedRuleSet[0].elementAt(indices[i]));
        m_allTheRules[1].addElement(sortedRuleSet[1].elementAt(indices[i]));
        m_allTheRules[2].addElement(sortedRuleSet[2].elementAt(indices[i]));
        if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
          m_allTheRules[3].addElement(sortedRuleSet[3].elementAt(indices[i]));
          m_allTheRules[4].addElement(sortedRuleSet[4].elementAt(indices[i]));
          m_allTheRules[5].addElement(sortedRuleSet[5].elementAt(indices[i]));
        }
      }

      if (m_verbose) {
        if (m_Ls.size() > 1) {
          System.out.println(toString());
        }
      }

      if (m_minSupport == m_lowerBoundMinSupport || m_minSupport - m_delta > m_lowerBoundMinSupport)
        m_minSupport -= m_delta;
      else
        m_minSupport = m_lowerBoundMinSupport;

      necSupport = Math.round((float)((m_minSupport *
                     (double)m_instances.numInstances()) + 0.5));

      m_cycles++;
    } while ((m_allTheRules[0].size() < m_numRules) &&
             (Utils.grOrEq(m_minSupport, m_lowerBoundMinSupport))
             /* (necSupport >= lowerBoundNumInstancesSupport) */
             /* (Utils.grOrEq(m_minSupport, m_lowerBoundMinSupport)) */
             && (necSupport >= 1));
    m_minSupport += m_delta;
  }

  /**
   * Method that mines all class association rules with minimum support and
   * with a minimum confidence.
   *
   * @return a sorted array of FastVector (sorted by confidence) containing the rules and metric information
   * @param data the instances for which class association rules should be mined
   * @throws Exception if rules can't be built successfully
   */
  public FastVector[] mineCARs(Instances data) throws Exception {

    m_car = true;
    buildAssociations(data);
    return m_allTheRules;
  }

  /**
   * Gets the instances without the class attribute.
   *
   * @return the instances without the class attribute.
   */
  public Instances getInstancesNoClass() {

    return m_instances;
  }

  /**
   * Gets only the class attribute of the instances.
   *
   * @return the class attribute of all instances.
   */
  public Instances getInstancesOnlyClass() {

    return m_onlyClass;
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {

    String string1 = "\tThe required number of rules. (default = " + m_numRules + ")",
      string2 =
      "\tThe minimum confidence of a rule. (default = " + m_minMetric + ")",
      string3 = "\tThe delta by which the minimum support is decreased in\n",
      string4 = "\teach iteration. (default = " + m_delta + ")",
      string5 =
      "\tThe lower bound for the minimum support. (default = " +
      m_lowerBoundMinSupport + ")",
      string6 = "\tIf used, rules are tested for significance at\n",
      string7 = "\tthe given level. Slower. (default = no significance testing)",
      string8 = "\tIf set the itemsets found are also output. (default = no)",
      string9 = "\tIf set class association rules are mined. (default = no)",
      string10 = "\tThe class index. (default = last)",
      stringType = "\tThe metric type by which to rank rules. (default = "
      + "confidence)";

    FastVector newVector = new FastVector(11);

    newVector.addElement(new Option(string1, "N", 1,
                                    "-N <required number of rules output>"));
    newVector.addElement(new Option(stringType, "T", 1,
                                    "-T <0=confidence | 1=lift | "
                                    + "2=leverage | 3=Conviction>"));
    newVector.addElement(new Option(string2, "C", 1,
                                    "-C <minimum metric score of a rule>"));
    newVector.addElement(new Option(string3 + string4, "D", 1,
                                    "-D <delta for minimum support>"));
    newVector.addElement(new Option("\tUpper bound for minimum support. "
                                    + "(default = 1.0)", "U", 1,
                                    "-U <upper bound for minimum support>"));
    newVector.addElement(new Option(string5, "M", 1,
                                    "-M <lower bound for minimum support>"));
    newVector.addElement(new Option(string6 + string7, "S", 1,
                                    "-S <significance level>"));
    newVector.addElement(new Option(string8, "I", 0,
                                    "-I"));
    newVector.addElement(new Option("\tRemove columns that contain "
                                    + "all missing values (default = no)",
                                    "R", 0,
                                    "-R"));
    newVector.addElement(new Option("\tReport progress iteratively. (default "
                                    + "= no)", "V", 0,
                                    "-V"));
    newVector.addElement(new Option(string9, "A", 0,
                                    "-A"));
    newVector.addElement(new Option(string10, "c", 1,
                                    "-c <the class index>"));

    return newVector.elements();
  }

  /**
   * Parses a given list of options. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -N &lt;required number of rules output&gt;
   *  The required number of rules. (default = 10)</pre>
   *
   * <pre> -T &lt;0=confidence | 1=lift | 2=leverage | 3=Conviction&gt;
   *  The metric type by which to rank rules. (default = confidence)</pre>
   *
   * <pre> -C &lt;minimum metric score of a rule&gt;
   *  The minimum confidence of a rule. (default = 0.9)</pre>
   *
   * <pre> -D &lt;delta for minimum support&gt;
   *  The delta by which the minimum support is decreased in
   *  each iteration. (default = 0.05)</pre>
   *
   * <pre> -U &lt;upper bound for minimum support&gt;
   *  Upper bound for minimum support. (default = 1.0)</pre>
   *
   * <pre> -M &lt;lower bound for minimum support&gt;
   *  The lower bound for the minimum support. (default = 0.1)</pre>
   *
   * <pre> -S &lt;significance level&gt;
   *  If used, rules are tested for significance at
   *  the given level. Slower. (default = no significance testing)</pre>
   *
   * <pre> -I
   *  If set the itemsets found are also output. (default = no)</pre>
   *
   * <pre> -R
   *  Remove columns that contain all missing values (default = no)</pre>
   *
   * <pre> -V
   *  Report progress iteratively. (default = no)</pre>
   *
   * <pre> -A
   *  If set class association rules are mined. (default = no)</pre>
   *
   * <pre> -c &lt;the class index&gt;
   *  The class index. (default = last)</pre>
   *
   <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    resetOptions();
    String numRulesString = Utils.getOption('N', options),
      minConfidenceString = Utils.getOption('C', options),
      deltaString = Utils.getOption('D', options),
      maxSupportString = Utils.getOption('U', options),
      minSupportString = Utils.getOption('M', options),
      significanceLevelString = Utils.getOption('S', options),
      classIndexString = Utils.getOption('c', options);
    String metricTypeString = Utils.getOption('T', options);
    if (metricTypeString.length() != 0) {
      setMetricType(new SelectedTag(Integer.parseInt(metricTypeString),
                                    TAGS_SELECTION));
    }

    if (numRulesString.length() != 0) {
      m_numRules = Integer.parseInt(numRulesString);
    }
    if (classIndexString.length() != 0) {
      m_classIndex = Integer.parseInt(classIndexString);
    }
    if (minConfidenceString.length() != 0) {
      m_minMetric = (new Double(minConfidenceString)).doubleValue();
    }
    if (deltaString.length() != 0) {
      m_delta = (new Double(deltaString)).doubleValue();
    }
    if (maxSupportString.length() != 0) {
      setUpperBoundMinSupport((new Double(maxSupportString)).doubleValue());
    }
    if (minSupportString.length() != 0) {
      m_lowerBoundMinSupport = (new Double(minSupportString)).doubleValue();
    }
    if (significanceLevelString.length() != 0) {
      m_significanceLevel = (new Double(significanceLevelString)).doubleValue();
    }
    m_outputItemSets = Utils.getFlag('I', options);
    m_car = Utils.getFlag('A', options);
    m_verbose = Utils.getFlag('V', options);
    setRemoveAllMissingCols(Utils.getFlag('R', options));
  }

  /**
   * Gets the current settings of the Apriori object.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String[] getOptions() {

    String[] options = new String[20];
    int current = 0;
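
The listing above matches the public API of Weka's Apriori associator (weka.associations.Apriori), of which this file appears to be a copy. As a minimal usage sketch only, assuming the standard Weka 3.x classes (Instances, ConverterUtils.DataSource) are on the classpath; the ARFF path and the option values below are placeholder assumptions, not taken from the listing:

import weka.associations.Apriori;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class AprioriDemo {
  public static void main(String[] args) throws Exception {
    // Load a nominal data set; Apriori operates on nominal attributes.
    Instances data = DataSource.read("weather.nominal.arff"); // placeholder path

    Apriori apriori = new Apriori();
    // Corresponds to the -N, -C, -D and -M options parsed by setOptions above:
    // 20 rules, minimum confidence 0.8, support delta 0.05, lower support bound 0.1.
    apriori.setOptions(new String[] {
      "-N", "20", "-C", "0.8", "-D", "0.05", "-M", "0.1"
    });

    // buildAssociations() repeatedly lowers the minimum support by delta
    // until enough rules are found or the lower bound is reached.
    apriori.buildAssociations(data);
    System.out.println(apriori); // toString() prints the mined rules
  }
}

For class association rules, the data would instead be passed to mineCARs(data) (or the -A/-c flags would be set), which turns on the CAR flag and requires the confidence metric, as enforced at the top of buildAssociations.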
