📄 apriori.java
   * these all association rules with a minimum confidence.
   *
   * @param instances the instances to be used for generating the associations
   * @throws Exception if rules can't be built successfully
   */
  public void buildAssociations(Instances instances) throws Exception {

    double[] confidences, supports;
    int[] indices;
    FastVector[] sortedRuleSet;
    int necSupport = 0;

    if (m_removeMissingCols) {
      instances = removeMissingColumns(instances);
    }
    if (m_car && m_metricType != CONFIDENCE)
      throw new Exception("For CAR-Mining metric type has to be confidence!");

    if (m_classIndex == -1)
      instances.setClassIndex(instances.numAttributes() - 1);
    else if (m_classIndex < instances.numAttributes() && m_classIndex >= 0)
      instances.setClassIndex(m_classIndex);
    else
      throw new Exception("Invalid class index.");

    // can associator handle the data?
    getCapabilities().testWithFail(instances);

    m_cycles = 0;

    if (m_car) {
      // m_instances does not contain the class attribute
      m_instances = LabeledItemSet.divide(instances, false);
      // m_onlyClass contains only the class attribute
      m_onlyClass = LabeledItemSet.divide(instances, true);
    } else
      m_instances = instances;

    if (m_car && m_numRules == Integer.MAX_VALUE) {
      // Set desired minimum support
      m_minSupport = m_lowerBoundMinSupport;
    } else {
      // Decrease minimum support until desired number of rules found.
      m_minSupport = m_upperBoundMinSupport - m_delta;
      m_minSupport = (m_minSupport < m_lowerBoundMinSupport)
        ? m_lowerBoundMinSupport
        : m_minSupport;
    }

    do {
      // Reserve space for variables
      m_Ls = new FastVector();
      m_hashtables = new FastVector();
      m_allTheRules = new FastVector[6];
      m_allTheRules[0] = new FastVector();
      m_allTheRules[1] = new FastVector();
      m_allTheRules[2] = new FastVector();
      if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
        m_allTheRules[3] = new FastVector();
        m_allTheRules[4] = new FastVector();
        m_allTheRules[5] = new FastVector();
      }
      sortedRuleSet = new FastVector[6];
      sortedRuleSet[0] = new FastVector();
      sortedRuleSet[1] = new FastVector();
      sortedRuleSet[2] = new FastVector();
      if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
        sortedRuleSet[3] = new FastVector();
        sortedRuleSet[4] = new FastVector();
        sortedRuleSet[5] = new FastVector();
      }
      if (!m_car) {
        // Find large itemsets and rules
        findLargeItemSets();
        if (m_significanceLevel != -1 || m_metricType != CONFIDENCE)
          findRulesBruteForce();
        else
          findRulesQuickly();
      } else {
        findLargeCarItemSets();
        findCarRulesQuickly();
      }

      // Sort rules according to their support
      /* supports = new double[m_allTheRules[2].size()];
      for (int i = 0; i < m_allTheRules[2].size(); i++)
        supports[i] = (double)((AprioriItemSet)m_allTheRules[1].elementAt(i)).support();
      indices = Utils.stableSort(supports);
      for (int i = 0; i < m_allTheRules[2].size(); i++) {
        sortedRuleSet[0].addElement(m_allTheRules[0].elementAt(indices[i]));
        sortedRuleSet[1].addElement(m_allTheRules[1].elementAt(indices[i]));
        sortedRuleSet[2].addElement(m_allTheRules[2].elementAt(indices[i]));
        if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
          sortedRuleSet[3].addElement(m_allTheRules[3].elementAt(indices[i]));
          sortedRuleSet[4].addElement(m_allTheRules[4].elementAt(indices[i]));
          sortedRuleSet[5].addElement(m_allTheRules[5].elementAt(indices[i]));
        }
      } */
      int j = m_allTheRules[2].size() - 1;
      supports = new double[m_allTheRules[2].size()];
      for (int i = 0; i < (j + 1); i++)
        supports[j - i] = ((double)((ItemSet)m_allTheRules[1].elementAt(j - i)).support()) * (-1);
      indices = Utils.stableSort(supports);
      for (int i = 0; i < (j + 1); i++) {
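        // Note: the supports array above holds negated support counts, so
        // Utils.stableSort's ascending order is descending support; walking
        // indices from the back here fills sortedRuleSet in order of
        // increasing support, which lets the stable sort on the ranking
        // metric below break ties in favour of the higher-support rule.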
        sortedRuleSet[0].addElement(m_allTheRules[0].elementAt(indices[j - i]));
        sortedRuleSet[1].addElement(m_allTheRules[1].elementAt(indices[j - i]));
        sortedRuleSet[2].addElement(m_allTheRules[2].elementAt(indices[j - i]));
        if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
          sortedRuleSet[3].addElement(m_allTheRules[3].elementAt(indices[j - i]));
          sortedRuleSet[4].addElement(m_allTheRules[4].elementAt(indices[j - i]));
          sortedRuleSet[5].addElement(m_allTheRules[5].elementAt(indices[j - i]));
        }
      }

      // Sort rules according to their confidence
      m_allTheRules[0].removeAllElements();
      m_allTheRules[1].removeAllElements();
      m_allTheRules[2].removeAllElements();
      if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
        m_allTheRules[3].removeAllElements();
        m_allTheRules[4].removeAllElements();
        m_allTheRules[5].removeAllElements();
      }
      confidences = new double[sortedRuleSet[2].size()];
      int sortType = 2 + m_metricType;
      for (int i = 0; i < sortedRuleSet[2].size(); i++)
        confidences[i] = ((Double)sortedRuleSet[sortType].elementAt(i)).doubleValue();
      indices = Utils.stableSort(confidences);
      for (int i = sortedRuleSet[0].size() - 1;
           (i >= (sortedRuleSet[0].size() - m_numRules)) && (i >= 0); i--) {
        m_allTheRules[0].addElement(sortedRuleSet[0].elementAt(indices[i]));
        m_allTheRules[1].addElement(sortedRuleSet[1].elementAt(indices[i]));
        m_allTheRules[2].addElement(sortedRuleSet[2].elementAt(indices[i]));
        if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
          m_allTheRules[3].addElement(sortedRuleSet[3].elementAt(indices[i]));
          m_allTheRules[4].addElement(sortedRuleSet[4].elementAt(indices[i]));
          m_allTheRules[5].addElement(sortedRuleSet[5].elementAt(indices[i]));
        }
      }

      if (m_verbose) {
        if (m_Ls.size() > 1) {
          System.out.println(toString());
        }
      }

      if (m_minSupport == m_lowerBoundMinSupport
          || m_minSupport - m_delta > m_lowerBoundMinSupport)
        m_minSupport -= m_delta;
      else
        m_minSupport = m_lowerBoundMinSupport;

      necSupport = Math.round((float)((m_minSupport * (double)m_instances.numInstances()) + 0.5));

      m_cycles++;
    } while ((m_allTheRules[0].size() < m_numRules)
             && (Utils.grOrEq(m_minSupport, m_lowerBoundMinSupport))
             /* (necSupport >= lowerBoundNumInstancesSupport) */
             /* (Utils.grOrEq(m_minSupport, m_lowerBoundMinSupport)) */
             && (necSupport >= 1));
    m_minSupport += m_delta;
  }

  /**
   * Method that mines all class association rules with minimum support and
   * with a minimum confidence.
   *
   * @return an array of FastVector, sorted by confidence, containing the rules and metric information
   * @param data the instances for which class association rules should be mined
   * @throws Exception if rules can't be built successfully
   */
  public FastVector[] mineCARs(Instances data) throws Exception {
    m_car = true;
    buildAssociations(data);
    return m_allTheRules;
  }

  /**
   * Gets the instances without the class attribute.
   *
   * @return the instances without the class attribute.
   */
  public Instances getInstancesNoClass() {
    return m_instances;
  }

  /**
   * Gets only the class attribute of the instances.
   *
   * @return the class attribute of all instances.
   */
  public Instances getInstancesOnlyClass() {
    return m_onlyClass;
  }
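  /*
   * A minimal usage sketch, assuming this class is Weka's
   * weka.associations.Apriori and that a nominal ARFF file is available
   * (the file name below is hypothetical):
   *
   *   Instances data = new Instances(
   *       new java.io.BufferedReader(new java.io.FileReader("market.arff")));
   *   Apriori apriori = new Apriori();
   *   apriori.setOptions(Utils.splitOptions("-N 20 -C 0.8"));  // see setOptions below
   *   apriori.buildAssociations(data);   // ordinary association rules
   *   System.out.println(apriori);
   *
   *   // Class association rules; the class index defaults to the last
   *   // attribute (see the -c option), and the metric must be confidence:
   *   FastVector[] carRules = apriori.mineCARs(data);
   */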
(default = " + m_delta + ")", string5 = "\tThe lower bound for the minimum support. (default = " + m_lowerBoundMinSupport + ")", string6 = "\tIf used, rules are tested for significance at\n", string7 = "\tthe given level. Slower. (default = no significance testing)", string8 = "\tIf set the itemsets found are also output. (default = no)", string9 = "\tIf set class association rules are mined. (default = no)", string10 = "\tThe class index. (default = last)", stringType = "\tThe metric type by which to rank rules. (default = " +"confidence)"; FastVector newVector = new FastVector(11); newVector.addElement(new Option(string1, "N", 1, "-N <required number of rules output>")); newVector.addElement(new Option(stringType, "T", 1, "-T <0=confidence | 1=lift | " +"2=leverage | 3=Conviction>")); newVector.addElement(new Option(string2, "C", 1, "-C <minimum metric score of a rule>")); newVector.addElement(new Option(string3 + string4, "D", 1, "-D <delta for minimum support>")); newVector.addElement(new Option("\tUpper bound for minimum support. " +"(default = 1.0)", "U", 1, "-U <upper bound for minimum support>")); newVector.addElement(new Option(string5, "M", 1, "-M <lower bound for minimum support>")); newVector.addElement(new Option(string6 + string7, "S", 1, "-S <significance level>")); newVector.addElement(new Option(string8, "I", 0, "-I")); newVector.addElement(new Option("\tRemove columns that contain " +"all missing values (default = no)" , "R", 0, "-R")); newVector.addElement(new Option("\tReport progress iteratively. (default " +"= no)", "V", 0, "-V")); newVector.addElement(new Option(string9, "A", 0, "-A")); newVector.addElement(new Option(string10, "c", 1, "-c <the class index>")); return newVector.elements(); } /** * Parses a given list of options. <p/> * <!-- options-start --> * Valid options are: <p/> * * <pre> -N <required number of rules output> * The required number of rules. (default = 10)</pre> * * <pre> -T <0=confidence | 1=lift | 2=leverage | 3=Conviction> * The metric type by which to rank rules. (default = confidence)</pre> * * <pre> -C <minimum metric score of a rule> * The minimum confidence of a rule. (default = 0.9)</pre> * * <pre> -D <delta for minimum support> * The delta by which the minimum support is decreased in * each iteration. (default = 0.05)</pre> * * <pre> -U <upper bound for minimum support> * Upper bound for minimum support. (default = 1.0)</pre> * * <pre> -M <lower bound for minimum support> * The lower bound for the minimum support. (default = 0.1)</pre> * * <pre> -S <significance level> * If used, rules are tested for significance at * the given level. Slower. (default = no significance testing)</pre> * * <pre> -I * If set the itemsets found are also output. (default = no)</pre> * * <pre> -R * Remove columns that contain all missing values (default = no)</pre> * * <pre> -V * Report progress iteratively. (default = no)</pre> * * <pre> -A * If set class association rules are mined. (default = no)</pre> * * <pre> -c <the class index> * The class index. 
  /**
   * Parses a given list of options. <p/>
   *
   * <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -N <required number of rules output>
   *  The required number of rules. (default = 10)</pre>
   *
   * <pre> -T <0=confidence | 1=lift | 2=leverage | 3=Conviction>
   *  The metric type by which to rank rules. (default = confidence)</pre>
   *
   * <pre> -C <minimum metric score of a rule>
   *  The minimum confidence of a rule. (default = 0.9)</pre>
   *
   * <pre> -D <delta for minimum support>
   *  The delta by which the minimum support is decreased in
   *  each iteration. (default = 0.05)</pre>
   *
   * <pre> -U <upper bound for minimum support>
   *  Upper bound for minimum support. (default = 1.0)</pre>
   *
   * <pre> -M <lower bound for minimum support>
   *  The lower bound for the minimum support. (default = 0.1)</pre>
   *
   * <pre> -S <significance level>
   *  If used, rules are tested for significance at
   *  the given level. Slower. (default = no significance testing)</pre>
   *
   * <pre> -I
   *  If set the itemsets found are also output. (default = no)</pre>
   *
   * <pre> -R
   *  Remove columns that contain all missing values (default = no)</pre>
   *
   * <pre> -V
   *  Report progress iteratively. (default = no)</pre>
   *
   * <pre> -A
   *  If set class association rules are mined. (default = no)</pre>
   *
   * <pre> -c <the class index>
   *  The class index. (default = last)</pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    resetOptions();
    String numRulesString = Utils.getOption('N', options),
      minConfidenceString = Utils.getOption('C', options),
      deltaString = Utils.getOption('D', options),
      maxSupportString = Utils.getOption('U', options),
      minSupportString = Utils.getOption('M', options),
      significanceLevelString = Utils.getOption('S', options),
      classIndexString = Utils.getOption('c', options);
    String metricTypeString = Utils.getOption('T', options);
    if (metricTypeString.length() != 0) {
      setMetricType(new SelectedTag(Integer.parseInt(metricTypeString),
                                    TAGS_SELECTION));
    }

    if (numRulesString.length() != 0) {
      m_numRules = Integer.parseInt(numRulesString);
    }
    if (classIndexString.length() != 0) {
      m_classIndex = Integer.parseInt(classIndexString);
    }
    if (minConfidenceString.length() != 0) {
      m_minMetric = (new Double(minConfidenceString)).doubleValue();
    }
    if (deltaString.length() != 0) {
      m_delta = (new Double(deltaString)).doubleValue();
    }
    if (maxSupportString.length() != 0) {
      setUpperBoundMinSupport((new Double(maxSupportString)).doubleValue());
    }
    if (minSupportString.length() != 0) {
      m_lowerBoundMinSupport = (new Double(minSupportString)).doubleValue();
    }
    if (significanceLevelString.length() != 0) {
      m_significanceLevel = (new Double(significanceLevelString)).doubleValue();
    }
    m_outputItemSets = Utils.getFlag('I', options);
    m_car = Utils.getFlag('A', options);
    m_verbose = Utils.getFlag('V', options);
    setRemoveAllMissingCols(Utils.getFlag('R', options));
  }

  /**
   * Gets the current settings of the Apriori object.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String[] getOptions() {

    String[] options = new String[20];
    int current = 0;