📄 apriori.java
字号:
return "Lower bound for minimum support.";
}
/**
 * Returns the lower bound currently configured for the minimum support.
 *
 * @return the stored lower bound for minimum support
 */
public double getLowerBoundMinSupport() {
  return this.m_lowerBoundMinSupport;
}
/**
 * Stores a new lower bound for the minimum support. The value is stored
 * as given; no range check is performed here.
 *
 * @param v the new lower bound for minimum support
 */
public void setLowerBoundMinSupport(double v) {
  this.m_lowerBoundMinSupport = v;
}
/**
 * Returns the metric currently used for ranking rules, wrapped in a
 * freshly created SelectedTag over TAGS_SELECTION.
 *
 * @return the type of metric to use for ranking rules
 */
public SelectedTag getMetricType() {
  final SelectedTag current = new SelectedTag(m_metricType, TAGS_SELECTION);
  return current;
}
/**
 * Returns the tip text for the metricType property. The text explains the
 * four ranking metrics: confidence, lift, leverage and conviction.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String metricTypeTipText() {
  return "Set the type of metric by which to rank rules. Confidence is "
    + "the proportion of the examples covered by the premise that are also "
    + "covered by the consequence. Lift is confidence divided by the "
    + "proportion of all examples that are covered by the consequence. This "
    + "is a measure of the importance of the association that is independent "
    + "of support. Leverage is the proportion of additional examples covered "
    + "by both the premise and consequence above those expected if the "
    + "premise and consequence were independent of each other. The total "
    + "number of examples that this represents is presented in brackets "
    + "following the leverage. Conviction is "
    + "another measure of departure from independence and furthermore takes into "
    // Spelling corrected here: the text previously read "implicaton".
    + "account implication. Conviction is given "
    + "by P(premise)P(!consequence) / P(premise, !consequence).";
}
/**
 * Selects the metric used for ranking rules and resets the minimum metric
 * score to a default that depends on the resulting metric: 0.9 for
 * confidence, 1.1 for lift or conviction, 0.1 for leverage.
 *
 * The tag is only adopted when it was built over TAGS_SELECTION. Whenever
 * the significance level differs from -1, the metric is forced to
 * confidence regardless of the tag passed in.
 *
 * @param d the type of metric
 */
public void setMetricType (SelectedTag d) {
  // Ignore tags that come from a different tag set.
  if (d.getTags() == TAGS_SELECTION) {
    m_metricType = d.getSelectedTag().getID();
  }

  // A significance level other than -1 only works with the confidence
  // metric; assigning unconditionally has the same effect as the guarded
  // assignment.
  if (m_significanceLevel != -1) {
    m_metricType = CONFIDENCE;
  }

  // Reset the threshold to the default that matches the metric's scale.
  if (m_metricType == CONFIDENCE) {
    setMinMetric(0.9);
  }
  if (m_metricType == LIFT || m_metricType == CONVICTION) {
    setMinMetric(1.1);
  }
  if (m_metricType == LEVERAGE) {
    setMinMetric(0.1);
  }
}
/**
 * Returns the tip text for the minMetric property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String minMetricTipText() {
  final String tip = "Minimum metric score. "
    + "Consider only rules with scores higher than this value.";
  return tip;
}
/**
 * Returns the minimum metric score currently configured.
 *
 * @return the stored minimum metric score
 */
public double getMinMetric() {
  return this.m_minMetric;
}
/**
 * Stores a new minimum metric score. The value is stored as given; no
 * validation is performed here.
 *
 * @param v the new minimum metric score
 */
public void setMinMetric(double v) {
  this.m_minMetric = v;
}
/**
 * Returns the tip text for the numRules property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String numRulesTipText() {
  final String tip = "Number of rules to find.";
  return tip;
}
/**
 * Returns the number of rules currently configured.
 *
 * @return the stored number of rules
 */
public int getNumRules() {
  return this.m_numRules;
}
/**
 * Stores the number of rules to find. The value is stored as given; no
 * validation is performed here.
 *
 * @param v the new number of rules
 */
public void setNumRules(int v) {
  this.m_numRules = v;
}
/**
 * Returns the tip text for the delta property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String deltaTipText() {
  final String tip = "Iteratively decrease support by this factor. "
    + "Reduces support until min support is reached or "
    + "required number of rules has been generated.";
  return tip;
}
/**
 * Returns the delta currently configured.
 *
 * @return the stored delta
 */
public double getDelta() {
  return this.m_delta;
}
/**
 * Stores a new delta. The value is stored as given; no validation is
 * performed here.
 *
 * @param v the new delta
 */
public void setDelta(double v) {
  this.m_delta = v;
}
/**
 * Returns the tip text for the significanceLevel property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String significanceLevelTipText() {
  final String tip = "Significance level. "
    + "Significance test (confidence metric only).";
  return tip;
}
/**
 * Returns the significance level currently configured.
 *
 * @return the stored significance level
 */
public double getSignificanceLevel() {
  return this.m_significanceLevel;
}
/**
 * Stores a new significance level. The value is stored as given; no
 * validation is performed here.
 *
 * @param v the new significance level
 */
public void setSignificanceLevel(double v) {
  this.m_significanceLevel = v;
}
/**
 * Method that finds all large itemsets for the given set of instances.
 *
 * Remembers the instances in m_instances. Each surviving level of item
 * sets is appended to m_Ls, and the hashtable built from that level is
 * appended to m_hashtables. If no singleton item set survives the initial
 * support filter, nothing is appended.
 *
 * @param instances the instances to be used
 * @exception Exception if an attribute is numeric
 */
private void findLargeItemSets(Instances instances) throws Exception {
  m_instances = instances;

  // Turn the relative support bounds into absolute instance counts;
  // adding 0.5 before the int cast rounds instead of truncating.
  int minCount = (int)(m_minSupport * (double)instances.numInstances() + 0.5);
  int maxCount = (int)(m_upperBoundMinSupport * (double)instances.numInstances() + 0.5);

  // Level one: singleton item sets, counted and filtered by support.
  FastVector current = ItemSet.singletons(instances);
  ItemSet.upDateCounters(current, instances);
  current = ItemSet.deleteItemSets(current, minCount, maxCount);
  if (current.size() == 0) {
    return;
  }

  int level = 0;
  do {
    // Record the surviving level, then derive the next one from it.
    m_Ls.addElement(current);
    FastVector previous = current;
    current = ItemSet.mergeAllItemSets(previous, level, instances.numInstances());

    // The hashtable of the previous level is kept in m_hashtables and also
    // used right away to prune the merged item sets.
    Hashtable table = ItemSet.getHashtable(previous, previous.size());
    m_hashtables.addElement(table);
    current = ItemSet.pruneItemSets(current, table);

    ItemSet.upDateCounters(current, instances);
    current = ItemSet.deleteItemSets(current, minCount, maxCount);
    level++;
  } while (current.size() > 0);
}
/**
 * Method that finds all association rules and performs significance test.
 *
 * Walks every level of m_Ls except the first (index 0), asks each item set
 * for its rules via generateRulesBruteForce, and appends the six parallel
 * result vectors element by element to m_allTheRules[0..5].
 *
 * @exception Exception if an attribute is numeric
 */
private void findRulesBruteForce() throws Exception {
  for (int level = 1; level < m_Ls.size(); level++) {
    FastVector levelSets = (FastVector) m_Ls.elementAt(level);
    for (Enumeration en = levelSets.elements(); en.hasMoreElements();) {
      ItemSet itemSet = (ItemSet) en.nextElement();
      FastVector[] found =
        itemSet.generateRulesBruteForce(m_minMetric, m_metricType,
                                        m_hashtables, level + 1,
                                        m_instances.numInstances(),
                                        m_significanceLevel);
      // Copy rule k from each of the six parallel vectors, in index order.
      for (int k = 0; k < found[0].size(); k++) {
        for (int part = 0; part < 6; part++) {
          m_allTheRules[part].addElement(found[part].elementAt(k));
        }
      }
    }
  }
}
/**
 * Method that finds all association rules.
 *
 * Walks every level of m_Ls except the first (index 0), asks each item set
 * for its rules via generateRules, and appends the three parallel result
 * vectors element by element to m_allTheRules[0..2].
 *
 * @exception Exception if an attribute is numeric
 */
private void findRulesQuickly() throws Exception {
  for (int level = 1; level < m_Ls.size(); level++) {
    FastVector levelSets = (FastVector) m_Ls.elementAt(level);
    for (Enumeration en = levelSets.elements(); en.hasMoreElements();) {
      ItemSet itemSet = (ItemSet) en.nextElement();
      FastVector[] found = itemSet.generateRules(m_minMetric, m_hashtables, level + 1);
      // Copy rule k from each of the three parallel vectors, in index order.
      for (int k = 0; k < found[0].size(); k++) {
        for (int part = 0; part < 3; part++) {
          m_allTheRules[part].addElement(found[part].elementAt(k));
        }
      }
    }
  }
}
/**
 * Main method for testing this class.
 *
 * Reads the training file named by the -t option, applies the remaining
 * options to a new Apriori instance, builds the associations and prints
 * the result to standard output. On any failure the stack trace is
 * printed, followed by the exception message and the option summary.
 *
 * @param options the command line options
 */
public static void main(String[] options) {
  StringBuffer text = new StringBuffer();
  Apriori apriori = new Apriori();

  try {
    // Build the usage text first so it is available to the catch block
    // no matter where a later step fails.
    text.append("\n\nApriori options:\n\n");
    text.append("-t <training file>\n");
    text.append("\tThe name of the training file.\n");
    Enumeration em = apriori.listOptions();
    while (em.hasMoreElements()) {
      Option option = (Option)em.nextElement();
      text.append(option.synopsis()+'\n');
      text.append(option.description()+'\n');
    }

    String trainFileString = Utils.getOption('t', options);
    if (trainFileString.length() == 0) {
      throw new Exception("No training file given!");
    }
    apriori.setOptions(options);

    Reader reader = new BufferedReader(new FileReader(trainFileString));
    try {
      apriori.buildAssociations(new Instances(reader));
      System.out.println(apriori);
    } finally {
      // Release the file handle whether or not building succeeded; the
      // reader was previously left open.
      reader.close();
    }
  } catch(Exception e) {
    e.printStackTrace();
    System.out.println("\n"+e.getMessage()+text);
  }
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -