📄 roughset.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* RoughSet.java
* Copyright (C) 2007 ren xian hua
*
*/
package weka.classifiers.rules;
import weka.classifiers.Classifier;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Capabilities.Capability;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import java.io.Serializable;
import java.util.Enumeration;
public class RoughSet
extends Classifier
implements TechnicalInformationHandler {
/** for serialization */
static final long serialVersionUID = 1310258880025902106L;
/**
 * Builds the human-readable description of this classifier.
 *
 * The text says (partly in Chinese) that the scheme only handles nominal
 * attributes, cannot handle missing values and performs no pruning, and it
 * ends with the string form of {@link #getTechnicalInformation()}.
 *
 * @return a description suitable for displaying in the
 *         explorer/experimenter gui
 */
public String globalInfo() {
  String reference = getTechnicalInformation().toString();
  StringBuilder info = new StringBuilder();
  info.append("Class for building and using a RoughSet rule set for classification. ");
  info.append("只能处理名词型属性. 不能处理缺省值. ");
  info.append("不能处理任何修剪.\n\n");
  info.append("想了解更多的信息,请查看 \n\n");
  info.append(reference);
  return info.toString();
}
// NOTE(review): package-private field initialised to 1. Nothing in the nearby
// code reads it - confirm its purpose where it is used, or remove it.
int T=1;
/**
 * Returns an instance of a TechnicalInformation object, containing
 * detailed information about the technical background of this class,
 * e.g., paper reference or book this class is based on.
 *
 * The entry cites Pawlak's original rough-set article. The earlier version
 * of this method combined Pawlak's name and year with the title suffix,
 * journal, volume, number and pages of Cendrowska's 1987 PRISM paper, which
 * produced a reference that does not exist.
 *
 * @return the technical information about this class
 */
public TechnicalInformation getTechnicalInformation() {
  TechnicalInformation result = new TechnicalInformation(Type.ARTICLE);
  result.setValue(Field.AUTHOR, "Z. Pawlak");
  result.setValue(Field.YEAR, "1982");
  result.setValue(Field.TITLE, "Rough sets");
  result.setValue(Field.JOURNAL, "International Journal of Computer and Information Sciences");
  result.setValue(Field.VOLUME, "11");
  result.setValue(Field.NUMBER, "5");
  result.setValue(Field.PAGES, "341-356");
  return result;
}
/* A single classification rule; rules are chained into a singly linked list. */
private class RoughSetRule
  implements Serializable {

  /** for serialization */
  static final long serialVersionUID = 4248784350656508583L;

  /** The class index this rule predicts. */
  private int m_classification;

  /** Instance count associated with the rule; printed in parentheses by toString(). */
  private int m_num;

  /** Empty copy of the data set, used to look up attribute and class names. */
  private Instances m_instances;

  /** First attribute test of the rule, or null when the rule has no tests. */
  private Attr m_attribute;

  /** Next rule in the list, or null for the last rule. */
  private RoughSetRule m_next;

  /**
   * Initialises an empty rule: no tests, no successor, count of zero.
   *
   * @param data the instances whose structure the rule refers to
   */
  public RoughSetRule(Instances data) {
    m_instances = new Instances(data, 0);
    m_num = 0;
    m_attribute = null;
    m_next = null;
  }

  /**
   * Returns the result assigned by this rule alone to a given instance.
   *
   * @param inst the instance to be classified
   * @return the rule's class index if the rule has no tests or all of its
   *         tests are satisfied, otherwise -1
   */
  public int resultRule(Instance inst) {
    boolean fires = (m_attribute == null) || m_attribute.satisfies(inst);
    return fires ? m_classification : -1;
  }

  /**
   * Returns the result assigned to a given instance by the first rule that
   * fires, starting with this rule and following the list.
   *
   * @param inst the instance to be classified
   * @return the class index of the first firing rule, or -1 if none fires
   */
  public int resultRules(Instance inst) {
    for (RoughSetRule rule = this; rule != null; rule = rule.m_next) {
      if (rule.resultRule(inst) != -1) {
        return rule.m_classification;
      }
    }
    return -1;
  }

  /**
   * Returns the set of instances that are covered by this rule.
   *
   * @param data the instances to be checked
   * @return the instances covered
   */
  public Instances coveredBy(Instances data) {
    return partition(data, true);
  }

  /**
   * Returns the set of instances that are not covered by this rule.
   *
   * @param data the instances to be checked
   * @return the instances not covered
   */
  public Instances notCoveredBy(Instances data) {
    return partition(data, false);
  }

  /**
   * Collects the instances whose covered/not-covered status under this rule
   * equals the requested one.
   *
   * @param data the instances to be checked
   * @param wantCovered true to keep covered instances, false to keep the rest
   * @return the selected instances, compacted
   */
  private Instances partition(Instances data, boolean wantCovered) {
    Instances subset = new Instances(data, data.numInstances());
    for (Enumeration e = data.enumerateInstances(); e.hasMoreElements();) {
      Instance candidate = (Instance) e.nextElement();
      boolean covered = resultRule(candidate) != -1;
      if (covered == wantCovered) {
        subset.add(candidate);
      }
    }
    subset.compactify();
    return subset;
  }

  /**
   * Prints this rule followed by all rules after it in the list.
   *
   * @return a description of the rules as a string, or a fixed error
   *         message if anything goes wrong while formatting
   */
  public String toString() {
    try {
      StringBuilder out = new StringBuilder();
      if (m_attribute != null) {
        out.append("If ");
        Attr cond = m_attribute;
        while (cond != null) {
          if (cond.m_attr == -1) {
            out.append("?");
          } else {
            String name = m_instances.attribute(cond.m_attr).name();
            String value = m_instances.attribute(cond.m_attr).value(cond.m_val);
            out.append(name + " = " + value);
          }
          if (cond.m_next != null) {
            out.append("\n and ");
          }
          cond = cond.m_next;
        }
        out.append(" ==> ");
      }
      out.append(m_instances.classAttribute().value(m_classification));
      out.append("(").append(m_num).append(")" + "\n");
      // The remainder of the list is rendered recursively so that a failure
      // in a later rule only replaces that rule's tail of the output.
      if (m_next != null) {
        out.append(m_next.toString());
      }
      return out.toString();
    } catch (Exception e) {
      return "不能输出RoughSet分类结果!";
    }
  }
}
/* One attribute-value test; tests are chained into a singly linked list. */
private class Attr
  implements Serializable {

  /** for serialization */
  static final long serialVersionUID = -8925333011350280799L;

  /** Index of the attribute being tested (-1 until set). */
  private int m_attr = -1;

  /** Index of the attribute value the test requires. */
  private int m_val;

  /** Next test in the chain, or null for the last test. */
  private Attr m_next = null;

  /**
   * Returns whether a given instance satisfies this test and every test
   * chained after it.
   *
   * @param inst the instance to be tested
   * @return true if the instance satisfies all tests in the chain
   */
  private boolean satisfies(Instance inst) {
    for (Attr test = this; test != null; test = test.m_next) {
      if ((int) inst.value(test.m_attr) != test.m_val) {
        return false;
      }
    }
    return true;
  }
}
/** Head of the linked list of rules. */
private RoughSetRule m_rules;
/**
 * Classifies a given instance.
 *
 * The rule list is consulted in order and the class of the first rule that
 * fires is returned. If no rule fires, or if there is no rule list at all
 * (for example because no classifier has been built yet), the instance is
 * left unclassified instead of failing with a NullPointerException.
 *
 * @param inst the instance to be classified
 * @return the index of the predicted class, or the missing value if the
 *         instance cannot be classified
 */
public double classifyInstance(Instance inst) {
  // Without any rules nothing can fire; report "unclassified".
  if (m_rules == null) {
    return Instance.missingValue();
  }
  int result = m_rules.resultRules(inst);
  return (result == -1) ? Instance.missingValue() : (double) result;
}
/**
 * Returns the capabilities of this classifier: the capabilities reported by
 * the superclass with nominal attributes, a nominal class and missing class
 * values switched on.
 *
 * NOTE(review): if the superclass reports everything as enabled (as later
 * Weka releases are believed to do), a disableAll() call would be needed
 * first - confirm against the Weka version in use.
 *
 * @return the capabilities of this classifier
 */
public Capabilities getCapabilities() {
  Capabilities caps = super.getCapabilities();
  Capability[] supported = {
    Capability.NOMINAL_ATTRIBUTES,    // attributes
    Capability.NOMINAL_CLASS,         // class
    Capability.MISSING_CLASS_VALUES
  };
  for (Capability capability : supported) {
    caps.enable(capability);
  }
  return caps;
}
/**
 * Builds the classification model.
 *
 * Works on a copy of the supplied data so the caller's set is not modified.
 * Instances with a missing class value are dropped from the copy before the
 * rules are induced. The capabilities check is intentionally left disabled,
 * as in the original code.
 *
 * @param data set of instances serving as training data
 * @exception Exception if the classifier has not been generated successfully
 */
public void buildClassifier(Instances data) throws Exception {
  // can classifier handle the data?
  // getCapabilities().testWithFail(data);

  Instances training = new Instances(data);
  // remove instances with missing class
  training.deleteWithMissingClass();

  // main routine that induces the rule list
  makeRules(training);
}
/**
 * Appends a new rule to the rule list.
 *
 * When there is no previous rule the new rule becomes the head of the list;
 * otherwise it is linked in after the given last rule.
 *
 * @param lastRule the last rule in the rule set, or null if the set is empty
 * @param newRule the rule to be added
 * @return the new last rule in the rule set (always {@code newRule})
 */
private RoughSetRule addRule(RoughSetRule lastRule, RoughSetRule newRule) {
  if (lastRule != null) {
    lastRule.m_next = newRule;
  } else {
    m_rules = newRule;
  }
  return newRule;
}
/**
 * Adds an attribute test to a rule.
 *
 * If the rule has no test yet, the new test becomes its first one;
 * otherwise it is chained after the given last test.
 *
 * @param rule the rule to which the test is to be added
 * @param lastAttr the rule's current last test (only used when the rule
 *        already has a first test)
 * @param newAttr the test to be added
 * @return the new last test of the rule (always {@code newAttr})
 */
private Attr addAttr(RoughSetRule rule, Attr lastAttr, Attr newAttr) {
  if (rule.m_attribute != null) {
    lastAttr.m_next = newAttr;
  } else {
    rule.m_attribute = newAttr;
  }
  return newAttr;
}
/*
private Instances[] ind(Instances data, Attribute att) {
Instances[] Ind = new Instances[att.numValues()];
for (int j = 0; j < att.numValues(); j++) {
Ind[j] = new Instances(data, data.numInstances());
}
int d = 0;
Enumeration instEnum = data.enumerateInstances();
while (instEnum.hasMoreElements()) {
Instance inst = (Instance) instEnum.nextElement();
inst.setWeight((double)d);
Ind[(int) inst.value(att)].add(inst);
d++;
}
for (int i = 0; i < Ind.length; i++) {
Ind[i].compactify();
}
return Ind;
}
*/
private Instances[] ind(Instances data, Attribute[] att) {
int count=1;
if(att[0]==null)
return null;
for(int i=0;i<att.length&&(att[i]!=null);i++){
count=count*att[i].numValues();
}
Instances[] Ind = new Instances[count];
for (int j = 0; j < count; j++) {
Ind[j] = new Instances(data, data.numInstances());
}
// int d = 0;
Enumeration instEnum = data.enumerateInstances();
while (instEnum.hasMoreElements()) {
Instance inst = (Instance) instEnum.nextElement();
// inst.setWeight((double)d);
int p=0;
for(int i=0;i<att.length&&(att[i]!=null);i++){
p=(p*att[i].numValues()+(int) inst.value(att[i]));
}
Ind[p].add(inst);
// d++;
}
for (int i = 0; i < Ind.length; i++) {
Ind[i].compactify();
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -