⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 roughset.java

📁 数据挖掘技术-粗糙集属性约减算法
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    RoughSet.java
 *    Copyright (C) 2007 ren xian hua
 *
 */

package weka.classifiers.rules;

import weka.classifiers.Classifier;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Capabilities.Capability;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;

import java.io.Serializable;
import java.util.Enumeration;

public class RoughSet
extends Classifier 
implements TechnicalInformationHandler {

	/** for serialization */
	  static final long serialVersionUID = 1310258880025902106L;
	  
	  /**
	   * Builds the descriptive text for this classifier, suitable for
	   * displaying in the explorer/experimenter gui.
	   * <p>
	   * The embedded Chinese sentences state that only nominal attributes are
	   * handled, that missing values are not handled, and that no pruning is
	   * performed. The text ends with the string form of
	   * {@link #getTechnicalInformation()}.
	   *
	   * @return the description of this classifier
	   */
	  public String globalInfo() {
	    StringBuffer description = new StringBuffer();
	    description.append("Class for building and using a RoughSet rule set for classification. ");
	    description.append("只能处理名词型属性. 不能处理缺省值. ");
	    description.append("不能处理任何修剪.\n\n");
	    description.append("想了解更多的信息,请查看 \n\n");
	    description.append(getTechnicalInformation().toString());
	    return description.toString();
	  }
	  /**
	   * Package-private integer field initialised to 1. It is not referenced
	   * anywhere in the visible part of this file.
	   * NOTE(review): purpose unclear from here -- confirm against the rest of
	   * the class before renaming or removing.
	   */
	  int T=1;

	  /**
	   * Returns an instance of a TechnicalInformation object, containing
	   * detailed information about the technical background of this class,
	   * e.g., paper reference or book this class is based on.
	   * <p>
	   * The reference is Pawlak's 1982 article introducing rough sets. The
	   * previous entry combined Pawlak's name and year with the title, journal,
	   * volume, number and pages of an unrelated 1987 paper on inducing modular
	   * rules; all fields now describe the same publication.
	   *
	   * @return the technical information about this class
	   */
	  public TechnicalInformation getTechnicalInformation() {
	    TechnicalInformation reference = new TechnicalInformation(Type.ARTICLE);

	    reference.setValue(Field.AUTHOR, "Z. Pawlak");
	    reference.setValue(Field.YEAR, "1982");
	    reference.setValue(Field.TITLE, "Rough sets");
	    reference.setValue(Field.JOURNAL, "International Journal of Computer and Information Sciences");
	    reference.setValue(Field.VOLUME, "11");
	    reference.setValue(Field.NUMBER, "5");
	    reference.setValue(Field.PAGES, "341-356");

	    return reference;
	  }
	  
	  
	  /**
	   * A single rule. Rules are chained through {@code m_next}; each rule holds
	   * a chain of attribute tests and the class index it predicts.
	   */
	  private class RoughSetRule 
	    implements Serializable {

	    /** for serialization */
	    static final long serialVersionUID = 4248784350656508583L;

	    /** The classification (class value index) returned when the rule fires. */
	    private int m_classification;

	    /** Instance count of the rule; printed in parentheses by toString(). */
	    private int m_num;

	    /** Empty dataset built from the data given to the constructor. */
	    private Instances m_instances;

	    /** First attribute test of the rule, or null if the rule has no tests. */
	    private Attr m_attribute;

	    /** The next rule in the chain, or null if this is the last one. */
	    private RoughSetRule m_next;

	    /**
	     * Initialises a rule with no tests, no successor and a count of zero.
	     * Only an empty {@code Instances} object (capacity 0) created from
	     * {@code data} is retained.
	     *
	     * @param data the instances
	     */
	    public RoughSetRule(Instances data) {

	      m_num = 0;
	      m_attribute = null;
	      m_next = null;
	      m_instances = new Instances(data, 0);
	    }

	    /**
	     * Applies this rule alone to an instance. A rule without tests matches
	     * every instance.
	     *
	     * @param inst the instance to be classified
	     * @return the classification, or -1 if the rule's tests are not satisfied
	     */
	    public int resultRule(Instance inst) {

	      boolean fires = (m_attribute == null) || m_attribute.satisfies(inst);
	      return fires ? m_classification : -1;
	    }

	    /**
	     * Applies this rule and then its successors, in chain order, and
	     * returns the classification of the first rule that fires.
	     *
	     * @param inst the instance to be classified
	     * @return the classification, or -1 if no rule in the chain fires
	     */
	    public int resultRules(Instance inst) {

	      for (RoughSetRule rule = this; rule != null; rule = rule.m_next) {
	        if (rule.resultRule(inst) != -1) {
	          return rule.m_classification;
	        }
	      }
	      return -1;
	    }

	    /**
	     * Returns the set of instances that are covered by this rule.
	     *
	     * @param data the instances to be checked
	     * @return the instances covered
	     */
	    public Instances coveredBy(Instances data) {

	      return select(data, true);
	    }

	    /**
	     * Returns the set of instances that are not covered by this rule.
	     *
	     * @param data the instances to be checked
	     * @return the instances not covered
	     */
	    public Instances notCoveredBy(Instances data) {

	      return select(data, false);
	    }

	    /**
	     * Collects the instances whose covered/not-covered status under this
	     * rule equals the requested one.
	     *
	     * @param data the instances to be checked
	     * @param wantCovered true to keep covered instances, false to keep the others
	     * @return a compactified set with the selected instances
	     */
	    private Instances select(Instances data, boolean wantCovered) {

	      Instances picked = new Instances(data, data.numInstances());
	      for (Enumeration all = data.enumerateInstances(); all.hasMoreElements();) {
	        Instance candidate = (Instance) all.nextElement();
	        boolean covered = resultRule(candidate) != -1;
	        if (covered == wantCovered) {
	          picked.add(candidate);
	        }
	      }
	      picked.compactify();
	      return picked;
	    }

	    /**
	     * Prints this rule followed by all rules chained after it.
	     *
	     * @return a description of the rules as a string, or a fixed error
	     *         message if an exception occurs while building it
	     */
	    public String toString() {

	      try {
	        StringBuffer out = new StringBuffer();
	        Attr test = m_attribute;
	        if (test != null) {
	          out.append("If ");
	          while (test != null) {
	            if (test.m_attr == -1) {
	              // test whose attribute index was never set
	              out.append("?");
	            } else {
	              Attribute column = m_instances.attribute(test.m_attr);
	              out.append(column.name() + " = " + column.value(test.m_val));
	            }
	            test = test.m_next;
	            if (test != null) {
	              out.append("\n  and ");
	            }
	          }
	          out.append("   ==> ");
	        }
	        out.append(m_instances.classAttribute().value(m_classification));
	        out.append("(").append(m_num).append(")\n");
	        if (m_next != null) {
	          out.append(m_next.toString());
	        }
	        return out.toString();
	      } catch (Exception e) {
	        return "不能输出RoughSet分类结果!";
	      }
	    }
	  }

	  /**
	   * One attribute-value test of a rule. Tests are chained through
	   * {@code m_next}.
	   */
	  private class Attr 
	    implements Serializable { 

	    /** for serialization */
	    static final long serialVersionUID = -8925333011350280799L;

	    /** Index of the tested attribute; -1 until it is assigned. */
	    private int m_attr = -1; 

	    /** Index of the attribute value the test requires. */
	    private int m_val; 

	    /** The next test in the chain, or null if this is the last one. */
	    private Attr m_next = null; 

	    /**
	     * Returns whether a given instance satisfies this test and every test
	     * chained after it. Checking stops at the first test that fails.
	     *
	     * @param inst the instance to be tested
	     * @return true if the instance satisfies all tests from here on
	     */
	    private boolean satisfies(Instance inst) {

	      for (Attr test = this; test != null; test = test.m_next) {
	        if ((int) inst.value(test.m_attr) != test.m_val) {
	          return false;
	        }
	      }
	      return true;
	    }
	  }

	  
	  /** Head of the linked list of rules. */
	  private RoughSetRule m_rules;

	  /**
	   * Classifies a given instance by running it through the rule list.
	   * <p>
	   * If no rule list exists (the rule head is null, e.g. the classifier has
	   * not been built) the instance is reported as unclassified instead of
	   * failing with a NullPointerException.
	   *
	   * @param inst the instance to be classified
	   * @return the index of the predicted class, or
	   *         {@code Instance.missingValue()} if no rule fires or there are
	   *         no rules
	   */
	  public double classifyInstance(Instance inst) {

	    // No rules available: nothing can fire, so report "no prediction".
	    if (m_rules == null) {
	      return Instance.missingValue();
	    }

	    int predicted = m_rules.resultRules(inst);
	    if (predicted == -1) {
	      return Instance.missingValue();
	    }
	    return (double) predicted;
	  }

	  /**
	   * Returns the capabilities of this classifier: the superclass
	   * capabilities with nominal attributes, nominal class and missing class
	   * values additionally enabled.
	   *
	   * @return      the capabilities of this classifier
	   */
	  public Capabilities getCapabilities() {
	    Capabilities supported = super.getCapabilities();

	    // Enabled in this order: attributes first, then the class settings.
	    Capability[] toEnable = {
	      Capability.NOMINAL_ATTRIBUTES,
	      Capability.NOMINAL_CLASS,
	      Capability.MISSING_CLASS_VALUES
	    };
	    for (int k = 0; k < toEnable.length; k++) {
	      supported.enable(toEnable[k]);
	    }

	    return supported;
	  }
	  
/**
 * Builds the classification model.
 * <p>
 * Works on a copy of the training data from which instances with a missing
 * class have been deleted, and passes that copy to makeRules. No capability
 * check is performed on the data here.
 *
 * @param data set of instances serving as training data
 * @exception Exception if the classifier has not been generated successfully
 */
public void buildClassifier(Instances data) throws Exception {

    // Copy first so the deletion below does not touch the caller's dataset.
    Instances training = new Instances(data);
    training.deleteWithMissingClass();

    // main routine of the model
    makeRules(training);
  }


/**
 * Appends a new rule to the rule list. When there is no last rule yet, the
 * new rule becomes the head of the list.
 *
 * @param lastRule the last rule in the rule set, or null if the set is empty
 * @param newRule the rule to be added
 * @return the new last rule in the rule set
 */
private RoughSetRule addRule(RoughSetRule lastRule, RoughSetRule newRule) {

  if (lastRule != null) {
    lastRule.m_next = newRule;
    return newRule;
  }
  m_rules = newRule;
  return newRule;
}

/**
 * Appends an attribute test to a rule. When the rule has no test yet, the
 * new test becomes its first test; otherwise it is linked after lastAttr.
 *
 * @param rule the rule to which the test is to be added
 * @param lastAttr the rule's last test (used only if the rule already has a test)
 * @param newAttr the test to be added
 * @return the new last test of the rule
 */
private Attr addAttr(RoughSetRule rule, Attr lastAttr, Attr newAttr) {

  if (rule.m_attribute != null) {
    lastAttr.m_next = newAttr;
  } else {
    rule.m_attribute = newAttr;
  }
  return newAttr;
}

/*
private Instances[] ind(Instances data, Attribute att) {

    Instances[] Ind = new Instances[att.numValues()];
    for (int j = 0; j < att.numValues(); j++) {
      Ind[j] = new Instances(data, data.numInstances());
    }
    int d = 0;
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
      Instance inst = (Instance) instEnum.nextElement();
      inst.setWeight((double)d);
      Ind[(int) inst.value(att)].add(inst);
      d++;
    }
    for (int i = 0; i < Ind.length; i++) {
      Ind[i].compactify();
    }
    return Ind;
}
*/

private Instances[] ind(Instances data, Attribute[] att) {

	int count=1;
	if(att[0]==null)
		return null;
	for(int i=0;i<att.length&&(att[i]!=null);i++){
		
		count=count*att[i].numValues();
	}
	
	
    Instances[] Ind = new Instances[count];
    for (int j = 0; j < count; j++) {
      Ind[j] = new Instances(data, data.numInstances());
    }
//    int d = 0;
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
      Instance inst = (Instance) instEnum.nextElement();
 //     inst.setWeight((double)d);
        int p=0;
        for(int i=0;i<att.length&&(att[i]!=null);i++){
  		    p=(p*att[i].numValues()+(int) inst.value(att[i]));
  	    }
      
      Ind[p].add(inst);
      
//      d++;
    }
    for (int i = 0; i < Ind.length; i++) {
      Ind[i].compactify();
    }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -