📄 c45loader.java

📁 Java 编写的多种数据挖掘算法包括聚类、分类、预处理等
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
    } catch (Exception ex) {      ex.printStackTrace();    }    return result;  }  /**   * Read the data set incrementally---get the next instance in the data    * set or returns null if there are no   * more instances to get. If the structure hasn't yet been    * determined by a call to getStructure then method should do so before   * returning the next instance in the data set.   *   * If it is not possible to read the data set incrementally (ie. in cases   * where the data set structure cannot be fully established before all   * instances have been seen) then an exception should be thrown.   *   * @return the next instance in the data set as an Instance object or null   * if there are no more instances to be read   * @exception IOException if there is an error during parsing   */  public Instance getNextInstance() throws IOException {    if (m_sourceFile == null) {      throw new IOException("No source has been specified");    }        if (getRetrieval() == BATCH) {      throw new IOException("Cannot mix getting Instances in both incremental and batch modes");    }    setRetrieval(INCREMENTAL);    if (m_structure == null) {      getStructure();    }    StreamTokenizer st = new StreamTokenizer(m_dataReader);    initTokenizer(st);    //    st.ordinaryChar('.');    Instance nextI = getInstance(st);    if (nextI != null) {      nextI.setDataset(m_structure);    }    else{      try {        reset();      } catch (Exception ex) {        ex.printStackTrace();      }    }    return nextI;  }  /**   * Reads an instance using the supplied tokenizer.   *   * @param tokenizer the tokenizer to use   * @return an Instance or null if there are no more instances to read   * @exception IOException if an error occurs   */  private Instance getInstance(StreamTokenizer tokenizer)     throws IOException {    double [] instance = new double[m_structure.numAttributes()];        ConverterUtils.getFirstToken(tokenizer);    if (tokenizer.ttype == StreamTokenizer.TT_EOF) {      return null;    }        int counter = 0;    for (int i = 0; i < m_numAttribs; i++) {      if (i > 0) {	ConverterUtils.getToken(tokenizer);      }      if (!m_ignore[i]) {	// Check if value is missing.	if  (tokenizer.ttype == '?') {	  instance[counter++] = Instance.missingValue();	} else {	  String val = tokenizer.sval;	  if (i == m_numAttribs - 1) {	    // remove trailing period	    	    if (val.charAt(val.length()-1) == '.') {	      val = val.substring(0,val.length()-1);	    }	  }	  if (m_structure.attribute(counter).isNominal()) {	    int index = m_structure.attribute(counter).indexOfValue(val);	    if (index == -1) {	      ConverterUtils.errms(tokenizer, "nominal value not declared in "				   +"header :"+val+" column "+i);	    }	    instance[counter++] = (double)index;	  } else if (m_structure.attribute(counter).isNumeric()) {	    try {	      instance[counter++] = Double.valueOf(val).doubleValue();	    } catch (NumberFormatException e) {	      ConverterUtils.errms(tokenizer, "number expected");	    }	  } else {	    System.err.println("Shouldn't get here");	    System.exit(1);	  }	}      }    }    return new Instance(1.0, instance);  }  /**   * removes the trailing period   *    * @param val the string to work on   * @return the processed string   */  private String removeTrailingPeriod(String val) {    // remove trailing period    if (val.charAt(val.length()-1) == '.') {      val = val.substring(0,val.length()-1);    }    return val;  }  /**   * Reads header (from the names file) using the supplied tokenizer   *   * @param tokenizer the tokenizer to use   * @exception IOException if an error occurs   */  private void readHeader(StreamTokenizer tokenizer) throws IOException {    FastVector attribDefs = new FastVector();    FastVector ignores = new FastVector();    ConverterUtils.getFirstToken(tokenizer);    if (tokenizer.ttype == StreamTokenizer.TT_EOF) {      ConverterUtils.errms(tokenizer,"premature end of file");    }    m_numAttribs = 1;    // Read the class values    FastVector classVals = new FastVector();    while (tokenizer.ttype != StreamTokenizer.TT_EOL) {      String val = tokenizer.sval.trim();            if (val.length() > 0) {	val = removeTrailingPeriod(val);	classVals.addElement(val);      }      ConverterUtils.getToken(tokenizer);    }    // read the attribute names and types    int counter = 0;    while (tokenizer.ttype != StreamTokenizer.TT_EOF) {      ConverterUtils.getFirstToken(tokenizer);      if (tokenizer.ttype != StreamTokenizer.TT_EOF) {	String attribName = tokenizer.sval;	ConverterUtils.getToken(tokenizer);	if (tokenizer.ttype == StreamTokenizer.TT_EOL) {	  ConverterUtils.errms(tokenizer, "premature end of line. Expected "			       +"attribute type.");	}	String temp = tokenizer.sval.toLowerCase().trim();	if (temp.startsWith("ignore") || temp.startsWith("label")) {	  ignores.addElement(new Integer(counter));	  counter++;	} else if (temp.startsWith("continuous")) {	  attribDefs.addElement(new Attribute(attribName));	  counter++;	} else {	  counter++;	  // read the values of the attribute	  FastVector attribVals = new FastVector();	  while (tokenizer.ttype != StreamTokenizer.TT_EOL &&		 tokenizer.ttype != StreamTokenizer.TT_EOF) {	    String val = tokenizer.sval.trim();	    if (val.length() > 0) {	      val = removeTrailingPeriod(val);	      attribVals.addElement(val);	    }	    ConverterUtils.getToken(tokenizer);	  }	  attribDefs.addElement(new Attribute(attribName, attribVals));	}      }    }    boolean ok = true;    int i = -1;    if (classVals.size() == 1) {      // look to see if this is an attribute name (ala c5 names file style)      for (i = 0; i < attribDefs.size(); i++) {	if (((Attribute)attribDefs.elementAt(i))	    .name().compareTo((String)classVals.elementAt(0)) == 0) {	  ok = false;	  m_numAttribs--;	  break;	}      }    }    if (ok) {      attribDefs.addElement(new Attribute("Class", classVals));    }    m_structure = new Instances(m_fileStem, attribDefs, 0);    try {      if (ok) {	m_structure.setClassIndex(m_structure.numAttributes()-1);      } else {	m_structure.setClassIndex(i);      }    } catch (Exception ex) {      ex.printStackTrace();    }    m_numAttribs = m_structure.numAttributes() + ignores.size();    m_ignore = new boolean[m_numAttribs];    for (i = 0; i < ignores.size(); i++) {      m_ignore[((Integer)ignores.elementAt(i)).intValue()] = true;    }  }  /**   * Initializes the stream tokenizer   *   * @param tokenizer the tokenizer to initialize   */  private void initTokenizer(StreamTokenizer tokenizer) {    tokenizer.resetSyntax();             tokenizer.whitespaceChars(0, (' '-1));        tokenizer.wordChars(' ','\u00FF');    tokenizer.whitespaceChars(',',',');    tokenizer.whitespaceChars(':',':');    //    tokenizer.whitespaceChars('.','.');    tokenizer.commentChar('|');    tokenizer.whitespaceChars('\t','\t');    tokenizer.quoteChar('"');    tokenizer.quoteChar('\'');    tokenizer.eolIsSignificant(true);  }  /**   * Main method for testing this class.   *   * @param args should contain &lt;filestem&gt;[.names | data]   */  public static void main (String [] args) {    if (args.length > 0) {      File inputfile;      inputfile = new File(args[0]);      try {	C45Loader cta = new C45Loader();	cta.setSource(inputfile);	System.out.println(cta.getStructure());	Instance temp = cta.getNextInstance();	while (temp != null) {	  System.out.println(temp);	  temp = cta.getNextInstance();	}      } catch (Exception ex) {	ex.printStackTrace();      }    } else {      System.err.println("Usage:\n\tC45Loader <filestem>[.names | data]\n");    }  }}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -