
📄 instances.java

📁 Companion source code for the book "Data Mining: Practical Machine Learning Tools and Techniques with Java Implementations"
💻 JAVA
📖 Page 1 of 5
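The listing below is the ARFF-parsing portion of Weka's Instances class. A minimal usage sketch, assuming the class lives in the weka.core package as in standard Weka distributions; the file path weather.arff is a placeholder:

    import java.io.FileReader;
    import weka.core.Instances;

    public class ReadArffDemo {
      public static void main(String[] args) throws Exception {
        // The Instances(Reader) constructor drives readHeader() and
        // getInstance(), both shown in the listing below.
        Instances data = new Instances(new FileReader("weather.arff"));
        // Prints the per-attribute summary table whose construction
        // appears at the top of the listing.
        System.out.println(data.toSummaryString());
      }
    }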
	result.append(Utils.padLeft("Str", 4)).append(' ');	percent = Math.round(100.0 * as.intCount / as.totalCount);	result.append(Utils.padLeft("" + percent, 3)).append("% ");	result.append(Utils.padLeft("" + 0, 3)).append("% ");	percent = Math.round(100.0 * as.realCount / as.totalCount);	result.append(Utils.padLeft("" + percent, 3)).append("% ");	break;      default:	result.append(Utils.padLeft("???", 4)).append(' ');	result.append(Utils.padLeft("" + 0, 3)).append("% ");	percent = Math.round(100.0 * as.intCount / as.totalCount);	result.append(Utils.padLeft("" + percent, 3)).append("% ");	percent = Math.round(100.0 * as.realCount / as.totalCount);	result.append(Utils.padLeft("" + percent, 3)).append("% ");	break;      }      result.append(Utils.padLeft("" + as.missingCount, 5)).append(" /");      percent = Math.round(100.0 * as.missingCount / as.totalCount);      result.append(Utils.padLeft("" + percent, 3)).append("% ");      result.append(Utils.padLeft("" + as.uniqueCount, 5)).append(" /");      percent = Math.round(100.0 * as.uniqueCount / as.totalCount);      result.append(Utils.padLeft("" + percent, 3)).append("% ");      result.append(Utils.padLeft("" + as.distinctCount, 5)).append(' ');      result.append('\n');    }    return result.toString();  }    /**   * Reads a single instance using the tokenizer and appends it   * to the dataset. Automatically expands the dataset if it   * is not large enough to hold the instance.   *   * @param tokenizer the tokenizer to be used   * @param flag if method should test for carriage return after    * each instance   * @return false if end of file has been reached   * @exception IOException if the information is not read    * successfully   */   protected boolean getInstance(StreamTokenizer tokenizer, 				boolean flag)        throws IOException {        // Check if any attributes have been declared.    if (m_Attributes.size() == 0) {      errms(tokenizer,"no header information available");    }    // Check if end of file reached.    getFirstToken(tokenizer);    if (tokenizer.ttype == StreamTokenizer.TT_EOF) {      return false;    }        // Parse instance    if (tokenizer.ttype == '{') {      return getInstanceSparse(tokenizer, flag);    } else {      return getInstanceFull(tokenizer, flag);    }  }  /**   * Reads a single instance using the tokenizer and appends it   * to the dataset. Automatically expands the dataset if it   * is not large enough to hold the instance.   *   * @param tokenizer the tokenizer to be used   * @param flag if method should test for carriage return after    * each instance   * @return false if end of file has been reached   * @exception IOException if the information is not read    * successfully   */   protected boolean getInstanceSparse(StreamTokenizer tokenizer, 				      boolean flag)        throws IOException {    int valIndex, numValues = 0, maxIndex = -1;        // Get values    do {            // Get index      getIndex(tokenizer);      if (tokenizer.ttype == '}') {	break;      }             // Is index valid?      
  /**
   * Reads a single instance using the tokenizer and appends it
   * to the dataset. Automatically expands the dataset if it
   * is not large enough to hold the instance.
   *
   * @param tokenizer the tokenizer to be used
   * @param flag if method should test for carriage return after
   * each instance
   * @return false if end of file has been reached
   * @exception IOException if the information is not read
   * successfully
   */
  protected boolean getInstanceSparse(StreamTokenizer tokenizer,
				      boolean flag)
       throws IOException {

    int valIndex, numValues = 0, maxIndex = -1;

    // Get values
    do {

      // Get index
      getIndex(tokenizer);
      if (tokenizer.ttype == '}') {
	break;
      }

      // Is index valid?
      try {
	m_IndicesBuffer[numValues] = Integer.valueOf(tokenizer.sval).intValue();
      } catch (NumberFormatException e) {
	errms(tokenizer, "index number expected");
      }
      if (m_IndicesBuffer[numValues] <= maxIndex) {
	errms(tokenizer, "indices have to be ordered");
      }
      if ((m_IndicesBuffer[numValues] < 0) ||
	  (m_IndicesBuffer[numValues] >= numAttributes())) {
	errms(tokenizer, "index out of bounds");
      }
      maxIndex = m_IndicesBuffer[numValues];

      // Get value
      getNextToken(tokenizer);

      // Check if value is missing.
      if (tokenizer.ttype == '?') {
	m_ValueBuffer[numValues] = Instance.missingValue();
      } else {

	// Check if token is valid.
	if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
	  errms(tokenizer, "not a valid value");
	}
	if (attribute(m_IndicesBuffer[numValues]).isNominal()) {

	  // Check if value appears in header.
	  valIndex =
	    attribute(m_IndicesBuffer[numValues]).indexOfValue(tokenizer.sval);
	  if (valIndex == -1) {
	    errms(tokenizer, "nominal value not declared in header");
	  }
	  m_ValueBuffer[numValues] = (double)valIndex;
	} else if (attribute(m_IndicesBuffer[numValues]).isNumeric()) {

	  // Check if value is really a number.
	  try {
	    m_ValueBuffer[numValues] =
	      Double.valueOf(tokenizer.sval).doubleValue();
	  } catch (NumberFormatException e) {
	    errms(tokenizer, "number expected");
	  }
	} else {
	  m_ValueBuffer[numValues] =
	    attribute(m_IndicesBuffer[numValues]).addStringValue(tokenizer.sval);
	}
      }
      numValues++;
    } while (true);
    if (flag) {
      getLastToken(tokenizer, true);
    }

    // Add instance to dataset
    double[] tempValues = new double[numValues];
    int[] tempIndices = new int[numValues];
    System.arraycopy(m_ValueBuffer, 0, tempValues, 0, numValues);
    System.arraycopy(m_IndicesBuffer, 0, tempIndices, 0, numValues);
    add(new SparseInstance(1, tempValues, tempIndices, numAttributes()));
    return true;
  }
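  // getInstanceSparse() above copies only the numValues pairs that
  // were actually read into right-sized arrays before building the
  // SparseInstance. Note that attributes omitted from a sparse line
  // are stored as the value zero, not as missing: for a hypothetical
  // four-attribute numeric dataset, the sparse line {1 2.5, 3 7}
  // denotes the same instance as the full line 0, 2.5, 0, 7 would.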
  /**
   * Reads a single instance using the tokenizer and appends it
   * to the dataset. Automatically expands the dataset if it
   * is not large enough to hold the instance.
   *
   * @param tokenizer the tokenizer to be used
   * @param flag if method should test for carriage return after
   * each instance
   * @return false if end of file has been reached
   * @exception IOException if the information is not read
   * successfully
   */
  protected boolean getInstanceFull(StreamTokenizer tokenizer,
				    boolean flag)
       throws IOException {

    double[] instance = new double[numAttributes()];
    int index;

    // Get values for all attributes.
    for (int i = 0; i < numAttributes(); i++) {

      // Get next token
      if (i > 0) {
	getNextToken(tokenizer);
      }

      // Check if value is missing.
      if (tokenizer.ttype == '?') {
	instance[i] = Instance.missingValue();
      } else {

	// Check if token is valid.
	if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
	  errms(tokenizer, "not a valid value");
	}
	if (attribute(i).isNominal()) {

	  // Check if value appears in header.
	  index = attribute(i).indexOfValue(tokenizer.sval);
	  if (index == -1) {
	    errms(tokenizer, "nominal value not declared in header");
	  }
	  instance[i] = (double)index;
	} else if (attribute(i).isNumeric()) {

	  // Check if value is really a number.
	  try {
	    instance[i] = Double.valueOf(tokenizer.sval).doubleValue();
	  } catch (NumberFormatException e) {
	    errms(tokenizer, "number expected");
	  }
	} else {
	  instance[i] = attribute(i).addStringValue(tokenizer.sval);
	}
      }
    }
    if (flag) {
      getLastToken(tokenizer, true);
    }

    // Add instance to dataset
    add(new Instance(1, instance));
    return true;
  }

  /**
   * Reads and stores header of an ARFF file.
   *
   * @param tokenizer the stream tokenizer
   * @exception IOException if the information is not read
   * successfully
   */
  protected void readHeader(StreamTokenizer tokenizer)
     throws IOException {

    String attributeName;
    FastVector attributeValues;
    int i;

    // Get name of relation.
    getFirstToken(tokenizer);
    if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
      errms(tokenizer, "premature end of file");
    }
    if (tokenizer.sval.equalsIgnoreCase("@relation")) {
      getNextToken(tokenizer);
      m_RelationName = tokenizer.sval;
      getLastToken(tokenizer, false);
    } else {
      errms(tokenizer, "keyword @relation expected");
    }

    // Create vectors to hold information temporarily.
    m_Attributes = new FastVector();

    // Get attribute declarations.
    getFirstToken(tokenizer);
    if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
      errms(tokenizer, "premature end of file");
    }
    while (tokenizer.sval.equalsIgnoreCase("@attribute")) {

      // Get attribute name.
      getNextToken(tokenizer);
      attributeName = tokenizer.sval;
      getNextToken(tokenizer);

      // Check if attribute is nominal.
      if (tokenizer.ttype == StreamTokenizer.TT_WORD) {

	// Attribute is real, integer, or string.
	if (tokenizer.sval.equalsIgnoreCase("real") ||
	    tokenizer.sval.equalsIgnoreCase("integer") ||
	    tokenizer.sval.equalsIgnoreCase("numeric")) {
	  m_Attributes.addElement(new Attribute(attributeName,
						numAttributes()));
	  readTillEOL(tokenizer);
	} else if (tokenizer.sval.equalsIgnoreCase("string")) {
	  m_Attributes.
	    addElement(new Attribute(attributeName, null,
				     numAttributes()));
	  readTillEOL(tokenizer);
	} else {
	  errms(tokenizer, "no valid attribute type or invalid " +
		"enumeration");
	}
      } else {

	// Attribute is nominal.
	attributeValues = new FastVector();
	tokenizer.pushBack();

	// Get values for nominal attribute.
	if (tokenizer.nextToken() != '{') {
	  errms(tokenizer, "{ expected at beginning of enumeration");
	}
	while (tokenizer.nextToken() != '}') {
	  if (tokenizer.ttype == StreamTokenizer.TT_EOL) {
	    errms(tokenizer, "} expected at end of enumeration");
	  } else {
	    attributeValues.addElement(tokenizer.sval);
	  }
	}
	if (attributeValues.size() == 0) {
	  errms(tokenizer, "no nominal values found");
	}
	m_Attributes.
	  addElement(new Attribute(attributeName, attributeValues,
				   numAttributes()));
      }
      getLastToken(tokenizer, false);
      getFirstToken(tokenizer);
      if (tokenizer.ttype == StreamTokenizer.TT_EOF)
	errms(tokenizer, "premature end of file");
    }

    // Check if data part follows. We can't easily check for EOL.
    if (!tokenizer.sval.equalsIgnoreCase("@data")) {
      errms(tokenizer, "keyword @data expected");
    }

    // Check if any attributes have been declared.
    if (m_Attributes.size() == 0) {
      errms(tokenizer, "no attributes declared");
    }

    // Allocate buffers in case sparse instances have to be read
    m_ValueBuffer = new double[numAttributes()];
    m_IndicesBuffer = new int[numAttributes()];
  }
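  // readHeader() above accepts headers of the following shape
  // (a minimal hypothetical example; names are illustrative):
  //
  //   @relation weather
  //   @attribute outlook {sunny, overcast, rainy}
  //   @attribute temperature numeric
  //   @attribute comment string
  //   @data
  //
  // A type token of "real", "integer" or "numeric" yields a numeric
  // attribute, "string" a string attribute, and a brace-delimited
  // list a nominal attribute; anything else is reported as an error.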
  /**
   * Copies instances from one set to the end of another
   * one.
   *
   * @param from the position of the first instance to be copied
   * @param dest the destination for the instances
   * @param num the number of instances to be copied
   */
  private void copyInstances(int from, Instances dest, int num) {

    for (int i = 0; i < num; i++) {
      dest.add(instance(from + i));
    }
  }

  /**
   * Throws error message with line number and last token read.
   *
   * @param tokenizer the stream tokenizer
   * @param theMsg the error message to be thrown
   * @throws IOException containing the error message
   */
  private void errms(StreamTokenizer tokenizer, String theMsg)
       throws IOException {

    throw new IOException(theMsg + ", read " + tokenizer.toString());
  }

  /**
   * Replaces the attribute information by a clone of
   * itself.
   */
  private void freshAttributeInfo() {

    m_Attributes = (FastVector) m_Attributes.copyElements();
  }

  /**
   * Gets next token, skipping empty lines.
   *
   * @param tokenizer the stream tokenizer
   * @exception IOException if reading the next token fails
   */
  private void getFirstToken(StreamTokenizer tokenizer)
    throws IOException {

    while (tokenizer.nextToken() == StreamTokenizer.TT_EOL) {};
    if ((tokenizer.ttype == '\'') ||
	(tokenizer.ttype == '"')) {
      tokenizer.ttype = StreamTokenizer.TT_WORD;
    } else if ((tokenizer.ttype == StreamTokenizer.TT_WORD) &&
	       (tokenizer.sval.equals("?"))) {
      tokenizer.ttype = '?';
    }
  }

  /**
   * Gets index, checking for a premature end of line.
   *
   * @param tokenizer the stream tokenizer
   * @exception IOException if it finds a premature end of line
   */
  private void getIndex(StreamTokenizer tokenizer) throws IOException {

    if (tokenizer.nextToken() == StreamTokenizer.TT_EOL) {
      errms(tokenizer, "premature end of line");
    }
    if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
      errms(tokenizer, "premature end of file");
    }
  }

  /**
   * Gets token and checks if it is the end of line (or end of file,
   * where that is acceptable).
   *
   * @param tokenizer the stream tokenizer
   * @param endOfFileOk true if an end of file is acceptable here
   * @exception IOException if it doesn't find an end of line
   */
  private void getLastToken(StreamTokenizer tokenizer, boolean endOfFileOk)
       throws IOException {

    if ((tokenizer.nextToken() != StreamTokenizer.TT_EOL) &&
	((tokenizer.ttype != StreamTokenizer.TT_EOF) || !endOfFileOk)) {
      errms(tokenizer, "end of line expected");
    }
  }

  /**
   * Gets next token, checking for a premature end of line.
   *
   * @param tokenizer the stream tokenizer
   * @exception IOException if it finds a premature end of line
   */
  private void getNextToken(StreamTokenizer tokenizer)
       throws IOException {

    if (tokenizer.nextToken() == StreamTokenizer.TT_EOL) {
      errms(tokenizer, "premature end of line");
    }
    if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
      errms(tokenizer, "premature end of file");
    } else if ((tokenizer.ttype == '\'') ||
