📄 instances.java
result.append(Utils.padLeft("Type", 5)).append(Utils.padLeft("Nom", 5)); result.append(Utils.padLeft("Int", 5)).append(Utils.padLeft("Real", 5)); result.append(Utils.padLeft("Missing", 12)); result.append(Utils.padLeft("Unique", 12)); result.append(Utils.padLeft("Dist", 6)).append('\n'); for (int i = 0; i < numAttributes(); i++) { Attribute a = attribute(i); AttributeStats as = attributeStats(i); result.append(Utils.padLeft("" + (i + 1), 4)).append(' '); result.append(Utils.padRight(a.name(), 25)).append(' '); long percent; switch (a.type()) { case Attribute.NOMINAL: result.append(Utils.padLeft("Nom", 4)).append(' '); percent = Math.round(100.0 * as.intCount / as.totalCount); result.append(Utils.padLeft("" + percent, 3)).append("% "); result.append(Utils.padLeft("" + 0, 3)).append("% "); percent = Math.round(100.0 * as.realCount / as.totalCount); result.append(Utils.padLeft("" + percent, 3)).append("% "); break; case Attribute.NUMERIC: result.append(Utils.padLeft("Num", 4)).append(' '); result.append(Utils.padLeft("" + 0, 3)).append("% "); percent = Math.round(100.0 * as.intCount / as.totalCount); result.append(Utils.padLeft("" + percent, 3)).append("% "); percent = Math.round(100.0 * as.realCount / as.totalCount); result.append(Utils.padLeft("" + percent, 3)).append("% "); break; case Attribute.DATE: result.append(Utils.padLeft("Dat", 4)).append(' '); result.append(Utils.padLeft("" + 0, 3)).append("% "); percent = Math.round(100.0 * as.intCount / as.totalCount); result.append(Utils.padLeft("" + percent, 3)).append("% "); percent = Math.round(100.0 * as.realCount / as.totalCount); result.append(Utils.padLeft("" + percent, 3)).append("% "); break; case Attribute.STRING: result.append(Utils.padLeft("Str", 4)).append(' '); percent = Math.round(100.0 * as.intCount / as.totalCount); result.append(Utils.padLeft("" + percent, 3)).append("% "); result.append(Utils.padLeft("" + 0, 3)).append("% "); percent = Math.round(100.0 * as.realCount / as.totalCount); result.append(Utils.padLeft("" + percent, 3)).append("% "); break; default: result.append(Utils.padLeft("???", 4)).append(' '); result.append(Utils.padLeft("" + 0, 3)).append("% "); percent = Math.round(100.0 * as.intCount / as.totalCount); result.append(Utils.padLeft("" + percent, 3)).append("% "); percent = Math.round(100.0 * as.realCount / as.totalCount); result.append(Utils.padLeft("" + percent, 3)).append("% "); break; } result.append(Utils.padLeft("" + as.missingCount, 5)).append(" /"); percent = Math.round(100.0 * as.missingCount / as.totalCount); result.append(Utils.padLeft("" + percent, 3)).append("% "); result.append(Utils.padLeft("" + as.uniqueCount, 5)).append(" /"); percent = Math.round(100.0 * as.uniqueCount / as.totalCount); result.append(Utils.padLeft("" + percent, 3)).append("% "); result.append(Utils.padLeft("" + as.distinctCount, 5)).append(' '); result.append('\n'); } return result.toString(); } /** * Reads a single instance using the tokenizer and appends it * to the dataset. Automatically expands the dataset if it * is not large enough to hold the instance. * * @param tokenizer the tokenizer to be used * @param flag if method should test for carriage return after * each instance * @return false if end of file has been reached * @exception IOException if the information is not read * successfully */ protected boolean getInstance(StreamTokenizer tokenizer, boolean flag) throws IOException { // Check if any attributes have been declared. 
    if (m_Attributes.size() == 0) {
      errms(tokenizer, "no header information available");
    }

    // Check if end of file reached.
    getFirstToken(tokenizer);
    if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
      return false;
    }

    // Parse instance
    if (tokenizer.ttype == '{') {
      return getInstanceSparse(tokenizer, flag);
    } else {
      return getInstanceFull(tokenizer, flag);
    }
  }

  /**
   * Reads a single instance using the tokenizer and appends it
   * to the dataset. Automatically expands the dataset if it
   * is not large enough to hold the instance.
   *
   * @param tokenizer the tokenizer to be used
   * @param flag if method should test for carriage return after
   * each instance
   * @return false if end of file has been reached
   * @exception IOException if the information is not read
   * successfully
   */
  protected boolean getInstanceSparse(StreamTokenizer tokenizer, boolean flag)
    throws IOException {

    int valIndex, numValues = 0, maxIndex = -1;

    // Get values
    do {
      // Get index
      getIndex(tokenizer);
      if (tokenizer.ttype == '}') {
        break;
      }

      // Is index valid?
      try {
        m_IndicesBuffer[numValues] = Integer.valueOf(tokenizer.sval).intValue();
      } catch (NumberFormatException e) {
        errms(tokenizer, "index number expected");
      }
      if (m_IndicesBuffer[numValues] <= maxIndex) {
        errms(tokenizer, "indices have to be ordered");
      }
      if ((m_IndicesBuffer[numValues] < 0) ||
          (m_IndicesBuffer[numValues] >= numAttributes())) {
        errms(tokenizer, "index out of bounds");
      }
      maxIndex = m_IndicesBuffer[numValues];

      // Get value;
      getNextToken(tokenizer);

      // Check if value is missing.
      if (tokenizer.ttype == '?') {
        m_ValueBuffer[numValues] = Instance.missingValue();
      } else {

        // Check if token is valid.
        if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
          errms(tokenizer, "not a valid value");
        }
        switch (attribute(m_IndicesBuffer[numValues]).type()) {
          case Attribute.NOMINAL:
            // Check if value appears in header.
            valIndex = attribute(m_IndicesBuffer[numValues]).indexOfValue(tokenizer.sval);
            if (valIndex == -1) {
              errms(tokenizer, "nominal value not declared in header");
            }
            m_ValueBuffer[numValues] = (double) valIndex;
            break;
          case Attribute.NUMERIC:
            // Check if value is really a number.
            try {
              m_ValueBuffer[numValues] = Double.valueOf(tokenizer.sval).doubleValue();
            } catch (NumberFormatException e) {
              errms(tokenizer, "number expected");
            }
            break;
          case Attribute.STRING:
            m_ValueBuffer[numValues] = attribute(m_IndicesBuffer[numValues]).addStringValue(tokenizer.sval);
            break;
          case Attribute.DATE:
            try {
              m_ValueBuffer[numValues] = attribute(m_IndicesBuffer[numValues]).parseDate(tokenizer.sval);
            } catch (ParseException e) {
              errms(tokenizer, "unparseable date: " + tokenizer.sval);
            }
            break;
          default:
            errms(tokenizer, "unknown attribute type in column " + m_IndicesBuffer[numValues]);
        }
      }
      numValues++;
    } while (true);
    if (flag) {
      getLastToken(tokenizer, true);
    }

    // Add instance to dataset
    double[] tempValues = new double[numValues];
    int[] tempIndices = new int[numValues];
    System.arraycopy(m_ValueBuffer, 0, tempValues, 0, numValues);
    System.arraycopy(m_IndicesBuffer, 0, tempIndices, 0, numValues);
    add(new SparseInstance(1, tempValues, tempIndices, numAttributes()));
    return true;
  }

  /**
   * Reads a single instance using the tokenizer and appends it
   * to the dataset. Automatically expands the dataset if it
   * is not large enough to hold the instance.
   *
   * @param tokenizer the tokenizer to be used
   * @param flag if method should test for carriage return after
   * each instance
   * @return false if end of file has been reached
   * @exception IOException if the information is not read
   * successfully
   */
  protected boolean getInstanceFull(StreamTokenizer tokenizer, boolean flag)
    throws IOException {

    double[] instance = new double[numAttributes()];
    int index;

    // Get values for all attributes.
    for (int i = 0; i < numAttributes(); i++) {

      // Get next token
      if (i > 0) {
        getNextToken(tokenizer);
      }

      // Check if value is missing.
      if (tokenizer.ttype == '?') {
        instance[i] = Instance.missingValue();
      } else {

        // Check if token is valid.
        if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
          errms(tokenizer, "not a valid value");
        }
        switch (attribute(i).type()) {
          case Attribute.NOMINAL:
            // Check if value appears in header.
            index = attribute(i).indexOfValue(tokenizer.sval);
            if (index == -1) {
              errms(tokenizer, "nominal value not declared in header");
            }
            instance[i] = (double) index;
            break;
          case Attribute.NUMERIC:
            // Check if value is really a number.
            try {
              instance[i] = Double.valueOf(tokenizer.sval).doubleValue();
            } catch (NumberFormatException e) {
              errms(tokenizer, "number expected");
            }
            break;
          case Attribute.STRING:
            instance[i] = attribute(i).addStringValue(tokenizer.sval);
            break;
          case Attribute.DATE:
            try {
              instance[i] = attribute(i).parseDate(tokenizer.sval);
            } catch (ParseException e) {
              errms(tokenizer, "unparseable date: " + tokenizer.sval);
            }
            break;
          default:
            errms(tokenizer, "unknown attribute type in column " + i);
        }
      }
    }
    if (flag) {
      getLastToken(tokenizer, true);
    }

    // Add instance to dataset
    add(new Instance(1, instance));
    return true;
  }

  /**
   * Reads and stores header of an ARFF file.
   *
   * @param tokenizer the stream tokenizer
   * @exception IOException if the information is not read
   * successfully
   */
  protected void readHeader(StreamTokenizer tokenizer) throws IOException {

    String attributeName;
    FastVector attributeValues;
    int i;

    // Get name of relation.
    getFirstToken(tokenizer);
    if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
      errms(tokenizer, "premature end of file");
    }
    if (tokenizer.sval.equalsIgnoreCase("@relation")) {
      getNextToken(tokenizer);
      m_RelationName = tokenizer.sval;
      getLastToken(tokenizer, false);
    } else {
      errms(tokenizer, "keyword @relation expected");
    }

    // Create vectors to hold information temporarily.
    m_Attributes = new FastVector();

    // Get attribute declarations.
    getFirstToken(tokenizer);
    if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
      errms(tokenizer, "premature end of file");
    }
    while (tokenizer.sval.equalsIgnoreCase("@attribute")) {

      // Get attribute name.
      getNextToken(tokenizer);
      attributeName = tokenizer.sval;
      getNextToken(tokenizer);

      // Check if attribute is nominal.
      if (tokenizer.ttype == StreamTokenizer.TT_WORD) {

        // Attribute is real, integer, or string.
        if (tokenizer.sval.equalsIgnoreCase("real") ||
            tokenizer.sval.equalsIgnoreCase("integer") ||
            tokenizer.sval.equalsIgnoreCase("numeric")) {
          m_Attributes.addElement(new Attribute(attributeName, numAttributes()));
          readTillEOL(tokenizer);
        } else if (tokenizer.sval.equalsIgnoreCase("string")) {
          m_Attributes.
            addElement(new Attribute(attributeName, (FastVector) null,
                                     numAttributes()));
          readTillEOL(tokenizer);
        } else if (tokenizer.sval.equalsIgnoreCase("date")) {
          String format = null;
          if (tokenizer.nextToken() != StreamTokenizer.TT_EOL) {
            if ((tokenizer.ttype != StreamTokenizer.TT_WORD) &&
                (tokenizer.ttype != '\'') &&
                (tokenizer.ttype != '\"')) {
              errms(tokenizer, "not a valid date format");
            }
            format = tokenizer.sval;
            readTillEOL(tokenizer);
          } else {
            tokenizer.pushBack();
          }
          m_Attributes.addElement(new Attribute(attributeName, format,
                                                numAttributes()));
        } else {
          errms(tokenizer, "no valid attribute type or invalid " +
                "enumeration");
        }
      } else {

        // Attribute is nominal.
        attributeValues = new FastVector();
        tokenizer.pushBack();

        // Get values for nominal attribute.
        if (tokenizer.nextToken() != '{') {
          errms(tokenizer, "{ expected at beginning of enumeration");
        }
        while (tokenizer.nextToken() != '}') {
          if (tokenizer.ttype == StreamTokenizer.TT_EOL) {
            errms(tokenizer, "} expected at end of enumeration");
          } else {
            attributeValues.addElement(tokenizer.sval);
          }
        }
        if (attributeValues.size() == 0) {
          errms(tokenizer, "no nominal values found");
        }
        m_Attributes.
          addElement(new Attribute(attributeName, attributeValues,
                                   numAttributes()));
      }
      getLastToken(tokenizer, false);
      getFirstToken(tokenizer);
      if (tokenizer.ttype == StreamTokenizer.TT_EOF)
        errms(tokenizer, "premature end of file");
    }

    // Check if data part follows. We can't easily check for EOL.
    if (!tokenizer.sval.equalsIgnoreCase("@data")) {
      errms(tokenizer, "keyword @data expected");
    }

    // Check if any attributes have been declared.
    if (m_Attributes.size() == 0) {
      errms(tokenizer, "no attributes declared");
    }

    // Allocate buffers in case sparse instances have to be read
    m_ValueBuffer = new double[numAttributes()];
    m_IndicesBuffer = new int[numAttributes()];
  }

  /**
   * Copies instances from one set to the end of another
   * one.
   *
   * @param source the source of the instances
   * @param from the position of the first instance to be copied
   * @param dest the destination for the instances
   * @param num the number of instances to be copied
   */
  private void copyInstances(int from, Instances dest, int num) {
    for (int i = 0; i < num; i++) {
      dest.add(instance(from + i));
    }
  }

  /**
   * Throws error message with line number and last token read.
   *
   * @param theMsg the error message to be thrown
   * @param tokenizer the stream tokenizer
   * @throws IOException containing the error message
   */
  private void errms(StreamTokenizer tokenizer, String theMsg) throws IOException {
    throw new IOException(theMsg + ", read " + tokenizer.toString());
  }

  /**
   * Replaces the attribute information by a clone of
   * itself.
   */
  private void freshAttributeInfo() {
    m_Attributes = (FastVector) m_Attributes.copyElements();
  }

  /**
   * Gets next token, skipping empty lines.
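For context, the protected reader methods above are normally driven by the public Instances(Reader) constructor, which calls readHeader() once and then getInstance() in a loop until it returns false at end of file. A minimal usage sketch, assuming the standard weka.core API and a hypothetical local file named data.arff:

import java.io.BufferedReader;
import java.io.FileReader;
import weka.core.Instances;

public class ArffReadSketch {
  public static void main(String[] args) throws Exception {
    // The constructor parses the @relation/@attribute header, then each data
    // row (sparse rows in "{index value, ...}" form, dense rows otherwise).
    BufferedReader reader = new BufferedReader(new FileReader("data.arff")); // hypothetical path
    Instances data = new Instances(reader);
    reader.close();
    // Prints the per-attribute summary table assembled at the top of this listing.
    System.out.println(data.toSummaryString());
  }
}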