📄 arffloader.java

📁 矩阵的QR分解算法
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
    /**     * Reads a single instance using the tokenizer and returns it.      *     * @param structure 	the dataset header information, will get updated      * 				in case of string or relational attributes     * @param flag 		if method should test for carriage return after      * 				each instance     * @return 			null if end of file has been reached     * @throws IOException 	if the information is not read      * successfully     */     public Instance readInstance(Instances structure, boolean flag) throws IOException {      return getInstance(structure, flag);    }        /**     * Reads a single instance using the tokenizer and returns it.      *     * @param structure 	the dataset header information, will get updated      * 				in case of string or relational attributes     * @param flag 		if method should test for carriage return after      * 				each instance     * @return 			null if end of file has been reached     * @throws IOException 	if the information is not read      * 				successfully     */     protected Instance getInstance(Instances structure, boolean flag) throws IOException {      m_Data = structure;            // Check if any attributes have been declared.      if (m_Data.numAttributes() == 0) {        errorMessage("no header information available");      }      // Check if end of file reached.      getFirstToken();      if (m_Tokenizer.ttype == StreamTokenizer.TT_EOF) {        return null;      }            // Parse instance      if (m_Tokenizer.ttype == '{') {        return getInstanceSparse(flag);      } else {        return getInstanceFull(flag);      }    }    /**     * Reads a single instance using the tokenizer and returns it.     
*     * @param flag 		if method should test for carriage return after      * 				each instance     * @return 			null if end of file has been reached     * @throws IOException 	if the information is not read      * 				successfully     */     protected Instance getInstanceSparse(boolean flag) throws IOException {      int valIndex, numValues = 0, maxIndex = -1;            // Get values      do {        // Get index        getIndex();        if (m_Tokenizer.ttype == '}') {  	break;        }           // Is index valid?        try{  	m_IndicesBuffer[numValues] = Integer.valueOf(m_Tokenizer.sval).intValue();        } catch (NumberFormatException e) {  	errorMessage("index number expected");        }        if (m_IndicesBuffer[numValues] <= maxIndex) {  	errorMessage("indices have to be ordered");        }        if ((m_IndicesBuffer[numValues] < 0) ||   	  (m_IndicesBuffer[numValues] >= m_Data.numAttributes())) {  	errorMessage("index out of bounds");        }        maxIndex = m_IndicesBuffer[numValues];        // Get value;        getNextToken();        // Check if value is missing.        if  (m_Tokenizer.ttype == '?') {  	m_ValueBuffer[numValues] = Instance.missingValue();        } else {  	// Check if token is valid.  	if (m_Tokenizer.ttype != StreamTokenizer.TT_WORD) {  	  errorMessage("not a valid value");  	}          switch (m_Data.attribute(m_IndicesBuffer[numValues]).type()) {            case Attribute.NOMINAL:              // Check if value appears in header.              valIndex =                 m_Data.attribute(m_IndicesBuffer[numValues]).indexOfValue(m_Tokenizer.sval);              if (valIndex == -1) {                errorMessage("nominal value not declared in header");              }              m_ValueBuffer[numValues] = (double)valIndex;              break;  	case Attribute.NUMERIC:  	  // Check if value is really a number.  	  try{  	    m_ValueBuffer[numValues] = Double.valueOf(m_Tokenizer.sval).  	      
doubleValue();  	  } catch (NumberFormatException e) {  	    errorMessage("number expected");  	  }            break;  	case Attribute.STRING:  	  m_ValueBuffer[numValues] =   	    m_Data.attribute(m_IndicesBuffer[numValues]).addStringValue(m_Tokenizer.sval);            break;          case Attribute.DATE:            try {              m_ValueBuffer[numValues] =                 m_Data.attribute(m_IndicesBuffer[numValues]).parseDate(m_Tokenizer.sval);            } catch (ParseException e) {              errorMessage("unparseable date: " + m_Tokenizer.sval);            }            break;          case Attribute.RELATIONAL:            try {              ArffReader arff = new ArffReader(new StringReader(m_Tokenizer.sval), m_Data.attribute(m_IndicesBuffer[numValues]).relation(), 0);              Instances data = arff.getData();              m_ValueBuffer[numValues] = m_Data.attribute(m_IndicesBuffer[numValues]).addRelation(data);            }            catch (Exception e) {              throw new IOException(e.toString() + " of line " + getLineNo());            }            break;          default:            errorMessage("unknown attribute type in column " + m_IndicesBuffer[numValues]);  	}        }        numValues++;      } while (true);            if (flag) {        getLastToken(true);      }              // Add instance to dataset      double[] tempValues = new double[numValues];      int[] tempIndices = new int[numValues];      System.arraycopy(m_ValueBuffer, 0, tempValues, 0, numValues);      System.arraycopy(m_IndicesBuffer, 0, tempIndices, 0, numValues);      Instance inst = new SparseInstance(1, tempValues, tempIndices, m_Data.numAttributes());      inst.setDataset(m_Data);            return inst;    }    /**     * Reads a single instance using the tokenizer and returns it.     
*
     * Parses the full (dense) format: one value per declared attribute,
     * in attribute order.
     *
     * @param flag          if method should test for carriage return after
     *                      each instance
     * @return              the instance that was read (this method has no
     *                      path that returns null)
     * @throws IOException  if the information is not read
     *                      successfully
     */
    protected Instance getInstanceFull(boolean flag) throws IOException {
      double[] instance = new double[m_Data.numAttributes()];
      int index;

      // Get values for all attributes.
      for (int i = 0; i < m_Data.numAttributes(); i++) {
        // Get next token. The token for the first attribute is expected to
        // be current already (getInstance fetches it before dispatching).
        if (i > 0) {
          getNextToken();
        }

        // Check if value is missing.
        if (m_Tokenizer.ttype == '?') {
          instance[i] = Instance.missingValue();
        } else {

          // Check if token is valid.
          if (m_Tokenizer.ttype != StreamTokenizer.TT_WORD) {
            errorMessage("not a valid value");
          }
          switch (m_Data.attribute(i).type()) {
            case Attribute.NOMINAL:
              // Check if value appears in header.
              index = m_Data.attribute(i).indexOfValue(m_Tokenizer.sval);
              if (index == -1) {
                errorMessage("nominal value not declared in header");
              }
              instance[i] = (double) index;
              break;
            case Attribute.NUMERIC:
              // Check if value is really a number.
              try {
                instance[i] = Double.parseDouble(m_Tokenizer.sval);
              } catch (NumberFormatException e) {
                errorMessage("number expected");
              }
              break;
            case Attribute.STRING:
              // The stored value is whatever addStringValue returns for the token.
              instance[i] = m_Data.attribute(i).addStringValue(m_Tokenizer.sval);
              break;
            case Attribute.DATE:
              try {
                instance[i] = m_Data.attribute(i).parseDate(m_Tokenizer.sval);
              } catch (ParseException e) {
                errorMessage("unparseable date: " + m_Tokenizer.sval);
              }
              break;
            case Attribute.RELATIONAL:
              try {
                // The token text is itself parsed as ARFF data against the
                // attribute's relation header.
                ArffReader arff = new ArffReader(
                    new StringReader(m_Tokenizer.sval),
                    m_Data.attribute(i).relation(), 0);
                Instances data = arff.getData();
                instance[i] = m_Data.attribute(i).addRelation(data);
              } catch (Exception e) {
                // Same exception type and message as before, but keep the
                // original failure attached as the cause for diagnosis.
                IOException wrapped =
                    new IOException(e.toString() + " of line " + getLineNo());
                wrapped.initCause(e);
                throw wrapped;
              }
              break;
            default:
              errorMessage("unknown attribute type in column " + i);
          }
        }
      }

      if (flag) {
        getLastToken(true);
      }

      // Build the instance from the collected values and attach the
      // dataset header.
      Instance inst = new Instance(1, instance);
      inst.setDataset(m_Data);

      return inst;
    }

    /**
     * Reads and stores header of an ARFF file. Expects the relation
     * keyword and name, then attribute declarations, then the data
     * keyword; the resulting header is stored in m_Data.
     *
     * @param capacity      the number of instances to reserve in the data
     *                      structure
     * @throws IOException  if the information is not read
     *                      successfully
     */
    protected void readHeader(int capacity) throws IOException {
      m_Lines = 0;
      String relationName = "";

      // Get name of relation.
      getFirstToken();
      if (m_Tokenizer.ttype == StreamTokenizer.TT_EOF) {
        errorMessage("premature end of file");
      }
      if (Instances.ARFF_RELATION.equalsIgnoreCase(m_Tokenizer.sval)) {
        getNextToken();
        relationName = m_Tokenizer.sval;
        getLastToken(false);
      } else {
        errorMessage("keyword " + Instances.ARFF_RELATION + " expected");
      }

      // Create vectors to hold information temporarily.
      FastVector attributes = new FastVector();

      // Get attribute declarations.
      getFirstToken();
      if (m_Tokenizer.ttype == StreamTokenizer.TT_EOF) {
        errorMessage("premature end of file");
      }

      // Keep parsing while the current token is the attribute keyword.
      while (Attribute.ARFF_ATTRIBUTE.equalsIgnoreCase(m_Tokenizer.sval)) {
        attributes = parseAttribute(attributes);
      }

      // Check if data part follows. We can't easily check for EOL.
      if (!Instances.ARFF_DATA.equalsIgnoreCase(m_Tokenizer.sval)) {
        errorMessage("keyword " + Instances.ARFF_DATA + " expected");
      }

      // Check if any attributes have been declared.
      if (attributes.size() == 0) {
        errorMessage("no attributes declared");
      }

      m_Data = new Instances(relationName, attributes, capacity);
    }

    /**
     * Parses the attribute declaration.
     *
     * @param attributes    the current attributes vector
     * @return              the new attributes vector
     * @throws IOException  if the information is not read
     *                      successfully
     */
    protected FastVector parseAttribute(FastVector attributes) throws IOException {
      String attributeName;
      FastVector attributeValues;

      // Get attribute name.
      getNextToken();
      attributeName = m_Tokenizer.sval;
      getNextToken();

      // Check if attribute is nominal.
      if (m_Tokenizer.ttype == StreamTokenizer.TT_WORD) {

        // Attribute is real, integer, or string.
if (m_Tokenizer.sval.equalsIgnoreCase(Attribute.ARFF_ATTRIBUTE_REAL) ||            m_Tokenizer.sval.equalsIgnoreCase(Attribute.ARFF_ATTRIBUTE_INTEGER) ||            m_Tokenizer.sval.equalsIgnoreCase(Attribute.ARFF_ATTRIBUTE_NUMERIC)) {          attributes.addElement(new Attribute(attributeName, attributes.size()));          readTillEOL();        } else if (m_Tokenizer.sval.equalsIgnoreCase(Attribute.ARFF_ATTRIBUTE_STRING)) {          attributes.            addElement(new Attribute(attributeName, (FastVector)null,                attributes.size()));          readTillEOL();        } else if (m_Tokenizer.sval.equalsIgnoreCase(Attribute.ARFF_ATTRIBUTE_DATE)) {          String format = null;          if (m_Tokenizer.nextToken() != StreamTokenizer.TT_EOL) {            if ((m_Tokenizer.ttype != StreamTokenizer.TT_WORD) &&                (m_Tokenizer.ttype != '\'') &&                (m_Tokenizer.ttype != '\"')) {              errorMessage("not a valid date format");            }            format = m_Tokenizer.sval;            readTillEOL();          } else {            m_Tokenizer.pushBack();          }          attributes.addElement(new Attribute(attributeName, format,              attributes.size()));                  } else if (m_Tokenizer.sval.equalsIgnoreCase(Attribute.ARFF_ATTRIBUTE_RELATIONAL)) {          readTillEOL();                    // Read attributes for subrelation          // First, save current set of attributes
💿 文件大小 531 K
👤 上传用户 bobey
📂 所属分类数学计算
🏷️ 相关标签

#矩阵 #分解 #算法
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -