// Source file: MiningArffStream.java (web-extraction header removed)
}
}
else
{
try {
instance[i] = Double.parseDouble(token);
}
catch(NumberFormatException ex) {
instance[i] = Category.MISSING_VALUE;
}
}
if ( Category.isMissingValue(instance[i]) )
{
missingValues = true;
}
}
token = getNextToken();
if( !token.equalsIgnoreCase( "endofline" ) )
{
if( !token.equalsIgnoreCase( "endoffile" ) )
{
tokenizerException( "end of line expected" );
}
}
// Add instance to dataset
cursorVector = new MiningVector(instance);
cursorVector.setMetaData( metaData );
cursorPosition++;
return true;
}
else
{
double[] m_ValueBuffer = new double[ metaData.getAttributesNumber() ];
int[] m_IndicesBuffer = new int[ metaData.getAttributesNumber() ];
int valIndex, numValues = 0, maxIndex = -1;
do
{
token = getNextToken();
if (tokenizer.ttype == '}')
{
break;
}
// Is index valid?
try
{
m_IndicesBuffer[numValues] = Integer.valueOf( token ).intValue();
}
catch(NumberFormatException e)
{
tokenizerException( "index number expected" );
}
if( m_IndicesBuffer[numValues] <= maxIndex )
{
tokenizerException( "indices have to be ordered" );
}
if( ( m_IndicesBuffer[numValues] < 0 ) || ( m_IndicesBuffer[numValues] >= metaData.getAttributesNumber() ) )
{
tokenizerException( "index out of bounds" );
}
maxIndex = m_IndicesBuffer[numValues];
// Get value;
token = getNextToken();
//<<09/03/2005, Frank J. Xu
//do not handle this kind of error, except throw exceptions.
//Bug: 124,It shows error message: "end of line expected, read token[4,9], line 75]" when
//running source node with \\etifs2\BI\96 Sample Data\arff\iris-error.arff. The
//first data vector contains an empty data i.e. 5.1,3.5,1.4,0.2,,0
if(token.equalsIgnoreCase( "endofline" ) )
{
int startIndex = tokenizer.toString().lastIndexOf(" ")-4;
int endIndex = tokenizer.toString().length();
String errorMsg = tokenizer.toString().substring(startIndex, endIndex);
throw new MiningDataException("Invalid characters in" + errorMsg);
}
//>>09/03/2005, Frank J. Xu
MiningAttribute attribute = metaData.getMiningAttribute( m_IndicesBuffer[numValues] );
if(attribute instanceof CategoricalAttribute)
{
Category cat = new Category( token.toString(), token, new CategoryProperty() );
m_ValueBuffer[numValues] = ((CategoricalAttribute)attribute).getKey( cat );
}
else m_ValueBuffer[numValues] = Double.parseDouble( token );
if( m_ValueBuffer[numValues] == Category.MISSING_VALUE )
{
missingValues = true;
}
numValues++;
}
while( true );
token = getNextToken();
if( !token.equalsIgnoreCase( "endofline" ) )
{
if( !token.equalsIgnoreCase( "endoffile" ) )
{
tokenizerException( "end of line expected" );
}
}
// Add instance to dataset
double[] tempValues = new double[numValues];
int[] tempIndices = new int[numValues];
System.arraycopy(m_ValueBuffer, 0, tempValues, 0, numValues);
System.arraycopy(m_IndicesBuffer, 0, tempIndices, 0, numValues);
// Add instance to dataset
cursorVector = new MiningSparseVector( 1, tempValues, tempIndices );
cursorVector.setMetaData( metaData );
cursorPosition++;
return true;
}
}
// -----------------------------------------------------------------------
// Methods of reading from the stream
// -----------------------------------------------------------------------
/**
 * Returns the data vector at the current cursor position. The cursor is
 * advanced by next(); this method does not move it.
 *
 * @return data vector at current cursor position
 * @exception MiningException if a mining source access error occurs
 */
public MiningVector read() throws MiningException
{
    // The vector was materialized by the last successful next() call.
    return cursorVector;
}
// -----------------------------------------------------------------------
// Methods of writing into the stream
// -----------------------------------------------------------------------
// Writer for appending vectors to the backing ARFF file; created by
// updateRemoveAllVectors() (directly or via updateSetMetaData()).
private PrintWriter writer;
/**
 * Replaces this stream's meta data and discards all stored vectors.
 *
 * @param metaData new meta data to set
 * @exception MiningException if an error occurs
 */
public void updateSetMetaData(MiningDataSpecification metaData) throws MiningException
{
    this.metaData = metaData;
    // Rewriting the file header implicitly drops every stored vector.
    updateRemoveAllVectors();
}
/**
 * Removes all mining vectors from this stream by recreating the backing
 * ARFF file with only the meta data header and the "@data" marker.
 * Note that meta data is not affected by this operation since it is
 * fixed for any stream.
 *
 * @exception MiningException if the file cannot be (re)written
 */
public void updateRemoveAllVectors() throws MiningException
{
    try {
        // Close any previous writer before truncating the file.
        if (writer != null) writer.close();
        writer = new PrintWriter( new FileWriter( fileName ) );
        writer.println( metaData.createArffDescription() );
        writer.println("@data");
    }
    catch (Exception ex) {
        // Preserve the underlying failure detail in the message so the
        // error is diagnosable (the original dropped the cause entirely).
        throw new MiningException("Can't write to file: " + fileName
            + ": " + ex.getMessage());
    }
}
/**
 * Appends a new mining vector to this stream as one comma-separated ARFF
 * data line. Only for updatable input streams. Before using this method
 * the first time, updateSetMetaData or updateRemoveAllVectors must be
 * called so the writer exists.
 *
 * @param vector mining vector to add
 * @exception MiningException if the stream was not opened for writing
 */
public void updateAppendVector(MiningVector vector) throws MiningException
{
    // Fail with a clear message instead of a NullPointerException when
    // the writer was never initialized.
    if (writer == null) {
        throw new MiningException("Stream not opened for writing: call "
            + "updateSetMetaData or updateRemoveAllVectors first");
    }
    MiningDataSpecification mds = vector.getMetaData();
    int nVec = vector.getValues().length;
    // Build the line with a StringBuilder: repeated string concatenation
    // in a loop is O(n^2).
    StringBuilder line = new StringBuilder();
    for (int i = 0; i < nVec; i++) {
        double value = vector.getValue(i);
        String stVal;
        if (Category.isMissingValue(value)) {
            // Missing values are written as '?' in ARFF; check first so we
            // never look up a category for a missing value.
            stVal = "?";
        }
        else if (mds == null || mds.getMiningAttribute(i) instanceof NumericAttribute) {
            stVal = String.valueOf(value);
        }
        else {
            stVal = String.valueOf(
                ((CategoricalAttribute) mds.getMiningAttribute(i))
                    .getCategory(vector.getValue(i)));
        }
        line.append(stVal);
        if (i < nVec - 1) {
            line.append(',');
        }
    }
    writer.println(line.toString());
}
/**
 * Closes the mining ARFF stream by closing the reader (via the
 * superclass) and the writer, if one was opened.
 *
 * @exception MiningException if a mining source access error occurs
 */
public void close() throws MiningException
{
    super.close();
    try
    {
        if (writer != null) {
            writer.close();
            // Drop the reference so a later updateRemoveAllVectors()
            // starts from a clean state.
            writer = null;
        }
    }
    catch( Exception ex)
    {
        // Keep the underlying failure detail (the original swallowed it).
        throw new MiningException( "Can't close ARFF stream from file: "
            + fileName + ": " + ex.getMessage() );
    }
}
/**
 * Reads the next token from the tokenizer and maps it to a string.
 * End of line / end of file are reported as the sentinel strings
 * "endofline" / "endoffile". The '?' (missing value) marker and quoted
 * strings are re-flagged as TT_WORD so callers treat them as values.
 *
 * @return next token, trimmed of surrounding whitespace
 * @exception MiningException if reading from the tokenizer fails or an
 *            unexpected character is encountered
 */
protected String getNextToken() throws MiningException
{
    String token;
    int c;
    try
    {
        c = tokenizer.nextToken();
    }
    catch( IOException ex )
    {
        throw new MiningDataException( ex.getMessage() );
    }
    switch( c )
    {
        case StreamTokenizer.TT_EOL: token = "endofline"; break;
        case StreamTokenizer.TT_EOF: token = "endoffile"; break;
        case '{' : token = "{"; break;
        case '}' : token = "}"; break;
        case '?' : tokenizer.ttype = StreamTokenizer.TT_WORD; token = "?"; break;
        case '\'' : tokenizer.ttype = StreamTokenizer.TT_WORD; token = tokenizer.sval; break;
        case '"' : tokenizer.ttype = StreamTokenizer.TT_WORD; token = tokenizer.sval; break;
        default : token = tokenizer.sval; break;
    }
    // tokenizer.sval is null for ordinary characters that hit the default
    // (or quote) branches without a word value; the original then threw a
    // NullPointerException from trim(). Raise a descriptive error instead.
    if (token == null)
    {
        tokenizerException( "unexpected token" );
    }
    return token.trim();
}
/**
 * Re-initializes the tokenizer syntax tables for the given parsing mode.
 * In meta data mode the blank character is whitespace; in data mode the
 * blank is part of a word. In both modes the comma separates tokens,
 * '%' starts a comment, quotes delimit strings and the braces are
 * returned as ordinary characters. End-of-line is significant.
 *
 * @param a_SyntaxMode SYNTAX_MODE_META_DATA for header parsing,
 *                     anything else for data parsing
 */
protected void resetTokenizerSyntax(int a_SyntaxMode)
{
    if (tokenizer == null)
    {
        return;
    }
    tokenizer.resetSyntax();
    boolean parsingMetaData = (a_SyntaxMode == SYNTAX_MODE_META_DATA);
    if (parsingMetaData)
    {
        // Header mode: ' ' and everything below it is whitespace.
        tokenizer.whitespaceChars( 0, ' ' );
        tokenizer.wordChars( ' ' + 1, '\u00FF' );
    }
    else
    {
        // Data mode: ' ' itself belongs to word characters.
        tokenizer.whitespaceChars( 0, ' ' - 1 );
        tokenizer.wordChars( ' ', '\u00FF' );
    }
    tokenizer.whitespaceChars( ',', ',' );
    tokenizer.commentChar( '%' );
    tokenizer.quoteChar( '"' );
    tokenizer.quoteChar( '\'' );
    tokenizer.ordinaryChar( '{' );
    tokenizer.ordinaryChar( '}' );
    tokenizer.eolIsSignificant( true );
}
/**
 * Creates a tokenizer over the given reader (buffered) and configures
 * its syntax for the requested parsing mode.
 *
 * @param reader source to tokenize
 * @param a_SyntaxMode parsing mode, forwarded to resetTokenizerSyntax
 */
protected void initTokenizer( Reader reader, int a_SyntaxMode)
{
    tokenizer = new StreamTokenizer( new BufferedReader( reader ) );
    resetTokenizerSyntax( a_SyntaxMode );
}
/**
 * Throws a MiningDataException whose message combines the caller's text
 * with the tokenizer state (last token read and line number).
 *
 * @param message the error message to report
 * @throws MiningException always, carrying the combined message
 */
protected void tokenizerException( String message ) throws MiningException
{
    String details = message + ", read " + tokenizer.toString();
    throw new MiningDataException( details );
}
/**
 * Returns a representation of this stream in ARFF format: the meta data
 * header, the "@data" marker, then one line per vector.
 * Attention: changes the cursor position.
 *
 * @return representation of mining input stream as string
 */
public String createArffDescription() {
    // StringBuilder instead of repeated concatenation: the original was
    // O(n^2) in the number of vectors.
    StringBuilder description = new StringBuilder();
    // Meta data:
    description.append( metaData.createArffDescription() ).append("\n");
    // Data:
    description.append("@data").append("\n");
    // Try to reset the stream so serialization starts at the beginning:
    try {
        reset();
    }
    catch (MiningException ex) {
        description.append("Warning: can't reset cursor. ")
                   .append("Start reading at current position").append("\n");
    }
    // Read data. BUG FIX: the original `i++` sat outside the unbraced
    // while loop, so the error message always reported vector 0.
    int i = 0;
    try {
        while( next() ) {
            description.append( read() ).append("\n");
            i++;
        }
    }
    catch (Exception ex) {
        description.append("Error: can't read vector ").append(i);
    }
    return description.toString();
}
// -----------------------------------------------------------------------
// Test
// -----------------------------------------------------------------------
/**
 * Smoke test of the ARFF stream: loads a sample file and prints it.
 *
 * @param args arguments (ignored)
 */
public static void main(String[] args)
{
    try
    {
        MiningArffStream mas = new MiningArffStream( "data/arff/labor.arff", null );
        System.out.println( mas );
    }
    catch (Exception ex)
    {
        ex.printStackTrace();
    }
}
}