⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 okcloader.java

📁 MacroWeka扩展了著名数据挖掘工具weka
💻 JAVA
字号:
package chen.macroweka.core.converters;

import weka.core.Instance;
import weka.core.Instances;
import weka.core.FastVector;
import weka.core.Attribute;
import weka.core.converters.*; // pancher add

import java.io.*;

/**
 * Loader for data sets in the XMDV .okc file format, optionally
 * accompanied by meta information in a companion .meta file.
 */
public class OKCLoader extends AbstractLoader
        implements BatchLoader, IncrementalLoader
{
    /**
     * Holds the determined structure (header) of the data set.
     */
    protected Instances m_structure = null;

    /**
     * Holds the meta information of the data set. In this case, the meta file
     * of the data set. m_sourceDataFile is the data file.
     */
    protected File m_sourceMetaFile = null;

    /**
     * Holds the source of the data set. In this case the okc file of the
     * data set.
     */
    private File m_sourceDataFile = null;

    /**
     * Reader for the meta file; null when no .meta file was found.
     */
    private transient Reader m_metaReader = null;

    /**
     * Reader for the data (.okc) file.
     */
    private transient Reader m_dataReader = null;

    /**
     * Tokenizer over m_dataReader, shared by getStructure(), getDataSet()
     * and getNextInstance(). A StreamTokenizer buffers one look-ahead
     * character internally, so creating a fresh tokenizer per call (as the
     * original code did) could silently drop that character between
     * incremental reads; caching one instance avoids that.
     */
    private transient StreamTokenizer m_dataTokenizer = null;

    /**
     * Holds the filestem (file name without extension); used as the
     * relation name of the resulting Instances.
     */
    private String m_fileStem;

    /**
     * Number of attributes in the data (including ignore and label attributes).
     */
    private int m_numAttribs;

    /**
     * Number of data rows declared in the .okc header.
     */
    private int m_numInstances;

    /**
     * Returns a string describing this attribute evaluator
     *
     * @return a description of the evaluator suitable for
     *         displaying in the explorer/experimenter gui
     */
    public String globalInfo()
    {
        return "Reads a source that is in xmdv file format (.okc file " +
               "containing data with meta information in .meta file).";
    }

    /**
     * Resets the loader and points it at a data set. From the given file
     * name the stem is extracted; "&lt;stem&gt;.meta" is opened if present
     * (optional) and "&lt;stem&gt;.okc" is opened as the data file
     * (mandatory).
     *
     * @param file the File naming the data set (any extension is replaced)
     * @throws java.io.IOException if file is null or the .okc file is missing
     */
    public void setSource(File file) throws IOException
    {
        reset();

        if ( file == null ) {
            throw new IOException( "Source file object is null!" );
        }

        String fname = file.getName();
        String fileStem;
        String path = file.getParent();
        if ( path != null ) {
            path += File.separator;
        } else {
            path = "";
        }
        if ( fname.indexOf( '.' ) < 0 ) {
            fileStem = fname;
            fname += ".meta";
        } else {
            fileStem = fname.substring( 0, fname.indexOf( '.' ) );
            fname = fileStem + ".meta";
        }
        m_fileStem = fileStem;
        file = new File( path + fname );

        m_sourceMetaFile = file;
        try {
            BufferedReader br = new BufferedReader( new FileReader( file ) );
            m_metaReader = br;
        } catch (FileNotFoundException ignored) {
            // The meta file is optional: when absent we fall back to
            // reading the header directly from the .okc file, so this
            // exception is deliberately swallowed.
        }

        m_sourceDataFile = new File( path + fileStem + ".okc" );
        try {
            BufferedReader br = new BufferedReader( new FileReader( m_sourceDataFile ) );
            m_dataReader = br;
        } catch (FileNotFoundException ex) {
            // Report the .okc path (the original message wrongly named the
            // .meta file) and preserve the cause for diagnosis.
            IOException ioe = new IOException(
                    "File not found : " + ( path + fileStem + ".okc" ) );
            ioe.initCause( ex );
            throw ioe;
        }
    }

    /**
     * Resets the loader so that the header is re-read on the next access.
     */
    // private void reset() pancher change
    public void reset()
    {
        m_structure = null;
        m_dataTokenizer = null;
    }

    /**
     * Lazily creates (once) and returns the tokenizer over the data reader.
     */
    private StreamTokenizer dataTokenizer()
    {
        if ( m_dataTokenizer == null ) {
            m_dataTokenizer = new StreamTokenizer( m_dataReader );
            initTokenizer( m_dataTokenizer );
        }
        return m_dataTokenizer;
    }

    /**
     * Determines and returns (if possible) the structure (internally the
     * header) of the data set as an empty set of instances.
     *
     * @return the structure of the data set as an empty set of Instances
     * @throws IOException if no source has been set or the header is invalid
     */
    public Instances getStructure() throws IOException
    {
        if ( m_sourceDataFile == null ) {
            throw new IOException( "No source has been specified" );
        }

        if ( m_structure == null ) {
            if ( m_metaReader != null ) {
                StreamTokenizer st = new StreamTokenizer( m_metaReader );
                initTokenizer( st );
                readHeaderFromMeta( st );
            }
            // readHeaderFromMeta is not implemented yet; without this
            // fallback the loader would hand callers a null structure
            // whenever a .meta file happened to exist.
            if ( m_structure == null ) {
                readHeaderFromOKC( dataTokenizer() );
            }
        }

        return m_structure;
    }

    /**
     * Reads the header of an .okc file: number of dimensions, number of
     * data items, one attribute name per dimension, then (skipped) min,
     * max and cardinality for each dimension. Sets m_structure with the
     * last attribute as class.
     *
     * @param tokenizer tokenizer positioned at the start of the file
     * @throws IOException if the header ends prematurely
     */
    private void readHeaderFromOKC(StreamTokenizer tokenizer)
            throws IOException
    {
        ConverterUtils.getFirstToken( tokenizer );
        if ( tokenizer.ttype == StreamTokenizer.TT_EOF ) {
            ConverterUtils.errms( tokenizer, "premature end of file" );
        }

        // read number of dimensions
        if ( tokenizer.ttype != StreamTokenizer.TT_EOL ) {
            m_numAttribs = Integer.parseInt( tokenizer.sval.trim() );
        }

        // read number of data items
        ConverterUtils.getToken( tokenizer );
        if ( tokenizer.ttype != StreamTokenizer.TT_EOL ) {
            m_numInstances = Integer.parseInt( tokenizer.sval.trim() );
        }

        FastVector attribDefs = new FastVector();
        for ( int i = 0; i < m_numAttribs; i++ ) {
            ConverterUtils.getToken( tokenizer );
            if ( tokenizer.ttype == StreamTokenizer.TT_EOL ) {
                ConverterUtils.errms( tokenizer, "premature end of line. Expected "
                                                 + "attribute name." );
            }
            String attribName = tokenizer.sval;
            attribDefs.addElement( new Attribute( attribName ) );
        }

        m_structure = new Instances( m_fileStem, attribDefs, 0 );
        m_structure.setClassIndex( m_structure.numAttributes() - 1 );

        // ignore dim min, max and cardinality
        for ( int i = 0; i < m_numAttribs; i++ ) {
            ConverterUtils.getToken( tokenizer );
            if ( tokenizer.ttype == StreamTokenizer.TT_EOL ) {
                ConverterUtils.errms( tokenizer, "premature end of line. Expected "
                                                 + "attribute min value." );
            }
            ConverterUtils.getToken( tokenizer );
            if ( tokenizer.ttype == StreamTokenizer.TT_EOL ) {
                ConverterUtils.errms( tokenizer, "premature end of line. Expected "
                                                 + "attribute max value." );
            }
            ConverterUtils.getToken( tokenizer );
            if ( tokenizer.ttype == StreamTokenizer.TT_EOL ) {
                ConverterUtils.errms( tokenizer, "premature end of line. Expected "
                                                 + "attribute cardinality value." );
            }
        }
    }

    /**
     * Configures the tokenizer: whitespace-separated words, comma/colon
     * and tabs treated as separators, '#' comments, quoted strings, and
     * end-of-line NOT significant.
     *
     * @param tokenizer the tokenizer to configure
     */
    private void initTokenizer(StreamTokenizer tokenizer)
    {
        tokenizer.resetSyntax();
        tokenizer.whitespaceChars( 0, ( ' ' - 1 ) );
        tokenizer.wordChars( ' ', '\u00FF' );
        tokenizer.whitespaceChars( ' ', ' ' );
        tokenizer.whitespaceChars( '\n', '\n' );
        tokenizer.whitespaceChars( ',', ',' );
        tokenizer.whitespaceChars( ':', ':' );
        tokenizer.commentChar( '#' );
        tokenizer.whitespaceChars( '\t', '\t' );
        tokenizer.quoteChar( '"' );
        tokenizer.quoteChar( '\'' );
        tokenizer.eolIsSignificant( false );
    }

    /**
     * Reads the header from the .meta file. Not implemented yet: callers
     * must cope with m_structure remaining null after this returns (see
     * the fallback in getStructure()).
     *
     * @param st tokenizer over the meta file
     */
    private void readHeaderFromMeta(StreamTokenizer st)
    {
        //To change body of created methods use File | Settings | File Templates.
    }

    /**
     * Returns the full data set in batch mode.
     *
     * @return all m_numInstances rows of the data set
     * @throws IOException if no source is set, or incremental reading has
     *         already been started
     */
    public Instances getDataSet() throws IOException
    {
        if ( m_sourceDataFile == null ) {
            throw new IOException( "No source has been specified" );
        }

        if ( getRetrieval() == INCREMENTAL ) {
            throw new IOException( "Cannot mix getting Instances in both incremental and batch modes" );
        }
        setRetrieval( BATCH );

        if ( m_structure == null ) {
            getStructure();
        }

        // Reuse the tokenizer that getStructure() consumed the header from,
        // so no buffered look-ahead character is lost.
        StreamTokenizer st = dataTokenizer();

        Instances result = new Instances( m_structure );

        for ( int i = 0; i < m_numInstances; i++ ) {
            Instance current = getInstance( st );
            result.add( current );
        }

        return result;
    }

    /**
     * Reads a single data row from the tokenizer and converts it into an
     * Instance matching m_structure.
     *
     * @param tokenizer tokenizer positioned at the start of a row
     * @return the parsed instance, with weight 1.0
     * @throws IOException on premature EOF, undeclared nominal values,
     *         malformed numbers, or an unsupported attribute type
     */
    private Instance getInstance(StreamTokenizer tokenizer) throws IOException
    {
        double[] instance = new double[m_structure.numAttributes()];

        ConverterUtils.getFirstToken( tokenizer );
        if ( tokenizer.ttype == StreamTokenizer.TT_EOF ) {
            ConverterUtils.errms( tokenizer, "data expected" );
        }

        int counter = 0;
        for ( int i = 0; i < m_numAttribs; i++ ) {
            if ( i > 0 ) {
                ConverterUtils.getToken( tokenizer );
            }

            // Guard against a truncated row: sval is null at EOF and the
            // original code would have thrown a NullPointerException here.
            if ( tokenizer.ttype == StreamTokenizer.TT_EOF
                    || tokenizer.sval == null ) {
                ConverterUtils.errms( tokenizer, "premature end of file. Expected "
                                                 + "attribute value." );
            }

            // String.trim() returns a new string; the original code called
            // trim() and discarded the result, leaving padding on val.
            String val = tokenizer.sval.trim();

            if ( i == m_numAttribs - 1 ) {
                // remove trailing period
                if ( val.length() > 0
                        && val.charAt( val.length() - 1 ) == '.' ) {
                    val = val.substring( 0, val.length() - 1 );
                }
            }
            if ( m_structure.attribute( counter ).isNominal() ) {
                int index = m_structure.attribute( counter )
                        .indexOfValue( val );
                if ( index == -1 ) {
                    ConverterUtils.errms( tokenizer, "nominal value not declared in "
                                                     + "header :" + val + " column " + i );
                }
                instance[counter++] = (double) index;
            } else if ( m_structure.attribute( counter ).isNumeric() ) {
                try {
                    instance[counter++] = Double.parseDouble( val );
                } catch (NumberFormatException e) {
                    ConverterUtils.errms( tokenizer, "number expected" );
                }
            } else {
                // Library code must not kill the host JVM; the original
                // called System.exit(1) here.
                throw new IOException( "Unsupported attribute type in column "
                                       + i );
            }
        }

        return new Instance( 1.0, instance );
    }

    /**
     * Returns the next row of the data set in incremental mode.
     *
     * @return the next instance, with its dataset set to m_structure
     * @throws IOException if no source is set, or batch reading has
     *         already been started
     */
    public Instance getNextInstance() throws IOException
    {
        if ( m_sourceDataFile == null ) {
            throw new IOException( "No source has been specified" );
        }

        if ( getRetrieval() == BATCH ) {
            throw new IOException( "Cannot mix getting Instances in both incremental and batch modes" );
        }
        setRetrieval( INCREMENTAL );

        if ( m_structure == null ) {
            getStructure();
        }

        // The original code built a brand-new StreamTokenizer on every
        // call; a StreamTokenizer keeps one buffered look-ahead character,
        // which a fresh tokenizer would drop, corrupting token boundaries
        // between consecutive incremental reads. Reuse the cached one.
        StreamTokenizer st = dataTokenizer();

        Instance nextI = getInstance( st );
        if ( nextI != null ) {
            nextI.setDataset( m_structure );
        }
        return nextI;
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -