📄 arfffilereader.java

📁 本算法是实现基于KNN的基因遗传算法
💻 JAVA
字号:
/* * To change this template, choose Tools | Templates * and open the template in the editor. */package gaknn.dataaccess;import java.io.StreamTokenizer;import java.io.IOException;import java.text.ParseException;import gaknn.core.Instance;import gaknn.core.Attribute;import java.io.FileReader;import java.io.BufferedReader;import gaknn.core.FastVector;import gaknn.core.InvalidClassIndexException;/** * * @author Niro */public class ArffFileReader extends DataFileReader{    public static String FILE_EXTENSION = "arff";        public static char MISSING_VALUE = '?';        /** The keyword used to denote the start of an arff header */    public final static String ARFF_RELATION = "@relation";    /** The keyword used to denote the start of the arff data section */    public final static String ARFF_DATA = "@data";        /** The keyword used to denote the start of an arff attribute declaration */    public final static String ARFF_ATTRIBUTE = "@attribute";    /** A keyword used to denote a numeric attribute */    public final static String ARFF_ATTRIBUTE_INTEGER = "integer";    /** A keyword used to denote a numeric attribute */    public final static String ARFF_ATTRIBUTE_REAL = "real";    /** A keyword used to denote a numeric attribute */    public final static String ARFF_ATTRIBUTE_NUMERIC = "numeric";    /** The keyword used to denote a string attribute */    public final static String ARFF_ATTRIBUTE_STRING = "string";    /** The keyword used to denote a date attribute */    public final static String ARFF_ATTRIBUTE_DATE = "date";   private StreamTokenizer m_Tokenizer;    public ArffFileReader(String sDataFile)  {        this.m_filePath = sDataFile;    }            protected void initTokenizer(){        m_Tokenizer.resetSyntax();                 m_Tokenizer.whitespaceChars(0, ' ');            m_Tokenizer.wordChars(' '+1,'\u00FF');        m_Tokenizer.whitespaceChars(',',',');        m_Tokenizer.commentChar('%');        m_Tokenizer.quoteChar('"');        m_Tokenizer.quoteChar('\'');        m_Tokenizer.ordinaryChar('{');        m_Tokenizer.ordinaryChar('}');        m_Tokenizer.eolIsSignificant(true);    }        protected void getFirstToken() throws IOException     {        while (m_Tokenizer.nextToken() == StreamTokenizer.TT_EOL) {};        if ((m_Tokenizer.ttype == '\'') ||(m_Tokenizer.ttype == '"'))             m_Tokenizer.ttype = StreamTokenizer.TT_WORD;        else if ((m_Tokenizer.ttype == StreamTokenizer.TT_WORD) &&                        (m_Tokenizer.sval.equals("?")))            m_Tokenizer.ttype = '?';    }        protected void getNextToken() throws IOException {        if (m_Tokenizer.nextToken() == StreamTokenizer.TT_EOL)         {             errorMessage("premature end of file");        }        if (m_Tokenizer.ttype == StreamTokenizer.TT_EOF)         {            errorMessage("premature end of file");        }         else if ((m_Tokenizer.ttype == '\'') ||               (m_Tokenizer.ttype == '"'))         {            m_Tokenizer.ttype = StreamTokenizer.TT_WORD;        }         else if ((m_Tokenizer.ttype == StreamTokenizer.TT_WORD) &&               (m_Tokenizer.sval.equals("?")))        {            m_Tokenizer.ttype = '?';        }    }        private void errorMessage(String msg) throws IOException     {        String str = msg + ", read " + m_Tokenizer.toString();        if (m_Lines > 0)         {            int line = Integer.parseInt(str.replaceAll(".* line ", ""));            str = str.replaceAll(" line .*", " line " + (m_Lines + line - 1));        }        throw new IOException(str);    }        protected void getLastToken(boolean endOfFileOk) throws IOException     {        if ((m_Tokenizer.nextToken() != StreamTokenizer.TT_EOL) &&            ((m_Tokenizer.ttype != StreamTokenizer.TT_EOF) || !endOfFileOk))        {            errorMessage("end of line expected");        }    }        protected void readTillEOL() throws IOException {        while (m_Tokenizer.nextToken() != StreamTokenizer.TT_EOL) {};        m_Tokenizer.pushBack();    }        protected Instance ReadInstance() throws IOException {        double[] instanceValues = new double [m_Data.NumAttributes()];        int index = 0;        int classIndex = 0;        int numIndex = 0;        int strIndex = 0;        boolean bMissingValue ;        //	    Check if end of file reached.        getFirstToken();        if (m_Tokenizer.ttype == StreamTokenizer.TT_EOF) {            return null;        }        // Get values for all attributes.        for (int i = 0; i < m_Data.NumAllAttributes(); i++){            // Get next token            if (i > 0) {                getNextToken();            }                        if (m_Data.Attribute(i).Selected())            {                          // Check if value is missing.                //   if  (m_Tokenizer.ttype == '?') {                //   	instance.addElement(Instance.MISSING_VALUE);                //   } else {                bMissingValue = false;                //Check if value is missing.                        if  (m_Tokenizer.ttype == MISSING_VALUE)                     bMissingValue = true;                else                {                // Check if token is valid.                    if (m_Tokenizer.ttype != StreamTokenizer.TT_WORD) {                        errorMessage("not a valid value");                    }                }                String attrName = m_Data.Attribute(i).Name();                switch (m_Data.Attribute(i).Type()) {                case Attribute.NOMINAL:                    if (bMissingValue)                        index = -1;                    else                        // Check if value appears in header.                        index = m_Data.Attribute(i).IndexOfValue(m_Tokenizer.sval);                    if (m_Data.Attribute(i).IsClassAttribute())                    {                        m_Data.SetClassIndex(i);                        classIndex = index;                    }                    else                    {                        if (bMissingValue)                            instanceValues[numIndex] = m_ValueHandler.GetMissingValue(Attribute.NOMINAL, i);                         else                            instanceValues[numIndex] = m_ValueHandler.GetValue(Attribute.NOMINAL, i, m_Tokenizer.sval);                        numIndex++;                    }                    break;                case Attribute.NUMERIC:                // Check if value is really a number.                    try{                        double value;                        if (bMissingValue)                            value = m_ValueHandler.GetMissingValue(Attribute.NUMERIC, i);                         else                            value = Double.valueOf(m_Tokenizer.sval).doubleValue();                        instanceValues[numIndex] = value;                        numIndex++;                    } catch (NumberFormatException e) {                        errorMessage("number expected");                    }                    break;                case Attribute.STRING:                    if (bMissingValue)                        instanceValues[numIndex] = m_ValueHandler.GetMissingValue(Attribute.STRING, i);                    else                        instanceValues[numIndex] = m_ValueHandler.GetValue(Attribute.STRING, i, m_Tokenizer.sval);                    numIndex++;                    break;                case Attribute.DATE:                    try {                        if (bMissingValue)                            instanceValues[numIndex] = m_ValueHandler.GetMissingValue(Attribute.DATE, i);                        else                            instanceValues[numIndex] = m_Data.Attribute(i).ParseDate(m_Tokenizer.sval);                        numIndex++;                        numIndex++;                    } catch (ParseException e) {                        errorMessage("unparseable date: " + m_Tokenizer.sval);                    }                    break;                default:                    errorMessage("unknown attribute type in column " + i);                }            }        }        // Add instance to dataset        Instance inst = new Instance(instanceValues);        // inst.setDataset(m_Data);        inst.SetClassIndex(classIndex);        return inst;    }        public void ReadHeader() throws IOException {                if (m_filePath.length() == 0)            throw new IOException("Missing File");        else        {            FileReader fr = new FileReader(m_filePath);             BufferedReader br = new BufferedReader(fr);            m_Tokenizer = new StreamTokenizer(br);            initTokenizer();            m_Lines = 0;            // Get name of relation.            getFirstToken();            if (m_Tokenizer.ttype == StreamTokenizer.TT_EOF) {              errorMessage("premature end of file");            }                        if (ARFF_RELATION.equalsIgnoreCase(m_Tokenizer.sval)) {                getNextToken();                m_RelationName = m_Tokenizer.sval;                getLastToken(false);            } else {                errorMessage("keyword " + ARFF_RELATION + " expected");            }            //Create vectors to hold information temporarily.            FastVector attributes = new FastVector();            //Get attribute declarations.            getFirstToken();                        if (m_Tokenizer.ttype == StreamTokenizer.TT_EOF)                 errorMessage("premature end of file");            attributes = parseAttributes();            // Check if data part follows. We can't easily check for EOL.            if (!ARFF_DATA.equalsIgnoreCase(m_Tokenizer.sval)) {                errorMessage("keyword " + ARFF_DATA + " expected");            }              // Check if any attributes have been declared.            if (attributes.size() == 0) {                errorMessage("no attributes declared");            }            m_Attributes = attributes;        }    }        protected FastVector parseAttributes() throws IOException {        String attributeName;        FastVector attributes = new FastVector();        FastVector attributeValues;        int numAttributes = 0;        while (ARFF_ATTRIBUTE.equalsIgnoreCase(m_Tokenizer.sval)) {            // Get attribute name.            getNextToken();            attributeName = m_Tokenizer.sval;            getNextToken();        // Check if attribute is nominal.            if (m_Tokenizer.ttype == StreamTokenizer.TT_WORD) {                // Attribute is real, integer, or string.                if (m_Tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_REAL) ||                    m_Tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_INTEGER) ||                    m_Tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_NUMERIC)) {                    numAttributes++;                    attributes.addElement(new Attribute(attributeName, attributes.size(), numAttributes));                    readTillEOL();                } else if (m_Tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_STRING)) {                    numAttributes++;                    attributes.addElement(new Attribute(attributeName, (FastVector)null,                                    attributes.size(), numAttributes));                    readTillEOL();                } else if (m_Tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_DATE)) {                  String format = null;                  if (m_Tokenizer.nextToken() != StreamTokenizer.TT_EOL) {                        if ((m_Tokenizer.ttype != StreamTokenizer.TT_WORD) &&                            (m_Tokenizer.ttype != '\'') &&                            (m_Tokenizer.ttype != '\"')) {                            errorMessage("not a valid date format");                        }                        format = m_Tokenizer.sval;                        readTillEOL();                  } else {                        m_Tokenizer.pushBack();                  }                  numAttributes++;                  attributes.addElement(new Attribute(attributeName, format,                                  attributes.size(), numAttributes));                } else {                      errorMessage("no valid attribute type or invalid "+                            "enumeration");                }        } else {            // Attribute is nominal.            attributeValues = new FastVector();            m_Tokenizer.pushBack();            // Get values for nominal attribute.            if (m_Tokenizer.nextToken() != '{') {                errorMessage("{ expected at beginning of enumeration");            }            while (m_Tokenizer.nextToken() != '}') {                if (m_Tokenizer.ttype == StreamTokenizer.TT_EOL) {                    errorMessage("} expected at end of enumeration");                } else {                    attributeValues.addElement(m_Tokenizer.sval);                }            }            Attribute attrb = new Attribute(attributeName, attributeValues,                            attributes.size(), numAttributes);            //System.out.println(attributeName);           // if (attributeName.compareTo(Attribute.ARFF_ATTRIBUTE_CLASS) == 0){            //    attrb.SetClassAttribute(true);           // }            //else            numAttributes++;            attributes.addElement(attrb);        }                getLastToken(false);        getFirstToken();                if (m_Tokenizer.ttype == StreamTokenizer.TT_EOF)            errorMessage("premature end of file");        }        return attributes;    }        public void SetClassAttribute(String classAttribute) throws Exception{       int index = -1;       Attribute attribute;              for (int i=0; i<m_Attributes.size(); i++){           attribute = (Attribute)m_Attributes.elementAt(i);           if (attribute.Name().equalsIgnoreCase(classAttribute)){               index = i;               attribute.SetClassAttribute(true);           }       }              if (index > -1)       {           for (int i=index; i<m_Attributes.size(); i++){               attribute = (Attribute)m_Attributes.elementAt(i);               attribute.ValueIndex(i-1);           }       }       else           throw new Exception("Invalid attribute name for class " + classAttribute);    }}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -