instances.java
// Check if token is valid.
if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
errms(tokenizer,"not a valid value");
}
if (attribute(i).isNominal()) {
// Check if value appears in header.
index = attribute(i).indexOfValue(tokenizer.sval);
if (index == -1) {
errms(tokenizer,"nominal value not declared in header");
}
instance[i] = (double)index;
} else if (attribute(i).isNumeric()) {
// Check if value is really a number.
try{
instance[i] = Double.valueOf(tokenizer.sval).doubleValue();
} catch (NumberFormatException e) {
errms(tokenizer,"number expected");
}
} else {
instance[i] = attribute(i).addStringValue(tokenizer.sval);
}
}
}
if (flag) {
getLastToken(tokenizer,true);
}
/*
// Add instance to dataset
for (int i=0; i<instance.length; i++){
System.out.println(Double.toString(instance[i]));
}
*/
// Instance neoInstance = ;
// System.out.println(neoInstance.dataset().toString());
Instance neoInstance = new Instance(1,instance);
add(neoInstance);
return true;
}
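/*
 * A sketch of the XML layout that readHeader(Document) below appears to
 * expect, inferred from the element names used in the code. The concrete
 * relation, attribute and value names in this example are illustrative
 * assumptions, not part of the original source:
 *
 *   <INSTANCES title="weather">
 *     <ATTRIBUTES>
 *       <ATTRIBUTE>
 *         <NAME>temperature</NAME>
 *         <TYPE>numeric</TYPE>
 *       </ATTRIBUTE>
 *       <ATTRIBUTE>
 *         <NAME>outlook</NAME>
 *         <TYPE>nominal</TYPE>
 *         <VALUE-SET>
 *           <VALUE>sunny</VALUE>
 *           <VALUE>rainy</VALUE>
 *         </VALUE-SET>
 *       </ATTRIBUTE>
 *     </ATTRIBUTES>
 *   </INSTANCES>
 */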
/**
 * Reads and stores the header of an XML document
 * (added by asymeon to process the header part of the XML format).
 * Errors are logged rather than thrown.
 *
 * @param document the org.jdom.Document object reference
 */
protected void readHeader (Document document) {
try {
String valueSetListString = "";
String documentName;
String attributeName;
String attributeType;
FastVector attributeValues = null;
//Get name of relation.
Document xmlDocument = document;
Element docElement = xmlDocument.getRootElement();
if (docElement.getName().compareTo("INSTANCES")==0){
m_RelationName = (docElement.getAttributeValue("title"));
// System.out.println(m_RelationName);
}
else {
log.error("An error has occured: Document root relation expected");
}
// Create vectors to hold information temporarily.
m_Attributes = new FastVector ();
// Get attribute declarations.
List childrenList = docElement.getChildren("ATTRIBUTES");
if (childrenList.size()!=1) {
log.error("An error has occured : Bad Attributes of the document");
}
Element attributesElement = docElement.getChild("ATTRIBUTES");
List attributeList = attributesElement.getChildren("ATTRIBUTE");
if (attributeList.size() == 0) {
log.error("An error has occured: No attribute is defined");
}
else {
// Get attribute name and type
Iterator childrenIterator = attributeList.iterator();
while (childrenIterator.hasNext()){
Element child = (Element) childrenIterator.next ();
attributeName = child.getChildTextTrim("NAME");
attributeType = child.getChildTextTrim("TYPE");
if (attributeType.equalsIgnoreCase("real") ||
attributeType.equalsIgnoreCase("integer") ||
attributeType.equalsIgnoreCase("numeric")) {
Attribute numericAttribute = new org.agentacademy.modules.dataminer.core.Attribute(attributeName, numAttributes());
m_Attributes.addElement(numericAttribute);
}
else if (attributeType.equalsIgnoreCase("string")) {
Attribute stringAttribute = new org.agentacademy.modules.dataminer.core.Attribute(attributeName, (FastVector)null, numAttributes());
m_Attributes.addElement(stringAttribute);
}
else if (attributeType.equalsIgnoreCase("nominal")) {
attributeValues = new FastVector();
List valueSetList = child.getChildren("VALUE-SET");
valueSetListString = valueSetList.toString();
Element valueSetElement = child.getChild("VALUE-SET");
if (valueSetList.size()!=1) {
log.error("An error has occured: Invalid value-set defined");
}
else {
List valuesList = valueSetElement.getChildren("VALUE");
if (valuesList.size()<1) {
log.error("An error has occured: No value defined");
}
//Get the values one by one.
Iterator valueIterator = valuesList.iterator();
while (valueIterator.hasNext()){
Element attrValue = (Element)valueIterator.next();
attributeValues.addElement(attrValue.getText());
}
Attribute nominalAttribute = new org.agentacademy.modules.dataminer.core.Attribute(attributeName, attributeValues, numAttributes());
m_Attributes.addElement(nominalAttribute);
}
}
else {
log.error(" An error has occured: No valid attribute type or invalid ");
}
}//end of while
}//end of if-else
printVector(m_Attributes);
}
catch (Exception e){
log.error("Error on function readHeader@Instances has occured:" + e);
}
}
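/*
 * A minimal ARFF header of the kind readHeader(StreamTokenizer) below
 * accepts; the relation and attribute names here are illustrative only:
 *
 *   @relation weather
 *   @attribute outlook {sunny, overcast, rainy}
 *   @attribute temperature numeric
 *   @attribute description string
 *   @data
 */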
/**
* Reads and stores header of an ARFF file.
*
* @param tokenizer the stream tokenizer
* @exception IOException if the information is not read
* successfully
*/
protected void readHeader(StreamTokenizer tokenizer)
throws IOException{
String attributeName;
FastVector attributeValues;
int i;
// Get name of relation.
getFirstToken(tokenizer);
if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
errms(tokenizer,"premature end of file");
}
if (tokenizer.sval.equalsIgnoreCase("@relation")){
getNextToken(tokenizer);
m_RelationName = tokenizer.sval;
// System.out.println(m_RelationName);
getLastToken(tokenizer,false);
} else {
errms(tokenizer,"keyword @relation expected");
}
// Create vectors to hold information temporarily.
m_Attributes = new FastVector();
// Get attribute declarations.
getFirstToken(tokenizer);
if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
errms(tokenizer,"premature end of file");
}
while (tokenizer.sval.equalsIgnoreCase("@attribute")) {
// Get attribute name.
getNextToken(tokenizer);
attributeName = tokenizer.sval;
getNextToken(tokenizer);
// Check if attribute is nominal.
if (tokenizer.ttype == StreamTokenizer.TT_WORD) {
// Attribute is real, integer, or string.
if (tokenizer.sval.equalsIgnoreCase("real") ||
tokenizer.sval.equalsIgnoreCase("integer") ||
tokenizer.sval.equalsIgnoreCase("numeric")) {
m_Attributes.addElement(new Attribute(attributeName, numAttributes()));
readTillEOL(tokenizer);
} else if (tokenizer.sval.equalsIgnoreCase("string")) {
m_Attributes.addElement(new Attribute(attributeName, (FastVector)null, numAttributes()));
readTillEOL(tokenizer);
} else {
errms(tokenizer,"no valid attribute type or invalid "+
"enumeration");
}
} else {
// Attribute is nominal.
attributeValues = new FastVector();
tokenizer.pushBack();
// Get values for nominal attribute.
if (tokenizer.nextToken() != '{') {
errms(tokenizer,"{ expected at beginning of enumeration");
}
while (tokenizer.nextToken() != '}') {
if (tokenizer.ttype == StreamTokenizer.TT_EOL) {
errms(tokenizer,"} expected at end of enumeration");
} else {
attributeValues.addElement(tokenizer.sval);
}
}
if (attributeValues.size() == 0) {
errms(tokenizer,"no nominal values found");
}
m_Attributes.addElement(new Attribute(attributeName, attributeValues, numAttributes()));
}
getLastToken(tokenizer,false);
getFirstToken(tokenizer);
if (tokenizer.ttype == StreamTokenizer.TT_EOF)
errms(tokenizer,"premature end of file");
}
// Check if data part follows. We can't easily check for EOL.
if (!tokenizer.sval.equalsIgnoreCase("@data")) {
errms(tokenizer,"keyword @data expected");
}
// Check if any attributes have been declared.
if (m_Attributes.size() == 0) {
errms(tokenizer,"no attributes declared");
}
// Allocate buffers in case sparse instances have to be read
m_ValueBuffer = new double[numAttributes()];
m_IndicesBuffer = new int[numAttributes()];
}
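// Usage sketch for the helper below: appending every instance of this set
// to another set could be done with copyInstances(0, dest, numInstances()),
// assuming dest has been created with a compatible header.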
/**
 * Copies instances from this set to the end of another
 * one.
 *
 * @param from the position of the first instance to be copied
 * @param dest the destination for the instances
 * @param num the number of instances to be copied
 */
private void copyInstances(int from, Instances dest, int num) {
for (int i = 0; i < num; i++) {
dest.add(instance(from + i));
}
}
/**
 * Throws an error message with the line number and last token read.
 *
 * @param tokenizer the stream tokenizer
 * @param theMsg the error message to be thrown
 * @throws IOException containing the error message
 */
private void errms(StreamTokenizer tokenizer, String theMsg)
throws IOException {
throw new IOException(theMsg + ", read " + tokenizer.toString());
}
/**
* Replaces the attribute information by a clone of
* itself.
*/
private void freshAttributeInfo() {
m_Attributes = (FastVector) m_Attributes.copyElements();
}
/**
* Gets next token, skipping empty lines.
*
* @param tokenizer the stream tokenizer
* @exception IOException if reading the next token fails
*/
private void getFirstToken(StreamTokenizer tokenizer)
throws IOException{
while (tokenizer.nextToken() == StreamTokenizer.TT_EOL){};
if ((tokenizer.ttype == '\'') ||
(tokenizer.ttype == '"')) {
tokenizer.ttype = StreamTokenizer.TT_WORD;
} else if ((tokenizer.ttype == StreamTokenizer.TT_WORD) &&
(tokenizer.sval.equals("?"))){
tokenizer.ttype = '?';
}
}
/**
* Gets index, checking for a premature end of line.
*
* @param tokenizer the stream tokenizer
* @exception IOException if it finds a premature end of line
*/
private void getIndex(StreamTokenizer tokenizer) throws IOException{
if (tokenizer.nextToken() == StreamTokenizer.TT_EOL) {
errms(tokenizer,"premature end of line");
}
if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
errms(tokenizer,"premature end of file");
}
}
/**
 * Gets the next token and checks whether it is the end of line.
 *
 * @param tokenizer the stream tokenizer
 * @param endOfFileOk true if an end of file instead of an end of line is acceptable
 * @exception IOException if it doesn't find an end of line
 */
private void getLastToken(StreamTokenizer tokenizer, boolean endOfFileOk)
throws IOException{
if ((tokenizer.nextToken() != StreamTokenizer.TT_EOL) &&
((tokenizer.ttype != StreamTokenizer.TT_EOF) || !endOfFileOk)) {
errms(tokenizer,"end of line expected");
}
}
/**
* Gets next token, checking for a premature end of line.
*
* @param tokenizer the stream tokenizer
* @exception IOException if it finds a premature end of line
*/
private void getNextToken(StreamTokenizer tokenizer)
throws IOException{
if (tokenizer.nextToken() == StreamTokenizer.TT_EOL) {
errms(tokenizer,"premature end of line");
}
if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
errms(tokenizer,"premature end of file");
} else if ((tokenizer.ttype == '\'') ||
(tokenizer.ttype == '"')) {
tokenizer.ttype = StreamTokenizer.TT_WORD;
} else if ((tokenizer.ttype == StreamTokenizer.TT_WORD) &&
(tokenizer.sval.equals("?"))){
tokenizer.ttype = '?';
}
}
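// The tokenizer configuration below treats whitespace and commas as
// delimiters, '%' as a comment character, single and double quotes as
// string quotes, and '{' and '}' as ordinary single-character tokens, and
// it makes end-of-line significant, which matches the line-oriented ARFF
// parsing above.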
/**
* Initializes the StreamTokenizer used for reading the ARFF file.
*
* @param tokenizer the stream tokenizer
*/
private void initTokenizer(StreamTokenizer tokenizer){
tokenizer.resetSyntax();
tokenizer.whitespaceChars(0, ' ');
tokenizer.wordChars(' '+1,'\u00FF');
tokenizer.whitespaceChars(',',',');
tokenizer.commentChar('%');
tokenizer.quoteChar('"');
tokenizer.quoteChar('\'');
tokenizer.ordinaryChar('{');
tokenizer.ordinaryChar('}');
tokenizer.eolIsSignificant(true);
}
/**
* Returns string including all instances, their weights and
* their indices in the original dataset.
*
* @return description of instance and its weight as a string
*/
private String instancesAndWeights(){
StringBuffer text = new StringBuffer();
for (int i = 0; i < numInstances(); i++) {
text.append(instance(i) + " " + instance(i).weight());
if (i < numInstances() - 1) {
text.append("\n");
}
}
return text.toString();
}
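// Note on quickSort below: the pivot is the attribute value of the middle
// instance, widened into a small tolerance band (midMinus, midPlus); values
// within 1e-6 of the pivot are presumably treated as equal to it during the
// partitioning scan.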
/**
* Implements quicksort.
*
* @param attIndex the attribute's index
* @param lo0 the first index of the subset to be sorted
* @param hi0 the last index of the subset to be sorted
*/
private void quickSort(int attIndex, int lo0, int hi0) {
int lo = lo0, hi = hi0;
double mid, midPlus, midMinus;
if (hi0 > lo0) {
// Arbitrarily establishing partition element as the
// midpoint of the array.
mid = instance((lo0 + hi0) / 2).value(attIndex);
midPlus = mid + 1e-6;
midMinus = mid - 1e-6;
// loop through the ar