📄 averagingresultproducer.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
      m_ResultListener.acceptResult(this, newKey, result);      
    }
  }
  
  /**
   * Checks whether any duplicate results (with respect to a key template)
   * were received.
   *
   * @param template the template key.
   * @exception Exception if duplicate results are detected
   */
  protected void checkForDuplicateKeys(Object [] template) throws Exception {

    // The table is used purely as a set of the averaging-field values
    // already seen among the keys that match the template.
    Hashtable seenValues = new Hashtable();
    int matched = 0;
    for (int pos = 0; pos < m_Keys.size(); pos++) {
      Object [] candidate = (Object [])m_Keys.elementAt(pos);
      if (matchesTemplate(template, candidate)) {
        Object fieldValue = candidate[m_KeyIndex];
        // put() returns the previous mapping for the key, so a non-null
        // return means this averaging-field value was recorded before.
        if (seenValues.put(fieldValue, fieldValue) != null) {
          throw new Exception("Duplicate result received:"
                              + DatabaseUtils.arrayToString(candidate));
        }
        matched++;
      }
    }
    // The number of matching keys must equal the configured expectation.
    if (matched == m_ExpectedResultsPerAverage) {
      return;
    }
    throw new Exception("Expected " + m_ExpectedResultsPerAverage
                        + " results matching key \""
                        + DatabaseUtils.arrayToString(template)
                        + "\" but got "
                        + matched);
  }
  
  /**
   * Checks that the keys for a run only differ in one key field. If they
   * differ in more than one field, a more sophisticated averager will submit
   * multiple results - for now an exception is thrown. Currently assumes that
   * the most differences will be shown between the first and last
   * result received.
   *
   * @exception Exception if the keys differ on fields other than the
   * key averaging field
   */
  protected void checkForMultipleDifferences() throws Exception {

    // Only the first and last stored keys are compared; intermediate keys
    // are not inspected.
    Object [] head = (Object [])m_Keys.elementAt(0);
    Object [] tail = (Object [])m_Keys.elementAt(m_Keys.size() - 1);
    for (int field = 0; field < head.length; field++) {
      // The averaging field is allowed to differ.
      if (field == m_KeyIndex) {
        continue;
      }
      if (head[field].equals(tail[field])) {
        continue;
      }
      // Any other differing field is not supported.
      throw new Exception("Keys differ on fields other than \""
                          + m_KeyFieldName
                          + "\" -- time to implement multiple averaging");
    }
  }
  
  /**
   * Prepare for the results to be received.
   *
   * @param rp the ResultProducer that will generate the results
   * @exception Exception if an error occurs during preprocessing.
   */
  public void preProcess(ResultProducer rp) throws Exception {

    // rp is not consulted here; the notification is passed on to the
    // downstream listener with this object as the producer.
    if (m_ResultListener != null) {
      m_ResultListener.preProcess(this);
      return;
    }
    throw new Exception("No ResultListener set");
  }

  /**
   * Prepare to generate results. The ResultProducer should call
   * preProcess(this) on the ResultListener it is to send results to.
   *
   * @exception Exception if an error occurs during preprocessing.
   */
  public void preProcess() throws Exception {

    if (null == m_ResultProducer) {
      throw new Exception("No ResultProducer set");
    }
    // Register as the nested producer's listener before anything else so
    // that its results are delivered to this object.
    m_ResultProducer.setResultListener(this);
    // Refresh m_KeyIndex; -1 is treated as "key field not found".
    findKeyIndex();
    if (m_KeyIndex != -1) {
      m_ResultProducer.preProcess();
      return;
    }
    throw new Exception("No key field called " + m_KeyFieldName
                        + " produced by "
                        + m_ResultProducer.getClass().getName());
  }
  
  /**
   * When this method is called, it indicates that no more results
   * will be sent that need to be grouped together in any way.
   *
   * @param rp the ResultProducer that generated the results
   * @exception Exception if an error occurs
   */
  public void postProcess(ResultProducer rp) throws Exception {

    // Mirror the check in preProcess(ResultProducer): fail with a
    // descriptive exception rather than a NullPointerException when no
    // listener has been configured.
    if (m_ResultListener == null) {
      throw new Exception("No ResultListener set");
    }
    // rp is not consulted; the notification is forwarded downstream with
    // this object as the producer.
    m_ResultListener.postProcess(this);
  }

  /**
   * When this method is called, it indicates that no more requests to
   * generate results for the current experiment will be sent. The
   * ResultProducer should call postProcess(this) on the
   * ResultListener it is to send results to.
   *
   * @exception Exception if an error occurs
   */
  public void postProcess() throws Exception {

    // Mirror the check in preProcess(): fail with a descriptive exception
    // rather than a NullPointerException when no producer is configured.
    if (m_ResultProducer == null) {
      throw new Exception("No ResultProducer set");
    }
    // Delegate to the nested producer.
    m_ResultProducer.postProcess();
  }
  
  /**
   * Accepts results from a ResultProducer.
   *
   * @param rp the ResultProducer that generated the results
   * @param key an array of Objects (Strings or Doubles) that uniquely
   * identify a result for a given ResultProducer with given compatibilityState
   * @param result the results stored in an array. The objects stored in
   * the array may be Strings, Doubles, or null (for the missing value).
   * @exception Exception if the result could not be accepted.
   */
  public void acceptResult(ResultProducer rp, Object [] key, Object [] result)
    throws Exception {

    // Results are only accepted from the producer this object wraps
    // (identity comparison).
    if (rp == m_ResultProducer) {
      // Key and result are stored at the same position in their
      // respective collections.
      m_Keys.addElement(key);
      m_Results.addElement(result);
      return;
    }
    throw new Error("Unrecognized ResultProducer sending results!!");
  }

  /**
   * Determines whether the results for a specified key must be
   * generated.
   *
   * @param rp the ResultProducer wanting to generate the results
   * @param key an array of Objects (Strings or Doubles) that uniquely
   * identify a result for a given ResultProducer with given compatibilityState
   * @return true if the result should be generated
   * @exception Exception if it could not be determined if the result 
   * is needed.
   */
  public boolean isResultRequired(ResultProducer rp, Object [] key) 
    throws Exception {

    // Every result from the wrapped producer is reported as required;
    // the key itself is not examined.
    if (rp == m_ResultProducer) {
      return true;
    }
    throw new Error("Unrecognized ResultProducer sending results!!");
  }

  /**
   * Gets the names of each of the columns produced for a single run.
   *
   * @return an array containing the name of each column
   * @exception Exception if key names cannot be generated
   */
  public String [] getKeyNames() throws Exception {

    if (m_KeyIndex == -1) {
      throw new Exception("No key field called " + m_KeyFieldName
                          + " produced by "
                          + m_ResultProducer.getClass().getName());
    }
    String [] allNames = m_ResultProducer.getKeyNames();
    int dropped = m_KeyIndex;
    // The result has one fewer entry: the averaging field is removed.
    String [] remaining = new String [allNames.length - 1];
    int tailLength = allNames.length - dropped - 1;
    // Entries before the averaging field keep their positions...
    System.arraycopy(allNames, 0, remaining, 0, dropped);
    // ...and entries after it shift down by one.
    System.arraycopy(allNames, dropped + 1, remaining, dropped, tailLength);
    return remaining;
  }

  /**
   * Gets the data types of each of the columns produced for a single run.
   * This method should really be static.
   *
   * @return an array containing objects of the type of each column. The 
   * objects should be Strings, or Doubles.
   * @exception Exception if the key types could not be determined (perhaps
   * because of a problem from a nested sub-resultproducer)
   */
  public Object [] getKeyTypes() throws Exception {

    if (m_KeyIndex == -1) {
      throw new Exception("No key field called " + m_KeyFieldName
                          + " produced by "
                          + m_ResultProducer.getClass().getName());
    }
    Object [] keyTypes = m_ResultProducer.getKeyTypes();
    // Find and remove the key field that is being averaged over.
    // The destination must be a genuine Object[]: a String[] here makes
    // System.arraycopy throw ArrayStoreException as soon as one of the
    // remaining key types is not a String (e.g. a Double).
    Object [] newKeyTypes = new Object [keyTypes.length - 1];
    // Entries before the averaging field keep their positions...
    System.arraycopy(keyTypes, 0, newKeyTypes, 0, m_KeyIndex);
    // ...and entries after it shift down by one.
    System.arraycopy(keyTypes, m_KeyIndex + 1,
                     newKeyTypes, m_KeyIndex,
                     keyTypes.length - m_KeyIndex - 1);
    return newKeyTypes;
  }

  /**
   * Gets the names of each of the columns produced for a single run.
   * A new result field is added for the number of results used to
   * produce each average.
   * If only averages are being produced the names are not altered, if
   * standard deviations are produced then "Dev_" and "Avg_" are prepended
   * to each result deviation and average field respectively.
   *
   * @return an array containing the name of each column
   * @exception Exception if the result names could not be determined (perhaps
   * because of a problem from a nested sub-resultproducer)
   */
  public String [] getResultNames() throws Exception {

    String [] baseNames = m_ResultProducer.getResultNames();
    if (!getCalculateStdDevs()) {
      // Averages only: the nested names unchanged, preceded by the
      // count field.
      String [] withCount = new String [baseNames.length + 1];
      withCount[0] = m_CountFieldName;
      System.arraycopy(baseNames, 0, withCount, 1, baseNames.length);
      return withCount;
    }

    // Standard deviations requested: each column typed as Double gets an
    // extra "Dev_" column, so count those first to size the array.
    Object [] baseTypes = m_ResultProducer.getResultTypes();
    int doubleColumns = 0;
    for (int t = 0; t < baseTypes.length; t++) {
      doubleColumns += (baseTypes[t] instanceof Double) ? 1 : 0;
    }
    String [] expanded = new String [baseNames.length + 1 + doubleColumns];
    expanded[0] = m_CountFieldName;
    int out = 1;
    for (int in = 0; in < baseNames.length; in++) {
      // Every column receives the "Avg_" prefix, whatever its type.
      expanded[out] = "Avg_" + baseNames[in];
      out++;
      if (baseTypes[in] instanceof Double) {
        expanded[out] = "Dev_" + baseNames[in];
        out++;
      }
    }
    return expanded;
  }

  /**
   * Gets the data types of each of the columns produced for a single run.
   *
   * @return an array containing objects of the type of each column. The 
   * objects should be Strings, or Doubles.
   * @exception Exception if the result types could not be determined (perhaps
   * because of a problem from a nested sub-resultproducer)
   */
  public Object [] getResultTypes() throws Exception {

    Object [] baseTypes = m_ResultProducer.getResultTypes();
    if (!getCalculateStdDevs()) {
      // Averages only: the nested types unchanged, preceded by a Double
      // for the count field.
      Object [] withCount = new Object [baseTypes.length + 1];
      withCount[0] = new Double(0);
      System.arraycopy(baseTypes, 0, withCount, 1, baseTypes.length);
      return withCount;
    }

    // Standard deviations requested: each column typed as Double is
    // followed by an additional Double column for its deviation.
    int doubleColumns = 0;
    for (int t = 0; t < baseTypes.length; t++) {
      doubleColumns += (baseTypes[t] instanceof Double) ? 1 : 0;
    }
    Object [] expanded = new Object [baseTypes.length + 1 + doubleColumns];
    expanded[0] = new Double(0);
    int out = 1;
    for (int in = 0; in < baseTypes.length; in++) {
      expanded[out] = baseTypes[in];
      out++;
      if (baseTypes[in] instanceof Double) {
        expanded[out] = new Double(0);
        out++;
      }
    }
    return expanded;
  }

  /**
   * Gets a description of the internal settings of the result
   * producer, sufficient for distinguishing a ResultProducer
   * instance from another with different settings (ignoring
   * those settings set through this interface). For example,
   * a cross-validation ResultProducer may have a setting for the
   * number of folds. For a given state, the results produced should
   * be compatible. Typically if a ResultProducer is an OptionHandler,
   * this string will represent the command line arguments required
   * to set the ResultProducer to that state.
   *
   * @return the description of the ResultProducer state, or null
   * if no state is defined
   */
  public String getCompatibilityState() {

    // The key field name ("-F") is not part of the state string:
    // "-F " + Utils.quote(getKeyFieldName())
    String result = " -X " + getExpectedResultsPerAverage() + " ";
    if (getCalculateStdDevs()) {
      result += "-S ";
    }
    if (m_ResultProducer == null) {
      // No nested producer: emit the placeholder only. Asking a null
      // producer for its state would throw NullPointerException.
      result += "<null ResultProducer>";
    } else {
      // Nested producer's class name, then its own compatibility state.
      result += "-W " + m_ResultProducer.getClass().getName()
        + " -- " + m_ResultProducer.getCompatibilityState();
    }
    return result.trim();
  }


  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {

    Vector newVector = new Vector(2);

    newVector.addElement(new Option(
	     "\tThe name of the field to average over.\n"
	      +"\t(default \"Fold\")", 
	     "F", 1, 
	     "-F <field name>"));
    newVector.addElement(new Option(
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -