partitionedmultifilter.java

来自「Weka」· Java 代码 · 共 697 行 · 第 1/2 页

JAVA
697
字号
    }
    else {
      // this branch builds the result from the filter's class name and, when
      // the filter is an OptionHandler, appends its joined option string
      result  = filter.getClass().getName();
      if (filter instanceof OptionHandler)
        result += " "
          + Utils.joinOptions(((OptionHandler) filter).getOptions());
    }

    return result;
  }

  /**
   * Sets the list of possible Ranges to choose from.
   * Also resets the state of the Range (this reset doesn't affect the
   * options). The array is stored as-is, no copy is made.
   *
   * @param Ranges	an array of Ranges with all options set.
   * @see #reset()
   */
  public void setRanges(Range[] Ranges) {
    m_Ranges = Ranges;
    reset();
  }

  /**
   * Gets the list of possible Ranges to choose from. The internal array
   * itself is returned, not a copy.
   *
   * @return 		the array of Ranges
   */
  public Range[] getRanges() {
    return m_Ranges;
  }

  /**
   * Returns the tip text for this property
   *
   * @return    	tip text for this property suitable for
   *            	displaying in the explorer/experimenter gui
   */
  public String rangesTipText() {
    return "The attribute ranges to be used.";
  }

  /**
   * Gets a single Range from the set of available Ranges.
*   * @param index 	the index of the Range wanted   * @return 		the Range   */  public Range getRange(int index) {    return m_Ranges[index];  }    /**   * determines the indices of unused attributes (ones that are not covered   * by any of the range)   *    * @param data	the data to base the determination on   * @see 		#m_IndicesUnused   */  protected void determineUnusedIndices(Instances data) {    Vector<Integer>	indices;    int			i;    int			n;    boolean		covered;        // traverse all ranges    indices = new Vector<Integer>();    for (i = 0; i < data.numAttributes(); i++) {      if (i == data.classIndex())	continue;            covered = false;      for (n = 0; n < getRanges().length; n++) {	if (getRanges()[n].isInRange(i)) {	  covered = true;	  break;	}      }            if (!covered)	indices.add(new Integer(i));    }        // create array    m_IndicesUnused = new int[indices.size()];    for (i = 0; i < indices.size(); i++)      m_IndicesUnused[i] = indices.get(i).intValue();        if (getDebug())      System.out.println(	  "Unused indices: " + Utils.arrayToString(m_IndicesUnused));  }    /**   * generates a subset of the dataset with only the attributes from the range   * (class is always added if present)   *    * @param data	the data to work on   * @param range	the range of attribute to use   * @return		the generated subset   * @throws Exception	if creation fails   */  protected Instances generateSubset(Instances data, Range range) throws Exception {    Remove	filter;    String	atts;    Instances	result;     // determine attributes    atts = range.getRanges();    if ((data.classIndex() > -1) && (!range.isInRange(data.classIndex())))      atts += "," + (data.classIndex() + 1);        // setup filter    filter = new Remove();    filter.setAttributeIndices(atts);    filter.setInvertSelection(true);    filter.setInputFormat(data);        // generate output    result = Filter.useFilter(data, filter);        return result;  }    /**   * renames all the 
attributes in the dataset (excluding the class if present)   * by adding the prefix to the name.   *    * @param data	the data to work on   * @param prefix	the prefix for the attributes   * @return		a copy of the data with the attributes renamed   * @throws Exception	if renaming fails   */  protected Instances renameAttributes(Instances data, String prefix) throws Exception {    Instances	result;    int		i;    FastVector	atts;        // rename attributes    atts = new FastVector();    for (i = 0; i < data.numAttributes(); i++) {      if (i == data.classIndex())	atts.addElement(data.attribute(i).copy());      else	atts.addElement(data.attribute(i).copy(prefix + data.attribute(i).name()));    }        // create new dataset    result = new Instances(data.relationName(), atts, data.numInstances());    for (i = 0; i < data.numInstances(); i++) {      result.add((Instance) data.instance(i).copy());    }        // set class if present    if (data.classIndex() > -1)      result.setClassIndex(data.classIndex());        return result;  }    /**   * Determines the output format based only on the full input dataset and    * returns this otherwise null is returned. In case the output format cannot    * be returned immediately, i.e., immediateOutputFormat() returns false,    * then this method will be called from batchFinished().   
*   * @param inputFormat     the input format to base the output format on   * @return                the output format   * @throws Exception      in case the determination goes wrong   * @see                   #hasImmediateOutputFormat()   * @see                   #batchFinished()   */  protected Instances determineOutputFormat(Instances inputFormat) throws Exception {    Instances   result;    Instances	processed;    int         i;    int		n;    FastVector	atts;    Attribute	att;        if (!isFirstBatchDone()) {      // we need the full dataset here, see process(Instances)      if (inputFormat.numInstances() == 0)	return null;      checkDimensions();      // determine unused indices      determineUnusedIndices(inputFormat);      atts = new FastVector();      for (i = 0; i < getFilters().length; i++) {	if (!isFirstBatchDone()) {	  // generate subset	  processed = generateSubset(inputFormat, getRange(i));	  // set input format	  if (!getFilter(i).setInputFormat(processed))	    Filter.useFilter(processed, getFilter(i));	}	// get output format	processed = getFilter(i).getOutputFormat();	// rename attributes	processed = renameAttributes(processed, "filtered-" + i + "-");	// add attributes	for (n = 0; n < processed.numAttributes(); n++) {	  if (n == processed.classIndex())	    continue;	  atts.addElement(processed.attribute(n).copy());	}      }      // add unused attributes      if (!getRemoveUnused()) {	for (i = 0; i < m_IndicesUnused.length; i++) {	  att = inputFormat.attribute(m_IndicesUnused[i]);	  atts.addElement(att.copy("unfiltered-" + att.name()));	}      }      // add class if present      if (inputFormat.classIndex() > -1)	atts.addElement(inputFormat.classAttribute().copy());      // generate new dataset      result = new Instances(inputFormat.relationName(), atts, 0);      if (inputFormat.classIndex() > -1)	result.setClassIndex(result.numAttributes() - 1);    }    else {      result = getOutputFormat();    }        return result;  }  /**   * Processes the 
given data (may change the provided dataset) and returns   * the modified version. This method is called in batchFinished().   *   * @param instances   the data to process   * @return            the modified data   * @throws Exception  in case the processing goes wrong   * @see               #batchFinished()   */  protected Instances process(Instances instances) throws Exception {    Instances		result;    int        		i;    int			n;    int			m;    int			index;    Instances[]		processed;    Instance		inst;    Instance		newInst;    double[]		values;    Vector		errors;    if (!isFirstBatchDone()) {      checkDimensions();      // set upper limits      for (i = 0; i < m_Ranges.length; i++)	m_Ranges[i].setUpper(instances.numAttributes() - 1);      // determine unused indices      determineUnusedIndices(instances);    }    // pass data through all filters    processed = new Instances[getFilters().length];    for (i = 0; i < getFilters().length; i++) {      processed[i] = generateSubset(instances, getRange(i));      if (!isFirstBatchDone())	getFilter(i).setInputFormat(processed[i]);      processed[i] = Filter.useFilter(processed[i], getFilter(i));    }    // set output format (can only be determined with full dataset, hence here)    if (!isFirstBatchDone()) {      result = determineOutputFormat(instances);      setOutputFormat(result);    }    else {      result = getOutputFormat();    }        // check whether all filters didn't change the number of instances    errors = new Vector();    for (i = 0; i < processed.length; i++) {      if (processed[i].numInstances() != instances.numInstances())	errors.add(new Integer(i));    }    if (errors.size() > 0)      throw new IllegalStateException(	  "The following filter(s) changed the number of instances: " + errors);        // assemble data    for (i = 0; i < instances.numInstances(); i++) {      inst   = instances.instance(i);      values = new double[result.numAttributes()];      // filtered data      index = 0;      for (n = 
0; n < processed.length; n++) {	for (m = 0; m < processed[n].numAttributes(); m++) {	  if (m == processed[n].classIndex())	    continue;	  values[index] = processed[n].instance(i).value(m);	  index++;	}      }            // unused attributes      if (!getRemoveUnused()) {	for (n = 0; n < m_IndicesUnused.length; n++) {	  values[index] = inst.value(m_IndicesUnused[n]);	  index++;	}      }            // class      if (instances.classIndex() > -1)	values[values.length - 1] = inst.value(instances.classIndex());      // generate and add instance      if (inst instanceof SparseInstance)	newInst = new SparseInstance(instances.instance(i).weight(), values);      else	newInst = new Instance(instances.instance(i).weight(), values);      result.add(newInst);    }        return result;  }  /**   * Main method for executing this class.   *   * @param args should contain arguments for the filter: use -h for help   */  public static void main(String[] args) {    runFilter(new PartitionedMultiFilter(), args);  }}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?