⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 interquartilerange.java

📁 代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
  /**   * Set whether extreme values are also tagged as outliers.   *   * @param value 	whether or not to tag extreme values also as outliers.   */  public void setExtremeValuesAsOutliers(boolean value) {    m_ExtremeValuesAsOutliers = value;  }  /**   * Get whether extreme values are also tagged as outliers.   *   * @return 		true if extreme values are also tagged as outliers.   */  public boolean getExtremeValuesAsOutliers() {    return m_ExtremeValuesAsOutliers;  }  /**   * Returns the tip text for this property   *   * @return 		tip text for this property suitable for   * 			displaying in the explorer/experimenter gui   */  public String detectionPerAttributeTipText() {    return         "Generates Outlier/ExtremeValue attribute pair for each numeric "      + "attribute, not just a single pair for all numeric attributes together.";  }  /**   * Set whether an Outlier/ExtremeValue attribute pair is generated for    * each numeric attribute ("true") or just one pair for all numeric    * attributes together ("false").   *   * @param value 	whether or not to generate indicator attribute pairs    * 			for each numeric attribute.   */  public void setDetectionPerAttribute(boolean value) {    m_DetectionPerAttribute = value;    if (!m_DetectionPerAttribute)      m_OutputOffsetMultiplier = false;  }  /**   * Gets whether an Outlier/ExtremeValue attribute pair is generated for    * each numeric attribute ("true") or just one pair for all numeric    * attributes together ("false").   *   * @return 		true if indicator attribute pairs are generated for   * 			each numeric attribute.   */  public boolean getDetectionPerAttribute() {    return m_DetectionPerAttribute;  }  /**   * Returns the tip text for this property   *   * @return 		tip text for this property suitable for   * 			displaying in the explorer/experimenter gui   */  public String outputOffsetMultiplierTipText() {    return         "Generates an additional attribute 'Offset' that contains the "      + "multiplier the value is off the median: "      + "value = median + 'multiplier' * IQR";  }  /**   * Set whether an additional attribute "Offset" is generated per    * Outlier/ExtremeValue attribute pair that lists the multiplier the value   * is off the median: value = median + 'multiplier' * IQR.   *   * @param value 	whether or not to generate the additional attribute.   */  public void setOutputOffsetMultiplier(boolean value) {    m_OutputOffsetMultiplier = value;    if (m_OutputOffsetMultiplier)      m_DetectionPerAttribute = true;  }  /**   * Gets whether an additional attribute "Offset" is generated per    * Outlier/ExtremeValue attribute pair that lists the multiplier the value   * is off the median: value = median + 'multiplier' * IQR.   *   * @return 		true if the additional attribute is generated.   */  public boolean getOutputOffsetMultiplier() {    return m_OutputOffsetMultiplier;  }  /**    * Returns the Capabilities of this filter.   *   * @return            the capabilities of this object   * @see               Capabilities   */  public Capabilities getCapabilities() {    Capabilities result = super.getCapabilities();    // attributes    result.enableAllAttributes();    result.enable(Capability.MISSING_VALUES);        // class    result.enableAllClasses();    result.enable(Capability.MISSING_CLASS_VALUES);    result.enable(Capability.NO_CLASS);        return result;  }  /**   * Determines the output format based on the input format and returns    * this. In case the output format cannot be returned immediately, i.e.,   * hasImmediateOutputFormat() returns false, then this method will called   * from batchFinished() after the call of preprocess(Instances), in which,   * e.g., statistics for the actual processing step can be gathered.   *   * @param inputFormat     the input format to base the output format on   * @return                the output format   * @throws Exception      in case the determination goes wrong   * @see                   #hasImmediateOutputFormat()   * @see                   #batchFinished()   */  protected Instances determineOutputFormat(Instances inputFormat)      throws Exception {        FastVector		atts;    FastVector		values;    Instances		result;    int			i;    // attributes must be numeric    m_Attributes.setUpper(inputFormat.numAttributes() - 1);    m_AttributeIndices = m_Attributes.getSelection();    for (i = 0; i < m_AttributeIndices.length; i++) {      // ignore class      if (m_AttributeIndices[i] == inputFormat.classIndex()) {	m_AttributeIndices[i] = NON_NUMERIC;	continue;      }      // not numeric -> ignore it      if (!inputFormat.attribute(m_AttributeIndices[i]).isNumeric())	m_AttributeIndices[i] = NON_NUMERIC;    }        // get old attributes    atts = new FastVector();    for (i = 0; i < inputFormat.numAttributes(); i++)      atts.addElement(inputFormat.attribute(i));        if (!getDetectionPerAttribute()) {      m_OutlierAttributePosition    = new int[1];      m_OutlierAttributePosition[0] = atts.size();            // add 2 new attributes      values = new FastVector();      values.addElement("no");      values.addElement("yes");      atts.addElement(new Attribute("Outlier", values));            values = new FastVector();      values.addElement("no");      values.addElement("yes");      atts.addElement(new Attribute("ExtremeValue", values));    }    else {      m_OutlierAttributePosition = new int[m_AttributeIndices.length];            for (i = 0; i < m_AttributeIndices.length; i++) {	if (m_AttributeIndices[i] == NON_NUMERIC)	  continue;		m_OutlierAttributePosition[i] = atts.size();	// add new attributes	values = new FastVector();	values.addElement("no");	values.addElement("yes");	atts.addElement(	    new Attribute(		inputFormat.attribute(		    m_AttributeIndices[i]).name() + "_Outlier", values));		values = new FastVector();	values.addElement("no");	values.addElement("yes");	atts.addElement(	    new Attribute(		inputFormat.attribute(		    m_AttributeIndices[i]).name() + "_ExtremeValue", values));	if (getOutputOffsetMultiplier())	  atts.addElement(	      new Attribute(		  inputFormat.attribute(		      m_AttributeIndices[i]).name() + "_Offset"));      }    }    // generate header    result = new Instances(inputFormat.relationName(), atts, 0);    result.setClassIndex(inputFormat.classIndex());        return result;  }  /**   * computes the thresholds for outliers and extreme values   *    * @param instances	the data to work on   */  protected void computeThresholds(Instances instances) {    int		i;    double[]	values;    int[]	sortedIndices;    int		half;    int		quarter;    double	q1;    double	q2;    double	q3;        m_UpperExtremeValue = new double[m_AttributeIndices.length];    m_UpperOutlier      = new double[m_AttributeIndices.length];    m_LowerOutlier      = new double[m_AttributeIndices.length];    m_LowerExtremeValue = new double[m_AttributeIndices.length];    m_Median            = new double[m_AttributeIndices.length];    m_IQR               = new double[m_AttributeIndices.length];        for (i = 0; i < m_AttributeIndices.length; i++) {      // non-numeric attribute?      if (m_AttributeIndices[i] == NON_NUMERIC)	continue;            // sort attribute data      values        = instances.attributeToDoubleArray(m_AttributeIndices[i]);      sortedIndices = Utils.sort(values);            // determine indices      half    = sortedIndices.length / 2;      quarter = half / 2;            if (sortedIndices.length % 2 == 1) {	q2 = values[sortedIndices[half]];      }      else {	q2 = (values[sortedIndices[half]] + values[sortedIndices[half + 1]]) / 2;      }            if (half % 2 == 1) {	q1 = values[sortedIndices[quarter]];	q3 = values[sortedIndices[sortedIndices.length - quarter - 1]];      }      else {	q1 = (values[sortedIndices[quarter]] + values[sortedIndices[quarter + 1]]) / 2;	q3 = (values[sortedIndices[sortedIndices.length - quarter - 1]] + values[sortedIndices[sortedIndices.length - quarter]]) / 2;      }            // determine thresholds and other values      m_Median[i]            = q2;      m_IQR[i]               = q3 - q1;      m_UpperExtremeValue[i] = q3 + getExtremeValuesFactor() * m_IQR[i];      m_UpperOutlier[i]      = q3 + getOutlierFactor()       * m_IQR[i];      m_LowerOutlier[i]      = q1 - getOutlierFactor()       * m_IQR[i];      m_LowerExtremeValue[i] = q1 - getExtremeValuesFactor() * m_IQR[i];    }  }    /**   * returns whether the instance has an outlier in the specified attribute    * or not   *    * @param inst	the instance to test   * @param index	the attribute index   * @return		true if the instance is an outlier   */  protected boolean isOutlier(Instance inst, int index) {    boolean	result;    double	value;    value  = inst.value(m_AttributeIndices[index]);    result =    ((m_UpperOutlier[index]      <  value) && (value <= m_UpperExtremeValue[index]))             || ((m_LowerExtremeValue[index] <= value) && (value <  m_LowerOutlier[index]));        return result;  }    /**   * returns whether the instance is an outlier or not   *    * @param inst	the instance to test   * @return		true if the instance is an outlier   */  protected boolean isOutlier(Instance inst) {    boolean	result;    int		i;    result = false;        for (i = 0; i < m_AttributeIndices.length; i++) {      // non-numeric attribute?      if (m_AttributeIndices[i] == NON_NUMERIC)	continue;      result = isOutlier(inst, m_AttributeIndices[i]);            if (result)	break;    }        return result;  }    /**   * returns whether the instance has an extreme value in the specified    * attribute or not   *    * @param inst	the instance to test   * @param index	the attribute index   * @return		true if the instance is an extreme value   */  protected boolean isExtremeValue(Instance inst, int index) {    boolean	result;    double	value;    value  = inst.value(m_AttributeIndices[index]);    result =    (value > m_UpperExtremeValue[index])              || (value < m_LowerExtremeValue[index]);          return result;  }    /**   * returns whether the instance is an extreme value or not   *    * @param inst	the instance to test   * @return		true if the instance is an extreme value   */  protected boolean isExtremeValue(Instance inst) {    boolean	result;    int		i;    result = false;        for (i = 0; i < m_AttributeIndices.length; i++) {      // non-numeric attribute?      if (m_AttributeIndices[i] == NON_NUMERIC)	continue;            result = isExtremeValue(inst, m_AttributeIndices[i]);            if (result)	break;    }        return result;  }    /**   * returns the mulitplier of the IQR the instance is off the median for this   * particular attribute.   *    * @param inst	the instance to test   * @param index	the attribute index   * @return		the multiplier   */  protected double calculateMultiplier(Instance inst, int index) {    double	result;    double	value;    value  = inst.value(m_AttributeIndices[index]);    result = (value - m_Median[index]) / m_IQR[index];          return result;  }    /**   * Processes the given data (may change the provided dataset) and returns   * the modified version. This method is called in batchFinished().   * This implementation only calls process(Instance) for each instance   * in the given dataset.   *   * @param instances   the data to process   * @return            the modified data   * @throws Exception  in case the processing goes wrong   * @see               #batchFinished()   */  protected Instances process(Instances instances) throws Exception {    Instances	result;    Instance	instOld;    Instance	instNew;    int		i;    int		n;    double[]	values;    int		numAttNew;    int		numAttOld;        if (!isFirstBatchDone())      computeThresholds(instances);        result    = getOutputFormat();    numAttOld = instances.numAttributes();    numAttNew = result.numAttributes();        for (n = 0; n < instances.numInstances(); n++) {      instOld = instances.instance(n);      values  = new double[numAttNew];      System.arraycopy(instOld.toDoubleArray(), 0, values, 0, numAttOld);            // generate new instance      instNew = new Instance(1.0, values);      instNew.setDataset(result);      // per attribute?      if (!getDetectionPerAttribute()) {	// outlier?	if (isOutlier(instOld))	  instNew.setValue(m_OutlierAttributePosition[0], 1);	// extreme value?	if (isExtremeValue(instOld)) {	  instNew.setValue(m_OutlierAttributePosition[0] + 1, 1);	  // tag extreme values also as outliers?	  if (getExtremeValuesAsOutliers())	    instNew.setValue(m_OutlierAttributePosition[0], 1);	}      }      else {	for (i = 0; i < m_AttributeIndices.length; i++) {	  // non-numeric attribute?	  if (m_AttributeIndices[i] == NON_NUMERIC)	    continue;	  	  // outlier?	  if (isOutlier(instOld, m_AttributeIndices[i]))	    instNew.setValue(m_OutlierAttributePosition[i], 1);	  // extreme value?	  if (isExtremeValue(instOld, m_AttributeIndices[i])) {	    instNew.setValue(m_OutlierAttributePosition[i] + 1, 1);	    // tag extreme values also as outliers?	    if (getExtremeValuesAsOutliers())	      instNew.setValue(m_OutlierAttributePosition[i], 1);	  }	  // add multiplier?	  if (getOutputOffsetMultiplier())	    instNew.setValue(		m_OutlierAttributePosition[i] + 2, 		calculateMultiplier(instOld, m_AttributeIndices[i]));	}      }            // copy possible strings, relational values...      copyValues(instNew, false, instOld.dataset(), getOutputFormat());            // add to output      result.add(instNew);    }        return result;  }  /**   * Main method for testing this class.   *   * @param args should contain arguments to the filter: use -h for help   */  public static void main(String[] args) {    runFilter(new InterquartileRange(), args);  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -