relaggs.java
来自「Weka」· Java 代码 · 共 582 行 · 第 1/2 页
JAVA
582 行
*/
  public String attributeIndicesTipText() {
    final String tip =
        "Specify range of attributes to act on; "
      + "this is a comma separated list of attribute indices, with "
      + "\"first\" and \"last\" valid values; Specify an inclusive "
      + "range with \"-\"; eg: \"first-3,5,6-10,last\".";
    return tip;
  }

  /**
   * Replaces the attribute range this filter works on with a newly
   * constructed one.
   *
   * NOTE(review): the previous Range object is discarded; an invert flag set
   * earlier through setInvertSelection() is presumably not carried over to
   * the new object -- confirm against the Range class.
   *
   * @param value the range expression handed to the Range constructor
   */
  public void setSelectedRange(String value) {
    final Range replacement = new Range(value);
    m_SelectedRange = replacement;
  }

  /**
   * Returns the range object currently held by this filter.
   *
   * @return the current range selection
   */
  public Range getSelectedRange() {
    final Range current = m_SelectedRange;
    return current;
  }

  /**
   * Returns the tip text for the invert-selection property.
   *
   * @return tip text for this property suitable for
   *         displaying in the explorer/experimenter gui
   */
  public String invertSelectionTipText() {
    final String tip =
        "Set attribute selection mode. If false, only selected "
      + "attributes in the range will be worked on; if "
      + "true, only non-selected attributes will be processed.";
    return tip;
  }

  /**
   * Stores the invert flag on the currently selected range.
   *
   * @param value the new invert setting
   */
  public void setInvertSelection(boolean value) {
    final Range selection = m_SelectedRange;
    selection.setInvert(value);
  }

  /**
   * Reads the invert flag from the currently selected range.
   *
   * @return the invert setting held by the selected range
   */
  public boolean getInvertSelection() {
    final boolean inverted = m_SelectedRange.getInvert();
    return inverted;
  }

  /**
   * Returns the Capabilities of this filter.
* * @return the capabilities of this object * @see Capabilities */ public Capabilities getCapabilities() { Capabilities result = super.getCapabilities(); // attributes result.enable(Capability.NOMINAL_ATTRIBUTES); result.enable(Capability.NUMERIC_ATTRIBUTES); result.enable(Capability.DATE_ATTRIBUTES); result.enable(Capability.RELATIONAL_ATTRIBUTES); result.enable(Capability.MISSING_VALUES); // class result.enable(Capability.NOMINAL_CLASS); result.enable(Capability.NUMERIC_CLASS); result.enable(Capability.DATE_CLASS); result.enable(Capability.MISSING_CLASS_VALUES); result.enable(Capability.NO_CLASS); return result; } /** * Determines the output format based on the input format and returns * this. In case the output format cannot be returned immediately, i.e., * immediateOutputFormat() returns false, then this method will be called * from batchFinished(). * * @param inputFormat the input format to base the output format on * @return the output format * @throws Exception in case the determination goes wrong * @see #hasImmediateOutputFormat() * @see #batchFinished() */ protected Instances determineOutputFormat(Instances inputFormat) throws Exception { Instances result; Instances relFormat; FastVector atts; int i; int n; int m; int clsIndex; Attribute att; String prefix; m_SelectedRange.setUpper(inputFormat.numAttributes() - 1); atts = new FastVector(); clsIndex = -1; for (i = 0; i < inputFormat.numAttributes(); i++) { // we don't process the class if (i == inputFormat.classIndex()) { clsIndex = atts.size(); atts.addElement(inputFormat.attribute(i).copy()); continue; } if (!inputFormat.attribute(i).isRelationValued()) { atts.addElement(inputFormat.attribute(i).copy()); continue; } if (!m_SelectedRange.isInRange(i)) { if (getDebug()) System.out.println( "Attribute " + (i+1) + " (" + inputFormat.attribute(i).name() + ") skipped."); continue; } // process relational attribute prefix = inputFormat.attribute(i).name() + "_"; relFormat = inputFormat.attribute(i).relation(); 
for (n = 0; n < relFormat.numAttributes(); n++) { att = relFormat.attribute(n); if (att.isNumeric()) { atts.addElement(new Attribute(prefix + att.name() + "_MIN")); atts.addElement(new Attribute(prefix + att.name() + "_MAX")); atts.addElement(new Attribute(prefix + att.name() + "_AVG")); atts.addElement(new Attribute(prefix + att.name() + "_STDEV")); atts.addElement(new Attribute(prefix + att.name() + "_SUM")); } else if (att.isNominal()) { if (att.numValues() <= m_MaxCardinality) { for (m = 0; m < att.numValues(); m++) atts.addElement(new Attribute(prefix + att.name() + "_" + att.value(m) + "_CNT")); } else { if (getDebug()) System.out.println( "Attribute " + (i+1) + "/" + (n+1) + " (" + inputFormat.attribute(i).name() + "/" + att.name() + ") skipped, " + att.numValues() + " > " + m_MaxCardinality + "."); } } else { if (getDebug()) System.out.println( "Attribute " + (i+1) + "/" + (n+1) + " (" + inputFormat.attribute(i).name() + "/" + att.name() + ") skipped."); } } } // generate new format result = new Instances(inputFormat.relationName(), atts, 0); result.setClassIndex(clsIndex); // neither string nor relational attributes need to be copied to the // output: initOutputLocators(result, new int[0]); return result; } /** * Processes the given data (may change the provided dataset) and returns * the modified version. This method is called in batchFinished(). 
* * @param instances the data to process * @return the modified data * @throws Exception in case the processing goes wrong * @see #batchFinished() */ protected Instances process(Instances instances) throws Exception { Instances result; Instance inst; Instance newInst; Instances relInstances; int k; int l; int i; int n; int m; AttributeStats stats; Attribute att; result = getOutputFormat(); // initialize attribute statistics m_AttStats.clear(); // collect data for all relational attributes for (i = 0; i < instances.numAttributes(); i++) { if (i == instances.classIndex()) continue; if (!instances.attribute(i).isRelationValued()) continue; if (!m_SelectedRange.isInRange(i)) continue; // compute statistics for (k = 0; k < instances.numInstances(); k++) { relInstances = instances.instance(k).relationalValue(i); for (n = 0; n < relInstances.numAttributes(); n++) { att = relInstances.attribute(n); stats = null; if ( att.isNumeric() || (att.isNominal() && att.numValues() <= m_MaxCardinality) ) { stats = relInstances.attributeStats(n); m_AttStats.put(k + "-" + i + "-" + n, stats); } } } } // convert data for (k = 0; k < instances.numInstances(); k++) { inst = instances.instance(k); newInst = new Instance(result.numAttributes()); newInst.setWeight(inst.weight()); l = 0; for (i = 0; i < instances.numAttributes(); i++) { if (!instances.attribute(i).isRelationValued()) { newInst.setValue(l, inst.value(i)); l++; } else { if (!m_SelectedRange.isInRange(i)) continue; // replace relational data with statistics relInstances = inst.relationalValue(i); for (n = 0; n < relInstances.numAttributes(); n++) { att = relInstances.attribute(n); stats = (AttributeStats) m_AttStats.get(k + "-" + i + "-" + n); if (att.isNumeric()) { newInst.setValue(l, stats.numericStats.min); l++; newInst.setValue(l, stats.numericStats.max); l++; newInst.setValue(l, stats.numericStats.mean); l++; newInst.setValue(l, stats.numericStats.stdDev); l++; newInst.setValue(l, stats.numericStats.sum); l++; } else if 
(att.isNominal() && att.numValues() <= m_MaxCardinality) { for (m = 0; m < att.numValues(); m++) { newInst.setValue(l, stats.nominalCounts[m]); l++; } } } } } result.add(newInst); } return result; } /** * runs the filter with the given arguments * * @param args the commandline arguments */ public static void main(String[] args) { runFilter(new RELAGGS(), args); }}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?