📄 removefrequentvalues.java
字号:
}

  /**
   * Returns the tip text describing the header-modification option.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String modifyHeaderTipText() {
    final String tip =
      "When selecting on nominal attributes, removes header references to "
      + "excluded values.";
    return tip;
  }

  /**
   * Tells whether the header is rewritten when selecting on nominal
   * attributes.
   *
   * @return the current value of the modify-header flag
   */
  public boolean getModifyHeader() {
    return this.m_ModifyHeader;
  }

  /**
   * Stores whether the header is rewritten when selecting on nominal
   * attributes.
   *
   * @param newModifyHeader the new value of the modify-header flag
   */
  public void setModifyHeader(boolean newModifyHeader) {
    this.m_ModifyHeader = newModifyHeader;
  }

  /**
   * Returns the tip text describing the invert-selection option.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String invertSelectionTipText() {
    final String tip = "Invert matching sense.";
    return tip;
  }

  /**
   * Tells whether the matching sense is inverted.
   *
   * @return the current value of the invert flag
   */
  public boolean getInvertSelection() {
    return this.m_Invert;
  }

  /**
   * Stores whether the matching sense is inverted. When the flag is set,
   * determineValues(Instances) retains (number of distinct values - configured
   * number of values) values instead of the configured number.
   *
   * @param invert the new invert setting
   */
  public void setInvertSelection(boolean invert) {
    this.m_Invert = invert;
  }

  /**
   * Returns true if selection attribute is nominal.
* * @return true if selection attribute is nominal */ public boolean isNominal() { if (getInputFormat() == null) { return false; } else { return getInputFormat().attribute(m_AttIndex.getIndex()).isNominal(); } } /** * determines the values to retain, it is always at least 1 * and up to the maximum number of distinct values * * @param inst the Instances to determine the values from which are kept */ public void determineValues(Instances inst) { int i; AttributeStats stats; int attIdx; int min; int max; int count; m_AttIndex.setUpper(inst.numAttributes() - 1); attIdx = m_AttIndex.getIndex(); // init names m_Values = new HashSet(); if (inst == null) return; // number of values to retain stats = inst.attributeStats(attIdx); if (m_Invert) count = stats.nominalCounts.length - m_NumValues; else count = m_NumValues; // out of bounds? -> fix if (count < 1) count = 1; // at least one value! if (count > stats.nominalCounts.length) count = stats.nominalCounts.length; // at max the existing values // determine min/max occurences Arrays.sort(stats.nominalCounts); if (m_LeastValues) { min = stats.nominalCounts[0]; max = stats.nominalCounts[count - 1]; } else { min = stats.nominalCounts[(stats.nominalCounts.length - 1) - count + 1]; max = stats.nominalCounts[stats.nominalCounts.length - 1]; } // add values if they are inside min/max (incl. borders) and not more than count stats = inst.attributeStats(attIdx); for (i = 0; i < stats.nominalCounts.length; i++) { if ( (stats.nominalCounts[i] >= min) && (stats.nominalCounts[i] <= max) && (m_Values.size() < count) ) m_Values.add(inst.attribute(attIdx).value(i)); } } /** * modifies the header of the Instances and returns the format w/o * any instances * * @param instanceInfo the instances structure to modify * @return the new structure (w/o instances!) 
*/ protected Instances modifyHeader(Instances instanceInfo) { instanceInfo = new Instances(getInputFormat(), 0); // copy before modifying Attribute oldAtt = instanceInfo.attribute(m_AttIndex.getIndex()); int [] selection = new int[m_Values.size()]; Iterator iter = m_Values.iterator(); int i = 0; while (iter.hasNext()) { selection[i] = oldAtt.indexOfValue(iter.next().toString()); i++; } FastVector newVals = new FastVector(); for (i = 0; i < selection.length; i++) { newVals.addElement(oldAtt.value(selection[i])); } instanceInfo.deleteAttributeAt(m_AttIndex.getIndex()); instanceInfo.insertAttributeAt(new Attribute(oldAtt.name(), newVals), m_AttIndex.getIndex()); m_NominalMapping = new int [oldAtt.numValues()]; for (i = 0; i < m_NominalMapping.length; i++) { boolean found = false; for (int j = 0; j < selection.length; j++) { if (selection[j] == i) { m_NominalMapping[i] = j; found = true; break; } } if (!found) { m_NominalMapping[i] = -1; } } return instanceInfo; } /** * Returns the Capabilities of this filter. * * @return the capabilities of this object * @see Capabilities */ public Capabilities getCapabilities() { Capabilities result = super.getCapabilities(); // attributes result.enableAllAttributes(); result.enable(Capability.MISSING_VALUES); // class result.enableAllClasses(); result.enable(Capability.MISSING_CLASS_VALUES); result.enable(Capability.NO_CLASS); return result; } /** * Sets the format of the input instances. * * @param instanceInfo an Instances object containing the input instance * structure (any instances contained in the object are ignored - only the * structure is required). * @return true if the outputFormat can be collected immediately * @throws UnsupportedAttributeTypeException if the specified attribute * is not nominal. 
*/ public boolean setInputFormat(Instances instanceInfo) throws Exception { super.setInputFormat(instanceInfo); m_AttIndex.setUpper(instanceInfo.numAttributes() - 1); if (!isNominal()) throw new UnsupportedAttributeTypeException("Can only handle nominal attributes."); m_Values = null; return false; } /** * Set the output format. Takes the currently defined Values to retain and * m_InputFormat and calls setOutputFormat(Instances) appropriately. * Those instances that have a value to retain are "push"ed to the output. */ protected void setOutputFormat() { Instances instances; int i; Instance instance; if (m_Values == null) { setOutputFormat(null); return; } // get structure if (getModifyHeader()) instances = modifyHeader(getInputFormat()); else instances = new Instances(getInputFormat(), 0); setOutputFormat(instances); // remove instances with unwanted values, for the others change the values // value if m_ModifyHeader is set for (i = 0; i < getInputFormat().numInstances(); i++) { instance = getInputFormat().instance(i); if (m_Values.contains(instance.stringValue(m_AttIndex.getIndex()))) { if (getModifyHeader()) { instance.setValue(m_AttIndex.getIndex(), m_NominalMapping[(int)instance.value(m_AttIndex.getIndex())]); } push(instance); } } } /** * Input an instance for filtering. Ordinarily the instance is processed * and made available for output immediately. Some filters require all * instances be read before producing output. * * @param instance the input instance * @return true if the filtered instance may now be * collected with output(). * @throws IllegalStateException if no input format has been set. 
*/
  public boolean input(Instance instance) {
    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }

    if (m_NewBatch) {
      resetQueue();
      m_NewBatch = false;
    }

    // during the first batch instances are only buffered
    if (!isFirstBatchDone()) {
      bufferInput(instance);
      return false;
    }

    // after the first batch the instance is pushed to the output as-is
    push(instance);
    return true;
  }

  /**
   * Signifies that this batch of input to the filter is finished. If the
   * values to retain have not been determined yet, they are computed from
   * the input format and the output format is set up. output() may then
   * be called to retrieve the filtered instances.
   *
   * @return true if there are instances pending output
   * @throws IllegalStateException if no input structure has been defined
   */
  public boolean batchFinished() {
    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }

    // process input
    if (m_Values == null) {
      determineValues(getInputFormat());
      setOutputFormat();
    }
    flushInput();

    m_NewBatch = true;
    m_FirstBatchDone = true;

    boolean pending = (numPendingOutput() != 0);
    return pending;
  }

  /**
   * Main method for testing this class.
   *
   * @param argv should contain arguments to the filter:
   * use -h for help
   */
  public static void main(String[] argv) {
    runFilter(new RemoveFrequentValues(), argv);
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -