📄 attributechooser.java
字号:
//AttributeChooser.java
import java.util.Vector;
/**
 * Picks the attribute (column) of a {@code TrainingData} set to split on
 * and remembers the distinct values that attribute takes.
 *
 * <p>The selection heuristic in {@link #chooseAttribute()} is a stub: it
 * returns the first non-skipped attribute that has more than one distinct
 * value. It is intended to be replaced by an information-gain heuristic.
 */
class AttributeChooser
{
    /** The records this chooser inspects. */
    private final TrainingData data;

    /**
     * The distinct values of the attribute most recently selected by
     * {@link #chooseAttribute()}; empty if no attribute has been selected.
     */
    Vector<String> distinctAttributeValues;

    /**
     * Creates a chooser over the given data.
     *
     * @param d the training records to examine
     */
    public AttributeChooser(TrainingData d)
    {
        data = d;
        distinctAttributeValues = new Vector<String>();
    }

    /**
     * @return {@code true} if the data contains no records
     */
    public boolean dataIsEmpty()
    {
        return data.numRecords() == 0;
    }

    /**
     * Checks whether every record carries the same classification as
     * record 0.
     *
     * @return {@code false} if the data is empty or any record's
     *         classification differs from the first one; {@code true}
     *         otherwise
     */
    public boolean allExamplesHaveTheSameClassification()
    {
        if (dataIsEmpty())
        {
            return false;
        }
        String first = data.getClassification(0);
        for (int r = 1; r < data.numRecords(); r++)
        {
            if (!first.equals(data.getClassification(r)))
            {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the classification of record 0. Call this only when
     * {@link #allExamplesHaveTheSameClassification()} is {@code true},
     * in which case it is the classification shared by all records.
     *
     * @return the classification of the first record
     */
    public String thatClassification()
    {
        return data.getClassification(0);
    }

    /**
     * Stub attribute selection: returns the first attribute for which not
     * all records have the same value. Replace this with information gain.
     *
     * <p>On success, {@link #distinctAttributeValues} holds the distinct
     * values of the returned attribute, in order of first appearance. On
     * failure it is left empty.
     *
     * @return the index of the chosen column, or {@code -1} if no
     *         non-skipped column has more than one distinct value
     */
    public int chooseAttribute()
    {
        // Start from a clean slate so that values left over from an
        // earlier call on this instance cannot leak into this scan.
        distinctAttributeValues.clear();

        for (int c = 0; c < data.numColumns(); c++)
        {
            // Skip a column that was listed on the command line as
            // skip-able or which is the class attribute (per the original
            // author's note; decided by DecisionTree.shouldBeSkipped).
            if (DecisionTree.shouldBeSkipped(c))
            {
                continue;
            }

            // Collect each value of this attribute the first time it is seen.
            for (int r = 0; r < data.numRecords(); r++)
            {
                String v = data.getAttributeValue(r, c);
                if (!distinctAttributeValues.contains(v))
                {
                    distinctAttributeValues.addElement(v);
                }
            }

            // This simple heuristic just takes the first attribute that
            // has more than one value.
            if (distinctAttributeValues.size() > 1)
            {
                return c;
            }

            // Not usable for splitting; reset before trying the next one.
            distinctAttributeValues.clear();
        }

        // No attribute distinguishes the records. If they nevertheless do
        // not all share one classification, the data is "noisy". -1 is a
        // special flag for this; the original author's note says the
        // caller then uses the most frequent classification.
        return -1;
    }

    /**
     * Converts {@link #distinctAttributeValues} into a {@code String[]}.
     * {@link #chooseAttribute()} must have set up the list to hold the
     * values of the attribute chosen to split on.
     *
     * @return a new array containing the distinct values, in list order
     * @throws IllegalArgumentException if the list is {@code null} or empty
     */
    public String[] getValues()
    {
        if (distinctAttributeValues == null)
        {
            throw new IllegalArgumentException(
                "distinctAttributeValues should not be null");
        }
        if (distinctAttributeValues.size() == 0)
        {
            throw new IllegalArgumentException(
                "distinctAttributeValues should not have zero elements in it");
        }
        String[] v = new String[distinctAttributeValues.size()];
        distinctAttributeValues.copyInto(v);
        return v;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -