📄 trainingdata.java
字号:
// TrainingData.java
import java.util.Vector;
// An object of class TrainingData holds training data (all that was read in,
// or a subset). The internal structure of the class is
// a Vector, each element of which is an array of Strings.
// Each element of the Vector holds the information pertaining
// to one training instance. This is also called a record.
// Holds a set of training records (everything that was read in, or a
// subset of it). Each record is a String[] with one entry per column;
// records are kept in a Vector in the order they were added.
class TrainingData
{
    // Column count given at construction and reported by numColumns().
    // It is not checked against the length of the arrays passed to add().
    private final int columns;

    // One String[] per training instance (record).
    private final Vector<String[]> d;

    // Creates an empty data set that reports numCols columns.
    public TrainingData(int numCols)
    {
        columns = numCols;
        // 100 is only the initial capacity; the Vector grows as needed.
        d = new Vector<String[]>(100);
    }

    // Returns the column count passed to the constructor.
    public int numColumns() { return columns; }

    // Returns the number of records currently stored.
    public int numRecords() { return d.size(); }

    // Appends one record. The array is stored by reference, not copied.
    protected void add(String[] values)
    {
        d.addElement(values);
    }

    // Returns the value stored at column col of record row.
    // Throws IllegalArgumentException if row is outside 0..numRecords()-1
    // or if the record stored at that row is null.
    // col is not validated here; an out-of-range col surfaces as an
    // ArrayIndexOutOfBoundsException from the array access.
    public String getAttributeValue(int row, int col)
    {
        // Valid rows are 0 .. size-1, so the upper bound needs >= (a plain >
        // would let row == size fall through to elementAt), and negative
        // rows are rejected the same way.
        if (row < 0 || row >= d.size() || d.elementAt(row) == null)
            throw new IllegalArgumentException("invalid row in getAttributeValue");
        return d.elementAt(row)[col];
    }

    // Returns the value of record row in the column DecisionTree.classIndex.
    public String getClassification(int row)
    {
        return getAttributeValue(row, DecisionTree.classIndex);
    }

    // Returns a new TrainingData object holding only those records of this
    // object whose value in column col equals matchValue. The result
    // reports the same column count, and the selected record arrays are
    // shared with this object rather than copied.
    public TrainingData selectRecords(int col, String matchValue)
    {
        TrainingData subset = new TrainingData(columns);
        for (String[] record : d)
        {
            if (record[col].equals(matchValue))
                subset.add(record);
        }
        return subset;
    }

    // Inner class -- just a collection of three fields describing the
    // result of getMostFrequentClass().
    class MostFrequentClassInfo
    {
        String name;                 // class value that occurred most often
        int recordCountInClass;      // number of records with that class value
        int recordCountNotInClass;   // number of all remaining records
    }

    // Finds and returns information about the most frequent class
    // that occurs in this TrainingData object. When several classes share
    // the highest count, the one with the lowest StringCounter index is
    // reported, because only a strictly greater count replaces the maximum.
    // NOTE(review): with zero records the loop below never runs, so max
    // stays -1 and getClassValue(0) is called on an empty counter; the
    // outcome depends on StringCounter -- confirm callers never do this.
    public MostFrequentClassInfo getMostFrequentClass()
    {
        // Build a StringCounter that has all the class values
        // and doesn't care about attribute values: every record is
        // registered under the same placeholder attribute value.
        StringCounter sc = new StringCounter();
        for (int i = 0; i < d.size(); i++)
            sc.add("dummy", getClassification(i));

        // Now find the class value with the highest count.
        // Attribute index 0 is presumably the single "dummy" value added
        // above -- verify against StringCounter.
        int max = -1;
        int mostFrequentClassIndex = 0;
        for (int v = 0; v < sc.numDistinctClassValues(); v++)
        {
            int freq = sc.attrAndClassOccurences(0, v);
            if (freq > max)
            {
                max = freq;
                mostFrequentClassIndex = v;
            }
        }

        MostFrequentClassInfo freqInfo = new MostFrequentClassInfo();
        freqInfo.name = sc.getClassValue(mostFrequentClassIndex);
        freqInfo.recordCountInClass = max;
        freqInfo.recordCountNotInClass = d.size() - max;
        return freqInfo;
    }

    // Renders every record on its own line, with a tab after each value
    // (including the last one) and a newline after each record.
    // For efficiency the text is accumulated in a StringBuilder and
    // converted to a String only once, on return.
    @Override
    public String toString()
    {
        StringBuilder s = new StringBuilder();
        for (String[] record : d)
        {
            for (String value : record)
            {
                s.append(value);
                s.append("\t");
            }
            s.append("\n");
        }
        return s.toString();
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -