📄 forwardselection.java
字号:
setGenerateRanking(Utils.getFlag('R', options));
optionString = Utils.getOption('T', options);
if (optionString.length() != 0) {
Double temp;
temp = Double.valueOf(optionString);
setThreshold(temp.doubleValue());
}
optionString = Utils.getOption('N', options);
if (optionString.length() != 0) {
setNumToSelect(Integer.parseInt(optionString));
}
}
/**
* Gets the current settings of ForwardSelection.
*
* @return an array of strings suitable for passing to setOptions()
*/
public String[] getOptions () {
  // Seven slots cover the largest possible option list:
  // "-P <set>", "-R", "-T <threshold>" and "-N <count>".
  String[] result = new String[7];
  int next = 0;

  // The start set is only reported when one has been specified.
  boolean haveStartSet = !getStartSet().equals("");
  if (haveStartSet) {
    result[next++] = "-P";
    result[next++] = "" + startSetToString();
  }

  if (getGenerateRanking()) {
    result[next++] = "-R";
  }

  // Threshold and number-to-select are always reported.
  result[next++] = "-T";
  result[next++] = "" + getThreshold();
  result[next++] = "-N";
  result[next++] = "" + getNumToSelect();

  // Pad any unused trailing slots with empty strings so no entry is null.
  for (; next < result.length; next++) {
    result[next] = "";
  }
  return result;
}
/**
* converts the array of starting attributes to a string. This is
* used by getOptions to return the actual attributes specified
* as the starting set. This is better than using m_startRanges.getRanges()
* as the same start set can be specified in different ways from the
* command line---eg 1,2,3 == 1-3. This is to ensure that stuff that
* is stored in a database is comparable.
* @return a comma separated list of individual attribute numbers as a String
*/
private String startSetToString() {
  // No parsed start set yet: fall back to the raw option string.
  if (m_starting == null) {
    return getStartSet();
  }

  StringBuffer FString = new StringBuffer();
  for (int i = 0; i < m_starting.length; i++) {
    // Leave out the class attribute. The comparison is made on the attribute
    // index stored in m_starting[i] (as search() does), not on the position i
    // within the array.
    if (m_hasClass && m_starting[i] == m_classIndex) {
      continue;
    }
    // Emit the separator before every entry except the first one written, so
    // skipping the last entry can never leave a trailing comma behind.
    if (FString.length() > 0) {
      FString.append(",");
    }
    // Stored indexes are 0-based; the textual form is 1-based.
    FString.append(m_starting[i] + 1);
  }
  return FString.toString();
}
/**
* returns a description of the search.
* @return a description of the search as a String.
*/
public String toString() {
  StringBuffer text = new StringBuffer("\tForward Selection.\n\tStart set: ");

  // Describe the starting subset, if any.
  if (m_starting != null) {
    text.append(startSetToString()).append("\n");
  } else {
    text.append("no attributes\n");
  }

  if (m_doneRanking) {
    // After ranking, report the discard threshold only if one was set
    // (-Double.MAX_VALUE marks "no threshold").
    if (m_threshold != -Double.MAX_VALUE) {
      text.append("\tThreshold for discarding attributes: ");
      text.append(Utils.doubleToString(m_threshold,8,4));
      text.append("\n");
    }
  } else {
    // Plain subset search: report the merit of the best subset.
    text.append("\tMerit of best subset found: ");
    text.append(Utils.doubleToString(Math.abs(m_bestMerit),8,3));
    text.append("\n");
  }

  return text.toString();
}
/**
* Searches the attribute subset space by forward selection.
*
* @param ASEval the attribute evaluator to guide the search
* @param data the training instances.
* @return an array (not necessarily ordered) of selected attribute indexes
* @exception Exception if the search can't be completed
*/
public int[] search (ASEvaluation ASEval, Instances data)
throws Exception {
// Greedy forward selection: repeatedly add the single attribute whose
// inclusion gives the highest subset merit. In normal mode the loop stops
// when no addition improves on the current merit; in ranking mode
// (m_doRank) it keeps adding the best candidate until none remain.
// Passing data == null continues a previous run using the retained
// m_Instances and m_best_group (this is how rankedAttributes() calls it).
int i;
double best_merit = -Double.MAX_VALUE;
double temp_best,temp_merit;
int temp_index=0;
BitSet temp_group;
if (data != null) { // this is a fresh run so reset
resetOptions();
m_Instances = data;
}
m_ASEval = ASEval;
m_numAttribs = m_Instances.numAttributes();
// Only allocate a new subset when none is carried over from an earlier call.
if (m_best_group == null) {
m_best_group = new BitSet(m_numAttribs);
}
// The search can only be driven by an evaluator that scores whole subsets.
if (!(m_ASEval instanceof SubsetEvaluator)) {
throw new Exception(m_ASEval.getClass().getName()
+ " is not a "
+ "Subset evaluator!");
}
// Resolve the user-supplied start range against the number of attributes.
// NOTE(review): presumably setUpper fixes the largest valid index so that
// getSelection() can expand the range -- confirm against the Range class.
m_startRange.setUpper(m_numAttribs-1);
if (!(getStartSet().equals(""))) {
m_starting = m_startRange.getSelection();
}
// Unsupervised evaluators have no class attribute to exclude.
if (m_ASEval instanceof UnsupervisedSubsetEvaluator) {
m_hasClass = false;
}
else {
m_hasClass = true;
m_classIndex = m_Instances.classIndex();
}
SubsetEvaluator ASEvaluator = (SubsetEvaluator)m_ASEval;
// Ranking storage: one row per attribute, [0] = attribute index,
// [1] = merit after adding it. Kept across calls when already allocated.
if (m_rankedAtts == null) {
m_rankedAtts = new double[m_numAttribs][2];
m_rankedSoFar = 0;
}
// If a starting subset has been supplied, then initialise the bitset
// NOTE(review): m_classIndex is only assigned in the supervised branch
// above; its value for unsupervised evaluators is not visible here.
if (m_starting != null) {
for (i = 0; i < m_starting.length; i++) {
if ((m_starting[i]) != m_classIndex) {
m_best_group.set(m_starting[i]);
}
}
}
// Evaluate the initial subset
best_merit = ASEvaluator.evaluateSubset(m_best_group);
// main search loop
boolean done = false;
boolean addone = false;
while (!done) {
// Work on a copy so candidate bits can be toggled without touching the
// best subset found so far.
temp_group = (BitSet)m_best_group.clone();
temp_best = best_merit;
// In ranking mode any candidate is acceptable, even one that lowers the
// merit, so start the comparison from the smallest possible value.
if (m_doRank) {
temp_best = -Double.MAX_VALUE;
}
// Assume this is the last pass until a candidate proves otherwise.
done = true;
addone = false;
for (i=0;i<m_numAttribs;i++) {
// Consider every attribute that is neither the class nor already selected.
if ((i != m_classIndex) && (!temp_group.get(i))) {
// set the bit
temp_group.set(i);
temp_merit = ASEvaluator.evaluateSubset(temp_group);
// Strict comparison: ties keep the earlier (lower-index) candidate.
if (temp_merit > temp_best) {
temp_best = temp_merit;
temp_index = i;
addone = true;
done = false;
}
// unset the bit
temp_group.clear(i);
// Ranking mode continues as long as at least one candidate was available.
if (m_doRank) {
done = false;
}
}
}
// Commit the best candidate of this pass and record it in the ranking.
if (addone) {
m_best_group.set(temp_index);
best_merit = temp_best;
m_rankedAtts[m_rankedSoFar][0] = temp_index;
m_rankedAtts[m_rankedSoFar][1] = best_merit;
m_rankedSoFar++;
}
}
m_bestMerit = best_merit;
return attributeList(m_best_group);
}
/**
* Produces a ranked list of attributes. Search must have been performed
* prior to calling this function. Search is called by this function to
* complete the traversal of the search space. A list of
* attributes and merits are returned. The attributes are ranked by the
* order they are added to the subset during a forward selection search.
* Individual merit values reflect the merit associated with adding the
* corresponding attribute to the subset; because of this, merit values
* may initially increase but then decrease as the best subset is
* "passed by" on the way to the far side of the search space.
*
* @return an array of attribute indexes and associated merit values
* @exception Exception if something goes wrong.
*/
public double [][] rankedAttributes() throws Exception {
  // A ranking can only be built on top of an earlier search.
  boolean searched = (m_rankedAtts != null) && (m_rankedSoFar != -1);
  if (!searched) {
    throw new Exception("Search must be performed before attributes "
                        +"can be ranked.");
  }

  // Continue the earlier search in ranking mode; null data means "do not reset".
  m_doRank = true;
  search (m_ASEval, null);

  // Copy out only the rows that were actually filled in.
  int ranked = m_rankedSoFar;
  double [][] ranking = new double [ranked][2];
  for (int row = 0; row < ranked; row++) {
    System.arraycopy(m_rankedAtts[row], 0, ranking[row], 0, 2);
  }

  // Clear the search state, then remember that a ranking was produced.
  resetOptions();
  m_doneRanking = true;

  if (m_numToSelect > ranking.length) {
    throw new Exception("More attributes requested than exist in the data");
  }

  // No explicit count requested: derive it from the threshold when one is set
  // (-Double.MAX_VALUE marks "no threshold"), otherwise keep every attribute.
  if (m_numToSelect <= 0) {
    if (m_threshold != -Double.MAX_VALUE) {
      determineNumToSelectFromThreshold(ranking);
    } else {
      m_calculatedNumToSelect = ranking.length;
    }
  }
  return ranking;
}
private void determineNumToSelectFromThreshold(double [][] ranking) {
  // Count the ranked entries whose merit (column 1) is strictly above the
  // threshold and store that count as the calculated number to select.
  int above = 0;
  int row = 0;
  while (row < ranking.length) {
    above += (ranking[row][1] > m_threshold) ? 1 : 0;
    row++;
  }
  m_calculatedNumToSelect = above;
}
/**
* converts a BitSet into a list of attribute indexes
* @param group the BitSet to convert
* @return an array of attribute indexes
**/
private int[] attributeList (BitSet group) {
  // Gather the set bits into a scratch array big enough for every attribute,
  // then trim it to the number actually found.
  int[] scratch = new int[m_numAttribs];
  int used = 0;
  for (int attr = 0; attr < m_numAttribs; attr++) {
    if (group.get(attr)) {
      scratch[used] = attr;
      used++;
    }
  }

  int[] selected = new int[used];
  System.arraycopy(scratch, 0, selected, 0, used);
  return selected;
}
/**
* Resets options
*/
private void resetOptions() {
  // Drop references to the evaluator, the data and the best subset.
  m_ASEval = null;
  m_Instances = null;
  m_best_group = null;

  // Leave ranking mode and discard ranking results; -1 marks
  // "no search performed yet" for rankedAttributes().
  m_doRank = false;
  m_rankedAtts = null;
  m_rankedSoFar = -1;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -