ReliefFAttributeEval.java
public int getSampleSize () {
return m_sampleM;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String weightByDistanceTipText() {
return "Weight nearest neighbours by their distance.";
}
/**
* Set the nearest neighbour weighting method
*
* @param b true if nearest neighbours are to be weighted by distance.
*/
public void setWeightByDistance (boolean b) {
m_weightByDistance = b;
}
/**
* Get whether nearest neighbours are being weighted by distance
*
* @return m_weightByDistance
*/
public boolean getWeightByDistance () {
return m_weightByDistance;
}
/**
* Gets the current settings of ReliefFAttributeEval.
*
* @return an array of strings suitable for passing to setOptions()
*/
public String[] getOptions () {
String[] options = new String[9];
int current = 0;
if (getWeightByDistance()) {
options[current++] = "-W";
}
options[current++] = "-M";
options[current++] = "" + getSampleSize();
options[current++] = "-D";
options[current++] = "" + getSeed();
options[current++] = "-K";
options[current++] = "" + getNumNeighbours();
options[current++] = "-A";
options[current++] = "" + getSigma();
while (current < options.length) {
options[current++] = "";
}
return options;
}
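// A minimal usage sketch (illustrative values, not part of this class): the
// array built above round-trips through setOptions(), so a configuration of,
// say, 10 neighbours, seed 1 and a 250-instance sample can be set with either
// the individual setters or something like:
//
//   ReliefFAttributeEval eval = new ReliefFAttributeEval();
//   eval.setOptions(new String[] {"-K", "10", "-D", "1", "-M", "250"});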
/**
* Return a description of the ReliefF attribute evaluator.
*
* @return a description of the evaluator as a String.
*/
public String toString () {
StringBuffer text = new StringBuffer();
if (m_trainInstances == null) {
text.append("ReliefF feature evaluator has not been built yet\n");
}
else {
text.append("\tReliefF Ranking Filter");
text.append("\n\tInstances sampled: ");
if (m_sampleM == -1) {
text.append("all\n");
}
else {
text.append(m_sampleM + "\n");
}
text.append("\tNumber of nearest neighbours (k): " + m_Knn + "\n");
if (m_weightByDistance) {
text.append("\tExponentially decreasing (with distance) "
+ "influence for\n"
+ "\tnearest neighbours. Sigma: "
+ m_sigma + "\n");
}
else {
text.append("\tEqual influence nearest neighbours\n");
}
}
return text.toString();
}
/**
* Initializes a ReliefF attribute evaluator.
*
* @param data set of instances serving as training data
* @exception Exception if the evaluator has not been
* generated successfully
*/
public void buildEvaluator (Instances data)
throws Exception
{
int z, totalInstances;
Random r = new Random(m_seed);
if (data.checkForStringAttributes()) {
throw new Exception("Can't handle string attributes!");
}
m_trainInstances = data;
m_classIndex = m_trainInstances.classIndex();
m_numAttribs = m_trainInstances.numAttributes();
m_numInstances = m_trainInstances.numInstances();
if (m_trainInstances.attribute(m_classIndex).isNumeric()) {
m_numericClass = true;
}
else {
m_numericClass = false;
}
if (!m_numericClass) {
m_numClasses = m_trainInstances.attribute(m_classIndex).numValues();
}
else {
m_ndc = 0;
m_numClasses = 1;
m_nda = new double[m_numAttribs];
m_ndcda = new double[m_numAttribs];
}
if (m_weightByDistance) // set up the rank based weights
{
m_weightsByRank = new double[m_Knn];
for (int i = 0; i < m_Knn; i++) {
m_weightsByRank[i] =
Math.exp(-((i/(double)m_sigma)*(i/(double)m_sigma)));
}
}
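// The weight of the i-th nearest neighbour is exp(-(i/sigma)^2): with distance
// weighting enabled, a neighbour's influence decays exponentially with its
// rank (the closest neighbour has rank 0 and weight 1).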
// the final attribute weights
m_weights = new double[m_numAttribs];
// m_karray holds, per class (a single slot when the class is numeric), the
// k nearest neighbours found so far: [..][..][0] = distance, [..][..][1] = instance index
m_karray = new double[m_numClasses][m_Knn][2];
if (!m_numericClass) {
m_classProbs = new double[m_numClasses];
for (int i = 0; i < m_numInstances; i++) {
m_classProbs[(int)m_trainInstances.instance(i).value(m_classIndex)]++;
}
for (int i = 0; i < m_numClasses; i++) {
m_classProbs[i] /= m_numInstances;
}
}
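// For a nominal class, m_classProbs now holds the prior probability of each
// class value, estimated as its relative frequency in the training data.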
m_worst = new double[m_numClasses];
m_index = new int[m_numClasses];
m_stored = new int[m_numClasses];
m_minArray = new double[m_numAttribs];
m_maxArray = new double[m_numAttribs];
for (int i = 0; i < m_numAttribs; i++) {
m_minArray[i] = m_maxArray[i] = Double.NaN;
}
for (int i = 0; i < m_numInstances; i++) {
updateMinMax(m_trainInstances.instance(i));
}
if ((m_sampleM > m_numInstances) || (m_sampleM < 0)) {
totalInstances = m_numInstances;
}
else {
totalInstances = m_sampleM;
}
// process each instance, updating attribute weights
for (int i = 0; i < totalInstances; i++) {
if (totalInstances == m_numInstances) {
z = i;
}
else {
z = r.nextInt()%m_numInstances;
}
if (z < 0) {
z *= -1;
}
if (!(m_trainInstances.instance(z).isMissing(m_classIndex))) {
// first clear the knn and worst index stuff for the classes
for (int j = 0; j < m_numClasses; j++) {
m_index[j] = m_stored[j] = 0;
for (int k = 0; k < m_Knn; k++) {
m_karray[j][k][0] = m_karray[j][k][1] = 0;
}
}
findKHitMiss(z);
if (m_numericClass) {
updateWeightsNumericClass(z);
}
else {
updateWeightsDiscreteClass(z);
}
}
}
// now scale the accumulated weights by 1/totalInstances (nominal class) or
// compute the final weights from the accumulated counters (numeric class)
// System.out.println("num inst:"+m_numInstances+" r_ndc:"+r_ndc);
for (int i = 0; i < m_numAttribs; i++) {
if (i != m_classIndex) {
if (m_numericClass) {
m_weights[i] = m_ndcda[i]/m_ndc -
((m_nda[i] - m_ndcda[i])/((double)totalInstances - m_ndc));
}
else {
m_weights[i] *= (1.0/(double)totalInstances);
}
// System.out.println(r_weights[i]);
}
}
}
/**
* Evaluates an individual attribute using ReliefF's instance based approach.
* The actual work is done by buildEvaluator which evaluates all features.
*
* @param attribute the index of the attribute to be evaluated
* @return the ReliefF weight ("merit") computed for the attribute by buildEvaluator()
* @exception Exception if the attribute could not be evaluated
*/
public double evaluateAttribute (int attribute)
throws Exception
{
return m_weights[attribute];
}
/**
* Reset options to their default values
*/
protected void resetOptions () {
m_trainInstances = null;
m_sampleM = -1;
m_Knn = 10;
m_sigma = 2;
m_weightByDistance = false;
m_seed = 1;
}
/**
* Normalizes a given value of a numeric attribute.
*
* @param x the value to be normalized
* @param i the attribute's index
* @return the value scaled to [0, 1], or 0 if no range is known or min == max
*/
private double norm (double x, int i) {
if (Double.isNaN(m_minArray[i]) ||
Utils.eq(m_maxArray[i], m_minArray[i])) {
return 0;
}
else {
return (x - m_minArray[i])/(m_maxArray[i] - m_minArray[i]);
}
}
/**
* Updates the minimum and maximum values for all the attributes
* based on a new instance.
*
* @param instance the new instance
*/
private void updateMinMax (Instance instance) {
// for (int j = 0; j < m_numAttribs; j++) {
try {
for (int j = 0; j < instance.numValues(); j++) {
if ((instance.attributeSparse(j).isNumeric()) &&
(!instance.isMissingSparse(j))) {
if (Double.isNaN(m_minArray[instance.index(j)])) {
m_minArray[instance.index(j)] = instance.valueSparse(j);
m_maxArray[instance.index(j)] = instance.valueSparse(j);
}
else {
if (instance.valueSparse(j) < m_minArray[instance.index(j)]) {
m_minArray[instance.index(j)] = instance.valueSparse(j);
}
else {
if (instance.valueSparse(j) > m_maxArray[instance.index(j)]) {
m_maxArray[instance.index(j)] = instance.valueSparse(j);
}
}
}
}
}
} catch (Exception ex) {
System.err.println(ex);
ex.printStackTrace();
}
}
/**
* Computes the difference between two given attribute
* values.
*/
private double difference(int index, double val1, double val2) {
switch (m_trainInstances.attribute(index).type()) {
case Attribute.NOMINAL:
// If attribute is nominal
if (Instance.isMissingValue(val1) ||
Instance.isMissingValue(val2)) {
return (1.0 - (1.0/((double)m_trainInstances.
attribute(index).numValues())));
} else if ((int)val1 != (int)val2) {
return 1;
} else {
return 0;
}
case Attribute.NUMERIC:
// If attribute is numeric
if (Instance.isMissingValue(val1) ||
Instance.isMissingValue(val2)) {
if (Instance.isMissingValue(val1) &&
Instance.isMissingValue(val2)) {
return 1;
} else {
double diff;
if (Instance.isMissingValue(val2)) {
diff = norm(val1, index);
} else {
diff = norm(val2, index);
}
if (diff < 0.5) {
diff = 1.0 - diff;
}
return diff;
}
} else {
return Math.abs(norm(val1, index) - norm(val2, index));
}
default:
return 0;
}
}
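// Missing-value conventions used above: a missing nominal value differs from
// any other value with probability 1 - 1/numValues; two missing numeric values
// count as maximally different (1); and a single missing numeric value is taken
// to lie as far as possible from the observed normalized value, i.e.
// max(norm(x), 1 - norm(x)).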
/**
* Calculates the distance between two instances
*
* @param first the first instance
* @param second the second instance
* @return the distance between the two given instances
*/
private double distance(Instance first, Instance second) {
double distance = 0;
int firstI, secondI;
for (int p1 = 0, p2 = 0;
p1 < first.numValues() || p2 < second.numValues();) {
if (p1 >= first.numValues()) {
firstI = m_trainInstances.numAttributes();
} else {
firstI = first.index(p1);
}
if (p2 >= second.numValues()) {
secondI = m_trainInstances.numAttributes();
} else {
secondI = second.index(p2);
}
if (firstI == m_trainInstances.classIndex()) {
p1++; continue;
}
if (secondI == m_trainInstances.classIndex()) {
p2++; continue;
}
double diff;
if (firstI == secondI) {
diff = difference(firstI,
first.valueSparse(p1),
second.valueSparse(p2));
p1++; p2++;
} else if (firstI > secondI) {
diff = difference(secondI,
0, second.valueSparse(p2));
p2++;
} else {
diff = difference(firstI,
first.valueSparse(p1), 0);
p1++;
}
// accumulate the per-attribute difference and return the summed distance
distance += diff;
}
return distance;
}
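// A minimal end-to-end sketch (assumes the standard Weka API outside this
// file: weka.core.Instances and weka.core.converters.ConverterUtils.DataSource;
// the file name is hypothetical and calls that throw Exception are left
// unhandled here):
//
//   DataSource source = new DataSource("data.arff");
//   Instances data = source.getDataSet();
//   data.setClassIndex(data.numAttributes() - 1);
//   ReliefFAttributeEval eval = new ReliefFAttributeEval();
//   eval.buildEvaluator(data);                 // computes weights for all attributes
//   for (int i = 0; i < data.numAttributes(); i++) {
//     if (i != data.classIndex()) {
//       System.out.println(data.attribute(i).name() + ": " + eval.evaluateAttribute(i));
//     }
//   }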