📄 cfssubseteval.java
字号:
if (Utils.eq(corr_measure, 0.0)) {
if (flag == true) {
return (0.0);
}
else {
return (1.0);
}
}
else {
return (corr_measure);
}
}
/**
 * Computes the absolute Pearson correlation between two numeric
 * attributes. A missing value contributes a zero deviation from the
 * mean. As a side effect, caches the standard deviation of each
 * attribute in m_std_devs the first time it is computed (the value
 * 1.0 acts as the "not yet set" sentinel).
 *
 * @param att1 index of the first numeric attribute
 * @param att2 index of the second numeric attribute
 * @return |r|; if either attribute has zero variance, returns the
 *         maximally bad correlation (1.0 when neither attribute is
 *         the class, 0.0 otherwise)
 */
private double num_num (int att1, int att2) {
  double meanX = m_trainInstances.meanOrMode(m_trainInstances.attribute(att1));
  double meanY = m_trainInstances.meanOrMode(m_trainInstances.attribute(att2));
  double cross = 0.0;
  double sumSqX = 0.0;
  double sumSqY = 0.0;

  for (int idx = 0; idx < m_numInstances; idx++) {
    Instance current = m_trainInstances.instance(idx);
    double dx = current.isMissing(att1) ? 0.0 : (current.value(att1) - meanX);
    double dy = current.isMissing(att2) ? 0.0 : (current.value(att2) - meanY);
    cross += dx * dy;
    sumSqX += dx * dx;
    sumSqY += dy * dy;
  }

  // Cache the standard deviations if they have not been set yet
  // (1.0 is the "unset" sentinel).
  if (sumSqX != 0.0 && m_std_devs[att1] == 1.0) {
    m_std_devs[att1] = Math.sqrt(sumSqX / m_numInstances);
  }
  if (sumSqY != 0.0 && m_std_devs[att2] == 1.0) {
    m_std_devs[att2] = Math.sqrt(sumSqY / m_numInstances);
  }

  if (sumSqX * sumSqY > 0.0) {
    double r = cross / Math.sqrt(sumSqX * sumSqY);
    return (r < 0.0) ? -r : r;
  }
  // Zero variance: worst-case correlation is 1.0 unless the class
  // attribute is involved, in which case it is 0.0.
  return (att1 != m_classIndex && att2 != m_classIndex) ? 1.0 : 0.0;
}
/**
 * Computes a weighted correlation between a nominal attribute (att1)
 * and a numeric attribute (att2). The nominal attribute is expanded
 * into one binary indicator variable per value; the absolute
 * correlation of each indicator with the numeric attribute is weighted
 * by the prior probability of that value. Missing nominal values are
 * either merged with the modal value or treated as an extra value,
 * depending on m_missingSeperate. As a side effect, caches weighted
 * standard deviations in m_std_devs (1.0 is the "unset" sentinel).
 *
 * @param att1 index of the nominal attribute
 * @param att2 index of the numeric attribute
 * @return the weighted absolute correlation; levels with zero variance
 *         contribute the maximally bad correlation (1.0) unless one of
 *         the attributes is the class (then 0.0)
 */
private double num_nom2 (int att1, int att2) {
int i, ii, k;
double temp;
Instance inst;
// mode of the nominal attribute, used as the substitute for missing values
int mx = (int)m_trainInstances.
meanOrMode(m_trainInstances.attribute(att1));
// mean of the numeric attribute
double my = m_trainInstances.
meanOrMode(m_trainInstances.attribute(att2));
double stdv_num = 0.0;
double diff1, diff2;
double r = 0.0, rr, max_corr = 0.0;
// number of indicator variables: one per nominal value, plus one extra
// slot for "missing" when missing values are treated separately
int nx = (!m_missingSeperate)
? m_trainInstances.attribute(att1).numValues()
: m_trainInstances.attribute(att1).numValues() + 1;
double[] prior_nom = new double[nx];
double[] stdvs_nom = new double[nx];
double[] covs = new double[nx];
for (i = 0; i < nx; i++) {
stdvs_nom[i] = covs[i] = prior_nom[i] = 0.0;
}
// calculate frequencies (and means) of the values of the nominal
// attribute
for (i = 0; i < m_numInstances; i++) {
inst = m_trainInstances.instance(i);
if (inst.isMissing(att1)) {
// missing value: either map it to the mode or to the extra
// "missing" indicator slot
if (!m_missingSeperate) {
ii = mx;
}
else {
ii = nx - 1;
}
}
else {
ii = (int)inst.value(att1);
}
// increment freq for nominal
prior_nom[ii]++;
}
// accumulate, per indicator, the variance of the indicator and its
// covariance with the numeric attribute; also accumulate the numeric
// attribute's sum of squared deviations
for (k = 0; k < m_numInstances; k++) {
inst = m_trainInstances.instance(k);
// std dev of numeric attribute
diff2 = (inst.isMissing(att2))? 0.0 : (inst.value(att2) - my);
stdv_num += (diff2*diff2);
//
for (i = 0; i < nx; i++) {
// temp is the 0/1 indicator for "instance has nominal value i"
if (inst.isMissing(att1)) {
if (!m_missingSeperate) {
temp = (i == mx)? 1.0 : 0.0;
}
else {
temp = (i == (nx - 1))? 1.0 : 0.0;
}
}
else {
temp = (i == inst.value(att1))? 1.0 : 0.0;
}
// deviation of the indicator from its mean (= prior probability)
diff1 = (temp - (prior_nom[i]/m_numInstances));
stdvs_nom[i] += (diff1*diff1);
covs[i] += (diff1*diff2);
}
}
// calculate weighted correlation
for (i = 0, temp = 0.0; i < nx; i++) {
// calculate the weighted variance of the nominal
temp += ((prior_nom[i]/m_numInstances)*(stdvs_nom[i]/m_numInstances));
if ((stdvs_nom[i]*stdv_num) > 0.0) {
//System.out.println("Stdv :"+stdvs_nom[i]);
rr = (covs[i]/(Math.sqrt(stdvs_nom[i]*stdv_num)));
if (rr < 0.0) {
rr = -rr;
}
// weight |r| for this level by the level's prior probability
r += ((prior_nom[i]/m_numInstances)*rr);
}
/* if there is zero variance for the numeric att at a specific
level of the catergorical att then if neither is the class then
make this correlation at this level maximally bad i.e. 1.0.
If either is the class then maximally bad correlation is 0.0 */
else {if (att1 != m_classIndex && att2 != m_classIndex) {
r += ((prior_nom[i]/m_numInstances)*1.0);
}
}
}
// set the standard deviations for these attributes if necessary
// if ((att1 != classIndex) && (att2 != classIndex)) // =============
if (temp != 0.0) {
// temp now holds the weighted variance of the nominal attribute
if (m_std_devs[att1] == 1.0) {
m_std_devs[att1] = Math.sqrt(temp);
}
}
if (stdv_num != 0.0) {
if (m_std_devs[att2] == 1.0) {
m_std_devs[att2] = Math.sqrt((stdv_num/m_numInstances));
}
}
// an overall correlation of exactly zero is treated as maximally bad
// unless the class attribute is involved
if (r == 0.0) {
if (att1 != m_classIndex && att2 != m_classIndex) {
r = 1.0;
}
}
return r;
}
/**
 * Computes a weighted correlation between two nominal attributes. Each
 * attribute is expanded into one binary indicator per value; the
 * absolute correlation of every indicator pair is weighted by the
 * joint prior probability of that value pair. Missing values are
 * either merged with the modal value or treated as an extra value,
 * depending on m_missingSeperate. As a side effect, caches weighted
 * standard deviations in m_std_devs (1.0 is the "unset" sentinel).
 *
 * @param att1 index of the first nominal attribute
 * @param att2 index of the second nominal attribute
 * @return the weighted absolute correlation; zero-variance indicator
 *         pairs contribute the maximally bad correlation (1.0) unless
 *         one of the attributes is the class (then 0.0)
 */
private double nom_nom (int att1, int att2) {
int i, j, ii, jj, z;
double temp1, temp2;
Instance inst;
// modes of the two attributes, used as substitutes for missing values
int mx = (int)m_trainInstances.
meanOrMode(m_trainInstances.attribute(att1));
int my = (int)m_trainInstances.
meanOrMode(m_trainInstances.attribute(att2));
double diff1, diff2;
double r = 0.0, rr, max_corr = 0.0;
// numbers of indicator variables: one per nominal value, plus one
// extra slot for "missing" when missing values are treated separately
int nx = (!m_missingSeperate)
? m_trainInstances.attribute(att1).numValues()
: m_trainInstances.attribute(att1).numValues() + 1;
int ny = (!m_missingSeperate)
? m_trainInstances.attribute(att2).numValues()
: m_trainInstances.attribute(att2).numValues() + 1;
double[][] prior_nom = new double[nx][ny];
double[] sumx = new double[nx];
double[] sumy = new double[ny];
double[] stdvsx = new double[nx];
double[] stdvsy = new double[ny];
double[][] covs = new double[nx][ny];
for (i = 0; i < nx; i++) {
sumx[i] = stdvsx[i] = 0.0;
}
for (j = 0; j < ny; j++) {
sumy[j] = stdvsy[j] = 0.0;
}
for (i = 0; i < nx; i++) {
for (j = 0; j < ny; j++) {
covs[i][j] = prior_nom[i][j] = 0.0;
}
}
// calculate frequencies (and means) of the values of the nominal
// attribute
for (i = 0; i < m_numInstances; i++) {
inst = m_trainInstances.instance(i);
// map att1's value (or its missing substitute) to indicator index ii
if (inst.isMissing(att1)) {
if (!m_missingSeperate) {
ii = mx;
}
else {
ii = nx - 1;
}
}
else {
ii = (int)inst.value(att1);
}
// map att2's value (or its missing substitute) to indicator index jj
if (inst.isMissing(att2)) {
if (!m_missingSeperate) {
jj = my;
}
else {
jj = ny - 1;
}
}
else {
jj = (int)inst.value(att2);
}
// increment freq for nominal
prior_nom[ii][jj]++;
sumx[ii]++;
sumy[jj]++;
}
// accumulate indicator variances for both attributes and the
// covariance of every indicator pair
for (z = 0; z < m_numInstances; z++) {
inst = m_trainInstances.instance(z);
// variance contributions for att2's indicators
for (j = 0; j < ny; j++) {
if (inst.isMissing(att2)) {
if (!m_missingSeperate) {
temp2 = (j == my)? 1.0 : 0.0;
}
else {
temp2 = (j == (ny - 1))? 1.0 : 0.0;
}
}
else {
temp2 = (j == inst.value(att2))? 1.0 : 0.0;
}
// deviation of the indicator from its mean (= prior probability)
diff2 = (temp2 - (sumy[j]/m_numInstances));
stdvsy[j] += (diff2*diff2);
}
//
// variance contributions for att1's indicators, plus the
// covariance of every (i, j) indicator pair
for (i = 0; i < nx; i++) {
if (inst.isMissing(att1)) {
if (!m_missingSeperate) {
temp1 = (i == mx)? 1.0 : 0.0;
}
else {
temp1 = (i == (nx - 1))? 1.0 : 0.0;
}
}
else {
temp1 = (i == inst.value(att1))? 1.0 : 0.0;
}
diff1 = (temp1 - (sumx[i]/m_numInstances));
stdvsx[i] += (diff1*diff1);
for (j = 0; j < ny; j++) {
if (inst.isMissing(att2)) {
if (!m_missingSeperate) {
temp2 = (j == my)? 1.0 : 0.0;
}
else {
temp2 = (j == (ny - 1))? 1.0 : 0.0;
}
}
else {
temp2 = (j == inst.value(att2))? 1.0 : 0.0;
}
diff2 = (temp2 - (sumy[j]/m_numInstances));
covs[i][j] += (diff1*diff2);
}
}
}
// calculate weighted correlation
for (i = 0; i < nx; i++) {
for (j = 0; j < ny; j++) {
if ((stdvsx[i]*stdvsy[j]) > 0.0) {
//System.out.println("Stdv :"+stdvs_nom[i]);
rr = (covs[i][j]/(Math.sqrt(stdvsx[i]*stdvsy[j])));
if (rr < 0.0) {
rr = -rr;
}
// weight |r| for this value pair by its joint prior probability
r += ((prior_nom[i][j]/m_numInstances)*rr);
}
// if there is zero variance for either of the categorical atts then if
// neither is the class then make this
// correlation at this level maximally bad i.e. 1.0. If either is
// the class then maximally bad correlation is 0.0
else {if (att1 != m_classIndex && att2 != m_classIndex) {
r += ((prior_nom[i][j]/m_numInstances)*1.0);
}
}
}
}
// calculate weighted standard deviations for these attributes
// (if necessary)
for (i = 0, temp1 = 0.0; i < nx; i++) {
temp1 += ((sumx[i]/m_numInstances)*(stdvsx[i]/m_numInstances));
}
if (temp1 != 0.0) {
if (m_std_devs[att1] == 1.0) {
m_std_devs[att1] = Math.sqrt(temp1);
}
}
for (j = 0, temp2 = 0.0; j < ny; j++) {
temp2 += ((sumy[j]/m_numInstances)*(stdvsy[j]/m_numInstances));
}
if (temp2 != 0.0) {
if (m_std_devs[att2] == 1.0) {
m_std_devs[att2] = Math.sqrt(temp2);
}
}
// an overall correlation of exactly zero is treated as maximally bad
// unless the class attribute is involved
if (r == 0.0) {
if (att1 != m_classIndex && att2 != m_classIndex) {
r = 1.0;
}
}
return r;
}
/**
 * Returns a string describing CFS.
 *
 * @return the description as a string
 */
public String toString () {
  // StringBuilder instead of StringBuffer: this is a short-lived local
  // with no cross-thread sharing, so the synchronized variant buys
  // nothing.
  StringBuilder text = new StringBuilder();

  if (m_trainInstances == null) {
    text.append("CFS subset evaluator has not been built yet\n");
  }
  else {
    text.append("\tCFS Subset Evaluator\n");
    if (m_missingSeperate) {
      text.append("\tTreating missing values as a seperate value\n");
    }
    if (m_locallyPredictive) {
      text.append("\tIncluding locally predictive attributes\n");
    }
  }
  return text.toString();
}
/**
 * Augments best_group with "locally predictive" attributes: repeatedly
 * picks the candidate attribute with the highest correlation to the
 * class, and admits it if that class correlation exceeds (within
 * m_c_Threshold) its correlation with every attribute already in the
 * group. Stops when no candidates remain. Correlations are computed
 * lazily via correlate() and cached symmetrically in m_corr_matrix,
 * where -999 marks "not yet computed".
 *
 * @param best_group the selected attribute set, modified in place
 */
private void addLocallyPredictive (BitSet best_group) {
int i, j;
boolean done = false;
boolean ok = true;
double temp_best = -1.0;
double corr;
j = 0;
// temp_group tracks every candidate already considered (whether or not
// it was admitted), so each attribute is examined at most once
BitSet temp_group = (BitSet)best_group.clone();
while (!done) {
temp_best = -1.0;
// find best not already in group
for (i = 0; i < m_numAttribs; i++) {
if ((!temp_group.get(i)) && (i != m_classIndex)) {
// lazily compute and cache the correlation with the class
if (m_corr_matrix.getElement(i, m_classIndex) == -999) {
corr = correlate(i, m_classIndex);
m_corr_matrix.setElement(i, m_classIndex, corr);
m_corr_matrix.setElement(m_classIndex, i, corr);
}
if (m_corr_matrix.getElement(i, m_classIndex) > temp_best) {
temp_best = m_corr_matrix.getElement(i, m_classIndex);
j = i;
}
}
}
// no candidate left: terminate
if (temp_best == -1.0) {
done = true;
}
else {
ok = true;
// mark the candidate as considered regardless of the outcome below
temp_group.set(j);
// check the best against correlations with others already
// in group
for (i = 0; i < m_numAttribs; i++) {
if (best_group.get(i)) {
// lazily compute and cache the pairwise correlation
if (m_corr_matrix.getElement(i, j) == -999) {
corr = correlate(i, j);
m_corr_matrix.setElement(i, j, corr);
m_corr_matrix.setElement(j, i, corr);
}
// reject if the candidate correlates more strongly with a
// group member than with the class (within the threshold)
if (m_corr_matrix.getElement(i, j) > temp_best - m_c_Threshold) {
ok = false;
break;
}
}
}
// if ok then add to best_group
if (ok) {
best_group.set(j);
}
}
}
}
/**
 * Calls addLocallyPredictive in order to include locally predictive
 * attributes (if requested).
 *
 * @param attributeSet the set of attributes found by the search
 * @return a possibly ranked list of postprocessed attributes
 * @exception Exception if postprocessing fails for some reason
 */
public int[] postProcess (int[] attributeSet)
  throws Exception
{
  // nothing to do unless locally predictive attributes were requested
  if (!m_locallyPredictive) {
    // m_trainInstances = new Instances(m_trainInstances,0);
    return attributeSet;
  }

  // seed the bit set with the attributes found by the search
  BitSet bestGroup = new BitSet(m_numAttribs);
  for (int att : attributeSet) {
    bestGroup.set(att);
  }

  addLocallyPredictive(bestGroup);

  // count the members of the augmented group
  int size = 0;
  for (int i = 0; i < m_numAttribs; i++) {
    if (bestGroup.get(i)) {
      size++;
    }
  }

  // copy the set bits out into the result array, in ascending order
  int[] newSet = new int[size];
  int pos = 0;
  for (int i = 0; i < m_numAttribs; i++) {
    if (bestGroup.get(i)) {
      newSet[pos++] = i;
    }
  }
  // m_trainInstances = new Instances(m_trainInstances,0);
  return newSet;
}
/**
 * Restores the evaluator's options to their defaults and releases the
 * training data.
 */
protected void resetOptions () {
  m_c_Threshold = 0.0;
  m_locallyPredictive = false;
  m_missingSeperate = false;
  m_trainInstances = null;
}
/**
 * Main method for testing this class.
 *
 * @param args the options
 */
public static void main (String[] args) {
  try {
    System.out.println(AttributeSelection.
                       SelectAttributes(new CfsSubsetEval(), args));
  }
  catch (Exception e) {
    // Pass the throwable itself so the logger renders the full stack
    // trace. The previous e.getStackTrace().toString() only printed the
    // array's identity hash (e.g. "[Ljava.lang.StackTraceElement;@..."),
    // and e.getMessage() can be null.
    log.error("Exception while running attribute selection", e);
  }
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -