📄 distance.java
字号:
if (diff > dist)
dist = diff;
}
;
if (measureType == MEASURE_TYPE_SIMILARITY)
dist = 1.0 / (1.0 + dist / simMeasNormConst);
break;
case TYPE_CITY_BLOCK :
for (int i = 0; i < numbAtt; i++) {
double diff = AttDist(vec1.getMetaData(), i, vec1.getValue(i), vec2.getValue(i));
dist = dist + weights[i] * diff;
}
;
if (measureType == MEASURE_TYPE_SIMILARITY)
dist = 1.0 / (1.0 + dist / simMeasNormConst);
break;
case TYPE_MINKOVSKI :
for (int i = 0; i < numbAtt; i++) {
double diff = Math.abs(vec1.getValue(i) - vec2.getValue(i));
dist = dist + weights[i] * Math.pow(diff, minkPar);
}
;
dist = Math.pow(dist, (1.0 / minkPar));
if (measureType == MEASURE_TYPE_SIMILARITY)
dist = 1.0 / (1.0 + dist / simMeasNormConst);
break;
case TYPE_SIMPLE_MATCHING :
double[] counts = getDistCounts(vec1, vec2);
dist = (counts[0] + counts[3]) / (counts[0] + counts[1] + counts[2] + counts[3]);
break;
case TYPE_JACCARD :
counts = getDistCounts(vec1, vec2);
dist = counts[0] / (counts[0] + counts[1] + counts[2]);
break;
case TYPE_TANIMOTO :
counts = getDistCounts(vec1, vec2);
dist = (counts[0] + counts[3]) / (counts[0] + 2.0 * (counts[1] + counts[2]) + counts[3]);
break;
case TYPE_BINARY_SIMILARITY :
throw new MiningException("Binary similarity not supported");
default :
throw new MiningException("Unknown distance type specified.");
};
return dist;
}
/**
 * Calculates the distance between two points of one attribute according to the configured compare function.
 *
 * The compare functions GAUSS_SIM and TABLE are currently not implemented; for both of them
 * Category.MISSING_VALUE is returned.
 *
 * @param metaData
 *            meta data of the vectors (not evaluated by the current implementation)
 * @param index
 *            index of the current attribute; only used to normalize the values for ABS_DIFF
 * @param p1
 *            point 1
 * @param p2
 *            point 2
 * @return distance between p1 and p2 on the given attribute
 * @exception MiningException
 *                unknown compare function, or a value could not be normalized
 */
private double AttDist(MiningDataSpecification metaData, int index, double p1, double p2) throws MiningException {
    switch (compareFunction) {
        case COMPARISON_FUNCTION_ABS_DIFF : {
            // Absolute difference of the two values after normValue has been applied to each.
            final double first = normValue(index, p1);
            final double second = normValue(index, p2);
            return Math.abs(first - second);
        }
        case COMPARISON_FUNCTION_DELTA :
            // 0 when both values agree within SIMILARITY_EPSILON, 1 otherwise.
            return (Math.abs(p1 - p2) < SIMILARITY_EPSILON) ? 0.0 : 1.0;
        case COMPARISON_FUNCTION_EQUAL :
            // Mirror image of DELTA: 1 when both values agree within SIMILARITY_EPSILON, 0 otherwise.
            return (Math.abs(p1 - p2) < SIMILARITY_EPSILON) ? 1.0 : 0.0;
        case COMPARISON_FUNCTION_GAUSS_SIM :
        case COMPARISON_FUNCTION_TABLE :
            // Not implemented: signalled by a missing value.
            return Category.MISSING_VALUE;
        default :
            throw new MiningException("Unknown comparison function specified.");
    }
}
// -----------------------------------------------------------------------
// newly added methods of distance calculation for the k-prototype algorithm
// added by XiaoMing Li 2006/03/29
// -----------------------------------------------------------------------
/**
 * Calculates the weighted distance between two vectors, restricted to the attributes listed in
 * <code>indexArray</code>.
 *
 * Only TYPE_EUCLIDEAN and TYPE_SQUARED_EUCLIDEAN are evaluated. For every other distance type, and for an
 * attribute type that is neither AttributeType.NUMERICAL nor AttributeType.CATEGORICAL, the result is 0.0.
 *
 * @param vec1
 *            first mining vector
 * @param vec2
 *            second mining vector
 * @param indexArray
 *            indexes of the attributes to be included in the distance
 * @param attributeType
 *            AttributeType.NUMERICAL to compare by absolute difference, AttributeType.CATEGORICAL to compare
 *            by exact match
 * @return sum of weight^beta * diff^2 over the listed attributes (TYPE_SQUARED_EUCLIDEAN) or its square root
 *         (TYPE_EUCLIDEAN)
 * @throws MiningException
 *             cannot calculate the distance of an attribute
 */
public double distance(MiningVector vec1, MiningVector vec2, int[] indexArray, int attributeType)
        throws MiningException {
    // Weights: explicit field weights if present, otherwise 1.0 for every attribute of vec1.
    final int numbAtt = vec1.getValues().length;
    final double[] weights;
    if (fieldWeights != null) {
        weights = fieldWeights;
    } else {
        weights = new double[numbAtt];
        for (int i = 0; i < numbAtt; i++) {
            weights[i] = 1.0;
        }
    }

    // Distance:
    switch (type) {
        case TYPE_EUCLIDEAN :
            return Math.sqrt(weightedSquaredSum(vec1, vec2, indexArray, attributeType, weights));
        case TYPE_SQUARED_EUCLIDEAN :
            return weightedSquaredSum(vec1, vec2, indexArray, attributeType, weights);
        default :
            // Other distance types are not handled by this variant; the result stays 0.0.
            return 0.0;
    }
}

/**
 * Sums weight^beta * diff^2 over the attributes listed in <code>indexArray</code>, where diff is taken from
 * numAttDist or catAttDist depending on <code>attributeType</code>.
 */
private double weightedSquaredSum(MiningVector vec1, MiningVector vec2, int[] indexArray, int attributeType,
        double[] weights) throws MiningException {
    // NOTE(review): the weight is looked up by loop position i, not by attribute index. This is only
    // equivalent if the weights are laid out in the order of indexArray - confirm against the callers.
    double sum = 0.0;
    if (attributeType == AttributeType.NUMERICAL) {
        for (int i = 0; i < indexArray.length; i++) {
            final int index = indexArray[i];
            final double diff = numAttDist(vec1.getMetaData(), index, vec1.getValue(index), vec2.getValue(index));
            sum = sum + Math.pow(weights[i], beta) * diff * diff;
        }
    }
    if (attributeType == AttributeType.CATEGORICAL) {
        for (int i = 0; i < indexArray.length; i++) {
            final int index = indexArray[i];
            final double diff = catAttDist(vec1.getMetaData(), index, vec1.getValue(index), vec2.getValue(index));
            sum = sum + Math.pow(weights[i], beta) * diff * diff;
        }
    }
    return sum;
}
// -----------------------------------------------------------------------
// new added Methods of partition percete calculation for F W Kmeans algorithm
// added by XiaoGuang Xu 2006/05/17
// -----------------------------------------------------------------------
/**
*
* @param vec1
* @param vec2
* @param indexArray
* @param attributeType
* @return
* @throws MiningException
*/
/*
* public double distanceD(MiningVector vec1, MiningVector vec2,int[]indexArray,int attributeType) throws
* MiningException {
*
* int numbAtt = vec1.getValues().length; double[] weights = new double[numbAtt]; if (fieldWeights == null) { for
* (int i = 0; i < numbAtt; i++) weights[i] = 1.0; } else weights = fieldWeights; double dist = 0.0; switch(type) {
* case TYPE_EUCLIDEAN: if(attributeType==AttributeType.NUMERICAL) { for (int i = 0; i < indexArray.length; i++) {
* int index=indexArray[i]; double diff = numAttDist( vec1.getMetaData(), index, vec1.getValue(index),
* vec2.getValue(index) ); dist = dist + Math.pow(weights[i],alpha)*diff*diff; }; }
* if(attributeType==AttributeType.CATEGORICAL) { for (int i = 0; i < indexArray.length; i++) { int
* index=indexArray[i]; double diff = catAttDist( vec1.getMetaData(), index, vec1.getValue(index),
* vec2.getValue(index) ); dist = dist + Math.pow(weights[i],alpha)*diff*diff; }; } dist = Math.sqrt( dist ); break;
* case TYPE_SQUARED_EUCLIDEAN: if(attributeType==AttributeType.NUMERICAL) { for (int i = 0; i < indexArray.length;
* i++) { int index=indexArray[i]; double diff = numAttDist( vec1.getMetaData(), index, vec1.getValue(index),
* vec2.getValue(index) ); dist = dist + Math.pow(weights[i],alpha)*diff*diff; }; }
* if(attributeType==AttributeType.CATEGORICAL) { for (int i = 0; i < indexArray.length; i++) { int
* index=indexArray[i]; double diff = catAttDist( vec1.getMetaData(), index, vec1.getValue(index),
* vec2.getValue(index) ); dist = dist + Math.pow(weights[i],alpha)*diff*diff; }; } break; } return dist; }
*/
// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// /End XiaoguangXu
// /
// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Distance of two categorical values: 0.0 if their difference is exactly zero, 1.0 otherwise.
 *
 * @param metaData
 *            meta data of the vectors (not evaluated)
 * @param index
 *            index of the current attribute (not evaluated)
 * @param p1
 *            value of the first vector
 * @param p2
 *            value of the second vector
 * @return 0.0 for matching values, 1.0 otherwise
 * @throws MiningException
 *             declared for symmetry with the other attribute distances; not thrown by this implementation
 */
private double catAttDist(MiningDataSpecification metaData, int index, double p1, double p2) throws MiningException {
    final double gap = p1 - p2;
    // No tolerance is applied here: only an exact zero difference counts as a match.
    return (gap == 0.0) ? 0.0 : 1.0;
}
/**
 * Distance of two numerical values: the absolute value of their difference.
 *
 * @param metaData
 *            meta data of the vectors (not evaluated)
 * @param index
 *            index of the current attribute (not evaluated)
 * @param p1
 *            value of the first vector
 * @param p2
 *            value of the second vector
 * @return |p1 - p2|
 * @throws MiningException
 *             declared for symmetry with the other attribute distances; not thrown by this implementation
 */
private double numAttDist(MiningDataSpecification metaData, int index, double p1, double p2) throws MiningException {
    final double gap = p1 - p2;
    return Math.abs(gap);
}
/**
 * Returns the value scaled by the min/max range of its attribute, or the value itself if normalization is
 * switched off.
 *
 * If the minimum of the attribute is NaN, or minimum and maximum differ by less than SIMILARITY_EPSILON, the
 * normalized value is 0.0.
 *
 * @param index
 *            attribute index
 * @param value
 *            value to be normalized
 * @return normalized value
 * @exception MiningException
 *                normalization is switched on but minAtt or maxAtt is not set
 */
private double normValue(int index, double value) throws MiningException {
    // No normalization requested: pass the value through.
    if (!normalized) {
        return value;
    }
    if (minAtt == null || maxAtt == null) {
        throw new MiningException("Normalization required but min/maxAtt not specified.");
    }
    // Undefined minimum: checked before maxAtt is touched.
    if (Double.isNaN(minAtt[index])) {
        return 0.0;
    }
    // Degenerate range: avoid dividing by (almost) zero.
    if (Math.abs(maxAtt[index] - minAtt[index]) < SIMILARITY_EPSILON) {
        return 0.0;
    }
    return (value - minAtt[index]) / (maxAtt[index] - minAtt[index]);
}
/**
 * Gets the counts a11, a10, a01, a00 for basically binary attributes:
 * <ul>
 * <li>a11 = number of times where vec1(i) = 1 and vec2(i) = 1</li>
 * <li>a10 = number of times where vec1(i) = 1 and vec2(i) = 0</li>
 * <li>a01 = number of times where vec1(i) = 0 and vec2(i) = 1</li>
 * <li>a00 = number of times where vec1(i) = 0 and vec2(i) = 0</li>
 * </ul>
 * A value counts as 1 or 0 if it lies within SIMILARITY_EPSILON of that number; positions where either value
 * is neither 0 nor 1 do not contribute to any count.
 *
 * @param vec1
 *            mining vector 1
 * @param vec2
 *            mining vector 2
 * @return array of a11, a10, a01, a00 (in this order)
 */
private double[] getDistCounts(MiningVector vec1, MiningVector vec2) {
    final double[] counts = new double[4];
    final int total = vec1.getValues().length;
    for (int pos = 0; pos < total; pos++) {
        final double first = vec1.getValue(pos);
        final double second = vec2.getValue(pos);

        final boolean firstIsOne = Math.abs(first - 1.0) < SIMILARITY_EPSILON;
        final boolean firstIsZero = Math.abs(first - 0.0) < SIMILARITY_EPSILON;
        final boolean secondIsOne = Math.abs(second - 1.0) < SIMILARITY_EPSILON;
        final boolean secondIsZero = Math.abs(second - 0.0) < SIMILARITY_EPSILON;

        // The four combinations are tested independently of each other.
        if (firstIsOne) {
            if (secondIsOne) {
                counts[0] += 1;
            }
            if (secondIsZero) {
                counts[1] += 1;
            }
        }
        if (firstIsZero) {
            if (secondIsOne) {
                counts[2] += 1;
            }
            if (secondIsZero) {
                counts[3] += 1;
            }
        }
    }
    return counts;
}
// -----------------------------------------------------------------------
// Methods of PMML handling
// -----------------------------------------------------------------------
/**
 * Creates the PMML object ComparisonMeasure from this distance.
 *
 * Attention: the normalization flag cannot be stored in the PMML model. For normalization, use the
 * corresponding transformations before clustering!
 *
 * @return PMML object of Distance
 * @see com.prudsys.pdm.Adapters.PmmlVersion20.ComparisonMeasure
 * @exception MiningException
 *                unknown compare function or unknown distance type
 */
public Object createPmmlObject() throws MiningException {
    // Instance of the PMML ComparisonMeasure object to be filled:
    final com.prudsys.pdm.Adapters.PmmlVersion20.ComparisonMeasure measure =
            new com.prudsys.pdm.Adapters.PmmlVersion20.ComparisonMeasure();

    // Kind of measure: everything that is not a distance is written as similarity.
    measure.setKind((measureType == MEASURE_TYPE_DISTANCE) ? "distance" : "similarity");

    // Compare function and its range:
    String functionName;
    switch (compareFunction) {
        case COMPARISON_FUNCTION_ABS_DIFF :
            functionName = "absDiff";
            break;
        case COMPARISON_FUNCTION_GAUSS_SIM :
            functionName = "gaussSim";
            break;
        case COMPARISON_FUNCTION_DELTA :
            functionName = "delta";
            break;
        case COMPARISON_FUNCTION_EQUAL :
            functionName = "equal";
            break;
        case COMPARISON_FUNCTION_TABLE :
            functionName = "table";
            break;
        default :
            throw new MiningException("Unknown comparison function specified.");
    }
    measure.setCompareFunction(functionName);
    measure.setMinimum(String.valueOf(minCompareFunction));
    measure.setMaximum(String.valueOf(maxCompareFunction));

    // Distance type: one child element per type; only Minkowski carries a parameter.
    switch (type) {
        case TYPE_EUCLIDEAN :
            measure.setEuclidean(new com.prudsys.pdm.Adapters.PmmlVersion20.Euclidean());
            break;
        case TYPE_SQUARED_EUCLIDEAN :
            measure.setSquaredEuclidean(new com.prudsys.pdm.Adapters.PmmlVersion20.SquaredEuclidean());
            break;
        case TYPE_CHEBYCHEV :
            measure.setChebychev(new com.prudsys.pdm.Adapters.PmmlVersion20.Chebychev());
            break;
        case TYPE_CITY_BLOCK :
            measure.setCityBlock(new com.prudsys.pdm.Adapters.PmmlVersion20.CityBlock());
            break;
        case TYPE_MINKOVSKI : {
            final com.prudsys.pdm.Adapters.PmmlVersion20.Minkowski minkowski =
                    new com.prudsys.pdm.Adapters.PmmlVersion20.Minkowski();
            minkowski.setPParameter(String.valueOf(minkPar));
            measure.setMinkowski(minkowski);
            break;
        }
        case TYPE_SIMPLE_MATCHING :
            measure.setSimpleMatching(new com.prudsys.pdm.Adapters.PmmlVersion20.SimpleMatching());
            break;
        case TYPE_JACCARD :
            measure.setJaccard(new com.prudsys.pdm.Adapters.PmmlVersion20.Jaccard());
            break;
        case TYPE_TANIMOTO :
            measure.setTanimoto(new com.prudsys.pdm.Adapters.PmmlVersion20.Tanimoto());
            break;
        case TYPE_BINARY_SIMILARITY :
            measure.setBinarySimilarity(new com.prudsys.pdm.Adapters.PmmlVersion20.BinarySimilarity());
            break;
        default :
            throw new MiningException("Unknown distance type specified.");
    }
    return measure;
}
/**
 * Reads the ComparisonMeasure from a PMML object and takes over its kind, compare function, compare function
 * range and distance type.
 *
 * A kind or compare function that is missing or not recognized leaves the current setting unchanged. If the
 * PMML object carries more than one distance type element, the last one checked here wins.
 *
 * @param pmmlObject
 *            PMML object of Distance; must be a com.prudsys.pdm.Adapters.PmmlVersion20.ComparisonMeasure
 * @see com.prudsys.pdm.Adapters.PmmlVersion20.ComparisonMeasure
 * @exception MiningException
 *                cannot read PMML object
 */
public void parsePmmlObject(Object pmmlObject) throws MiningException {
    // Get the PMML ComparisonMeasure object:
    final com.prudsys.pdm.Adapters.PmmlVersion20.ComparisonMeasure cm =
            (com.prudsys.pdm.Adapters.PmmlVersion20.ComparisonMeasure) pmmlObject;

    // Kind of measure (constant-first equals, so a missing attribute is treated like an unknown one):
    final String kind = cm.getKind();
    if ("distance".equals(kind)) {
        measureType = MEASURE_TYPE_DISTANCE;
    } else if ("similarity".equals(kind)) {
        measureType = MEASURE_TYPE_SIMILARITY;
    }

    // Compare function; the names are the ones written by createPmmlObject.
    final String function = cm.getCompareFunction();
    if ("absDiff".equals(function)) {
        compareFunction = COMPARISON_FUNCTION_ABS_DIFF;
    } else if ("gaussSim".equals(function)) {
        // Was compared against the misspelled "gaussSime", so gaussSim was never recognized.
        compareFunction = COMPARISON_FUNCTION_GAUSS_SIM;
    } else if ("delta".equals(function)) {
        compareFunction = COMPARISON_FUNCTION_DELTA;
    } else if ("equal".equals(function)) {
        compareFunction = COMPARISON_FUNCTION_EQUAL;
    } else if ("table".equals(function)) {
        compareFunction = COMPARISON_FUNCTION_TABLE;
    }
    minCompareFunction = Double.parseDouble(cm.getMinimum());
    maxCompareFunction = Double.parseDouble(cm.getMaximum());

    // Distance type: checked independently and in this order, so a later element overrides an earlier one.
    if (cm.getEuclidean() != null) {
        type = TYPE_EUCLIDEAN;
    }
    if (cm.getSquaredEuclidean() != null) {
        type = TYPE_SQUARED_EUCLIDEAN;
    }
    if (cm.getChebychev() != null) {
        type = TYPE_CHEBYCHEV;
    }
    if (cm.getCityBlock() != null) {
        type = TYPE_CITY_BLOCK;
    }
    if (cm.getMinkowski() != null) {
        type = TYPE_MINKOVSKI;
        minkPar = Double.parseDouble(cm.getMinkowski().getPParameter());
    }
    if (cm.getSimpleMatching() != null) {
        type = TYPE_SIMPLE_MATCHING;
    }
    if (cm.getJaccard() != null) {
        type = TYPE_JACCARD;
    }
    if (cm.getTanimoto() != null) {
        type = TYPE_TANIMOTO;
    }
    if (cm.getBinarySimilarity() != null) {
        // Only the type is taken over; parameters of the binary similarity element are not read.
        type = TYPE_BINARY_SIMILARITY;
    }
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -