📄 normalizabledistance.java
字号:
else firstI = first.index(p1);
      // NOTE(review): everything down to "return distance; }" is the tail of a
      // method whose beginning lies outside the visible source. The code is left
      // untouched; only layout and comments were added.

      // p2 has run past secondNumValues: use numAttributes as the index.
      if (p2 >= secondNumValues) secondI = numAttributes;
      else secondI = second.index(p2);

      // Position p1 sits on classIndex: advance past it.
      if (firstI == classIndex) {
        p1++; continue;
      }
      // Index is below numAttributes but not flagged in m_ActiveIndices: advance past it.
      if ((firstI < numAttributes) && !m_ActiveIndices[firstI]) {
        p1++; continue;
      }
      // Same two checks for the second instance.
      if (secondI == classIndex) {
        p2++; continue;
      }
      if ((secondI < numAttributes) && !m_ActiveIndices[secondI]) {
        p2++; continue;
      }

      double diff;
      if (firstI == secondI) {
        // Both positions refer to the same index: compare the two stored values.
        diff = difference(firstI,
                          first.valueSparse(p1),
                          second.valueSparse(p2));
        p1++; p2++;
      } else if (firstI > secondI) {
        // The second instance is at the lower index: 0 is passed as the first value.
        diff = difference(secondI,
                          0, second.valueSparse(p2));
        p2++;
      } else {
        // The first instance is at the lower index: 0 is passed as the second value.
        diff = difference(firstI,
                          first.valueSparse(p1), 0);
        p1++;
      }
      if (stats != null)
        stats.incrCoordCount();

      distance = updateDistance(distance, diff);
      // Give up as soon as the running distance exceeds cutOffValue.
      if (distance > cutOffValue)
        return Double.POSITIVE_INFINITY;
    }

    return distance;
  }

  /**
   * Updates the current distance calculated so far with the new difference
   * between two attributes. The difference between the attributes was
   * calculated with the difference(int,double,double) method.
   *
   * @param currDist	the current distance calculated so far
   * @param diff	the difference between two new attributes
   * @return		the updated distance
   * @see		#difference(int, double, double)
   */
  protected abstract double updateDistance(double currDist, double diff);

  /**
   * Normalizes a given value of a numeric attribute using the stored ranges.
   * Returns 0 when the stored minimum of the attribute is NaN or when the
   * stored minimum and maximum are equal; otherwise returns
   * (x - minimum) / width.
   *
   * @param x		the value to be normalized
   * @param i		the attribute's index
   * @return		the normalized value
   */
  protected double norm(double x, int i) {
    if (Double.isNaN(m_Ranges[i][R_MIN]) || (m_Ranges[i][R_MAX] == m_Ranges[i][R_MIN]))
      return 0;
    else
      return (x - m_Ranges[i][R_MIN]) / (m_Ranges[i][R_WIDTH]);
  }

  /**
   * Computes the difference between two given attribute
   * values.
* * @param index the attribute index * @param val1 the first value * @param val2 the second value * @return the difference */ protected double difference(int index, double val1, double val2) { switch (m_Data.attribute(index).type()) { case Attribute.NOMINAL: if (Instance.isMissingValue(val1) || Instance.isMissingValue(val2) || ((int) val1 != (int) val2)) { return 1; } else { return 0; } case Attribute.NUMERIC: if (Instance.isMissingValue(val1) || Instance.isMissingValue(val2)) { if (Instance.isMissingValue(val1) && Instance.isMissingValue(val2)) { if (!m_DontNormalize) //We are doing normalization return 1; else return (m_Ranges[index][R_MAX] - m_Ranges[index][R_MIN]); } else { double diff; if (Instance.isMissingValue(val2)) { diff = (!m_DontNormalize) ? norm(val1, index) : val1; } else { diff = (!m_DontNormalize) ? norm(val2, index) : val2; } if (!m_DontNormalize && diff < 0.5) { diff = 1.0 - diff; } else if (m_DontNormalize) { if ((m_Ranges[index][R_MAX]-diff) > (diff-m_Ranges[index][R_MIN])) return m_Ranges[index][R_MAX]-diff; else return diff-m_Ranges[index][R_MIN]; } return diff; } } else { return (!m_DontNormalize) ? (norm(val1, index) - norm(val2, index)) : (val1 - val2); } default: return 0; } } /** * Initializes the ranges using all instances of the dataset. * Sets m_Ranges. * * @return the ranges */ public double[][] initializeRanges() { if (m_Data == null) { m_Ranges = null; return m_Ranges; } int numAtt = m_Data.numAttributes(); double[][] ranges = new double [numAtt][3]; if (m_Data.numInstances() <= 0) { initializeRangesEmpty(numAtt, ranges); m_Ranges = ranges; return m_Ranges; } else { // initialize ranges using the first instance updateRangesFirst(m_Data.instance(0), numAtt, ranges); } // update ranges, starting from the second for (int i = 1; i < m_Data.numInstances(); i++) updateRanges(m_Data.instance(i), numAtt, ranges); m_Ranges = ranges; return m_Ranges; } /** * Used to initialize the ranges. 
For this the values of the first * instance is used to save time. * Sets low and high to the values of the first instance and * width to zero. * * @param instance the new instance * @param numAtt number of attributes in the model * @param ranges low, high and width values for all attributes */ public void updateRangesFirst(Instance instance, int numAtt, double[][] ranges) { for (int j = 0; j < numAtt; j++) { if (!instance.isMissing(j)) { ranges[j][R_MIN] = instance.value(j); ranges[j][R_MAX] = instance.value(j); ranges[j][R_WIDTH] = 0.0; } else { // if value was missing ranges[j][R_MIN] = Double.POSITIVE_INFINITY; ranges[j][R_MAX] = -Double.POSITIVE_INFINITY; ranges[j][R_WIDTH] = Double.POSITIVE_INFINITY; } } } /** * Updates the minimum and maximum and width values for all the attributes * based on a new instance. * * @param instance the new instance * @param numAtt number of attributes in the model * @param ranges low, high and width values for all attributes */ public void updateRanges(Instance instance, int numAtt, double[][] ranges) { // updateRangesFirst must have been called on ranges for (int j = 0; j < numAtt; j++) { double value = instance.value(j); if (!instance.isMissing(j)) { if (value < ranges[j][R_MIN]) { ranges[j][R_MIN] = value; ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN]; if (value > ranges[j][R_MAX]) { //if this is the first value that is ranges[j][R_MAX] = value; //not missing. The,0 ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN]; } } else { if (value > ranges[j][R_MAX]) { ranges[j][R_MAX] = value; ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN]; } } } } } /** * Used to initialize the ranges. 
* * @param numAtt number of attributes in the model * @param ranges low, high and width values for all attributes */ public void initializeRangesEmpty(int numAtt, double[][] ranges) { for (int j = 0; j < numAtt; j++) { ranges[j][R_MIN] = Double.POSITIVE_INFINITY; ranges[j][R_MAX] = -Double.POSITIVE_INFINITY; ranges[j][R_WIDTH] = Double.POSITIVE_INFINITY; } } /** * Updates the ranges given a new instance. * * @param instance the new instance * @param ranges low, high and width values for all attributes * @return the updated ranges */ public double[][] updateRanges(Instance instance, double[][] ranges) { // updateRangesFirst must have been called on ranges for (int j = 0; j < ranges.length; j++) { double value = instance.value(j); if (!instance.isMissing(j)) { if (value < ranges[j][R_MIN]) { ranges[j][R_MIN] = value; ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN]; } else { if (instance.value(j) > ranges[j][R_MAX]) { ranges[j][R_MAX] = value; ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN]; } } } } return ranges; } /** * Initializes the ranges of a subset of the instances of this dataset. * Therefore m_Ranges is not set. * * @param instList list of indexes of the subset * @return the ranges * @throws Exception if something goes wrong */ public double[][] initializeRanges(int[] instList) throws Exception { if (m_Data == null) throw new Exception("No instances supplied."); int numAtt = m_Data.numAttributes(); double[][] ranges = new double [numAtt][3]; if (m_Data.numInstances() <= 0) { initializeRangesEmpty(numAtt, ranges); return ranges; } else { // initialize ranges using the first instance updateRangesFirst(m_Data.instance(instList[0]), numAtt, ranges); // update ranges, starting from the second for (int i = 1; i < instList.length; i++) { updateRanges(m_Data.instance(instList[i]), numAtt, ranges); } } return ranges; } /** * Initializes the ranges of a subset of the instances of this dataset. * Therefore m_Ranges is not set. 
* The caller of this method should ensure that the supplied start and end * indices are valid (start <= end, end<instList.length etc) and * correct. * * @param instList list of indexes of the instances * @param startIdx start index of the subset of instances in the indices array * @param endIdx end index of the subset of instances in the indices array * @return the ranges * @throws Exception if something goes wrong */ public double[][] initializeRanges(int[] instList, int startIdx, int endIdx) throws Exception { if (m_Data == null) throw new Exception("No instances supplied."); int numAtt = m_Data.numAttributes(); double[][] ranges = new double [numAtt][3]; if (m_Data.numInstances() <= 0) { initializeRangesEmpty(numAtt, ranges); return ranges; } else { // initialize ranges using the first instance updateRangesFirst(m_Data.instance(instList[startIdx]), numAtt, ranges); // update ranges, starting from the second for (int i = startIdx+1; i <= endIdx; i++) { updateRanges(m_Data.instance(instList[i]), numAtt, ranges); } } return ranges; } /** * Update the ranges if a new instance comes. * * @param instance the new instance */ public void updateRanges(Instance instance) { validate(); m_Ranges = updateRanges(instance, m_Ranges); } /** * Test if an instance is within the given ranges. * * @param instance the instance * @param ranges the ranges the instance is tested to be in * @return true if instance is within the ranges */ public boolean inRanges(Instance instance, double[][] ranges) { boolean isIn = true; // updateRangesFirst must have been called on ranges for (int j = 0; isIn && (j < ranges.length); j++) { if (!instance.isMissing(j)) { double value = instance.value(j); isIn = value <= ranges[j][R_MAX]; if (isIn) isIn = value >= ranges[j][R_MIN]; } } return isIn; } /** * Check if ranges are set. * * @return true if ranges are set */ public boolean rangesSet() { return (m_Ranges != null); } /** * Method to get the ranges. 
* Calls validate() before the ranges are read.
   *
   * @return		the ranges
   * @throws Exception	if no ranges are set yet
   */
  public double[][] getRanges() throws Exception {
    validate();

    if (m_Ranges != null) {
      return m_Ranges;
    }
    throw new Exception("Ranges not yet set.");
  }

  /**
   * Returns an empty string.
   *
   * @return		an empty string
   */
  public String toString() {
    return "";
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -