📄 instances.java
字号:
* * @param tokenizer the stream tokenizer * @exception IOException if reading the next token fails */ private void getFirstToken(StreamTokenizer tokenizer) throws IOException { while (tokenizer.nextToken() == StreamTokenizer.TT_EOL){}; if ((tokenizer.ttype == '\'') || (tokenizer.ttype == '"')) { tokenizer.ttype = StreamTokenizer.TT_WORD; } else if ((tokenizer.ttype == StreamTokenizer.TT_WORD) && (tokenizer.sval.equals("?"))){ tokenizer.ttype = '?'; } } /** * Gets index, checking for a premature and of line. * * @param tokenizer the stream tokenizer * @exception IOException if it finds a premature end of line */ private void getIndex(StreamTokenizer tokenizer) throws IOException { if (tokenizer.nextToken() == StreamTokenizer.TT_EOL) { errms(tokenizer,"premature end of line"); } if (tokenizer.ttype == StreamTokenizer.TT_EOF) { errms(tokenizer,"premature end of file"); } } /** * Gets token and checks if its end of line. * * @param tokenizer the stream tokenizer * @exception IOException if it doesn't find an end of line */ private void getLastToken(StreamTokenizer tokenizer, boolean endOfFileOk) throws IOException { if ((tokenizer.nextToken() != StreamTokenizer.TT_EOL) && ((tokenizer.ttype != StreamTokenizer.TT_EOF) || !endOfFileOk)) { errms(tokenizer,"end of line expected"); } } /** * Gets next token, checking for a premature and of line. * * @param tokenizer the stream tokenizer * @exception IOException if it finds a premature end of line */ private void getNextToken(StreamTokenizer tokenizer) throws IOException { if (tokenizer.nextToken() == StreamTokenizer.TT_EOL) { errms(tokenizer,"premature end of line"); } if (tokenizer.ttype == StreamTokenizer.TT_EOF) { errms(tokenizer,"premature end of file"); } else if ((tokenizer.ttype == '\'') || (tokenizer.ttype == '"')) { tokenizer.ttype = StreamTokenizer.TT_WORD; } else if ((tokenizer.ttype == StreamTokenizer.TT_WORD) && (tokenizer.sval.equals("?"))){ tokenizer.ttype = '?'; } } /** * Initializes the StreamTokenizer used for reading the ARFF file. * * @param tokenizer the stream tokenizer */ private void initTokenizer(StreamTokenizer tokenizer){ tokenizer.resetSyntax(); tokenizer.whitespaceChars(0, ' '); tokenizer.wordChars(' '+1,'\u00FF'); tokenizer.whitespaceChars(',',','); tokenizer.commentChar('%'); tokenizer.quoteChar('"'); tokenizer.quoteChar('\''); tokenizer.ordinaryChar('{'); tokenizer.ordinaryChar('}'); tokenizer.eolIsSignificant(true); } /** * Returns string including all instances, their weights and * their indices in the original dataset. * * @return description of instance and its weight as a string */ private String instancesAndWeights(){ StringBuffer text = new StringBuffer(); for (int i = 0; i < numInstances(); i++) { text.append(instance(i) + " " + instance(i).weight()); if (i < numInstances() - 1) { text.append("\n"); } } return text.toString(); } /** * Implements quicksort. * * @param attIndex the attribute's index * @param lo0 the first index of the subset to be sorted * @param hi0 the last index of the subset to be sorted */ private void quickSort(int attIndex, int lo0, int hi0) { int lo = lo0, hi = hi0; double mid, midPlus, midMinus; if (hi0 > lo0) { // Arbitrarily establishing partition element as the // midpoint of the array. mid = instance((lo0 + hi0) / 2).value(attIndex); midPlus = mid + 1e-6; midMinus = mid - 1e-6; // loop through the array until indices cross while(lo <= hi) { // find the first element that is greater than or equal to // the partition element starting from the left Index. while ((instance(lo).value(attIndex) < midMinus) && (lo < hi0)) { ++lo; } // find an element that is smaller than or equal to // the partition element starting from the right Index. while ((instance(hi).value(attIndex) > midPlus) && (hi > lo0)) { --hi; } // if the indexes have not crossed, swap if(lo <= hi) { swap(lo,hi); ++lo; --hi; } } // If the right index has not reached the left side of array // must now sort the left partition. if(lo0 < hi) { quickSort(attIndex,lo0,hi); } // If the left index has not reached the right side of array // must now sort the right partition. if(lo < hi0) { quickSort(attIndex,lo,hi0); } } } /** * Reads and skips all tokens before next end of line token. * * @param tokenizer the stream tokenizer */ private void readTillEOL(StreamTokenizer tokenizer) throws IOException { while (tokenizer.nextToken() != StreamTokenizer.TT_EOL) {}; tokenizer.pushBack(); } /** * Help function needed for stratification of set. * * @param numFolds the number of folds for the stratification */ private void stratStep (int numFolds){ FastVector newVec = new FastVector(m_Instances.capacity()); int start = 0, j; // create stratified batch while (newVec.size() < numInstances()) { j = start; while (j < numInstances()) { newVec.addElement(instance(j)); j = j + numFolds; } start++; } m_Instances = newVec; } /** * Swaps two instances in the set. * * @param i the first instance's index * @param j the second instance's index */ private void swap(int i, int j){ m_Instances.swap(i, j); } /** * Merges two sets of Instances together. The resulting set will have * all the attributes of the first set plus all the attributes of the * second set. The number of instances in both sets must be the same. * * @param first the first set of Instances * @param second the second set of Instances * @return the merged set of Instances * @exception IllegalArgumentException if the datasets are not the same size */ public static Instances mergeInstances(Instances first, Instances second) { if (first.numInstances() != second.numInstances()) { throw new IllegalArgumentException("Instance sets must be of the same size"); } // Create the vector of merged attributes FastVector newAttributes = new FastVector(); for (int i = 0; i < first.numAttributes(); i++) { newAttributes.addElement(first.attribute(i)); } for (int i = 0; i < second.numAttributes(); i++) { newAttributes.addElement(second.attribute(i)); } // Create the set of Instances Instances merged = new Instances(first.relationName() + '_' + second.relationName(), newAttributes, first.numInstances()); // Merge each instance for (int i = 0; i < first.numInstances(); i++) { merged.add(first.instance(i).mergeInstance(second.instance(i))); } return merged; } /** * Initializes the ranges using all instances of the dataset. * Sets m_Ranges. * @return the ranges */ public double [][] initializeRanges() { int numAtt = this.numAttributes(); double [][] ranges = new double [numAtt][3]; if (this.numInstances() <= 0) { initializeRangesEmpty(numAtt, ranges); return ranges; } else // initialize ranges using the first instance updateRangesFirst(this.instance(0), numAtt, ranges); // update ranges, starting from the second for (int i = 1; i < this.numInstances(); i++) { updateRanges(this.instance(i), numAtt, ranges); } m_Ranges = ranges; return ranges; } /** * Initializes the ranges of a subset of the instances of this dataset. * Therefore m_Ranges is not set. * @param instList list of indexes of the subset * @return the ranges */ public double [][] initializeRanges(int[] instList) { int numAtt = this.numAttributes(); double [][] ranges = new double [numAtt][3]; if (this.numInstances() <= 0) { initializeRangesEmpty(numAtt, ranges); return ranges; } else { // initialize ranges using the first instance updateRangesFirst(this.instance(instList[0]), numAtt, ranges); // update ranges, starting from the second for (int i = 1; i < instList.length; i++) { updateRanges(this.instance(instList[i]), numAtt, ranges); } } return ranges; } /** * Used to initialize the ranges. * @param numAtt number of attributes in the model * @param ranges low, high and width values for all attributes */ public void initializeRangesEmpty(int numAtt, double[][] ranges) { for (int j = 0; j < numAtt; j++) { ranges[j][R_MIN] = Double.MAX_VALUE; ranges[j][R_MAX] = Double.MIN_VALUE; ranges[j][R_WIDTH] = Double.MIN_VALUE; } } /** * Used to initialize the ranges. For this the values of the first * instance is used to save time. * Sets low and high to the values of the first instance and * width to zero. * @param instance the new instance * @param numAtt number of attributes in the model * @param ranges low, high and width values for all attributes */ public void updateRangesFirst(Instance instance, int numAtt, double[][] ranges) { for (int j = 0; j < numAtt; j++) { if (!instance.isMissing(j)) { ranges[j][R_MIN] = instance.value(j); ranges[j][R_MAX] = instance.value(j); ranges[j][R_WIDTH] = 0.0; } else { // if value was missing ranges[j][R_MIN] = Double.MIN_VALUE; ranges[j][R_MAX] = Double.MAX_VALUE; ranges[j][R_WIDTH] = Double.MAX_VALUE; } } } /** * Updates the minimum and maximum and width values for all the attributes * based on a new instance. * @param instance the new instance * @param numAtt number of attributes in the model * @param ranges low, high and width values for all attributes */ private void updateRanges(Instance instance, int numAtt, double [][] ranges) { // updateRangesFirst must have been called on ranges for (int j = 0; j < numAtt; j++) { double value = instance.value(j); if (!instance.isMissing(j)) { if (value < ranges[j][R_MIN]) { ranges[j][R_MIN] = value; ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN]; } else { if (instance.value(j) > ranges[j][R_MAX]) { ranges[j][R_MAX] = value; ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN]; } } } } } /** * prints the ranges. * @param instance the new instance * @param numAtt number of attributes in the model * @param ranges low, high and width values for all attributes */ public static void printRanges(double [][] ranges) { OOPSS("printRanges"); // updateRangesFirst must have been called on ranges for (int j = 0; j < ranges.length; j++) { OOPSS(" "+j+"-MIN "+ranges[j][R_MIN]); OOPSS(" "+j+"-MAX "+ranges[j][R_MAX]); OOPSS(" "+j+"-WIDTH "+ranges[j][R_WIDTH]); } } /** * Updates the ranges given a new instance. * @param instance the new instance * @param numAtt number of attributes in the model * @param ranges low, high and width values for all attributes * public void updateRanges(Instance instance, double [][] ranges) { int numAtt = numAttributes(); // updateRangesFirst must have been called on ranges for (int j = 0; j < numAtt; j++) { double value = instance.value(j); if (!instance.isMissing(j)) { if (value < ranges[j][R_MIN]) { ranges[j][R_MIN] = value; ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN]; } else { if (instance.value(j) > ranges[j][R_MAX]) { ranges[j][R_MAX] = value; ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN]; } } } } }*/ /** * Updates the ranges given a new instance. * @param instance the new instance * @param ranges low, high and width values for all attributes */ public static double [][] updateRanges(Instance instance, double [][] ranges) { // updateRangesFirst must have been called on ranges for (int j = 0; j < ranges.length; j++) { double value = instance.value(j); if (!instance.isMissing(j)) { if (value < ranges[j][R_MIN]) { ranges[j][R_MIN] = value; ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN]; } else { if (instance.value(j) > ranges[j][R_MAX]) { ranges[j][R_MAX] = value; ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN]; } } } } return ranges; } /** * Test if an instance is within the giv
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -