📄 dbtransform.java

📁 clustering data for the different techniques of data mining
💻 JAVA
字号:
/*

  DBTransform.java

  Definition of the class DBTransform, which transforms the columns of
  a database residing in a file into a collection of Partition
  objects
  
  (P)2002  Dana Cristofor

*/

/*

GAClust - Clustering categorical databases using genetic algorithms
Copyright (C) 2002  Dana Cristofor


This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
USA

GAClust was written by Dana Cristofor (dana@cs.umb.edu).

*/

import java.io.*;
import java.util.*;

/**
 * DBTransform
 *
 * Utility class that reads a delimited text database from a file and
 * encodes each of its columns into a Partition object.
 *
 * @version 	1.0
 * @author	Dana Cristofor
 */
public class DBTransform
{
  /** Transforms the database residing in the file <code>fName</code>
   * into a collection of Partition objects, one per column.
   *
   * Each line of the file is one row; tokens are separated by spaces,
   * tabs, carriage returns, newlines, or commas.  Within a column,
   * every distinct token is assigned the next unused integer code
   * (starting at 1) and that code is stored in the column's Partition
   * at the row index.
   *
   * The number of columns is fixed by the first row read (capped at
   * <code>nCols</code>); tokens beyond that count on later rows are
   * ignored.  Rows that have fewer tokens than the first row leave
   * the remaining Partition entries for that row untouched.
   *
   * @param fName name of the file containing the database
   * @param nRows maximum number of rows to read
   * @param nCols maximum number of columns to read
   * @param output buffer to which warning messages are appended when
   * the database has fewer or more rows/columns than requested
   * @return a Vector containing an Integer representing the number
   * of rows actually read, an Integer representing the number of
   * columns actually used, and one Partition object for every such
   * column
   * @throws IOException if the file cannot be opened or read
  */
  static public  Vector transform(String fName, int nRows, int nCols, 
				  StringBuffer output)
    throws IOException
  {
    final int RESERVED_POS = 2;
    Vector v = new Vector(nCols + RESERVED_POS);
    // the first two positions hold the number of rows and columns
    v.insertElementAt(new Integer(nRows), 0);
    v.insertElementAt(new Integer(nCols), 1);

    // stores the currently available integer to be used in the
    // encoding process
    int[] numCodes = new int[nCols];
    for (int i = 0; i < nCols; i++)
      numCodes[i] = 1; // first class is encoded with 1

    String delim = " \t\n\r,";
    int currRowNo = 0;
    // upper bound on the columns consumed per row; narrowed after the
    // first row so later rows never index past the Partition objects
    // that were actually created
    int colLimit = nCols;

    BufferedReader in
      = new BufferedReader(new FileReader(fName));
    try
      {
        // we still have rows to process
        while (currRowNo < nRows && in.ready())
          {
            String currRow = in.readLine();
            if (currRow == null)
              // end of stream reached although ready() reported data
              break;

            StringTokenizer currRowTok =
              new StringTokenizer(currRow, delim);

            int currColNo = 0;
            // install each token in the appropriate Partition object
            while (currColNo < colLimit && currRowTok.hasMoreTokens())
              {
                // for the first row create the Partition objects and
                // add them to the Vector v
                if (currRowNo == 0)
                  v.insertElementAt(new Partition(nRows),
                                    currColNo + RESERVED_POS);

                String token = currRowTok.nextToken();
                Partition currPart
                  = (Partition)v.get(currColNo + RESERVED_POS);

                // check if token has already a numeric code
                if (currPart.hasClassCode(token) == false)
                  // if it doesn't generate the next numeric code for it
                  currPart.setClassCode(token,
                                        new Integer(numCodes[currColNo]++));

                // put the numeric code in the associated vector
                int numCode = currPart.getClassCode(token).intValue();
                currPart.set(currRowNo, numCode);

                currColNo++;
              } // end of tokens per line processing (or end of column
            // processing)

            if (currRowNo == 0)
              {
                if (currColNo != nCols)
                  {
                    output.append("Warning: database " +  fName
                                  + " has fewer than " + nCols
                                  + " columns\n");

                    // insert the actual number of column in the second
                    // position in the returned Vector
                    v.setElementAt(new Integer(currColNo), 1);
                  }
                else
                  if (currRowTok.hasMoreTokens())
                    output.append("Warning: database " +  fName
                                  + " truncated to " + nCols
                                  + " columns\n");

                // only the columns found on the first row have a
                // Partition; restrict all following rows to them
                colLimit = currColNo;
              }

            currRowNo++;
          } // end of line processing

        if (in.ready())
          // there are more rows in the database
          output.append("Warning: database " + fName + " truncated to "
                        + currRowNo + " rows\n");
        else
          if (currRowNo < nRows)
            output.append("Warning: database " + fName + " has only "
                          + currRowNo + " rows, not " + nRows
                          + " as required\n");
      }
    finally
      {
        // release the file handle even when reading or encoding fails
        in.close();
      }

    // insert the actual number of rows in the first position in
    // the returned Vector
    v.setElementAt(new Integer(currRowNo), 0);

    // the number of Partition objects really present; this is 0 when
    // no row was read, in which case the column count is corrected so
    // that it always matches the content of the Vector
    int totalColNo = v.size() - RESERVED_POS;
    v.setElementAt(new Integer(totalColNo), 1);

    // set the appropriate size for the Partition objects
    for (int i = 0; i < totalColNo; i++)
      ((Partition)v.get(i + RESERVED_POS)).setSize(currRowNo);

    return v;
  }
}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -