📄 GainCluster.java
/**
 * An algorithm that does clustering based on the gain that is to be
 * received on using these particular centroids. It does a random
 * search.
 *
 * @author Waleed Kadous
 * @version $Id: GainCluster.java,v 1.3 2002/08/02 05:05:34 waleed Exp $
 * $Log: GainCluster.java,v $
 * Revision 1.3  2002/08/02 05:05:34  waleed
 * Hmmm ... fixed the sd's, but then forgot that the distance measure
 * itself has to change.
 *
 * Revision 1.2  2002/08/02 05:02:21  waleed
 * Added better handling of discrete parameters of metafeatures.
 * Standard deviation is no longer applied.
 *
 * Yuuuuk. This thing is getting really messy and ugly, but too much effort
 * has gone into it. Needs to be rewritten.
 */

package tclass.clusteralg;

import tclass.*;
import tclass.util.*;
import java.util.*;
// import weka.core.Statistics;

public class GainCluster implements ClusterAlgI {

    static final float MIN_SD = 1e-9f;

    // Search algorithms.
    static final int RANDOM = 1;
    static final int GENETIC = 2;

    // Evaluation metrics.
    static final int GAINRATIO = 1;
    static final int GAIN = 2;
    static final int CHISQUARE = 3;

    // Distance metrics.
    static final int DISTRATIO = 1;
    static final int DISTANCE = 2;

    String baseName = "directed";
    String description =
        "Implements clustering using a random search for high-gain centroids";

    EventDescVecI edvi = null;
    DomDesc domDesc = null;
    int numTrials = 1000;
    int clustBias = 1;
    int minCent = 2;
    int maxCent = 8;
    int pepIndex = 0;
    int searchAlg = RANDOM;
    int evalMetric = GAINRATIO;
    int distMetric = DISTRATIO;

    // These are required by the genetic algorithm.
    int numRounds = 10;
    float survivalRate = 0.05f;  // The ratio of cases that will survive to the next round.
    float crossoverRate = 0.4f;  // Percentage of instances that will be "crossed over".
    float mutationRate = 0.05f;  // Random rate of mutation.

    // These are used for calculations.
    float[][] points;
    int[] classes;               // The class of each instance, as an int.
    int[] clusters;              // The cluster of each instance, as an int.
    int numInstances;
    int numParams;
    int numClasses;
    boolean[] isDiscrete;
    float[] sds;
    float[][] clustSDs;          // The individual standard deviation of each cluster.
    float[] avgs;
    EventI[] origEvents;
    ClassHistogram allHist;      // The histogram of all classes.
    ClassHistogram[] chs;        // Made global to simplify debugging.
    int[] clusterMem;
    float allInfo;

    /**
     * Name of this clustering algorithm.
     */
    public String name(){
        return baseName;
    }

    /**
     * Clone the current object.
     */
    public Object clone() {
        try {
            return super.clone();
        }
        catch (CloneNotSupportedException e){
            // Can't happen, or so the Java programming book says.
            throw new InternalError(e.toString());
        }
    }

    public void setDomDesc(DomDesc dd){
        domDesc = dd;
    }

    /**
     * Set the description of the incoming Class Stream Events Vector.
     * Note that we need this first, for the parsing of values,
     * before we do any actual processing. I expect that the
     * ClusterAlgMgr should have a copy of it and passes it through in
     * the constructor, pretty much like the Domain description for the
     * GlobalExtrMgr.
     */
    public void setEventDescVec(EventDescVecI events){
        edvi = events;
    }

    /**
     * Provides a description of the clustering algorithm.
     * This description explains what the basic idea of the clustering
     * algorithm is (i.e. the sort of shapes it tries to find). It
     * should also explain any potential configuration options that
     * may be used to configure the object, using the configure
     * option.
     *
     * @return The description of this class.
     */
    public String description(){
        return description;
    }

    /**
     * Configures this instance so that parameter <i>p</i> has
     * value <i>v</i>.
     *
     * @param p the parameter to set.
     * @param v the value of the parameter.
     * @throws InvalidParameterException if the parameter or value is not recognised.
     */
    public void setParam(String p, String v) throws InvalidParameterException {
        if(p.equals("metafeature")){
            // They want to use a particular metafeature; try to find it.
            pepIndex = edvi.elIndex(v);
            if(pepIndex == -1){
                throw new InvalidParameterException(p, v, "Unknown metafeature " + v);
            }
        }
        else if(p.equals("numTrials")){
            try {
                numTrials = Integer.parseInt(v);
            }
            catch(NumberFormatException nfe){
                throw new InvalidParameterException(p, v, "Could not understand number of trials.");
            }
        }
        else if(p.equals("clustBias")){
            try {
                clustBias = Integer.parseInt(v);
            }
            catch(NumberFormatException nfe){
                throw new InvalidParameterException(p, v, "Could not understand bias exponent.");
            }
        }
        else if(p.equals("minCent")){
            try {
                minCent = Integer.parseInt(v);
            }
            catch(NumberFormatException nfe){
                throw new InvalidParameterException(p, v, "Could not understand minimum number of centroids.");
            }
        }
        else if(p.equals("maxCent")){
            try {
                maxCent = Integer.parseInt(v);
            }
            catch(NumberFormatException nfe){
                throw new InvalidParameterException(p, v, "Could not understand maximum number of centroids.");
            }
        }
        else if(p.equals("searchAlg")){
            if(v.equals("random")){
                searchAlg = RANDOM;
            }
            else if(v.equals("genetic")){
                searchAlg = GENETIC;
            }
            else {
                throw new InvalidParameterException(p, v, "Algorithm is random or genetic");
            }
        }
        else if(p.equals("evalMetric") || p.equals("dispMeasure")){
            // "dispMeasure" is kept as an alias; getParamList() advertises "evalMetric".
            if(v.equals("gainratio")){
                evalMetric = GAINRATIO;
            }
            else if(v.equals("gain")){
                evalMetric = GAIN;
            }
            else if(v.equals("chisquare")){
                evalMetric = CHISQUARE;
            }
            else {
                throw new InvalidParameterException(p, v, "Evaluation metric is gainratio, gain or chisquare");
            }
        }
        else if(p.equals("distMetric")){
            if(v.equals("distratio")){
                distMetric = DISTRATIO;
            }
            else if(v.equals("distance")){
                distMetric = DISTANCE;
            }
            else {
                throw new InvalidParameterException(p, v, "Distance is distratio or distance");
            }
        }
        else {
            throw new InvalidParameterException(p, v, "Unknown parameter " + p);
        }
    }

    /**
     * Describes any parameters used by this clustering algorithm,
     * to suit a particular domain.
     *
     * @return A vector of parameters.
     */
    public ParamVec getParamList(){
        ParamVec pv = new ParamVec();
        pv.add(new Param("metafeature", "Name of metafeature to operate on", "First metafeature"));
        pv.add(new Param("numTrials", "Number of trials to run", "1000"));
        pv.add(new Param("minCent", "Minimum number of centroids", "2"));
        pv.add(new Param("maxCent", "Maximum number of centroids", "8"));
        pv.add(new Param("clustBias", "Bias exponent towards more clusters", "1"));
        pv.add(new Param("searchAlg", "Search algorithm: random or genetic", "random"));
        pv.add(new Param("evalMetric", "Evaluation metric: gainratio, gain or chisquare", "gainratio"));
        pv.add(new Param("distMetric", "Distance metric: distratio or distance", "distratio"));
        return pv;
    }

    public ClusterVecI cluster(ClassStreamEventsVecI csvi){
        // int oldDebug = Debug.getDebugLevel();
        // Debug.setDebugLevel(Debug.EVERYTHING);
        // First, flatten the data and get it into a nice form.
        Debug.dp(Debug.EVERYTHING, "");
        if(!createData(csvi)){
            // Uh oh ... no instances ;-(. Return an empty ClusterVec.
            return new ClusterVec();
        }
        findSDs();
        Debug.dp(Debug.EVERYTHING, "Preprocessing complete");
        if(allInfo == 0){
            return new ClusterVec();
        }
        // Then, normalise the data.
        // Then pick some random points.
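        // Orientation note (added comment, summarising only what the loops below do):
        // the method searches for the centroid set with the best score, either by
        // pure random search over numTrials candidate sets, or by a simple genetic
        // search that runs numRounds generations, keeping roughly the top
        // survivalRate fraction of candidates as parents for breed().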
        int[] bestCentroids = new int[0];
        float bestGR = -Float.MAX_VALUE;
        int[] bestClusterMem = new int[0];
        int[] currentCentroids;
        float currentGR;
        ClassHistogram[] bestchs = new ClassHistogram[0];
        System.out.println("Search Alg = " + searchAlg);
        if(searchAlg == RANDOM){
            for(int i=0; i < numTrials; i++){
                currentCentroids = randomCentroids();
                currentGR = gainRatio(currentCentroids);
                Debug.dp(Debug.EVERYTHING, "Test: " + i + " Current: " + currentGR
                         + " Best: " + bestGR);
                if(currentGR > bestGR){
                    bestCentroids = currentCentroids;
                    bestGR = currentGR;
                    bestchs = chs;
                    bestClusterMem = clusterMem;
                }
            }
        }
        else {
            // Ok, this is the new thing.
            TopVector parents = new TopVector((int) (survivalRate*numTrials));
            TopVector children = new TopVector((int) (survivalRate*numTrials));
            int countSoFar = 0;
            // Start out with a normal run; pretty much the same as
            // the random search, except we insert results into the TopVector.
            for(int i=0; i < numTrials; i++){
                currentCentroids = randomCentroids();
                currentGR = gainRatio(currentCentroids);
                parents.add(currentCentroids, currentGR);
                if(currentGR > bestGR){
                    bestCentroids = currentCentroids;
                    bestGR = currentGR;
                    bestchs = chs;
                    bestClusterMem = clusterMem;
                }
                Debug.dp(Debug.EVERYTHING, "GTest: " + i + " Current: " + currentGR
                         + " Best: " + bestGR);
                countSoFar++;
            }
            for(int i=0; i < (numRounds-1); i++){
                children = new TopVector((int) (survivalRate*numTrials));
                for(int j=0; j < numTrials; j++){
                    currentCentroids = breed(parents); // Breeds a bunch of centroids.
                    currentGR = gainRatio(currentCentroids);
                    children.add(currentCentroids, currentGR);
                    if(currentGR > bestGR){
                        bestCentroids = currentCentroids;
                        bestGR = currentGR;
                        bestchs = chs;
                        bestClusterMem = clusterMem;
                    }
                    Debug.dp(Debug.EVERYTHING, "GTest: " + countSoFar + " Current: " + currentGR
                             + " Best: " + bestGR);
                    countSoFar++;
                }
                parents = children;
            }
        }
        if(Debug.getDebugLevel() >= Debug.PROGRESS){
            System.err.println("The BEST division (gr = " + bestGR + ", n = "
                               + bestCentroids.length + ") is: ");
            for(int i=0; i < bestCentroids.length; i++){
                System.err.println(bestchs[i]);
            }
        }
        findClustSDs(bestCentroids, bestClusterMem);
        ClusterVecI cvi = makeClusters(bestCentroids);
        // Debug.setDebugLevel(oldDebug);
        return cvi;
    }

    public int[] breed(TopVector genes){
        Debug.dp(Debug.EVERYTHING, "Breeding ...");
        int numGenes = genes.size();
        int[] retval;
        Random R = new Random(); // Why use Random here? Because it didn't look random.
        // Crossover stage.
        if(R.nextFloat() < crossoverRate){
            // Check if there is a crossover;
            // make a set out of the instances.
            int mumIndex = (int) Math.abs(R.nextInt() % numGenes);
            int dadIndex = (int) Math.abs(R.nextInt() % numGenes);
            while(dadIndex == mumIndex){
                // [listing truncated here in the source]
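A minimal usage sketch (not part of the file above). It assumes the surrounding TClass pipeline already provides a DomDesc, an EventDescVecI and a ClassStreamEventsVecI; only the GainCluster calls shown here (setDomDesc, setEventDescVec, setParam, cluster) appear in the listing, and the parameter values are illustrative.

// Usage sketch only -- dd, edv and data are assumed to come from the
// rest of the TClass pipeline; they are not constructed here.
import tclass.*;
import tclass.clusteralg.GainCluster;

public class GainClusterExample {
    public static ClusterVecI run(DomDesc dd, EventDescVecI edv,
                                  ClassStreamEventsVecI data)
            throws InvalidParameterException {
        GainCluster gc = new GainCluster();
        gc.setDomDesc(dd);            // domain description, used when parsing values
        gc.setEventDescVec(edv);      // must be set before setParam("metafeature", ...)
        gc.setParam("numTrials", "500");      // fewer random trials than the default 1000
        gc.setParam("searchAlg", "genetic");  // or "random"
        gc.setParam("minCent", "2");
        gc.setParam("maxCent", "6");
        gc.setParam("evalMetric", "gainratio");
        return gc.cluster(data);      // returns an empty ClusterVec if there is nothing to cluster
    }
}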