📄 emclusterer.java
字号:
/** * This is an example of a clustering algorithm, in other words, * what we expect our clustering algorithms to be able to do. * * * @author Waleed Kadous * @version $Id: EMClusterer.java,v 1.1.1.1 2002/06/28 07:36:16 waleed Exp $ */package tclass.clusteralg; import tclass.*; import tclass.util.*; import weka.clusterers.*; import weka.core.*; public class EMClusterer implements ClusterAlgI { String baseName = "em"; String description = "Implements an EM clustering algorithm by using WEKA to do everything"; EventDescVecI edvi = null; DomDesc domDesc = null; private int pepIndex = 0; private boolean ignoreClasses = true; private boolean ignoreTime = false; private boolean autoNumClusters = true; int numClusters = 0; /** * Name of this clustering algorithm. */ public String name(){ return baseName; } /** * Clone the current object. * */ public Object clone() { try { return super.clone(); } catch (CloneNotSupportedException e){ // Can't happen, or so the java programming book says throw new InternalError(e.toString()); } } public void setDomDesc(DomDesc dd){ domDesc = dd; } /** * Set the description of the incoming Class Stream Events Vector * Note that we need this first ... for the parsing of values, * before we do any actual processing. I expect that the * ClusterAlgMgr should have a copy of it and passes it through in * the constructor, pretty much like the Domain description for the * GlobalExtrMgr */ public void setEventDescVec(EventDescVecI events){ edvi = events; } /** * Provides a description of the clustering algorithm. * This description explains what the basic idea of the clustering * algorihtm is (i.e. the sort of shapes it tried to find). It * should also explain any potential configuration options that * may be used to configure the object, using the configure * option. * * @return The description of this class. */ public String description(){ return description; } /** * * Describes any parameters used by this global extractor, * to suit a particular domain. * * @return A vector of parameters. */ public ParamVec getParamList(){ ParamVec pv = new ParamVec(); pv.add(new Param("metafeature", "The name of the metafeature to apply this to", "First")); pv.add(new Param("numclusters", "Number of clusters. Possible values: auto or a number", "auto")); pv.add(new Param("ignoreclass", "Completely ignore class information. Can be true or false", "false")); pv.add(new Param("ignoretime", "Ignore time (assuming the first attribute of any metafeature is time).. Can be true or false", "false")); return pv; } public void setParam(String p, String v) throws InvalidParameterException { if(p.equals("metafeature")){ // So they want to use a particular metafeature. //Try to find the metafeature. pepIndex = edvi.elIndex(v); if(pepIndex == -1){ throw new InvalidParameterException(p, v, "Unknown metafeature " + v); } } else if(p.equals("numclusters")){ if(v.equals("auto")){ autoNumClusters = true; } else { try { numClusters = Integer.parseInt(v); } catch(NumberFormatException nfe){ throw new InvalidParameterException(p, v, "Could not understand number of exceptions."); } } } else if(p.equals("ignoretime")){ if(v.equals("true")){ ignoreTime = true; } else if(v.equals("false")){ ignoreTime = false; } else { throw new InvalidParameterException(p, v, "Must be either true or false"); } } else if(p.equals("ignoreclass")){ if(v.equals("true")){ ignoreClasses = true; } else if(v.equals("false")){ ignoreClasses = false; } else { throw new InvalidParameterException(p, v, "Must be either true or false"); } } else { throw new InvalidParameterException(p, v, "Unknown parameter "+p); } } public ClusterVecI cluster(ClassStreamEventsVecI csevi){ try { // System.out.println("Ok, here's the stuff ..."); // System.out.println(csevi); // System.out.println(edvi); // System.out.println("PI = " + pepIndex); // System.out.println("IgnoreTime = " + ignoreTime); Instances stuff = WekaBridge.makeInstances(csevi, domDesc.getClassDescVec(), edvi, pepIndex, ignoreClasses, ignoreTime); EM myClusterer = new EM(); // myClusterer.setDebug(true); if(stuff.numInstances() > 10){ myClusterer.setNumClusters(-1); myClusterer.setNumCVs(10); } else { myClusterer.setNumClusters(2); } myClusterer.buildClusterer(stuff); return new EMClusterVec(myClusterer, pepIndex, edvi.elAt(pepIndex), stuff, ignoreTime); } catch(Exception e){ System.err.println("AAAARGH!! Clustering failed."); e.printStackTrace(); } return null; } }class EMClusterVec implements ClusterVecI { EM em; int pepIndex; boolean ignoreTime; EventDescI edi; Instances stuff; EMCluster[] myClusters; EMClusterVec(EM em, int pepIndex, EventDescI edi, Instances stuff, boolean ignoreTime){ this.em = em; this.pepIndex = pepIndex; this.edi = edi; this.stuff = stuff; myClusters = new EMCluster[size()]; for(int i=0; i < myClusters.length; i++){ myClusters[i] = new EMCluster(this, pepIndex, i, edi, ignoreTime); } this.ignoreTime = ignoreTime; } public float prob(Instance inst, int cluster){ try { inst.setDataset(stuff); float retval = (float) em.probInstanceInCluster(inst, cluster); return retval; } catch(Exception e){ System.err.println("AAAARGH!!! Could not get distribution f for instance!"); e.printStackTrace(); } return 0; } public ClusterMem findBestLabel(EventI ev){ // System.out.println("Trying to find best label for " + ev); Instance inst = WekaBridge.makeInstance(ev, edi, ignoreTime); inst.setDataset(stuff); try { int cluster = em.clusterInstance(inst); float conf = (float) em.probInstanceInCluster(inst, cluster); // if(conf > 1){ // System.out.println("WTF?? Label is " + (new ClusterMem(elAt(cluster), conf)).toString() ); // } return new ClusterMem(elAt(cluster), conf); } catch(Exception e){ System.out.println("Label finding failed. AAAAAARGH!!!"); e.printStackTrace(); return null; } } public String getClusterDesc(int cluster){ return em.describeCluster(cluster); } public int size(){ try { return em.numberOfClusters(); } catch(Exception e){ System.err.println("AAAARGH!!! Could not get number of instances!"); } return 0; } public ClusterI elAt(int i){ return myClusters[i]; } /** * Clone the current object. * */ public Object clone() { try { return super.clone(); } catch (CloneNotSupportedException e){ // Can't happen, or so the java programming book says throw new InternalError(e.toString()); } } }class EMCluster implements ClusterI { EMClusterVec parent; int pepIndex; int clusterIndex; boolean ignoreTime; EventDescI edi; String myName = "unnamed"; public String getName(){ return myName; } public void setName(String newname){ // System.out.println("Name set to " + newname); myName = newname; } public EMCluster(EMClusterVec parent, int pepIndex, int clusterIndex, EventDescI edi, boolean ignoreTime){ this.parent = parent; this.pepIndex = pepIndex; this.clusterIndex = clusterIndex; this.edi = edi; this.ignoreTime = ignoreTime; } public String getDescription() { return parent.getClusterDesc(clusterIndex); } public String toString(){ return myName + " " + parent.getClusterDesc(clusterIndex); } public float findMatch(StreamI si, StreamEventsI sei){ EventVecI events = sei.getEvents(pepIndex); int numEvents = events.size(); float bestEventDistance = Float.MAX_VALUE; for(int i=0; i < numEvents; i++){ Instance inst = WekaBridge.makeInstance(events.elAt(i), edi, ignoreTime); float dist = parent.prob(inst, clusterIndex); if(dist < bestEventDistance){ bestEventDistance = dist; } } return bestEventDistance; } public Object clone() { try { return super.clone(); } catch (CloneNotSupportedException e){ // Can't happen, or so the java programming book says throw new InternalError(e.toString()); } } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -