📄 counter.java

📁 Standord Classifier实现了一个基于Java的最大熵分类器。用于模式识别
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
     * Removes all keys whose count is 0. After incrementing and decrementing     * counts or adding and subtracting Counters, there may be keys left whose     * count is 0, though normally this is undesirable. This method cleans up     * the map.      * <p>     * Maybe in the future we should try to do this more on-the-fly, though it's     * not clear whether a distinction should be made between "never seen" (i.e.     * null count) and "seen with 0 count". Certainly there's no distinction in     * getCount() but there is in containsKey().     */    public void removeZeroCounts()    {        for(Iterator iter=keySet().iterator();iter.hasNext();)            if(getCount(iter.next())==0)                 iter.remove();                }        /** Finds and returns the largest count in this Counter. */    public double max()    {        double max=Double.NEGATIVE_INFINITY;        for(Iterator iter=keySet().iterator();iter.hasNext();)            max=Math.max(max,getCount(iter.next()));        return(max);    }        /** Finds and returns the smallest count in this Counter. */    public double min()    {        double min=Double.POSITIVE_INFINITY;        for(Iterator iter=keySet().iterator();iter.hasNext();)            min=Math.min(min,getCount(iter.next()));        return(min);    }        /**      * Finds and returns the key in this Counter with the largest count.     * Ties are broken by comparing the objects using the given tie breaking     * Comparator, favoring Objects that are sorted to the front. This is useful     * if the keys are numeric and there is a bias to prefer smaller or larger     * values, and can be useful in other circumstances where random tie-breaking     * is not desirable. Returns null if this Counter is empty.     */    public Object argmax(Comparator tieBreaker)    {        double max=Double.NEGATIVE_INFINITY;        Object argmax=null;        for(Iterator iter=keySet().iterator();iter.hasNext();)        {            Object key=iter.next();            double count=getCount(key);            if(count>max || (count==max && tieBreaker.compare(key,argmax)<0))            {                max=count;                argmax=key;            }        }        return(argmax);    }            /**      * Finds and returns the key in this Counter with the largest count.     * Ties are broken according to the natural ordering of the objects.      * This will prefer smaller numeric keys and lexicographically earlier     * String keys. To use a different tie-breaking Comparator, use     * {@link #argmax(Comparator}}. Returns null if this Counter is empty.     */    public Object argmax() { return(argmax(naturalComparator)); }        /**      * Finds and returns the key in this Counter with the smallest count.     * Ties are broken by comparing the objects using the given tie breaking     * Comparator, favoring Objects that are sorted to the front. This is useful     * if the keys are numeric and there is a bias to prefer smaller or larger     * values, and can be useful in other circumstances where random tie-breaking     * is not desirable. Returns null if this Counter is empty.     */    public Object argmin(Comparator tieBreaker)    {        double min=Double.POSITIVE_INFINITY;        Object argmin=null;        for(Iterator iter=keySet().iterator();iter.hasNext();)        {            Object key=iter.next();            double count=getCount(key);            if(count<min || (count==min && tieBreaker.compare(key,argmin)<0))            {                min=count;                argmin=key;            }        }        return(argmin);    }        /**      * Finds and returns the key in this Counter with the smallest count.     * Ties are broken according to the natural ordering of the objects.      * This will prefer smaller numeric keys and lexicographically earlier     * String keys. To use a different tie-breaking Comparator, use     * {@link #argmin(Comparator}}. Returns null if this Counter is empty.     */    public Object argmin() { return(argmin(naturalComparator)); }        /**     * Returns the set of keys whose counts are at or above the given threshold.     * This set may have 0 elements but will not be null.     */    public Set keysAbove(double countThreshold)    {        Set keys=new HashSet();        for(Iterator iter=keySet().iterator();iter.hasNext();)        {            Object key=iter.next();            if(getCount(key)>=countThreshold)                keys.add(key);        }        return(keys);    }        /**     * Returns the set of keys whose counts are at or below the given threshold.     * This set may have 0 elements but will not be null.     */    public Set keysBelow(double countThreshold)    {        Set keys=new HashSet();        for(Iterator iter=keySet().iterator();iter.hasNext();)        {            Object key=iter.next();            if(getCount(key)<=countThreshold)                keys.add(key);        }        return(keys);    }        /**     * Returns the set of keys that have exactly the given count.     * This set may have 0 elements but will not be null.     */    public Set keysAt(double count)    {        Set keys=new HashSet();        for(Iterator iter=keySet().iterator();iter.hasNext();)        {            Object key=iter.next();            if(getCount(key)==count)                keys.add(key);        }        return(keys);    }           /**      * Calculates the entropy of this counter (in bits). This method internally     * uses normalized counts (so they sum to one), but the value returned is      * meaningless if some of the counts are negative.     * @return The entropy of this counter (in bits)     */    public double entropy()     {       	double entropy=0.0;        for(Iterator iter=keySet().iterator();iter.hasNext();)         {    	    double count=getCount(iter.next());    	    if(count==0) continue; // 0.0 doesn't add entropy but may cause -Inf            count/=total(); // use normalized count            entropy-=count*(Math.log(count)/Math.log(2.0));	    }        return(entropy);    }    /**      * Calculates the KL divergence between this counter and another      * counter.  That is, it calculates KL(this||other). This method internally     * uses normalized counts (so they sum to one), but the value returned is     * meaningless if some of the counts are negative.     * @param other The other <code>Counter</code>     * @return The KL divergence between the distributions     */    public double klDivergence(Counter other)     {        double kl = 0.0;        double tot = total();        double tot2 = other.total();        // System.out.println("tot is " + tot + " tot2 is " + tot2);        for (Iterator it = seenSet().iterator(); it.hasNext(); ) {            Object key = it.next();            double num = countOf(key);            if (num == 0) continue;            num /= tot;            double num2 = other.countOf(key);            num2 /= tot2;            // System.out.println("num is " + num + " num2 is " + num2);            kl += num * (Math.log(num/num2)/Math.log(2.0));        }        return kl;    }        /**     * Calculates the information radius (aka the Jensen-Shannon divergence)      * between     * this Counter and another counter.  This measure is defined as:     * <blockquote> iRad(p,q) = D(p||(p+q)/2)+D(q,(p+q)/2) </blockquote>     * where p is this Counter, q is the other counter, and D(p||q) is the      * KL divergence bewteen p and q.  Note that iRad(p,q) = iRad(q,p).     * @param other The other <code>Counter</code>     * @return The information radius between the distributions     */    public double informationRadius(Counter other)    {        Counter avg=average(other); // (p+q)/2        return(klDivergence(avg)+other.klDivergence(avg));    }        /**     * Returns a new Counter with counts averaged from this Counter and the      * given Counter. The average Counter will contain the union of keys in     * both     * source Counters, and each count will be the average of the two source      * counts for that key, where as usual a missing count in one Counter     * is treated as count 0.     */    public Counter average(Counter other)    {        Counter average=new Counter();        Set allKeys=new HashSet(keySet());        allKeys.addAll(other.keySet());        for(Iterator iter=allKeys.iterator();iter.hasNext();)        {            Object key=iter.next();            average.setCount(key,(getCount(key)+other.getCount(key))*0.5);        }        return(average);    }    /**     * Returns a comparator suitable for sorting this Counter's keys or entries     * by their respective counts. If <tt>ascending</tt> is true, lower counts     * will be returned first, otherwise higher counts will be returned first.     * <p>     * Sample usage:     * <pre>     * Counter c = new Counter();     * // add to the counter...     * List biggestKeys = Collections.sort(new ArrayList(c.keySet()), c.comparator(false));     * List smallestEntries = Collections.sort(new ArrayList(c.entrySet()), c.comparator(true))     * </pre>     */    public Comparator comparator(boolean ascending)    {        return(new EntryValueComparator(this,ascending));    }        /**      * Comparator that sorts objects by (increasing) count. Shortcut for calling     * {@link #comparator(boolean) comparator(true)}.     */    public Comparator comparator() { return(comparator(true)); }         /** Comparator that uses natural ordering. Returns 0 if o1 is not Comparable. */    private static class NaturalComparator implements Comparator    {        public int compare(Object o1,Object o2)        {            if(o1 instanceof Comparable)                return(((Comparable)o1).compareTo(o2));            return(0); // soft-fail        }    }            /** For internal debugging purposes only. */    public static void main(String[] args)    {        Counter c=new Counter();        c.setCount("p",0);        c.setCount("q",2);        System.out.println(c+" -> "+c.total());        c.incrementCount("p");        System.out.println(c+" -> "+c.total());        c.put("p",new Integer(3));        System.out.println(c+" -> "+c.total());        System.out.println(c.entropy());        System.out.println(c.klDivergence(c));        System.out.println(c.min());        System.out.println(c.argmin());    }        // --- OLD DEPRECATED METHODS --- //    /**      * Returns whether count of object is non-zero.  This normally corresponds to      * the object having been previously seen.      * @deprecated use the standard containsKey function of Map.     */    public boolean hasSeen(Object o) { return containsKey(o); }        /**     * Returns the set of objects with non-zero counts. This normally corresponds     * to the objects that have been previously seen.     * @deprecated use the standard keySet() function of Map.     */    public Set seenSet() { return keySet(); }        /**     * Returne the current count for the given object, or 0 if it hasn't been     * seen before.     * @deprecated use getCount instead.     */    public double countOf(Object o) { return(getCount(o)); }        /**     * Returns the total count.     * @deprecated use totalCount instead.     */    public double total() { return(totalCount()); }        /**     * Adds the given count to the given object.     * @deprecated use incrementCount instead.     */    public void add(Object o,double count) { incrementCount(o,count); }        /**     * Adds all the counts from the given Counter.     * @deprecated use addAll instead.     */    public void addCounter(Counter c) { addAll(c); }        /**     * Adds 1.0 to the count of the given Object.     * @deprecated use incrementCount instead.     */    public void increment(Object o) { incrementCount(o); }}
上一页 12
💿 文件大小 231 K
👤 上传用户 tanghui2008
📂 所属分类多国语言处理
🏷️ 相关标签

#Classifier #Standord #Java #熵
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -