📄 counter.java
字号:
* Removes all keys whose count is 0. After incrementing and decrementing * counts or adding and subtracting Counters, there may be keys left whose * count is 0, though normally this is undesirable. This method cleans up * the map. * <p> * Maybe in the future we should try to do this more on-the-fly, though it's * not clear whether a distinction should be made between "never seen" (i.e. * null count) and "seen with 0 count". Certainly there's no distinction in * getCount() but there is in containsKey(). */ public void removeZeroCounts() { for(Iterator iter=keySet().iterator();iter.hasNext();) if(getCount(iter.next())==0) iter.remove(); } /** Finds and returns the largest count in this Counter. */ public double max() { double max=Double.NEGATIVE_INFINITY; for(Iterator iter=keySet().iterator();iter.hasNext();) max=Math.max(max,getCount(iter.next())); return(max); } /** Finds and returns the smallest count in this Counter. */ public double min() { double min=Double.POSITIVE_INFINITY; for(Iterator iter=keySet().iterator();iter.hasNext();) min=Math.min(min,getCount(iter.next())); return(min); } /** * Finds and returns the key in this Counter with the largest count. * Ties are broken by comparing the objects using the given tie breaking * Comparator, favoring Objects that are sorted to the front. This is useful * if the keys are numeric and there is a bias to prefer smaller or larger * values, and can be useful in other circumstances where random tie-breaking * is not desirable. Returns null if this Counter is empty. */ public Object argmax(Comparator tieBreaker) { double max=Double.NEGATIVE_INFINITY; Object argmax=null; for(Iterator iter=keySet().iterator();iter.hasNext();) { Object key=iter.next(); double count=getCount(key); if(count>max || (count==max && tieBreaker.compare(key,argmax)<0)) { max=count; argmax=key; } } return(argmax); } /** * Finds and returns the key in this Counter with the largest count. * Ties are broken according to the natural ordering of the objects. * This will prefer smaller numeric keys and lexicographically earlier * String keys. To use a different tie-breaking Comparator, use * {@link #argmax(Comparator}}. Returns null if this Counter is empty. */ public Object argmax() { return(argmax(naturalComparator)); } /** * Finds and returns the key in this Counter with the smallest count. * Ties are broken by comparing the objects using the given tie breaking * Comparator, favoring Objects that are sorted to the front. This is useful * if the keys are numeric and there is a bias to prefer smaller or larger * values, and can be useful in other circumstances where random tie-breaking * is not desirable. Returns null if this Counter is empty. */ public Object argmin(Comparator tieBreaker) { double min=Double.POSITIVE_INFINITY; Object argmin=null; for(Iterator iter=keySet().iterator();iter.hasNext();) { Object key=iter.next(); double count=getCount(key); if(count<min || (count==min && tieBreaker.compare(key,argmin)<0)) { min=count; argmin=key; } } return(argmin); } /** * Finds and returns the key in this Counter with the smallest count. * Ties are broken according to the natural ordering of the objects. * This will prefer smaller numeric keys and lexicographically earlier * String keys. To use a different tie-breaking Comparator, use * {@link #argmin(Comparator}}. Returns null if this Counter is empty. */ public Object argmin() { return(argmin(naturalComparator)); } /** * Returns the set of keys whose counts are at or above the given threshold. * This set may have 0 elements but will not be null. */ public Set keysAbove(double countThreshold) { Set keys=new HashSet(); for(Iterator iter=keySet().iterator();iter.hasNext();) { Object key=iter.next(); if(getCount(key)>=countThreshold) keys.add(key); } return(keys); } /** * Returns the set of keys whose counts are at or below the given threshold. * This set may have 0 elements but will not be null. */ public Set keysBelow(double countThreshold) { Set keys=new HashSet(); for(Iterator iter=keySet().iterator();iter.hasNext();) { Object key=iter.next(); if(getCount(key)<=countThreshold) keys.add(key); } return(keys); } /** * Returns the set of keys that have exactly the given count. * This set may have 0 elements but will not be null. */ public Set keysAt(double count) { Set keys=new HashSet(); for(Iterator iter=keySet().iterator();iter.hasNext();) { Object key=iter.next(); if(getCount(key)==count) keys.add(key); } return(keys); } /** * Calculates the entropy of this counter (in bits). This method internally * uses normalized counts (so they sum to one), but the value returned is * meaningless if some of the counts are negative. * @return The entropy of this counter (in bits) */ public double entropy() { double entropy=0.0; for(Iterator iter=keySet().iterator();iter.hasNext();) { double count=getCount(iter.next()); if(count==0) continue; // 0.0 doesn't add entropy but may cause -Inf count/=total(); // use normalized count entropy-=count*(Math.log(count)/Math.log(2.0)); } return(entropy); } /** * Calculates the KL divergence between this counter and another * counter. That is, it calculates KL(this||other). This method internally * uses normalized counts (so they sum to one), but the value returned is * meaningless if some of the counts are negative. * @param other The other <code>Counter</code> * @return The KL divergence between the distributions */ public double klDivergence(Counter other) { double kl = 0.0; double tot = total(); double tot2 = other.total(); // System.out.println("tot is " + tot + " tot2 is " + tot2); for (Iterator it = seenSet().iterator(); it.hasNext(); ) { Object key = it.next(); double num = countOf(key); if (num == 0) continue; num /= tot; double num2 = other.countOf(key); num2 /= tot2; // System.out.println("num is " + num + " num2 is " + num2); kl += num * (Math.log(num/num2)/Math.log(2.0)); } return kl; } /** * Calculates the information radius (aka the Jensen-Shannon divergence) * between * this Counter and another counter. This measure is defined as: * <blockquote> iRad(p,q) = D(p||(p+q)/2)+D(q,(p+q)/2) </blockquote> * where p is this Counter, q is the other counter, and D(p||q) is the * KL divergence bewteen p and q. Note that iRad(p,q) = iRad(q,p). * @param other The other <code>Counter</code> * @return The information radius between the distributions */ public double informationRadius(Counter other) { Counter avg=average(other); // (p+q)/2 return(klDivergence(avg)+other.klDivergence(avg)); } /** * Returns a new Counter with counts averaged from this Counter and the * given Counter. The average Counter will contain the union of keys in * both * source Counters, and each count will be the average of the two source * counts for that key, where as usual a missing count in one Counter * is treated as count 0. */ public Counter average(Counter other) { Counter average=new Counter(); Set allKeys=new HashSet(keySet()); allKeys.addAll(other.keySet()); for(Iterator iter=allKeys.iterator();iter.hasNext();) { Object key=iter.next(); average.setCount(key,(getCount(key)+other.getCount(key))*0.5); } return(average); } /** * Returns a comparator suitable for sorting this Counter's keys or entries * by their respective counts. If <tt>ascending</tt> is true, lower counts * will be returned first, otherwise higher counts will be returned first. * <p> * Sample usage: * <pre> * Counter c = new Counter(); * // add to the counter... * List biggestKeys = Collections.sort(new ArrayList(c.keySet()), c.comparator(false)); * List smallestEntries = Collections.sort(new ArrayList(c.entrySet()), c.comparator(true)) * </pre> */ public Comparator comparator(boolean ascending) { return(new EntryValueComparator(this,ascending)); } /** * Comparator that sorts objects by (increasing) count. Shortcut for calling * {@link #comparator(boolean) comparator(true)}. */ public Comparator comparator() { return(comparator(true)); } /** Comparator that uses natural ordering. Returns 0 if o1 is not Comparable. */ private static class NaturalComparator implements Comparator { public int compare(Object o1,Object o2) { if(o1 instanceof Comparable) return(((Comparable)o1).compareTo(o2)); return(0); // soft-fail } } /** For internal debugging purposes only. */ public static void main(String[] args) { Counter c=new Counter(); c.setCount("p",0); c.setCount("q",2); System.out.println(c+" -> "+c.total()); c.incrementCount("p"); System.out.println(c+" -> "+c.total()); c.put("p",new Integer(3)); System.out.println(c+" -> "+c.total()); System.out.println(c.entropy()); System.out.println(c.klDivergence(c)); System.out.println(c.min()); System.out.println(c.argmin()); } // --- OLD DEPRECATED METHODS --- // /** * Returns whether count of object is non-zero. This normally corresponds to * the object having been previously seen. * @deprecated use the standard containsKey function of Map. */ public boolean hasSeen(Object o) { return containsKey(o); } /** * Returns the set of objects with non-zero counts. This normally corresponds * to the objects that have been previously seen. * @deprecated use the standard keySet() function of Map. */ public Set seenSet() { return keySet(); } /** * Returne the current count for the given object, or 0 if it hasn't been * seen before. * @deprecated use getCount instead. */ public double countOf(Object o) { return(getCount(o)); } /** * Returns the total count. * @deprecated use totalCount instead. */ public double total() { return(totalCount()); } /** * Adds the given count to the given object. * @deprecated use incrementCount instead. */ public void add(Object o,double count) { incrementCount(o,count); } /** * Adds all the counts from the given Counter. * @deprecated use addAll instead. */ public void addCounter(Counter c) { addAll(c); } /** * Adds 1.0 to the count of the given Object. * @deprecated use incrementCount instead. */ public void increment(Object o) { incrementCount(o); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -