📄 bitoutput.java

📁 一个自然语言处理的Java开源工具包。LingPipe目前已有很丰富的功能
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
package com.aliasi.io;import com.aliasi.util.Math;import java.io.OutputStream;import java.io.IOException;/**  * A <code>BitOutput</code> wraps an underlying output stream to * provide bit-level output.  Output is written through the method * {@link #writeBit(boolean)}, with <code>true</code> used for the bit * <code>1</code> and <code>false</code> for the bit <code>0</code>. * The methods {@link #writeTrue()} and {@link #writeFalse()} are * shorthand for <code>writeBit(true)</code> and * <code>writeBit(false)</code> respectively. * * <P>If the number of bits written before closing the output does not * land on a byte boundary, the remaining fractional byte is filled * with <code>0</code> bits. * * <P>None of the methods in this class are safe for concurrent access * by multiple threads. * * @author Bob Carpenter * @version 2.1.1 * @since LingPipe2.1.1 */public class BitOutput {    private int mNextByte;    private int mNextBitIndex;    private final OutputStream mOut;    /**      * Construct a bit output wrapping the specified output stream.     *     * @param out Underlying output stream.     */    public BitOutput(OutputStream out) {	mOut = out;	reset();    }    /**     * Writes the bits for a unary code for the specified positive     * number.  The unary code for the number <code>n</code> is     * defined by:     *     * <blockquote><code>     * unaryCode(n) = 0<sup><sup>n-1</sup></sup> 1     * </code></blockquote>     *     * In words, the number <code>n</code> is coded as     * <code>n-1</code> zeros followed by a one.  The following     * table illustrates the first few unary codes:     *     * <blockquote><table border='1' cellpadding='5'>     * <tr><td><i>Number</i></td><td><i>Code</i></td></tr>     * <tr><td>1</td><td><code>1</code></td></tr>     * <tr><td>2</td><td><code>01</code></td></tr>     * <tr><td>3</td><td><code>001</code></td></tr>     * <tr><td>4</td><td><code>0001</code></td></tr>     * <tr><td>5</td><td><code>00001</code></td></tr>     * </table></blockquote>     *      * @param n Number to code.     * @throws IOException If there is an I/O error writing     * to the underlying output stream.     * @throws IllegalArgumentException If the number to be encoded is     * zero or negative.     */    public void writeUnary(int n) throws IOException {	validatePositive(n);	// fit in buffer	int numZeros = n - 1;	if (numZeros <= mNextBitIndex) {	    mNextByte = mNextByte << numZeros;	    mNextBitIndex -= numZeros;	    writeTrue();	    return;	} 	// fill buffer, write and flush	// numZeros > mNextBitIndex	mOut.write(mNextByte << mNextBitIndex);	numZeros -= (mNextBitIndex+1);	reset();	// fill in even multiples of eight	for (; numZeros >= 8; numZeros -= 8)	    mOut.write(ZERO_BYTE);	// fill in last zeros	mNextBitIndex -= numZeros;	writeTrue();    }    /**     * Writes the bits of a binary representation of the specified     * non-negative number in the specified number of bits.  if the     * number will not fit in the number of bits specified, an     * exception is raised.     *     * <P>For instance, the following illustrates one, two and     * three-bit codings.     *      * <blockquote><table border='1' cellpadding='5'>     * <tr><td rowspan='2'><i>Number</i></td>     *     <td rowspan='2'><i>Binary</i></td>     *     <td colspan='3'><i>Code for Num Bits</i></td></tr>     * <tr><td><i>1</i></td> <td><i>2</i></td> <td><i>3</i></td></tr>     * <tr><td>0</td><td>1</td>       *     <td>0</td>      *     <td>00</td>      *     <td>000</td></tr>     * <tr><td>1</td><td>1</td>       *     <td>1</td>      *     <td>01</td>      *     <td>001</td></tr>     * <tr><td>2</td><td>10</td>      *     <td><code>Exception</code></td>     *     <td>10</td>     *     <td>010</td></tr>     * <tr><td>3</td><td>10</td>      *     <td><code>Exception</code></td>     *     <td>11</td>     *     <td>011</td></tr>     * <tr><td>4</td><td>100</td>      *     <td><code>Exception</code></td>     *     <td><code>Exception</code></td>     *     <td>100</td></tr>     * <tr><td>5</td><td>101</td>      *     <td><code>Exception</code></td>     *     <td><code>Exception</code></td>     *     <td>101</td></tr>     * <tr><td>6</td><td>110</td>      *     <td><code>Exception</code></td>     *     <td><code>Exception</code></td>     *     <td>110</td></tr>     * <tr><td>7</td><td>111</td>      *     <td><code>Exception</code></td>     *     <td><code>Exception</code></td>     *     <td>111</td></tr>     * <tr><td>8</td><td>1000</td>      *     <td><code>Exception</code></td>     *     <td><code>Exception</code></td>     *     <td><code>Exception</code></td></tr>     * </table></blockquote>     *     * @param n Number to code.     * @param numBits Number of bits to use for coding.     * @throws IllegalArgumentException If the number to code is     * negative, the number of bits is greater than 63, or the number     * will not fit into the specified number of bits.     * @throws IOException If there is an error writing to the     * underlying output stream.     */    public void writeBinary(long n, int numBits) throws IOException {	validateNonNegative(n);	validateNumBits(numBits);	int k = mostSignificantPowerOfTwo(n);	if (k >= numBits) {	    String msg = "Number will not fit into number of bits."		+ " n=" + n		+ " numBits=" + numBits;	    throw new IllegalArgumentException(msg);	}	writeLowOrderBits(numBits,n);    }    /**     * Writes the bits for Rice code for the specified non-negative     * number with the specified number of bits fixed for the binary     * remainder.  Rice coding is a form of Golomb coding where the     * Golomb paramemter is a power of two (2 to the number of bits in     * the remainder).  The Rice code is defined by unary coding a     * magnitude and then binary coding the remainder.  It can be     * defined by taking a quotient and remainder:     *     * <blockquote>     * <table border='0' cellpadding='5'>     * <tr><td align='right'>m</td> <td>=</td> <td>2<sup><sup>b</sup></sup></td>      *                  <td>=</td> <td>(1&lt;&lt;b)</td></tr>     * <tr><td align='right'>q</td> <td>=</td> <td>(n - 1) / m</td>     *                <td>=</td> <td>(n - 1) &gt;&gt;&gt; b</td></tr>     * <tr><td align='right'>r</td> <td>=</td> <td>n - q*m - 1</td>      *                <td>=</td> <td>n - (q &lt;&lt; b) - 1</td></tr>     * </table>     * </code></blockquote>     *     * both of which are defined by shifting, and then coding each     * in turn using a unary code for the quotient and binary code for     * the remainder:     *      * <blockquote><code>     * riceCode(n,b) = unaryCode(q) binaryCode(r)     * </code></blockquote>     *     * For example, we get the following codes with the number of     * fixed remainder bits set to 1, 2 and 3, with the unary coded     * quotient separated from the binary coded remainder by a space:     *     * <blockquote><table border='1' cellpadding='5'>     * <tr><td rowspan='2'>Number<br><code>n</code></td>     *     <td rowspan='2'>Binary</td>     *     <td colspan='3'>Code for Number of Remainder Bits</td></tr>     * <tr><td>b=<i>1</i></td> <td>b=<i>2</i></td> <td>b=<i>3</i></td></tr>     * <tr><td>1</td> <td>1</td>      *     <td>1 0</td> <td>1 00</td> <td>1 000</td></tr>     * <tr><td>2</td> <td>10</td>      *     <td>1 1</td> <td>1 01</td> <td>1 001</td></tr>     * <tr><td>3</td> <td>11</td>      *     <td>01 0</td> <td>1 10</td> <td>1 010</td></tr>     * <tr><td>4</td> <td>100</td>      *     <td>01 1</td> <td>1 11</td> <td>1 011</td></tr>     * <tr><td>5</td> <td>101</td>      *     <td>001 0</td> <td>01 00</td> <td>1 100</td></tr>     * <tr><td>6</td> <td>110</td>      *     <td>001 1</td> <td>01 01</td> <td>1 101</td></tr>     * <tr><td>7</td> <td>111</td>      *     <td>0001 0</td> <td>01 10</td> <td>1 110</td></tr>     * <tr><td>8</td> <td>1000</td>      *     <td>0001 1</td> <td>01 11</td> <td>1 111</td></tr>     * <tr><td>9</td> <td>1001</td>      *     <td>00001 0</td> <td>001 00</td> <td>01 000</td></tr>     * <tr><td>10</td> <td>1010</td>      *     <td>00001 1</td> <td>001 01</td> <td>01 001</td></tr>     * <tr><td>11</td> <td>1011</td>      *     <td>000001 0</td> <td>001 10</td> <td>01 010</td></tr>     * <tr><td>12</td> <td>1100</td>      *     <td>000001 1</td> <td>001 11</td> <td>01 011</td></tr>     * <tr><td>13</td> <td>1101</td>      *     <td>0000001 0</td> <td>0001 00</td> <td>01 100</td></tr>     * <tr><td>14</td> <td>1110</td>      *     <td>0000001 1</td> <td>0001 01</td> <td>01 101</td></tr>     * <tr><td>15</td> <td>1111</td>      *     <td>00000001 0</td> <td>0001 10</td> <td>01 110</td></tr>     * <tr><td>16</td> <td>10000</td>      *     <td>00000001 1</td> <td>0001 11</td> <td>01 111</td></tr>     * <tr><td>17</td> <td>10001</td>      *     <td>000000001 0</td> <td>00001 00</td> <td>001 000</td></tr>     * </table></blockquote>     *      * In the limit, if the number of remaining bits to code is set to     * zero, the Rice code would reduce to a unary code:     *     * <blockquote><code>     * riceCode(n,0) = unaryCode(n)     * </code></blockquote>     *     * but this method will throw an exception with a remainder size     * of zero.     *     * <P>In the limit the other way, if the number of remaining bits     * is set to the width of the maximum value, the Rice code is just     * the unary coding of 1, which is the single binary digit 1,     * followed by the binary code itself:     *     * <blockquote><code>     * riceCode(n,64) = unaryCode(1) binaryCode(n,64) = 1 binaryCode(n,64)     * </code></blockquote>     *      * <P>The method will throw an exception if the encoding     * produces a unary code that would output more bits     * than would fit in a positive integer (that is, more     * than (2<sup><sup>32</sup></sup>-1) bits.     *      * For more information, see:     *      * <UL>     *     * <LI> Golomb, S. 1966. Run-length encodings. <i>IEEE     * Trans. Inform. Theory</i>.  <bf>12</bf>(3):399-401.     *     * <LI> Rice, R. F. 1979. Some practical universal noiseless     * coding techniques. <i>JPL Publication 79-22</i>. March 1979.     *     * <LI> Witten, Ian H., Alistair Moffat, and Timothy C. Bell.     * 1999. <i>Managing Gigabytes</i>. Academic Press.     *     * <LI> <a href="http://en.wikipedia.org/wiki/Golomb_coding"     * >Wikipedia: Golomb coding</a>     *     * </UL>     *     * @param n Number to code.     * @param numFixedBits Number of bits to use for the fixed     * remainder encoding.       * @throws IOException If there is an error writing to the     * underlying output stream.     * @throws IllegalArgumentException If the number to be encoded is     * not positive, if the number of fixed bits is not positive, or if     * the unary prefix code overflows.     */    public void writeRice(long n, int numFixedBits) throws IOException {	validatePositive(n);	validateNumBits(numFixedBits);	long q = (n - 1l) >> numFixedBits;	long prefixBits = q + 1l;	if (prefixBits >= Integer.MAX_VALUE) {	    String msg = "Prefix too long to code."		+ " n=" + n		+ " numFixedBits=" + numFixedBits		+ " number of prefix bits=(n>>numFixBits)=" + prefixBits;	    throw new IllegalArgumentException(msg);	}	writeUnary((int) prefixBits);	long remainder = n - (q << numFixedBits) - 1;	writeLowOrderBits(numFixedBits,remainder);    }    /**     * Writes the Fibonacci code for the specified positive number.     * Roughly speaking, the Fibonacci code specifies a number     * as a sum of non-consecutive Fibonacci numbers, terminating     * a representation with two consecutive 1 bits.       *     * <P>Fibonacci     * numbers are defined by setting      *     * <blockquote><pre>     * Fib(0) = 0     * Fib(1) = 1     * Fib(n+2) = Fib(n+1) + Fib(n)     * </pre></blockquote>     *     * The first few Fibonacci numbers are:     *     * <blockquote><code>     * 0, 1, 1, 2, 3, 5, 8, 13, 21, ...     * </code></blockquote>     *     * This method starts with the second <code>1</code> value,     * namely <code>Fib(2)</code>, making the sequence a sequence     * of unique numbers starting with <code>1, 2, 3, 5,...</code>.     *     * <P>The Fibonacci representation of a number is a bit vector     * indicating the Fibonacci numbers used in the sum.  The     * Fibonacci code reverses the Fibonacci representation and     * appends a 1 bit.  Here are examples for the first 17 numbers:     *     * <blockquote><table border='1' cellpadding='5'>     * <tr><td><i>Number</i></td> <td><i>Fibonacci Representation</i></td>     *     <td><i>Fibonacci Code</i></td></tr>     * <tr><td>1</td> <td>1</td> <td>11</td></tr>     * <tr><td>2</td> <td>10</td> <td>01 1</td></tr>     * <tr><td>3</td> <td>100</td> <td>001 1</td></tr>     * <tr><td>4</td> <td>101</td> <td>101 1</td></tr>     * <tr><td>5</td> <td>1000</td> <td>0001 1</td></tr>     * <tr><td>6</td> <td>1001</td> <td>1001 1</td></tr>     * <tr><td>7</td> <td>1010</td> <td>0101 1</td></tr>     * <tr><td>8</td> <td>10000</td> <td>00001 1</td></tr>     * <tr><td>9</td> <td>10001</td> <td>10001 1</td></tr>     * <tr><td>10</td> <td>10010</td> <td>01001 1</td></tr>     * <tr><td>11</td> <td>10100</td> <td>00101 1</td></tr>     * <tr><td>12</td> <td>10101</td> <td>10101 1</td></tr>     * <tr><td>13</td> <td>100000</td> <td>000001 1</td></tr>     * <tr><td>14</td> <td>100001</td> <td>100001 1</td></tr>     * <tr><td>15</td> <td>100010</td> <td>010001 1</td></tr>     * <tr><td>16</td> <td>100100</td> <td>001001 1</td></tr>     * <tr><td>17</td> <td>100101</td> <td>101001 1</td></tr>     * </table></blockquote>     *     * For example, the number 11 is coded as the sum of the     * non-consecutive Fibonacci numbers 8 + 3, so the Fibonacci     * representation is <code>10100</code> (8 is the fifth number in     * the series above, 3 is the third).  Its Fibonacci code reverses     * the number to <code>00101</code> and appends a <code>1</code>     * to yield <code>001011</code>.     *     * <P>Fibonacci codes can represent arbitrary positive numbers up     * to <code>Long.MAX_VALUE</code>.       *     * <P>See {@link Math#FIBONACCI_SEQUENCE} for a definition of     * the Fibonacci sequence as an array of longs.     *     * <P>In the limit (for larger numbers), the number of bits     * used by a Fibonacci coding is roughly 60 percent higher     * than the number of bits used for a binary code.  The benefit     * is that Fibonacci codes are prefix codes, whereas binary codes     * are not.     *     * @param n Number to encode.     * @throws IllegalArgumentException If the number is not positive.     * @throws IOException If there is an I/O exception writing to the     * underlying stream.     */    public void writeFibonacci(long n) throws IOException {	validatePositive(n);	long[] fibs = Math.FIBONACCI_SEQUENCE;	boolean[] buf = FIB_BUF;	int mostSigPlace = mostSigFibonacci(fibs,n);	for (int place = mostSigPlace; place >= 0; --place) {	    if (n >= fibs[place]) {		n -= fibs[place];		buf[place] = true;	    } else {		buf[place] = false;	    }	}	for (int i = 0; i <= mostSigPlace; ++i)	    writeBit(buf[i]);
12 下一页
💿 文件大小 4561 K
👤 上传用户 edan1181
📂 所属分类 Java编程
🏷️ 相关标签

#LingPipe #Java #自然语言处理 #开源
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -