📄 replaycharsequencefactory.java
字号:
} } /** * @return Length of characters in stream to replay. Starts counting * at the HTTP header/body boundary. */ public int length() { return this.length; } /** * Get character at passed absolute position. * * Called by {@link #charAt(int)} which has a relative index into the * content, one that doesn't account for HTTP header if present. * * @param index Index into content adjusted to accomodate initial offset * to get us past the HTTP header if present (i.e. * {@link #contentOffset}). * * @return Characater at offset <code>index</code>. */ public char charAt(int index) { int c = -1; // Add to index start-of-content offset to get us over HTTP header // if present. index += this.contentOffset; if (index < this.prefixBuffer.length) { // If index is into our prefix buffer. c = this.prefixBuffer[index]; } else if (index >= this.wrapOrigin && (index - this.wrapOrigin) < this.wraparoundBuffer.length) { // If index is into our buffer window on underlying backing file. c = this.wraparoundBuffer[ ((index - this.wrapOrigin) + this.wrapOffset) % this.wraparoundBuffer.length]; } else { // Index is outside of both prefix buffer and our buffer window // onto the underlying backing file. Fix the buffer window // location. c = faultCharAt(index); } // Stream is treated as single byte. Make sure characters returned // are not negative. return (char)(c & 0xff); } /** * Get a character that's outside the current buffers. * * will cause the wraparoundBuffer to be changed to * cover a region including the index * * if index is higher than the highest index in the * wraparound buffer, buffer is moved forward such * that requested char is last item in buffer * * if index is lower than lowest index in the * wraparound buffer, buffet is reset centered around * index * * @param index Index of character to fetch. 
* @return A character that's outside the current buffers */ private int faultCharAt(int index) { if(Thread.interrupted()) { throw new RuntimeException("thread interrupted"); } if(index >= this.wrapOrigin + this.wraparoundBuffer.length) { // Moving forward while (index >= this.wrapOrigin + this.wraparoundBuffer.length) { // TODO optimize this advanceBuffer(); } return charAt(index - this.contentOffset); } // Moving backward recenterBuffer(index); return charAt(index - this.contentOffset); } /** * Move the buffer window on backing file back centering current access * position in middle of window. * * @param index Index of character to access. */ private void recenterBuffer(int index) { if (logger.isLoggable(Level.FINE)) { logger.fine("Recentering around " + index + " in " + this.backingFilename); } this.wrapOrigin = index - (this.wraparoundBuffer.length / 2); if(this.wrapOrigin < this.prefixBuffer.length) { this.wrapOrigin = this.prefixBuffer.length; } this.wrapOffset = 0; loadBuffer(); } /** * Load from backing file into the wrapper buffer. */ private void loadBuffer() { long len = -1; try { len = this.raFile.length(); this.raFile.seek(this.wrapOrigin - this.prefixBuffer.length); this.raFile.readFully(this.wraparoundBuffer, 0, Math.min(this.wraparoundBuffer.length, this.absoluteLength - this.wrapOrigin)); } catch (IOException e) { // TODO convert this to a runtime error? 
DevUtils.logger.log ( Level.SEVERE, "raFile.seek(" + (this.wrapOrigin - this.prefixBuffer.length) + ")\n" + "raFile.readFully(wraparoundBuffer,0," + (Math.min(this.wraparoundBuffer.length, this.length - this.wrapOrigin )) + ")\n"+ "raFile.length()" + len + "\n" + DevUtils.extraInfo(), e); throw new RuntimeException(e); } } /** * Roll the wraparound buffer forward one position */ private void advanceBuffer() { try { this.wraparoundBuffer[this.wrapOffset] = (byte)this.raFile.read(); this.wrapOffset++; this.wrapOffset %= this.wraparoundBuffer.length; this.wrapOrigin++; } catch (IOException e) { DevUtils.logger.log(Level.SEVERE, "advanceBuffer()" + DevUtils.extraInfo(), e); throw new RuntimeException(e); } } public CharSequence subSequence(int start, int end) { return new CharSubSequence(this, start, end); } /** * Cleanup resources. * * @exception IOException Failed close of random access file. */ public void close() throws IOException { this.prefixBuffer = null; if (this.raFile != null) { this.raFile.close(); this.raFile = null; } } /* (non-Javadoc) * @see java.lang.Object#finalize() */ protected void finalize() throws Throwable { super.finalize(); close(); } /* (non-Javadoc) * @see org.archive.io.EnhancedCharSequence#substring(int, int) */ public String substring(int offset, int len) { StringBuffer ret = new StringBuffer(len); // Add to offset start-of-content offset to get us over HTTP header // if present. offset += this.contentOffset; if (offset < this.prefixBuffer.length) { // Need something from the prefix buffer. int from = offset; // To the end of the buffer int count = this.prefixBuffer.length - from; if (offset + len < this.prefixBuffer.length) { count = len; // length falls within the buffer. } else { // Will need more then is in the prefixBuffer. offset = this.prefixBuffer.length + 1; len = len - count; } // Since we are dealing with a byte buffer we'll have to use // a String and then wrap up in a StringBuffer to concat with // the backing file. 
TODO: This can probably be optimized. // // Also, force an 8-bit encoding. Default jvm encoding is // usually -- us context -- 7 bit ascii. If we don't force // 8-bit, characters above 127 are considered rubbish. try { ret.append(new String(this.prefixBuffer,from,count, DEFAULT_SINGLE_BYTE_ENCODING)); } catch (UnsupportedEncodingException e) { logger.severe("Failed encoding string: " + e.getMessage()); } } if (offset >= this.prefixBuffer.length) { // TODO: Maybe better performance can be gained by reading // blocks from files. int to = offset + len; for(int i = offset ; i < to ; i++) { ret.append(charAt(i - this.contentOffset)); } } return ret.toString(); } public String toString() { return substring(0, length()); } } /** * Provides a (Replay)CharSequence view on recorded streams (a prefix * buffer and overflow backing file) that can handle streams of multibyte * characters. * * If possible, use {@link ByteReplayCharSequence}. It performs better even * for the single byte case (Decoding is an expensive process). * * <p>Call close on this class when done so can clean up resources. * * <p>Implementation currently works by checking to see if content to read * all fits the in-memory buffer. If so, we decode into a CharBuffer and * keep this around for CharSequence operations. This CharBuffer is * discarded on close. * * <p>If content length is greater than in-memory buffer, we decode the * buffer plus backing file into a new file named for the backing file w/ * a suffix of the encoding we write the file as. We then run w/ a * memory-mapped CharBuffer against this file to implement CharSequence. * Reasons for this implemenation are that CharSequence wants to return the * length of the CharSequence. * * <p>Obvious optimizations would keep around decodings whether the * in-memory decoded buffer or the file of decodings written to disk but the * general usage pattern processing URIs is that the decoding is used by one * processor only. 
Also of note, files usually fit into the in-memory
     * buffer.
     *
     * <p>We might also be able to keep up 3 windows that moved across the file
     * decoding a window at a time trying to keep one of the buffers just in
     * front of the regex processing returning it a length that would be only
     * the length of current position to end of current block or else the length
     * could be got by multiplying the backing files length by the decoders'
     * estimate of average character size.  This would save us writing out the
     * decoded file.  We'd have to do the latter for files that are
     * > Integer.MAX_VALUE.
     *
     * @author stack
     * @version $Revision: 1.39 $, $Date: 2006/06/01 05:58:37 $
     */
    private class MultiByteReplayCharSequence implements ReplayCharSequence {
        /**
         * Name of the encoding we use writing out concatenated decoded prefix
         * buffer and decoded backing file.
         *
         * <p>This define is also used as suffix for the file that holds the
         * decodings.  The name of the file that holds the decoding is the name
         * of the backing file w/ this encoding for a suffix.
         *
         * <p>See <a href="http://java.sun.com/j2se/1.4.2/docs/guide/intl/encoding.doc.html">Encoding</a>.
         */
        private static final String WRITE_ENCODING = "UTF-16BE";

        /**
         * CharBuffer of decoded content.
         *
         * Content of this buffer is unicode.
         */
        private CharBuffer content = null;

        /**
         * File that has decoded content.
         *
         * Keep it around so we can remove on close.
         */
        private File decodedFile = null;

        /**
         * Constructor.
         *
         * <p>Decodes the passed prefix buffer and backing file via
         * <code>decode</code> and keeps the result as this sequence's content.
         *
         * @param buffer In-memory buffer of recordings prefix.  We read from
         * here first and will only go to the backing file if <code>size</code>
         * requested is greater than <code>buffer.length</code>.
         * @param size Total size of stream to replay in bytes.  Used to find
         * EOS. This is total length of content including HTTP headers if
         * present.
         * @param responseBodyStart Where the response body starts in bytes.
         * Used to skip over the HTTP headers if present.
         * @param backingFilename Path to backing file with content in excess of
         * whats in <code>buffer</code>.
         * @param encoding Encoding to use reading the passed prefix buffer and
         * backing file.  For now, should be java canonical name for the
         * encoding.  Must not be null: this constructor does not fall back to
         * ByteReplayCharSequence, it throws a NullPointerException.
         *
         * @throws IOException Presumably propagated from <code>decode</code>;
         * that method is not visible here -- TODO confirm.
         * @throws NullPointerException If <code>encoding</code> is null.
         */
        private MultiByteReplayCharSequence(byte[] buffer, long size,
                long responseBodyStart, String backingFilename, String encoding)
            throws IOException {
            super();
            // Fail fast: a null encoding is rejected before any decoding.
            if (encoding == null) {
                throw new NullPointerException("Character encoding is null.");
            }
            this.content = decode(buffer, backingFilename, size,
                responseBodyStart, encoding);
        }

        /**
         * Decode passed buffer and backing file into a CharBuffer.
         *
         * This method writes a new file made of the decoded concatenation of
         * the in-memory prefix buffer and the backing file.  Returns a
         * charSequence view onto this new file.
         *
         * @param buffer In-memory buffer of recordings prefix.  We read from
         * here first and will only go to the backing file if <code>size</code>
         * requested is greater than <code>buffer.length</code>.
         * @param size Total size of stream to replay in bytes.  Used to find
         * EOS. This is total length of content including HTTP headers if
         * present.
         * @param responseBodyStart Where the response body starts in bytes.
         * Used to skip over the HTTP headers if present.
         * @param backingFilename Path to backing file with content in excess of
         * whats in <code>buffer</code>.
         * @param encoding Encoding to use reading the passed prefix buffer and
         * backing file.  For now, should be java canonical name for the
         * encoding. (If null is passed, we will default to
         * ByteReplayCharSequence).
         *
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -