📄 Token.java
   *  {@link #setTermBuffer(String, int, int)}
   *  to optimally combine the resize with the setting of the termBuffer.
   *  @param newSize minimum size of the new termBuffer
   *  @return newly created termBuffer with length >= newSize
   */
  public char[] resizeTermBuffer(int newSize) {
    char[] newCharBuffer = growTermBuffer(newSize);
    if (termBuffer == null) {
      // If there were termText, then preserve it.
      // note that if termBuffer is null then newCharBuffer cannot be null
      assert newCharBuffer != null;
      if (termText != null) {
        termText.getChars(0, termText.length(), newCharBuffer, 0);
      }
      termBuffer = newCharBuffer;
    } else if (newCharBuffer != null) {
      // Note: if newCharBuffer != null then termBuffer needs to grow.
      // If there were a termBuffer, then preserve it
      System.arraycopy(termBuffer, 0, newCharBuffer, 0, termBuffer.length);
      termBuffer = newCharBuffer;
    }
    termText = null;
    return termBuffer;
  }

  /** Allocates a buffer char[] of at least newSize
   *  @param newSize minimum size of the buffer
   *  @return newly created buffer with length >= newSize or null if the current termBuffer is big enough
   */
  private char[] growTermBuffer(int newSize) {
    if (termBuffer != null) {
      if (termBuffer.length >= newSize)
        // Already big enough
        return null;
      else
        // Not big enough; create a new array with slight
        // over allocation:
        return new char[ArrayUtil.getNextSize(newSize)];
    } else {
      // determine the best size
      // The buffer is always at least MIN_BUFFER_SIZE
      if (newSize < MIN_BUFFER_SIZE) {
        newSize = MIN_BUFFER_SIZE;
      }
      // If there is already a termText, then the size has to be at least that big
      if (termText != null) {
        int ttLength = termText.length();
        if (newSize < ttLength) {
          newSize = ttLength;
        }
      }
      return new char[newSize];
    }
  }

  // TODO: once we remove the deprecated termText() method
  // and switch entirely to char[] termBuffer we don't need
  // to use this method anymore
  private void initTermBuffer() {
    if (termBuffer == null) {
      if (termText == null) {
        termBuffer = new char[MIN_BUFFER_SIZE];
        termLength = 0;
      } else {
        int length = termText.length();
        if (length < MIN_BUFFER_SIZE) length = MIN_BUFFER_SIZE;
        termBuffer = new char[length];
        termLength = termText.length();
        termText.getChars(0, termText.length(), termBuffer, 0);
        termText = null;
      }
    } else if (termText != null)
      termText = null;
  }

  /** Return number of valid characters (length of the term)
   *  in the termBuffer array. */
  public final int termLength() {
    initTermBuffer();
    return termLength;
  }

  /** Set number of valid characters (length of the term) in
   *  the termBuffer array. Use this to truncate the termBuffer
   *  or to synchronize with external manipulation of the termBuffer.
   *  Note: to grow the size of the array,
   *  use {@link #resizeTermBuffer(int)} first.
   *  @param length the truncated length */
  public final void setTermLength(int length) {
    initTermBuffer();
    if (length > termBuffer.length)
      throw new IllegalArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.length + ")");
    termLength = length;
  }

  /** Returns this Token's starting offset, the position of the first character
      corresponding to this token in the source text.

      Note that the difference between endOffset() and startOffset() may not be
      equal to termText.length(), as the term text may have been altered by a
      stemmer or some other filter. */
  public final int startOffset() {
    return startOffset;
  }
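  /* Usage sketch (added for illustration; not part of the original source).
   * When writing characters into the buffer directly, grow it first with
   * resizeTermBuffer(int) and then record the number of valid characters with
   * setTermLength(int). The no-arg Token() constructor is assumed here.
   *
   *   Token tok = new Token();
   *   String text = "example";
   *   char[] buf = tok.resizeTermBuffer(text.length());
   *   text.getChars(0, text.length(), buf, 0);  // fill the buffer in place
   *   tok.setTermLength(text.length());         // mark how much of it is valid
   */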
  /** Set the starting offset.
      @see #startOffset() */
  public void setStartOffset(int offset) {
    this.startOffset = offset;
  }

  /** Returns this Token's ending offset, one greater than the position of the
      last character corresponding to this token in the source text. The length
      of the token in the source text is (endOffset - startOffset). */
  public final int endOffset() {
    return endOffset;
  }

  /** Set the ending offset.
      @see #endOffset() */
  public void setEndOffset(int offset) {
    this.endOffset = offset;
  }

  /** Returns this Token's lexical type.  Defaults to "word". */
  public final String type() {
    return type;
  }

  /** Set the lexical type.
      @see #type() */
  public final void setType(String type) {
    this.type = type;
  }

  /**
   * EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long.
   * <p/>
   *
   * Get the bitset for any bits that have been set.  This is completely distinct from {@link #type()}, although they do share similar purposes.
   * The flags can be used to encode information about the token for use by other {@link org.apache.lucene.analysis.TokenFilter}s.
   *
   * @return The bits
   */
  public int getFlags() {
    return flags;
  }

  /**
   * @see #getFlags()
   */
  public void setFlags(int flags) {
    this.flags = flags;
  }

  /**
   * Returns this Token's payload.
   */
  public Payload getPayload() {
    return this.payload;
  }

  /**
   * Sets this Token's payload.
   */
  public void setPayload(Payload payload) {
    this.payload = payload;
  }

  public String toString() {
    StringBuffer sb = new StringBuffer();
    sb.append('(');
    initTermBuffer();
    if (termBuffer == null)
      sb.append("null");
    else
      sb.append(termBuffer, 0, termLength);
    sb.append(',').append(startOffset).append(',').append(endOffset);
    if (!type.equals("word"))
      sb.append(",type=").append(type);
    if (positionIncrement != 1)
      sb.append(",posIncr=").append(positionIncrement);
    sb.append(')');
    return sb.toString();
  }

  /** Resets the term text, payload, flags, and positionIncrement to default.
   *  Other fields such as startOffset, endOffset and the token type are
   *  not reset since they are normally overwritten by the tokenizer. */
  public void clear() {
    payload = null;
    // Leave termBuffer to allow re-use
    termLength = 0;
    termText = null;
    positionIncrement = 1;
    flags = 0;
    // startOffset = endOffset = 0;
    // type = DEFAULT_TYPE;
  }

  public Object clone() {
    try {
      Token t = (Token)super.clone();
      // Do a deep clone
      if (termBuffer != null) {
        t.termBuffer = (char[]) termBuffer.clone();
      }
      if (payload != null) {
        t.setPayload((Payload) payload.clone());
      }
      return t;
    } catch (CloneNotSupportedException e) {
      throw new RuntimeException(e);  // shouldn't happen
    }
  }
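  /* Usage sketch (added for illustration; not part of the original source).
   * clone() deep-copies the term buffer and the payload, so later edits to the
   * original token do not affect the copy. The termBuffer() accessor is assumed.
   *
   *   char[] term = "sample".toCharArray();
   *   Token original = new Token(term, 0, term.length, 0, term.length);
   *   Token copy = (Token) original.clone();
   *   original.termBuffer()[0] = 'S';   // the copy still reads "sample"
   */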
  /** Makes a clone, but replaces the term buffer &
   *  start/end offset in the process.  This is more
   *  efficient than doing a full clone (and then calling
   *  setTermBuffer) because it saves a wasted copy of the old
   *  termBuffer. */
  public Token clone(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
    final Token t = new Token(newTermBuffer, newTermOffset, newTermLength, newStartOffset, newEndOffset);
    t.positionIncrement = positionIncrement;
    t.flags = flags;
    t.type = type;
    if (payload != null)
      t.payload = (Payload) payload.clone();
    return t;
  }

  public boolean equals(Object obj) {
    if (obj == this)
      return true;

    if (obj instanceof Token) {
      Token other = (Token) obj;

      initTermBuffer();
      other.initTermBuffer();

      if (termLength == other.termLength &&
          startOffset == other.startOffset &&
          endOffset == other.endOffset &&
          flags == other.flags &&
          positionIncrement == other.positionIncrement &&
          subEqual(type, other.type) &&
          subEqual(payload, other.payload)) {
        for(int i=0;i<termLength;i++)
          if (termBuffer[i] != other.termBuffer[i])
            return false;
        return true;
      } else
        return false;
    } else
      return false;
  }

  private boolean subEqual(Object o1, Object o2) {
    if (o1 == null)
      return o2 == null;
    else
      return o1.equals(o2);
  }

  public int hashCode() {
    initTermBuffer();
    int code = termLength;
    code = code * 31 + startOffset;
    code = code * 31 + endOffset;
    code = code * 31 + flags;
    code = code * 31 + positionIncrement;
    code = code * 31 + type.hashCode();
    code = (payload == null ? code : code * 31 + payload.hashCode());
    code = code * 31 + ArrayUtil.hashCode(termBuffer, 0, termLength);
    return code;
  }

  // like clear() but doesn't clear termBuffer/text
  private void clearNoTermBuffer() {
    payload = null;
    positionIncrement = 1;
    flags = 0;
  }

  /** Shorthand for calling {@link #clear},
   *  {@link #setTermBuffer(char[], int, int)},
   *  {@link #setStartOffset},
   *  {@link #setEndOffset},
   *  {@link #setType}
   *  @return this Token instance */
  public Token reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, String newType) {
    clearNoTermBuffer();
    payload = null;
    positionIncrement = 1;
    setTermBuffer(newTermBuffer, newTermOffset, newTermLength);
    startOffset = newStartOffset;
    endOffset = newEndOffset;
    type = newType;
    return this;
  }

  /** Shorthand for calling {@link #clear},
   *  {@link #setTermBuffer(char[], int, int)},
   *  {@link #setStartOffset},
   *  {@link #setEndOffset}
   *  {@link #setType} on Token.DEFAULT_TYPE
   *  @return this Token instance */
  public Token reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
    clearNoTermBuffer();
    setTermBuffer(newTermBuffer, newTermOffset, newTermLength);
    startOffset = newStartOffset;
    endOffset = newEndOffset;
    type = DEFAULT_TYPE;
    return this;
  }

  /** Shorthand for calling {@link #clear},
   *  {@link #setTermBuffer(String)},
   *  {@link #setStartOffset},
   *  {@link #setEndOffset}
   *  {@link #setType}
   *  @return this Token instance */
  public Token reinit(String newTerm, int newStartOffset, int newEndOffset, String newType) {
    clearNoTermBuffer();
    setTermBuffer(newTerm);
    startOffset = newStartOffset;
    endOffset = newEndOffset;
    type = newType;
    return this;
  }

  /** Shorthand for calling {@link #clear},
   *  {@link #setTermBuffer(String, int, int)},
   *  {@link #setStartOffset},
   *  {@link #setEndOffset}
   *  {@link #setType}
   *  @return this Token instance */
  public Token reinit(String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, String newType) {
    clearNoTermBuffer();
    setTermBuffer(newTerm, newTermOffset, newTermLength);
    startOffset = newStartOffset;
    endOffset = newEndOffset;
    type = newType;
    return this;
  }
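  /* Usage sketch (added for illustration; not part of the original source).
   * The char[] overloads of reinit copy a slice of an existing buffer, which is
   * handy when the term is a substring of a larger scratch array. The no-arg
   * Token() constructor and the sample offsets are assumptions.
   *
   *   char[] scratch = "foo bar".toCharArray();
   *   Token reusable = new Token();
   *   reusable.reinit(scratch, 4, 3, 4, 7);   // term "bar", source offsets 4..7
   */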
  /** Shorthand for calling {@link #clear},
   *  {@link #setTermBuffer(String)},
   *  {@link #setStartOffset},
   *  {@link #setEndOffset}
   *  {@link #setType} on Token.DEFAULT_TYPE
   *  @return this Token instance */
  public Token reinit(String newTerm, int newStartOffset, int newEndOffset) {
    clearNoTermBuffer();
    setTermBuffer(newTerm);
    startOffset = newStartOffset;
    endOffset = newEndOffset;
    type = DEFAULT_TYPE;
    return this;
  }

  /** Shorthand for calling {@link #clear},
   *  {@link #setTermBuffer(String, int, int)},
   *  {@link #setStartOffset},
   *  {@link #setEndOffset}
   *  {@link #setType} on Token.DEFAULT_TYPE
   *  @return this Token instance */
  public Token reinit(String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
    clearNoTermBuffer();
    setTermBuffer(newTerm, newTermOffset, newTermLength);
    startOffset = newStartOffset;
    endOffset = newEndOffset;
    type = DEFAULT_TYPE;
    return this;
  }

  /**
   * Copy the prototype token's fields into this one. Note: Payloads are shared.
   * @param prototype
   */
  public void reinit(Token prototype) {
    prototype.initTermBuffer();
    setTermBuffer(prototype.termBuffer, 0, prototype.termLength);
    positionIncrement = prototype.positionIncrement;
    flags = prototype.flags;
    startOffset = prototype.startOffset;
    endOffset = prototype.endOffset;
    type = prototype.type;
    payload = prototype.payload;
  }

  /**
   * Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
   * @param prototype
   * @param newTerm
   */
  public void reinit(Token prototype, String newTerm) {
    setTermBuffer(newTerm);
    positionIncrement = prototype.positionIncrement;
    flags = prototype.flags;
    startOffset = prototype.startOffset;
    endOffset = prototype.endOffset;
    type = prototype.type;
    payload = prototype.payload;
  }

  /**
   * Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
   * @param prototype
   * @param newTermBuffer
   * @param offset
   * @param length
   */
  public void reinit(Token prototype, char[] newTermBuffer, int offset, int length) {
    setTermBuffer(newTermBuffer, offset, length);
    positionIncrement = prototype.positionIncrement;
    flags = prototype.flags;
    startOffset = prototype.startOffset;
    endOffset = prototype.endOffset;
    type = prototype.type;
    payload = prototype.payload;
  }
}
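A brief usage sketch of the reinit overloads follows. It is an addition to this listing, not part of Token.java: the WordListSketchStream class, its word array, and the whitespace-offset bookkeeping are invented for illustration, and the next(Token) reuse contract is assumed to match the Lucene 2.x analysis API. The point is that a token source can refill the caller-supplied Token on every call instead of allocating a new one per term.

// Hypothetical token source built on the reinit(String, int, int) overload above.
import java.io.IOException;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;

class WordListSketchStream extends TokenStream {
  private final String[] words;   // pre-split input, assumed for the sketch
  private int index = 0;          // next word to emit
  private int offset = 0;         // running character offset into the source

  WordListSketchStream(String[] words) {
    this.words = words;
  }

  public Token next(Token reusableToken) throws IOException {
    if (index >= words.length)
      return null;                        // stream exhausted
    String word = words[index++];
    int start = offset;
    offset += word.length() + 1;          // words assumed space-separated
    // Refill the caller's Token instead of allocating a new one; this overload
    // clears payload/flags/positionIncrement and resets type to DEFAULT_TYPE.
    return reusableToken.reinit(word, start, start + word.length());
  }
}

Reusing a single Token per stream avoids a per-term allocation, which is the same motivation behind resizeTermBuffer and the reinit family shown in the class above.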