📄 indexreader.java
字号:
* The array contains a vector for each vectorized field in the document.
   * Each vector contains terms and frequencies for all terms in a given vectorized field.
   * If no such fields existed, the method returns null. The term vectors that are
   * returned my either be of type TermFreqVector or of type TermPositionsVector if
   * positions or offsets have been stored.
   *
   * @param docNumber document for which term frequency vectors are returned
   * @return array of term frequency vectors. May be null if no term vectors have been
   * stored for the specified document.
   * @throws IOException if index cannot be accessed
   * @see org.apache.lucene.document.Field.TermVector
   */
  abstract public TermFreqVector[] getTermFreqVectors(int docNumber)
          throws IOException;

  /**
   * Returns the term frequency vector for the specified document and field.
   * The returned vector contains terms and frequencies for the terms in
   * the specified field of this document, provided the field had the
   * storeTermVector flag set. If term vectors had been stored with positions
   * or offsets, a TermPositionsVector is returned.
   *
   * @param docNumber document for which the term frequency vector is returned
   * @param field field for which the term frequency vector is returned
   * @return the term frequency vector; may be null if the field does not exist
   *         in the specified document or no term vector was stored for it
   * @throws IOException if index cannot be accessed
   * @see org.apache.lucene.document.Field.TermVector
   */
  abstract public TermFreqVector getTermFreqVector(int docNumber, String field)
          throws IOException;

  /**
   * Returns <code>true</code> if an index exists at the specified directory.
   * If the directory does not exist or if there is no index in it.
   * <code>false</code> is returned.
* @param directory the directory to check for an index * @return <code>true</code> if an index exists; <code>false</code> otherwise */ public static boolean indexExists(String directory) { return indexExists(new File(directory)); } /** * Returns <code>true</code> if an index exists at the specified directory. * If the directory does not exist or if there is no index in it. * @param directory the directory to check for an index * @return <code>true</code> if an index exists; <code>false</code> otherwise */ public static boolean indexExists(File directory) { return SegmentInfos.getCurrentSegmentGeneration(directory.list()) != -1; } /** * Returns <code>true</code> if an index exists at the specified directory. * If the directory does not exist or if there is no index in it. * @param directory the directory to check for an index * @return <code>true</code> if an index exists; <code>false</code> otherwise * @throws IOException if there is a problem with accessing the index */ public static boolean indexExists(Directory directory) throws IOException { return SegmentInfos.getCurrentSegmentGeneration(directory) != -1; } /** Returns the number of documents in this index. */ public abstract int numDocs(); /** Returns one greater than the largest possible document number. * This may be used to, e.g., determine how big to allocate an array which * will have an element for every document number in an index. */ public abstract int maxDoc(); /** Returns the stored fields of the <code>n</code><sup>th</sup> <code>Document</code> in this index. */ public Document document(int n) throws IOException{ return document(n, null); } /** * Get the {@link org.apache.lucene.document.Document} at the <code>n</code><sup>th</sup> position. The {@link org.apache.lucene.document.FieldSelector} * may be used to determine what {@link org.apache.lucene.document.Field}s to load and how they should be loaded. 
* * <b>NOTE:</b> If this Reader (more specifically, the underlying {@link FieldsReader} is closed before the lazy {@link org.apache.lucene.document.Field} is * loaded an exception may be thrown. If you want the value of a lazy {@link org.apache.lucene.document.Field} to be available after closing you must * explicitly load it or fetch the Document again with a new loader. * * * @param n Get the document at the <code>n</code><sup>th</sup> position * @param fieldSelector The {@link org.apache.lucene.document.FieldSelector} to use to determine what Fields should be loaded on the Document. May be null, in which case all Fields will be loaded. * @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position * @throws IOException If there is a problem reading this document * * @see org.apache.lucene.document.Fieldable * @see org.apache.lucene.document.FieldSelector * @see org.apache.lucene.document.SetBasedFieldSelector * @see org.apache.lucene.document.LoadFirstFieldSelector */ //When we convert to JDK 1.5 make this Set<String> public abstract Document document(int n, FieldSelector fieldSelector) throws IOException; /** Returns true if document <i>n</i> has been deleted */ public abstract boolean isDeleted(int n); /** Returns true if any documents have been deleted */ public abstract boolean hasDeletions(); /** Returns true if there are norms stored for this field. */ public boolean hasNorms(String field) throws IOException { // backward compatible implementation. // SegmentReader has an efficient implementation. return norms(field) != null; } /** Returns the byte-encoded normalization factor for the named field of * every document. This is used by the search code to score documents. * * @see org.apache.lucene.document.Field#setBoost(float) */ public abstract byte[] norms(String field) throws IOException; /** Reads the byte-encoded normalization factor for the named field of every * document. 
This is used by the search code to score documents. * * @see org.apache.lucene.document.Field#setBoost(float) */ public abstract void norms(String field, byte[] bytes, int offset) throws IOException; /** Expert: Resets the normalization factor for the named field of the named * document. The norm represents the product of the field's {@link * Fieldable#setBoost(float) boost} and its {@link Similarity#lengthNorm(String, * int) length normalization}. Thus, to preserve the length normalization * values when resetting this, one should base the new value upon the old. * * @see #norms(String) * @see Similarity#decodeNorm(byte) */ public final synchronized void setNorm(int doc, String field, byte value) throws IOException{ if(directoryOwner) aquireWriteLock(); hasChanges = true; doSetNorm(doc, field, value); } /** Implements setNorm in subclass.*/ protected abstract void doSetNorm(int doc, String field, byte value) throws IOException; /** Expert: Resets the normalization factor for the named field of the named * document. * * @see #norms(String) * @see Similarity#decodeNorm(byte) */ public void setNorm(int doc, String field, float value) throws IOException { setNorm(doc, field, Similarity.encodeNorm(value)); } /** Returns an enumeration of all the terms in the index. * The enumeration is ordered by Term.compareTo(). Each term * is greater than all that precede it in the enumeration. */ public abstract TermEnum terms() throws IOException; /** Returns an enumeration of all terms after a given term. * The enumeration is ordered by Term.compareTo(). Each term * is greater than all that precede it in the enumeration. */ public abstract TermEnum terms(Term t) throws IOException; /** Returns the number of documents containing the term <code>t</code>. */ public abstract int docFreq(Term t) throws IOException; /** Returns an enumeration of all the documents which contain * <code>term</code>. 
For each document, the document number, the frequency of
   * the term in that document is also provided, for use in search scoring.
   * Thus, this method implements the mapping:
   * <p><ul>
   * Term =&gt; &lt;docNum, freq&gt;<sup>*</sup>
   * </ul>
   * <p>The enumeration is ordered by document number.  Each document number
   * is greater than all that precede it in the enumeration.
   */
  public TermDocs termDocs(Term term) throws IOException {
    // Start from an unpositioned enumerator and seek it to the term.
    final TermDocs docsEnum = termDocs();
    docsEnum.seek(term);
    return docsEnum;
  }

  /** Returns an unpositioned {@link TermDocs} enumerator. */
  public abstract TermDocs termDocs() throws IOException;

  /** Returns an enumeration of all the documents which contain
   * <code>term</code>.  For each document, in addition to the document number
   * and frequency of the term in that document, a list of all of the ordinal
   * positions of the term in the document is available.  Thus, this method
   * implements the mapping:
   *
   * <p><ul>
   * Term =&gt; &lt;docNum, freq,
   * &lt;pos<sub>1</sub>, pos<sub>2</sub>, ...
   * pos<sub>freq-1</sub>&gt;
   * &gt;<sup>*</sup>
   * </ul>
   * <p> This positional information facilitates phrase and proximity searching.
   * <p>The enumeration is ordered by document number.  Each document number is
   * greater than all that precede it in the enumeration.
   */
  public TermPositions termPositions(Term term) throws IOException {
    // Start from an unpositioned enumerator and seek it to the term.
    final TermPositions positionsEnum = termPositions();
    positionsEnum.seek(term);
    return positionsEnum;
  }

  /** Returns an unpositioned {@link TermPositions} enumerator. */
  public abstract TermPositions termPositions() throws IOException;

  /**
   * Tries to acquire the WriteLock on this directory.
   * this method is only valid if this IndexReader is directory owner.
   *
   * @throws IOException If WriteLock cannot be acquired.
*/ private void aquireWriteLock() throws IOException { if (stale) throw new IOException("IndexReader out of date and no longer valid for delete, undelete, or setNorm operations"); if (writeLock == null) { Lock writeLock = directory.makeLock(IndexWriter.WRITE_LOCK_NAME); if (!writeLock.obtain(IndexWriter.WRITE_LOCK_TIMEOUT)) // obtain write lock throw new IOException("Index locked for write: " + writeLock); this.writeLock = writeLock; // we have to check whether index has changed since this reader was opened. // if so, this reader is no longer valid for deletion if (SegmentInfos.readCurrentVersion(directory) > segmentInfos.getVersion()) { stale = true; this.writeLock.release(); this.writeLock = null; throw new IOException("IndexReader out of date and no longer valid for delete, undelete, or setNorm operations"); } } } /** Deletes the document numbered <code>docNum</code>. Once a document is * deleted it will not appear in TermDocs or TermPostitions enumerations. * Attempts to read its field with the {@link #document} * method will result in an error. The presence of this document may still be * reflected in the {@link #docFreq} statistic, though * this will be corrected eventually as the index is further modified. */ public final synchronized void deleteDocument(int docNum) throws IOException { if(directoryOwner) aquireWriteLock(); hasChanges = true; doDelete(docNum); } /** Implements deletion of the document numbered <code>docNum</code>. * Applications should call {@link #deleteDocument(int)} or {@link #deleteDocuments(Term)}. */ protected abstract void doDelete(int docNum) throws IOException; /** Deletes all documents that have a given <code>term</code> indexed. * This is useful if one uses a document field to hold a unique ID string for * the document. Then to delete such a document, one merely constructs a * term with the appropriate field and the unique ID string as its text and * passes it to this method. 
* See {@link #deleteDocument(int)} for information about when this deletion will * become effective. * @return the number of documents deleted */ public final int deleteDocuments(Term term) throws IOException { TermDocs docs = termDocs(term); if (docs == null) return 0; int n = 0; try { while (docs.next()) { deleteDocument(docs.doc());
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -