📄 MemoryIndex.java
 * the better the match.
 *
 * @see org.apache.lucene.queryParser.QueryParser#parse(String)
 */
public float search(Query query) {
    if (query == null)
        throw new IllegalArgumentException("query must not be null");

    Searcher searcher = createSearcher();
    try {
        final float[] scores = new float[1]; // inits to 0.0f (no match)
        searcher.search(query, new HitCollector() {
            public void collect(int doc, float score) {
                scores[0] = score;
            }
        });
        float score = scores[0];
        return score;
    } catch (IOException e) { // can never happen (RAMDirectory)
        throw new RuntimeException(e);
    } finally {
        // searcher.close();
        /*
         * Note that it is harmless and important for good performance to
         * NOT close the index reader!!! This avoids all sorts of
         * unnecessary baggage and locking in the Lucene IndexReader
         * superclass, all of which is completely unnecessary for this main
         * memory index data structure without thread-safety claims.
         *
         * Wishing IndexReader would be an interface...
         *
         * Actually with the new tight createSearcher() API auto-closing is now
         * made impossible, hence searcher.close() would be harmless and also
         * would not degrade performance...
         */
    }
}

/**
 * Returns a reasonable approximation of the main memory [bytes] consumed by
 * this instance. Useful for smart memory sensitive caches/pools. Assumes
 * fieldNames are interned, whereas tokenized terms are memory-overlaid.
 *
 * @return the main memory consumption
 */
public int getMemorySize() {
    // for example usage in a smart cache see nux.xom.pool.Pool
    int PTR = VM.PTR;
    int INT = VM.INT;
    int size = 0;
    size += VM.sizeOfObject(2*PTR + INT); // memory index
    if (sortedFields != null) size += VM.sizeOfObjectArray(sortedFields.length);

    size += VM.sizeOfHashMap(fields.size());
    Iterator iter = fields.entrySet().iterator();
    while (iter.hasNext()) { // for each Field Info
        Map.Entry entry = (Map.Entry) iter.next();
        Info info = (Info) entry.getValue();
        size += VM.sizeOfObject(2*INT + 3*PTR); // Info instance vars
        if (info.sortedTerms != null) size += VM.sizeOfObjectArray(info.sortedTerms.length);

        int len = info.terms.size();
        size += VM.sizeOfHashMap(len);
        Iterator iter2 = info.terms.entrySet().iterator();
        while (--len >= 0) { // for each term
            Map.Entry e = (Map.Entry) iter2.next();
            size += VM.sizeOfObject(PTR + 3*INT); // assumes substring() memory overlay
//            size += STR + 2 * ((String) e.getKey()).length();
            ArrayIntList positions = (ArrayIntList) e.getValue();
            size += VM.sizeOfArrayIntList(positions.size());
        }
    }
    return size;
}

private int numPositions(ArrayIntList positions) {
    return positions.size() / stride;
}

/** sorts into ascending order (on demand), reusing memory along the way */
private void sortFields() {
    if (sortedFields == null) sortedFields = sort(fields);
}

/** returns a view of the given map's entries, sorted ascending by key */
private static Map.Entry[] sort(HashMap map) {
    int size = map.size();
    Map.Entry[] entries = new Map.Entry[size];

    Iterator iter = map.entrySet().iterator();
    for (int i=0; i < size; i++) {
        entries[i] = (Map.Entry) iter.next();
    }

    if (size > 1) Arrays.sort(entries, termComparator);
    return entries;
}

/**
 * Returns a String representation of the index data for debugging purposes.
 *
 * @return the string representation
 */
public String toString() {
    StringBuffer result = new StringBuffer(256);
    sortFields();
    int sumChars = 0;
    int sumPositions = 0;
    int sumTerms = 0;

    for (int i=0; i < sortedFields.length; i++) {
        Map.Entry entry = sortedFields[i];
        String fieldName = (String) entry.getKey();
        Info info = (Info) entry.getValue();
        info.sortTerms();
        result.append(fieldName + ":\n");

        int numChars = 0;
        int numPositions = 0;
        for (int j=0; j < info.sortedTerms.length; j++) {
            Map.Entry e = info.sortedTerms[j];
            String term = (String) e.getKey();
            ArrayIntList positions = (ArrayIntList) e.getValue();
            result.append("\t'" + term + "':" + numPositions(positions) + ":");
            result.append(positions.toString(stride)); // ignore offsets
            result.append("\n");
            numPositions += numPositions(positions);
            numChars += term.length();
        }

        result.append("\tterms=" + info.sortedTerms.length);
        result.append(", positions=" + numPositions);
        result.append(", Kchars=" + (numChars/1000.0f));
        result.append("\n");
        sumPositions += numPositions;
        sumChars += numChars;
        sumTerms += info.sortedTerms.length;
    }

    result.append("\nfields=" + sortedFields.length);
    result.append(", terms=" + sumTerms);
    result.append(", positions=" + sumPositions);
    result.append(", Kchars=" + (sumChars/1000.0f));
    return result.toString();
}

///////////////////////////////////////////////////////////////////////////////
// Nested classes:
///////////////////////////////////////////////////////////////////////////////
/**
 * Index data structure for a field; Contains the tokenized term texts and
 * their positions.
 */
private static final class Info implements Serializable {

    /**
     * Term strings and their positions for this field: Map <String
     * termText, ArrayIntList positions>
     */
    private final HashMap terms;

    /** Terms sorted ascending by term text; computed on demand */
    private transient Map.Entry[] sortedTerms;

    /** Number of added tokens for this field */
    private final int numTokens;

    /** Boost factor for hits for this field */
    private final float boost;

    /** Term for this field's fieldName, lazily computed on demand */
    public transient Term template;

    private static final long serialVersionUID = 2882195016849084649L;

    public Info(HashMap terms, int numTokens, float boost) {
        this.terms = terms;
        this.numTokens = numTokens;
        this.boost = boost;
    }

    /**
     * Sorts hashed terms into ascending order, reusing memory along the
     * way. Note that sorting is lazily delayed until required (often it's
     * not required at all). If a sorted view is required then hashing +
     * sort + binary search is still faster and smaller than TreeMap usage
     * (which would be an alternative and somewhat more elegant approach,
     * apart from more sophisticated Tries / prefix trees).
     */
    public void sortTerms() {
        if (sortedTerms == null) sortedTerms = sort(terms);
    }

    /** note that the frequency can be calculated as numPositions(getPositions(x)) */
    public ArrayIntList getPositions(String term) {
        return (ArrayIntList) terms.get(term);
    }

    /** note that the frequency can be calculated as numPositions(getPositions(x)) */
    public ArrayIntList getPositions(int pos) {
        return (ArrayIntList) sortedTerms[pos].getValue();
    }

    public float getBoost() {
        return boost;
    }

}

///////////////////////////////////////////////////////////////////////////////
// Nested classes:
///////////////////////////////////////////////////////////////////////////////
/**
 * Efficient resizable auto-expanding list holding <code>int</code> elements;
 * implemented with arrays.
 */
private static final class ArrayIntList implements Serializable {

    private int[] elements;
    private int size = 0;

    private static final long serialVersionUID = 2282195016849084649L;

    public ArrayIntList() {
        this(10);
    }

    public ArrayIntList(int initialCapacity) {
        elements = new int[initialCapacity];
    }

    public void add(int elem) {
        if (size == elements.length) ensureCapacity(size + 1);
        elements[size++] = elem;
    }

    public void add(int pos, int start, int end) {
        if (size + 3 > elements.length) ensureCapacity(size + 3);
        elements[size] = pos;
        elements[size+1] = start;
        elements[size+2] = end;
        size += 3;
    }

    public int get(int index) {
        if (index >= size) throwIndex(index);
        return elements[index];
    }

    public int size() {
        return size;
    }

    public int[] toArray(int stride) {
        int[] arr = new int[size() / stride];
        if (stride == 1) {
            System.arraycopy(elements, 0, arr, 0, size); // fast path
        } else {
            for (int i=0, j=0; j < size; i++, j += stride) arr[i] = elements[j];
        }
        return arr;
    }

    private void ensureCapacity(int minCapacity) {
        int newCapacity = Math.max(minCapacity, (elements.length * 3) / 2 + 1);
        int[] newElements = new int[newCapacity];
        System.arraycopy(elements, 0, newElements, 0, size);
        elements = newElements;
    }

    private void throwIndex(int index) {
        throw new IndexOutOfBoundsException("index: " + index + ", size: " + size);
    }

    /** returns the first few positions (without offsets); debug only */
    public String toString(int stride) {
        int s = size() / stride;
        int len = Math.min(10, s); // avoid printing huge lists
        StringBuffer buf = new StringBuffer(4*len);
        buf.append("[");
        for (int i = 0; i < len; i++) {
            buf.append(get(i*stride));
            if (i < len-1) buf.append(", ");
        }
        if (len != s) buf.append(", ..."); // and some more...
        buf.append("]");
        return buf.toString();
    }

}

///////////////////////////////////////////////////////////////////////////////
// Nested classes:
///////////////////////////////////////////////////////////////////////////////
private static final Term MATCH_ALL_TERM = new Term("", "");

/**
 * Search support for Lucene framework integration; implements all methods
 * required by the Lucene IndexReader contracts.
 */
private final class MemoryIndexReader extends IndexReader {

    private Searcher searcher; // needed to find searcher.getSimilarity()

    private MemoryIndexReader() {
        super(null); // avoid as much superclass baggage as possible
    }

    // lucene >= 1.9 or lucene-1.4.3 with patch removing "final" in superclass
    protected void finalize() {}

    private Info getInfo(String fieldName) {
        return (Info) fields.get(fieldName);
    }

    private Info getInfo(int pos) {
        return (Info) sortedFields[pos].getValue();
    }

    public int docFreq(Term term) {
        Info info = getInfo(term.field());
        int freq = 0;
        if (info != null) freq = info.getPositions(term.text()) != null ?
            1 : 0;
        if (DEBUG) System.err.println("MemoryIndexReader.docFreq: " + term + ", freq:" + freq);
        return freq;
    }

    public TermEnum terms() {
        if (DEBUG) System.err.println("MemoryIndexReader.terms()");
        return terms(MATCH_ALL_TERM);
    }

    public TermEnum terms(Term term) {
        if (DEBUG) System.err.println("MemoryIndexReader.terms: " + term);

        int i; // index into info.sortedTerms
        int j; // index into sortedFields

        sortFields();
        if (sortedFields.length == 1 && sortedFields[0].getKey() == term.field()) {
            j = 0; // fast path
        } else {
            j = Arrays.binarySearch(sortedFields, term.field(), termComparator);
        }

        if (j < 0) { // not found; choose successor
            j = -j -1;
            i = 0;
            if (j < sortedFields.length) getInfo(j).sortTerms();
        } else { // found
            Info info = getInfo(j);
            info.sortTerms();
            i = Arrays.binarySearch(info.sortedTerms, term.text(), termComparator);
            if (i < 0) { // not found; choose successor
                i = -i -1;
                if (i >= info.sortedTerms.length) { // move to next successor
                    j++;
                    i = 0;
                    if (j < sortedFields.length) getInfo(j).sortTerms();
                }
            }
        }
        final int ix = i;
        final int jx = j;

        return new TermEnum() {

            private int i = ix; // index into info.sortedTerms
            private int j = jx; // index into sortedFields

            public boolean next() {
                if (DEBUG) System.err.println("TermEnum.next");
                if (j >= sortedFields.length) return false;
                Info info = getInfo(j);
                if (++i < info.sortedTerms.length) return true;

                // move to successor
                j++;
                i = 0;
                if (j >= sortedFields.length) return false;
                getInfo(j).sortTerms();
                return true;
            }

            public Term term() {
                if (DEBUG) System.err.println("TermEnum.term: " + i);
                if (j >= sortedFields.length) return null;
                Info info = getInfo(j);
                if (i >= info.sortedTerms.length) return null;
//                if (DEBUG) System.err.println("TermEnum.term: " + i + ", " + info.sortedTerms[i].getKey());
                return createTerm(info, j, (String) info.sortedTerms[i].getKey());
            }

            public int docFreq() {
                if (DEBUG) System.err.println("TermEnum.docFreq");
                if (j >= sortedFields.length) return 0;
                Info info = getInfo(j);
                if (i >= info.sortedTerms.length) return 0;
                return numPositions(info.getPositions(i));
            }

            public void close() {
                if (DEBUG) System.err.println("TermEnum.close");
            }

            /** Returns a new Term object, minimizing String.intern() overheads. */
            private Term createTerm(Info info, int pos, String text) {
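For reference, a minimal usage sketch of the API shown in this listing (search(Query) and getMemorySize()). It assumes the addField(String, String, Analyzer) method defined elsewhere in MemoryIndex.java, the org.apache.lucene.index.memory package, and the Lucene 1.9/2.x-era StandardAnalyzer and QueryParser APIs; the class name MemoryIndexExample is illustrative only.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;

public class MemoryIndexExample {

    public static void main(String[] args) throws ParseException {
        Analyzer analyzer = new StandardAnalyzer();

        // Build a throwaway, single-document index entirely in main memory
        // (addField(...) is defined elsewhere in MemoryIndex.java).
        MemoryIndex index = new MemoryIndex();
        index.addField("content", "Readings about Salmons and other select Alaska fishing Manuals", analyzer);

        // Score a query via search(Query) shown above; any score > 0.0f means a match.
        Query query = new QueryParser("content", analyzer).parse("+salmon~ +fish* manual~");
        float score = index.search(query);
        System.out.println(score > 0.0f ? "it's a match, score=" + score : "no match found");

        // Rough RAM footprint, e.g. for sizing a memory-sensitive cache of such indexes.
        System.out.println("approx. memory: " + index.getMemorySize() + " bytes");
    }
}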