📄 memoryindex.java
字号:
// NOTE(review): the statements down to the first "};" below are the tail of a method
// (and of an anonymous class) whose headers lie above this excerpt; they are
// reproduced unchanged and deliberately not documented further.
          // Assertion: sortFields has already been called before
          Term template = info.template;
          if (template == null) { // not yet cached?
            String fieldName = (String) sortedFields[pos].getKey();
            template = new Term(fieldName, "");
            info.template = template;
          }

          return template.createTerm(text);
        }

      };
    }

    /**
     * Creates a fresh, unpositioned {@link TermPositions} enumerator.
     * The returned object always reports document number 0 and yields that
     * document at most once after each successful <code>seek</code>.
     *
     * @return a new enumerator; it must be positioned with one of the
     *         <code>seek</code> methods before it returns anything
     */
    public TermPositions termPositions() {
      if (DEBUG) System.err.println("MemoryIndexReader.termPositions");

      return new TermPositions() {

        /** true while the single document has not been consumed since the last seek */
        private boolean hasNext;
        /** read index into <code>current</code>; advanced by <code>stride</code> per position */
        private int cursor = 0;
        /** position list of the term last sought, or null if that term/field is unknown */
        private ArrayIntList current;

        /**
         * Positions the enumerator on the given term. Looks up the field's
         * Info via getInfo and the term's position list via getPositions;
         * if either is missing the enumerator becomes empty.
         */
        public void seek(Term term) {
          if (DEBUG) System.err.println(".seek: " + term);
          Info info = getInfo(term.field());
          current = info == null ? null : info.getPositions(term.text());
          hasNext = (current != null);
          cursor = 0;
        }

        /** Positions the enumerator on the term the given TermEnum currently points at. */
        public void seek(TermEnum termEnum) {
          if (DEBUG) System.err.println(".seekEnum");
          seek(termEnum.term());
        }

        /** Always returns 0, the only document number this reader exposes. */
        public int doc() {
          if (DEBUG) System.err.println(".doc");
          return 0;
        }

        /** Returns numPositions(current) for the sought term, or 0 if no term is positioned. */
        public int freq() {
          int freq = current != null ? numPositions(current) : 0;
          if (DEBUG) System.err.println(".freq: " + freq);
          return freq;
        }

        /**
         * Returns true exactly once after a seek that found the term,
         * false on every later call until the next seek.
         */
        public boolean next() {
          if (DEBUG) System.err.println(".next: " + current + ", oldHasNext=" + hasNext);
          boolean next = hasNext;
          hasNext = false;
          return next;
        }

        /**
         * Bulk variant of next(): if the document is still pending, stores
         * doc 0 in docs[0] and its frequency in freqs[0] and returns 1;
         * otherwise returns 0 and leaves both arrays untouched.
         * NOTE(review): index 0 is written without a length check, so both
         * arrays are assumed to have at least one element.
         */
        public int read(int[] docs, int[] freqs) {
          if (DEBUG) System.err.println(".read: " + docs.length);
          if (!hasNext) return 0;
          hasNext = false;
          docs[0] = 0;
          freqs[0] = freq();
          return 1;
        }

        /**
         * Delegates to next().
         * NOTE(review): <code>target</code> is only logged, never compared
         * with the document number - confirm callers never pass target > 0
         * expecting false.
         */
        public boolean skipTo(int target) {
          if (DEBUG) System.err.println(".skipTo: " + target);
          return next();
        }

        /** Nothing to release; only emits the debug trace. */
        public void close() {
          if (DEBUG) System.err.println(".close");
        }

        /**
         * Returns the entry of <code>current</code> at the cursor and then
         * advances the cursor by <code>stride</code>.
         * NOTE(review): <code>current</code> is dereferenced without a null
         * check, so this assumes a preceding seek found the term.
         */
        public int nextPosition() { // implements TermPositions
          int pos = current.get(cursor);
          cursor += stride;
          if (DEBUG) System.err.println(".nextPosition: " + pos);
          return pos;
        }

        /**
         * Not implemented.
         * @throws UnsupportedOperationException always
         */
        public int getPayloadLength() {
          throw new UnsupportedOperationException();
        }

        /**
         * Not implemented.
         * @throws UnsupportedOperationException always
         */
        public byte[] getPayload(byte[] data, int offset) throws IOException {
          throw new UnsupportedOperationException();
        }

        /** Always false: payloads are not supported by this enumerator. */
        public boolean isPayloadAvailable() {
          // unsupported
          return false;
        }

      };
    }

    /**
     * Returns the same kind of enumerator as {@link #termPositions()}.
     */
    public TermDocs termDocs() {
      if (DEBUG) System.err.println("MemoryIndexReader.termDocs");
      return termPositions();
    }

    /**
     * Builds one term vector per entry of <code>fields</code>, in the
     * iteration order of <code>fields.keySet()</code>.
     *
     * @param docNumber passed through unchanged to getTermFreqVector
     * @return an array of length <code>fields.size()</code>; empty (not null)
     *         when there are no fields, because the null-return below is
     *         commented out
     */
    public TermFreqVector[] getTermFreqVectors(int docNumber) {
      if (DEBUG) System.err.println("MemoryIndexReader.getTermFreqVectors");
      TermFreqVector[] vectors = new TermFreqVector[fields.size()];
//      if (vectors.length == 0) return null;
      Iterator iter = fields.keySet().iterator();
      for (int i=0; i < vectors.length; i++) {
        String fieldName = (String) iter.next();
        vectors[i] = getTermFreqVector(docNumber, fieldName);
      }
      return vectors;
    }

    /**
     * Returns a term vector view over the sorted terms of one field.
     * The field's terms are sorted (info.sortTerms()) before the view is
     * created, and the view reads <code>info.sortedTerms</code> directly.
     *
     * @param docNumber not used by this method
     * @param fieldName name of the field to look up via getInfo
     * @return a TermPositionVector for the field, or null if getInfo
     *         returns null for <code>fieldName</code>
     */
    public TermFreqVector getTermFreqVector(int docNumber, final String fieldName) {
      if (DEBUG) System.err.println("MemoryIndexReader.getTermFreqVector");
      final Info info = getInfo(fieldName);
      if (info == null) return null; // TODO: or return empty vector impl???
      info.sortTerms();

      return new TermPositionVector() {

        /** entries taken from info.sortedTerms: key cast to String, value cast to ArrayIntList below */
        private final Map.Entry[] sortedTerms = info.sortedTerms;

        /** Returns the field name this vector was created for. */
        public String getField() {
          return fieldName;
        }

        /** Returns the number of entries in sortedTerms. */
        public int size() {
          return sortedTerms.length;
        }

        /** Returns a new array holding the key of every sortedTerms entry, in the same order. */
        public String[] getTerms() {
          String[] terms = new String[sortedTerms.length];
          for (int i=sortedTerms.length; --i >= 0; ) {
            terms[i] = (String) sortedTerms[i].getKey();
          }
          return terms;
        }

        /** Returns numPositions(...) of every entry's position list, parallel to getTerms(). */
        public int[] getTermFrequencies() {
          int[] freqs = new int[sortedTerms.length];
          for (int i=sortedTerms.length; --i >= 0; ) {
            freqs[i] = numPositions((ArrayIntList) sortedTerms[i].getValue());
          }
          return freqs;
        }

        /**
         * Binary-searches sortedTerms with termComparator.
         * @return the index of <code>term</code>, or -1 if it is not present
         */
        public int indexOf(String term) {
          int i = Arrays.binarySearch(sortedTerms, term, termComparator);
          return i >= 0 ? i : -1;
        }

        /**
         * Applies indexOf to <code>len</code> consecutive elements of
         * <code>terms</code> beginning at <code>start</code>.
         * @return an array of length <code>len</code>; absent terms map to -1
         */
        public int[] indexesOf(String[] terms, int start, int len) {
          int[] indexes = new int[len];
          for (int i=0; i < len; i++) {
            indexes[i] = indexOf(terms[start++]);
          }
          return indexes;
        }

        // lucene >= 1.4.3
        /**
         * Returns <code>toArray(stride)</code> of the position list at
         * <code>index</code>.
         * NOTE(review): presumably this extracts every stride-th entry
         * (the positions) - confirm against ArrayIntList.toArray(int).
         */
        public int[] getTermPositions(int index) {
          return ((ArrayIntList) sortedTerms[index].getValue()).toArray(stride);
        }

        // lucene >= 1.9 (remove this method for lucene-1.4.3)
        /**
         * Builds offset infos for the term at <code>index</code>.
         * Reads the list entries at j and j+1 for j = 1, 1+stride, ... as
         * start and end offset.
         * NOTE(review): this assumes each stride-sized group is laid out as
         * (position, start, end) - confirm where the list is filled.
         *
         * @return null when stride == 1 (no offsets stored), otherwise an
         *         array of <code>size / stride</code> offset infos
         */
        public org.apache.lucene.index.TermVectorOffsetInfo[] getOffsets(int index) {
          if (stride == 1) return null; // no offsets stored

          ArrayIntList positions = (ArrayIntList) sortedTerms[index].getValue();
          int size = positions.size();
          org.apache.lucene.index.TermVectorOffsetInfo[] offsets =
            new org.apache.lucene.index.TermVectorOffsetInfo[size / stride];

          for (int i=0, j=1; j < size; i++, j += stride) {
            int start = positions.get(j);
            int end = positions.get(j+1);
            offsets[i] = new org.apache.lucene.index.TermVectorOffsetInfo(start, end);
          }
          return offsets;
        }

      };
    }

    /**
     * Returns the searcher's Similarity when a searcher has been set,
     * otherwise Similarity.getDefault().
     */
    private Similarity getSimilarity() {
      if (searcher != null) return searcher.getSimilarity();
      return Similarity.getDefault();
    }

    /** Stores the searcher consulted by getSimilarity(). */
    private void setSearcher(Searcher searcher) {
      this.searcher = searcher;
    }

    /** performance hack: cache norms to avoid repeated expensive calculations */
    private byte[] cachedNorms;
    /** field name the cached norms belong to; compared by reference in norms(String) */
    private String cachedFieldName;
    /** Similarity instance the cached norms were computed with; compared by reference */
    private Similarity cachedSimilarity;

    /**
     * Returns a one-element array holding the encoded norm of the field:
     * <code>encodeNorm(lengthNorm(fieldName, numTokens) * boost)</code>,
     * with numTokens = 0 and boost = 1.0f when getInfo returns null.
     * <p>
     * The result is cached for the last (fieldName, Similarity) pair. Both
     * are compared with <code>!=</code>, i.e. by reference, so an equal but
     * distinct String instance merely triggers a recomputation. On a cache
     * hit the cached array itself is returned, not a copy.
     *
     * @param fieldName field to compute the norm for
     * @return array of length 1 containing the encoded norm
     */
    public byte[] norms(String fieldName) {
      byte[] norms = cachedNorms;
      Similarity sim = getSimilarity();
      if (fieldName != cachedFieldName || sim != cachedSimilarity) { // not cached?
        Info info = getInfo(fieldName);
        int numTokens = info != null ? info.numTokens : 0;
        float n = sim.lengthNorm(fieldName, numTokens);
        float boost = info != null ? info.getBoost() : 1.0f;
        n = n * boost; // see DocumentWriter.writeNorms(String segment)
        byte norm = Similarity.encodeNorm(n);
        norms = new byte[] {norm};

        // cache it for future reuse
        cachedNorms = norms;
        cachedFieldName = fieldName;
        cachedSimilarity = sim;
        if (DEBUG) System.err.println("MemoryIndexReader.norms: " + fieldName + ":" + n + ":" + norm + ":" + numTokens);
      }
      return norms;
    }

    /**
     * Copies the result of {@link #norms(String)} into <code>bytes</code>
     * starting at <code>offset</code>.
     */
    public void norms(String fieldName, byte[] bytes, int offset) {
      if (DEBUG) System.err.println("MemoryIndexReader.norms*: " + fieldName);
      byte[] norms = norms(fieldName);
      System.arraycopy(norms, 0, bytes, offset, norms.length);
    }

    /**
     * Not supported.
     * @throws UnsupportedOperationException always
     */
    protected void doSetNorm(int doc, String fieldName, byte value) {
      throw new UnsupportedOperationException();
    }

    /** Returns 1 if at least one field is present in <code>fields</code>, else 0. */
    public int numDocs() {
      if (DEBUG) System.err.println("MemoryIndexReader.numDocs");
      return fields.size() > 0 ? 1 : 0;
    }

    /** Always returns 1, regardless of whether any field is present. */
    public int maxDoc() {
      if (DEBUG) System.err.println("MemoryIndexReader.maxDoc");
      return 1;
    }

    /** Returns a new, empty Document for any <code>n</code>. */
    public Document document(int n) {
      if (DEBUG) System.err.println("MemoryIndexReader.document");
      return new Document(); // there are no stored fields
    }

    //When we convert to JDK 1.5 make this Set<String>
    // NOTE(review): the remark above does not match this signature (no Set is
    // involved here) - presumably stale; confirm before relying on it.
    /** Returns a new, empty Document; both arguments are ignored. */
    public Document document(int n, FieldSelector fieldSelector) throws IOException {
      if (DEBUG) System.err.println("MemoryIndexReader.document");
      return new Document(); // there are no stored fields
    }

    /** Always false. */
    public boolean isDeleted(int n) {
      if (DEBUG) System.err.println("MemoryIndexReader.isDeleted");
      return false;
    }

    /** Always false. */
    public boolean hasDeletions() {
      if (DEBUG) System.err.println("MemoryIndexReader.hasDeletions");
      return false;
    }

    /**
     * Not supported.
     * @throws UnsupportedOperationException always
     */
    protected void doDelete(int docNum) {
      throw new UnsupportedOperationException();
    }

    /**
     * Not supported.
     * @throws UnsupportedOperationException always
     */
    protected void doUndeleteAll() {
      throw new UnsupportedOperationException();
    }

    /** No-op apart from the debug trace. */
    protected void doCommit() {
      if (DEBUG) System.err.println("MemoryIndexReader.doCommit");
    }

    /** No-op apart from the debug trace. */
    protected void doClose() {
      if (DEBUG) System.err.println("MemoryIndexReader.doClose");
    }

    // lucene >= 1.9 (remove this method for lucene-1.4.3)
    /**
     * Returns the field names matching the given option.
     * An empty set is returned for UNINDEXED and INDEXED_NO_TERMVECTOR, and
     * for the two offset-related term vector options when stride == 1.
     * For every other case all field names are returned.
     *
     * @return Collections.EMPTY_SET, or an unmodifiable view of
     *         <code>fields.keySet()</code>
     */
    public Collection getFieldNames(FieldOption fieldOption) {
      if (DEBUG) System.err.println("MemoryIndexReader.getFieldNamesOption");
      if (fieldOption == FieldOption.UNINDEXED)
        return Collections.EMPTY_SET;
      if (fieldOption == FieldOption.INDEXED_NO_TERMVECTOR)
        return Collections.EMPTY_SET;
      if (fieldOption == FieldOption.TERMVECTOR_WITH_OFFSET && stride == 1)
        return Collections.EMPTY_SET;
      if (fieldOption == FieldOption.TERMVECTOR_WITH_POSITION_OFFSET && stride == 1)
        return Collections.EMPTY_SET;

      return Collections.unmodifiableSet(fields.keySet());
    }
  }


  ///////////////////////////////////////////////////////////////////////////////
  // Nested classes:
  ///////////////////////////////////////////////////////////////////////////////
  /**
   * Static size constants and formulas, in bytes, derived from the pointer
   * width PTR and the widths of the primitive types.
   * NOTE(review): presumably used to estimate heap usage; the callers are
   * not visible in this part of the file.
   */
  private static final class VM {

    /** pointer width in bytes: 8 if is64BitVM() reports a 64 bit VM, else 4 */
    public static final int PTR = is64BitVM() ? 8 : 4;

    // bytes occupied by primitive data types
    public static final int BOOLEAN = 1;
    public static final int BYTE = 1;
    public static final int CHAR = 2;
    public static final int SHORT = 2;
    public static final int INT = 4;
    public static final int LONG = 8;
    public static final int FLOAT = 4;
    public static final int DOUBLE = 8;

    /** log2 of PTR (2 for PTR == 4, 3 for PTR == 8); shift amount used by sizeOf */
    private static final int LOG_PTR = (int) Math.round(log2(PTR));

    /**
     * Object header of any heap allocated Java object.
     * ptr to class, info for monitor, gc, hash, etc.
     */
//  private static final int OBJECT_HEADER = 2*4; // even on 64 bit VMs?
    private static final int OBJECT_HEADER = 2*PTR;

    /**
     * Modern VMs tend to trade space for time, allocating memory on word
     * boundaries. For example, on a 64 bit VM, the variables of a class with
     * one 32 bit integer and one Java char really consume 8 bytes instead of 6
     * bytes. 2 bytes are spent on padding. Similarly, on a 64 bit VM a
     * java.lang.Integer consumes OBJECT_HEADER + 8 bytes rather than
     * OBJECT_HEADER + 4 bytes.
     */
    private static final boolean IS_WORD_ALIGNED_VM = true;


    private VM() {} // not instantiable

    //  assumes n > 0
    //  64 bit VM:
    //    0     --> 0*PTR
    //    1..8  --> 1*PTR
    //    9..16 --> 2*PTR
    /**
     * Rounds <code>n</code> up to the next multiple of PTR using shifts by
     * LOG_PTR; would return <code>n</code> unchanged if IS_WORD_ALIGNED_VM
     * were false.
     */
    private static int sizeOf(int n) {
        return IS_WORD_ALIGNED_VM ?
//              ((n-1)/PTR + 1) * PTR :               // slow version
                (((n-1) >> LOG_PTR) + 1) << LOG_PTR : // fast version
                n;
    }

    /** Size of an object with <code>n</code> bytes of fields: sizeOf(OBJECT_HEADER + n). */
    public static int sizeOfObject(int n) {
        return sizeOf(OBJECT_HEADER + n);
    }

    /** Size of a reference array: one INT plus PTR bytes per element, as an object. */
    public static int sizeOfObjectArray(int len) {
        return sizeOfObject(INT + PTR*len);
    }

    /** Size of a char array: one INT plus CHAR bytes per element, as an object. */
    public static int sizeOfCharArray(int len) {
        return sizeOfObject(INT + CHAR*len);
    }

    /** Size of an int array: one INT plus INT bytes per element, as an object. */
    public static int sizeOfIntArray(int len) {
        return sizeOfObject(INT + INT*len);
    }

    /** Size of an object with three INT fields and one PTR field, plus a char array of <code>len</code>. */
    public static int sizeOfString(int len) {
        return sizeOfObject(3*INT + PTR) + sizeOfCharArray(len);
    }

    /**
     * Size of an object with four PTR and four INT fields, plus a reference
     * array of <code>len</code>, plus <code>len</code> entry objects of
     * three PTR fields and one INT field each.
     */
    public static int sizeOfHashMap(int len) {
        return sizeOfObject(4*PTR + 4*INT) + sizeOfObjectArray(len)
            + len * sizeOfObject(3*PTR + INT); // entries
    }

    // note: does not include referenced objects
    /** Size of an object with one PTR and two INT fields, plus a reference array of <code>len</code>. */
    public static int sizeOfArrayList(int len) {
        return sizeOfObject(PTR + 2*INT) + sizeOfObjectArray(len);
    }

    /** Size of an object with one PTR and one INT field, plus an int array of <code>len</code>. */
    public static int sizeOfArrayIntList(int len) {
        return sizeOfObject(PTR + INT) + sizeOfIntArray(len);
    }

    /**
     * Detects a 64 bit VM. Uses the integer system property
     * "sun.arch.data.model" when it is set to a non-zero value; otherwise
     * checks whether the lower-cased "java.vm.name" property contains "64".
     * Any Throwable raised on the way (including a missing property) makes
     * the method return false.
     */
    private static boolean is64BitVM() {
      try {
        int bits = Integer.getInteger("sun.arch.data.model", 0).intValue();
        if (bits != 0) return bits == 64;

        // fallback if sun.arch.data.model isn't available
        return System.getProperty("java.vm.name").toLowerCase().indexOf("64") >= 0;
      }
      catch (Throwable t) {
        return false; // better safe than sorry (applets, security managers, etc.) ...
      }
    }

    /** logarithm to the base 2. Example: log2(4) == 2, log2(8) == 3 */
    private static double log2(double value) {
      return Math.log(value) / Math.log(2);
    }

  }

}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -