📄 immutablebinarytrie.java
字号:
pos += i; // Completely contained in the current path: we go searching for left and right delimiters. if ( pos == length ) break; } n = word.get( pos++ ) ? n.right : n.left; } // If n == null, we did not found the path. Otherwise, it's the current node, // and we must search for left and right delimiters. if ( n == null ) return Intervals.EMPTY_INTERVAL; Node l = n; // Searching for the left extreme... while( l.word < 0 ) l = l.left != null ? l.left : l.right; // Searching for the right extreme, unless we're on a leaf. while( ! n.isLeaf() ) n = n.right != null ? n.right : n.left; return Interval.valueOf( l.word, n.word ); } /** Returns an interval given by the smallest and the largest word in the trie starting with * the word returned by the given iterator. * * @param iterator an iterator. * @return an interval given by the smallest and the largest word in the trie * that start with the word returned by <code>iterator</code> (thus, the {@linkplain Intervals#EMPTY_INTERVAL empty inteval} * if no such words exist). * @see #getInterval(BitVector) */ public Interval getInterval( final BooleanIterator iterator ) { Node n = root; boolean mismatch = false; long[] path; int pathLength; while( n != null ) { // We found the current path: we go searching for left and right delimiters. if ( ! iterator.hasNext() ) break; pathLength = n.pathLength; if ( pathLength != 0 ) { int i; path = n.path; for( i = 0; i < pathLength && iterator.hasNext(); i++ ) if ( ( mismatch = ( iterator.nextBoolean() != QuickBitVector.get( path, i ) ) ) ) break; // Incompatible with current path--we return the empty interval. if ( mismatch ) return Intervals.EMPTY_INTERVAL; // Completely contained in the current path: we go searching for left and right delimiters. if ( ! iterator.hasNext() ) break; } n = iterator.nextBoolean() ? n.right : n.left; } // If n == null, we did not found the path. Otherwise, it's the current node, // and we must search for left and right delimiters. if ( n == null ) return Intervals.EMPTY_INTERVAL; Node l = n; // Searching for the left extreme... while( l.word < 0 ) l = l.left != null ? l.left : l.right; // Searching for the right extreme, unless we're on a leaf. while ( ! n.isLeaf() ) n = n.right != null ? n.right : n.left; return Interval.valueOf( l.word, n.word ); } /** Returns an approximated prefix interval around the specified word. * * @param word a word. * @return an approximated interval around the specified word: if the words in this trie * are thought of as left interval extremes in a larger lexicographically ordered set of words, * and we number these word intervals using the indices of their left extremes, * then the first word extending <code>word</code> would be in the word interval given by * the left extreme of the {@link Interval} returned by this method, whereas * the last word extending <code>word</code> would be in the word * interval given by the right extreme of the {@link Interval} returned by this method. * @see #getApproximatedInterval(BooleanIterator) */ public Interval getApproximatedInterval( final BitVector word ) { final int length = word.size(); Node n = root; long[] path; boolean exactMatch = false, nextBit; int pos = 0; // Current position in word while( n != null ) { // We found the current path: we go searching for left and right delimiters. path = n.path; if ( pos == length ) { if ( n.word >= 0 && path == null ) exactMatch = true; break; } if ( path != null ) { int maxLength = Math.min( length - pos, n.pathLength ); int i; for( i = 0; i < maxLength; i++ ) if ( word.get( pos + i ) != QuickBitVector.get( path, i ) ) break; if ( i < maxLength ) { // System.err.println( "Exit 1" ); // A mismatch. In this case, it is guaranteed that all // strings starting with the prefix examined so far lie // in a single block. The block index depends, however // on the bit that went wrong. if ( QuickBitVector.get( path, i ) ) { while( n.word < 0 ) n = n.left != null ? n.left : n.right; return n.word > 0 ? Interval.valueOf( n.word - 1 ) : Intervals.EMPTY_INTERVAL; } else { while( ! n.isLeaf() ) n = n.right != null ? n.right : n.left; return Interval.valueOf( n.word ); } } pos += i; // Completely contained in the current path if ( pos == length ) { if ( ASSERTS ) assert n.pathLength == maxLength; if ( i == n.pathLength && n.word >= 0 ) exactMatch = true; break; } } if ( n.isLeaf() ) break; nextBit = word.get( pos++ ); // We would like to take an impossible turn. This case is similar to // prefix mismatches, with subtly different off-by-ones. if ( nextBit && n.right == null ) { while( ! n.isLeaf() ) n = n.right != null ? n.right : n.left; return Interval.valueOf( n.word ); } else if ( ! nextBit && n.left == null ) { while( n.word < 0 ) n = n.left != null ? n.left : n.right; return Interval.valueOf( n.word ); } n = nextBit ? n.right : n.left; } Node l = n; // Searching for the left extreme... //System.err.println("Going for exit 2: l:" + l + " n:" + n); while( l.word < 0 ) l = l.left != null ? l.left : l.right; // Searching for the right extreme, unless we're on a leaf. while ( ! n.isLeaf() ) n = n.right != null ? n.right : n.left; // If did not find an exact match and l.word is 0 we are lexicographically before every word. if ( pos == length && ! exactMatch ) { if ( l.word == 0 ) return Intervals.EMPTY_INTERVAL; else return Interval.valueOf( l.word - 1, n.word ); } // System.err.println( "Exit 2 (hasNext: " +iterator.hasNext() + " exactMatch: " + exactMatch +")" ); return Interval.valueOf( l.word, n.word ); } /** Returns an approximated prefix interval around the word returned by the specified iterator. * * @param iterator an iterator. * @return an approximated interval around the specified word: if the words in this trie * are thought of as left interval extremes in a larger lexicographically ordered set of words, * and we number these word intervals using the indices of their left extremes, * then the first word extending <code>word</code> would be in the word interval given by * the left extreme of the {@link Interval} returned by this method, whereas * the last word extending <code>word</code> would be in the word * interval given by the right extreme of the {@link Interval} returned by this method. * @see #getApproximatedInterval(BitVector) */ public Interval getApproximatedInterval( final BooleanIterator iterator ) { Node n = root; long[] path; boolean exactMatch = false, mismatch = false, nextBit; for(;;) { // We found the current path: we go searching for left and right delimiters. path = n.path; if ( ! iterator.hasNext() ) { if ( n.word >= 0 && path == null ) exactMatch = true; break; } if ( path != null ) { int i; final int pathSize = n.pathLength; for( i = 0; i < pathSize && iterator.hasNext(); i++ ) if ( ( mismatch = ( iterator.nextBoolean() != QuickBitVector.get( path, i ) ) ) ) break; if ( mismatch ) { // System.err.println( "Exit 1" ); // A mismatch. In this case, it is guaranteed that all // strings starting with the prefix examined so far lie // in a single block. The block index depends, however // on the bit that went wrong. if ( QuickBitVector.get( path, i ) ) { while( n.word < 0 ) n = n.left != null ? n.left : n.right; return n.word > 0 ? Interval.valueOf( n.word - 1 ) : Intervals.EMPTY_INTERVAL; } else { while( ! n.isLeaf() ) n = n.right != null ? n.right : n.left; return Interval.valueOf( n.word ); } } // Completely contained in the current path if ( ! iterator.hasNext() ) { if ( i == pathSize && n.word >= 0 ) exactMatch = true; break; } } if ( n.isLeaf() ) break; nextBit = iterator.nextBoolean(); // We would like to take an impossible turn. This case is similar to // prefix mismatches, with subtly different off-by-ones. if ( nextBit && n.right == null ) { while( ! n.isLeaf() ) n = n.right != null ? n.right : n.left; return Interval.valueOf( n.word ); } else if ( ! nextBit && n.left == null ) { while( n.word < 0 ) n = n.left != null ? n.left : n.right; return Interval.valueOf( n.word ); } n = nextBit ? n.right : n.left; } Node l = n; // Searching for the left extreme... //System.err.println("Going for exit 2: l:" + l + " n:" + n); while( l.word < 0 ) l = l.left != null ? l.left : l.right; // Searching for the right extreme, unless we're on a leaf. while ( ! n.isLeaf() ) n = n.right != null ? n.right : n.left; // If did not find an exact match and l.word is 0 we are lexicographically before every word. if ( ! iterator.hasNext() && ! exactMatch ) { if ( l.word == 0 ) return mismatch ? Intervals.EMPTY_INTERVAL : Interval.valueOf( 0 ); else return Interval.valueOf( l.word - 1, n.word ); } // System.err.println( "Exit 2 (hasNext: " +iterator.hasNext() + " exactMatch: " + exactMatch +")" ); return Interval.valueOf( l.word, n.word ); } private void recToString( final Node n, final MutableString printPrefix, final MutableString result, final MutableString path, final int level ) { if ( n == null ) return; result.append( printPrefix ).append( '(' ).append( level ).append( ')' ); if ( path != null ) { path.append( path ); result.append( " path:" ).append( path ); } if ( n.word >= 0 ) result.append( " word: " ).append( n.word ).append( " (" ).append( path ).append( ')' ); result.append( '\n' ); path.append( '0' ); recToString( n.left, printPrefix.append( '\t' ).append( "0 => " ), result, path, level + 1 ); path.charAt( path.length() - 1, '1' ); recToString( n.right, printPrefix.replace( printPrefix.length() - 5, printPrefix.length(), "1 => "), result, path, level + 1 ); path.delete( path.length() - 1, path.length() ); printPrefix.delete( printPrefix.length() - 6, printPrefix.length() ); path.delete( path.length() - n.pathLength, path.length() ); } public String toString() { MutableString s = new MutableString(); recToString( root, new MutableString(), s, new MutableString(), 0 ); return s.toString(); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -