⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 immutablebinarytrie.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
				pos += i;				// Completely contained in the current path: we go searching for left and right delimiters.				if ( pos == length ) break;			}			n = word.get( pos++ ) ? n.right : n.left;			}				// If n == null, we did not found the path. Otherwise, it's the current node,		// and we must search for left and right delimiters.		if ( n == null ) return Intervals.EMPTY_INTERVAL;		Node l = n;		// Searching for the left extreme...		while( l.word < 0 ) l = l.left != null ? l.left : l.right;		// Searching for the right extreme, unless we're on a leaf.		while( ! n.isLeaf() ) n = n.right != null ? n.right : n.left;				return Interval.valueOf( l.word, n.word );			}	/** Returns an interval given by the smallest and the largest word in the trie starting with 	 * the word returned by the given iterator.	 * 	 * @param iterator an iterator.	 * @return  an interval given by the smallest and the largest word in the trie 	 * that start with the word returned by <code>iterator</code> (thus, the {@linkplain Intervals#EMPTY_INTERVAL empty inteval}	 * if no such words exist).	 * @see #getInterval(BitVector)	 */			public Interval getInterval( final BooleanIterator iterator ) {		Node n = root;		boolean mismatch = false;		long[] path;		int pathLength;				while( n != null ) {			// We found the current path: we go searching for left and right delimiters.			if ( ! iterator.hasNext() ) break;			pathLength = n.pathLength;			if ( pathLength != 0 ) {				int i;				path = n.path;				for( i = 0; i < pathLength && iterator.hasNext(); i++ ) if ( ( mismatch = ( iterator.nextBoolean() != QuickBitVector.get( path, i ) ) ) ) break;				// Incompatible with current path--we return the empty interval.				if ( mismatch ) return Intervals.EMPTY_INTERVAL;							// Completely contained in the current path: we go searching for left and right delimiters.				if ( ! iterator.hasNext() ) break;							}			n = iterator.nextBoolean() ? n.right : n.left;		}				// If n == null, we did not found the path. Otherwise, it's the current node,		// and we must search for left and right delimiters.		if ( n == null ) return Intervals.EMPTY_INTERVAL;		Node l = n;		// Searching for the left extreme...		while( l.word < 0 ) l = l.left != null ? l.left : l.right;		// Searching for the right extreme, unless we're on a leaf.		while ( ! n.isLeaf() ) n = n.right != null ? n.right : n.left;				return Interval.valueOf( l.word, n.word );			}		/** Returns an approximated prefix interval around the specified word.	 * 	 * @param word a word.	 * @return an approximated interval around the specified word: if the words in this trie	 * are thought of as left interval extremes in a larger lexicographically ordered set of words,	 * and we number these word intervals using the indices of their left extremes,	 * then the first word extending <code>word</code> would be in the word interval given by	 * the left extreme of the {@link Interval} returned by this method, whereas	 * the last word extending <code>word</code> would be in the word	 * interval given by the right extreme of the {@link Interval} returned by this method.	 * @see #getApproximatedInterval(BooleanIterator)  	 */			public Interval getApproximatedInterval( final BitVector word ) {		final int length = word.size();		Node n = root;		long[] path;		boolean exactMatch = false, nextBit;				int pos = 0; // Current position in word		while( n != null ) {			// We found the current path: we go searching for left and right delimiters.						path = n.path;						if ( pos == length ) {				if ( n.word >= 0 && path == null ) exactMatch = true;				break;			}			if ( path != null ) {				int maxLength = Math.min( length - pos, n.pathLength );				int i;				for( i = 0; i < maxLength; i++ ) if ( word.get( pos + i ) != QuickBitVector.get( path, i ) ) break;				if ( i < maxLength ) {					// System.err.println( "Exit 1" );					// A mismatch. In this case, it is guaranteed that all					// strings starting with the prefix examined so far lie					// in a single block. The block index depends, however					// on the bit that went wrong.					if ( QuickBitVector.get( path, i ) ) {						while( n.word < 0 ) n = n.left != null ? n.left : n.right;						return n.word > 0 ? Interval.valueOf( n.word - 1 ) : Intervals.EMPTY_INTERVAL;					}					else {						while( ! n.isLeaf() ) n = n.right != null ? n.right : n.left;						return Interval.valueOf( n.word );					}				}				pos += i;								// Completely contained in the current path				if ( pos == length ) {					if ( ASSERTS ) assert n.pathLength == maxLength;					if ( i == n.pathLength && n.word >= 0 ) exactMatch = true;					break;				}							}			if ( n.isLeaf() ) break;						nextBit = word.get( pos++ );			// We would like to take an impossible turn. This case is similar to			// prefix mismatches, with subtly different off-by-ones.			if ( nextBit && n.right == null ) {				while( ! n.isLeaf() ) n = n.right != null ? n.right : n.left;				return Interval.valueOf( n.word );			}			else if ( ! nextBit && n.left == null ) {				while( n.word < 0 ) n = n.left != null ? n.left : n.right;				return Interval.valueOf( n.word );			}			n = nextBit ? n.right : n.left;		}				Node l = n;		// Searching for the left extreme...				//System.err.println("Going for exit 2: l:" + l + " n:" + n);				while( l.word < 0 ) l = l.left != null ? l.left : l.right;		// Searching for the right extreme, unless we're on a leaf.		while ( ! n.isLeaf() ) n = n.right != null ? n.right : n.left;		// If did not find an exact match and l.word is 0 we are lexicographically before every word.		if ( pos == length && ! exactMatch ) {			if ( l.word == 0 ) return Intervals.EMPTY_INTERVAL;			else return Interval.valueOf( l.word - 1, n.word );		}				// System.err.println( "Exit 2 (hasNext: " +iterator.hasNext() + " exactMatch: " + exactMatch +")" );		return Interval.valueOf( l.word, n.word );	}	/** Returns an approximated prefix interval around the word returned by the specified iterator.	 * 	 * @param iterator an iterator.	 * @return an approximated interval around the specified word: if the words in this trie	 * are thought of as left interval extremes in a larger lexicographically ordered set of words,	 * and we number these word intervals using the indices of their left extremes,	 * then the first word extending <code>word</code> would be in the word interval given by	 * the left extreme of the {@link Interval} returned by this method, whereas	 * the last word extending <code>word</code> would be in the word	 * interval given by the right extreme of the {@link Interval} returned by this method.	 * @see #getApproximatedInterval(BitVector)	 */			public Interval getApproximatedInterval( final BooleanIterator iterator ) {		Node n = root;		long[] path;		boolean exactMatch = false, mismatch = false, nextBit;				for(;;) {			// We found the current path: we go searching for left and right delimiters.						path = n.path;						if ( ! iterator.hasNext() ) {				if ( n.word >= 0 && path == null ) exactMatch = true;				break;			}			if ( path != null ) {				int i;				final int pathSize = n.pathLength;				for( i = 0; i < pathSize && iterator.hasNext(); i++ ) if ( ( mismatch = ( iterator.nextBoolean() != QuickBitVector.get( path, i ) ) ) ) break;				if ( mismatch ) {					// System.err.println( "Exit 1" );					// A mismatch. In this case, it is guaranteed that all					// strings starting with the prefix examined so far lie					// in a single block. The block index depends, however					// on the bit that went wrong.					if ( QuickBitVector.get( path, i ) ) {						while( n.word < 0 ) n = n.left != null ? n.left : n.right;						return n.word > 0 ? Interval.valueOf( n.word - 1 ) : Intervals.EMPTY_INTERVAL;					}					else {						while( ! n.isLeaf() ) n = n.right != null ? n.right : n.left;						return Interval.valueOf( n.word );					}				}				// Completely contained in the current path				if ( ! iterator.hasNext() ) {					if ( i == pathSize && n.word >= 0 ) exactMatch = true;					break;				}							}			if ( n.isLeaf() ) break;						nextBit = iterator.nextBoolean();						// We would like to take an impossible turn. This case is similar to			// prefix mismatches, with subtly different off-by-ones.			if ( nextBit && n.right == null ) {				while( ! n.isLeaf() ) n = n.right != null ? n.right : n.left;				return Interval.valueOf( n.word );			}			else if ( ! nextBit && n.left == null ) {				while( n.word < 0 ) n = n.left != null ? n.left : n.right;				return Interval.valueOf( n.word );			}						n = nextBit ? n.right : n.left;		}				Node l = n;		// Searching for the left extreme...				//System.err.println("Going for exit 2: l:" + l + " n:" + n);				while( l.word < 0 ) l = l.left != null ? l.left : l.right;			// Searching for the right extreme, unless we're on a leaf.		while ( ! n.isLeaf() ) n = n.right != null ? n.right : n.left;				// If did not find an exact match and l.word is 0 we are lexicographically before every word.		if ( ! iterator.hasNext() && ! exactMatch ) {			if ( l.word == 0 ) return mismatch ? Intervals.EMPTY_INTERVAL : Interval.valueOf( 0 );			else return Interval.valueOf( l.word - 1, n.word );		}		// System.err.println( "Exit 2 (hasNext: " +iterator.hasNext() + " exactMatch: " + exactMatch +")" );		return Interval.valueOf( l.word, n.word );	}			private void recToString( final Node n, final MutableString printPrefix, final MutableString result, final MutableString path, final int level ) {		if ( n == null ) return;				result.append( printPrefix ).append( '(' ).append( level ).append( ')' );				if ( path != null ) {			path.append( path );			result.append( " path:" ).append( path );		}		if ( n.word >= 0 ) result.append( " word: " ).append( n.word ).append( " (" ).append( path ).append( ')' );		result.append( '\n' );				path.append( '0' );		recToString( n.left, printPrefix.append( '\t' ).append( "0 => " ), result, path, level + 1 );		path.charAt( path.length() - 1, '1' ); 		recToString( n.right, printPrefix.replace( printPrefix.length() - 5, printPrefix.length(), "1 => "), result, path, level + 1 );		path.delete( path.length() - 1, path.length() ); 		printPrefix.delete( printPrefix.length() - 6, printPrefix.length() );				path.delete( path.length() - n.pathLength, path.length() );	}		public String toString() {		MutableString s = new MutableString();		recToString( root, new MutableString(), s, new MutableString(), 0 );		return s.toString();	}}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -