📄 ternaryintervalsearchtree.java
字号:
s.append( e.path, 0, e.path.length - 1 ); e = e.left; continue; } index -= e.left.numNodes; } if ( e.isWord ) { if ( index == 0 ) return s.append( e.path ).compact(); index--; } if ( e.middle != null ) { if ( index < e.middle.numNodes ) { s.append( e.path ); e = e.middle; continue; } index -= e.middle.numNodes; } s.append( e.path, 0, e.path.length - 1 ); e = e.right; } } public CharSequence get( final int index ) { return getTerm( index ); } @Deprecated public int getIndex( final CharSequence s ) { return getNumber( s ); } public int getNumber( final CharSequence s ) { final int l = s.length(); Node e = root; int i; int offset = 0; int wordsAtLeft = 0; char c; char[] path; while( e != null ) { path = e.path; for( i = 0; i < path.length - 1; i++ ) if ( offset + i == l || s.charAt( offset + i ) != path[ i ] ) return -1; offset += i; if ( offset == l ) return -1; c = s.charAt( offset ); if ( c < e.path[ i ] ) e = e.left; else if ( c > e.path[ i ] ) { if ( e.left != null ) wordsAtLeft += e.left.numNodes; if ( e.middle != null ) wordsAtLeft += e.middle.numNodes; if ( e.isWord ) wordsAtLeft++; e = e.right; } else { offset++; if ( e.left != null ) wordsAtLeft += e.left.numNodes; if ( offset == l ) return e.isWord ? wordsAtLeft : -1; if ( e.isWord ) wordsAtLeft++; e = e.middle; } } return -1; } /** True if the last {@link #add(CharSequence)} modified the tree. */ private boolean modified; public boolean add( final CharSequence s ) { modified = false; root = addRec( s, 0, s.length(), root ); return modified; } public CharSequence getPrefix( final Interval interval ) { final MutableString s = new MutableString(); return getPrefix ( interval, s ); } public MutableString getPrefix( final Interval interval, final MutableString prefix ) { if ( interval == Intervals.EMPTY_INTERVAL || interval.left < 0 || interval.right < 0 ) throw new IllegalArgumentException(); getTerm( interval.left, prefix ); if ( interval.length() == 1 ) return prefix; final MutableString s = (MutableString)getTerm( interval.right ); final int l = Math.min( prefix.length(), s.length() ); int i; for( i = 0; i < l; i++ ) if ( s.charAt( i ) != prefix.charAt( i ) ) break; return prefix.length( i ); } /** Inserts the given character sequence, starting at the given position, in the given subtree. * * @param s the character sequence containing the characters to be inserted. * @param offset the first character to be inserted. * @param length the number of characters to be inserted. * @param e the subtree in which the characters should be inserted, or <code>null</code> if * a new node should be created. * @return the new node at the top of the subtree. */ private Node addRec( final CharSequence s, final int offset, final int length, final Node e ) { if ( e == null ) { // We create a new node containing all the characters and return it. modified = true; size++; return new Node( s, offset, length, true, 1 ); } /* We start scanning the path contained in the current node, up to * the last character excluded. If we find a mismatch, or if we exhaust our * characters, we must fork this node. */ char c; int i; Node n = null; final char[] path = e.path; for ( i = 0; i < path.length - 1; i++ ) { c = s.charAt( offset + i ); if ( c < path[ i ] ) { /* We fork on the left, keeping just the first i + 1 characters (this is necessary * as at least one character must be present in every node). The new * node will cover one word more than e. */ n = new Node( path, 0, i + 1, false, e.numNodes + 1 ); n.middle = e; e.removePathPrefix( i + 1 ); n.left = addRec( s, offset + i, length - i, null ); break; } else if ( c > path[ i ] ) { // As before, but on the right. n = new Node( path, 0, i + 1, false, e.numNodes + 1 ); n.middle = e; e.removePathPrefix( i + 1 ); n.right = addRec( s, offset + i, length - i, null ); break; } else { if ( i == length - 1 ) { /* We exhausted the character sequence. We fork in the middle, * keeping length characters and marking the new node as * containing one work. Again, the new code will cover one word * more than e. */ n = new Node( s, offset, length, true, e.numNodes + 1 ); n.middle = e; e.removePathPrefix( length ); size++; modified = true; break; } } } if ( i < path.length - 1 ) return n; /* We are positioned on the last character of the path. In this case our * behaviour is different, as if we must fork we must not perform any * splitting. Moreover, if we exhaust the characters we either found * the new sequence in the tree, or we just have to mark the node. */ c = s.charAt( offset + i ); if ( c < path[ i ] ) { /** We fork on the left. The number of words under this node will * increase only if the structure is modified. */ e.left = addRec( s, offset + i, length - i, e.left ); if ( modified ) e.numNodes++; } else if ( c > path[ i ] ) { e.right = addRec( s, offset + i, length - i, e.right ); if ( modified ) e.numNodes++; } else { if ( i == length - 1 ) { // This is the node. if ( modified = !e.isWord ) { e.numNodes++; size++; } e.isWord = true; } else { // We add a node in the middle, completing the sequence. e.middle = addRec( s, offset + i + 1, length - i - 1, e.middle ); if ( modified ) e.numNodes++; } } return e; } public int size() { return size; } public boolean hasPrefixes() { return true; } public boolean hasTerms() { return true; } public static void main( final String[] arg ) throws IOException, JSAPException, NoSuchMethodException { final SimpleJSAP jsap = new SimpleJSAP( TernaryIntervalSearchTree.class.getName(), "Builds a ternary interval search tree reading from standard input a newline-separated list of terms.", new Parameter[] { new FlaggedOption( "bufferSize", JSAP.INTSIZE_PARSER, "64Ki", JSAP.NOT_REQUIRED, 'b', "buffer-size", "The size of the I/O buffer used to read terms." ), new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The term file encoding." ), new UnflaggedOption( "tree", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised tree." ) }); JSAPResult jsapResult = jsap.parse( arg ); if ( jsap.messagePrinted() ) return; final TernaryIntervalSearchTree tree = new TernaryIntervalSearchTree(); MutableString term = new MutableString(); final ProgressLogger pl = new ProgressLogger(); pl.itemsName = "terms"; final FastBufferedReader terms = new FastBufferedReader( new InputStreamReader( System.in, (Charset)jsapResult.getObject( "encoding" ) ), jsapResult.getInt( "bufferSize" ) ); pl.start( "Reading terms..." ); while( terms.readLine( term ) != null ) { pl.update(); tree.add( term ); } pl.done(); BinIO.storeObject( tree, jsapResult.getString( "tree" ) ); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -