📄 immutabletrieprefixtree.java
字号:
package it.unimi.dsi.mg4j.util;/* * MG4J: Managing Gigabytes for Java** Copyright (C) 2005-2007 Sebastiano Vigna ** This library is free software; you can redistribute it and/or modify it* under the terms of the GNU Lesser General Public License as published by the Free* Software Foundation; either version 2.1 of the License, or (at your option)* any later version.** This library is distributed in the hope that it will be useful, but* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License* for more details.** You should have received a copy of the GNU Lesser General Public License* along with this program; if not, write to the Free Software* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.**/import it.unimi.dsi.fastutil.booleans.BooleanIterator;import it.unimi.dsi.fastutil.chars.Char2IntMap;import it.unimi.dsi.fastutil.objects.ObjectArrayList;import it.unimi.dsi.mg4j.compression.CodedCharSequenceBooleanIterator;import it.unimi.dsi.mg4j.compression.PrefixCoder;import it.unimi.dsi.mg4j.index.PrefixMap;import it.unimi.dsi.mg4j.index.TermMap;import it.unimi.dsi.mg4j.search.Interval;import java.util.Iterator;import java.util.List;import cern.colt.bitvector.BitVector;/** A class adapter from immutable binary tries to prefix trees. * * <P>Instances of this class map transparently strings into a binary trie. For this to happen, * however, the character-to-symbol map provided at construction time <em>must</em> contain * all characters with which the trie will be ever queried. * * <P>Depending on the {@linkplain it.unimi.dsi.fastutil.chars.Char2IntMap#defaultReturnValue() default return value of the map}, * character sequences containing extraneous * characters will cause an {@link java.lang.IndexOutOfBoundsException}, or will behave as if * the extraneous characters are mapped to the symbol returned as default value. * * <P>This class implements both {@link it.unimi.dsi.mg4j.index.TermMap}, and {@link it.unimi.dsi.mg4j.index.PrefixMap}, * albeit no optional operation is supported. * @deprecated Moved to <code>dsiutils</code>. */@Deprecatedpublic class ImmutableTriePrefixTree extends ImmutableBinaryTrie implements TermMap, PrefixMap, ImmutableExternalPrefixDictionary.IntervalApproximator { public static final long serialVersionUID = -7046029254386353130L; /** The coder used by this prefix tree. */ private final PrefixCoder prefixCoder; /** The map from characters to symbols. */ private final Char2IntMap char2symbol; /** Creates a new prefix tree. * * @param words a lexicographically ordered list of words. * @param prefixCoder a (lexicographic-order preserving) coder used to encoding words. * @param char2symbol the map from characters to symbols in the coder. */ public ImmutableTriePrefixTree( final List<? extends CharSequence> words, final PrefixCoder prefixCoder, final Char2IntMap char2symbol ) { super( codeWords( words, prefixCoder, char2symbol ) ); this.prefixCoder = prefixCoder; this.char2symbol = char2symbol; } /** Turns a list of words into a list of bit strings using the given map and coder. * * @param words a list of words. * @param prefixCoder a prefix coder used to code the words in <code>words</code>. * @param char2symbol a map from characters appearing in the <code>words</code> to symbols of <code>prefixCoder</code>. * @return a list of bit strings representing the words in <code>words</code> under the given character-to-symbol mapping and prefix coder. */ private static ObjectArrayList<BitVector> codeWords( final List<? extends CharSequence> words, final PrefixCoder prefixCoder, final Char2IntMap char2symbol ) { final ObjectArrayList<BitVector> bitWords = new ObjectArrayList<BitVector>(); for( Iterator<? extends CharSequence> i = words.iterator(); i.hasNext(); ) { BitVector v = new BitVector( 0 ); BooleanIterator booleanIterator = new CodedCharSequenceBooleanIterator( i.next().toString(), prefixCoder, char2symbol ); int size = 0; while( booleanIterator.hasNext() ) { v.setSize( ++size ); if ( booleanIterator.nextBoolean() ) v.set( size - 1 ); } bitWords.add( v ); } return bitWords; } public Interval getApproximatedInterval( final CharSequence word ) { return getApproximatedInterval( new CodedCharSequenceBooleanIterator( word, prefixCoder, char2symbol ) ); } @Deprecated public int getIndex( final CharSequence s ) { return getNumber( s ); } public int getNumber( final CharSequence term ) { return get( new CodedCharSequenceBooleanIterator( term, prefixCoder, char2symbol ) ); } public CharSequence getTerm( final int index ) { throw new UnsupportedOperationException(); } public MutableString getTerm( final int index, final MutableString term ) { throw new UnsupportedOperationException(); } public Interval getInterval( final CharSequence prefix ) { return getInterval( new CodedCharSequenceBooleanIterator( prefix, prefixCoder, char2symbol ) ); } public CharSequence getPrefix( final Interval interval ) { throw new UnsupportedOperationException(); } public MutableString getPrefix( final Interval interval, final MutableString prefix ) { throw new UnsupportedOperationException(); } public boolean hasTerms() { return false; } public boolean hasPrefixes() { return false; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -