⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 immutabletrieprefixtree.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
字号:
package it.unimi.dsi.mg4j.util;/*		 * MG4J: Managing Gigabytes for Java** Copyright (C) 2005-2007 Sebastiano Vigna **  This library is free software; you can redistribute it and/or modify it*  under the terms of the GNU Lesser General Public License as published by the Free*  Software Foundation; either version 2.1 of the License, or (at your option)*  any later version.**  This library is distributed in the hope that it will be useful, but*  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY*  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License*  for more details.**  You should have received a copy of the GNU Lesser General Public License*  along with this program; if not, write to the Free Software*  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.**/import it.unimi.dsi.fastutil.booleans.BooleanIterator;import it.unimi.dsi.fastutil.chars.Char2IntMap;import it.unimi.dsi.fastutil.objects.ObjectArrayList;import it.unimi.dsi.mg4j.compression.CodedCharSequenceBooleanIterator;import it.unimi.dsi.mg4j.compression.PrefixCoder;import it.unimi.dsi.mg4j.index.PrefixMap;import it.unimi.dsi.mg4j.index.TermMap;import it.unimi.dsi.mg4j.search.Interval;import java.util.Iterator;import java.util.List;import cern.colt.bitvector.BitVector;/** A class adapter from immutable binary tries to prefix trees. *  * <P>Instances of this class map transparently strings into a binary trie. For this to happen, * however, the character-to-symbol map provided at construction time <em>must</em> contain * all characters with which the trie will be ever queried.  *  * <P>Depending on the {@linkplain it.unimi.dsi.fastutil.chars.Char2IntMap#defaultReturnValue() default return value of the map},  * character sequences containing extraneous * characters will cause an {@link java.lang.IndexOutOfBoundsException}, or will behave as if * the extraneous characters are mapped to the symbol returned as default value. *  * <P>This class implements both {@link it.unimi.dsi.mg4j.index.TermMap}, and {@link it.unimi.dsi.mg4j.index.PrefixMap}, * albeit no optional operation is supported.  * @deprecated Moved to <code>dsiutils</code>. */@Deprecatedpublic class ImmutableTriePrefixTree extends ImmutableBinaryTrie implements TermMap, PrefixMap, ImmutableExternalPrefixDictionary.IntervalApproximator {	public static final long serialVersionUID = -7046029254386353130L;		/** The coder used by this prefix tree. */	private final PrefixCoder prefixCoder;	/** The map from characters to symbols. */	private final Char2IntMap char2symbol;		/** Creates a new prefix tree.	 * 	 * @param words a lexicographically ordered list of words.	 * @param prefixCoder a (lexicographic-order preserving) coder used to encoding words.	 * @param char2symbol the map from characters to symbols in the coder.	 */	public ImmutableTriePrefixTree( final List<? extends CharSequence> words, final PrefixCoder prefixCoder, final Char2IntMap char2symbol ) {		super( codeWords( words, prefixCoder, char2symbol ) );		this.prefixCoder = prefixCoder;		this.char2symbol = char2symbol;	}	/** Turns a list of words into a list of bit strings using the given map and coder.	 * 	 * @param words a list of words.	 * @param prefixCoder a prefix coder used to code the words in <code>words</code>.	 * @param char2symbol a map from characters appearing in the <code>words</code> to symbols of <code>prefixCoder</code>.	 * @return a list of bit strings representing the words in <code>words</code> under the given character-to-symbol mapping and prefix coder.	 */		private static ObjectArrayList<BitVector> codeWords( final List<? extends CharSequence> words, final PrefixCoder prefixCoder, final Char2IntMap char2symbol ) {		final ObjectArrayList<BitVector> bitWords = new ObjectArrayList<BitVector>();		for( Iterator<? extends CharSequence> i = words.iterator(); i.hasNext(); ) {			BitVector v = new BitVector( 0 );			BooleanIterator booleanIterator = new CodedCharSequenceBooleanIterator( i.next().toString(), prefixCoder, char2symbol );			int size = 0;			while( booleanIterator.hasNext() ) {				v.setSize( ++size );				if ( booleanIterator.nextBoolean() ) v.set( size - 1 );			}			bitWords.add( v );		}		return bitWords;	}	public Interval getApproximatedInterval( final CharSequence word ) {		return getApproximatedInterval( new CodedCharSequenceBooleanIterator( word, prefixCoder, char2symbol ) );	}	@Deprecated	public int getIndex( final CharSequence s ) {		return getNumber( s );	}	public int getNumber( final CharSequence term ) {		return get( new CodedCharSequenceBooleanIterator( term, prefixCoder, char2symbol ) );	}	public CharSequence getTerm( final int index ) {		throw new UnsupportedOperationException();	}	public MutableString getTerm( final int index, final MutableString term ) {		throw new UnsupportedOperationException();	}	public Interval getInterval( final CharSequence prefix ) {		return getInterval( new CodedCharSequenceBooleanIterator( prefix, prefixCoder, char2symbol ) );	}	public CharSequence getPrefix( final Interval interval ) {		throw new UnsupportedOperationException();	}	public MutableString getPrefix( final Interval interval, final MutableString prefix ) {		throw new UnsupportedOperationException();	}	public boolean hasTerms() {		return false;	}	public boolean hasPrefixes() {		return false;	}}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -