⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 bloomfilter.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
	 * not been added to the filter. This will happen with probability 2<sup>-<var>d</var></sup>,
	 * where <var>d</var> is the number of hash functions specified at creation time, if
	 * the number of the elements in the filter is less than <var>n</var>, the number
	 * of expected elements specified at creation time.
	 * 
	 * @param s a character sequence.
	 * @return true if <code>s</code> (or some element
	 * with the same hash sequence as <code>s</code>) is in the filter.
	 */

	public boolean contains( final CharSequence s ) {
		int i = d, l = s.length();
		final long[] bits = this.bits;
		// Probe the d hash positions (i = d-1 .. 0); one failed lookup is enough to answer "no".
		while( i-- != 0 ) if ( ! get( bits, hash( s, l, i ) ) ) return false;
		return true;
	}

	/** Checks whether the given byte array is in this filter. 
	 * 
	 * @param a a byte array.
	 * @return true if <code>a</code> (or some element
	 * with the same hash sequence as <code>a</code>) is in the filter.
	 * @see #contains(CharSequence)
	 */

	public boolean contains( final byte[] a ) {
		int i = d, l = a.length;
		final long[] bits = this.bits;
		// Probe the d hash positions; one failed lookup is enough to answer "no".
		while( i-- != 0 ) if ( ! get( bits, hash( a, l, i ) ) ) return false;
		return true;
	}

	/** Checks whether the given short array is in this filter. 
	 * 
	 * @param a a short array.
	 * @return true if <code>a</code> (or some element
	 * with the same hash sequence as <code>a</code>) is in the filter.
	 * @see #contains(CharSequence)
	 */

	public boolean contains( final short[] a ) {
		int i = d, l = a.length;
		final long[] bits = this.bits;
		// Probe the d hash positions; one failed lookup is enough to answer "no".
		while( i-- != 0 ) if ( ! get( bits, hash( a, l, i ) ) ) return false;
		return true;
	}

	/** Checks whether the given character array is in this filter. 
	 * 
	 * @param a a character array.
	 * @return true if <code>a</code> (or some element
	 * with the same hash sequence as <code>a</code>) is in the filter.
	 * @see #contains(CharSequence)
	 */

	public boolean contains( final char[] a ) {
		int i = d, l = a.length;
		final long[] bits = this.bits;
		// Probe the d hash positions; one failed lookup is enough to answer "no".
		while( i-- != 0 ) if ( ! get( bits, hash( a, l, i ) ) ) return false;
		return true;
	}

	/** Checks whether the given int array is in this filter. 
	 * 
	 * @param a an int array.
	 * @return true if <code>a</code> (or some element
	 * with the same hash sequence as <code>a</code>) is in the filter.
	 * @see #contains(CharSequence)
	 */

	public boolean contains( final int[] a ) {
		int i = d, l = a.length;
		final long[] bits = this.bits;
		// Probe the d hash positions; one failed lookup is enough to answer "no".
		while( i-- != 0 ) if ( ! get( bits, hash( a, l, i ) ) ) return false;
		return true;
	}

	/** Checks whether the given long array is in this filter. 
	 * 
	 * @param a a long array.
	 * @return true if <code>a</code> (or some element
	 * with the same hash sequence as <code>a</code>) is in the filter.
	 * @see #contains(CharSequence)
	 */

	public boolean contains( final long[] a ) {
		int i = d, l = a.length;
		final long[] bits = this.bits;
		// Probe the d hash positions; one failed lookup is enough to answer "no".
		while( i-- != 0 ) if ( ! get( bits, hash( a, l, i ) ) ) return false;
		return true;
	}

	/** Checks whether the given float array is in this filter. 
	 * 
	 * @param a a float array.
	 * @return true if <code>a</code> (or some element
	 * with the same hash sequence as <code>a</code>) is in the filter.
	 * @see #contains(CharSequence)
	 */

	public boolean contains( final float[] a ) {
		int i = d, l = a.length;
		final long[] bits = this.bits;
		// Probe the d hash positions; one failed lookup is enough to answer "no".
		while( i-- != 0 ) if ( ! get( bits, hash( a, l, i ) ) ) return false;
		return true;
	}

	/** Checks whether the given double array is in this filter. 
	 * 
	 * @param a a double array.
	 * @return true if <code>a</code> (or some element
	 * with the same hash sequence as <code>a</code>) is in the filter.
	 * @see #contains(CharSequence)
	 */

	public boolean contains( final double[] a ) {
		int i = d, l = a.length;
		final long[] bits = this.bits;
		// Probe the d hash positions; one failed lookup is enough to answer "no".
		while( i-- != 0 ) if ( ! get( bits, hash( a, l, i ) ) ) return false;
		return true;
	}

	/** Adds a character sequence to the filter.
	 * 
	 * @param s a character sequence.
	 * @return true if this filter was modified (i.e., neither <code>s</code> nor any
	 * other element with the same hash sequence as <code>s</code> was already in this filter).
	 */

	public boolean add( final CharSequence s ) {
		int i = d, l = s.length();
		final long[] bits = this.bits;
		boolean alreadySet = true;
		// The compound &= always evaluates its right-hand side, so set() runs for
		// all d positions even after alreadySet has become false.
		// NOTE(review): set() is defined elsewhere; presumably it returns whether the bit was already set.
		while( i-- != 0 ) alreadySet &= set( bits, hash( s, l, i ) );
		if ( ! alreadySet ) size++;
		return ! alreadySet;
	}
	
	/** Adds a byte array to the filter.
	 * 
	 * @param a a byte array.
	 * @return true if this filter was modified (i.e., neither <code>a</code> nor any
	 * other element with the same hash sequence as <code>a</code> was already in this filter).
	 */

	public boolean add( final byte[] a ) {
		int i = d, l = a.length;
		final long[] bits = this.bits;
		boolean alreadySet = true;
		// &= (not &&): set() must run for all d positions.
		while( i-- != 0 ) alreadySet &= set( bits, hash( a, l, i ) );
		if ( ! alreadySet ) size++;
		return ! alreadySet;
	}

	/** Adds a short array to the filter.
	 * 
	 * @param a a short array.
	 * @return true if this filter was modified (i.e., neither <code>a</code> nor any
	 * other element with the same hash sequence as <code>a</code> was already in this filter).
	 */

	public boolean add( final short[] a ) {
		int i = d, l = a.length;
		final long[] bits = this.bits;
		boolean alreadySet = true;
		// &= (not &&): set() must run for all d positions.
		while( i-- != 0 ) alreadySet &= set( bits, hash( a, l, i ) );
		if ( ! alreadySet ) size++;
		return ! alreadySet;
	}

	/** Adds a character array to the filter.
	 * 
	 * @param a a character array.
	 * @return true if this filter was modified (i.e., neither <code>a</code> nor any
	 * other element with the same hash sequence as <code>a</code> was already in this filter).
	 */

	public boolean add( final char[] a ) {
		int i = d, l = a.length;
		final long[] bits = this.bits;
		boolean alreadySet = true;
		// &= (not &&): set() must run for all d positions.
		while( i-- != 0 ) alreadySet &= set( bits, hash( a, l, i ) );
		if ( ! alreadySet ) size++;
		return ! alreadySet;
	}

	/** Adds an int array to the filter.
	 * 
	 * @param a an int array.
	 * @return true if this filter was modified (i.e., neither <code>a</code> nor any
	 * other element with the same hash sequence as <code>a</code> was already in this filter).
	 */

	public boolean add( final int[] a ) {
		int i = d, l = a.length;
		final long[] bits = this.bits;
		boolean alreadySet = true;
		// &= (not &&): set() must run for all d positions.
		while( i-- != 0 ) alreadySet &= set( bits, hash( a, l, i ) );
		if ( ! alreadySet ) size++;
		return ! alreadySet;
	}

	/** Adds a long array to the filter.
	 * 
	 * @param a a long array.
	 * @return true if this filter was modified (i.e., neither <code>a</code> nor any
	 * other element with the same hash sequence as <code>a</code> was already in this filter).
	 */

	public boolean add( final long[] a ) {
		int i = d, l = a.length;
		final long[] bits = this.bits;
		boolean alreadySet = true;
		// &= (not &&): set() must run for all d positions.
		while( i-- != 0 ) alreadySet &= set( bits, hash( a, l, i ) );
		if ( ! alreadySet ) size++;
		return ! alreadySet;
	}

	/** Adds a float array to the filter.
	 * 
	 * @param a a float array.
	 * @return true if this filter was modified (i.e., neither <code>a</code> nor any
	 * other element with the same hash sequence as <code>a</code> was already in this filter).
	 */

	public boolean add( final float[] a ) {
		int i = d, l = a.length;
		final long[] bits = this.bits;
		boolean alreadySet = true;
		// &= (not &&): set() must run for all d positions.
		while( i-- != 0 ) alreadySet &= set( bits, hash( a, l, i ) );
		if ( ! alreadySet ) size++;
		return ! alreadySet;
	}

	/** Adds a double array to the filter.
	 * 
	 * @param a a double array.
	 * @return true if this filter was modified (i.e., neither <code>a</code> nor any
	 * other element with the same hash sequence as <code>a</code> was already in this filter).
	 */

	public boolean add( final double[] a ) {
		int i = d, l = a.length;
		final long[] bits = this.bits;
		boolean alreadySet = true;
		// &= (not &&): set() must run for all d positions.
		while( i-- != 0 ) alreadySet &= set( bits, hash( a, l, i ) );
		if ( ! alreadySet ) size++;
		return ! alreadySet;
	}

	/** Clears this filter: zeroes the bit vector and resets the size counter.
	 */
	
	public void clear() {
		LongArrays.fill( bits, 0 );
		size = 0;
	}

	/** Returns the size of this filter.
	 *
	 * <p>Note that the size of a Bloom filter is only a <em>lower bound</em>
	 * for the number of distinct elements that have been added to the filter.
	 * False positives might make the number returned by this method smaller
	 * than it should be.
	 * 
	 * @return the size of this filter.
	 */
	
	public long size() {
		return size;
	}

	/** Creates a Bloom filter from a newline-separated list of terms read from
	 * standard input and serialises it to the file named on the command line.
	 *
	 * <p>Positional arguments are the output filename, the expected number of
	 * elements and the precision; the I/O buffer size and the input encoding
	 * are optional flags.
	 *
	 * @param arg the command-line arguments.
	 */
	
	public static void main( final String[] arg ) throws IOException, JSAPException, NoSuchMethodException {
		
		final SimpleJSAP jsap = new SimpleJSAP( BloomFilter.class.getName(), "Creates a Bloom filter reading from standard input a newline-separated list of terms.",
				new Parameter[] {
					new FlaggedOption( "bufferSize", IntSizeStringParser.getParser(), "64Ki", JSAP.NOT_REQUIRED, 'b',  "buffer-size", "The size of the I/O buffer used to read terms." ),
					new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The term file encoding." ),
					// Help text fixed: this tool writes a Bloom filter, not a front-coded list.
					new UnflaggedOption( "bloomFilter", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised Bloom filter." ),
					new UnflaggedOption( "size", JSAP.INTSIZE_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The size of the filter (i.e., the expected number of elements in the filter; usually, the number of terms)." ),
					new UnflaggedOption( "precision", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The precision of the filter." )
		});
		
		JSAPResult jsapResult = jsap.parse( arg );
		// Stop here when the parser has already printed a message (help or error).
		if ( jsap.messagePrinted() ) return;
		
		final int bufferSize = jsapResult.getInt( "bufferSize" );
		final String filterName = jsapResult.getString( "bloomFilter" );
		final Charset encoding = (Charset)jsapResult.getObject( "encoding" );

		BloomFilter filter = new BloomFilter( jsapResult.getInt( "size" ), jsapResult.getInt( "precision" ) );
		final ProgressLogger pl = new ProgressLogger();
		pl.itemsName = "terms";
		pl.start( "Reading terms..." );
		// A single MutableString is reused as the line buffer for every term.
		MutableString s = new MutableString();
		FastBufferedReader reader = new FastBufferedReader( new InputStreamReader( System.in, encoding ), bufferSize );
		while( reader.readLine( s ) != null ) { 
			filter.add( s );
			pl.lightUpdate();
		}
		pl.done();

		BinIO.storeObject( filter, filterName );
	}	
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -