📄 queryengine.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
			this.scorer.setWeights( index2Weight );
		}
	}

	/** Sets the transformer for this engine, or disables query transformation.
	 *
	 * @param transformer a {@linkplain QueryTransformer query transformer}, or <code>null</code> to disable query transformation.
	 */
	public synchronized void transformer( final QueryTransformer transformer ) {
		this.transformer = transformer;
	}

	/** Sets the index weights.
	 *
	 * <p>The content of this engine's own weight map is replaced with a copy of the given map
	 * (with default return value 0); then, if a scorer is set, the given map is also passed
	 * to {@link Scorer#setWeights(Reference2DoubleMap)}.
	 *
	 * @param index2Weight a map from indices to weights.
	 */
	public synchronized void setWeights( final Reference2DoubleMap<Index> index2Weight ) {
		this.index2Weight.clear();
		this.index2Weight.defaultReturnValue( 0 );
		this.index2Weight.putAll( index2Weight );
		if ( scorer != null ) scorer.setWeights( index2Weight );
	}

	/** Turns the given query into a multiplexed query if {@link #multiplex} is on.
	 *
	 * <p>The multiplexed query is the disjunction (<code>|</code>) of <code>key:(query)</code>
	 * for every key of the index map, in the iteration order of its key set.
	 *
	 * @param query a query.
	 * @return <code>query</code>, if {@link #multiplex} is off; a multiplexed version of <code>query</code>, otherwise.
	 */
	private String multiplex( final String query ) {
		if ( ! multiplex ) return query;

		final Iterator<String> it = indexMap.keySet().iterator();
		final StringBuilder builder = new StringBuilder();

		while ( it.hasNext() ) {
			builder.append( it.next() ).append( ":(" ).append( query ).append( ")" );
			if ( it.hasNext() ) builder.append( " | " );
		}

		LOGGER.debug( "Multiplex is active: submitting " + builder );
		return builder.toString();
	}

	/** Parses one or more comma-separated queries and deposits in a given array a segment of the
	 * results corresponding to the queries, using the current settings of this query engine.
	 *
	 * <p>Results are accumulated with an &ldquo;and-then&rdquo; semantics: results
	 * are added from each query in order, provided they did not appear before.
	 *
	 * <p>Each comma-separated part is first {@linkplain #multiplex(String) multiplexed}, then parsed
	 * and, if a transformer is set, transformed; the resulting array is passed to
	 * {@link #process(Query[], int, int, ObjectArrayList)}.
	 *
	 * @param queries one or more queries separated by commas.
	 * @param offset the first result to be added to <code>results</code>.
	 * @param length the number of results to be added to <code>results</code>
	 * @param results an array list that will hold all results.
	 * @return the number of relevant documents scanned while filling <code>results</code>.
	 * @throws QueryParserException propagated from the query parser.
	 * @throws QueryBuilderVisitorException propagated from query processing.
	 * @throws IOException propagated from query processing.
	 */
	public int process( final String queries, final int offset, final int length, final ObjectArrayList<DocumentScoreInfo<Reference2ObjectMap<Index,SelectedInterval[]>>> results ) throws QueryParserException, QueryBuilderVisitorException, IOException {
		LOGGER.debug( "Processing query \"" + queries + "\", offset=" + offset + ", length="+ length );
		final String[] part = queries.split( "," );
		final Query[] partQuery = new Query[ part.length ];
		for( int i = 0; i < part.length; i++ ) {
			final String q = multiplex( part[ i ] );
			partQuery[ i ] = queryParser.parse( q );
			if ( transformer != null ) partQuery[ i ] = transformer.transform( partQuery[ i ] );
		}

		return process( partQuery, offset, length, results );
	}

	/** Processes one or more pre-parsed queries and deposits in a given array a segment of the
	 * results corresponding to the queries, using the current settings of this query engine.
	 *
	 * <p>Results are accumulated with an &ldquo;and-then&rdquo; semantics: results
	 * are added from each query in order, provided they did not appear before.
	 *
	 * <p>The list <code>results</code> is cleared before processing starts. Every document iterator
	 * created by this method is disposed even if scanning or interval selection throws an exception.
	 *
	 * @param query an array of queries.
	 * @param offset the first result to be added to <code>results</code>.
	 * @param length the number of results to be added to <code>results</code>
	 * @param results an array list that will hold all results.
	 * @return the number of documents scanned while filling <code>results</code>.
	 * @throws QueryBuilderVisitorException propagated from the visit that builds a document iterator.
	 * @throws IOException propagated from the document iterators.
	 */
	@SuppressWarnings("unchecked")
	public int process( final Query query[], final int offset, final int length, final ObjectArrayList<DocumentScoreInfo<Reference2ObjectMap<Index,SelectedInterval[]>>> results ) throws QueryBuilderVisitorException, IOException {
		LOGGER.debug( "Processing Query array \"" + Arrays.toString( query ) + "\", offset=" + offset + ", length="+ length );
		results.clear();
		double lastMinScore = 1;
		int total = 0, count, currOffset = offset, currLength = length;
		// Duplicates can only show up when there is more than one query.
		final IntOpenHashSet alreadySeen = query.length > 1 ? new IntOpenHashSet() : null;

		for( int i = 0; i < query.length; i++ ) {
			final int initialResultSize = results.size();

			DocumentIterator documentIterator = query[ i ].accept( builderVisitor.prepare() );
			try {
				count = scorer != null ?
					getScoredResults( documentIterator, currOffset, currLength, lastMinScore, results, alreadySeen ) :
					getResults( documentIterator, currOffset, currLength, results, alreadySeen );
			}
			finally {
				// Release the iterator also when scanning fails.
				documentIterator.dispose();
			}

			// The score of the last result bounds the (rescaled) scores of the next query.
			if ( results.size() > 0 ) lastMinScore = results.get( results.size() - 1 ).score;

			total += count;
			// The documents just scanned consume part of the offset; once the offset is
			// exhausted, the overshoot (a negative value) is subtracted from the remaining length.
			currOffset -= count;
			if ( currOffset < 0 ) {
				currLength += currOffset;
				currOffset = 0;
			}

			// Check whether we have intervals, we want intervals *and* we added some results.
			// NOTE(review): as in the original code, indices() is queried after dispose() -- confirm this is supported.
			boolean someHavePositions = false;
			for( Index index: documentIterator.indices() ) someHavePositions |= index.hasPositions;

			if ( someHavePositions && intervalSelector != null && results.size() != initialResultSize ) {
				// We must now enrich the returned result with intervals
				DocumentScoreInfo<Reference2ObjectMap<Index,SelectedInterval[]>> sorted[] =
					results.subList( initialResultSize, results.size() ).toArray( new DocumentScoreInfo[ results.size() - initialResultSize ] );
				// The new results are visited in the order given by DOCUMENT_COMPARATOR while skipping through a fresh iterator.
				Sorting.quickSort( sorted, DocumentScoreInfo.DOCUMENT_COMPARATOR );
				documentIterator = query[ i ].accept( builderVisitor.prepare() );
				try {
					for( DocumentScoreInfo<Reference2ObjectMap<Index,SelectedInterval[]>> dsi: sorted ) {
						documentIterator.skipTo( dsi.document );
						dsi.info = intervalSelector.select( documentIterator, new Reference2ObjectArrayMap<Index,SelectedInterval[]>( numIndices ) );
					}
				}
				finally {
					// Release the iterator also when interval selection fails.
					documentIterator.dispose();
				}
			}

			if ( ASSERTS ) assert length >= results.size();
			if ( length == results.size() ) break;
		}

		return total;
	}

	/** Scans a document iterator through the current scorer and appends to <code>results</code>
	 * the requested segment of the scored documents.
	 *
	 * <p>All not-already-seen documents are enqueued in a bounded queue of size
	 * <code>offset + length</code>; the elements exceeding <code>offset</code> are then dequeued into
	 * the tail of <code>results</code>, filling it from the last position backwards. The scores of
	 * the new elements are finally multiplied by <code>lastMinScore</code>, divided by the score of
	 * the first new element if <code>results</code> was not empty.
	 *
	 * @param documentIterator the document iterator to be scanned.
	 * @param offset the number of scored documents to be skipped.
	 * @param length the maximum number of documents to be added to <code>results</code>.
	 * @param lastMinScore the scaling factor for the scores of the new results.
	 * @param results the list to which results are appended.
	 * @param alreadySeen the set of documents already seen (it will be updated), or <code>null</code> to disable duplicate filtering.
	 * @return the number of not-already-seen documents returned by the scorer.
	 */
	private int getScoredResults( final DocumentIterator documentIterator, final int offset, final int length, final double lastMinScore, final ObjectArrayList<DocumentScoreInfo<Reference2ObjectMap<Index,SelectedInterval[]>>> results, final IntSet alreadySeen ) throws IOException {
		final ScoredDocumentBoundedSizeQueue<Reference2ObjectMap<Index,SelectedInterval[]>> top = new ScoredDocumentBoundedSizeQueue<Reference2ObjectMap<Index,SelectedInterval[]>>( offset + length );
		int document, count = 0; // Number of not-already-seen documents

		scorer.wrap( documentIterator );
		while ( ( document = scorer.nextDocument() ) != -1 ) {
			if ( alreadySeen != null && ! alreadySeen.add( document ) ) continue;
			count++;
			// TODO: we should avoid enqueuing until we really know we shall use the values
			top.enqueue( document, scorer.score() );
		}

		final int n = Math.max( top.size() - offset, 0 ); // Number of actually useful documents, if any
		if ( ASSERTS ) assert n <= length : n;
		if ( n > 0 ) {
			final int s = results.size();
			results.size( s + n );
			final Object[] elements = results.elements();
			// We scale all newly inserted item so that scores are always decreasing
			for ( int i = n; i-- != 0; ) elements[ i + s ] = top.dequeue();
			// The division by the maximum score was missing in previous versions; can be removed to reproduce regressions.
			// TODO: this will change scores if offset leaves out an entire query
			final double adjustment = lastMinScore / ( s != 0 ? ((DocumentScoreInfo<?>)elements[ s ]).score : 1.0 );
			for ( int i = n; i-- != 0; ) ((DocumentScoreInfo<?>)elements[ i + s ]).score *= adjustment;
		}

		return count;
	}

	/** Scans a document iterator without scoring and appends to <code>results</code>
	 * the requested segment of the documents returned.
	 *
	 * <p>Each added result has score -1. The iterator is always scanned to the end.
	 *
	 * @param documentIterator the document iterator to be scanned.
	 * @param offset the number of not-already-seen documents to be skipped.
	 * @param length the maximum number of documents to be added to <code>results</code>.
	 * @param results the list to which results are appended.
	 * @param alreadySeen the set of documents already seen (it will be updated), or <code>null</code> to disable duplicate filtering.
	 * @return the number of not-already-seen documents returned by the iterator.
	 */
	private int getResults( final DocumentIterator documentIterator, final int offset, final int length, final ObjectArrayList<DocumentScoreInfo<Reference2ObjectMap<Index,SelectedInterval[]>>> results, final IntSet alreadySeen ) throws IOException {
		int document, count = 0; // Number of not-already-seen documents

		// Unfortunately, to provide the exact count of results we have to scan the whole iterator.
		while ( ( document = documentIterator.nextDocument() ) != -1 ) {
			if ( alreadySeen != null && ! alreadySeen.add( document ) ) continue;
			if ( count >= offset && count < offset + length ) results.add( new DocumentScoreInfo<Reference2ObjectMap<Index,SelectedInterval[]>>( document, -1 ) );
			count++;
		}

		return count;
	}

	/** Returns the name of this class followed by the string representation of the index map. */
	public String toString() {
		return this.getClass().getName() + indexMap;
	}
}
上一页 12
💿 文件大小 1037 K
👤 上传用户 fairbank
📂 所属分类 Java编程
🏷️ 相关标签

#collections #Gigabytes #for #full-text
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -