📄 queryengine.java
字号:
this.scorer.setWeights( index2Weight ); } }

    /** Sets the transformer for this engine, or disables query transformation.
     *
     * @param transformer a {@linkplain QueryTransformer query transformer}, or <code>null</code> to disable query transformation.
     */
    public synchronized void transformer( final QueryTransformer transformer ) {
        this.transformer = transformer;
    }

    /** Sets the index weights.
     *
     * <p>The given weights are copied into this engine's own weight map (which is cleared first
     * and whose default return value is reset to 0); if a scorer is currently set, the given map
     * is also passed to {@link Scorer#setWeights(Reference2DoubleMap)}.
     *
     * @param index2Weight a map from indices to weights.
     */
    public synchronized void setWeights( final Reference2DoubleMap<Index> index2Weight ) {
        this.index2Weight.clear();
        this.index2Weight.defaultReturnValue( 0 );
        this.index2Weight.putAll( index2Weight );
        if ( scorer != null ) scorer.setWeights( index2Weight );
    }

    /** Turns the given query into a multiplexed query if {@link #multiplex} is on.
     *
     * <p>The multiplexed form is the disjunction (<code> | </code>) of <code>key:(query)</code>
     * for every key of the index map, in the iteration order of its key set.
     *
     * @param query a query.
     * @return <code>query</code>, if {@link #multiplex} is off; a multiplexed version of <code>query</code>, otherwise.
     */
    private String multiplex( final String query ) {
        if ( ! multiplex ) return query;

        final Iterator<String> it = indexMap.keySet().iterator();
        final StringBuilder builder = new StringBuilder();
        while ( it.hasNext() ) {
            builder.append( it.next() ).append( ":(" ).append( query ).append( ")" );
            if ( it.hasNext() ) builder.append( " | " );
        }

        LOGGER.debug( "Multiplex is active: submitting " + builder );
        return builder.toString();
    }

    /** Parses one or more comma-separated queries and deposits in a given array a segment of the
     * results corresponding to the queries, using the current settings of this query engine.
     *
     * <p>Results are accumulated with an "and-then" semantics: results
     * are added from each query in order, provided they did not appear before.
     *
     * <p>The input is split on every comma; each part is multiplexed (if multiplexing is on),
     * parsed, passed through the current transformer (if any), and the resulting array is handed to
     * {@link #process(Query[], int, int, ObjectArrayList)}.
     *
     * @param queries one or more queries separated by commas.
     * @param offset the first result to be added to <code>results</code>.
     * @param length the number of results to be added to <code>results</code>
     * @param results an array list that will hold all results.
     * @return the number of relevant documents scanned while filling <code>results</code>.
     * @throws QueryParserException declared by this method; presumably raised by the query parser on a malformed part.
     * @throws QueryBuilderVisitorException propagated from {@link #process(Query[], int, int, ObjectArrayList)}.
     * @throws IOException propagated from {@link #process(Query[], int, int, ObjectArrayList)}.
     */
    public int process( final String queries, int offset, final int length, final ObjectArrayList<DocumentScoreInfo<Reference2ObjectMap<Index,SelectedInterval[]>>> results ) throws QueryParserException, QueryBuilderVisitorException, IOException {
        LOGGER.debug( "Processing query \"" + queries + "\", offset=" + offset + ", length="+ length );

        final String[] part = queries.split( "," );
        final Query[] partQuery = new Query[ part.length ];
        for ( int i = 0; i < part.length; i++ ) {
            final String q = multiplex( part[ i ] );
            partQuery[ i ] = queryParser.parse( q );
            if ( transformer != null ) partQuery[ i ] = transformer.transform( partQuery[ i ] );
        }

        return process( partQuery, offset, length, results );
    }

    /** Processes one or more pre-parsed queries and deposits in a given array a segment of the
     * results corresponding to the queries, using the current settings of this query engine.
     *
     * <p>Results are accumulated with an "and-then" semantics: results
     * are added from each query in order, provided they did not appear before.
     *
     * <p><code>results</code> is cleared before processing starts. Every document iterator built
     * by this method is disposed even if an exception is thrown while it is in use.
     *
     * @param query an array of queries.
     * @param offset the first result to be added to <code>results</code>.
     * @param length the number of results to be added to <code>results</code>
     * @param results an array list that will hold all results.
     * @return the number of documents scanned while filling <code>results</code>.
     * @throws QueryBuilderVisitorException declared by this method; presumably raised while building a document iterator from a query.
     * @throws IOException if scanning or disposing a document iterator fails.
     */
    @SuppressWarnings("unchecked")
    public int process( final Query query[], final int offset, final int length, final ObjectArrayList<DocumentScoreInfo<Reference2ObjectMap<Index,SelectedInterval[]>>> results ) throws QueryBuilderVisitorException, IOException {
        LOGGER.debug( "Processing Query array \"" + Arrays.toString( query ) + "\", offset=" + offset + ", length="+ length );
        results.clear();
        double lastMinScore = 1;
        int total = 0, count, currOffset = offset, currLength = length;
        // Duplicates can only arise across queries, so no set is needed for a single query.
        final IntOpenHashSet alreadySeen = query.length > 1 ? new IntOpenHashSet() : null;

        for ( int i = 0; i < query.length; i++ ) {
            final int initialResultSize = results.size();
            boolean someHavePositions = false;

            final DocumentIterator documentIterator = query[ i ].accept( builderVisitor.prepare() );
            try {
                count = scorer != null
                        ? getScoredResults( documentIterator, currOffset, currLength, lastMinScore, results, alreadySeen )
                        : getResults( documentIterator, currOffset, currLength, results, alreadySeen );

                // Read the indices while the iterator is still alive, rather than after dispose().
                for ( Index index : documentIterator.indices() ) someHavePositions |= index.hasPositions;
            }
            finally {
                documentIterator.dispose();
            }

            if ( results.size() > 0 ) lastMinScore = results.get( results.size() - 1 ).score;

            total += count;
            currOffset -= count;

            if ( currOffset < 0 ) {
                // This query went past the offset: shrink the remaining length accordingly.
                currLength += currOffset;
                currOffset = 0;
            }

            // Check whether we have intervals, we want intervals *and* we added some results.
            if ( someHavePositions && intervalSelector != null && results.size() != initialResultSize ) {
                // We must now enrich the returned result with intervals
                DocumentScoreInfo<Reference2ObjectMap<Index,SelectedInterval[]>> sorted[] =
                        results.subList( initialResultSize, results.size() ).toArray( new DocumentScoreInfo[ results.size() - initialResultSize ] );
                // Sorted with DOCUMENT_COMPARATOR so that the skipTo() calls below follow that order.
                Sorting.quickSort( sorted, DocumentScoreInfo.DOCUMENT_COMPARATOR );

                // A fresh iterator is built for the same query; the first one has been consumed and disposed.
                final DocumentIterator intervalIterator = query[ i ].accept( builderVisitor.prepare() );
                try {
                    for ( DocumentScoreInfo<Reference2ObjectMap<Index,SelectedInterval[]>> dsi : sorted ) {
                        intervalIterator.skipTo( dsi.document );
                        dsi.info = intervalSelector.select( intervalIterator, new Reference2ObjectArrayMap<Index,SelectedInterval[]>( numIndices ) );
                    }
                }
                finally {
                    intervalIterator.dispose();
                }
            }

            if ( ASSERTS ) assert length >= results.size();
            if ( length == results.size() ) break;
        }

        return total;
    }

    /** Scans a document iterator through the current scorer and appends to <code>results</code>
     * the best-scoring not-already-seen documents falling in the requested window.
     *
     * <p>Scores of the newly appended items are multiplied by a common factor computed from
     * <code>lastMinScore</code> (see the comments in the body).
     *
     * @param documentIterator the iterator to be wrapped by the scorer and scanned to exhaustion.
     * @param offset the number of top-scoring documents to be skipped.
     * @param length the maximum number of documents to be appended.
     * @param lastMinScore the score of the last item currently in <code>results</code> (the caller passes 1 when there is none).
     * @param results the list to which results are appended.
     * @param alreadySeen the set of documents returned by previous queries, or <code>null</code> to disable duplicate filtering; scanned documents are added to it.
     * @return the number of not-already-seen documents scanned.
     * @throws IOException if scanning the iterator fails.
     */
    private int getScoredResults( final DocumentIterator documentIterator, final int offset, final int length, final double lastMinScore, final ObjectArrayList<DocumentScoreInfo<Reference2ObjectMap<Index,SelectedInterval[]>>> results, final IntSet alreadySeen ) throws IOException {
        // Computed in long arithmetic: offset + length overflows int when length is close to Integer.MAX_VALUE.
        final int capacity = (int)Math.min( Integer.MAX_VALUE, (long)offset + length );
        final ScoredDocumentBoundedSizeQueue<Reference2ObjectMap<Index,SelectedInterval[]>> top = new ScoredDocumentBoundedSizeQueue<Reference2ObjectMap<Index,SelectedInterval[]>>( capacity );
        int document, count = 0; // Number of not-already-seen documents

        scorer.wrap( documentIterator );
        while ( ( document = scorer.nextDocument() ) != -1 ) {
            if ( alreadySeen != null && ! alreadySeen.add( document ) ) continue;
            count++;
            // TODO: we should avoid enqueuing until we really know we shall use the values
            top.enqueue( document, scorer.score() );
        }

        final int n = Math.max( top.size() - offset, 0 ); // Number of actually useful documents, if any
        if ( ASSERTS ) assert n <= length : n;
        if ( n > 0 ) {
            final int s = results.size();
            results.size( s + n );
            final Object[] elements = results.elements();
            // Fill the new slots from last to first with the dequeued items.
            // NOTE(review): presumably the queue dequeues lowest score first, giving decreasing scores -- confirm.
            for ( int i = n; i-- != 0; ) elements[ i + s ] = top.dequeue();

            // We scale all newly inserted item so that scores are always decreasing
            // The division by the maximum score was missing in previous versions; can be removed to reproduce regressions.
            // TODO: this will change scores if offset leaves out an entire query
            final double adjustment = lastMinScore / ( s != 0 ? ((DocumentScoreInfo<?>)elements[ s ]).score : 1.0 );
            for ( int i = n; i-- != 0; ) ((DocumentScoreInfo<?>)elements[ i + s ]).score *= adjustment;
        }

        return count;
    }

    /** Scans a document iterator without scoring and appends to <code>results</code> the
     * not-already-seen documents whose rank falls in the requested window; appended items get score -1.
     *
     * @param documentIterator the iterator to be scanned to exhaustion.
     * @param offset the number of not-already-seen documents to be skipped.
     * @param length the maximum number of documents to be appended.
     * @param results the list to which results are appended.
     * @param alreadySeen the set of documents returned by previous queries, or <code>null</code> to disable duplicate filtering; scanned documents are added to it.
     * @return the number of not-already-seen documents scanned.
     * @throws IOException if scanning the iterator fails.
     */
    private int getResults( final DocumentIterator documentIterator, final int offset, final int length, final ObjectArrayList<DocumentScoreInfo<Reference2ObjectMap<Index,SelectedInterval[]>>> results, final IntSet alreadySeen ) throws IOException {
        int document, count = 0; // Number of not-already-seen documents
        // Computed in long arithmetic: offset + length overflows int when length is close to Integer.MAX_VALUE,
        // which would make the window test below always false.
        final long end = (long)offset + length;

        // Unfortunately, to provide the exact count of results we have to scan the whole iterator.
        while ( ( document = documentIterator.nextDocument() ) != -1 ) {
            if ( alreadySeen != null && ! alreadySeen.add( document ) ) continue;
            if ( count >= offset && count < end ) results.add( new DocumentScoreInfo<Reference2ObjectMap<Index,SelectedInterval[]>>( document, -1 ) );
            count++;
        }

        return count;
    }

    /** Returns the name of this class followed by the string form of the index map. */
    @Override
    public String toString() {
        return this.getClass().getName() + indexMap;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -