📄 indexsearcher.java
字号:
/** * Copyright 2005 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */package org.apache.nutch.searcher;import java.io.IOException;import java.util.ArrayList;import java.util.Enumeration;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.MultiReader;import org.apache.lucene.search.TopDocs;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.FieldDoc;import org.apache.lucene.search.FieldCache;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.hadoop.fs.*;import org.apache.hadoop.io.*;import org.apache.hadoop.conf.*;import org.apache.nutch.indexer.*;/** Implements {@link Searcher} and {@link HitDetailer} for either a single * merged index, or a set of indexes. */public class IndexSearcher implements Searcher, HitDetailer { private org.apache.lucene.search.Searcher luceneSearcher; private org.apache.lucene.index.IndexReader reader; private LuceneQueryOptimizer optimizer; private FileSystem fs; private Configuration conf; private QueryFilters queryFilters; /** Construct given a number of indexes. */ public IndexSearcher(Path[] indexDirs, Configuration conf) throws IOException { IndexReader[] readers = new IndexReader[indexDirs.length]; this.conf = conf; this.fs = FileSystem.get(conf); for (int i = 0; i < indexDirs.length; i++) { readers[i] = IndexReader.open(getDirectory(indexDirs[i])); } init(new MultiReader(readers), conf); } /** Construct given a single merged index. */ public IndexSearcher(Path index, Configuration conf) throws IOException { this.conf = conf; this.fs = FileSystem.get(conf); init(IndexReader.open(getDirectory(index)), conf); } private void init(IndexReader reader, Configuration conf) throws IOException { this.reader = reader; this.luceneSearcher = new org.apache.lucene.search.IndexSearcher(reader); this.luceneSearcher.setSimilarity(new NutchSimilarity()); this.optimizer = new LuceneQueryOptimizer(conf); this.queryFilters = new QueryFilters(conf); } private Directory getDirectory(Path file) throws IOException { if ("local".equals(this.fs.getName())) { return FSDirectory.getDirectory(file.toString(), false); } else { return new FsDirectory(this.fs, file, false, this.conf); } } public Hits search(Query query, int numHits, String dedupField, String sortField, boolean reverse) throws IOException { org.apache.lucene.search.BooleanQuery luceneQuery = this.queryFilters.filter(query); return translateHits (optimizer.optimize(luceneQuery, luceneSearcher, numHits, sortField, reverse), dedupField, sortField); } public String getExplanation(Query query, Hit hit) throws IOException { return luceneSearcher.explain(this.queryFilters.filter(query), hit.getIndexDocNo()).toHtml(); } public HitDetails getDetails(Hit hit) throws IOException { ArrayList fields = new ArrayList(); ArrayList values = new ArrayList(); Document doc = luceneSearcher.doc(hit.getIndexDocNo()); Enumeration e = doc.fields(); while (e.hasMoreElements()) { Field field = (Field)e.nextElement(); fields.add(field.name()); values.add(field.stringValue()); } return new HitDetails((String[])fields.toArray(new String[fields.size()]), (String[])values.toArray(new String[values.size()])); } public HitDetails[] getDetails(Hit[] hits) throws IOException { HitDetails[] results = new HitDetails[hits.length]; for (int i = 0; i < hits.length; i++) results[i] = getDetails(hits[i]); return results; } private Hits translateHits(TopDocs topDocs, String dedupField, String sortField) throws IOException { String[] dedupValues = null; if (dedupField != null) dedupValues = FieldCache.DEFAULT.getStrings(reader, dedupField); ScoreDoc[] scoreDocs = topDocs.scoreDocs; int length = scoreDocs.length; Hit[] hits = new Hit[length]; for (int i = 0; i < length; i++) { int doc = scoreDocs[i].doc; WritableComparable sortValue; // convert value to writable if (sortField == null) { sortValue = new FloatWritable(scoreDocs[i].score); } else { Object raw = ((FieldDoc)scoreDocs[i]).fields[0]; if (raw instanceof Integer) { sortValue = new IntWritable(((Integer)raw).intValue()); } else if (raw instanceof Float) { sortValue = new FloatWritable(((Float)raw).floatValue()); } else if (raw instanceof String) { sortValue = new UTF8((String)raw); } else { throw new RuntimeException("Unknown sort value type!"); } } String dedupValue = dedupValues == null ? null : dedupValues[doc]; hits[i] = new Hit(doc, sortValue, dedupValue); } return new Hits(topDocs.totalHits, hits); } public void close() throws IOException { if (luceneSearcher != null) { luceneSearcher.close(); } if (reader != null) { reader.close(); } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -