// File: lucenepathindexretrievalengine.java
// (code-viewer residue: font size control)
/*
* This file is part of Caliph & Emir.
*
* Caliph & Emir is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Caliph & Emir is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Caliph & Emir; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Copyright statement:
* --------------------
* (c) 2002-2005 by Mathias Lux (mathias@juggle.at)
* http://www.juggle.at, http://caliph-emir.sourceforge.net
*/
package at.lux.fotoretrieval.retrievalengines;
import at.lux.components.StatusBar;
import at.lux.fotoretrieval.FileOperations;
import at.lux.fotoretrieval.ResultListEntry;
import at.lux.fotoretrieval.RetrievalToolkit;
import at.lux.fotoretrieval.lucene.*;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;
import org.jdom.input.SAXBuilder;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;
import javax.swing.*;
import java.io.File;
import java.io.IOException;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.*;
/**
* Date: 25.03.2005
* Time: 23:58:46
*
* @author Mathias Lux, mathias@juggle.at
*/
public class LucenePathIndexRetrievalEngine extends AbstractRetrievalEngine {
/** JDOM namespace binding the "xsi" prefix to the XML Schema instance URI. */
private static Namespace xsi = Namespace.getNamespace("xsi", "http://www.w3.org/2001/XMLSchema-instance");
/** Cap on the hits read per graph query and on the size of the list returned by getImagesBySemantics. */
private static final int MAX_RESULTS = 20;
/** Searcher on the path index; assigned in getImagesBySemantics and queried in searchForGraph. */
private IndexSearcher indexSearcher;
/**
 * Expands the given semantic query into concrete graphs, searches the path
 * index for each of them and returns the merged, de-duplicated result list.
 *
 * The searcher is opened once for all graph queries and is always closed
 * again, even if one of the graph searches fails with a runtime exception.
 *
 * @param xPath         the query string handed to the query expansion
 * @param objects       not used by this implementation
 * @param whereToSearch location that is passed on to the index lookup and the query expansion
 * @param recursive     not used by this implementation
 * @param progress      optional progress bar, may be <code>null</code>
 * @return at most MAX_RESULTS entries, sorted by the natural order of ResultListEntry,
 *         containing each description path only once (the first occurrence in sort order wins)
 */
public List<ResultListEntry> getImagesBySemantics(String xPath, Vector objects, String whereToSearch, boolean recursive, JProgressBar progress) {
    if (progress != null) {
        progress.setString("Query expansion running");
    }
    List<Graph> graphList = getExpandedGraphs(xPath, whereToSearch);
    List<ResultListEntry> results = new LinkedList<ResultListEntry>();
    if (progress != null) {
        progress.setMinimum(0);
        progress.setMaximum(graphList.size());
        progress.setValue(0);
        progress.setString("Querying expanded graphs");
    }

    int countGraph = 0;
    IndexSearcher searcher = null;
    try {
        searcher = new IndexSearcher(parsePathIndexDirectory(whereToSearch));
        // searchForGraph reads the searcher from the field.
        indexSearcher = searcher;
        for (Graph graph : graphList) {
            results.addAll(searchForGraph(graph, whereToSearch));
            countGraph++;
            if (progress != null) {
                progress.setValue(countGraph);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Release the index even when a graph search aborted unexpectedly.
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    Collections.sort(results);

    // Keep the first entry per description path; stop as soon as the cap is reached.
    List<ResultListEntry> unique = new ArrayList<ResultListEntry>(MAX_RESULTS);
    Set<String> seenPaths = new HashSet<String>();
    for (ResultListEntry entry : results) {
        if (unique.size() >= MAX_RESULTS) {
            break;
        }
        if (seenPaths.add(entry.getDescriptionPath())) {
            unique.add(entry);
        }
    }
    return unique;
}
/**
 * Runs the Lucene path query built from the given graph against the
 * currently opened index searcher and loads the description documents of
 * the best hits.
 *
 * Only the first MAX_RESULTS hits are read. A hit without a "file" field is
 * skipped, and a description file that cannot be read or parsed is reported
 * and skipped without discarding the remaining hits.
 *
 * The searcher field has to be set before this method is called;
 * getImagesBySemantics does that.
 *
 * @param graph         the graph to search for
 * @param whereToSearch not used by this implementation
 * @return one entry per successfully loaded description file, possibly empty
 */
public List<ResultListEntry> searchForGraph(Graph graph, String whereToSearch) {
    List<ResultListEntry> results = new LinkedList<ResultListEntry>();
    String query = createLucenePathQuery(graph);
    try {
        Query q = QueryParser.parse(query, "paths", new WhitespaceAnalyzer());
        Hits hits = indexSearcher.search(q);
        SAXBuilder builder = new SAXBuilder();
        int maxResults = Math.min(hits.length(), MAX_RESULTS);
        for (int i = 0; i < maxResults; i++) {
            String[] filenames = hits.doc(i).getValues("file");
            if (filenames == null) {
                // The indexed document carries no "file" field.
                continue;
            }
            float score = hits.score(i);
            for (String fileName : filenames) {
                try {
                    Element root = builder.build(fileName).getRootElement();
                    results.add(new ResultListEntry((double) score, root, fileName));
                } catch (JDOMException e) {
                    // Malformed description: report it and go on with the next file.
                    e.printStackTrace();
                } catch (IOException e) {
                    // Unreadable description: report it and go on with the next file.
                    e.printStackTrace();
                }
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    }
    return results;
}
/**
 * Parses a semantic query string and expands it into the list of concrete
 * graphs that should be searched for.
 *
 * The query is split at "]". Every segment starting with "[" is a node
 * query; the last segment without a leading "[" is taken as the relation
 * string. The relation string is a whitespace separated sequence in which
 * a non-numeric token starts a relation of that type and the next two
 * integers are its source and target. Integers that do not belong to a
 * started relation are ignored.
 *
 * The node query "*" is not looked up; it yields a single wildcard node.
 *
 * @param query         the query string to parse
 * @param whereToSearch location passed on to the node lookup
 * @return the expanded graphs as created by getExpandedGraphsFromResults
 */
private List<Graph> getExpandedGraphs(String query, String whereToSearch) {
    // Split the query into node queries and the trailing relation string.
    List<String> nodeQueries = new LinkedList<String>();
    String relationString = "";
    StringTokenizer st = new StringTokenizer(query, "]");
    while (st.hasMoreTokens()) {
        String s = st.nextToken().trim();
        if (s.startsWith("[")) {
            nodeQueries.add(s.substring(1));
        } else {
            relationString = s;
        }
    }

    List<Relation> relations = new LinkedList<Relation>();
    if (relationString.length() > 1) {
        // there are relations, go ahead and parse them:
        StringTokenizer sr = new StringTokenizer(relationString);
        Relation currentRelation = null;
        while (sr.hasMoreTokens()) {
            String s = sr.nextToken();
            try {
                int i = Integer.parseInt(s);
                if (currentRelation == null) {
                    // A number without a preceding relation type: nothing to attach it to.
                    continue;
                }
                if (currentRelation.getSource() < 0) {
                    currentRelation.setSource(i);
                } else if (currentRelation.getTarget() < 0) {
                    currentRelation.setTarget(i);
                    currentRelation.eliminateInverse();
                    relations.add(currentRelation);
                    currentRelation = null;
                }
            } catch (NumberFormatException e) {
                // Not a number, so the token is the type of the next relation.
                currentRelation = new Relation(-1, -1, s.trim());
            }
        }
    }

    // so for now do the retrieval for the nodes:
    List<List<Node>> nodeResults = new LinkedList<List<Node>>();
    for (String queryString : nodeQueries) {
        List<Node> nodes;
        if (!queryString.equals("*")) {
            nodes = getNodes(queryString, whereToSearch);
        } else {
            nodes = new LinkedList<Node>();
            nodes.add(new Node(-1, 1f, "*"));
        }
        nodeResults.add(nodes);
    }

    // now we can expand our query on retrieved nodes:
    final int expansionDepth = 5;
    return getExpandedGraphsFromResults(nodeResults, relations, expansionDepth);
}
/**
 * Builds one graph for every node combination that getExpandedSets
 * produces from the given per-node result lists.
 *
 * @param nodeResults the candidate nodes, one list per query node
 * @param relations   the relations of the query, applied to every combination
 * @param depth       expansion depth handed to getExpandedSets
 * @return the graphs in the order of the expanded combinations
 */
private List<Graph> getExpandedGraphsFromResults(List<List<Node>> nodeResults, List<Relation> relations, int depth) {
    List<Graph> graphs = new LinkedList<Graph>();
    for (List<Node> combination : getExpandedSets(nodeResults, depth)) {
        graphs.add(getGraphFromResults(combination, relations));
    }
    return graphs;
}
/**
 * Creates a graph from one node combination and the query relations.
 *
 * The source and target of each given relation are treated as 1-based
 * positions in <code>nodeResults</code> and are replaced by the node id
 * found at that position. A relation pointing to a position that does not
 * exist results in a NullPointerException when the missing id is unboxed.
 *
 * @param nodeResults the nodes of the graph, in query order
 * @param relations   the relations expressed with node positions
 * @return the graph made of the given nodes and the re-mapped relations
 */
private Graph getGraphFromResults(List<Node> nodeResults, List<Relation> relations) {
    Map<Integer, Integer> realIdByPosition = new HashMap<Integer, Integer>(nodeResults.size());
    List<Node> graphNodes = new LinkedList<Node>();
    int position = 1;
    for (Node node : nodeResults) {
        realIdByPosition.put(position, node.getNodeID());
        graphNodes.add(node);
        position++;
    }

    // Re-create every relation with the real node ids instead of positions.
    List<Relation> graphRelations = new LinkedList<Relation>();
    for (Relation template : relations) {
        int source = realIdByPosition.get(template.getSource());
        int target = realIdByPosition.get(template.getTarget());
        graphRelations.add(new Relation(source, target, template.getType()));
    }

    return new Graph(graphNodes, graphRelations);
}
private List<List<Node>> getExpandedSets(List<List<Node>> nodeResults, int depth) {
if (nodeResults.size() > 1) {
List<Node> firstNodesResults = nodeResults.get(0);
int numLevels = 0;
for (Iterator<Node> iterator = firstNodesResults.iterator(); iterator.hasNext();) {
Node node = iterator.next();
if (node.getWeight() < 1f) break;
numLevels++;
}
numLevels += depth;
if (firstNodesResults.size() < depth) {
numLevels = firstNodesResults.size();
}
List<List<Node>> tmpNodeResults = new LinkedList<List<Node>>(nodeResults);
tmpNodeResults.remove(0);
List<List<Node>> results = getExpandedSets(tmpNodeResults, depth);
List<List<Node>> endResult = new LinkedList<List<Node>>();
for (int i = 0; i < numLevels && i < firstNodesResults.size(); i++) {
for (int j = 0; j < results.size(); j++) {
List<Node> nodeList = new LinkedList<Node>(results.get(j));
nodeList.add(0, firstNodesResults.get(i));
endResult.add(nodeList);
}
}
return endResult;
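// Code-viewer keyboard shortcuts (page residue, not part of the Java source):
//   Copy code:          Ctrl + C
//   Search code:        Ctrl + F
//   Full screen:        F11
//   Toggle theme:       Ctrl + Shift + D
//   Show shortcuts:     ?
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -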