📄 luceneretrievalengine.java
字号:
/*
* This file is part of Caliph & Emir.
*
* Caliph & Emir is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Caliph & Emir is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Caliph & Emir; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Copyright statement:
* --------------------
* (c) 2002-2005 by Mathias Lux (mathias@juggle.at)
* http://www.juggle.at, http://caliph-emir.sourceforge.net
*/
package at.lux.fotoretrieval.retrievalengines;
import at.lux.components.StatusBar;
import at.lux.fotoretrieval.FileOperations;
import at.lux.fotoretrieval.ResultListEntry;
import at.lux.fotoretrieval.RetrievalToolkit;
import at.lux.fotoretrieval.lucene.Graph;
import at.lux.fotoretrieval.lucene.Node;
import at.lux.fotoretrieval.lucene.Relation;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;
import org.jdom.input.SAXBuilder;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;
import javax.swing.*;
import java.io.*;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.*;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
/**
* Date: 13.10.2004
* Time: 21:47:58
*
* @author Mathias Lux, mathias@juggle.at
*/
public class LuceneRetrievalEngine extends AbstractRetrievalEngine {
/**
 * Result limit of this engine.
 * NOTE(review): not referenced in the visible part of the file; presumably the
 * maximum number of hits a search returns - confirm against the search methods.
 */
public static final int MAX_RESULTS = 15;
/** JDOM namespace bound to the prefix "xsi" for the XML Schema instance URI. */
private static Namespace xsi = Namespace.getNamespace("xsi", "http://www.w3.org/2001/XMLSchema-instance");
/**
 * Maps the name of a semantic relation to the name of the relation that
 * expresses the same link in the opposite direction, e.g. "agent" to "agentOf".
 * NOTE(review): presumably consulted when relations are normalised (see
 * Relation.eliminateInverse()) - confirm against that class.
 */
public static final HashMap<String, String> relationMapping;

static {
    // 38 entries: a capacity of 64 keeps the table below the default load
    // factor so it is never rehashed while it is being filled.
    relationMapping = new HashMap<String, String>(64);
    relationMapping.put("key", "keyFor");
    relationMapping.put("annotates", "annotatedBy");
    relationMapping.put("shows", "appearsIn");
    relationMapping.put("references", "referencedBy");
    relationMapping.put("quality", "qualityOf");
    relationMapping.put("symbolizes", "symbolizedBy");
    relationMapping.put("location", "locationOf");
    relationMapping.put("source", "sourceOf");
    relationMapping.put("destination", "destinationOf");
    relationMapping.put("path", "pathOf");
    relationMapping.put("time", "timeOf");
    relationMapping.put("depicts", "depictedBy");
    relationMapping.put("represents", "representedBy");
    relationMapping.put("context", "contextFor");
    relationMapping.put("interprets", "interpretedBy");
    relationMapping.put("agent", "agentOf");
    relationMapping.put("patient", "patientOf");
    relationMapping.put("experiencer", "experiencerOf");
    relationMapping.put("stimulus", "stimulusOf");
    relationMapping.put("causer", "causerOf");
    relationMapping.put("goal", "goalOf");
    relationMapping.put("beneficiary", "beneficiaryOf");
    relationMapping.put("theme", "themeOf");
    relationMapping.put("result", "resultOf");
    relationMapping.put("instrument", "instrumentOf");
    relationMapping.put("accompanier", "accompanierOf");
    // was misspelled "summarizedBt", which can never match a real relation name
    relationMapping.put("summarizes", "summarizedBy");
    relationMapping.put("specializes", "generalizes");
    relationMapping.put("exemplifies", "exemplifiedBy");
    relationMapping.put("part", "partOf");
    relationMapping.put("property", "propertyOf");
    relationMapping.put("user", "userOf");
    relationMapping.put("component", "componentOf");
    relationMapping.put("substance", "substanceOf");
    relationMapping.put("entails", "entailedBy");
    relationMapping.put("manner", "mannerOf");
    relationMapping.put("state", "stateOf");
    relationMapping.put("influences", "dependsOn");
}
/**
 * Searches for images with a String query defining a graph. The nodes are
 * given as search queries in square brackets and are referenced in the
 * relations by their position, starting with number 1. The relations follow
 * the nodes; each one consists of its type followed by the position of the
 * source node and the position of the target node:
 * <pre>
 * query    := node+ relation+
 * node     := [term+]
 * relation := type source target
 * term     := String
 * type     := String
 * source   := Integer
 * target   := Integer
 * </pre>
 * e.g. <code>[Mathias Lux] [Talking] agentOf 1 2</code>
 * <p>
 * A number that does not belong to a relation (no type given before it, or
 * the relation already has source and target) is ignored. A query without
 * any node yields an empty result.
 *
 * @param xPath         the graph query in the syntax described above (in this
 *                      engine it is not parsed as an XPath expression)
 * @param objects       can be set to <code>null</code>; not used by this method
 * @param whereToSearch handed on unchanged to the node search and the graph
 *                      search (presumably the location of the index - confirm)
 * @param recursive     not used by this method
 * @param progress      progress bar that is updated while the expanded graphs
 *                      are queried and while duplicates are removed; may be
 *                      <code>null</code>
 * @return the sorted results, containing for each file path only the entry
 *         with the highest relevance
 */
public List<ResultListEntry> getImagesBySemantics(String xPath, Vector objects, String whereToSearch, boolean recursive, JProgressBar progress) {
    // Split the query at the closing brackets: every chunk starting with '['
    // is a node query, the last chunk without one holds the relations.
    List<String> nodeQueries = new LinkedList<String>();
    String relationString = "";
    StringTokenizer st = new StringTokenizer(xPath, "]");
    while (st.hasMoreTokens()) {
        String s = st.nextToken().trim();
        if (s.startsWith("[")) {
            nodeQueries.add(s.substring(1));
        } else {
            relationString = s;
        }
    }
    if (nodeQueries.isEmpty()) {
        // nothing to search for; expanding an empty node set would fail
        return new LinkedList<ResultListEntry>();
    }

    List<Relation> relations = new LinkedList<Relation>();
    if (relationString.length() > 1) {
        // there are relations, go ahead and parse them:
        StringTokenizer sr = new StringTokenizer(relationString);
        Relation currentRelation = null;
        while (sr.hasMoreTokens()) {
            String s = sr.nextToken();
            int position;
            try {
                position = Integer.parseInt(s);
            } catch (NumberFormatException e) {
                // not a number, so it is the type of the next relation
                currentRelation = new Relation(-1, -1, s.trim());
                continue;
            }
            if (currentRelation == null) {
                // stray number without a preceding relation type: skip it
                continue;
            }
            if (currentRelation.getSource() < 0) {
                currentRelation.setSource(position);
            } else if (currentRelation.getTarget() < 0) {
                currentRelation.setTarget(position);
                currentRelation.eliminateInverse();
                relations.add(currentRelation);
                currentRelation = null;
            }
        }
    }

    // so for now do the retrieval for the nodes:
    List<List<Node>> nodeResults = new LinkedList<List<Node>>();
    for (String queryString : nodeQueries) {
        nodeResults.add(getNodes(queryString, whereToSearch));
    }

    // now we can expand our query on retrieved nodes:
    List<Graph> graphList = getExpandedGraphsFromResults(nodeResults, relations, 3);
    LinkedList<ResultListEntry> results = new LinkedList<ResultListEntry>();
    if (progress != null) {
        progress.setMinimum(0);
        progress.setMaximum(graphList.size());
        progress.setValue(0);
        progress.setString("Querying expanded graphs");
    }
    int count = 0;
    for (Graph graph : graphList) {
        results.addAll(searchForGraph(graph, whereToSearch));
        count++;
        if (progress != null) {
            progress.setValue(count);
        }
    }

    // eliminate the duplicates: keep the most relevant entry per file
    if (progress != null) {
        progress.setMinimum(0);
        progress.setMaximum(results.size());
        progress.setValue(0);
        progress.setString("Removing double entries");
    }
    count = 0;
    Map<String, ResultListEntry> bestPerFile = new HashMap<String, ResultListEntry>();
    for (ResultListEntry entry : results) {
        String file = entry.getFilePath();
        ResultListEntry known = bestPerFile.get(file);
        if (known == null || known.getRelevance() < entry.getRelevance()) {
            bestPerFile.put(file, entry);
        }
        count++;
        if (progress != null) {
            progress.setValue(count);
        }
    }
    results.clear();
    results.addAll(bestPerFile.values());
    Collections.sort(results);
    return results;
}
/**
 * Turns the candidate nodes found for each query node into query graphs:
 * every node combination produced by getExpandedSets is combined with the
 * given relations into one graph.
 *
 * @param nodeResults one list of candidate nodes per query node
 * @param relations   the relations of the query, shared by all graphs
 * @param depth       handed on to getExpandedSets
 * @return one graph per node combination, in the order of the combinations
 */
private List<Graph> getExpandedGraphsFromResults(List<List<Node>> nodeResults, List<Relation> relations, int depth) {
    List<Graph> graphs = new LinkedList<Graph>();
    for (List<Node> combination : getExpandedSets(nodeResults, depth)) {
        graphs.add(getGraphFromResults(combination, relations));
    }
    // NOTE: relations without a type would additionally need a graph with the
    // reversed relation to find all results; that expansion is currently
    // disabled.
    return graphs;
}
/**
 * Builds all combinations that pick one candidate node per query node.
 * <p>
 * For each query node only the first candidates are considered: the leading
 * run of candidates whose weight is not below 1, plus <code>depth</code>
 * further candidates, limited by the number of candidates available.
 * NOTE(review): the lists are presumably ordered by descending weight -
 * confirm against getNodes.
 *
 * @param nodeResults one list of candidate nodes per query node
 * @param depth       number of candidates taken in addition to the leading
 *                    ones with a weight of at least 1
 * @return the combinations, each holding one node per query node in query
 *         order; empty if <code>nodeResults</code> is empty or one of its
 *         lists contributes no candidate
 */
private List<List<Node>> getExpandedSets(List<List<Node>> nodeResults, int depth) {
    List<List<Node>> endResult = new LinkedList<List<Node>>();
    if (nodeResults.isEmpty()) {
        // no query nodes, hence no combinations
        return endResult;
    }
    List<Node> firstNodesResults = nodeResults.get(0);

    // number of candidates to take from the first list
    int numLevels = 0;
    for (Node node : firstNodesResults) {
        if (node.getWeight() < 1f) {
            break;
        }
        numLevels++;
    }
    numLevels += depth;

    // combinations of the remaining lists; a single empty combination when
    // the first list is the only one
    List<List<Node>> tails;
    if (nodeResults.size() > 1) {
        tails = getExpandedSets(nodeResults.subList(1, nodeResults.size()), depth);
    } else {
        tails = new LinkedList<List<Node>>();
        tails.add(new LinkedList<Node>());
    }

    // prepend each selected candidate to each combination of the rest
    int taken = 0;
    for (Node head : firstNodesResults) {
        if (taken >= numLevels) {
            break;
        }
        taken++;
        for (List<Node> tail : tails) {
            List<Node> combination = new LinkedList<Node>(tail);
            combination.add(0, head);
            endResult.add(combination);
        }
    }
    return endResult;
}
private Graph getGraphFromResults(List<Node> nodeResults, List<Relation> relations) {
HashMap<Integer, Integer> idReplacementTable = new HashMap<Integer, Integer>(nodeResults.size());
List<Node> nodes = new LinkedList<Node>();
List<Relation> myRelations = new LinkedList<Relation>();
for (int i = 0; i < nodeResults.size(); i++) {
Node node = nodeResults.get(i);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -