📄 luceneretrievalengine.java
字号:
idReplacementTable.put(i + 1, node.getNodeID());
nodes.add(node);
}
// Create the relations with the real IDs:
for (Iterator<Relation> iterator = relations.iterator(); iterator.hasNext();) {
Relation r = iterator.next();
int src = (idReplacementTable.get(r.getSource()));
int tgt = (idReplacementTable.get(r.getTarget()));
myRelations.add(new Relation(src, tgt, r.getType()));
}
// now we can create the graph we want to search for:
Graph g = new Graph(nodes, myRelations);
return g;
}
/**
 * Scans the gzip-compressed graph index file <code>idx_graphs.list</code> located in
 * <code>whereToSearch</code> for lines that contain every node and relation of the
 * query graph and builds one result entry per file listed on each matching line.
 * <p/>
 * Each index line is split at <code>'|'</code>: the first token is parsed as a graph,
 * all following tokens are treated as file names of descriptions to load. The
 * similarity stored in the result entries is
 * <code>g.getMcsSimilarity(graphOfTheLine)</code>.
 * <p/>
 * Any exception raised while reading the index or loading a description is printed
 * and ends the scan; the entries collected so far are returned.
 *
 * @param g             the query graph; its <code>toString()</code> form is split at
 *                      <code>'['</code> to obtain the single nodes and relations
 * @param whereToSearch directory that contains <code>idx_graphs.list</code>, with or
 *                      without a trailing file separator
 * @return the result entries in the order they were found, never <code>null</code>
 */
private List<ResultListEntry> searchForGraph(Graph g, String whereToSearch) {
    // All nodes and relations are surrounded with square brackets in the string
    // form of the graph, so each of them becomes its own regular expression of
    // the form ".*\[item\].*" - a line has to match all of them.
    final String regexInsert = ".*";
    List<String> graphSearchList = new LinkedList<String>();
    StringTokenizer stok = new StringTokenizer(g.toString(), "[");
    while (stok.hasMoreTokens()) {
        String s = stok.nextToken().trim();
        if (s.length() == 0) {
            // whitespace-only fragment: there is no item to search for and the
            // substring call below would fail on it.
            continue;
        }
        // cut off the last character of the token (the closing ']' of the item)
        s = s.substring(0, s.length() - 1);
        StringBuilder regexItem = new StringBuilder(32);
        // there may be other nodes & relations in front of the item:
        regexItem.append(regexInsert);
        // opening bracket '['
        regexItem.append("\\x5B");
        // the actual content (node or relation)
        // NOTE(review): the content is inserted unescaped because relation wildcards
        // ("\w*") have to stay regex syntax - confirm that labels cannot contain
        // other regex meta characters.
        regexItem.append(s);
        // closing bracket ']'
        regexItem.append("\\x5D");
        // and there may be other nodes & relations behind it:
        regexItem.append(regexInsert);
        String regex = regexItem.toString();
        if (regex.indexOf("\\w*") > -1) {
            // here we create support for relation wildcards:
            regex = expandUntypedRelation(regex);
        }
        graphSearchList.add(regex);
    }

    String indexFile;
    if (!whereToSearch.endsWith(File.separator)) {
        indexFile = whereToSearch + File.separator + "idx_graphs.list";
    } else {
        indexFile = whereToSearch + "idx_graphs.list";
    }

    List<ResultListEntry> resultList = new LinkedList<ResultListEntry>();
    SAXBuilder builder = new SAXBuilder();
    FileInputStream rawIn = null;
    BufferedReader br = null;
    try {
        // compile every expression once instead of once per index line:
        List<java.util.regex.Pattern> patterns = new LinkedList<java.util.regex.Pattern>();
        for (String regex : graphSearchList) {
            patterns.add(java.util.regex.Pattern.compile(regex));
        }
        rawIn = new FileInputStream(indexFile);
        br = new BufferedReader(new InputStreamReader(new GZIPInputStream(rawIn)));
        String line;
        while ((line = br.readLine()) != null) {
            boolean match = true;
            for (java.util.regex.Pattern pattern : patterns) {
                if (!pattern.matcher(line).matches()) {
                    // one missing item is enough to reject the line
                    match = false;
                    break;
                }
            }
            if (!match) {
                continue;
            }
            // we found a graph:
            System.out.println("FOUND: " + line);
            StringTokenizer st = new StringTokenizer(line, "|");
            String graphString = st.nextToken();
            Graph theGraph = new Graph(graphString);
            float similarity = g.getMcsSimilarity(theGraph);
            while (st.hasMoreTokens()) {
                String fileName = st.nextToken();
                Element e = builder.build(fileName).getRootElement();
                ResultListEntry entry = new ResultListEntry((double) similarity, e, fileName);
                resultList.add(entry);
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // release the index file in every case; closing the reader also closes the
        // wrapped streams, the raw stream is only closed directly if wrapping failed.
        try {
            if (br != null) {
                br.close();
            } else if (rawIn != null) {
                rawIn.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    return resultList;
}
/**
 * Rewrites the first untyped relation in a regular expression so that it matches
 * the relation in both directions.
 * <p/>
 * The two space-separated values that follow the first <code>\w*</code> (and the
 * single character after it) are replaced by an alternation of both orders, e.g.
 * <code>\w* 1 2\x5D</code> becomes <code>\w* ((1 2)|(2 1))\x5D</code>. The second
 * value ends at the next backslash.
 *
 * @param regex expression containing <code>\w*</code> followed by two values and a backslash
 * @return the expression with the value pair replaced by the alternation
 */
private String expandUntypedRelation(String regex) {
    // position right behind the wildcard and the character following it
    final int splitAt = regex.indexOf("\\w*") + 4;
    final String head = regex.substring(0, splitAt);
    final String tail = regex.substring(splitAt);

    final int blank = tail.indexOf(' ');
    final int backslash = tail.indexOf('\\');
    final String first = tail.substring(0, blank);
    final String second = tail.substring(blank + 1, backslash);

    StringBuilder expanded = new StringBuilder(regex.length() + tail.length() + 8);
    expanded.append(head);
    expanded.append("((").append(first).append(" ").append(second);
    expanded.append(")|(").append(second).append(" ").append(first).append("))");
    expanded.append(tail.substring(backslash));
    return expanded.toString();
}
/**
 * Stub: this implementation does not perform any search and ignores all of its
 * arguments.
 *
 * @param VisualDescriptor not used
 * @param whereToSearch    not used
 * @param recursive        not used
 * @param progress         not used
 * @return always <code>null</code> - callers have to check for it
 */
public Vector getSimilarImages(Element VisualDescriptor, String whereToSearch, boolean recursive, JProgressBar progress) {
return null;
}
/**
 * Runs a fulltext query against the Lucene index found in the fulltext index
 * directory of <code>whereToSearch</code> (see
 * {@link #parseFulltextIndexDirectory(String)}) and loads the description document
 * of every hit.
 * <p/>
 * Despite its name the <code>xPath</code> argument is handed to the Lucene query
 * parser with <code>"all"</code> as default field. At most <code>MAX_RESULTS</code>
 * hits are read. I/O, query parsing and XML errors are printed; the results
 * collected up to that point are returned.
 *
 * @param xPath         the query string, parsed with the Lucene query parser
 * @param whereToSearch base directory that contains the fulltext index
 * @param recursive     not used by this implementation
 * @param progress      progress bar to update while reading, may be <code>null</code>
 * @return a vector of <code>ResultListEntry</code> objects, never <code>null</code>
 */
public Vector getImagesByXPathSearch(String xPath, String whereToSearch, boolean recursive, JProgressBar progress) {
    Vector<ResultListEntry> results = new Vector<ResultListEntry>();
    if (progress != null) {
        progress.setString("Searching through index");
    }
    SAXBuilder builder = new SAXBuilder();
    IndexSearcher searcher = null;
    try {
        searcher = new IndexSearcher(parseFulltextIndexDirectory(whereToSearch));
        Query query = QueryParser.parse(xPath, "all", new StandardAnalyzer());
        Hits hits = searcher.search(query);
        int hitsCount = hits.length();
        if (hitsCount > MAX_RESULTS) {
            hitsCount = MAX_RESULTS;
        }
        if (progress != null) {
            progress.setMinimum(0);
            progress.setMaximum(hitsCount);
            progress.setValue(0);
            progress.setString("Reading results from disk");
        }
        // the hits have to be read before the searcher is closed
        for (int i = 0; i < hitsCount; i++) {
            Document d = hits.doc(i);
            String fileName = d.get("file");
            Element e = builder.build(fileName).getRootElement();
            results.add(new ResultListEntry(hits.score(i), e, fileName));
            if (progress != null) {
                // i + 1 documents are done, so the bar reaches its maximum at the end
                progress.setValue(i + 1);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        System.err.println("XPath was: " + xPath);
        e.printStackTrace();
    } catch (JDOMException e) {
        e.printStackTrace();
    } finally {
        // release the index files held by the searcher in every case
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return results;
}
/**
 * Creates a new fulltext index for all descriptions found below
 * <code>pathToIndex</code>.
 * <p/>
 * The index is written to the directory returned by
 * {@link #parseFulltextIndexDirectory(String)} and replaces an already existing
 * index. For each description the file name is stored in the field
 * <code>file</code>, selected name and annotation elements are indexed in fields of
 * their own and the concatenation of all of them is indexed in the field
 * <code>all</code>. A description that cannot be processed is reported on
 * <code>System.err</code> and skipped.
 *
 * @param pathToIndex base directory that is searched for descriptions and that
 *                    receives the index directory
 * @param statusBar   receives progress messages, may be <code>null</code>
 */
public void indexFiles(String pathToIndex, StatusBar statusBar) {
    // parsing and eventually creating the directory for the index ...
    String indexDir = parseFulltextIndexDirectory(pathToIndex);
    Analyzer analyzer = new StandardAnalyzer();
    boolean createFlag = true;
    SAXBuilder builder = new SAXBuilder();
    String prefix = "Creating fulltext index: ";
    try {
        IndexWriter writer = new IndexWriter(indexDir, analyzer, createFlag);
        try {
            String[] descriptions = FileOperations.getAllDescriptions(new File(pathToIndex), true);
            if (descriptions == null) return;
            // number of documents that make up one percent, used for the status text
            float numAllDocsPercent = (float) descriptions.length / 100f;
            DecimalFormat df = (DecimalFormat) NumberFormat.getInstance();
            df.setMaximumFractionDigits(1);
            for (int i = 0; i < descriptions.length; i++) {
                try {
                    Element e = builder.build(descriptions[i]).getRootElement();
                    Document idxDocument = new Document();
                    // adding the file itself ...
                    idxDocument.add(Field.UnIndexed("file", descriptions[i]));
                    // adding the single fields, "all" collects the text of every field
                    StringBuilder all = new StringBuilder(255);
                    addToDocument(idxDocument, e, "//Agent/Name/GivenName", "GivenName", all);
                    addToDocument(idxDocument, e, "//Agent/Name/FamilyName", "FamilyName", all);
                    addToDocument(idxDocument, e, "//Label/Name", "Label", all);
                    addToDocument(idxDocument, e, "//FreeTextAnnotation", "FreeTextAnnotation", all);
                    addToDocument(idxDocument, e, "//StructuredAnnotation/Who/Name", "Who", all);
                    addToDocument(idxDocument, e, "//StructuredAnnotation/Where/Name", "Where", all);
                    addToDocument(idxDocument, e, "//StructuredAnnotation/How/Name", "How", all);
                    addToDocument(idxDocument, e, "//StructuredAnnotation/Why/Name", "Why", all);
                    addToDocument(idxDocument, e, "//StructuredAnnotation/When/Name", "When", all);
                    addToDocument(idxDocument, e, "//StructuredAnnotation/WhatObject/Name", "WhatObjects", all);
                    addToDocument(idxDocument, e, "//StructuredAnnotation/WhatAction/Name", "WhatAction", all);
                    idxDocument.add(Field.UnStored("all", all.toString()));
                    writer.addDocument(idxDocument);
                    if (statusBar != null) {
                        StringBuilder status = new StringBuilder(prefix);
                        status.append(df.format(((float) i) / numAllDocsPercent));
                        status.append('%');
                        statusBar.setStatus(status.toString());
                    }
                } catch (Exception e1) {
                    // a broken description must not stop the indexing of the others
                    System.err.println("Error with file " + descriptions[i] + " (" + e1.getMessage() + ")");
                }
            }
            writer.optimize();
        } finally {
            // always close the writer, also on the early return and on errors,
            // otherwise the index directory stays locked for later writers
            writer.close();
        }
        if (statusBar != null) {
            statusBar.setStatus("Indexing finished");
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Builds the path of the fulltext index directory (<code>idx_fulltext</code>)
 * inside the given base directory and creates that directory if it does not
 * exist yet.
 *
 * @param pathToIndex base directory, with or without a trailing file separator
 * @return path of the fulltext index directory for use with Lucene
 */
public static String parseFulltextIndexDirectory(String pathToIndex) {
    final String separator = System.getProperty("file.separator");
    final String base;
    if (pathToIndex.endsWith(separator)) {
        base = pathToIndex;
    } else {
        base = pathToIndex + separator;
    }
    final String fulltextDir = base + "idx_fulltext";
    File directory = new File(fulltextDir);
    if (!directory.exists()) {
        // only the last path element is created, the base has to exist already
        directory.mkdir();
    }
    return fulltextDir;
}
/**
 * Builds the path of the index directory for semantic objects
 * (<code>idx_semantic</code>) inside the given base directory and creates that
 * directory if it does not exist yet.
 *
 * @param pathToIndex base directory, with or without a trailing file separator
 * @return path of the semantic index directory for use with Lucene
 */
public static String parseSemanticIndexDirectory(String pathToIndex) {
    StringBuilder path = new StringBuilder();
    path.append(pathToIndex);
    if (!pathToIndex.endsWith(System.getProperty("file.separator"))) {
        path.append(System.getProperty("file.separator"));
    }
    path.append("idx_semantic");
    String semanticDir = path.toString();

    File directory = new File(semanticDir);
    boolean missing = !directory.exists();
    if (missing) {
        // only the last path element is created, the base has to exist already
        directory.mkdir();
    }
    return semanticDir;
}
/**
 * Collects the trimmed text of all elements selected by an XPath expression and
 * adds it to the index document as one text field.
 * <p/>
 * Each text is followed by a single space; the same sequence is appended to
 * <code>allContents</code> as well. The field is added even if the expression
 * selects nothing, its value is the empty string then.
 *
 * @param document    index document that receives the new field
 * @param root        element the XPath expression is evaluated on
 * @param xPath       expression selecting the elements to index
 * @param fieldName   name of the field to create
 * @param allContents buffer collecting the text of all fields of the document
 */
private void addToDocument(Document document, Element root, String xPath, String fieldName, StringBuilder allContents) {
    StringBuilder fieldText = new StringBuilder(128);
    List selected = RetrievalToolkit.xpathQuery(root, xPath, null);
    for (Object item : selected) {
        String text = ((Element) item).getTextTrim();
        fieldText.append(text).append(" ");
        allContents.append(text).append(" ");
    }
    document.add(Field.Text(fieldName, fieldText.toString()));
}
public void indexFilesSemantically(String pathToIndex, StatusBar statusBar) {
if (statusBar != null) statusBar.setStatus("Creating index from semantic annotations");
SAXBuilder builder = new SAXBuilder();
XMLOutputter outputter = new XMLOutputter(Format.getRawFormat().setIndent("").setLineSeparator("").setExpandEmptyElements(false));
try {
String[] descriptions = FileOperations.getAllDescriptions(new File(pathToIndex), true);
if (descriptions == null) return;
float numAllDocsPercent = (float) descriptions.length / 100f;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -