📄 lucenepathindexretrievalengine.java

📁 基于MPEG 7 标准,符合未来语义网架构,很值得参考
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/*
 * This file is part of Caliph & Emir.
 *
 * Caliph & Emir is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Caliph & Emir is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Caliph & Emir; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * Copyright statement:
 * --------------------
 * (c) 2002-2005 by Mathias Lux (mathias@juggle.at)
 * http://www.juggle.at, http://caliph-emir.sourceforge.net
 */
package at.lux.fotoretrieval.retrievalengines;

import at.lux.components.StatusBar;
import at.lux.fotoretrieval.FileOperations;
import at.lux.fotoretrieval.ResultListEntry;
import at.lux.fotoretrieval.RetrievalToolkit;
import at.lux.fotoretrieval.lucene.*;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;
import org.jdom.input.SAXBuilder;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;

import javax.swing.*;
import java.io.File;
import java.io.IOException;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.*;

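/**
 * Retrieval engine that expands a semantic query into candidate graphs and
 * looks them up in a Lucene path index.
 * <p>
 * Date: 25.03.2005
 * Time: 23:58:46
 *
 * @author Mathias Lux, mathias@juggle.at
 */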
public class LucenePathIndexRetrievalEngine extends AbstractRetrievalEngine {
    /** JDOM namespace for the "xsi" prefix (XML Schema instance URI). */
    private static Namespace xsi = Namespace.getNamespace("xsi", "http://www.w3.org/2001/XMLSchema-instance");
    /** Upper bound on the hits read per graph query and on the size of the final result list. */
    private static final int MAX_RESULTS = 20;
    /** Searcher on the path index; assigned in getImagesBySemantics and read by searchForGraph. */
    private IndexSearcher indexSearcher;

    /**
     * Runs a semantic graph query against the path index and returns the matching images.
     * <p>
     * The query is expanded into a list of candidate graphs, every graph is searched in the
     * index, and the collected hits are sorted by the natural ordering of
     * {@link ResultListEntry}. Only the first entry per description path is kept.
     * <p>
     * An {@link IOException} raised while opening, querying or closing the index is printed to
     * standard error; whatever was collected up to that point is still returned.
     *
     * @param xPath         the query string handed to the query expansion
     * @param objects       not used by this implementation
     * @param whereToSearch location from which the path index directory is resolved
     * @param recursive     not used by this implementation
     * @param progress      progress bar to update while working, or {@code null} for none
     * @return at most {@code MAX_RESULTS} entries, free of duplicate description paths
     */
    public List<ResultListEntry> getImagesBySemantics(String xPath, Vector objects, String whereToSearch, boolean recursive, JProgressBar progress) {
        if (progress != null) {
            progress.setString("Query expansion running");
        }
        List<Graph> graphList = getExpandedGraphs(xPath, whereToSearch);
        List<ResultListEntry> results = new LinkedList<ResultListEntry>();
        if (progress != null) {
            progress.setMinimum(0);
            progress.setMaximum(graphList.size());
            progress.setValue(0);
            progress.setString("Querying expanded graphs");
        }
        try {
            indexSearcher = new IndexSearcher(parsePathIndexDirectory(whereToSearch));
            try {
                int countGraph = 0;
                for (Graph graph : graphList) {
                    results.addAll(searchForGraph(graph, whereToSearch));
                    countGraph++;
                    if (progress != null) {
                        progress.setValue(countGraph);
                    }
                }
            } finally {
                // Release the index even if a graph query fails with an unexpected exception.
                indexSearcher.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        Collections.sort(results);
        // Walk the sorted hits and keep only the first entry for each description path.
        LinkedList<ResultListEntry> sorted = new LinkedList<ResultListEntry>();
        Set<String> doublettes = new HashSet<String>();
        for (ResultListEntry entry : results) {
            // Set.add returns false when the path has been seen before.
            if (doublettes.add(entry.getDescriptionPath())) {
                sorted.add(entry);
            }
        }
        return sorted.subList(0, Math.min(sorted.size(), MAX_RESULTS));
    }

    /**
     * Searches the path index for a single graph and loads the description of every hit.
     * <p>
     * The graph is turned into a query on the "paths" field. For each of the first
     * {@code MAX_RESULTS} hits, every value of the "file" field is parsed as an XML document
     * and wrapped in a {@link ResultListEntry} carrying the hit score.
     * <p>
     * This method reads the {@code indexSearcher} field, which must have been opened
     * beforehand (getImagesBySemantics does so before calling this method).
     * <p>
     * A description file that cannot be read or parsed is reported on standard error and
     * skipped, so one broken file does not discard the remaining hits. Failures of the query
     * itself are reported the same way and end the search with the entries collected so far.
     *
     * @param graph         the graph to search for
     * @param whereToSearch not used by this method
     * @return the entries built from the hits; empty if nothing matched or the query failed
     */
    public List<ResultListEntry> searchForGraph(Graph graph, String whereToSearch) {
        List<ResultListEntry> results = new LinkedList<ResultListEntry>();
        String query = createLucenePathQuery(graph);
        try {
            Query q = QueryParser.parse(query, "paths", new WhitespaceAnalyzer());
            Hits hits = indexSearcher.search(q);
            SAXBuilder builder = new SAXBuilder();
            int maxResults = Math.min(hits.length(), MAX_RESULTS);
            for (int i = 0; i < maxResults; i++) {
                String[] filenames = hits.doc(i).getValues("file");
                if (filenames == null) {
                    // Document.getValues may return null when the hit has no "file" field.
                    continue;
                }
                double score = hits.score(i);
                for (String fileName : filenames) {
                    try {
                        Element root = builder.build(fileName).getRootElement();
                        results.add(new ResultListEntry(score, root, fileName));
                    } catch (JDOMException ex) {
                        // Malformed description: skip this file, keep the other hits.
                        ex.printStackTrace();
                    } catch (IOException ex) {
                        // Unreadable description: skip this file, keep the other hits.
                        ex.printStackTrace();
                    }
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        }

        return results;
    }

    /**
     * Parses the query string and expands it into the list of graphs to search for.
     * <p>
     * The query is split at "]". Every piece that starts with "[" is a node query; any other
     * piece is taken as the relation string (if there are several, the last one wins). The
     * relation string is a whitespace separated sequence in which a non-numeric token starts
     * a new relation of that type and the next two integers become its source and target.
     * <p>
     * Each node query is resolved through getNodes, except "*", which yields a single
     * wildcard node. The node candidates and the relations are then combined by
     * getExpandedGraphsFromResults.
     *
     * @param query         the query string in the format described above
     * @param whereToSearch location passed on to the node lookup
     * @return the expanded graphs
     */
    private List<Graph> getExpandedGraphs(String query, String whereToSearch) {
        List<String> nodeQueries = new LinkedList<String>();
        String relationString = "";
        StringTokenizer st = new StringTokenizer(query, "]");
        while (st.hasMoreTokens()) {
            String s = st.nextToken().trim();
            if (s.startsWith("[")) {
                nodeQueries.add(s.substring(1));
            } else {
                relationString = s;
            }
        }

        List<Relation> relations = new LinkedList<Relation>();
        if (relationString.length() > 1) {
            // there are relations, go ahead and parse them:
            StringTokenizer sr = new StringTokenizer(relationString);
            Relation currentRelation = null;
            while (sr.hasMoreTokens()) {
                String s = sr.nextToken();
                int i;
                try {
                    i = Integer.parseInt(s);
                } catch (NumberFormatException e) {
                    // Not a number, so it is the type of the next relation.
                    currentRelation = new Relation(-1, -1, s.trim());
                    continue;
                }
                if (currentRelation == null) {
                    // A number with no pending relation type (e.g. a leading or surplus
                    // index) has nothing to attach to; ignore it instead of failing.
                    continue;
                }
                if (currentRelation.getSource() < 0) {
                    currentRelation.setSource(i);
                } else if (currentRelation.getTarget() < 0) {
                    currentRelation.setTarget(i);
                    currentRelation.eliminateInverse();
                    relations.add(currentRelation);
                    currentRelation = null;
                }
            }
        }

        // so for now do the retrieval for the nodes:
        List<List<Node>> nodeResults = new LinkedList<List<Node>>();
        for (String queryString : nodeQueries) {
            List<Node> nodes;
            if (queryString.equals("*")) {
                // The wildcard is not looked up; it is represented by one placeholder node.
                nodes = new LinkedList<Node>();
                nodes.add(new Node(-1, 1f, "*"));
            } else {
                nodes = getNodes(queryString, whereToSearch);
            }
            nodeResults.add(nodes);
        }

        // now we can expand our query on retrieved nodes (expansion depth 5):
        return getExpandedGraphsFromResults(nodeResults, relations, 5);
    }

    /**
     * Builds one graph for every node combination produced by getExpandedSets.
     *
     * @param nodeResults the candidate nodes, one list per node query
     * @param relations   the relations to apply to each combination
     * @param depth       expansion depth handed to getExpandedSets
     * @return the graphs, in the order of the expanded combinations
     */
    private List<Graph> getExpandedGraphsFromResults(List<List<Node>> nodeResults, List<Relation> relations, int depth) {
        List<Graph> graphs = new LinkedList<Graph>();
        for (List<Node> combination : getExpandedSets(nodeResults, depth)) {
            graphs.add(getGraphFromResults(combination, relations));
        }
        return graphs;
    }

    /**
     * Creates a graph from one node combination and the query relations.
     * <p>
     * The relations refer to nodes by their 1-based position in {@code nodeResults}; here
     * those positions are replaced by the IDs of the nodes found at them. A relation that
     * refers to a position without a node fails with a {@link NullPointerException} when the
     * missing table entry is unboxed.
     *
     * @param nodeResults the nodes of this combination, in query order
     * @param relations   the relations expressed with node positions
     * @return a graph holding the given nodes and the relations rewritten to node IDs
     */
    private Graph getGraphFromResults(List<Node> nodeResults, List<Relation> relations) {
        HashMap<Integer, Integer> positionToNodeId = new HashMap<Integer, Integer>(nodeResults.size());
        List<Node> graphNodes = new LinkedList<Node>();
        int position = 0;
        for (Node current : nodeResults) {
            position++;
            positionToNodeId.put(position, current.getNodeID());
            graphNodes.add(current);
        }

        // Rewrite every relation from node positions to the real node IDs.
        List<Relation> graphRelations = new LinkedList<Relation>();
        for (Relation template : relations) {
            int sourceId = positionToNodeId.get(template.getSource());
            int targetId = positionToNodeId.get(template.getTarget());
            graphRelations.add(new Relation(sourceId, targetId, template.getType()));
        }

        return new Graph(graphNodes, graphRelations);
    }

    private List<List<Node>> getExpandedSets(List<List<Node>> nodeResults, int depth) {
        if (nodeResults.size() > 1) {
            List<Node> firstNodesResults = nodeResults.get(0);
            int numLevels = 0;
            for (Iterator<Node> iterator = firstNodesResults.iterator(); iterator.hasNext();) {
                Node node = iterator.next();
                if (node.getWeight() < 1f) break;
                numLevels++;
            }
            numLevels += depth;
            if (firstNodesResults.size() < depth) {
                numLevels = firstNodesResults.size();
            }
            List<List<Node>> tmpNodeResults = new LinkedList<List<Node>>(nodeResults);
            tmpNodeResults.remove(0);
            List<List<Node>> results = getExpandedSets(tmpNodeResults, depth);
            List<List<Node>> endResult = new LinkedList<List<Node>>();
            for (int i = 0; i < numLevels && i < firstNodesResults.size(); i++) {
                for (int j = 0; j < results.size(); j++) {
                    List<Node> nodeList = new LinkedList<Node>(results.get(j));
                    nodeList.add(0, firstNodesResults.get(i));
                    endResult.add(nodeList);
                }
            }
            return endResult;
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -