luceneretrievalengine.java

来自「基于MPEG 7 标准,符合未来语义网架构,很值得参考」· Java 代码 · 共 889 行 · 第 1/3 页
JAVA
889 行
/*
 * This file is part of Caliph & Emir.
 *
 * Caliph & Emir is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Caliph & Emir is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Caliph & Emir; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * Copyright statement:
 * --------------------
 * (c) 2002-2005 by Mathias Lux (mathias@juggle.at)
 * http://www.juggle.at, http://caliph-emir.sourceforge.net
 */
package at.lux.fotoretrieval.retrievalengines;

import at.lux.components.StatusBar;
import at.lux.fotoretrieval.FileOperations;
import at.lux.fotoretrieval.ResultListEntry;
import at.lux.fotoretrieval.RetrievalToolkit;
import at.lux.fotoretrieval.lucene.Graph;
import at.lux.fotoretrieval.lucene.Node;
import at.lux.fotoretrieval.lucene.Relation;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;
import org.jdom.input.SAXBuilder;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;

import javax.swing.*;
import java.io.*;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.*;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

/**
 * Date: 13.10.2004
 * Time: 21:47:58
 *
 * @author Mathias Lux, mathias@juggle.at
 */
public class LuceneRetrievalEngine extends AbstractRetrievalEngine {
    /**
     * Result limit constant. It is not referenced in the visible part of this file;
     * presumably it caps the number of hits taken from a search (TODO confirm against callers).
     */
    public static final int MAX_RESULTS = 15;
    /** JDOM namespace binding the prefix "xsi" to the XML Schema instance URI. */
    private static Namespace xsi = Namespace.getNamespace("xsi", "http://www.w3.org/2001/XMLSchema-instance");
    /**
     * Lookup table pairing a relation type name with a counterpart name
     * (e.g. "agent" with "agentOf"). It is filled once by the static initializer
     * of this class. NOTE(review): the pairs look like relation / inverse relation
     * names; which side is treated as canonical is decided by the consumers of
     * this map, which are not visible here.
     */
    public static final HashMap<String, String> relationMapping;

    // Fills relationMapping with pairs of relation type names, each relation
    // together with the name of its counterpart (e.g. "agent" / "agentOf").
    // The table holds 38 entries; an initial capacity of 64 keeps it below the
    // default load factor so that it is never rehashed while being filled.
    static {
        relationMapping = new HashMap<String, String>(64);
        relationMapping.put("key", "keyFor");
        relationMapping.put("annotates", "annotatedBy");
        relationMapping.put("shows", "appearsIn");
        relationMapping.put("references", "referencedBy");
        relationMapping.put("quality", "qualityOf");
        relationMapping.put("symbolizes", "symbolizedBy");
        relationMapping.put("location", "locationOf");
        relationMapping.put("source", "sourceOf");
        relationMapping.put("destination", "destinationOf");
        relationMapping.put("path", "pathOf");
        relationMapping.put("time", "timeOf");
        relationMapping.put("depicts", "depictedBy");
        relationMapping.put("represents", "representedBy");
        relationMapping.put("context", "contextFor");
        relationMapping.put("interprets", "interpretedBy");
        relationMapping.put("agent", "agentOf");
        relationMapping.put("patient", "patientOf");
        relationMapping.put("experiencer", "experiencerOf");
        relationMapping.put("stimulus", "stimulusOf");
        relationMapping.put("causer", "causerOf");
        relationMapping.put("goal", "goalOf");
        relationMapping.put("beneficiary", "beneficiaryOf");
        relationMapping.put("theme", "themeOf");
        relationMapping.put("result", "resultOf");
        relationMapping.put("instrument", "instrumentOf");
        relationMapping.put("accompanier", "accompanierOf");
        // was "summarizedBt" -- a typo that could never match the real relation name
        relationMapping.put("summarizes", "summarizedBy");
        relationMapping.put("specializes", "generalizes");
        relationMapping.put("exemplifies", "exemplifiedBy");
        relationMapping.put("part", "partOf");
        relationMapping.put("property", "propertyOf");
        relationMapping.put("user", "userOf");
        relationMapping.put("component", "componentOf");
        relationMapping.put("substance", "substanceOf");
        relationMapping.put("entails", "entailedBy");
        relationMapping.put("manner", "mannerOf");
        relationMapping.put("state", "stateOf");
        relationMapping.put("influences", "dependsOn");
    }

    /**
     * Searches for images with a String query defining a graph. The nodes are
     * given as search queries in square brackets and are referenced in the
     * relations by their position, starting with number 1. The relations follow
     * the nodes; each one starts with its type, followed by the position of the
     * source node and the position of the target node:
     * <pre>
     * query := node+ relation*
     * node := [term+]
     * relation := type source target
     * term := String
     * type := String
     * source := Integer
     * target := Integer
     * </pre>
     * e.g. "[Mathias Lux] [Talking] agentOf 1 2"
     * <p>
     * Numbers that are not preceded by a relation type are ignored. A query
     * without any node (or a <code>null</code> query) yields an empty result.
     *
     * @param xPath         the graph query in the syntax described above
     * @param objects       can be set to <code>null</code>; not used by this implementation
     * @param whereToSearch handed on unchanged to the node lookup and to the graph search
     * @param recursive     not used by this implementation
     * @param progress      progress bar to report to, can be set to <code>null</code>
     * @return the matching entries, at most one per file path (the one with the
     *         highest relevance), sorted by their natural order
     */
    public List<ResultListEntry> getImagesBySemantics(String xPath, Vector objects, String whereToSearch, boolean recursive, JProgressBar progress) {
        LinkedList<ResultListEntry> results = new LinkedList<ResultListEntry>();
        if (xPath == null) {
            return results;
        }

        // Split at "]": chunks starting with "[" are node queries, the
        // remaining chunk (if any) holds the relation definitions.
        List<String> nodeQueries = new LinkedList<String>();
        String relationString = "";
        StringTokenizer st = new StringTokenizer(xPath, "]");
        while (st.hasMoreTokens()) {
            String s = st.nextToken().trim();
            if (s.startsWith("[")) {
                nodeQueries.add(s.substring(1));
            } else {
                relationString = s;
            }
        }
        if (nodeQueries.isEmpty()) {
            // without nodes there is no graph to search for
            return results;
        }

        List<Relation> relations = new LinkedList<Relation>();
        if (relationString.length() > 1) {
            // there are relations, go ahead and parse them:
            StringTokenizer sr = new StringTokenizer(relationString);
            Relation currentRelation = null;
            while (sr.hasMoreTokens()) {
                String s = sr.nextToken();
                int nodePosition;
                try {
                    nodePosition = Integer.parseInt(s);
                } catch (NumberFormatException e) {
                    // not a number, so it is the type and starts a new relation
                    currentRelation = new Relation(-1, -1, s.trim());
                    continue;
                }
                if (currentRelation == null) {
                    // a number without a relation type in front of it
                    // (malformed query): skip it instead of failing
                    continue;
                }
                if (currentRelation.getSource() < 0) {
                    currentRelation.setSource(nodePosition);
                } else if (currentRelation.getTarget() < 0) {
                    currentRelation.setTarget(nodePosition);
                    currentRelation.eliminateInverse();
                    relations.add(currentRelation);
                    currentRelation = null;
                }
            }
        }

        // so for now do the retrieval for the nodes:
        List<List<Node>> nodeResults = new LinkedList<List<Node>>();
        for (String queryString : nodeQueries) {
            List<Node> nodes = getNodes(queryString, whereToSearch);
            nodeResults.add(nodes);
        }

        // now we can expand our query on retrieved nodes:
        List<Graph> graphList = getExpandedGraphsFromResults(nodeResults, relations, 3);
        if (progress != null) {
            progress.setMinimum(0);
            progress.setMaximum(graphList.size());
            progress.setValue(0);
            progress.setString("Querying expanded graphs");
        }
        int count = 0;
        for (Graph graph : graphList) {
            results.addAll(searchForGraph(graph, whereToSearch));
            count++;
            if (progress != null) {
                progress.setValue(count);
            }
        }

        // eliminate the duplicates: keep the most relevant entry per file
        if (progress != null) {
            progress.setMinimum(0);
            progress.setMaximum(results.size());
            progress.setValue(0);
            progress.setString("Removing double entries");
        }
        count = 0;
        HashMap<String, ResultListEntry> bestPerFile = new HashMap<String, ResultListEntry>();
        for (ResultListEntry resultListEntry : results) {
            String file = resultListEntry.getFilePath();
            ResultListEntry known = bestPerFile.get(file);
            if (known == null || known.getRelevance() < resultListEntry.getRelevance()) {
                bestPerFile.put(file, resultListEntry);
            }
            count++;
            if (progress != null) {
                progress.setValue(count);
            }
        }
        results.clear();
        results.addAll(bestPerFile.values());
        Collections.sort(results);
        return results;
    }

    /**
     * Turns the per-node hit lists into the list of graphs to search for:
     * the hit lists are first expanded into node combinations, then each
     * combination is joined with the given relations to form one graph.
     *
     * @param nodeResults one hit list per query node
     * @param relations   the relations of the query graph
     * @param depth       expansion depth, handed on to the set expansion
     * @return one graph per expanded node combination, in expansion order
     */
    private List<Graph> getExpandedGraphsFromResults(List<List<Node>> nodeResults, List<Relation> relations, int depth) {
        List<List<Node>> combinations = getExpandedSets(nodeResults, depth);
        List<Graph> graphs = new LinkedList<Graph>();
        for (List<Node> combination : combinations) {
            graphs.add(getGraphFromResults(combination, relations));
        }
        // NOTE: relations without a type are not mirrored into reverse
        // relations here, so such queries may not yield all possible results.
        return graphs;
    }

    /**
     * Builds the node combinations used for query expansion. From each hit
     * list the leading nodes with a weight of at least 1 plus up to
     * <code>depth</code> further nodes are selected (never more than the list
     * holds); the result is every combination of one selected node per hit
     * list, in the order of the hit lists.
     * NOTE(review): this presumably relies on each hit list being sorted by
     * descending weight -- confirm against the code creating the lists.
     *
     * @param nodeResults one hit list per query node
     * @param depth       number of hits to take in addition to the leading
     *                    hits with weight &gt;= 1
     * @return the combinations; empty if there is no hit list at all or if any
     *         hit list contributes no node
     */
    private List<List<Node>> getExpandedSets(List<List<Node>> nodeResults, int depth) {
        List<List<Node>> endResult = new LinkedList<List<Node>>();
        if (nodeResults == null || nodeResults.isEmpty()) {
            // nothing to combine; guards the get(0) below
            return endResult;
        }
        List<Node> firstNodesResults = nodeResults.get(0);

        // number of nodes to take from the first list: all leading nodes
        // with weight >= 1 plus "depth" more
        int numLevels = 0;
        for (Node node : firstNodesResults) {
            if (node.getWeight() < 1f) {
                break;
            }
            numLevels++;
        }
        numLevels += depth;

        // pick the selected nodes in one pass instead of using indexed
        // access, which is linear per call on linked lists
        List<Node> selected = new ArrayList<Node>();
        Iterator<Node> iterator = firstNodesResults.iterator();
        for (int i = 0; i < numLevels && iterator.hasNext(); i++) {
            selected.add(iterator.next());
        }

        if (nodeResults.size() == 1) {
            // last hit list: every selected node forms a combination of its own
            for (Node head : selected) {
                List<Node> nodeList = new LinkedList<Node>();
                nodeList.add(head);
                endResult.add(nodeList);
            }
            return endResult;
        }

        // expand the remaining hit lists and prepend each selected node to
        // each of their combinations
        List<List<Node>> remaining = new LinkedList<List<Node>>(nodeResults);
        remaining.remove(0);
        List<List<Node>> tails = getExpandedSets(remaining, depth);
        for (Node head : selected) {
            for (List<Node> tail : tails) {
                List<Node> nodeList = new LinkedList<Node>(tail);
                nodeList.add(0, head);
                endResult.add(nodeList);
            }
        }
        return endResult;
    }

    private Graph getGraphFromResults(List<Node> nodeResults, List<Relation> relations) {
        HashMap<Integer, Integer> idReplacementTable = new HashMap<Integer, Integer>(nodeResults.size());
        List<Node> nodes = new LinkedList<Node>();
        List<Relation> myRelations = new LinkedList<Relation>();
        for (int i = 0; i < nodeResults.size(); i++) {
            Node node = nodeResults.get(i);
luceneretrievalengine.java - 源码说明

本页面展示了「基于MPEG 7 标准,符合未来语义网架构,很值得参考」中的 luceneretrievalengine.java 源码文件，采用 Java 编程语言编写，共 889 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与MPEG相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?