⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 luceneretrievalengine.java

📁 基于MPEG 7 标准,符合未来语义网架构,很值得参考
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
            idReplacementTable.put(i + 1, node.getNodeID());
            nodes.add(node);
        }
        // Create the relations with the real IDs:
        for (Iterator<Relation> iterator = relations.iterator(); iterator.hasNext();) {
            Relation r = iterator.next();
            int src = (idReplacementTable.get(r.getSource()));
            int tgt = (idReplacementTable.get(r.getTarget()));
            myRelations.add(new Relation(src, tgt, r.getType()));
        }
        // now we can create the graph we want to search for:
        Graph g = new Graph(nodes, myRelations);
        return g;
    }

    /**
     * Scans the gzip-compressed graph index file <code>idx_graphs.list</code> located in
     * <code>whereToSearch</code> for indexed graphs that contain every node and relation
     * of the query graph.
     * <p/>
     * Each index line is tokenised at '|': the first token is handed to the
     * {@link Graph} constructor, every further token is used as the name of a
     * description file. For each file of a matching line one {@link ResultListEntry}
     * is created, carrying the similarity returned by <code>g.getMcsSimilarity</code>.
     * <p/>
     * Any exception raised while reading or evaluating the index is printed and ends
     * the scan; the entries collected up to that point are returned.
     *
     * @param g             the query graph; its <code>toString()</code> form is turned into regular expressions
     * @param whereToSearch directory containing <code>idx_graphs.list</code>, with or without trailing separator
     * @return the list of results, possibly empty, never <code>null</code>
     */
    private List<ResultListEntry> searchForGraph(Graph g, String whereToSearch) {
        List<ResultListEntry> resultList = new LinkedList<ResultListEntry>();
        List<String> graphSearchList = buildGraphRegexes(g.toString());

        String indexFile;
        if (!whereToSearch.endsWith(File.separator)) {
            indexFile = whereToSearch + File.separator + "idx_graphs.list";
        } else {
            indexFile = whereToSearch + "idx_graphs.list";
        }

        SAXBuilder builder = new SAXBuilder();
        FileInputStream fileIn = null;
        BufferedReader br = null;
        try {
            fileIn = new FileInputStream(indexFile);
            br = new BufferedReader(new InputStreamReader(new GZIPInputStream(fileIn)));

            // compile every expression once instead of once per index line:
            List<java.util.regex.Pattern> patterns = new LinkedList<java.util.regex.Pattern>();
            for (String regex : graphSearchList) {
                patterns.add(java.util.regex.Pattern.compile(regex));
            }

            String line;
            while ((line = br.readLine()) != null) {
                boolean match = true;
                for (java.util.regex.Pattern pattern : patterns) {
                    if (!pattern.matcher(line).matches()) {
                        // one missing node or relation is enough to reject the line
                        match = false;
                        break;
                    }
                }
                if (!match) {
                    continue;
                }
                // we found a graph:
                System.out.println("FOUND: " + line);
                StringTokenizer st = new StringTokenizer(line, "|");
                String graphString = st.nextToken();
                Graph theGraph = new Graph(graphString);
                float similarity = g.getMcsSimilarity(theGraph);
                while (st.hasMoreTokens()) {
                    String fileName = st.nextToken();
                    Element e = builder.build(fileName).getRootElement();
                    resultList.add(new ResultListEntry((double) similarity, e, fileName));
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // release the file handle on success and on failure alike
            try {
                if (br != null) {
                    br.close();
                } else if (fileIn != null) {
                    // the gzip wrapper could not be created, close the raw stream
                    fileIn.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return resultList;
    }

    /**
     * Creates one regular expression per '['-separated item of the given graph string.
     * Each expression has the form <code>.*\x5B&lt;item&gt;\x5D.*</code>, i.e. the item in
     * square brackets surrounded by arbitrary text. The item text is inserted into the
     * expression unquoted. Items containing the wildcard <code>\w*</code> are passed
     * through {@link #expandUntypedRelation(String)}.
     *
     * @param graphString string form of the query graph
     * @return the expressions in the order of the items in the graph string
     */
    private List<String> buildGraphRegexes(String graphString) {
        // between the nodes and relations there may be various literals: '.*'
        final String regexInsert = ".*";
        List<String> regexes = new LinkedList<String>();
        StringTokenizer stok = new StringTokenizer(graphString, "[");
        while (stok.hasMoreTokens()) {
            String s = stok.nextToken().trim();
            if (s.length() == 0) {
                // only whitespace in front of a '[': nothing to search for
                continue;
            }
            // drop the last character of the item (the closing bracket)
            s = s.substring(0, s.length() - 1);

            StringBuilder regexItem = new StringBuilder(32);
            regexItem.append(regexInsert);
            // opening bracket '['
            regexItem.append("\\x5B");
            // the actual content (node or relation)
            regexItem.append(s);
            // closing bracket ']'
            regexItem.append("\\x5D");
            regexItem.append(regexInsert);

            String regex = regexItem.toString();
            if (regex.indexOf("\\w*") > -1) {
                // here we create support for relation wildcards:
                regex = expandUntypedRelation(regex);
            }
            regexes.add(regex);
        }
        return regexes;
    }

    /**
     * Rewrites an expression containing the relation wildcard <code>\w*</code> so that
     * the two space-separated values following the wildcard are accepted in either order:
     * <code>\w* a b\...</code> becomes <code>\w* ((a b)|(b a))\...</code>.
     * Only the first occurrence of the wildcard is handled.
     *
     * @param regex expression holding <code>\w*</code>, one further character, two values
     *              separated by a blank and then a backslash
     * @return the expression with the alternation inserted
     */
    private String expandUntypedRelation(String regex) {
        // the wildcard itself is three characters long, the fourth one skipped here
        // is the character that follows it
        final int splitAt = regex.indexOf("\\w*") + 4;
        final String tail = regex.substring(splitAt);
        final String head = regex.substring(0, splitAt);

        final int blank = tail.indexOf(' ');
        final String left = tail.substring(0, blank);
        final int escape = tail.indexOf('\\');
        final String right = tail.substring(blank + 1, escape);
        final String rest = tail.substring(escape);

        StringBuilder expanded = new StringBuilder(regex.length() * 2);
        expanded.append(head);
        expanded.append("((").append(left).append(" ").append(right);
        expanded.append(")|(").append(right).append(" ").append(left).append("))");
        expanded.append(rest);
        return expanded.toString();
    }

    /**
     * Search for images similar to a given visual descriptor. This implementation
     * performs no search: none of the arguments is read and the result is always
     * <code>null</code>, so callers have to check for <code>null</code>.
     *
     * @param VisualDescriptor not used
     * @param whereToSearch    not used
     * @param recursive        not used
     * @param progress         not used
     * @return always <code>null</code>
     */
    public Vector getSimilarImages(Element VisualDescriptor, String whereToSearch, boolean recursive, JProgressBar progress) {
        return null;
    }

    /**
     * Runs a query against the fulltext index below <code>whereToSearch</code> and loads
     * the description file of every hit.
     * <p/>
     * The query string is parsed by the Lucene <code>QueryParser</code> with
     * <code>"all"</code> as default field. At most <code>MAX_RESULTS</code> hits are
     * read. IO, query parsing and XML parsing errors are printed; the results collected
     * up to the error are returned.
     *
     * @param xPath         the query string handed to the query parser
     * @param whereToSearch base directory, resolved with {@link #parseFulltextIndexDirectory(String)}
     * @param recursive     not used by this method
     * @param progress      optional progress bar, may be <code>null</code>
     * @return a Vector of {@link ResultListEntry} objects, possibly empty, never <code>null</code>
     */
    public Vector getImagesByXPathSearch(String xPath, String whereToSearch, boolean recursive, JProgressBar progress) {
        Vector results = new Vector();
        if (progress != null) {
            progress.setString("Searching through index");
        }
        SAXBuilder builder = new SAXBuilder();
        IndexSearcher searcher = null;
        try {
            searcher = new IndexSearcher(parseFulltextIndexDirectory(whereToSearch));
            Query query = QueryParser.parse(xPath, "all", new StandardAnalyzer());
            Hits hits = searcher.search(query);
            int hitsCount = hits.length();
            if (hitsCount > MAX_RESULTS) hitsCount = MAX_RESULTS;
            if (progress != null) {
                progress.setMinimum(0);
                progress.setMaximum(hitsCount);
                progress.setValue(0);
                progress.setString("Reading results from disk");
            }

            for (int i = 0; i < hitsCount; i++) {
                Document d = hits.doc(i);
                String fileName = d.get("file");
                Element e = builder.build(fileName).getRootElement();
                results.add(new ResultListEntry(hits.score(i), e, fileName));
                // i + 1 documents are done, so the bar reaches its maximum after the last hit
                if (progress != null) progress.setValue(i + 1);
            }

        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            System.err.println("XPath was: " + xPath);
            e.printStackTrace();
        } catch (JDOMException e) {
            e.printStackTrace();
        } finally {
            // the searcher holds the index files open, so it is released in every case;
            // this happens after the loop because the hits are read through the searcher
            if (searcher != null) {
                try {
                    searcher.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return results;

    }

    /**
     * Builds the fulltext index for all descriptions found below <code>pathToIndex</code>.
     * In general we take the base path for our search for the pathToIndex parameter.
     * We then add the index directory (see {@link #parseFulltextIndexDirectory(String)})
     * and create the index there; the index is opened with the create flag set.
     * <p/>
     * For each description one Lucene document is written, holding the file name, one
     * field per annotation type and the field <code>"all"</code> with the text of all of
     * them. A description that cannot be processed is reported on <code>System.err</code>
     * and skipped. The index writer is closed in every case, also if no descriptions
     * are available or an IO error occurs.
     *
     * @param pathToIndex base directory that is searched for descriptions and receives the index directory
     * @param statusBar   receives progress messages, may be <code>null</code>
     */
    public void indexFiles(String pathToIndex, StatusBar statusBar) {
        // parsing and eventually creating the directory for the index ...
        String indexDir = parseFulltextIndexDirectory(pathToIndex);

        Analyzer analyzer = new StandardAnalyzer();
        boolean createFlag = true;
        SAXBuilder builder = new SAXBuilder();
        String prefix = "Creating fulltext index: ";
        try {
            IndexWriter writer = new IndexWriter(indexDir, analyzer, createFlag);
            try {
                String[] descriptions = FileOperations.getAllDescriptions(new File(pathToIndex), true);
                if (descriptions == null) return;
                // number of documents that make up one percent
                float numAllDocsPercent = (float) descriptions.length / 100f;
                DecimalFormat df = (DecimalFormat) NumberFormat.getInstance();
                df.setMaximumFractionDigits(1);

                for (int i = 0; i < descriptions.length; i++) {
                    try {
                        Element e = builder.build(descriptions[i]).getRootElement();
                        Document idxDocument = new Document();
                        // adding the file itself ...
                        idxDocument.add(Field.UnIndexed("file", descriptions[i]));
                        // collects the text of all fields for the "all" field
                        StringBuilder all = new StringBuilder(255);

                        addToDocument(idxDocument, e, "//Agent/Name/GivenName", "GivenName", all);
                        addToDocument(idxDocument, e, "//Agent/Name/FamilyName", "FamilyName", all);
                        addToDocument(idxDocument, e, "//Label/Name", "Label", all);
                        addToDocument(idxDocument, e, "//FreeTextAnnotation", "FreeTextAnnotation", all);
                        addToDocument(idxDocument, e, "//StructuredAnnotation/Who/Name", "Who", all);
                        addToDocument(idxDocument, e, "//StructuredAnnotation/Where/Name", "Where", all);
                        addToDocument(idxDocument, e, "//StructuredAnnotation/How/Name", "How", all);
                        addToDocument(idxDocument, e, "//StructuredAnnotation/Why/Name", "Why", all);
                        addToDocument(idxDocument, e, "//StructuredAnnotation/When/Name", "When", all);
                        addToDocument(idxDocument, e, "//StructuredAnnotation/WhatObject/Name", "WhatObjects", all);
                        addToDocument(idxDocument, e, "//StructuredAnnotation/WhatAction/Name", "WhatAction", all);

                        idxDocument.add(Field.UnStored("all", all.toString()));

                        writer.addDocument(idxDocument);

                        if (statusBar != null) {
                            StringBuilder status = new StringBuilder(prefix);
                            status.append(df.format(((float) i) / numAllDocsPercent));
                            status.append('%');
                            statusBar.setStatus(status.toString());
                        }

                    } catch (Exception e1) {
                        // a single broken description must not stop the indexing run
                        System.err.println("Error with file " + descriptions[i] + " (" + e1.getMessage() + ")");
                    }
                }
                writer.optimize();
            } finally {
                // always close the writer, otherwise the index stays open after an
                // early return or an error
                writer.close();
            }
            if (statusBar != null) {
                statusBar.setStatus("Indexing finished");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Derives the directory of the fulltext index from a base directory: the name
     * <code>idx_fulltext</code> is appended to the base path, with the platform file
     * separator in between if the base path does not already end with it. If nothing
     * exists at the resulting path the directory is created with a single
     * <code>mkdir()</code> call, whose result is not checked.
     *
     * @param pathToIndex directory where the index dir should be created
     * @return path to the index directory for use with Lucene
     */
    public static String parseFulltextIndexDirectory(String pathToIndex) {
        final String separator = System.getProperty("file.separator");
        StringBuilder path = new StringBuilder(pathToIndex);
        if (!pathToIndex.endsWith(separator)) {
            path.append(separator);
        }
        path.append("idx_fulltext");

        final String indexDir = path.toString();
        final File directory = new File(indexDir);
        if (!directory.exists()) {
            directory.mkdir();
        }
        return indexDir;
    }

    /**
     * Derives the directory of the index of semantic objects from a base directory:
     * the name <code>idx_semantic</code> is appended to the base path, with the platform
     * file separator in between if the base path does not already end with it. If
     * nothing exists at the resulting path the directory is created with a single
     * <code>mkdir()</code> call, whose result is not checked.
     *
     * @param pathToIndex directory where the index dir should be created
     * @return path to the index directory for use with Lucene
     */
    public static String parseSemanticIndexDirectory(String pathToIndex) {
        final String separator = System.getProperty("file.separator");
        StringBuilder path = new StringBuilder(pathToIndex);
        if (!pathToIndex.endsWith(separator)) {
            path.append(separator);
        }
        path.append("idx_semantic");

        final String indexDir = path.toString();
        final File directory = new File(indexDir);
        if (!directory.exists()) {
            directory.mkdir();
        }
        return indexDir;
    }

    /**
     * Evaluates an XPath expression on the given element and stores the trimmed text of
     * all resulting elements, each followed by a blank, as one text field of the
     * document. The same text is appended to <code>allContents</code>. The field is
     * added even if the query yields no elements; its value is empty then.
     *
     * @param document    the index document receiving the field
     * @param root        element the XPath expression is evaluated on
     * @param xPath       the expression; every result is cast to {@link Element}
     * @param fieldName   name of the field to add
     * @param allContents buffer collecting the text of all fields of the document
     */
    private void addToDocument(Document document, Element root, String xPath, String fieldName, StringBuilder allContents) {
        List matches = RetrievalToolkit.xpathQuery(root, xPath, null);
        StringBuilder fieldValue = new StringBuilder(128);
        for (Object match : matches) {
            String text = ((Element) match).getTextTrim();
            fieldValue.append(text).append(" ");
            allContents.append(text).append(" ");
        }
        document.add(Field.Text(fieldName, fieldValue.toString()));
    }

    public void indexFilesSemantically(String pathToIndex, StatusBar statusBar) {
        if (statusBar != null) statusBar.setStatus("Creating index from semantic annotations");

        SAXBuilder builder = new SAXBuilder();
        XMLOutputter outputter = new XMLOutputter(Format.getRawFormat().setIndent("").setLineSeparator("").setExpandEmptyElements(false));

        try {
            String[] descriptions = FileOperations.getAllDescriptions(new File(pathToIndex), true);
            if (descriptions == null) return;
            float numAllDocsPercent = (float) descriptions.length / 100f;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -