📄 lucenesearcher.java.svn-base
字号:
//=== Copyright (C) 2001-2005 Food and Agriculture Organization of the
//=== United Nations (FAO-UN), United Nations World Food Programme (WFP)
//=== and United Nations Environment Programme (UNEP)
//===
//=== This program is free software; you can redistribute it and/or modify
//=== it under the terms of the GNU General Public License as published by
//=== the Free Software Foundation; either version 2 of the License, or (at
//=== your option) any later version.
//===
//=== This program is distributed in the hope that it will be useful, but
//=== WITHOUT ANY WARRANTY; without even the implied warranty of
//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//=== General Public License for more details.
//===
//=== You should have received a copy of the GNU General Public License
//=== along with this program; if not, write to the Free Software
//=== Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//===
//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2,
//=== Rome - Italy.
//=== email: GeoNetwork@fao.org
//==============================================================================

package org.fao.geonet.kernel.search;

import java.io.IOException;
import java.util.Comparator;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeSet;

import jeeves.resources.dbms.Dbms;
import jeeves.server.ServiceConfig;
import jeeves.server.context.ServiceContext;
import jeeves.utils.Log;
import jeeves.utils.Xml;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.fao.geonet.GeonetContext;
import org.fao.geonet.constants.Edit;
import org.fao.geonet.constants.Geonet;
import org.jdom.Element;

//--------------------------------------------------------------------------------
// search metadata locally using lucene
//--------------------------------------------------------------------------------

/**
 * Searches metadata in the local Lucene index.
 *
 * <p>A request is turned into an XML query description by the stylesheet given
 * at construction time, converted into a Lucene {@link Query} by
 * {@link #makeQuery(Element)} and run against the index directory supplied by
 * the {@link SearchManager}. Besides the hits, a summary element with keyword
 * and category frequencies is computed for every executed query.
 */
public class LuceneSearcher extends MetaSearcher {
    private final SearchManager _sm;
    private final String _styleSheetName;

    private IndexReader _reader;
    private IndexSearcher _searcher;
    private Query _query;
    private Hits _hits;
    private Element _elSummary;
    private int _maxSummaryKeys;

    //--------------------------------------------------------------------------------
    // constructor

    /**
     * @param sm             search manager used to transform requests and to locate the index
     * @param styleSheetName stylesheet that converts a request into an XML query
     */
    public LuceneSearcher(SearchManager sm, String styleSheetName) {
        _sm = sm;
        _styleSheetName = styleSheetName;
    }

    //--------------------------------------------------------------------------------
    // MetaSearcher API

    /**
     * Builds the Lucene query for {@code request}, runs it and initialises the
     * search range.
     *
     * @throws Exception if the query cannot be built or executed
     */
    public void search(ServiceContext srvContext, Element request, ServiceConfig config)
            throws Exception {
        computeQuery(srvContext, request, config);
        performQuery();
        initSearchRange(srvContext);
    }

    /**
     * Returns the hits inside the current search range as a {@code response}
     * element carrying {@code from}/{@code to} attributes, a copy of the
     * summary and one child per hit.
     *
     * <p>When the request contains {@code <fast>true</fast>} each hit is
     * rebuilt from the fields stored in the index; otherwise it is fetched
     * through the data manager. The stored query is re-run first if this
     * searcher is no longer valid.
     *
     * @throws Exception if the query, the index access or the metadata retrieval fails
     */
    public Element present(ServiceContext srvContext, Element request, ServiceConfig config)
            throws Exception {
        if (!isValid()) {
            performQuery();
        }
        updateSearchRange(request);

        GeonetContext gc = (GeonetContext) srvContext.getHandlerContext(Geonet.CONTEXT_NAME);
        boolean fast = "true".equals(request.getChildText("fast"));

        // build response
        Element response = new Element("response");
        response.setAttribute("from", getFrom() + "");
        response.setAttribute("to", getTo() + "");
        response.addContent((Element) _elSummary.clone());

        if (getTo() > 0) {
            // from/to are 1-based and inclusive, hit indexes are 0-based
            for (int i = getFrom() - 1; i < getTo(); i++) {
                Document doc = _hits.doc(i);
                String id = doc.get("_id");

                if (fast) {
                    response.addContent(getMetadataFromIndex(doc, id));
                } else {
                    Element md = gc.getDataManager().getMetadata(srvContext, id, false);
                    if (md != null) { // RGFIX: should not be necessary
                        response.addContent(md);
                    }
                }
            }
        }
        return response;
    }

    /** Returns the number of hits of the last executed query. */
    public int getSize() {
        return _hits.length();
    }

    /**
     * Returns a {@code response} element that contains only a copy of the
     * summary of the last executed query.
     */
    public Element getSummary() throws Exception {
        Element response = new Element("response");
        response.addContent((Element) _elSummary.clone());
        return response;
    }

    /**
     * Closes the index reader opened by the last query, if any. Safe to call
     * before any search was run and safe to call more than once.
     */
    // RGFIX: check this
    public void close() {
        releaseReader();
    }

    //--------------------------------------------------------------------------------
    // private setup, index, delete and search functions

    /**
     * Closes the current index reader (best effort) and forgets it together
     * with the searcher built on top of it. Does nothing when no reader is open.
     */
    private void releaseReader() {
        if (_reader == null) {
            return;
        }
        try {
            _reader.close();
        } catch (IOException e) {
            // closing is best effort: report the problem but do not fail the caller
            e.printStackTrace(); // DEBUG
        } finally {
            _reader = null;
            _searcher = null;
        }
    }

    /**
     * Reads {@code maxSummaryKeys} (request value first, then the service
     * configuration, default 10), adds one {@code group} element per group of
     * the current user to the request, transforms the request with the
     * configured stylesheet and stores the resulting Lucene query.
     */
    private void computeQuery(ServiceContext srvContext, Element request, ServiceConfig config)
            throws Exception {
        String sMaxSummaryKeys = request.getChildText("maxSummaryKeys");
        if (sMaxSummaryKeys == null) {
            sMaxSummaryKeys = config.getValue("maxSummaryKeys", "10");
        }
        _maxSummaryKeys = Integer.parseInt(sMaxSummaryKeys);

        GeonetContext gc = (GeonetContext) srvContext.getHandlerContext(Geonet.CONTEXT_NAME);
        Dbms dbms = (Dbms) srvContext.getResourceManager().open(Geonet.Res.MAIN_DB);

        HashSet<String> hs = gc.getAccessManager().getUserGroups(
                dbms, srvContext.getUserSession(), srvContext.getIpAddress());
        for (String group : hs) {
            request.addContent(new Element("group").addContent(group));
        }

        Log.debug(Geonet.SEARCH_ENGINE, "CRITERIA:\n" + Xml.getString(request));

        Element xmlQuery = _sm.transform(_styleSheetName, request);

        Log.debug(Geonet.SEARCH_ENGINE, "XML QUERY:\n" + Xml.getString(xmlQuery));

        _query = makeQuery(xmlQuery);
    }

    /**
     * Runs the stored query against a freshly opened index reader, rebuilds
     * the summary and marks this searcher as valid.
     */
    private void performQuery() throws Exception {
        // present() re-runs the query on an invalidated searcher: close the reader of
        // the previous run first, otherwise it would be overwritten and never closed
        releaseReader();

        _reader = IndexReader.open(_sm.getLuceneDir());
        _searcher = new IndexSearcher(_reader);
        _hits = _searcher.search(_query);

        makeSummary();
        setValid(true);
    }

    /**
     * Converts an XML query description into a Lucene query.
     *
     * <p>The element name selects the query type: {@code TermQuery},
     * {@code FuzzyQuery}, {@code PrefixQuery}, {@code WildcardQuery},
     * {@code PhraseQuery}, {@code RangeQuery} or {@code BooleanQuery}. All
     * texts are converted to lowercase, as the StandardAnalyzer does.
     *
     * @param xmlQuery root element of the query description
     * @return the corresponding Lucene query
     * @throws Exception if the element name is not one of the supported query types
     */
    public static Query makeQuery(Element xmlQuery) throws Exception {
        String name = xmlQuery.getName();

        if (name.equals("TermQuery")) {
            return new TermQuery(makeTerm(xmlQuery));
        }
        if (name.equals("FuzzyQuery")) {
            float sim = Float.parseFloat(xmlQuery.getAttributeValue("sim"));
            return new FuzzyQuery(makeTerm(xmlQuery), sim);
        }
        if (name.equals("PrefixQuery")) {
            return new PrefixQuery(makeTerm(xmlQuery));
        }
        if (name.equals("WildcardQuery")) {
            return new WildcardQuery(makeTerm(xmlQuery));
        }
        if (name.equals("PhraseQuery")) {
            PhraseQuery query = new PhraseQuery();
            for (Object child : xmlQuery.getChildren()) {
                query.add(makeTerm((Element) child));
            }
            return query;
        }
        if (name.equals("RangeQuery")) {
            String fld = xmlQuery.getAttributeValue("fld");
            String lowerTxt = xmlQuery.getAttributeValue("lowerTxt");
            String upperTxt = xmlQuery.getAttributeValue("upperTxt");
            // the range is inclusive only when explicitly requested with inclusive="true"
            boolean inclusive = "true".equals(xmlQuery.getAttributeValue("inclusive"));

            Term lowerTerm = (lowerTxt == null) ? null : new Term(fld, lowerTxt.toLowerCase());
            Term upperTerm = (upperTxt == null) ? null : new Term(fld, upperTxt.toLowerCase());
            return new RangeQuery(lowerTerm, upperTerm, inclusive);
        }
        if (name.equals("BooleanQuery")) {
            BooleanQuery query = new BooleanQuery();
            // Raise the clause limit BEFORE adding clauses so that it can take effect
            // for this query as well.
            // NOTE(review): in the Lucene releases this API belongs to, the limit is
            // believed to be a static, JVM-wide setting - confirm.
            query.setMaxClauseCount(16384); // FIXME: quick fix; using Filters should be better

            for (Object child : xmlQuery.getChildren()) {
                Element xmlBooleanClause = (Element) child;
                boolean required = "true".equals(xmlBooleanClause.getAttributeValue("required"));
                boolean prohibited = "true".equals(xmlBooleanClause.getAttributeValue("prohibited"));

                // only the first child of a clause is used as its sub-query
                Element xmlSubQuery = (Element) xmlBooleanClause.getChildren().get(0);
                query.add(makeQuery(xmlSubQuery), required, prohibited);
            }
            return query;
        }
        throw new Exception("unknown lucene query type: " + name);
    }

    /**
     * Builds a term from the {@code fld} and {@code txt} attributes of
     * {@code xmlTerm}; the text is lowercased. The {@code txt} attribute must
     * be present.
     */
    private static Term makeTerm(Element xmlTerm) {
        String fld = xmlTerm.getAttributeValue("fld");
        String txt = xmlTerm.getAttributeValue("txt").toLowerCase();
        return new Term(fld, txt);
    }

    /**
     * Rebuilds the summary of the current hits: the hit count, the most
     * frequent keywords (at most {@code maxSummaryKeys}) and all categories
     * with their frequencies.
     */
    private void makeSummary() throws Exception {
        int count = getSize();

        _elSummary = new Element("summary");
        _elSummary.setAttribute("count", count + "");
        _elSummary.setAttribute("type", "local");

        // count keyword and category frequencies, loading every hit only once
        Hashtable<String, Integer> keywordCounts = new Hashtable<String, Integer>();
        Hashtable<String, Integer> categoryCounts = new Hashtable<String, Integer>();
        for (int i = 0; i < count; i++) {
            Document doc = _hits.doc(i);
            tally(keywordCounts, doc.getValues("keyword"));
            tally(categoryCounts, doc.getValues("_cat"));
        }

        _elSummary.addContent(makeKeywordSummary(keywordCounts));
        _elSummary.addContent(makeCategorySummary(categoryCounts));
    }

    /**
     * Adds one occurrence of every value to {@code counts}. A {@code null}
     * array is ignored: lucene returns null instead of an empty array when a
     * document has no value for a field.
     */
    private static void tally(Hashtable<String, Integer> counts, String[] values) {
        if (values == null) {
            return;
        }
        for (String value : values) {
            Integer seen = counts.get(value);
            counts.put(value, Integer.valueOf(seen == null ? 1 : seen.intValue() + 1));
        }
    }

    /**
     * Builds the {@code keywords} element: keywords ordered by descending
     * frequency, ties broken by name, cut off after {@code maxSummaryKeys} entries.
     */
    private Element makeKeywordSummary(Hashtable<String, Integer> keywordCounts) {
        TreeSet<Map.Entry<String, Integer>> byFrequency =
                new TreeSet<Map.Entry<String, Integer>>(
                        new Comparator<Map.Entry<String, Integer>>() {
                            public int compare(Map.Entry<String, Integer> e1,
                                               Map.Entry<String, Integer> e2) {
                                int cmp = e2.getValue().compareTo(e1.getValue());
                                return (cmp != 0) ? cmp : e1.getKey().compareTo(e2.getKey());
                            }
                        });
        byFrequency.addAll(keywordCounts.entrySet());

        Element elKeywords = new Element("keywords");
        int emitted = 0;
        for (Map.Entry<String, Integer> entry : byFrequency) {
            if (emitted >= _maxSummaryKeys) {
                break;
            }
            Element elKeyword = new Element("keyword");
            elKeyword.setAttribute("count", entry.getValue().toString());
            elKeyword.setAttribute("name", entry.getKey());
            elKeywords.addContent(elKeyword);
            emitted++;
        }
        return elKeywords;
    }

    /** Builds the {@code categories} element: all categories ordered by name. */
    private static Element makeCategorySummary(Hashtable<String, Integer> categoryCounts) {
        Element elCategories = new Element("categories");
        // category names are unique keys, so their natural order is the order by name
        for (String category : new TreeSet<String>(categoryCounts.keySet())) {
            Element elCategory = new Element("category");
            elCategory.setAttribute("count", categoryCounts.get(category).toString());
            elCategory.setAttribute("name", category);
            elCategories.addContent(elCategory);
        }
        return elCategories;
    }

    /**
     * Rebuilds a minimal metadata element from the fields stored in the index:
     * an element named after the {@code _root} field that contains only an
     * info child with id, uuid, schema, create/change date, source and one
     * category entry per {@code _cat} field.
     *
     * <p>The {@code _createDate} and {@code _changeDate} fields must be
     * present on the document; they are upper-cased before being copied.
     *
     * @param doc index document of the hit
     * @param id  metadata id of the hit
     */
    private static Element getMetadataFromIndex(Document doc, String id) {
        String root = doc.get("_root");
        String schema = doc.get("_schema");
        String createDate = doc.get("_createDate").toUpperCase();
        String changeDate = doc.get("_changeDate").toUpperCase();
        String source = doc.get("_source");
        String uuid = doc.get("_uuid");

        Element info = new Element(Edit.RootChild.INFO, Edit.NAMESPACE);
        addElement(info, Edit.Info.Elem.ID, id);
        addElement(info, Edit.Info.Elem.UUID, uuid);
        addElement(info, Edit.Info.Elem.SCHEMA, schema);
        addElement(info, Edit.Info.Elem.CREATE_DATE, createDate);
        addElement(info, Edit.Info.Elem.CHANGE_DATE, changeDate);
        addElement(info, Edit.Info.Elem.SOURCE, source);

        for (Enumeration enu = doc.fields(); enu.hasMoreElements(); ) {
            Field field = (Field) enu.nextElement();
            if (field.name().equals("_cat")) {
                addElement(info, Edit.Info.Elem.CATEGORY, field.stringValue());
            }
        }

        Element md = new Element(root);
        md.addContent(info);
        return md;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -