📄 searchmanager.java.svn-base

📁 由国外的一个著名的geonetwork修改而来
💻 SVN-BASE
字号:
//===	Copyright (C) 2001-2005 Food and Agriculture Organization of the//===	United Nations (FAO-UN), United Nations World Food Programme (WFP)//===	and United Nations Environment Programme (UNEP)//===//===	This program is free software; you can redistribute it and/or modify//===	it under the terms of the GNU General Public License as published by//===	the Free Software Foundation; either version 2 of the License, or (at//===	your option) any later version.//===//===	This program is distributed in the hope that it will be useful, but//===	WITHOUT ANY WARRANTY; without even the implied warranty of//===	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU//===	General Public License for more details.//===//===	You should have received a copy of the GNU General Public License//===	along with this program; if not, write to the Free Software//===	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA//===//===	Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2,//===	Rome - Italy. email: GeoNetwork@fao.org//==============================================================================package org.fao.geonet.kernel.search;import org.fao.geonet.constants.Geonet;import jeeves.utils.Xml;import org.apache.lucene.analysis.standard.*;import org.apache.lucene.document.*;import org.apache.lucene.index.*;import com.k_int.IR.Searchable;import com.k_int.util.Repository.CollectionDirectory;import com.k_int.hss.HeterogeneousSetOfSearchable;import com.k_int.util.LoggingFacade.*;import org.jdom.Element;import java.io.*;import java.util.*;import javax.naming.*;//--------------------------------------------------------------------------------// index metadata using lucene//--------------------------------------------------------------------------------public class SearchManager{	public static final int LUCENE = 1;	public static final int Z3950  = 2;	public static final int UNUSED = 3;	private static final String SEARCH_STYLESHEETS_DIR_PATH = "xml/search";	private File           _stylesheetsDir;	private File           _luceneDir;	private LoggingContext _cat;	private Searchable     _hssSearchable;	//-----------------------------------------------------------------------------	public SearchManager(String appPath, String luceneDir) throws Exception	{		_stylesheetsDir = new File(appPath, SEARCH_STYLESHEETS_DIR_PATH);		if (!_stylesheetsDir.isDirectory())			throw new Exception("directory " + _stylesheetsDir + " not found");		initLucene(appPath, luceneDir);		initZ3950(appPath);	}	//-----------------------------------------------------------------------------	public void end() throws Exception	{		endZ3950();	}	//-----------------------------------------------------------------------------	public MetaSearcher newSearcher(int type, String stylesheetName)		throws Exception	{		switch (type)		{			case LUCENE: return new LuceneSearcher(this, stylesheetName);			case Z3950:  return new Z3950Searcher(this, stylesheetName);			case UNUSED: return new UnusedSearcher();			default:     throw new Exception("unknown MetaSearcher type: " + type);		}	}	//-----------------------------------------------------------------------------	// lucene init/end methods	private void initLucene(String appPath, String luceneDir)		throws Exception	{		_luceneDir = new File(luceneDir);		if (!_luceneDir.isAbsolute())			_luceneDir = new File(appPath + luceneDir);		//--- the lucene dir cannot be inside the CVS so it is better to create it here		_luceneDir.mkdirs();		setupIndex(false); // RGFIX: check if this is correct	}	//-----------------------------------------------------------------------------	// Z39.50 init/end methods	/** initializes the Z3950 client searcher	 */	private void initZ3950(String appPath)		throws Exception	{		_cat = LogContextFactory.getContext("GeoNetwork"); // FIXME: maybe it should use the webapp path		String configClass = "com.k_int.util.Repository.XMLDataSource";		String configUrl   = "file:///" + appPath + jeeves.constants.Jeeves.Path.XML + "/repositories.xml";		String directoryNamingLocation = "/Services/IR/Directory"; // RGFIX: change to use servlet context		Properties props = new Properties();		props.setProperty("CollectionDataSourceClassName", configClass);		props.setProperty("RepositoryDataSourceURL",       configUrl);		props.setProperty("DirectoryServiceName",          directoryNamingLocation); // RGFIX: check this		// set up the collection directory and register it with the naming service in the		// default way		// RGFIX: this could not work for different servlet instances, should be changed to use servlet context		CollectionDirectory cd = new CollectionDirectory(configClass, configUrl);		Context context = new InitialContext();		Context services_context = context.createSubcontext("Services");		Context ir_context = services_context.createSubcontext("IR");		ir_context.bind("Directory", cd);		// pull in the repository		_hssSearchable = new HeterogeneousSetOfSearchable();		_hssSearchable.init(props);	}	/** deinitializes the Z3950 client searcher	 */	private void endZ3950()	{		if (_hssSearchable != null)		{			_hssSearchable.destroy();			_hssSearchable = null;		}	}	//--------------------------------------------------------------------------------	// indexing methods	public synchronized void index(String type, Element metadata, String id, List moreFields) throws Exception	{		delete("_id", id);		// get metadata fields		String stylesheetName = type + ".xsl";		Element xmlDoc = transform(stylesheetName, metadata);//System.out.println("XML DOC for id:"+id+"\n" + jeeves.utils.Xml.getString(xmlDoc)); // DEBUG		// add _id field		Element idField = new Element("Field");		idField.setAttribute("name",   "_id");		idField.setAttribute("string", id);		idField.setAttribute("store",  "true");		idField.setAttribute("index",  "true");		idField.setAttribute("token",  "false");		xmlDoc.addContent(idField);		// add more fields		for (Iterator iter = moreFields.iterator(); iter.hasNext(); )		{			Element field = (Element)iter.next();			xmlDoc.addContent(field);		}		Document doc = newDocument(xmlDoc);		IndexWriter writer = new IndexWriter(_luceneDir, new StandardAnalyzer(new String[] {}), false);		try		{			writer.addDocument(doc);			lazyOptimize(writer);		}		finally		{			writer.close();		}	}	//--------------------------------------------------------------------------------	//  delete a document	public synchronized void delete(String fld, String txt) throws Exception	{		// possibly remove old document		IndexReader reader = IndexReader.open(_luceneDir);		try		{			reader.delete(new Term(fld, txt));			// RGFIX: should I optimize here, or at least increase updateCount?		}		finally		{			reader.close();		}	}	//--------------------------------------------------------------------------------	public Hashtable getDocs() throws Exception	{		IndexReader reader = IndexReader.open(_luceneDir);		try		{			Hashtable docs = new Hashtable();			for (int i = 0; i < reader.numDocs(); i++)			{				if (reader.isDeleted(i)) continue; // FIXME: strange lucene hack: sometimes it tries to load a deleted document				Hashtable record = new Hashtable();				Document doc = reader.document(i);				String id = doc.get("_id");				for (Enumeration j = doc.fields(); j.hasMoreElements(); )				{					Field field = (Field)j.nextElement();					record.put(field.name(), field.stringValue());				}				docs.put(id, record);			}			return docs;		}		finally		{			reader.close();		}	}	//--------------------------------------------------------------------------------	public Vector getTerms(String fld) throws Exception	{		Vector terms = new Vector();		IndexReader reader = IndexReader.open(_luceneDir);		try		{			TermEnum enu = reader.terms(new Term(fld, ""));			while (enu.next())			{				Term term = enu.term();				if (term.field().equals(fld))					terms.add(enu.term().text());			}		}		finally		{			reader.close();		}		return terms;	}	//-----------------------------------------------------------------------------	// utilities	Element transform(String styleSheetName, Element xml)		throws Exception	{		String styleSheetPath = new File(_stylesheetsDir, styleSheetName).getAbsolutePath();		return Xml.transform(xml, styleSheetPath);	}	public File getLuceneDir() { return _luceneDir; }	Searchable getSearchable() { return _hssSearchable; }	//-----------------------------------------------------------------------------	// private methods	// creates an index in directory luceneDir with StandardAnalyzer if not present	private void setupIndex(boolean rebuild)		throws Exception	{		// if rebuild forced don't check		boolean badIndex = true;		if (!rebuild)		{			try			{				IndexReader reader = IndexReader.open(_luceneDir);				reader.close();				badIndex = false;			}			catch (Exception e)			{				System.err.println("exception while opening lucene index, going to rebuild it: " + e.getMessage());			}		}		// if rebuild forced or bad index then rebuild index		if (rebuild || badIndex)		{			System.err.println("rebuilding lucene index");			IndexWriter writer = new IndexWriter(_luceneDir, new StandardAnalyzer(new String[] {}), true);			writer.close();		}	}	// creates a new document	private Document newDocument(Element xml)	{		Document doc = new Document();		for (Iterator iter = xml.getChildren().iterator(); iter.hasNext(); )		{			Element field = (Element)iter.next();			String name   = field.getAttributeValue("name");			String string = field.getAttributeValue("string").toLowerCase(); // RGFIX: should be only needed for non-tokenized fields			if (string.trim().length() > 0)			{				String sStore = field.getAttributeValue("store");				String sIndex = field.getAttributeValue("index");				String sToken = field.getAttributeValue("token");				boolean store = sStore != null && sStore.equals("true");				boolean index = sIndex != null && sIndex.equals("true");				boolean token = sToken != null && sToken.equals("true");				doc.add(new Field(name, string, store, index, token));			}		}		return doc;	}	//--------------------------------------------------------------------------------	// lazy optimization: optimize index if	// at least TIME_BETWEEN_OPTS time passed or	// at least UPDTATES_BETWEEN_OPTS updates were performed	// since last optimization	private static final long TIME_BETWEEN_OPTS     = 1000; // time between two optimizations in ms	private static final int  UPDTATES_BETWEEN_OPTS = 10;   // number of updates between two optimizations	private long    lastOptTime = 0; // time since last optimization	private int     updateCount = UPDTATES_BETWEEN_OPTS - 1; // number of updates since last uptimization	private boolean optimizing = false; // true iff optimization is in progress	private Object  mutex = new Object(); // RGFIX: check concurrent access from multiple servlets	private void lazyOptimize(IndexWriter writer)		throws Exception	{		if (optimizing) return;		boolean doOptimize;		synchronized (mutex)		{			if (System.currentTimeMillis() - lastOptTime < TIME_BETWEEN_OPTS				 && ++updateCount < UPDTATES_BETWEEN_OPTS)				doOptimize = false;			else			{				doOptimize  = true; 				optimizing  = true;				updateCount = 0;			}		}		if (doOptimize)		{			// System.out.println("**** OPTIMIZING"); // DEBUG			writer.optimize();			lastOptTime = System.currentTimeMillis();			optimizing = false;		}	}}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -