📄 htmlindex.java

📁 JAVA在线商店带JSP的
💻 JAVA
字号:
package index;

import index.*;
import java.util.*;
import java.io.*;
import DebugLog;

/**
 * HTMLIndex represents an index for a directory of HTML files.
 * Once loaded the index is thread safe, but the loading and
 * creation process should only happen from a single thread.
 */
public class HTMLIndex
{
    /** File names read from the index file, each prefixed with the base path. */
    protected Vector files;

    /**
     * Maps a word to either its raw index line (a String, as read from the
     * index file) or, once the word has been queried, a Vector of IndexEntry.
     */
    protected Hashtable theIndex;

    /** The index file inside the directory; null when no valid directory was given. */
    protected File indexFile;

    /** The indexed directory; null when the constructor argument was not a directory. */
    protected File direc;

    /** True once the index file has been read successfully. */
    protected boolean indexLoaded;

    /** Optional path prefix used instead of the directory's absolute path. */
    protected String useRel;

    /** Name of the index file inside the indexed directory. */
    public static final String INDEX_FILE_NAME=".htmlindex";

    /**
     * Creates an index object for the given directory. Nothing is read or
     * built here; call {@link #loadIndex()} to do that.
     *
     * @param dir the directory to index; when null or not a directory the
     *            object is created without a directory or index file
     */
    public HTMLIndex(File dir)
    {
        theIndex = new Hashtable();
        files = new Vector();
        if((dir != null)&&dir.isDirectory()) direc = dir;
        if(direc != null) indexFile = new File(dir,INDEX_FILE_NAME);
        useRel = null;
    }

    /**
     * Should files be based on the
     * provided path or the directories absolute path.
     * Set this value BEFORE loading the index.
     *
     * @param rel path prefix for returned file names, or null to use the
     *            directory's absolute path
     */
    public void setRelativePath(String rel)
    {
        useRel = rel;
    }

    /**
     * Return true if the index is loaded.
     */
    public synchronized boolean isIndexLoaded()
    {
        return indexLoaded;
    }

    /**
     * Return true if the index file exists on disk.
     */
    public boolean isIndexBuilt()
    {
        return (indexFile != null)&&indexFile.exists();
    }

    /**
     * Asks IndexBuilder whether the index for the directory needs rebuilding.
     */
    public boolean indexNeedsRebuilding()
    {
        return IndexBuilder.indexNeedsRebuilding(direc);
    }

    /**
     * Builds the index through IndexBuilder when a directory is set and the
     * index file is missing or reported as needing a rebuild.
     */
    public void buildIndex()
    {
        if((direc != null)&&(!isIndexBuilt() || indexNeedsRebuilding()))
        {
            IndexBuilder.buildIndex(direc);
        }
    }

    /**
     * Loads the index, building it first if necessary.
     */
    public void loadIndex()
    {
        loadIndex(true);
    }

    /**
     * Loads the index if it is not loaded yet.
     *
     * @param build when true, {@link #buildIndex()} is called before loading
     */
    public void loadIndex(boolean build)
    {
        if(!isIndexLoaded())
        {
            if(build) buildIndex();
            loadIndexFile();
        }
    }

    /**
     * Returns an enumeration of the loaded file names.
     */
    public Enumeration getFiles()
    {
        return files.elements();
    }

    /**
     * Returns an enumeration of the indexed words.
     */
    public Enumeration getWords()
    {
        return theIndex.keys();
    }

    /**
     * Returns the indexed directory, or null if none was set.
     */
    public File getDirectory()
    {
        return direc;
    }

    /**
     * Returns a new Vector holding every loaded file name, so callers can
     * modify the result without touching the index.
     */
    public Vector allFiles()
    {
        Vector results = new Vector();
        int max = files.size();

        for(int i=0;i<max;i++)
        {
            results.addElement(files.elementAt(i));
        }

        return results;
    }

    /**
     * Reads the index file: first a list of file names terminated by an
     * empty line, then one line per word of the form word|entry|entry...
     * The method never throws; success is reported through
     * {@link #isIndexLoaded()}. On failure neither the file list nor the
     * word table is modified.
     */
    protected void loadIndexFile()
    {
        FileReader fileIn = null;
        LineNumberReader lineIn = null;
        Vector loadedFiles = new Vector();
        Hashtable loadedWords = new Hashtable();
        boolean loaded = false;
        String curLine;
        String dirPath;
        int index;

        //Without a directory there is no index file to read
        if(indexFile != null)
        {
            try
            {
                fileIn = new FileReader(indexFile);
                lineIn = new LineNumberReader(fileIn);

                //Pick the base path and make sure it ends with a separator.
                //useRel is null unless setRelativePath was called, so the
                //directory's own path must be checked in that case.
                if(useRel != null) dirPath = useRel;
                else dirPath = direc.getAbsolutePath();

                if(!(dirPath.endsWith("/")||dirPath.endsWith("\\")))
                    dirPath = dirPath+File.separator;

                //Read the file names
                while(((curLine = lineIn.readLine()) != null)
                        &&(curLine.length()>0))
                {
                    loadedFiles.addElement(dirPath+curLine);
                }

                //Read the words; the raw line is stored and parsed lazily
                while(((curLine = lineIn.readLine()) != null)
                        &&(curLine.length()>0))
                {
                    index = curLine.indexOf("|");

                    if(index > 0)
                    {
                        loadedWords.put(curLine.substring(0,index),curLine);
                    }
                }

                //Everything was read, publish it
                int max = loadedFiles.size();
                for(int i=0;i<max;i++)
                {
                    files.addElement(loadedFiles.elementAt(i));
                }

                Enumeration words = loadedWords.keys();
                while(words.hasMoreElements())
                {
                    Object word = words.nextElement();
                    theIndex.put(word,loadedWords.get(word));
                }

                loaded = true;
            }
            catch(Exception exp)
            {
                //Loading is best effort: callers see the failure
                //through isIndexLoaded() returning false.
                loaded = false;
            }
            finally
            {
                //Closing the outer reader also closes the FileReader
                if(lineIn != null) closeQuietly(lineIn);
                else closeQuietly(fileIn);
            }
        }

        synchronized(this)
        {
            indexLoaded = loaded;
        }
    }

    /** Closes the reader if it is not null, ignoring any failure to close. */
    private static void closeQuietly(Reader reader)
    {
        if(reader == null) return;

        try
        {
            reader.close();
        }
        catch(IOException ignored)
        {
            //Nothing useful can be done when closing a reader fails
        }
    }

    /**
     * Parses the raw index line stored for a word into a Vector of
     * IndexEntry and stores that Vector back in the index, so the line is
     * parsed only once. Each entry has the form "fileNumber occurrences".
     * If any entry cannot be parsed the word is cached with an empty Vector.
     *
     * @param word the word to look up; it is lower-cased before the lookup
     * @return the entries for the word, or null if the word is not indexed
     */
    protected synchronized Vector cacheFilesForWord(String word)
    {
        String realWord = word.toLowerCase();
        Object data = theIndex.get(realWord);

        //Double check that we need to cache this word
        if(!(data instanceof String))
        {
            return (Vector) data;
        }

        String curLine = (String) data;
        Vector dataForWord = new Vector();

        try
        {
            StringTokenizer cursor = new StringTokenizer(curLine,"|");

            //Skip the word
            cursor.nextToken();

            while(cursor.hasMoreTokens())
            {
                String curEntry = cursor.nextToken();
                IndexEntry newEntry = new IndexEntry();
                int ind = curEntry.indexOf(" ");

                newEntry.file = Integer.parseInt(curEntry.substring(0,ind));
                newEntry.occurences = Integer.parseInt(curEntry.substring(ind+1));

                if(!dataForWord.contains(newEntry))
                    dataForWord.addElement(newEntry);
            }
        }
        catch(Exception exp)
        {
            //A malformed line yields no entries rather than partial ones
            dataForWord.removeAllElements();
        }

        theIndex.put(realWord,dataForWord);

        return dataForWord;
    }

    /**
     * Returns the parsed entries for a word, parsing and caching the raw
     * index line on first use. Returns null if the word is not indexed.
     */
    private Vector entriesForWord(String word)
    {
        Object data = theIndex.get(word.toLowerCase());

        if(data instanceof String)
        {
            return cacheFilesForWord(word);
        }

        return (Vector) data;
    }

    /**
     * Returns the names of the files that contain the word.
     *
     * @param word the word to look up
     * @return a new Vector of file names; empty when the word is null, the
     *         index is not loaded, or the word is not indexed
     */
    public Vector filesForWord(String word)
    {
        Vector results = new Vector();

        if(word == null) return results;

        if(!isIndexLoaded()) return results;

        Vector dataForWord = entriesForWord(word);

        if(dataForWord != null)
        {
            int max = dataForWord.size();

            for(int i=0;i<max;i++)
            {
                IndexEntry entry = (IndexEntry) dataForWord.elementAt(i);
                results.addElement(files.elementAt(entry.file));
            }
        }

        return results;
    }

    /**
     * Returns the occurrence count recorded for a word in one file.
     *
     * @param file a file name as returned by this index
     * @param word the word to look up
     * @return the recorded count, or 0 when either argument is null, the
     *         index is not loaded, or no entry matches
     */
    public int occurencesOfWord(String file,String word)
    {
        int result=0;

        if((word == null)||(file==null)) return result;

        if(!isIndexLoaded()) return result;

        Vector dataForWord = entriesForWord(word);

        if(dataForWord != null)
        {
            int max = dataForWord.size();

            for(int i=0;i<max;i++)
            {
                IndexEntry entry = (IndexEntry) dataForWord.elementAt(i);
                String curFile = (String) files.elementAt(entry.file);

                if(file.equals(curFile))
                {
                    result = entry.occurences;
                    break;
                }
            }
        }

        return result;
    }

    /**
     * Supports boolean queries using & | and ! (the words "and", "or" and
     * "not" are accepted too, in any case).
     * Space is an implicit or.
     * No Parenthesis.
     *
     * @param query the query string
     * @return the matching file names, or null when the query is null, the
     *         index is not loaded, or an and/or operator appears before any
     *         word
     */
    public Vector filesForQuery(String query)
    {
        Vector results=null;
        boolean not=false,and=false,or=false;
        String curWord = null;

        //Check before tokenizing: StringTokenizer rejects a null string
        if(query == null) return results;

        if(!isIndexLoaded()) return results;

        StringTokenizer cursor = new StringTokenizer(query,"!&| ",true);

        while(cursor.hasMoreTokens())
        {
            String curToken = cursor.nextToken();

            if(curToken.equals("!")||curToken.equalsIgnoreCase("not"))
            {
                not = true;
            }
            else if(curToken.equals("|")||curToken.equalsIgnoreCase("or"))
            {
                or = true;
                if(curWord == null)//Error
                {
                    results = null;
                    break;
                }
            }
            else if(curToken.equals("&")||curToken.equalsIgnoreCase("and"))
            {
                and = true;
                if(curWord == null)//Error
                {
                    results = null;
                    break;
                }
            }
            else if(curToken.equals(" "))
            {
                //Skip it
            }
            else //its a word
            {
                curWord = curToken;

                if(results == null)//first word
                {
                    if(and || or)
                    {
                        //Error
                        break;
                    }

                    if(not) results = filesNotForWord(curWord);
                    else results = filesForWord(curWord);
                }
                else if(and)
                {
                    if(not) addAndNotWordToResults(results,curWord);
                    else addAndWordToResults(results,curWord);
                }
                else //either or or space: two words in a row => or
                {
                    if(not) addOrNotWordToResults(results,curWord);
                    else addOrWordToResults(results,curWord);
                }

                //reset booleans
                not = false;
                and = false;
                or = false;
            }
        }

        return results;
    }

    /** Removes from curResults every element that is not in allowed. */
    private static void retainMatching(Vector curResults,Vector allowed)
    {
        //Walk backwards so removals do not shift unvisited elements
        for(int i=curResults.size()-1;i>=0;i--)
        {
            if(!allowed.contains(curResults.elementAt(i)))
            {
                curResults.removeElementAt(i);
            }
        }
    }

    /** Appends to curResults every element of extra it does not hold yet. */
    private static void appendMissing(Vector curResults,Vector extra)
    {
        int max = extra.size();

        for(int i=0;i<max;i++)
        {
            Object tmp = extra.elementAt(i);

            if(!curResults.contains(tmp))
            {
                curResults.addElement(tmp);
            }
        }
    }

    /**
     * Narrows curResults to the files that also contain the word.
     */
    protected void addAndWordToResults(Vector curResults,String word)
    {
        retainMatching(curResults,filesForWord(word));
    }

    /**
     * Adds to curResults the files that contain the word.
     */
    protected void addOrWordToResults(Vector curResults,String word)
    {
        appendMissing(curResults,filesForWord(word));
    }

    /**
     * Narrows curResults to the files that do not contain the word.
     */
    protected void addAndNotWordToResults(Vector curResults,String word)
    {
        retainMatching(curResults,filesNotForWord(word));
    }

    /**
     * Adds to curResults the files that do not contain the word.
     */
    protected void addOrNotWordToResults(Vector curResults,String word)
    {
        appendMissing(curResults,filesNotForWord(word));
    }

    /**
     * Returns every loaded file name except those that contain the word.
     */
    protected Vector filesNotForWord(String word)
    {
        Vector results = allFiles();
        Vector wordFiles = filesForWord(word);
        int max = wordFiles.size();

        for(int i=0;i<max;i++)
        {
            results.removeElement(wordFiles.elementAt(i));
        }

        return results;
    }

    /**
     * Command line entry point: loads (building if needed) the index for a
     * directory, runs a query and prints each matching file.
     *
     * @param args the directory followed by the query
     */
    public static void main(String args[])
    {
        if(args.length <= 1)
        {
            System.out.println("usage: java HTMLIndex dir word");
            return;
        }

        DebugLog log = new DebugLog();

        log.logTo(System.out);

        IndexBuilder.log = log;

        HTMLIndex index = new HTMLIndex(new File(args[0]));

        index.loadIndex();

        Vector files = index.filesForQuery(args[1]);

        //filesForQuery returns null when the index could not be loaded
        //or the query starts with an and/or operator
        if(files == null)
        {
            System.out.println("Query failed: index not loaded or malformed query");
            return;
        }

        int max = files.size();

        if(max == 0) System.out.println("No Files Match Query");
        else System.out.println("Matching Files---------\n");

        for(int i=0;i<max;i++)
        {
            String curFile = (String) files.elementAt(i);
            System.out.println(curFile +" "
                       + index.occurencesOfWord(curFile,args[1]));
        }
    }
}
💿 文件大小 152 K
👤 上传用户 wuseyue
📂 所属分类百货/超市行业
🏷️ 相关标签

#JAVA #JSP #商
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -