📄 htmlindex.java
字号:
package index;import index.*;import java.util.*;import java.io.*;import DebugLog;/** * HTMLIndex represents an index for a directory of HTML files. * Once loaded the index is thread safe, but the loading and * creation process should only happen from a single thread. */public class HTMLIndex{ protected Vector files; protected Hashtable theIndex; protected File indexFile; protected File direc; protected boolean indexLoaded; protected String useRel; public static final String INDEX_FILE_NAME=".htmlindex"; /** * Looks for an index file and creates one if necessary. */ public HTMLIndex(File dir) { theIndex = new Hashtable(); files = new Vector(); if((dir != null)&&dir.isDirectory()) direc = dir; if(direc != null) indexFile = new File(dir,INDEX_FILE_NAME); useRel = null; } /** * Should files be based on the * provided path or the directories absolute path. * Set this value BEFORE loading the index. */ public void setRelativePath(String rel) { useRel = rel; } /** * Return true if the index is loaded. */ public synchronized boolean isIndexLoaded() { return indexLoaded; } public boolean isIndexBuilt() { if((indexFile != null)&&(indexFile.exists())) return true; else return false; } public boolean indexNeedsRebuilding() { return IndexBuilder.indexNeedsRebuilding(direc); } public void buildIndex() { if((direc != null)&&(!isIndexBuilt() || indexNeedsRebuilding())) { IndexBuilder.buildIndex(direc); } } public void loadIndex() { loadIndex(true); } public void loadIndex(boolean build) { if(!isIndexLoaded()) { if(build) buildIndex(); loadIndexFile(); } } public Enumeration getFiles() { return files.elements(); } public Enumeration getWords() { return theIndex.keys(); } public File getDirectory() { return direc; } public Vector allFiles() { Vector results = new Vector(); int i,max; max = files.size(); for(i=0;i<max;i++) { results.addElement(files.elementAt(i)); } return results; } /** * Private method that loads the index file. */ protected void loadIndexFile() { FileReader fileIn; LineNumberReader lineIn; String curLine; int index; String word,curId; Vector entryForWord; String dirPath; try { fileIn = new FileReader(indexFile); lineIn = new LineNumberReader(fileIn); //Read the file names if(useRel != null) { if(useRel.equals("/")||useRel.equals("\\") ||useRel.endsWith("/")||useRel.endsWith("\\")) dirPath = useRel; else dirPath = useRel+File.separator; } else { if(useRel.equals("/")||useRel.equals("\\") ||useRel.endsWith("/")||useRel.endsWith("\\")) dirPath = direc.getAbsolutePath(); else dirPath = direc.getAbsolutePath()+File.separator; } while(((curLine = lineIn.readLine()) != null) &&(curLine.length()>0)) { files.addElement(dirPath+curLine); } //Read the words while(((curLine = lineIn.readLine()) != null) &&(curLine.length()>0)) { index = curLine.indexOf("|"); if(index > 0) { word = curLine.substring(0,index); theIndex.put(word,curLine); } } lineIn.close(); fileIn.close(); synchronized(this) { indexLoaded = true; } } catch(Exception exp) { indexLoaded = false; } } protected synchronized Vector cacheFilesForWord(String word) { Vector dataForWord; String realWord = word.toLowerCase(); int ind; String curEntry,curLine; IndexEntry newEntry; StringTokenizer cursor; int curFile,curOc; Object data; data = theIndex.get(realWord); //Double check that we need to cache this word if(data instanceof String) { curLine = (String) data; dataForWord = new Vector(); } else { return (Vector) data; } try { cursor= new StringTokenizer(curLine,"|"); //Skip the word cursor.nextToken(); while(cursor.hasMoreTokens()) { curEntry = cursor.nextToken(); newEntry = new IndexEntry(); ind = curEntry.indexOf(" "); curFile = Integer.parseInt(curEntry.substring(0,ind)); curOc = Integer.parseInt(curEntry.substring(ind+1)); newEntry.file = curFile; newEntry.occurences = curOc; if(!dataForWord.contains(newEntry)) dataForWord.addElement(newEntry); } } catch(Exception exp) { dataForWord.removeAllElements(); } theIndex.put(realWord,dataForWord); return dataForWord; } public Vector filesForWord(String word) { Vector dataForWord; Object data; Vector results = new Vector(); IndexEntry entry; if(word == null) return results; if(!isIndexLoaded()) return results; data = theIndex.get(word.toLowerCase()); if(data instanceof String) { dataForWord = cacheFilesForWord(word); } else { dataForWord = (Vector) data; } if(dataForWord != null) { int i,max; max = dataForWord.size(); for(i=0;i<max;i++) { entry = (IndexEntry) dataForWord.elementAt(i); results.addElement(files.elementAt(entry.file)); } } return results; } public int occurencesOfWord(String file,String word) { Vector dataForWord; Object data; int result=0; IndexEntry entry; String curFile; if((word == null)||(file==null)) return result; if(!isIndexLoaded()) return result; data = theIndex.get(word.toLowerCase()); if(data instanceof String) { dataForWord = cacheFilesForWord(word); } else { dataForWord = (Vector) data; } if(dataForWord != null) { int i,max; max = dataForWord.size(); for(i=0;i<max;i++) { entry = (IndexEntry) dataForWord.elementAt(i); curFile = (String) files.elementAt(entry.file); if(file.equals(curFile)) { result = entry.occurences; break; } } } return result; } /* * Supports boolean queries using & | and !. * Space is an implicit or * No Parenthesis */ public Vector filesForQuery(String query) { Vector results=null; boolean not=false,and=false,or=false; StringTokenizer cursor = new StringTokenizer(query,"!&| ",true); String curToken; String curWord = null; if(query == null) return results; if(!isIndexLoaded()) return results; while(cursor.hasMoreTokens()) { curToken = cursor.nextToken(); if(curToken.equals("!")||curToken.equalsIgnoreCase("not")) { not = true; } else if(curToken.equals("|")||curToken.equalsIgnoreCase("or")) { or = true; if(curWord == null)//Error { results = null; break; } } else if(curToken.equals("&")||curToken.equalsIgnoreCase("and")) { and = true; if(curWord == null)//Error { results = null; break; } } else if(curToken.equals(" ")) { //Skip it } else //its a word { curWord = curToken; if(results == null)//first word { if(and || or) { //Error break; } else { if(not) { results = filesNotForWord(curWord); } else { results = filesForWord(curWord); } } } else { //Two word in a row => or if(and) { if(not) { addAndNotWordToResults(results,curWord); } else { addAndWordToResults(results,curWord); } } else //either or or space { if(not) { addOrNotWordToResults(results,curWord); } else { addOrWordToResults(results,curWord); } } } //reset booleans not = false; and = false; or = false; } } return results; } protected void addAndWordToResults(Vector curResults,String word) { Vector wordFiles = filesForWord(word); int i,max; Object tmp; max = curResults.size(); for(i=max-1;i>=0;i--) { tmp = curResults.elementAt(i); if(!wordFiles.contains(tmp)) { curResults.removeElementAt(i); } } } protected void addOrWordToResults(Vector curResults,String word) { Vector wordFiles = filesForWord(word); int i,max; Object tmp; max = wordFiles.size(); for(i=0;i<max;i++) { tmp = wordFiles.elementAt(i); if(!curResults.contains(tmp)) { curResults.addElement(tmp); } } } protected void addAndNotWordToResults(Vector curResults,String word) { Vector wordFiles = filesNotForWord(word); int i,max; Object tmp; max = curResults.size(); for(i=max-1;i>=0;i--) { tmp = curResults.elementAt(i); if(!wordFiles.contains(tmp)) { curResults.removeElementAt(i); } } } protected void addOrNotWordToResults(Vector curResults,String word) { Vector wordFiles = filesNotForWord(word); int i,max; Object tmp; max = wordFiles.size(); for(i=0;i<max;i++) { tmp = wordFiles.elementAt(i); if(!curResults.contains(tmp)) { curResults.addElement(tmp); } } } protected Vector filesNotForWord(String word) { Vector results = allFiles(); Vector wordFiles = filesForWord(word); int i,max; Object tmp; max = wordFiles.size(); for(i=0;i<max;i++) { tmp = wordFiles.elementAt(i); results.removeElement(tmp); } return results; } public static void main(String args[]) { if(args.length <= 1) { System.out.println("usage: java HTMLIndex dir word"); return; } DebugLog log = new DebugLog(); log.logTo(System.out); IndexBuilder.log = log; HTMLIndex index = new HTMLIndex(new File(args[0])); String curFile; index.loadIndex(); Vector files = index.filesForQuery(args[1]); int i,max; max = files.size(); if(max == 0) System.out.println("No Files Match Query"); else System.out.println("Matching Files---------\n"); for(i=0;i<max;i++) { curFile = (String) files.elementAt(i); System.out.println(curFile +" " + index.occurencesOfWord(curFile,args[1])); } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -