⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 operatorparser.java

📁 java实现的全文搜索引擎
💻 JAVA
字号:
/**
 * 
 */
package cn.edu.nju.software.ruse;

import java.io.File;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

//import org.tartarus.snowball.SnowballStemmer;

/**
 * Parses a boolean search query and evaluates it against an {@link Index}.
 *
 * <p>The query is split into tokens. Tokens that are exactly {@code (},
 * {@code )}, {@code AND}, {@code OR}, {@code NOT} (case-insensitive) or the
 * internal end marker {@code #} are treated as operators and go through an
 * operator stack; every other token is turned into a set of files by one of
 * the {@code Operator_*} classes and pushed on an operand stack. After
 * {@link #parse(String)} the top of the operand stack is the query result.
 *
 * <p>Operator priorities come from the {@code Operator} objects created by
 * {@code OperatorFactory}; they are defined outside this file.
 *
 * <p>An instance holds per-query state and is therefore not meant to be
 * shared between concurrently running queries.
 *
 * @author spring
 */
public class OperatorParser implements Parser {

	/** Stand-in written in place of a quoted phrase while splitting on whitespace. */
	private static final String PHRASE_PLACEHOLDER = "@";

	/** Token appended after the last query token to flush the operator stack. */
	private static final String END_MARKER = "#";

	private static final Pattern OPEN_PAREN = Pattern.compile("\\(");
	private static final Pattern CLOSE_PAREN = Pattern.compile("\\)");
	private static final Pattern COLON_SPACING = Pattern.compile("\\s*:\\s*");
	private static final Pattern DASH_SPACING = Pattern.compile("\\s*-\\s*");
	private static final Pattern FILE_CONTENTS_PREFIX = Pattern.compile("(?i)(fileContents\\s*:\\s*)");

	/** A quoted phrase with an optional proximity suffix, e.g. {@code "love me"~2}. */
	private static final Pattern PHRASE = Pattern.compile("\"[\\w\\s]+\"(~\\d+)?");

	/** A maximal run of non-whitespace characters. */
	private static final Pattern TOKEN = Pattern.compile("\\S+");

	/** Tokens that are operators rather than search terms. */
	private static final Pattern OPERATOR_TOKEN =
			Pattern.compile("(?i)(^\\($)|(^\\)$)|(^AND$)|(^OR$)|(^NOT$)|(^#$)");

	private static final Pattern MOD_TIME_PREFIX = Pattern.compile("(?i)(modTime\\s*:)");
	private static final Pattern FILE_SIZE_PREFIX = Pattern.compile("(?i)(fileSize\\s*:)");
	private static final Pattern FILE_NAME_PREFIX = Pattern.compile("(?i)(fileName\\s*:)");

	/** Index that operands are looked up in; stays {@code null} after the no-arg constructor. */
	private Index index;

	/**
	 * A stack to hold the operators that have not been applied yet.
	 */
	private final Stack<Operator> oprStack = new Stack<Operator>();

	/**
	 * A stack to hold the operands (sets of files).
	 */
	private final Stack<HashSet<File>> opdStack = new Stack<HashSet<File>>();

	/**
	 * Creates a parser that evaluates queries against the given index.
	 *
	 * @param index the index handed to every operand lookup and to NOT
	 */
	public OperatorParser(Index index) {
		this.index = index;
	}

	/**
	 * Creates a parser without an index. The index is then passed on as
	 * {@code null}; whether the {@code Operator_*} classes accept that is not
	 * visible from this file.
	 */
	public OperatorParser() {

	}

	/**
	 * Parses and evaluates a query; the result is available from
	 * {@link #getResult()} afterwards. Writes {@code "Search: " + s} to
	 * standard output.
	 *
	 * <p>State left over from a previous call is discarded first, so every
	 * call evaluates its query independently.
	 *
	 * @param s the query text
	 * @throws NullPointerException if {@code s} is {@code null}
	 * @throws java.util.EmptyStackException if an operator is applied while
	 *         the operand stack holds fewer operands than it needs
	 */
	public void parse(String s) {
		System.out.println("Search: " + s);

		// Without this reset every call would leave its "#" marker and its
		// result set behind, and an operand-less query would expose the
		// previous query's result through getResult().
		oprStack.clear();
		opdStack.clear();

		ArrayList<String> tokens = tokenize(normalize(s));
		for (int i = 0; i < tokens.size(); i++) {
			String token = tokens.get(i);
			if (OPERATOR_TOKEN.matcher(token).find()) {
				handleOperator(token);
			} else {
				pushOperand(token);
			}
		}
	}

	/**
	 * Rewrites the raw query so that it can be split on whitespace:
	 * parentheses get surrounding blanks, whitespace around {@code :} and
	 * {@code -} is removed, and every {@code fileContents:} prefix is replaced
	 * by a blank.
	 */
	private static String normalize(String query) {
		// In a replacement string the backslash only escapes the next
		// character, so these insert " ( " and " ) ".
		String text = OPEN_PAREN.matcher(query).replaceAll(" \\( ");
		text = CLOSE_PAREN.matcher(text).replaceAll(" \\) ");
		text = COLON_SPACING.matcher(text).replaceAll(":");
		text = DASH_SPACING.matcher(text).replaceAll("-");
		return FILE_CONTENTS_PREFIX.matcher(text).replaceAll(" ");
	}

	/**
	 * Splits a normalized query into tokens and appends the end marker.
	 *
	 * <p>Quoted phrases contain whitespace, so each one is replaced by a
	 * one-character placeholder before splitting. The offset of every
	 * placeholder is recorded, and a token is turned back into its phrase
	 * only if it is exactly the placeholder written at that offset. A
	 * literal {@code @} typed by the user therefore stays an ordinary term,
	 * and a phrase glued to other text cannot shift later phrases onto the
	 * wrong token.
	 */
	private static ArrayList<String> tokenize(String expression) {
		ArrayList<String> phrases = new ArrayList<String>();
		ArrayList<Integer> phraseOffsets = new ArrayList<Integer>();
		StringBuffer masked = new StringBuffer();

		Matcher phraseMatcher = PHRASE.matcher(expression);
		while (phraseMatcher.find()) {
			// Copies the text in front of the phrase and drops the phrase.
			phraseMatcher.appendReplacement(masked, "");
			phraseOffsets.add(Integer.valueOf(masked.length()));
			phrases.add(phraseMatcher.group());
			masked.append(PHRASE_PLACEHOLDER);
		}
		phraseMatcher.appendTail(masked);

		ArrayList<String> tokens = new ArrayList<String>();
		Matcher tokenMatcher = TOKEN.matcher(masked);
		int next = 0; // first phrase whose placeholder has not been passed yet
		while (tokenMatcher.find()) {
			String token = tokenMatcher.group();
			boolean standalonePhrase = next < phrases.size()
					&& phraseOffsets.get(next).intValue() == tokenMatcher.start()
					&& token.equals(PHRASE_PLACEHOLDER);
			if (standalonePhrase) {
				token = phrases.get(next);
			}
			// Step over every placeholder that lies inside this token,
			// whether it was restored or is embedded in a longer token.
			while (next < phrases.size()
					&& phraseOffsets.get(next).intValue() < tokenMatcher.end()) {
				next++;
			}
			tokens.add(token);
		}
		tokens.add(END_MARKER);
		return tokens;
	}

	/**
	 * Processes one operator token: operators on the stack are applied one
	 * at a time until the new operator can be pushed, or until a closing
	 * parenthesis has met its opening parenthesis.
	 */
	private void handleOperator(String token) {
		String symbol = token.toUpperCase();
		while (true) {
			// A fresh operator is requested on every round because pushing
			// modifies its PRI field.
			// NOTE(review): assumes the factory returns a new object per call - confirm.
			Operator op = OperatorFactory.createOperator(symbol);

			if (op.getDesription().equals(")")) {
				if (oprStack.isEmpty()) {
					// Unmatched ")": ignored, just as an unmatched "(" is
					// silently dropped when it is popped.
					return;
				}
				if (oprStack.peek().getDesription().equals("(")) {
					oprStack.pop();
					return;
				}
			}

			if (oprStack.isEmpty() || oprStack.peek().getPRI() < op.getPRI()) {
				if (op.getDesription().equals("(")) {
					// Once on the stack an opening parenthesis gets priority -1.
					op.PRI = -1;
				} else {
					// Every other operator is stored with its priority lowered by one.
					op.PRI -= 1;
				}
				oprStack.push(op);
				return;
			}

			applyOperator(oprStack.pop());
		}
	}

	/**
	 * Applies a popped operator to the operand stack. AND and OR consume two
	 * operands, NOT consumes one; any other operator (for example an opening
	 * parenthesis) is discarded without touching the operands.
	 */
	private void applyOperator(Operator op) {
		String kind = op.getDesription();
		if (kind.equals("AND")) {
			HashSet<File> right = opdStack.pop();
			HashSet<File> left = opdStack.pop();
			opdStack.push(((Operator_AND) op).and(left, right));
		} else if (kind.equals("OR")) {
			HashSet<File> right = opdStack.pop();
			HashSet<File> left = opdStack.pop();
			opdStack.push(((Operator_OR) op).or(left, right));
		} else if (kind.equals("NOT")) {
			HashSet<File> operand = opdStack.pop();
			opdStack.push(((Operator_NOT) op).not(operand, index));
		}
	}

	/**
	 * Turns a non-operator token into a set of files and pushes it. The
	 * token is checked, in this order, for a {@code modTime:},
	 * {@code fileSize:} or {@code fileName:} keyword and for a quoted
	 * phrase; anything else is a file-contents term.
	 */
	private void pushOperand(String token) {
		Matcher modTime = MOD_TIME_PREFIX.matcher(token);
		Matcher fileSize = FILE_SIZE_PREFIX.matcher(token);
		Matcher fileName = FILE_NAME_PREFIX.matcher(token);

		// The argument starts where the keyword match ends. The length of the
		// match is only a valid offset when the keyword starts the token.
		if (modTime.find()) {
			Operator op = OperatorFactory.createOperator("MODTIME");
			String argument = token.substring(modTime.end());
			opdStack.push(((Operator_ModTime) op).getFileSet(argument, index));
		} else if (fileSize.find()) {
			Operator op = OperatorFactory.createOperator("FILESIZE");
			String argument = token.substring(fileSize.end());
			opdStack.push(((Operator_FileSize) op).getFileSet(argument, index));
		} else if (fileName.find()) {
			Operator op = OperatorFactory.createOperator("FILENAME");
			String argument = token.substring(fileName.end());
			opdStack.push(((Operator_FileName) op).getFileSet(argument, index));
		} else if (PHRASE.matcher(token).find()) {
			Operator op = OperatorFactory.createOperator("PROXIMITY");
			opdStack.push(((Operator_Proximity) op).getFileSet(token, index));
		} else {
			Operator op = OperatorFactory.createOperator("FILECONTENTS");
			opdStack.push(((Operator_FileContents) op).getFileSet(token, index));
		}
	}

	/**
	 * Returns the result of the most recent {@link #parse(String)} call.
	 *
	 * @return the set on top of the operand stack, or a new empty set when
	 *         the stack is empty (nothing parsed yet, or the query had no
	 *         search term)
	 */
	public HashSet<File> getResult() {
		if (opdStack.isEmpty()) {
			return new HashSet<File>();
		}
		return opdStack.peek();
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -