⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 colortitle.java

📁 nutch搜索的改进型工具和优化爬虫的相关工具
💻 JAVA
字号:
/*
 * Created on 2005-3-4
 *
 * TODO To change the template for this generated file, go to
 * Window - Preferences - Java - Code Style - Code Templates
 */
package net.nutch.util;

import java.util.StringTokenizer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;

import org.apache.log4j.Logger;

import net.nutch.searcher.*;

import kit.nlp.util.*;

/**
 * Highlights query terms inside a title by wrapping each match in an HTML
 * {@code <FONT>} tag.
 *
 * <p>Matching is a plain, case-sensitive substring search. Neither the title
 * nor the terms are HTML-escaped; the title text is copied through unchanged.
 */
public class ColorTitle {
	public static final Logger LOG = Logger.getLogger("search");
	/** Markup inserted immediately before a highlighted span. */
	public static final String HIGHTLIGHT_BEGIN = "<FONT color=#e10900>";
	/** Markup inserted immediately after a highlighted span. */
	public static final String HIGHTLIGHT_END = "</FONT>";

	/**
	 * Orders {@code [begin, end)} spans by begin offset, then by end offset.
	 * Each span is a two-element array: index 0 is the begin offset, index 1
	 * the exclusive end offset.
	 */
	static class HightlightComparator implements Comparator<int[]> {
		public int compare(int[] i1, int[] i2) {
			if (i1[0] != i2[0]) {
				return i1[0] > i2[0] ? 1 : -1;
			}
			if (i1[1] != i2[1]) {
				return i1[1] > i2[1] ? 1 : -1;
			}
			return 0;
		}
	}

	/**
	 * Returns {@code t} with every occurrence of every whitespace-separated
	 * term of {@code queryStr} wrapped in {@link #HIGHTLIGHT_BEGIN} and
	 * {@link #HIGHTLIGHT_END}. Matches that overlap or touch each other are
	 * merged into one highlighted span.
	 *
	 * @param t the title to decorate
	 * @param queryStr the query; split into terms on whitespace
	 * @return the decorated title, or {@code t} unchanged when either argument
	 *         is {@code null}, nothing matches, or highlighting fails
	 */
	public static String colorTitle(String t, String queryStr) {
		// Best-effort contract: never fail on missing input, just return the title.
		if (t == null || queryStr == null) {
			return t;
		}
		try {
			int[][] spans = findSpans(t, queryStr);
			if (spans.length == 0) {
				return t;
			}
			Arrays.sort(spans, new HightlightComparator());
			return render(t, spans);
		} catch (RuntimeException e) {
			// Highlighting is cosmetic; report the problem and fall back to the plain title.
			LOG.warn("Color Title Error:" + t, e);
			return t;
		}
	}

	/** Collects the {@code [begin, end)} offsets of every term occurrence in {@code t}. */
	private static int[][] findSpans(String t, String queryStr) {
		ArrayList<int[]> spans = new ArrayList<int[]>();
		StringTokenizer tokens = new StringTokenizer(queryStr);
		while (tokens.hasMoreTokens()) {
			// Tokens are never empty, so the search position always advances.
			String term = tokens.nextToken();
			int from = 0;
			int hit;
			while ((hit = t.indexOf(term, from)) >= 0) {
				from = hit + term.length();
				spans.add(new int[] {hit, from});
			}
		}
		return spans.toArray(new int[spans.size()][]);
	}

	/**
	 * Builds the decorated title from a non-empty, sorted span array, merging
	 * spans that overlap or are directly adjacent.
	 */
	private static String render(String t, int[][] spans) {
		StringBuilder out = new StringBuilder();
		int copied = 0; // everything in t before this offset is already in out
		int start = spans[0][0];
		int end = spans[0][1];
		for (int i = 1; i < spans.length; i++) {
			int[] next = spans[i];
			if (next[0] > end) {
				// Gap before the next span: flush the current merged span.
				appendHighlighted(out, t, copied, start, end);
				copied = end;
				start = next[0];
				end = next[1];
			} else if (next[1] > end) {
				// Overlapping or touching span: extend the current one.
				end = next[1];
			}
		}
		appendHighlighted(out, t, copied, start, end);
		out.append(t, end, t.length());
		return out.toString();
	}

	/** Appends the plain text {@code [from, start)} followed by the highlighted text {@code [start, end)}. */
	private static void appendHighlighted(StringBuilder out, String t, int from, int start, int end) {
		out.append(t, from, start);
		out.append(HIGHTLIGHT_BEGIN);
		out.append(t, start, end);
		out.append(HIGHTLIGHT_END);
	}

	/**
	 * Command-line check: parses {@code args[0]} as a query, then prints the
	 * query string, the raw title {@code args[1]} and the highlighted title.
	 *
	 * @param args {@code args[0]} is the query text, {@code args[1]} the title
	 * @throws Exception if query parsing fails
	 */
	public static void main(String[] args) throws Exception {
		if (args == null || args.length < 2) {
			System.err.println("Usage: ColorTitle <query> <title>");
			return;
		}
		Query query = SearchQuery.parse(args[0], 0, 0);
		System.out.println("queryStr : " + query.getQueryStr());
		System.out.println("title : " + args[1]);
		System.out.println(ColorTitle.colorTitle(args[1], query.getQueryStr()));
	}

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -