📄 highlighter.java

📁 lucene2.2.0版本
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
					newText.append(encoder.encodeText(text.substring(lastEndOffset, startOffset)));				newText.append(markedUpText);				lastEndOffset=Math.max(lastEndOffset,endOffset);			}			//Test what remains of the original text beyond the point where we stopped analyzing 			if (//					if there is text beyond the last token considered..					(lastEndOffset < text.length()) 					&&//					and that text is not too large...					(text.length()<maxDocBytesToAnalyze)				)							{				//append it to the last fragment				newText.append(encoder.encodeText(text.substring(lastEndOffset)));			}			currentFrag.textEndPos = newText.length();			//sort the most relevant sections of the text			for (Iterator i = docFrags.iterator(); i.hasNext();)			{				currentFrag = (TextFragment) i.next();				//If you are running with a version of Lucene before 11th Sept 03				// you do not have PriorityQueue.insert() - so uncomment the code below				/*									if (currentFrag.getScore() >= minScore)									{										fragQueue.put(currentFrag);										if (fragQueue.size() > maxNumFragments)										{ // if hit queue overfull											fragQueue.pop(); // remove lowest in hit queue											minScore = ((TextFragment) fragQueue.top()).getScore(); // reset minScore										}									}				*/				//The above code caused a problem as a result of Christoph Goller's 11th Sept 03				//fix to PriorityQueue. The correct method to use here is the new "insert" method				// USE ABOVE CODE IF THIS DOES NOT COMPILE!				
fragQueue.insert(currentFrag);			}			//return the most relevant fragments			TextFragment frag[] = new TextFragment[fragQueue.size()];			for (int i = frag.length - 1; i >= 0; i--)			{				frag[i] = (TextFragment) fragQueue.pop();			}			//merge any contiguous fragments to improve readability			if(mergeContiguousFragments)			{				mergeContiguousFragments(frag);				ArrayList fragTexts = new ArrayList();				for (int i = 0; i < frag.length; i++)				{					if ((frag[i] != null) && (frag[i].getScore() > 0))					{						fragTexts.add(frag[i]);					}				}				frag= (TextFragment[]) fragTexts.toArray(new TextFragment[0]);			}			return frag;		}		finally		{			if (tokenStream != null)			{				try				{					tokenStream.close();				}				catch (Exception e)				{				}			}		}	}	/** Improves readability of a score-sorted list of TextFragments by merging any fragments	 * that were contiguous in the original text into one larger fragment with the correct order.	 * This will leave a "null" in the array entry for the lesser scored fragment. 	 * 	 * @param frag An array of document fragments in descending score	 */	private void mergeContiguousFragments(TextFragment[] frag)	{		boolean mergingStillBeingDone;		if (frag.length > 1)			do			{				mergingStillBeingDone = false; //initialise loop control flag				//for each fragment, scan other frags looking for contiguous blocks				for (int i = 0; i < frag.length; i++)				{					if (frag[i] == null)					{						continue;					}					//merge any contiguous blocks 					for (int x = 0; x < frag.length; x++)					{						if (frag[x] == null)						{							continue;						}						if (frag[i] == null)						{							break;						}						TextFragment frag1 = null;						TextFragment frag2 = null;						int frag1Num = 0;						int frag2Num = 0;						int bestScoringFragNum;						int worstScoringFragNum;						//if blocks are contiguous....						
if (frag[i].follows(frag[x]))						{							frag1 = frag[x];							frag1Num = x;							frag2 = frag[i];							frag2Num = i;						}						else							if (frag[x].follows(frag[i]))							{								frag1 = frag[i];								frag1Num = i;								frag2 = frag[x];								frag2Num = x;							}						//merging required..						if (frag1 != null)						{							if (frag1.getScore() > frag2.getScore())							{								bestScoringFragNum = frag1Num;								worstScoringFragNum = frag2Num;							}							else							{								bestScoringFragNum = frag2Num;								worstScoringFragNum = frag1Num;							}							frag1.merge(frag2);							frag[worstScoringFragNum] = null;							mergingStillBeingDone = true;							frag[bestScoringFragNum] = frag1;						}					}				}			}			while (mergingStillBeingDone);	}			/**	 * Highlights terms in the  text , extracting the most relevant sections	 * and concatenating the chosen fragments with a separator (typically "...").	 * The document text is analysed in chunks to record hit statistics	 * across the document. After accumulating stats, the fragments with the highest scores	 * are returned in order as "separator" delimited strings.	 *	 * @param text        text to highlight terms in	 * @param maxNumFragments  the maximum number of fragments.	 
* @param separator  the separator used to intersperse the document fragments (typically "...")
	 * @param tokenStream  token stream handed on, together with text and maxNumFragments,
	 *                     to the three-argument getBestFragments overload
	 *
	 * @return highlighted text: the sections returned by the three-argument overload,
	 *         joined into one string with the separator between neighbouring sections
	 * @throws IOException declared by this method; presumably propagated from the
	 *                     three-argument overload (nothing in this body throws it directly)
	 */
	public final String getBestFragments(
		TokenStream tokenStream,
		String text,
		int maxNumFragments,
		String separator)
		throws IOException
	{
		String sections[] = getBestFragments(tokenStream, text, maxNumFragments);
		StringBuffer joined = new StringBuffer();
		int index = 0;
		while (index < sections.length)
		{
			//the separator goes between sections only, never before the first one
			if (index != 0)
			{
				joined.append(separator);
			}
			joined.append(sections[index]);
			index++;
		}
		return joined.toString();
	}

	/**
	 * @return the maximum number of bytes to be tokenized per doc
	 */
	public int getMaxDocBytesToAnalyze()
	{
		return this.maxDocBytesToAnalyze;
	}

	/**
	 * @param byteCount the maximum number of bytes to be tokenized per doc
	 * (This can improve performance with large documents)
	 */
	public void setMaxDocBytesToAnalyze(int byteCount)
	{
		this.maxDocBytesToAnalyze = byteCount;
	}

	/**
	 * @return the Fragmenter currently held by this object
	 */
	public Fragmenter getTextFragmenter()
	{
		return this.textFragmenter;
	}

	/**
	 * @param fragmenter the Fragmenter to store, replacing the one currently held
	 */
	public void setTextFragmenter(Fragmenter fragmenter)
	{
		this.textFragmenter = fragmenter;
	}

	/**
	 * @return Object used to score each text fragment
	 */
	public Scorer getFragmentScorer()
	{
		return this.fragmentScorer;
	}

	/**
	 * @param scorer the Scorer to store, replacing the one currently held
	 */
	public void setFragmentScorer(Scorer scorer)
	{
		this.fragmentScorer = scorer;
	}

	/**
	 * @return the Encoder currently held by this object
	 */
	public Encoder getEncoder()
	{
		return this.encoder;
	}

	/**
	 * @param encoder the Encoder to store, replacing the one currently held
	 */
	public void setEncoder(Encoder encoder)
	{
		this.encoder = encoder;
	}
}

/**
 * PriorityQueue of TextFragment objects whose ordering is defined by
 * {@link #lessThan(Object, Object)}.
 */
class FragmentQueue extends PriorityQueue
{
	/**
	 * @param size value passed straight to <code>initialize</code>
	 */
	public FragmentQueue(int size)
	{
		initialize(size);
	}

	/**
	 * Orders two TextFragments. A fragment with the lower score is "less than" the other;
	 * when both scores compare equal, the fragment with the larger fragNum is the lesser one.
	 *
	 * @param a first TextFragment
	 * @param b second TextFragment
	 * @return true if a orders before b under the rule above
	 */
	public final boolean lessThan(Object a, Object b)
	{
		TextFragment first = (TextFragment) a;
		TextFragment second = (TextFragment) b;
		if (first.getScore() != second.getScore())
		{
			return first.getScore() < second.getScore();
		}
		//equal scores: fall back to fragNum
		return first.fragNum > second.fragNum;
	}
}
上一页 12
💿 文件大小 5913 K
👤 上传用户 jjjjjkkkkjkjkjk
📂 所属分类 Java编程
🏷️ 相关标签

#lucene #版本
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -