📄 highlightertest.java
字号:
// same token-stream as above, but the bigger token comes first this time protected TokenStream getTS2a() { //String s = "Hi-Speed10 foo"; return new TokenStream() { Iterator iter; List lst; { lst = new ArrayList(); Token t; t = new Token("hispeed",0,8); lst.add(t); t = new Token("hi",0,2); t.setPositionIncrement(0); lst.add(t); t = new Token("speed",3,8); lst.add(t); t = new Token("10",8,10); lst.add(t); t = new Token("foo",11,14); lst.add(t); iter = lst.iterator(); } public Token next() throws IOException { return iter.hasNext() ? (Token)iter.next() : null; } }; } public void testOverlapAnalyzer2() throws Exception { String s = "Hi-Speed10 foo"; Query query; Highlighter highlighter; String result; query = new QueryParser("text",new WhitespaceAnalyzer()).parse("foo"); highlighter = new Highlighter(new QueryScorer(query)); result = highlighter.getBestFragments(getTS2(), s, 3, "..."); assertEquals("Hi-Speed10 <B>foo</B>",result); query = new QueryParser("text",new WhitespaceAnalyzer()).parse("10"); highlighter = new Highlighter(new QueryScorer(query)); result = highlighter.getBestFragments(getTS2(), s, 3, "..."); assertEquals("Hi-Speed<B>10</B> foo",result); query = new QueryParser("text",new WhitespaceAnalyzer()).parse("hi"); highlighter = new Highlighter(new QueryScorer(query)); result = highlighter.getBestFragments(getTS2(), s, 3, "..."); assertEquals("<B>Hi</B>-Speed10 foo",result); query = new QueryParser("text",new WhitespaceAnalyzer()).parse("speed"); highlighter = new Highlighter(new QueryScorer(query)); result = highlighter.getBestFragments(getTS2(), s, 3, "..."); assertEquals("Hi-<B>Speed</B>10 foo",result); query = new QueryParser("text",new WhitespaceAnalyzer()).parse("hispeed"); highlighter = new Highlighter(new QueryScorer(query)); result = highlighter.getBestFragments(getTS2(), s, 3, "..."); assertEquals("<B>Hi-Speed</B>10 foo",result); query = new QueryParser("text",new WhitespaceAnalyzer()).parse("hi speed"); highlighter = new Highlighter(new QueryScorer(query)); result = highlighter.getBestFragments(getTS2(), s, 3, "..."); assertEquals("<B>Hi-Speed</B>10 foo",result); /////////////////// same tests, just put the bigger overlapping token first query = new QueryParser("text",new WhitespaceAnalyzer()).parse("foo"); highlighter = new Highlighter(new QueryScorer(query)); result = highlighter.getBestFragments(getTS2a(), s, 3, "..."); assertEquals("Hi-Speed10 <B>foo</B>",result); query = new QueryParser("text",new WhitespaceAnalyzer()).parse("10"); highlighter = new Highlighter(new QueryScorer(query)); result = highlighter.getBestFragments(getTS2a(), s, 3, "..."); assertEquals("Hi-Speed<B>10</B> foo",result); query = new QueryParser("text",new WhitespaceAnalyzer()).parse("hi"); highlighter = new Highlighter(new QueryScorer(query)); result = highlighter.getBestFragments(getTS2a(), s, 3, "..."); assertEquals("<B>Hi</B>-Speed10 foo",result); query = new QueryParser("text",new WhitespaceAnalyzer()).parse("speed"); highlighter = new Highlighter(new QueryScorer(query)); result = highlighter.getBestFragments(getTS2a(), s, 3, "..."); assertEquals("Hi-<B>Speed</B>10 foo",result); query = new QueryParser("text",new WhitespaceAnalyzer()).parse("hispeed"); highlighter = new Highlighter(new QueryScorer(query)); result = highlighter.getBestFragments(getTS2a(), s, 3, "..."); assertEquals("<B>Hi-Speed</B>10 foo",result); query = new QueryParser("text",new WhitespaceAnalyzer()).parse("hi speed"); highlighter = new Highlighter(new QueryScorer(query)); result = highlighter.getBestFragments(getTS2a(), s, 3, "..."); assertEquals("<B>Hi-Speed</B>10 foo",result); }/* public void testBigramAnalyzer() throws IOException, ParseException { //test to ensure analyzers with none-consecutive start/end offsets //dont double-highlight text //setup index 1 RAMDirectory ramDir = new RAMDirectory(); Analyzer bigramAnalyzer=new CJKAnalyzer(); IndexWriter writer = new IndexWriter(ramDir,bigramAnalyzer , true); Document d = new Document(); Field f = new Field(FIELD_NAME, "java abc def", true, true, true); d.add(f); writer.addDocument(d); writer.close(); IndexReader reader = IndexReader.open(ramDir); IndexSearcher searcher=new IndexSearcher(reader); query = QueryParser.parse("abc", FIELD_NAME, bigramAnalyzer); System.out.println("Searching for: " + query.toString(FIELD_NAME)); hits = searcher.search(query); Highlighter highlighter = new Highlighter(this,new QueryFragmentScorer(query)); for (int i = 0; i < hits.length(); i++) { String text = hits.doc(i).get(FIELD_NAME); TokenStream tokenStream=bigramAnalyzer.tokenStream(FIELD_NAME,new StringReader(text)); String highlightedText = highlighter.getBestFragment(tokenStream,text); System.out.println(highlightedText); } }*/ public String highlightTerm(String originalText , TokenGroup group) { if(group.getTotalScore()<=0) { return originalText; } numHighlights++; //update stats used in assertions return "<b>" + originalText + "</b>"; } public void doSearching(String queryString) throws Exception { QueryParser parser=new QueryParser(FIELD_NAME, new StandardAnalyzer()); query = parser.parse(queryString); doSearching(query); } public void doSearching(Query unReWrittenQuery) throws Exception { searcher = new IndexSearcher(ramDir); //for any multi-term queries to work (prefix, wildcard, range,fuzzy etc) you must use a rewritten query! query=unReWrittenQuery.rewrite(reader); System.out.println("Searching for: " + query.toString(FIELD_NAME)); hits = searcher.search(query); } void doStandardHighlights() throws Exception { Highlighter highlighter =new Highlighter(this,new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(20)); for (int i = 0; i < hits.length(); i++) { String text = hits.doc(i).get(FIELD_NAME); int maxNumFragmentsRequired = 2; String fragmentSeparator = "..."; TokenStream tokenStream=analyzer.tokenStream(FIELD_NAME,new StringReader(text)); String result = highlighter.getBestFragments( tokenStream, text, maxNumFragmentsRequired, fragmentSeparator); System.out.println("\t" + result); } } /* * @see TestCase#setUp() */ protected void setUp() throws Exception { ramDir = new RAMDirectory(); IndexWriter writer = new IndexWriter(ramDir, new StandardAnalyzer(), true); for (int i = 0; i < texts.length; i++) { addDoc(writer, texts[i]); } writer.optimize(); writer.close(); reader = IndexReader.open(ramDir); numHighlights = 0; } private void addDoc(IndexWriter writer, String text) throws IOException { Document d = new Document(); Field f = new Field(FIELD_NAME, text,Field.Store.YES, Field.Index.TOKENIZED); d.add(f); writer.addDocument(d); } /* * @see TestCase#tearDown() */ protected void tearDown() throws Exception { super.tearDown(); }}//===================================================================//========== BEGIN TEST SUPPORTING CLASSES//========== THESE LOOK LIKE, WITH SOME MORE EFFORT THESE COULD BE//========== MADE MORE GENERALLY USEFUL.// TODO - make synonyms all interchangeable with each other and produce// a version that does hyponyms - the "is a specialised type of ...."// so that car = audi, bmw and volkswagen but bmw != audi so different// behaviour to synonyms//===================================================================class SynonymAnalyzer extends Analyzer{ private Map synonyms; public SynonymAnalyzer(Map synonyms) { this.synonyms = synonyms; } /* (non-Javadoc) * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, java.io.Reader) */ public TokenStream tokenStream(String arg0, Reader arg1) { return new SynonymTokenizer(new LowerCaseTokenizer(arg1), synonyms); }}/** * Expands a token stream with synonyms (TODO - make the synonyms analyzed by choice of analyzer) * @author MAHarwood */class SynonymTokenizer extends TokenStream{ private TokenStream realStream; private Token currentRealToken = null; private Map synonyms; StringTokenizer st = null; public SynonymTokenizer(TokenStream realStream, Map synonyms) { this.realStream = realStream; this.synonyms = synonyms; } public Token next() throws IOException { if (currentRealToken == null) { Token nextRealToken = realStream.next(); if (nextRealToken == null) { return null; } String expansions = (String) synonyms.get(nextRealToken.termText()); if (expansions == null) { return nextRealToken; } st = new StringTokenizer(expansions, ","); if (st.hasMoreTokens()) { currentRealToken = nextRealToken; } return currentRealToken; } else { String nextExpandedValue = st.nextToken(); Token expandedToken = new Token( nextExpandedValue, currentRealToken.startOffset(), currentRealToken.endOffset()); expandedToken.setPositionIncrement(0); if (!st.hasMoreTokens()) { currentRealToken = null; st = null; } return expandedToken; } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -