HighlighterTest.java
  {
    String text = hits.doc(i).get(FIELD_NAME);
    TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
    String result = highlighter.getBestFragment(tokenStream, text);
    System.out.println("\t" + result);
  }
  assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
      numHighlights == 4);
}

public void testGetTextFragments() throws Exception {
  doSearching("Kennedy");
  Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
  highlighter.setTextFragmenter(new SimpleFragmenter(20));
  for (int i = 0; i < hits.length(); i++) {
    String text = hits.doc(i).get(FIELD_NAME);
    TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
    String stringResults[] = highlighter.getBestFragments(tokenStream, text, 10);
    tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
    TextFragment fragmentResults[] = highlighter.getBestTextFragments(tokenStream, text, true, 10);
    assertTrue("Failed to find correct number of text Fragments: " + fragmentResults.length
        + " vs " + stringResults.length, fragmentResults.length == stringResults.length);
    for (int j = 0; j < stringResults.length; j++) {
      System.out.println(fragmentResults[j]);
      assertTrue("Failed to find same text Fragments: " + fragmentResults[j] + " found",
          fragmentResults[j].toString().equals(stringResults[j]));
    }
  }
}

public void testMaxSizeHighlight() throws Exception {
  doSearching("meat");
  Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
  highlighter.setMaxDocBytesToAnalyze(30);
  TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0]));
  highlighter.getBestFragment(tokenStream, texts[0]);
  assertTrue("Setting MaxDocBytesToAnalyze should have prevented "
      + "us from finding matches for this record: " + numHighlights + " found",
      numHighlights == 0);
}

public void testMaxSizeHighlightTruncates() throws IOException {
  String goodWord = "goodtoken";
  String stopWords[] = { "stoppedtoken" };
  TermQuery query = new TermQuery(new Term("data", goodWord));
  SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
  Highlighter hg = new Highlighter(fm, new QueryScorer(query));
  hg.setTextFragmenter(new NullFragmenter());
  String match = null;
  StringBuffer sb = new StringBuffer();
  sb.append(goodWord);
  for (int i = 0; i < 10000; i++) {
    sb.append(" ");
    sb.append(stopWords[0]);
  }
  hg.setMaxDocBytesToAnalyze(100);
  match = hg.getBestFragment(new StandardAnalyzer(stopWords), "data", sb.toString());
  assertTrue("Matched text should be no more than 100 chars in length ",
      match.length() < hg.getMaxDocBytesToAnalyze());
  //add another tokenized word to the overall length - but set way beyond
  //the length of text under consideration (after a large slug of stop words + whitespace)
  sb.append(" ");
  sb.append(goodWord);
  match = hg.getBestFragment(new StandardAnalyzer(stopWords), "data", sb.toString());
  assertTrue("Matched text should be no more than 100 chars in length ",
      match.length() < hg.getMaxDocBytesToAnalyze());
}

public void testUnRewrittenQuery() throws IOException, ParseException {
  //test to show how rewritten query can still be used
  searcher = new IndexSearcher(ramDir);
  Analyzer analyzer = new StandardAnalyzer();
  QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
  Query query = parser.parse("JF? or Kenned*");
  System.out.println("Searching with primitive query");
  //forget to set this and...
  //query=query.rewrite(reader);
  Hits hits = searcher.search(query);

  //create an instance of the highlighter with the tags used to surround highlighted text
  //QueryHighlightExtractor highlighter = new QueryHighlightExtractor(this, query, new StandardAnalyzer());
  Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
  highlighter.setTextFragmenter(new SimpleFragmenter(40));
  int maxNumFragmentsRequired = 3;
  for (int i = 0; i < hits.length(); i++) {
    String text = hits.doc(i).get(FIELD_NAME);
    TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
    String highlightedText = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
    System.out.println(highlightedText);
  }
  //We expect zero highlights if a multi-term query is not rewritten!
  assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
      numHighlights == 0);
}

public void testNoFragments() throws Exception {
  doSearching("AnInvalidQueryWhichShouldYieldNoResults");
  Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
  for (int i = 0; i < texts.length; i++) {
    String text = texts[i];
    TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
    String result = highlighter.getBestFragment(tokenStream, text);
    assertNull("The highlight result should be null for text with no query terms", result);
  }
}

/**
 * Demonstrates creation of an XHTML compliant doc using new encoding facilities.
 * @throws Exception
 */
public void testEncoding() throws Exception {
  String rawDocContent = "\"Smith & sons' prices < 3 and >4\" claims article";
  //run the highlighter on the raw content (scorer does not score any tokens for
  //highlighting but scores a single fragment for selection)
  Highlighter highlighter = new Highlighter(this, new SimpleHTMLEncoder(), new Scorer() {
    public void startFragment(TextFragment newFragment) {
    }

    public float getTokenScore(Token token) {
      return 0;
    }

    public float getFragmentScore() {
      return 1;
    }
  });
  highlighter.setTextFragmenter(new SimpleFragmenter(2000));
  TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(rawDocContent));
  String encodedSnippet = highlighter.getBestFragments(tokenStream, rawDocContent, 1, "");

  //An ugly bit of XML creation:
  String xhtml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
      + "<!DOCTYPE html\n"
      + "PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n"
      + "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"
      + "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\n"
      + "<head>\n"
      + "<title>My Test HTML Document</title>\n"
      + "</head>\n"
      + "<body>\n"
      + "<h2>" + encodedSnippet + "</h2>\n"
      + "</body>\n"
      + "</html>";

  //now an ugly bit of XML parsing to test the snippet is encoded OK
  DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
  DocumentBuilder db = dbf.newDocumentBuilder();
  org.w3c.dom.Document doc = db.parse(new ByteArrayInputStream(xhtml.getBytes()));
  Element root = doc.getDocumentElement();
  NodeList nodes = root.getElementsByTagName("body");
  Element body = (Element) nodes.item(0);
  nodes = body.getElementsByTagName("h2");
  Element h2 = (Element) nodes.item(0);
  String decodedSnippet = h2.getFirstChild().getNodeValue();
  assertEquals("XHTML Encoding should have worked:", rawDocContent, decodedSnippet);
}

public void testMultiSearcher() throws Exception {
  //setup index 1
  RAMDirectory ramDir1 = new RAMDirectory();
  IndexWriter writer1 = new IndexWriter(ramDir1, new StandardAnalyzer(), true);
  Document d = new Document();
  Field f = new Field(FIELD_NAME, "multiOne", Field.Store.YES, Field.Index.TOKENIZED);
  d.add(f);
  writer1.addDocument(d);
  writer1.optimize();
  writer1.close();
  IndexReader reader1 = IndexReader.open(ramDir1);

  //setup index 2
  RAMDirectory ramDir2 = new RAMDirectory();
  IndexWriter writer2 = new IndexWriter(ramDir2, new StandardAnalyzer(), true);
  d = new Document();
  f = new Field(FIELD_NAME, "multiTwo", Field.Store.YES, Field.Index.TOKENIZED);
  d.add(f);
  writer2.addDocument(d);
  writer2.optimize();
  writer2.close();
  IndexReader reader2 = IndexReader.open(ramDir2);

  IndexSearcher searchers[] = new IndexSearcher[2];
  searchers[0] = new IndexSearcher(ramDir1);
  searchers[1] = new IndexSearcher(ramDir2);
  MultiSearcher multiSearcher = new MultiSearcher(searchers);
  QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
  query = parser.parse("multi*");
  System.out.println("Searching for: " + query.toString(FIELD_NAME));
  //at this point the multisearcher calls combine(query[])
  hits = multiSearcher.search(query);

  //query = QueryParser.parse("multi*", FIELD_NAME, new StandardAnalyzer());
  Query expandedQueries[] = new Query[2];
  expandedQueries[0] = query.rewrite(reader1);
  expandedQueries[1] = query.rewrite(reader2);
  query = query.combine(expandedQueries);

  //create an instance of the highlighter with the tags used to surround highlighted text
  Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
  for (int i = 0; i < hits.length(); i++) {
    String text = hits.doc(i).get(FIELD_NAME);
    TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
    String highlightedText = highlighter.getBestFragment(tokenStream, text);
    System.out.println(highlightedText);
  }
  assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
      numHighlights == 2);
}

public void testFieldSpecificHighlighting() throws IOException, ParseException {
  String docMainText = "fred is one of the people";
  QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
  Query query = parser.parse("fred category:people");

  //highlighting respects fieldnames used in query
  QueryScorer fieldSpecificScorer = new QueryScorer(query, "contents");
  Highlighter fieldSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(), fieldSpecificScorer);
  fieldSpecificHighlighter.setTextFragmenter(new NullFragmenter());
  String result = fieldSpecificHighlighter.getBestFragment(analyzer, FIELD_NAME, docMainText);
  assertEquals("Should match", result, "<B>fred</B> is one of the people");

  //highlighting does not respect fieldnames used in query
  QueryScorer fieldInSpecificScorer = new QueryScorer(query);
  Highlighter fieldInSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(), fieldInSpecificScorer);
  fieldInSpecificHighlighter.setTextFragmenter(new NullFragmenter());
  result = fieldInSpecificHighlighter.getBestFragment(analyzer, FIELD_NAME, docMainText);
  assertEquals("Should match", result, "<B>fred</B> is one of the <B>people</B>");

  reader.close();
}

protected TokenStream getTS2() {
  //String s = "Hi-Speed10 foo";
  return new TokenStream() {
    Iterator iter;
    List lst;
    {
      lst = new ArrayList();
      Token t;
      t = new Token("hi", 0, 2);
      lst.add(t);
      t = new Token("hispeed", 0, 8);
      lst.add(t);
      t = new Token("speed", 3, 8);
      t.setPositionIncrement(0);
      lst.add(t);
      t = new Token("10", 8, 10);
      lst.add(t);
      t = new Token("foo", 11, 14);
      lst.add(t);
      iter = lst.iterator();
    }

    public Token next() throws IOException {
      return iter.hasNext() ? (Token) iter.next() : null;
    }
  };
}
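For readers skimming these tests, the core usage pattern they all exercise reduces to a few calls: build a Query, wrap it in a QueryScorer, hand that to a Highlighter, and feed it a TokenStream over the stored text. Below is a minimal standalone sketch assuming the same Lucene 2.x-era contrib-highlighter API used above; the class name, field name, and sample text are illustrative assumptions, not part of the test file.

import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;

// Hypothetical demo class, not part of HighlighterTest
public class HighlighterSketch {
  public static void main(String[] args) throws Exception {
    String fieldName = "contents";                      // illustrative field name
    String text = "John Kennedy has been shot";         // illustrative stored text
    Analyzer analyzer = new StandardAnalyzer();

    // Parse the user query and score fragments against it;
    // the default formatter wraps matching terms in <B>...</B>
    Query query = new QueryParser(fieldName, analyzer).parse("Kennedy");
    Highlighter highlighter = new Highlighter(new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(40));   // ~40-char fragments

    // Re-tokenize the stored text and print the best-scoring fragment
    TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
    System.out.println(highlighter.getBestFragment(tokenStream, text));
  }
}

The tests above vary this basic recipe by swapping in different Scorer, Fragmenter, Formatter, and Encoder implementations, and by capping how much text is analyzed with setMaxDocBytesToAnalyze.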