⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 analyzerutils.java

📁 基于 Lucene 的全文检索程序，可以对 Office、PDF 等文件进行检索
💻 JAVA
字号:
package org.tatan.framework;
import junit.framework.Assert; 
import org.apache.lucene.analysis.Analyzer; 
import org.apache.lucene.analysis.SimpleAnalyzer; 
import org.apache.lucene.analysis.Token; 
import org.apache.lucene.analysis.TokenStream; 
import org.apache.lucene.analysis.standard.StandardAnalyzer; 
import java.io.IOException; 
import java.io.StringReader; 
import java.util.ArrayList;
/**
 * Static helpers for running text through a Lucene {@link Analyzer} and then
 * printing the resulting tokens or asserting on them in tests.
 */
public class AnalyzerUtils {

  /**
   * Analyzes {@code text} with {@code analyzer} and returns every token produced.
   *
   * <p>The token stream is requested for the field name {@code "contents"} and is
   * read until {@code next()} returns {@code null}. The stream is always closed
   * before this method returns, even when reading fails.
   *
   * @param analyzer the analyzer used to tokenize the text
   * @param text     the text to analyze
   * @return the tokens in the order the stream produced them (empty if none)
   * @throws IOException if reading from or closing the token stream fails
   */
  public static Token[] tokensFromAnalysis(Analyzer analyzer,
                                           String text) throws IOException {
    TokenStream stream =
        analyzer.tokenStream("contents", new StringReader(text));
    ArrayList tokenList = new ArrayList();
    try {
      for (Token token = stream.next(); token != null; token = stream.next()) {
        tokenList.add(token);
      }
    } finally {
      // Release the stream (and the reader underneath it) on every exit path.
      stream.close();
    }
    return (Token[]) tokenList.toArray(new Token[tokenList.size()]);
  }

  /**
   * Prints the term text of every token as {@code [term] } on a single line of
   * standard output. No trailing line break is written.
   *
   * @param analyzer the analyzer used to tokenize the text
   * @param text     the text to analyze
   * @throws IOException if analysis fails
   */
  public static void displayTokens(Analyzer analyzer,
                                   String text) throws IOException {
    Token[] tokens = tokensFromAnalysis(analyzer, text);
    for (int i = 0; i < tokens.length; i++) {
      System.out.print("[" + tokens[i].termText() + "] ");
    }
  }

  /**
   * Prints the tokens grouped by position: each token with a positive position
   * increment starts a new output line prefixed with the running position, and
   * tokens with a non-positive increment are appended to the current line.
   *
   * @param analyzer the analyzer used to tokenize the text
   * @param text     the text to analyze
   * @throws IOException if analysis fails
   */
  public static void displayTokensWithPositions(Analyzer analyzer,
                                                String text) throws IOException {
    printTokensByPosition(tokensFromAnalysis(analyzer, text), false);
  }

  /**
   * Prints the tokens grouped by position like
   * {@link #displayTokensWithPositions(Analyzer, String)}, but shows each token
   * as {@code [term:startOffset->endOffset:type] }.
   *
   * @param analyzer the analyzer used to tokenize the text
   * @param text     the text to analyze
   * @throws IOException if analysis fails
   */
  public static void displayTokensWithFullDetails(
      Analyzer analyzer, String text) throws IOException {
    printTokensByPosition(tokensFromAnalysis(analyzer, text), true);
  }

  /**
   * Asserts that {@code tokens} has the same length as {@code strings} and that
   * the term text of each token equals the string at the same index.
   *
   * @param tokens  the actual tokens
   * @param strings the expected term texts, in order
   */
  public static void assertTokensEqual(Token[] tokens,
                                       String[] strings) {
    Assert.assertEquals("token count", strings.length, tokens.length);

    for (int i = 0; i < tokens.length; i++) {
      Assert.assertEquals("index " + i, strings[i], tokens[i].termText());
    }
  }

  /**
   * Shared printing loop for the position-aware display methods.
   *
   * @param tokens      the tokens to print, in stream order
   * @param fullDetails {@code true} to include offsets and type for each token,
   *                    {@code false} to print only the term text
   */
  private static void printTokensByPosition(Token[] tokens, boolean fullDetails) {
    int position = 0;
    for (int i = 0; i < tokens.length; i++) {
      Token token = tokens[i];

      int increment = token.getPositionIncrement();
      if (increment > 0) {
        // A positive increment moves to a new position: start a new line.
        position = position + increment;
        System.out.println();
        System.out.print(position + ": ");
      }

      if (fullDetails) {
        System.out.print("[" + token.termText() + ":"
            + token.startOffset() + "->"
            + token.endOffset() + ":"
            + token.type() + "] ");
      } else {
        System.out.print("[" + token.termText() + "] ");
      }
    }
    System.out.println();
  }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -