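// analyzerutils.java
//
// From "A Lucene-based program for viewing detailed Analyzer class information" · Java code · 65 lines
//
// Language: Java
// Lines: 65
// (hosting-page font-size control)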
package com;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;

/**
 * Console helpers for inspecting the tokens that a Lucene {@link Analyzer}
 * produces for a piece of text.
 *
 * <p>All methods are static; the display methods write to {@code System.out}.
 */
public class AnalyzerUtils {
	/**
	 * Demo entry point: prints the fully detailed token listing that a
	 * {@link SimpleAnalyzer} produces for a fixed sample sentence.
	 *
	 * @param args ignored
	 * @throws IOException if the analysis fails
	 */
	public static void main(String[] args) throws IOException {
		displayTokensWithFullDetails(new SimpleAnalyzer(), "The quick brown fox....");
	}

	/**
	 * Runs {@code text} through {@code analyzer} (using the field name
	 * {@code "contents"}) and collects every token the resulting stream emits.
	 *
	 * @param analyzer the analyzer to tokenize with
	 * @param text     the text to analyze
	 * @return the tokens in the order the stream returned them; empty if the
	 *         stream produced none
	 * @throws IOException if reading from or closing the token stream fails
	 */
	public static Token[] tokensFromAnalysis(Analyzer analyzer, String text) throws IOException {
		TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
		ArrayList tokenList = new ArrayList();
		try {
			// The stream signals exhaustion by returning null.
			for (Token token = stream.next(); token != null; token = stream.next()) {
				tokenList.add(token);
			}
		} finally {
			// Release the stream (and the reader behind it) even if next() throws.
			stream.close();
		}
		return (Token[]) tokenList.toArray(new Token[tokenList.size()]);
	}

	/**
	 * Prints each token's term text as {@code [term] } on a single line
	 * (no trailing newline is written).
	 *
	 * @param analyzer the analyzer to tokenize with
	 * @param text     the text to analyze
	 * @throws IOException if the analysis fails
	 */
	public static void displayTokens(Analyzer analyzer, String text) throws IOException {
		Token[] tokens = tokensFromAnalysis(analyzer, text);
		for (int i = 0; i < tokens.length; i++) {
			System.out.print("[" + tokens[i].termText() + "] ");
		}
	}

	/**
	 * Prints tokens grouped by position. Each group is introduced by a new
	 * line of the form {@code position: } and every token is rendered as
	 * {@code [term:startOffset->endOffset:type] }.
	 *
	 * @param analyzer the analyzer to tokenize with
	 * @param text     the text to analyze
	 * @throws IOException if the analysis fails
	 */
	public static void displayTokensWithFullDetails(Analyzer analyzer, String text) throws IOException {
		Token[] tokens = tokensFromAnalysis(analyzer, text);
		int position = 0;
		for (int i = 0; i < tokens.length; i++) {
			Token token = tokens[i];
			position = advancePosition(position, token);
			System.out.print("[" + token.termText() + ":" + token.startOffset() + "->" + token.endOffset() + ":" + token.type() + "] ");
		}
	}

	/**
	 * Prints tokens grouped by position. Each group is introduced by a new
	 * line of the form {@code position: } and every token is rendered as
	 * {@code [term] }.
	 *
	 * @param analyzer the analyzer to tokenize with
	 * @param text     the text to analyze
	 * @throws IOException if the analysis fails
	 */
	public static void displayTokensWithPositions(Analyzer analyzer, String text) throws IOException {
		Token[] tokens = tokensFromAnalysis(analyzer, text);
		int position = 0;
		for (int i = 0; i < tokens.length; i++) {
			Token token = tokens[i];
			position = advancePosition(position, token);
			System.out.print("[" + token.termText() + "] ");
		}
	}

	/**
	 * Applies a token's position increment to the running position. When the
	 * increment is positive, starts a new output line headed by the updated
	 * position; otherwise prints nothing, so the token stays on the current line.
	 *
	 * @param position the running position before this token
	 * @param token    the token about to be printed
	 * @return the running position after this token
	 */
	private static int advancePosition(int position, Token token) {
		int increment = token.getPositionIncrement();
		if (increment > 0) {
			position = position + increment;
			System.out.println();
			System.out.print(position + ": ");
		}
		return position;
	}
}

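// Keyboard shortcuts of the hosting page's code viewer (not part of the program):
//
//   Copy code:          Ctrl + C
//   Search code:        Ctrl + F
//   Full-screen mode:   F11
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -
//   Show shortcuts:     ?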