abstractdocumentwordtokenizer.java

来自「Eclipse高级编程3源码(书本源码)」· Java 代码 · 共 453 行
JAVA
453 行
/*******************************************************************************
 * Copyright (c) 2003 Berthold Daum.
 * All rights reserved. This program and the accompanying materials 
 * are made available under the terms of the Common Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/cpl-v10.html
 * 
 * Contributors:
 *     Berthold Daum
 *******************************************************************************/
package com.bdaum.SpellChecker;

import java.text.BreakIterator;

import javax.swing.text.Segment;

import org.eclipse.jface.text.BadLocationException;
import org.eclipse.jface.text.DocumentEvent;
import org.eclipse.jface.text.IDocument;
import org.eclipse.jface.text.IDocumentListener;

import com.bdaum.SpellChecker.preferences.SpellCheckerPreferences;
import com.swabunga.spell.event.WordTokenizer;

/**
 * Base class for breaking JFace text documents into single words.
 * In addition, it supports text replacements.
 *
 * After an instance is created, it must be initialized by calling
 * the init() method. While initialized, the tokenizer is registered
 * as a listener on the document and keeps its word positions in sync
 * with document changes; call dispose() to unregister it again.
 *
 * Typical use: call hasMoreWords(), and if it returns true, fetch the
 * word with nextWord(). Note that every call of hasMoreWords() advances
 * to the next word.
 */
public abstract class AbstractDocumentWordTokenizer
	implements WordTokenizer, IDocumentListener {
	/** The document **/
	protected IDocument document;
	/** position of selection **/
	protected int selectionOffset = 0;
	/** length of selection; values <= 0 mean "no selection" **/
	protected int selectionLength = 0;

	/** start position of current word in document **/
	protected int currentWordPos = 0;
	/** end position of current word **/
	protected int currentWordEnd = 0;
	/** ignored characters at end of word + 1 (set by getNextWordEnd()) **/
	protected int endDistance = 0;
	/** start position of next word, -1 if there is none **/
	protected int nextWordPos = -1;
	/** The character iterator over the document content **/
	protected Segment text;
	/** Sentence iterator, used to detect words at a sentence start **/
	protected BreakIterator sentenceIterator;
	/** The number of processed words **/
	protected int wordCount = 0;
	/** indicates if there are any more words **/
	protected boolean moreTokens = true;
	/**
	 * true as long as the word at currentWordPos has not been delivered;
	 * getNextWord() then uses the current positions instead of advancing
	 **/
	protected boolean first = true;
	/** indicates that we are at a sentence start **/
	protected boolean startsSentence = true;
	/**
	 * true if the character that terminated the current word is a
	 * letter, i.e. the word is a component of a CamelCase word
	 **/
	private boolean camelCase = false;
	/** holds next word, null if none was found **/
	private String nextWord;
	/** keeps length of current word for later inquiry **/
	private int currentWordLength = 0;
	/** Configuration **/
	private SpellCheckConfiguration config;
	/** option: words containing one of these characters are not checked **/
	private String compoundCharacters;
	/** option: words consisting of a single character are not checked **/
	private boolean ignoreOneLetterWords;

	/**
	 * Method init.
	 * Initialize the tokenizer. May be called again to restart the
	 * tokenizer; all iteration state is reset and the listener is
	 * removed from a previously attached, different document.
	 * @param document - the document to be parsed
	 * @param offset - start position in text
	 * @param len - length of selected area or 0.
	 * @param config - the spell checker configuration
	 */
	public void init(
		IDocument document,
		int offset,
		int len,
		SpellCheckConfiguration config) {
		// Stop listening to a document attached by an earlier init()
		if (this.document != null && this.document != document)
			this.document.removeDocumentListener(this);
		this.config = config;
		this.document = document;
		// Evaluate selection
		this.selectionOffset = offset;
		this.selectionLength = len;
		// Reset the iteration state so that a re-initialized tokenizer
		// does not continue with stale flags (e.g. moreTokens == false)
		first = true;
		moreTokens = true;
		startsSentence = true;
		camelCase = false;
		wordCount = 0;
		currentWordLength = 0;
		currentWordEnd = 0;
		endDistance = 0;
		nextWordPos = -1;
		nextWord = null;
		// Configure the tokenizer
		configure();
		// Iterators over the text
		sentenceIterator = BreakIterator.getSentenceInstance();
		getTextSegment(document);
		sentenceIterator.setText(text);
		currentWordPos = getNextWordStart(text, 0);
		if (selectionLength > 0) {
			// Skip all words that start before the selection
			while (currentWordPos != -1 && currentWordPos < selectionOffset) {
				currentWordEnd = getNextWordEnd(text, currentWordPos);
				camelCase = Character.isLetter(text.current());
				currentWordPos = findStartAfterCurrentWord();
			}
			if (currentWordPos > selectionOffset + selectionLength)
				currentWordPos = -1;
		}
		if (currentWordPos != -1) {
			currentWordEnd = getNextWordEnd(text, currentWordPos);
			camelCase = Character.isLetter(text.current());
			nextWordPos = findStartAfterCurrentWord();
		} else {
			// whole area consists of whitespace
			moreTokens = false;
		}
		// Register as document listener to be
		// informed about document changes
		document.addDocumentListener(this);
	}

	/**
	 * Finds the start of the word that follows the current word.
	 * Must be called directly after getNextWordEnd() and the update of
	 * camelCase: for a CamelCase component the search starts at the
	 * terminating letter itself, otherwise behind the terminating
	 * character (currentWordEnd + endDistance).
	 * @return start position of the following word or -1
	 */
	private int findStartAfterCurrentWord() {
		int searchStart =
			camelCase ? currentWordEnd : currentWordEnd + endDistance;
		return getNextWordStart(text, searchStart);
	}

	/**
	 * Returns the configuration passed to init().
	 * (The misspelled name is kept for compatibility with callers.)
	 * @return the spell checker configuration
	 */
	protected SpellCheckConfiguration getCofiguration() {
		return config;
	}

	/**
	 * Method configure.
	 * Reads the tokenizer options from the configuration.
	 * Is called by init() after the configuration was stored.
	 */
	protected void configure() {
		compoundCharacters =
			config.getString(SpellCheckerPreferences.COMPOUNDCHARACTERS);
		ignoreOneLetterWords =
			config.getBoolean(SpellCheckerPreferences.IGNOREONELETTERWORDS);
	}

	/**
	 * Fetch text segment from document.
	 * Stores a snapshot of the complete document content in the
	 * text field.
	 */
	private void getTextSegment(IDocument document) {
		char[] chars = document.get().toCharArray();
		text = new Segment(chars, 0, chars.length);
	}

	/**
	 * Find start position of next word.
	 * Scans forward from startPos for the first letter or digit for
	 * which isToBeChecked() returns true. Every scanned character is
	 * passed to parseAndTranslateCharacter().
	 * @param text - the text to scan
	 * @param startPos - position at which the scan starts
	 * @return start position of the word, or -1 if startPos is behind
	 *         the end of the text (or of the selection) or if no
	 *         word start was found
	 */
	protected int getNextWordStart(Segment text, int startPos) {
		int endPos =
			(selectionLength <= 0)
				? text.getEndIndex()
				: selectionOffset + selectionLength;
		if (startPos > endPos)
			return -1;
		// count of ignored characters directly before the current one
		// (necessary for character entities in HTML)
		int ignored = 0;
		for (char ch = text.setIndex(startPos);
			ch != Segment.DONE;
			ch = text.next()) {
			ch = parseAndTranslateCharacter(ch);
			if (ch == 0) {
				++ignored;
				continue;
			}
			if (Character.isLetterOrDigit(ch) && isToBeChecked())
				return text.getIndex() - ignored;
			ignored = 0;
		}
		return -1;
	}

	/**
	 * Method isToBeChecked.
	 * Is called at the start of each word.
	 * @return boolean - true, if the word is to be checked.
	 */
	protected abstract boolean isToBeChecked();

	/**
	 * Method isToBeChecked.
	 * Is called at the end of each word.
	 * Rejects null and empty words, one-letter words (if that option is
	 * set) and words containing one of the configured compound
	 * characters.
	 * @param word - the word to be checked, may be null
	 * @return boolean - true, if the word is to be checked.
	 */
	protected boolean isToBeChecked(String word) {
		if (word == null || word.length() == 0)
			return false;
		if (ignoreOneLetterWords && word.length() <= 1)
			return false;
		if (compoundCharacters != null && compoundCharacters.length() > 0) {
			for (int i = 0; i < word.length(); i++) {
				if (compoundCharacters.indexOf(word.charAt(i)) >= 0)
					return false;
			}
		}
		return true;
	}

	/**
	 * Method parseCharacter.
	 * Is called (via parseAndTranslateCharacter()) for the characters
	 * scanned by getNextWordStart() and getNextWordEnd().
	 * @param ch - the current character
	 */
	protected abstract void parseCharacter(char ch);

	/**
	 * Method parseAndTranslateCharacter.
	 *    can be overridden if the current character must be modified.
	 *    This implementation calls parseCharacter() and returns the
	 *    character unchanged.
	 * @param ch - the current character
	 * @return - the modified character, 0 for characters to be ignored
	 */
	protected char parseAndTranslateCharacter(char ch) {
		parseCharacter(ch);
		return ch;
	}

	/**
	 * Computes the end position of the next word.
	 * As a side effect, endDistance is set when a terminating
	 * character is found, and the index of the text segment is left on
	 * that terminating character.
	 * @param text - the text to scan
	 * @param startPos - start position of the word
	 * @return end position of the word; the end index of the text if
	 *         the word extends to the end of the text
	 */
	protected int getNextWordEnd(Segment text, int startPos) {
		int ignored = 0;
		boolean notFirst = false;
		for (char ch = text.setIndex(startPos);
			ch != Segment.DONE;
			ch = text.next()) {
			ch = parseAndTranslateCharacter(ch);
			// skip character that should be ignored
			if (ch == 0) {
				++ignored;
				continue;
			}
			// special treatment for some character that appear
			// in word-like constructs
			if ((ch == '.' || ch == ':' || ch == '\'' || ch == '@')
				&& notFirst) {
				// look ahead: the special character belongs to the word
				// only if a letter or digit follows
				char ch2 = text.next();
				if (ch2 == Segment.DONE || !Character.isLetterOrDigit(ch2)) {
					// step back onto the special character; it ends the word
					text.previous();
					endDistance = ignored + 1;
					return text.getIndex() - ignored;
				}
				// the look-ahead character is consumed as part of the word
				ignored = 0;
				continue;
			}
			// standard check for word end
			if (isWordBreak(ch, notFirst)) {
				endDistance = ignored + 1;
				return text.getIndex() - ignored;
			}
			ignored = 0;
			notFirst = true;
		}
		return text.getEndIndex();
	}

	/**
	 * Tests if a character terminates the current word.
	 * This implementation treats every character that is neither a
	 * letter nor a digit as a word break. Subclasses may override.
	 * @param ch - the current character
	 * @param notFirst - true, if ch is not the first character of the
	 *                   word (not used by this implementation)
	 * @return true, if the character ends the word
	 */
	protected boolean isWordBreak(char ch, boolean notFirst) {
		return !Character.isLetterOrDigit(ch);
	}

	/**
	 * Advances to the next word to be checked. Every call moves on,
	 * so this method must be called exactly once per nextWord() call.
	 * @see com.swabunga.spell.event.WordTokenizer#hasMoreWords()
	 */
	public boolean hasMoreWords() {
		getNextWord();
		return nextWord != null;
	}

	/**
	 * @see com.swabunga.spell.event.WordTokenizer#getCurrentWordPosition()
	 */
	public int getCurrentWordPosition() {
		return currentWordPos;
	}

	/**
	 * @see com.swabunga.spell.event.WordTokenizer#getCurrentWordEnd()
	 */
	public int getCurrentWordEnd() {
		return currentWordEnd;
	}

	/**
	 * Returns the word found by the preceding hasMoreWords() call in
	 * display format. Must only be called after hasMoreWords() has
	 * returned true.
	 * @see com.swabunga.spell.event.WordTokenizer#nextWord()
	 */
	public String nextWord() {
		currentWordLength = nextWord.length();
		wordCount++;
		return deSerialize(nextWord);
	}

	/**
	 * Moves on to the next word that is to be checked and stores it
	 * for nextWord(). Words rejected by isToBeChecked(String) are
	 * skipped. If no further word is found, or the document cannot be
	 * read at the computed position, no word is stored and
	 * hasMoreWords() returns false.
	 */
	public void getNextWord() {
		nextWord = null;
		while (moreTokens) {
			if (!first) {
				currentWordPos = nextWordPos;
				currentWordEnd = getNextWordEnd(text, currentWordPos);
				// We simulate a sentence start because we don't want to
				// ignore word components that start with an upper case letter
				startsSentence = camelCase;
				camelCase = Character.isLetter(text.current());
				nextWordPos = findStartAfterCurrentWord();
				if (!camelCase) {
					int current = sentenceIterator.current();
					if (current == currentWordPos)
						startsSentence = true;
					else if (currentWordEnd > current)
						sentenceIterator.next();
				}
			}
			String candidate = null;
			try {
				candidate =
					document.get(
						currentWordPos,
						currentWordEnd - currentWordPos);
			} catch (BadLocationException ex) {
				// positions no longer match the document: stop iterating
				moreTokens = false;
			}
			first = false;
			if (nextWordPos == -1
				|| (selectionLength > 0
					&& nextWordPos > selectionOffset + selectionLength))
				moreTokens = false;
			// Publish the word only if it is to be checked; a rejected
			// last word must not be left behind in nextWord
			if (candidate != null && isToBeChecked(candidate)) {
				nextWord = candidate;
				break;
			}
		}
	}

	/**
	 * Returns length of current word
	 * @return - length (in document format) of the word delivered by
	 *           the last nextWord() call
	 */
	public int getCurrentWordLength() {
		return currentWordLength;
	}

	/**
	 * Converts a word from document format to display format
	 * Subclasses may override.
	 * @param word - the word in document format
	 * @return - the word in display format
	 */
	public String deSerialize(String word) {
		return word;
	}

	/**
	 * Converts a word from display format to document format
	 * Subclasses may override.
	 * @param word - the word in display format
	 * @return - the word in document format
	 */
	public String serializeWord(String word) {
		return word;
	}

	/**
	 * @see com.swabunga.spell.event.WordTokenizer#getCurrentWordCount()
	 */
	public int getCurrentWordCount() {
		return wordCount;
	}

	/**
	 * Replaces the current word and positions the tokenizer on the
	 * word following the replacement.
	 * @param newWord - the replacement for the current word
	 * @see com.swabunga.spell.event.WordTokenizer
	 *                   #replaceWord(java.lang.String)
	 */
	public void replaceWord(String newWord) {
		if (currentWordPos != -1) {
			// presumably this modifies the document and thereby triggers
			// documentChanged(), which refreshes the text segment and
			// the word positions - TODO confirm against the manager
			SpellCheckerPlugin.getManager().replaceWord(
				currentWordPos,
				currentWordEnd - currentWordPos,
				newWord);
			// Compute the position after the replaced word.
			// Setting first makes the next getNextWord() call deliver
			// the word found here instead of advancing once more.
			first = true;
			currentWordPos =
				getNextWordStart(text, currentWordPos + newWord.length());
			if (currentWordPos != -1) {
				currentWordEnd = getNextWordEnd(text, currentWordPos);
				// NOTE(review): unlike init() and getNextWord() the scan
				// starts on the terminating character itself
				// (endDistance - 1) and camelCase is not recomputed;
				// looks intentional so that a terminating letter can
				// start the next word - confirm
				nextWordPos =
					getNextWordStart(text, currentWordEnd + endDistance - 1);
				// Attach the sentence iterator to the refreshed text
				sentenceIterator.setText(text);
				sentenceIterator.following(currentWordPos);
			} else
				moreTokens = false;
		}
	}

	/**
	 * Returns the complete text of the current text segment.
	 * @see com.swabunga.spell.event.WordTokenizer#getContext()
	 */
	public String getContext() {
		return text.toString();
	}

	/**
	 * Tests if the current word starts a sentence. This is the case if
	 * the tokenizer has flagged a sentence start, if the word starts
	 * within the first two characters of the document, or if the two
	 * characters before the word consist of a period and whitespace.
	 * @see com.swabunga.spell.event.WordTokenizer#isNewSentence()
	 */
	public boolean isNewSentence() {
		// BreakIterator doesn't work when the first word in a sentence is not capitalised,
		// but we need to check for capitalisation
		if (startsSentence || currentWordPos < 2)
			return true;
		String textBefore;
		try {
			textBefore = document.get(currentWordPos - 2, 2);
		} catch (BadLocationException ex) {
			return false;
		}
		return textBefore != null && ".".equals(textBefore.trim());
	}

	/**
	 * Nothing to do before the document changes.
	 * @see org.eclipse.jface.text.IDocumentListener
	 *      #documentAboutToBeChanged(org.eclipse.jface.text.DocumentEvent)
	 */
	public void documentAboutToBeChanged(DocumentEvent event) {
	}

	/**
	 * Refreshes the text segment and shifts all word positions that lie
	 * behind the change offset by the length difference of the change.
	 * @see org.eclipse.jface.text.IDocumentListener
	 *          #documentChanged(org.eclipse.jface.text.DocumentEvent)
	 */
	public void documentChanged(DocumentEvent event) {
		// Update segment
		getTextSegment(document);
		// Evaluate event
		int offset = event.getOffset();
		String iText = event.getText();
		int iLen = (iText == null) ? 0 : iText.length();
		// inserted length minus replaced length
		int increment = iLen - event.getLength();
		// Update word position
		if (currentWordPos > offset)
			currentWordPos += increment;
		if (currentWordEnd > offset)
			currentWordEnd += increment;
		if (nextWordPos > offset)
			nextWordPos += increment;
	}

	/**
	 * Method dispose.
	 * Unregisters the tokenizer from the document. Does nothing if
	 * the tokenizer was never initialized.
	 */
	public void dispose() {
		// We stop to listen
		if (document != null)
			document.removeDocumentListener(this);
	}
}
abstractdocumentwordtokenizer.java - 源码说明

本页面展示了「Eclipse高级编程3源码(书本源码)」中的 abstractdocumentwordtokenizer.java 源码文件，采用 Java 编程语言编写，共 453 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与Eclipse相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?