numberknife.java

来自「庖丁分词模块」· Java 代码 · 共 71 行

JAVA
71
字号
/**
 * Copyright 2007 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package net.paoding.analysis.knife;

import net.paoding.analysis.dictionary.Dictionary;
import net.paoding.analysis.dictionary.Hit;

/**
 * 
 * @author Zhiliang Wang [qieqie.wang@gmail.com]
 * 
 */
public class NumberKnife extends CharKnife implements DictionariesWare {

	private Dictionary units;

	public NumberKnife() {
	}

	public NumberKnife(Dictionaries dictionaries) {
		setDictionaries(dictionaries);
	}

//	@Override
	public void setDictionaries(Dictionaries dictionaries) {
		units = dictionaries.getUnits();
	}

	public boolean assignable(CharSequence beaf, int index) {
		return CharSet.isArabianNumber(beaf.charAt(index));
	}

	@Override
	protected boolean isTokenChar(CharSequence beaf, int history, int index) {
		char ch = beaf.charAt(index);
		return CharSet.isArabianNumber(ch) || ch == '.';
	}

	protected void collect(Collector collector, CharSequence beaf, int offset,
			int end, String word) {
		super.collect(collector, beaf, offset, end, word);
		if (units != null) {
			Hit wd;
			int i = end + 1;
			while (i <= beaf.length()
					&& (wd = units.search(beaf, end, i - end)).isHit()) {
				collector.collect(word + beaf.subSequence(end, i), offset, i);
				end++;
				if (!wd.isUnclosed()) {
					break;
				}
				i++;
			}
		}
	}

}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?