⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fmm.java

📁 简单分词程序 读入一个pdf 输出一个分好词的txt
💻 JAVA
字号:
/**
 * 
 */
package WordSegment;

import java.util.ArrayList;
import java.util.Hashtable;
import java.util.Vector;

/**
 * Forward-maximum-matching (FMM) word segmenter.
 *
 * <p>Starting at the left edge of the sentence, the segmenter takes the
 * longest window allowed by the dictionary and shrinks it one character at a
 * time until the window is accepted as a token. Besides the token list, the
 * boundaries and the tokens keyed by their end offset are kept on the instance
 * so they can be queried after {@link #Segment(String, Dictionary)} returns.
 *
 * <p>The instance keeps the state of the most recent call only and performs no
 * synchronization of its own.
 *
 * @author icdi
 */
public class FMM extends SegStrategy {
    /** Token boundaries of the last segmented sentence; always starts with 0. */
    ArrayList<Integer> segPos = new ArrayList<Integer>();

    /** Tokens produced by the last call to Segment, or null before the first call. */
    Vector<String> result;

    /** Maps the end offset (exclusive) of each token to the token text. */
    Hashtable<Integer, String> ht = null;

    /**
     * Splits {@code sentence} into tokens by forward maximum matching.
     *
     * <p>A window is accepted as a token when it consists only of ASCII
     * characters, when {@code dic.checkWord} accepts it, or when it has shrunk
     * to a single character. Otherwise the window is shortened by one
     * character and tried again.
     *
     * @param sentence text to segment
     * @param dic dictionary supplying the maximum word length and the word lookup
     * @return the tokens in sentence order; empty for an empty sentence
     */
    public Vector Segment(String sentence, Dictionary dic) {
        // A non-positive maximum length would otherwise produce an empty window
        // that never advances (0) or an invalid substring range (negative).
        int maxLength = Math.max(1, dic.getMaxLength());
        int length = sentence.length();
        Vector<String> seged = new Vector<String>();

        // Start every call from a clean slate so boundaries from a previously
        // segmented sentence do not leak into this one.
        segPos = new ArrayList<Integer>();
        ht = new Hashtable<Integer, String>();
        int pos = 0;
        segPos.add(pos);

        int targetLength = maxLength;
        while (pos < length) {
            int restLength = length - pos;
            if (targetLength > restLength) {
                targetLength = restLength;
            }
            String tempStr = sentence.substring(pos, pos + targetLength);

            // The ASCII test runs first so pure-ASCII windows never reach the
            // dictionary; the length-1 fallback guarantees forward progress.
            if (isAscii(tempStr) || dic.checkWord(tempStr) || targetLength == 1) {
                seged.add(tempStr);
                pos += targetLength;
                segPos.add(pos);
                ht.put(pos, tempStr);
                targetLength = maxLength;
            } else {
                targetLength--;
            }
        }

        result = seged;
        return seged;
    }

    /**
     * Returns the token boundaries recorded by the last call to Segment.
     *
     * @return offsets into the sentence, beginning with 0 and followed by the
     *         end offset of every token
     */
    public ArrayList getSegPos() {
        return segPos;
    }

    /**
     * Returns the tokens produced by the last call to Segment.
     *
     * @return the token list, or {@code null} if Segment has not been called yet
     */
    public Vector getSegResult() {
        return result;
    }

    /**
     * Looks up the token that ends at the given offset.
     *
     * @param i end offset (exclusive) of the token within the last segmented sentence
     * @return the token ending at {@code i}, or {@code null} if no token ends
     *         there or Segment has not been called yet
     */
    public String getWord(int i) {
        if (ht == null) {
            return null;
        }
        return ht.get(i);
    }

    /**
     * Tells whether a character is an unaccented Latin letter.
     *
     * @param tem character to test
     * @return {@code true} for 'A'..'Z' and 'a'..'z', bounds included
     */
    public boolean isEnglish(char tem) {
        return (tem >= 'A' && tem <= 'Z') || (tem >= 'a' && tem <= 'z');
    }

    /**
     * Scans {@code temp} for English letters.
     *
     * <p>Slot 0 of the returned array holds the index of the last English
     * letter found; it stays 0 when there is none, which is indistinguishable
     * from a letter at index 0. Slot 1 is never written and is always 0.
     * NOTE(review): the two-element array suggests a start/end pair was
     * intended - confirm with callers before relying on slot 1.
     *
     * @param temp text to scan
     * @return a two-element array as described above
     */
    public int[] getEnglishPos(String temp) {
        int[] rest = new int[2];
        for (int i = 0; i < temp.length(); i++) {
            if (isEnglish(temp.charAt(i))) {
                rest[0] = i;
            }
        }
        return rest;
    }

    /**
     * Checks that every character of {@code text} is in the 7-bit ASCII range.
     * Inspecting the characters directly keeps the result independent of the
     * platform default charset.
     */
    private static boolean isAscii(String text) {
        for (int i = 0; i < text.length(); i++) {
            if (text.charAt(i) > 0x7F) {
                return false;
            }
        }
        return true;
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -