⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 bmm.java

📁 简单分词程序 读入一个pdf 输出一个分好词的txt
💻 JAVA
字号:
/**
 * 
 */
package WordSegment;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Hashtable;
import java.util.Vector;

/**
 * Backward maximum matching (BMM) word segmenter.
 *
 * <p>The sentence is scanned from its end towards its beginning. At each step
 * the longest candidate window (bounded by the dictionary's maximum word
 * length) is tried first and shortened one character at a time until it is
 * accepted. A window is accepted when it consists only of ASCII characters,
 * when the dictionary contains it, or when it has shrunk to one character.
 *
 * <p>Besides returning the words, each call to {@link #Segment} records the
 * boundary offsets and an offset-to-word table, available afterwards through
 * {@link #getSegPos()}, {@link #getSegResult()} and {@link #getWord(int)}.
 * These always describe the most recent call only.
 *
 * @author icdi
 */
public class BMM extends SegStrategy {
    /** Not used by this class; kept because the field is package-visible. */
    StringBuffer segPosPool = new StringBuffer();

    /** Boundary offsets of the most recent segmentation, in ascending order. */
    ArrayList<Integer> segPos = new ArrayList<Integer>();

    /** Words of the most recent segmentation, or {@code null} before the first call. */
    Vector<String> result;

    /** Maps a word's start offset to the word, or {@code null} before the first call. */
    Hashtable<Integer, String> ht = null;

    /**
     * Splits {@code sentence} into words using backward maximum matching.
     *
     * @param sentence the text to segment
     * @param dic      dictionary supplying the maximum word length and the
     *                 word-membership test
     * @return the words in sentence order; empty when {@code sentence} is empty
     * @throws NullPointerException if {@code sentence} or {@code dic} is null
     */
    public Vector Segment(String sentence, Dictionary dic) {
        // A reported maximum length below 1 would yield an empty (or invalid)
        // window that never consumes input, so at least one character is
        // always tried.
        final int maxLength = Math.max(1, dic.getMaxLength());

        // Fresh containers on every call: offsets left over from an earlier
        // sentence must not leak into this one, and lists handed out by
        // previous calls stay untouched.
        final Vector<String> seged = new Vector<String>();
        final ArrayList<Integer> positions = new ArrayList<Integer>();
        final Hashtable<Integer, String> words = new Hashtable<Integer, String>();

        // negPos is the exclusive end of the part still to be segmented; it
        // is therefore also the number of characters left.
        int negPos = sentence.length();
        positions.add(negPos);
        int targetLength = maxLength;

        while (negPos > 0) {
            if (targetLength > negPos) {
                targetLength = negPos;
            }
            final int pos = negPos - targetLength;
            final String tempStr = sentence.substring(pos, negPos);

            // The ASCII test comes first so that pure-ASCII windows are taken
            // without consulting the dictionary.
            if (isAscii(tempStr) || dic.checkWord(tempStr) || targetLength == 1) {
                positions.add(pos);
                words.put(pos, tempStr);
                seged.add(tempStr);
                negPos = pos;
                targetLength = maxLength;
            } else {
                targetLength--;
            }
        }

        // Both lists were filled back to front; flip them into sentence order.
        Collections.reverse(positions);
        Collections.reverse(seged);

        segPos = positions;
        ht = words;
        result = seged;
        return seged;
    }

    /**
     * Returns the boundary offsets produced by the most recent call to
     * {@link #Segment}, in ascending order from 0 to the sentence length.
     * Before the first call the list is empty.
     *
     * @return the list of boundary offsets
     */
    public ArrayList getSegPos() {
        return segPos;
    }

    /**
     * Returns the words produced by the most recent call to {@link #Segment}.
     *
     * @return the word list, or {@code null} if nothing has been segmented yet
     */
    public Vector getSegResult() {
        return result;
    }

    /**
     * Looks up the word that starts at the given offset in the most recently
     * segmented sentence.
     *
     * @param i start offset of the word
     * @return the word starting at {@code i}, or {@code null} if no word
     *         starts there or nothing has been segmented yet
     */
    public String getWord(int i) {
        return ht == null ? null : ht.get(i);
    }

    /**
     * Tells whether every character of {@code s} is in the ASCII range.
     * Used instead of comparing {@code getBytes().length} with the string
     * length, whose outcome depends on the platform default charset.
     */
    private static boolean isAscii(String s) {
        for (int k = 0; k < s.length(); k++) {
            if (s.charAt(k) > 0x7F) {
                return false;
            }
        }
        return true;
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -