⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 composite.java

📁 简单分词程序 读入一个pdf 输出一个分好词的txt
💻 JAVA
字号:
/* * To change this template, choose Tools | Templates * and open the template in the editor. */package WordSegment;import java.io.File;import java.io.FileInputStream;import java.io.IOException;import java.io.ObjectInputStream;import java.util.ArrayList;import java.util.Vector;/** * * @author icdi */public class composite extends SegStrategy{    FMM fmm;    BMM bmm;         private Dictionary rel_dic;    public composite(){       fmm = new FMM();       bmm = new BMM();       SetRel_Dic("rel_dic.dat");    }        @Override    public Vector Segment(String sentence, Dictionary d) {                  fmm.Segment(sentence, d);       bmm.Segment(sentence, d);                int pos = compareSegPos(fmm.getSegPos(),bmm.getSegPos());       if(pos == -1){           return fmm.getSegResult();       }       else{           int diffIndex = pos;                  Integer f_pos = (Integer) fmm.getSegPos().get(diffIndex);           Integer b_pos = (Integer) bmm.getSegPos().get(diffIndex);           Integer f_pos_r = (Integer) fmm.getSegPos().get(diffIndex+1);           Integer b_pos_r = (Integer) bmm.getSegPos().get(diffIndex+1);                              String f_word_l = fmm.getWord(f_pos);                     String b_word_l = bmm.getWord(b_pos);           String f_word_r = fmm.getWord(f_pos_r);                     String b_word_r = bmm.getWord(b_pos_r);                      int f_word_fre = rel_dic.getFrequency(f_word_l+"-"+f_word_r);           int b_word_fre = rel_dic.getFrequency(b_word_l+"-"+b_word_r);           if(f_word_fre <= b_word_fre)               return bmm.getSegResult();           else               return fmm.getSegResult();                        }             }        //若切分路径完全相同则返回-1    //若不同,则返回在哪个位置出现差别    public int compareSegPos(ArrayList str1,ArrayList str2){        int length = 0;        if(str1.size()!= str2.size()){            if(str1.size()>str2.size())                length = str2.size();            else length = str1.size();            for(int j = 0;j<length;j++){                if(str1.get(j) != str2.get(j)){                    return j;                }                            }        }                for(int i = 0; i<str1.size(); i++){            if(str1.get(i) != str2.get(i))                return i;        }        return -1;    }        public void SetRel_Dic(String dicFile) {		ObjectInputStream objectIn = null;		try {			objectIn = new ObjectInputStream(new FileInputStream(new File(					dicFile)));		} catch (IOException e) {			e.printStackTrace(System.err);			System.exit(1);		}		try {			rel_dic = (Dictionary) (objectIn.readObject());		} catch (ClassNotFoundException e) {			e.printStackTrace(System.err);			System.exit(1);		} catch (IOException e) {			e.printStackTrace(System.err);			System.exit(1);		}	}}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -