📄 sample.java
字号:
/*
* Sample.java
*
* Created on 2008年12月2日, 下午12:05
*
* To change this template, choose Tools | Template Manager
* and open the template in the editor.
*/
package newpackage;
import java.util.*;
import java.io.*;
/**
 * Accumulates word counts for a body of text and converts them into smoothed
 * per-word probabilities.
 *
 * <p>Usage suggested by the methods of this class: call {@link #init} with a
 * dictionary, an initial word table and a vocabulary size; call
 * {@link #countFreq} to segment every line of the files found under a path;
 * then call {@link #getP} to replace the counts in the word table with
 * probabilities.
 *
 * <p>All fields are package-visible and mutable, and no method synchronizes.
 *
 * @author Administrator
 */
public class Sample {
    /** Look-ahead window, in characters, used by {@link #wordSegment}. */
    private static final int MAX_WINDOW = 12;

    /** File read by {@link #getP}, one entry per line, resolved against the working directory. */
    private static final String VOCABULARY_FILE = "vocabulary.txt";

    /** Dictionary consulted through {@code Find(String)} while segmenting. */
    Dictionary dic;

    /**
     * Word table: keys are words, values are {@link Float}s. The values are
     * occurrence counts while counting and probabilities after {@link #getP}.
     * Kept as a raw {@code HashMap} because the field and the {@link #init}
     * parameter are visible to other classes.
     */
    HashMap wordTable = new HashMap();

    /** Number of dictionary words longer than one character matched so far. */
    int totleWord;

    /** Vocabulary size supplied to {@link #init}; used as a smoothing term in {@link #getP}. */
    int vocNum;

    /** Creates an empty sample; call {@link #init} before using it. */
    public Sample() {
    }

    /**
     * Sets the dictionary and vocabulary size, resets the word total to zero
     * and copies every entry of {@code hm} into the word table.
     *
     * <p>Entries already present in the word table are not removed; entries
     * with the same key are overwritten.
     *
     * @param newDic dictionary used by {@link #wordSegment}
     * @param hm     entries to copy into the word table; must not be null
     * @param n      vocabulary size used by {@link #getP}
     */
    @SuppressWarnings("unchecked") // raw HashMap kept for interface compatibility
    public void init(Dictionary newDic, HashMap hm, int n) {
        dic = newDic;
        totleWord = 0;
        wordTable.putAll(hm); // copy all segmentation results into wordTable
        vocNum = n;
    }

    /**
     * Segments a sentence by forward longest-match against the dictionary and
     * updates the word statistics.
     *
     * <p>At each position the longest candidate is tried first and shortened
     * one character at a time until the dictionary accepts it. A match longer
     * than one character is counted; a single-character match only advances
     * the position. If nothing matches, the position advances by one character.
     *
     * @param Sentence text to segment; a null sentence is treated as empty
     * @return always 1
     */
    public int wordSegment(String Sentence) {
        if (Sentence == null) {
            return 1;
        }
        int senLen = Sentence.length();
        int i = 0;
        while (i < senLen) {
            // Exclusive end of the first (longest) candidate.
            // NOTE(review): mid-sentence the longest candidate is MAX_WINDOW - 1
            // characters, but within MAX_WINDOW of the end it can reach
            // MAX_WINDOW characters. Preserved as-is; confirm whether intended.
            int longestEnd = i + MAX_WINDOW < senLen ? i + MAX_WINDOW - 1 : senLen;
            int matchEnd = -1;
            for (int j = longestEnd; j > i; j--) {
                String word = Sentence.substring(i, j);
                if (dic.Find(word)) {
                    if (j > i + 1) {
                        recordOccurrence(word);
                    }
                    matchEnd = j;
                    break;
                }
            }
            // Resume after the match, or skip one character when nothing matched.
            i = matchEnd > i ? matchEnd : i + 1;
        }
        return 1;
    }

    /**
     * Counts one occurrence of a multi-character word: always increments the
     * word total, and increments the word's own count only when the word is
     * already a key of the word table.
     */
    @SuppressWarnings("unchecked") // raw HashMap kept for interface compatibility
    private void recordOccurrence(String word) {
        totleWord++; // total number of words seen
        if (wordTable.containsKey(word)) {
            // number of times this word occurred in the text
            float c = ((Float) wordTable.get(word)).floatValue() + 1;
            wordTable.put(word, Float.valueOf(c));
        }
    }

    /**
     * Replaces the count of every vocabulary word present in the word table
     * with {@code (count + 1) / (vocNum + totleWord)}.
     *
     * <p>The vocabulary is read from {@code vocabulary.txt}, one word per
     * line. Each word is converted at most once per call, so a duplicated line
     * does not smooth an already-converted value again. Words in the table
     * that never appear in the file are left untouched. Calling this method a
     * second time would treat the stored probabilities as counts.
     *
     * <p>An {@link IOException} is printed to standard output and otherwise
     * ignored; entries converted before the failure stay converted.
     */
    @SuppressWarnings("unchecked") // raw HashMap kept for interface compatibility
    public void getP() {
        try {
            BufferedReader in = new BufferedReader(new FileReader(VOCABULARY_FILE));
            try {
                Set<String> converted = new HashSet<String>();
                String s;
                while ((s = in.readLine()) != null) {
                    // add() returns false for a word already converted in this call.
                    if (wordTable.containsKey(s) && converted.add(s)) {
                        float nk = ((Float) wordTable.get(s)).floatValue();
                        float p = (nk + 1) / (vocNum + totleWord);
                        wordTable.put(s, Float.valueOf(p));
                    }
                }
            } finally {
                in.close(); // release the file handle even when reading fails
            }
        } catch (IOException e) {
            System.out.println(e);
        }
    }

    /**
     * Segments, line by line, every file reported by {@code FileSex} for the
     * given path.
     *
     * <p>{@code FileSex} is defined elsewhere; this method only relies on
     * {@code amain(File)} followed by {@code getDir()} yielding file paths,
     * some of which may be null (presumably the files found under the path).
     *
     * <p>An {@link IOException} is printed to standard output and stops the
     * processing of the remaining files.
     *
     * @param fileName path handed to {@code FileSex.amain}
     */
    public void countFreq(String fileName) {
        try {
            FileSex ss = new FileSex();
            File myDir = new File(fileName);
            ss.amain(myDir);
            for (String str : ss.getDir()) {
                if (str != null) {
                    segmentFile(str);
                }
            }
        } catch (IOException e) {
            System.out.println(e);
        }
    }

    /** Feeds every line of the file at {@code path} to {@link #wordSegment}, then closes the file. */
    private void segmentFile(String path) throws IOException {
        BufferedReader in = new BufferedReader(new FileReader(path));
        try {
            String s;
            while ((s = in.readLine()) != null) {
                wordSegment(s);
            }
        } finally {
            in.close(); // release the file handle even when reading fails
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -